From c602537de3c137e55582d7fccfb18e50f1cd9c83 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 20 Dec 2024 20:22:42 +0100 Subject: [PATCH 0001/2411] apparmor: Use str_yes_no() helper function Remove hard-coded strings by using the str_yes_no() helper function. Fix a typo in a comment: s/unpritable/unprintable/ Signed-off-by: Thorsten Blum Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 2c0185ebc900..1bce9a7d2129 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -997,7 +997,7 @@ static int aa_sfs_seq_show(struct seq_file *seq, void *v) switch (fs_file->v_type) { case AA_SFS_TYPE_BOOLEAN: - seq_printf(seq, "%s\n", fs_file->v.boolean ? "yes" : "no"); + seq_printf(seq, "%s\n", str_yes_no(fs_file->v.boolean)); break; case AA_SFS_TYPE_STRING: seq_printf(seq, "%s\n", fs_file->v.string); @@ -1006,7 +1006,7 @@ static int aa_sfs_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%#08lx\n", fs_file->v.u64); break; default: - /* Ignore unpritable entry types. */ + /* Ignore unprintable entry types. */ break; } @@ -1152,7 +1152,7 @@ static int seq_ns_stacked_show(struct seq_file *seq, void *v) struct aa_label *label; label = begin_current_label_crit_section(); - seq_printf(seq, "%s\n", label->size > 1 ? "yes" : "no"); + seq_printf(seq, "%s\n", str_yes_no(label->size > 1)); end_current_label_crit_section(label); return 0; @@ -1175,7 +1175,7 @@ static int seq_ns_nsstacked_show(struct seq_file *seq, void *v) } } - seq_printf(seq, "%s\n", count > 1 ? 
"yes" : "no"); + seq_printf(seq, "%s\n", str_yes_no(count > 1)); end_current_label_crit_section(label); return 0; From 71e6cff3e0dde6f6a3355d6c73ca3e176567995e Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 23 Sep 2022 16:36:10 -0700 Subject: [PATCH 0002/2411] apparmor: Improve debug print infrastructure Make it so apparmor debug output can be controlled by class flags as well as the debug flag on labels. This provides much finer control at what is being output so apparmor doesn't flood the logs with information that is not needed, making it hard to find what is important. Signed-off-by: John Johansen --- security/apparmor/domain.c | 19 +++--- security/apparmor/include/apparmor.h | 2 +- security/apparmor/include/lib.h | 37 +++++++---- security/apparmor/label.c | 12 ++-- security/apparmor/lib.c | 91 ++++++++++++++++++++++++++++ security/apparmor/lsm.c | 36 ++++++++++- security/apparmor/policy.c | 6 +- security/apparmor/policy_ns.c | 2 +- security/apparmor/procattr.c | 6 +- 9 files changed, 177 insertions(+), 34 deletions(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 5939bd9a9b9b..c906ab98f53a 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -652,7 +652,7 @@ static struct aa_label *profile_transition(const struct cred *subj_cred, if (error) { if (profile_unconfined(profile) || (profile->label.flags & FLAG_IX_ON_NAME_ERROR)) { - AA_DEBUG("name lookup ix on error"); + AA_DEBUG(DEBUG_DOMAIN, "name lookup ix on error"); error = 0; new = aa_get_newest_label(&profile->label); } @@ -664,10 +664,10 @@ static struct aa_label *profile_transition(const struct cred *subj_cred, new = find_attach(bprm, profile->ns, &profile->ns->base.profiles, name, &info); if (new) { - AA_DEBUG("unconfined attached to new label"); + AA_DEBUG(DEBUG_DOMAIN, "unconfined attached to new label"); return new; } - AA_DEBUG("unconfined exec no attachment"); + AA_DEBUG(DEBUG_DOMAIN, "unconfined exec no attachment"); return 
aa_get_newest_label(&profile->label); } @@ -766,7 +766,7 @@ static int profile_onexec(const struct cred *subj_cred, if (error) { if (profile_unconfined(profile) || (profile->label.flags & FLAG_IX_ON_NAME_ERROR)) { - AA_DEBUG("name lookup ix on error"); + AA_DEBUG(DEBUG_DOMAIN, "name lookup ix on error"); error = 0; } xname = bprm->filename; @@ -1216,7 +1216,8 @@ int aa_change_hat(const char *hats[], int count, u64 token, int flags) if (task_no_new_privs(current) && !unconfined(label) && !aa_label_is_unconfined_subset(new, ctx->nnp)) { /* not an apparmor denial per se, so don't log it */ - AA_DEBUG("no_new_privs - change_hat denied"); + AA_DEBUG(DEBUG_DOMAIN, + "no_new_privs - change_hat denied"); error = -EPERM; goto out; } @@ -1237,7 +1238,8 @@ int aa_change_hat(const char *hats[], int count, u64 token, int flags) if (task_no_new_privs(current) && !unconfined(label) && !aa_label_is_unconfined_subset(previous, ctx->nnp)) { /* not an apparmor denial per se, so don't log it */ - AA_DEBUG("no_new_privs - change_hat denied"); + AA_DEBUG(DEBUG_DOMAIN, + "no_new_privs - change_hat denied"); error = -EPERM; goto out; } @@ -1343,7 +1345,7 @@ int aa_change_profile(const char *fqname, int flags) if (!fqname || !*fqname) { aa_put_label(label); - AA_DEBUG("no profile name"); + AA_DEBUG(DEBUG_DOMAIN, "no profile name"); return -EINVAL; } @@ -1462,7 +1464,8 @@ int aa_change_profile(const char *fqname, int flags) if (task_no_new_privs(current) && !unconfined(label) && !aa_label_is_unconfined_subset(new, ctx->nnp)) { /* not an apparmor denial per se, so don't log it */ - AA_DEBUG("no_new_privs - change_hat denied"); + AA_DEBUG(DEBUG_DOMAIN, + "no_new_privs - change_hat denied"); error = -EPERM; goto out; } diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h index f83934913b0f..56767b1a8f06 100644 --- a/security/apparmor/include/apparmor.h +++ b/security/apparmor/include/apparmor.h @@ -43,7 +43,7 @@ /* Control parameters settable through 
module/boot flags */ extern enum audit_mode aa_g_audit; extern bool aa_g_audit_header; -extern bool aa_g_debug; +extern int aa_g_debug; extern bool aa_g_hash_policy; extern bool aa_g_export_binary; extern int aa_g_rawdata_compression_level; diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index f11a0db7f51d..256f4577c653 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -18,23 +18,35 @@ extern struct aa_dfa *stacksplitdfa; -/* - * DEBUG remains global (no per profile flag) since it is mostly used in sysctl - * which is not related to profile accesses. - */ - -#define DEBUG_ON (aa_g_debug) /* * split individual debug cases out in preparation for finer grained * debug controls in the future. */ -#define AA_DEBUG_LABEL DEBUG_ON #define dbg_printk(__fmt, __args...) pr_debug(__fmt, ##__args) -#define AA_DEBUG(fmt, args...) \ + +#define DEBUG_NONE 0 +#define DEBUG_LABEL_ABS_ROOT 1 +#define DEBUG_LABEL 2 +#define DEBUG_DOMAIN 4 +#define DEBUG_POLICY 8 +#define DEBUG_INTERFACE 0x10 + +#define DEBUG_ALL 0x1f /* update if new DEBUG_X added */ +#define DEBUG_PARSE_ERROR (-1) + +#define DEBUG_ON (aa_g_debug != DEBUG_NONE) +#define DEBUG_ABS_ROOT (aa_g_debug & DEBUG_LABEL_ABS_ROOT) + +#define AA_DEBUG(opt, fmt, args...) \ do { \ - if (DEBUG_ON) \ - pr_debug_ratelimited("AppArmor: " fmt, ##args); \ + if (aa_g_debug & opt) \ + pr_warn_ratelimited("%s: " fmt, __func__, ##args); \ } while (0) +#define AA_DEBUG_LABEL(LAB, X, fmt, args) \ +do { \ + if ((LAB)->flags & FLAG_DEBUG1) \ + AA_DEBUG(X, fmt, args); \ +} while (0) #define AA_WARN(X) WARN((X), "APPARMOR WARN %s: %s\n", __func__, #X) @@ -51,6 +63,9 @@ extern struct aa_dfa *stacksplitdfa; #define AA_BUG_FMT(X, fmt, args...) no_printk(fmt, ##args) #endif +int aa_parse_debug_params(const char *str); +int aa_print_debug_params(char *buffer); + #define AA_ERROR(fmt, args...) 
\ pr_err_ratelimited("AppArmor: " fmt, ##args) @@ -281,7 +296,7 @@ __do_cleanup: \ } \ __done: \ if (!__new_) \ - AA_DEBUG("label build failed\n"); \ + AA_DEBUG(DEBUG_LABEL, "label build failed\n"); \ (__new_); \ }) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index 91483ecacc16..f950dcc1842b 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -431,7 +431,7 @@ struct aa_label *aa_label_alloc(int size, struct aa_proxy *proxy, gfp_t gfp) /* + 1 for null terminator entry on vec */ new = kzalloc(struct_size(new, vec, size + 1), gfp); - AA_DEBUG("%s (%p)\n", __func__, new); + AA_DEBUG(DEBUG_LABEL, "%s (%p)\n", __func__, new); if (!new) goto fail; @@ -1617,7 +1617,7 @@ int aa_label_snxprint(char *str, size_t size, struct aa_ns *ns, AA_BUG(!str && size != 0); AA_BUG(!label); - if (AA_DEBUG_LABEL && (flags & FLAG_ABS_ROOT)) { + if (DEBUG_ABS_ROOT && (flags & FLAG_ABS_ROOT)) { ns = root_ns; len = snprintf(str, size, "_"); update_for_len(total, len, size, str); @@ -1731,7 +1731,7 @@ void aa_label_xaudit(struct audit_buffer *ab, struct aa_ns *ns, display_mode(ns, label, flags)) { len = aa_label_asxprint(&name, ns, label, flags, gfp); if (len < 0) { - AA_DEBUG("label print error"); + AA_DEBUG(DEBUG_LABEL, "label print error"); return; } str = name; @@ -1759,7 +1759,7 @@ void aa_label_seq_xprint(struct seq_file *f, struct aa_ns *ns, len = aa_label_asxprint(&str, ns, label, flags, gfp); if (len < 0) { - AA_DEBUG("label print error"); + AA_DEBUG(DEBUG_LABEL, "label print error"); return; } seq_puts(f, str); @@ -1782,7 +1782,7 @@ void aa_label_xprintk(struct aa_ns *ns, struct aa_label *label, int flags, len = aa_label_asxprint(&str, ns, label, flags, gfp); if (len < 0) { - AA_DEBUG("label print error"); + AA_DEBUG(DEBUG_LABEL, "label print error"); return; } pr_info("%s", str); @@ -1865,7 +1865,7 @@ struct aa_label *aa_label_strn_parse(struct aa_label *base, const char *str, AA_BUG(!str); str = skipn_spaces(str, n); - if (str == NULL || 
(AA_DEBUG_LABEL && *str == '_' && + if (str == NULL || (DEBUG_ABS_ROOT && *str == '_' && base != &root_ns->unconfined->label)) return ERR_PTR(-EINVAL); diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 7db62213e352..dd5dcbe5daf7 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -25,6 +25,97 @@ struct aa_perms allperms = { .allow = ALL_PERMS_MASK, .quiet = ALL_PERMS_MASK, .hide = ALL_PERMS_MASK }; +struct val_table_ent { + const char *str; + int value; +}; + +struct val_table_ent debug_values_table[] = { + { "N", DEBUG_NONE }, + { "none", DEBUG_NONE }, + { "n", DEBUG_NONE }, + { "0", DEBUG_NONE }, + { "all", DEBUG_ALL }, + { "Y", DEBUG_ALL }, + { "y", DEBUG_ALL }, + { "1", DEBUG_ALL }, + { "abs_root", DEBUG_LABEL_ABS_ROOT }, + { "label", DEBUG_LABEL }, + { "domain", DEBUG_DOMAIN }, + { "policy", DEBUG_POLICY }, + { "interface", DEBUG_INTERFACE }, + { NULL, 0 } +}; + +static struct val_table_ent *val_table_find_ent(struct val_table_ent *table, + const char *name, size_t len) +{ + struct val_table_ent *entry; + + for (entry = table; entry->str != NULL; entry++) { + if (strncmp(entry->str, name, len) == 0 && + strlen(entry->str) == len) + return entry; + } + return NULL; +} + +int aa_parse_debug_params(const char *str) +{ + struct val_table_ent *ent; + const char *next; + int val = 0; + + do { + size_t n = strcspn(str, "\r\n,"); + + next = str + n; + ent = val_table_find_ent(debug_values_table, str, next - str); + if (ent) + val |= ent->value; + else + AA_DEBUG(DEBUG_INTERFACE, "unknown debug type '%.*s'", + (int)(next - str), str); + str = next + 1; + } while (*next != 0); + return val; +} + +/** + * aa_mask_to_str - convert a perm mask to its short string + * @str: character buffer to store string in (at least 10 characters) + * @str_size: size of the @str buffer + * @chrs: NUL-terminated character buffer of permission characters + * @mask: permission mask to convert + */ +static int val_mask_to_str(char *str, size_t size, + 
const struct val_table_ent *table, u32 mask) +{ + const struct val_table_ent *ent; + int total = 0; + + for (ent = table; ent->str; ent++) { + if (ent->value && (ent->value & mask) == ent->value) { + int len = scnprintf(str, size, "%s%s", total ? "," : "", + ent->str); + size -= len; + str += len; + total += len; + mask &= ~ent->value; + } + } + + return total; +} + +int aa_print_debug_params(char *buffer) +{ + if (!aa_g_debug) + return sprintf(buffer, "N"); + return val_mask_to_str(buffer, PAGE_SIZE, debug_values_table, + aa_g_debug); +} + /** * aa_free_str_table - free entries str table * @t: the string table to free (MAYBE NULL) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 1edc12862a7d..72c3d1536f69 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1571,6 +1571,9 @@ static const struct kernel_param_ops param_ops_aalockpolicy = { .get = param_get_aalockpolicy }; +static int param_set_debug(const char *val, const struct kernel_param *kp); +static int param_get_debug(char *buffer, const struct kernel_param *kp); + static int param_set_audit(const char *val, const struct kernel_param *kp); static int param_get_audit(char *buffer, const struct kernel_param *kp); @@ -1604,8 +1607,9 @@ module_param_named(rawdata_compression_level, aa_g_rawdata_compression_level, aacompressionlevel, 0400); /* Debug mode */ -bool aa_g_debug = IS_ENABLED(CONFIG_SECURITY_APPARMOR_DEBUG_MESSAGES); -module_param_named(debug, aa_g_debug, aabool, S_IRUSR | S_IWUSR); +int aa_g_debug; +module_param_call(debug, param_set_debug, param_get_debug, + &aa_g_debug, 0600); /* Audit mode */ enum audit_mode aa_g_audit; @@ -1798,6 +1802,34 @@ static int param_get_aacompressionlevel(char *buffer, return param_get_int(buffer, kp); } +static int param_get_debug(char *buffer, const struct kernel_param *kp) +{ + if (!apparmor_enabled) + return -EINVAL; + if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) + return -EPERM; + return 
aa_print_debug_params(buffer); +} + +static int param_set_debug(const char *val, const struct kernel_param *kp) +{ + int i; + + if (!apparmor_enabled) + return -EINVAL; + if (!val) + return -EINVAL; + if (apparmor_initialized && !aa_current_policy_admin_capable(NULL)) + return -EPERM; + + i = aa_parse_debug_params(val); + if (i == DEBUG_PARSE_ERROR) + return -EINVAL; + + aa_g_debug = i; + return 0; +} + static int param_get_audit(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index d0244fab0653..25cb34e43786 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -280,7 +280,7 @@ void aa_free_profile(struct aa_profile *profile) struct aa_ruleset *rule, *tmp; struct rhashtable *rht; - AA_DEBUG("%s(%p)\n", __func__, profile); + AA_DEBUG(DEBUG_POLICY, "%s(%p)\n", __func__, profile); if (!profile) return; @@ -833,8 +833,8 @@ bool aa_policy_admin_capable(const struct cred *subj_cred, bool capable = policy_ns_capable(subj_cred, label, user_ns, CAP_MAC_ADMIN) == 0; - AA_DEBUG("cap_mac_admin? %d\n", capable); - AA_DEBUG("policy locked? %d\n", aa_g_lock_policy); + AA_DEBUG(DEBUG_POLICY, "cap_mac_admin? %d\n", capable); + AA_DEBUG(DEBUG_POLICY, "policy locked? 
%d\n", aa_g_lock_policy); return aa_policy_view_capable(subj_cred, label, ns) && capable && !aa_g_lock_policy; diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c index 1f02cfe1d974..64783ca3b0f2 100644 --- a/security/apparmor/policy_ns.c +++ b/security/apparmor/policy_ns.c @@ -107,7 +107,7 @@ static struct aa_ns *alloc_ns(const char *prefix, const char *name) struct aa_ns *ns; ns = kzalloc(sizeof(*ns), GFP_KERNEL); - AA_DEBUG("%s(%p)\n", __func__, ns); + AA_DEBUG(DEBUG_POLICY, "%s(%p)\n", __func__, ns); if (!ns) return NULL; if (!aa_policy_init(&ns->base, prefix, name, GFP_KERNEL)) diff --git a/security/apparmor/procattr.c b/security/apparmor/procattr.c index e3857e3d7c6c..ce40f15d4952 100644 --- a/security/apparmor/procattr.c +++ b/security/apparmor/procattr.c @@ -125,12 +125,14 @@ int aa_setprocattr_changehat(char *args, size_t size, int flags) for (count = 0; (hat < end) && count < 16; ++count) { char *next = hat + strlen(hat) + 1; hats[count] = hat; - AA_DEBUG("%s: (pid %d) Magic 0x%llx count %d hat '%s'\n" + AA_DEBUG(DEBUG_DOMAIN, + "%s: (pid %d) Magic 0x%llx count %d hat '%s'\n" , __func__, current->pid, token, count, hat); hat = next; } } else - AA_DEBUG("%s: (pid %d) Magic 0x%llx count %d Hat '%s'\n", + AA_DEBUG(DEBUG_DOMAIN, + "%s: (pid %d) Magic 0x%llx count %d Hat '%s'\n", __func__, current->pid, token, count, ""); return aa_change_hat(hats, count, token, flags); From 280799f724088ceea409564f4412181e354aba22 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 16 Nov 2022 22:17:09 -0800 Subject: [PATCH 0003/2411] apparmor: cleanup: attachment perm lookup to use lookup_perms() Remove another case of code duplications. Switch to using the generic routine instead of the current custom checks. 
Signed-off-by: John Johansen --- security/apparmor/domain.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index c906ab98f53a..b1bf1a0b29bb 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -323,7 +323,7 @@ static int aa_xattrs_match(const struct linux_binprm *bprm, size = vfs_getxattr_alloc(&nop_mnt_idmap, d, attach->xattrs[i], &value, value_size, GFP_KERNEL); if (size >= 0) { - u32 index, perm; + struct aa_perms *perms; /* * Check the xattr presence before value. This ensure @@ -335,9 +335,8 @@ static int aa_xattrs_match(const struct linux_binprm *bprm, /* Check xattr value */ state = aa_dfa_match_len(attach->xmatch->dfa, state, value, size); - index = ACCEPT_TABLE(attach->xmatch->dfa)[state]; - perm = attach->xmatch->perms[index].allow; - if (!(perm & MAY_EXEC)) { + perms = aa_lookup_perms(attach->xmatch, state); + if (!(perms->allow & MAY_EXEC)) { ret = -EINVAL; goto out; } @@ -415,15 +414,14 @@ static struct aa_label *find_attach(const struct linux_binprm *bprm, if (attach->xmatch->dfa) { unsigned int count; aa_state_t state; - u32 index, perm; + struct aa_perms *perms; state = aa_dfa_leftmatch(attach->xmatch->dfa, attach->xmatch->start[AA_CLASS_XMATCH], name, &count); - index = ACCEPT_TABLE(attach->xmatch->dfa)[state]; - perm = attach->xmatch->perms[index].allow; + perms = aa_lookup_perms(attach->xmatch, state); /* any accepting state means a valid match. */ - if (perm & MAY_EXEC) { + if (perms->allow & MAY_EXEC) { int ret = 0; if (count < candidate_len) From 46b9b994dd554099b3ca74a20a0d1fb392c83a87 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sun, 29 Jan 2023 01:55:03 -0800 Subject: [PATCH 0004/2411] apparmor: remove redundant unconfined check. profile_af_perm and profile_af_sk_perm are only ever called after checking that the profile is not unconfined. So we can drop these redundant checks. 
Signed-off-by: John Johansen --- security/apparmor/net.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/security/apparmor/net.c b/security/apparmor/net.c index 77413a519117..8b7a63c08ba1 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -118,9 +118,8 @@ int aa_profile_af_perm(struct aa_profile *profile, AA_BUG(family >= AF_MAX); AA_BUG(type < 0 || type >= SOCK_MAX); + AA_BUG(profile_unconfined(profile)); - if (profile_unconfined(profile)) - return 0; state = RULE_MEDIATES(rules, AA_CLASS_NET); if (!state) return 0; From 0bc8c6862faaa80a2c89c73cc3936cbe2d35235c Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sun, 29 Jan 2023 02:13:56 -0800 Subject: [PATCH 0005/2411] apparmor: switch signal mediation to use RULE_MEDIATES Currently signal mediation is using a hard coded form of the RULE_MEDIATES check. This hides the intended semantics, and means this specific check won't pickup any changes or improvements made in the RULE_MEDIATES check. Switch to using RULE_MEDIATES(). 
Signed-off-by: John Johansen --- security/apparmor/ipc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index 0cdf4340b02d..3566d875645e 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -85,16 +85,16 @@ static int profile_signal_perm(const struct cred *cred, struct aa_perms perms; aa_state_t state; - if (profile_unconfined(profile) || - !ANY_RULE_MEDIATES(&profile->rules, AA_CLASS_SIGNAL)) + if (profile_unconfined(profile)) return 0; ad->subj_cred = cred; ad->peer = peer; /* TODO: secondary cache check */ - state = aa_dfa_next(rules->policy->dfa, - rules->policy->start[AA_CLASS_SIGNAL], - ad->signal); + state = RULE_MEDIATES(rules, AA_CLASS_SIGNAL); + if (!state) + return 0; + state = aa_dfa_next(rules->policy->dfa, state, ad->signal); aa_label_match(profile, rules, peer, state, false, request, &perms); aa_apply_modes_to_perms(profile, &perms); return aa_check_perms(profile, &perms, request, ad, audit_signal_cb); From cd769b05cc87fb527dbab547e65b934b45705d6b Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 19 Jan 2024 00:12:16 -0800 Subject: [PATCH 0006/2411] apparmor: ensure labels with more than one entry have correct flags labels containing more than one entry need to accumulate flag info from profiles that the label is constructed from. This is done correctly for labels created by a merge but is not being done for labels created by an update or directly created via a parse. This technically is a bug fix, however the effect in current code is to cause early unconfined bail out to not happen (ie. without the fix it is slower) on labels that were created via update or a parse. 
Signed-off-by: John Johansen --- security/apparmor/label.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index f950dcc1842b..868874ef3d35 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -645,6 +645,7 @@ static bool __label_replace(struct aa_label *old, struct aa_label *new) rb_replace_node(&old->node, &new->node, &ls->root); old->flags &= ~FLAG_IN_TREE; new->flags |= FLAG_IN_TREE; + new->flags |= accum_vec_flags(new->vec, new->size); return true; } @@ -705,6 +706,7 @@ static struct aa_label *__label_insert(struct aa_labelset *ls, rb_link_node(&label->node, parent, new); rb_insert_color(&label->node, &ls->root); label->flags |= FLAG_IN_TREE; + label->flags |= accum_vec_flags(label->vec, label->size); return aa_get_label(label); } @@ -1085,7 +1087,6 @@ static struct aa_label *label_merge_insert(struct aa_label *new, else if (k == b->size) return aa_get_label(b); } - new->flags |= accum_vec_flags(new->vec, new->size); ls = labels_set(new); write_lock_irqsave(&ls->lock, flags); label = __label_insert(labels_set(new), new, false); From 35fad5b462224e0da3764f68b69827281eeaac8c Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 19 Jan 2024 00:24:03 -0800 Subject: [PATCH 0007/2411] apparmor: remove explicit restriction that unconfined cannot use change_hat There does not need to be an explicit restriction that unconfined can't use change_hat. Traditionally unconfined doesn't have hats so change_hat could not be used. But newer unconfined profiles have the potential of having hats, and even system unconfined will be able to be replaced with a profile that allows for hats. To remain backwards compatible with expected return codes, continue to return -EPERM if the unconfined profile does not have any hats. 
Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 1 + security/apparmor/domain.c | 20 +++++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 1bce9a7d2129..e42ac7aadd31 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -2332,6 +2332,7 @@ static struct aa_sfs_entry aa_sfs_entry_attach[] = { static struct aa_sfs_entry aa_sfs_entry_domain[] = { AA_SFS_FILE_BOOLEAN("change_hat", 1), AA_SFS_FILE_BOOLEAN("change_hatv", 1), + AA_SFS_FILE_BOOLEAN("unconfined_allowed_children", 1), AA_SFS_FILE_BOOLEAN("change_onexec", 1), AA_SFS_FILE_BOOLEAN("change_profile", 1), AA_SFS_FILE_BOOLEAN("stack", 1), diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index b1bf1a0b29bb..af196005d5ee 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -1186,10 +1186,24 @@ int aa_change_hat(const char *hats[], int count, u64 token, int flags) if (task_no_new_privs(current) && !unconfined(label) && !ctx->nnp) ctx->nnp = aa_get_label(label); + /* return -EPERM when unconfined doesn't have children to avoid + * changing the traditional error code for unconfined. + */ if (unconfined(label)) { - info = "unconfined can not change_hat"; - error = -EPERM; - goto fail; + struct label_it i; + bool empty = true; + + rcu_read_lock(); + label_for_each_in_ns(i, labels_ns(label), label, profile) { + empty &= list_empty(&profile->base.profiles); + } + rcu_read_unlock(); + + if (empty) { + info = "unconfined can not change_hat"; + error = -EPERM; + goto fail; + } } if (count) { From 34d31f23385b018890295414acaee31d786cf73d Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 19 Jan 2024 01:23:55 -0800 Subject: [PATCH 0008/2411] apparmor: cleanup: refactor file_perm() to doc semantics of some checks Provide semantics, via fn names, for some checks being done in file_perm(). 
This is a preparatory patch for improvements to both permission caching and delegation, where the check will become more involved. Signed-off-by: John Johansen --- security/apparmor/file.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/security/apparmor/file.c b/security/apparmor/file.c index d52a5b14dad4..81c54ffd63cb 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -557,6 +557,19 @@ static int __file_sock_perm(const char *op, const struct cred *subj_cred, return error; } +/* wrapper fn to indicate semantics of the check */ +static bool __subj_label_is_cached(struct aa_label *subj_label, + struct aa_label *obj_label) +{ + return aa_label_is_subset(obj_label, subj_label); +} + +/* for now separate fn to indicate semantics of the check */ +static bool __file_is_delegated(struct aa_label *obj_label) +{ + return unconfined(obj_label); +} + /** * aa_file_perm - do permission revalidation check & audit for @file * @op: operation being checked @@ -594,8 +607,8 @@ int aa_file_perm(const char *op, const struct cred *subj_cred, * delegation from unconfined tasks */ denied = request & ~fctx->allow; - if (unconfined(label) || unconfined(flabel) || - (!denied && aa_label_is_subset(flabel, label))) { + if (unconfined(label) || __file_is_delegated(flabel) || + (!denied && __subj_label_is_cached(label, flabel))) { rcu_read_unlock(); goto done; } From de4754c801f4ceefc6ce0d13480c506e0a91b449 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 27 Oct 2023 10:31:06 -0700 Subject: [PATCH 0009/2411] apparmor: carry mediation check on label In order to speed up the mediated check, precompute and store the result as a bit per class type. This will not only allow us to speed up the mediation check but is also a step to removing the unconfined special cases as the unconfined check can be replaced with the generic label_mediates() check. 
Note: label check does not currently work for capabilities and resources which need to have their mediation updated first. Signed-off-by: John Johansen --- security/apparmor/include/apparmor.h | 1 + security/apparmor/include/label.h | 24 +++++++++++------------- security/apparmor/include/policy.h | 13 +++++++++++++ security/apparmor/label.c | 24 ++++++++++++++---------- security/apparmor/policy.c | 28 +++++++++++++++++++++++++++- security/apparmor/policy_unpack.c | 2 ++ 6 files changed, 68 insertions(+), 24 deletions(-) diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h index 56767b1a8f06..dd12cba8139d 100644 --- a/security/apparmor/include/apparmor.h +++ b/security/apparmor/include/apparmor.h @@ -38,6 +38,7 @@ #define AA_CLASS_X 31 #define AA_CLASS_DBUS 32 +/* NOTE: if AA_CLASS_LAST > 63 need to update label->mediates */ #define AA_CLASS_LAST AA_CLASS_DBUS /* Control parameters settable through module/boot flags */ diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h index 93290ae300bb..5e7d199c15e2 100644 --- a/security/apparmor/include/label.h +++ b/security/apparmor/include/label.h @@ -129,6 +129,7 @@ struct aa_label { long flags; u32 secid; int size; + u64 mediates; struct aa_profile *vec[]; }; @@ -231,20 +232,17 @@ int aa_label_next_confined(struct aa_label *l, int i); #define fn_for_each_not_in_set(L1, L2, P, FN) \ fn_for_each2_XXX((L1), (L2), P, FN, _not_in_set) -#define LABEL_MEDIATES(L, C) \ -({ \ - struct aa_profile *profile; \ - struct label_it i; \ - int ret = 0; \ - label_for_each(i, (L), profile) { \ - if (RULE_MEDIATES(&profile->rules, (C))) { \ - ret = 1; \ - break; \ - } \ - } \ - ret; \ -}) +static inline bool label_mediates(struct aa_label *L, unsigned char C) +{ + return (L)->mediates & (((u64) 1) << (C)); +} +static inline bool label_mediates_safe(struct aa_label *L, unsigned char C) +{ + if (C > AA_CLASS_LAST) + return false; + return label_mediates(L, C); +} void 
aa_labelset_destroy(struct aa_labelset *ls); void aa_labelset_init(struct aa_labelset *ls); diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 757e3c232c57..256fb27e5c3a 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -318,6 +318,19 @@ static inline aa_state_t ANY_RULE_MEDIATES(struct list_head *head, return RULE_MEDIATES(rule, class); } +void aa_compute_profile_mediates(struct aa_profile *profile); +static inline bool profile_mediates(struct aa_profile *profile, + unsigned char class) +{ + return label_mediates(&profile->label, class); +} + +static inline bool profile_mediates_safe(struct aa_profile *profile, + unsigned char class) +{ + return label_mediates_safe(&profile->label, class); +} + /** * aa_get_profile - increment refcount on profile @p * @p: profile (MAYBE NULL) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index 868874ef3d35..afded9996f61 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -198,21 +198,25 @@ static bool vec_is_stale(struct aa_profile **vec, int n) return false; } -static long accum_vec_flags(struct aa_profile **vec, int n) +static void accum_label_info(struct aa_label *new) { long u = FLAG_UNCONFINED; int i; - AA_BUG(!vec); + AA_BUG(!new->vec); - for (i = 0; i < n; i++) { - u |= vec[i]->label.flags & (FLAG_DEBUG1 | FLAG_DEBUG2 | - FLAG_STALE); - if (!(u & vec[i]->label.flags & FLAG_UNCONFINED)) + /* size == 1 is a profile and flags must be set as part of creation */ + if (new->size == 1) + return; + + for (i = 0; i < new->size; i++) { + u |= new->vec[i]->label.flags & (FLAG_DEBUG1 | FLAG_DEBUG2 | + FLAG_STALE); + if (!(u & new->vec[i]->label.flags & FLAG_UNCONFINED)) u &= ~FLAG_UNCONFINED; + new->mediates |= new->vec[i]->label.mediates; } - - return u; + new->flags |= u; } static int sort_cmp(const void *a, const void *b) @@ -645,7 +649,7 @@ static bool __label_replace(struct aa_label *old, struct aa_label 
*new) rb_replace_node(&old->node, &new->node, &ls->root); old->flags &= ~FLAG_IN_TREE; new->flags |= FLAG_IN_TREE; - new->flags |= accum_vec_flags(new->vec, new->size); + accum_label_info(new); return true; } @@ -706,7 +710,7 @@ static struct aa_label *__label_insert(struct aa_labelset *ls, rb_link_node(&label->node, parent, new); rb_insert_color(&label->node, &ls->root); label->flags |= FLAG_IN_TREE; - label->flags |= accum_vec_flags(label->vec, label->size); + accum_label_info(label); return aa_get_label(label); } diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 25cb34e43786..2857e771e2a9 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -373,6 +373,30 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy, return NULL; } +/* set of rules that are mediated by unconfined */ +static int unconfined_mediates[] = { AA_CLASS_NS, AA_CLASS_IO_URING, 0 }; + +/* must be called after profile rulesets and start information is setup */ +void aa_compute_profile_mediates(struct aa_profile *profile) +{ + int c; + + if (profile_unconfined(profile)) { + int *pos; + + for (pos = unconfined_mediates; *pos; pos++) { + if (ANY_RULE_MEDIATES(&profile->rules, AA_CLASS_NS) != + DFA_NOMATCH) + profile->label.mediates |= ((u64) 1) << AA_CLASS_NS; + } + return; + } + for (c = 0; c <= AA_CLASS_LAST; c++) { + if (ANY_RULE_MEDIATES(&profile->rules, c) != DFA_NOMATCH) + profile->label.mediates |= ((u64) 1) << c; + } +} + /* TODO: profile accounting - setup in remove */ /** @@ -624,10 +648,12 @@ struct aa_profile *aa_alloc_null(struct aa_profile *parent, const char *name, rules = list_first_entry(&profile->rules, typeof(*rules), list); rules->file = aa_get_pdb(nullpdb); rules->policy = aa_get_pdb(nullpdb); + aa_compute_profile_mediates(profile); if (parent) { profile->path_flags = parent->path_flags; - + /* override/inherit what is mediated from parent */ + profile->label.mediates = parent->label.mediates; /* released 
on free_profile */ rcu_assign_pointer(profile->parent, aa_get_profile(parent)); profile->ns = aa_get_ns(parent->ns); diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 992b74c50d64..287e08ac4b4b 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -1101,6 +1101,8 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) goto fail; } + aa_compute_profile_mediates(profile); + return profile; fail: From 2e12c5f060176ede209673e4f63ea5d0e3c5814c Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sun, 23 Jul 2023 02:30:33 -0700 Subject: [PATCH 0010/2411] apparmor: add additional flags to extended permission. This is a step towards merging the file and policy state machines. With the switch to extended permissions the state machine's ACCEPT2 table became unused, freeing it up to store state specific flags. The first flags to be stored are FLAG_OWNER and FLAG other, which pave the way towards merging the file and policydb perms into a single permission table. Currently lookups based on the object's ownership conditional will still need separate fns; this will be addressed in a following patch.
Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 5 +++-- security/apparmor/domain.c | 9 ++++++--- security/apparmor/file.c | 23 ++++++++++++++--------- security/apparmor/include/file.h | 5 +++-- security/apparmor/include/policy.h | 7 ++++++- security/apparmor/policy_compat.c | 6 +++--- security/apparmor/policy_unpack.c | 20 +++++++++++++++++++- 7 files changed, 54 insertions(+), 21 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index e42ac7aadd31..65191c5fc5e3 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -626,7 +626,8 @@ static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms, if (state) { struct path_cond cond = { }; - tmp = *(aa_lookup_fperms(rules->file, state, &cond)); + tmp = *(aa_lookup_condperms(current_fsuid(), + rules->file, state, &cond)); } } else if (rules->policy->dfa) { if (!RULE_MEDIATES(rules, *match_str)) @@ -2365,7 +2366,7 @@ static struct aa_sfs_entry aa_sfs_entry_policy[] = { AA_SFS_FILE_BOOLEAN("set_load", 1), /* number of out of band transitions supported */ AA_SFS_FILE_U64("outofband", MAX_OOB_SUPPORTED), - AA_SFS_FILE_U64("permstable32_version", 1), + AA_SFS_FILE_U64("permstable32_version", 3), AA_SFS_FILE_STRING("permstable32", PERMS32STR), AA_SFS_FILE_U64("state32", 1), AA_SFS_DIR("unconfined_restrictions", aa_sfs_entry_unconfined), diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index af196005d5ee..630806573793 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -154,7 +154,8 @@ static int label_compound_match(struct aa_profile *profile, if (!state) goto fail; } - *perms = *(aa_lookup_fperms(rules->file, state, &cond)); + *perms = *(aa_lookup_condperms(current_fsuid(), rules->file, state, + &cond)); aa_apply_modes_to_perms(profile, perms); if ((perms->allow & request) != request) return -EACCES; @@ -209,7 +210,8 @@ static int label_components_match(struct aa_profile 
*profile, return 0; next: - tmp = *(aa_lookup_fperms(rules->file, state, &cond)); + tmp = *(aa_lookup_condperms(current_fsuid(), rules->file, state, + &cond)); aa_apply_modes_to_perms(profile, &tmp); aa_perms_accum(perms, &tmp); label_for_each_cont(i, label, tp) { @@ -218,7 +220,8 @@ static int label_components_match(struct aa_profile *profile, state = match_component(profile, tp, stack, start); if (!state) goto fail; - tmp = *(aa_lookup_fperms(rules->file, state, &cond)); + tmp = *(aa_lookup_condperms(current_fsuid(), rules->file, state, + &cond)); aa_apply_modes_to_perms(profile, &tmp); aa_perms_accum(perms, &tmp); } diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 81c54ffd63cb..6ce6547301dc 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -169,7 +169,8 @@ static int path_name(const char *op, const struct cred *subj_cred, struct aa_perms default_perms = {}; /** * aa_lookup_fperms - convert dfa compressed perms to internal perms - * @file_rules: the aa_policydb to lookup perms for (NOT NULL) + * @subj_uid: uid to use for subject owner test + * @rules: the aa_policydb to lookup perms for (NOT NULL) * @state: state in dfa * @cond: conditions to consider (NOT NULL) * @@ -177,18 +178,21 @@ struct aa_perms default_perms = {}; * * Returns: a pointer to a file permission set */ -struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules, - aa_state_t state, struct path_cond *cond) +struct aa_perms *aa_lookup_condperms(kuid_t subj_uid, struct aa_policydb *rules, + aa_state_t state, struct path_cond *cond) { - unsigned int index = ACCEPT_TABLE(file_rules->dfa)[state]; + unsigned int index = ACCEPT_TABLE(rules->dfa)[state]; - if (!(file_rules->perms)) + if (!(rules->perms)) return &default_perms; - if (uid_eq(current_fsuid(), cond->uid)) - return &(file_rules->perms[index]); + if ((ACCEPT_TABLE2(rules->dfa)[state] & ACCEPT_FLAG_OWNER)) { + if (uid_eq(subj_uid, cond->uid)) + return &(rules->perms[index]); + return 
&(rules->perms[index + 1]); + } - return &(file_rules->perms[index + 1]); + return &(rules->perms[index]); } /** @@ -207,7 +211,8 @@ aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start, { aa_state_t state; state = aa_dfa_match(file_rules->dfa, start, name); - *perms = *(aa_lookup_fperms(file_rules, state, cond)); + *perms = *(aa_lookup_condperms(current_fsuid(), file_rules, state, + cond)); return state; } diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h index 6e8f2aa66cd6..06d9899098a6 100644 --- a/security/apparmor/include/file.h +++ b/security/apparmor/include/file.h @@ -77,8 +77,9 @@ int aa_audit_file(const struct cred *cred, const char *target, struct aa_label *tlabel, kuid_t ouid, const char *info, int error); -struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules, - aa_state_t state, struct path_cond *cond); +struct aa_perms *aa_lookup_condperms(kuid_t subj_uid, + struct aa_policydb *file_rules, + aa_state_t state, struct path_cond *cond); aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start, const char *name, struct path_cond *cond, struct aa_perms *perms); diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 256fb27e5c3a..bfd8bf1a1ecd 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -59,6 +59,11 @@ extern const char *const aa_profile_mode_names[]; #define on_list_rcu(X) (!list_empty(X) && (X)->prev != LIST_POISON2) +/* flags in the dfa accept2 table */ +enum dfa_accept_flags { + ACCEPT_FLAG_OWNER = 1, +}; + /* * FIXME: currently need a clean way to replace and remove profiles as a * set. It should be done at the namespace level. 
@@ -124,6 +129,7 @@ static inline void aa_put_pdb(struct aa_policydb *pdb) kref_put(&pdb->count, aa_pdb_free_kref); } +/* lookup perm that doesn't have and object conditional */ static inline struct aa_perms *aa_lookup_perms(struct aa_policydb *policy, aa_state_t state) { @@ -135,7 +141,6 @@ static inline struct aa_perms *aa_lookup_perms(struct aa_policydb *policy, return &(policy->perms[index]); } - /* struct aa_data - generic data structure * key: name for retrieving this data * size: size of data in bytes diff --git a/security/apparmor/policy_compat.c b/security/apparmor/policy_compat.c index 423227670e68..cfc2207e5a12 100644 --- a/security/apparmor/policy_compat.c +++ b/security/apparmor/policy_compat.c @@ -286,10 +286,10 @@ static void remap_dfa_accept(struct aa_dfa *dfa, unsigned int factor) AA_BUG(!dfa); - for (state = 0; state < state_count; state++) + for (state = 0; state < state_count; state++) { ACCEPT_TABLE(dfa)[state] = state * factor; - kvfree(dfa->tables[YYTD_ID_ACCEPT2]); - dfa->tables[YYTD_ID_ACCEPT2] = NULL; + ACCEPT_TABLE2(dfa)[state] = factor > 1 ? 
ACCEPT_FLAG_OWNER : 0; + } } /* TODO: merge different dfa mappings into single map_policy fn */ diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 287e08ac4b4b..7813920a21e5 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -716,6 +716,7 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy, void *pos = e->pos; int i, flags, error = -EPROTO; ssize_t size; + u32 version = 0; pdb = aa_alloc_pdb(GFP_KERNEL); if (!pdb) @@ -733,6 +734,9 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy, if (pdb->perms) { /* perms table present accept is index */ flags = TO_ACCEPT1_FLAG(YYTD_DATA32); + if (aa_unpack_u32(e, &version, "permsv") && version > 2) + /* accept2 used for dfa flags */ + flags |= TO_ACCEPT2_FLAG(YYTD_DATA32); } else { /* packed perms in accept1 and accept2 */ flags = TO_ACCEPT1_FLAG(YYTD_DATA32) | @@ -770,6 +774,20 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy, } } + if (pdb->perms && version <= 2) { + /* add dfa flags table missing in v2 */ + u32 noents = pdb->dfa->tables[YYTD_ID_ACCEPT]->td_lolen; + u16 tdflags = pdb->dfa->tables[YYTD_ID_ACCEPT]->td_flags; + size_t tsize = table_size(noents, tdflags); + + pdb->dfa->tables[YYTD_ID_ACCEPT2] = kvzalloc(tsize, GFP_KERNEL); + if (!pdb->dfa->tables[YYTD_ID_ACCEPT2]) { + *info = "failed to alloc dfa flags table"; + goto out; + } + pdb->dfa->tables[YYTD_ID_ACCEPT2]->td_lolen = noents; + pdb->dfa->tables[YYTD_ID_ACCEPT2]->td_flags = tdflags; + } /* * Unfortunately due to a bug in earlier userspaces, a * transition table may be present even when the dfa is @@ -785,7 +803,7 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy, /* TODO: move compat mapping here, requires dfa merging first */ /* TODO: move verify here, it has to be done after compat mappings */ - +out: *policy = pdb; return 0; From 84c455decf27ce97a23fb70b58075592ab88d66a Mon Sep 17 00:00:00 2001 From: 
John Johansen Date: Mon, 21 Aug 2023 16:54:58 -0700 Subject: [PATCH 0011/2411] apparmor: add support for profiles to define the kill signal Previously apparmor has only sent SIGKILL but there are cases where it can be useful to send a different signal. Allow the profile to optionally specify a different value. Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 1 + security/apparmor/audit.c | 2 +- security/apparmor/include/ipc.h | 3 +++ security/apparmor/include/policy.h | 1 + security/apparmor/include/sig_names.h | 6 +----- security/apparmor/include/signal.h | 19 +++++++++++++++++++ security/apparmor/policy.c | 1 + security/apparmor/policy_unpack.c | 7 +++++++ 8 files changed, 34 insertions(+), 6 deletions(-) create mode 100644 security/apparmor/include/signal.h diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 65191c5fc5e3..3455d223879b 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -2342,6 +2342,7 @@ static struct aa_sfs_entry aa_sfs_entry_domain[] = { AA_SFS_FILE_BOOLEAN("computed_longest_left", 1), AA_SFS_DIR("attach_conditions", aa_sfs_entry_attach), AA_SFS_FILE_BOOLEAN("disconnected.path", 1), + AA_SFS_FILE_BOOLEAN("kill.signal", 1), AA_SFS_FILE_STRING("version", "1.2"), { } }; diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c index 73087d76f649..ac89602aa2d9 100644 --- a/security/apparmor/audit.c +++ b/security/apparmor/audit.c @@ -192,7 +192,7 @@ int aa_audit(int type, struct aa_profile *profile, aa_audit_msg(type, ad, cb); if (ad->type == AUDIT_APPARMOR_KILL) - (void)send_sig_info(SIGKILL, NULL, + (void)send_sig_info(profile->signal, NULL, ad->common.type == LSM_AUDIT_DATA_TASK && ad->common.u.tsk ? 
ad->common.u.tsk : current); diff --git a/security/apparmor/include/ipc.h b/security/apparmor/include/ipc.h index 74d17052f76b..323dd071afe9 100644 --- a/security/apparmor/include/ipc.h +++ b/security/apparmor/include/ipc.h @@ -13,6 +13,9 @@ #include +#define SIGUNKNOWN 0 +#define MAXMAPPED_SIG 35 + int aa_may_signal(const struct cred *subj_cred, struct aa_label *sender, const struct cred *target_cred, struct aa_label *target, int sig); diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index bfd8bf1a1ecd..73cb84ef58f2 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -236,6 +236,7 @@ struct aa_profile { enum audit_mode audit; long mode; u32 path_flags; + int signal; const char *disconnected; struct aa_attachment attach; diff --git a/security/apparmor/include/sig_names.h b/security/apparmor/include/sig_names.h index cbf7a997ed84..c772668cdc62 100644 --- a/security/apparmor/include/sig_names.h +++ b/security/apparmor/include/sig_names.h @@ -1,9 +1,5 @@ #include - -#define SIGUNKNOWN 0 -#define MAXMAPPED_SIG 35 -#define MAXMAPPED_SIGNAME (MAXMAPPED_SIG + 1) -#define SIGRT_BASE 128 +#include "signal.h" /* provide a mapping of arch signal to internal signal # for mediation * those that are always an alias SIGCLD for SIGCLHD and SIGPOLL for SIGIO diff --git a/security/apparmor/include/signal.h b/security/apparmor/include/signal.h new file mode 100644 index 000000000000..729763fa7ce6 --- /dev/null +++ b/security/apparmor/include/signal.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * AppArmor security module + * + * This file contains AppArmor ipc mediation function definitions. + * + * Copyright 2023 Canonical Ltd. 
+ */ + +#ifndef __AA_SIGNAL_H +#define __AA_SIGNAL_H + +#define SIGUNKNOWN 0 +#define MAXMAPPED_SIG 35 + +#define MAXMAPPED_SIGNAME (MAXMAPPED_SIG + 1) +#define SIGRT_BASE 128 + +#endif /* __AA_SIGNAL_H */ diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 2857e771e2a9..04222eddd890 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -364,6 +364,7 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy, profile->label.flags |= FLAG_PROFILE; profile->label.vec[0] = profile; + profile->signal = SIGKILL; /* refcount released by caller */ return profile; diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 7813920a21e5..73139189df0f 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -29,6 +29,7 @@ #include "include/policy.h" #include "include/policy_unpack.h" #include "include/policy_compat.h" +#include "include/signal.h" /* audit callback for unpack fields */ static void audit_cb(struct audit_buffer *ab, void *va) @@ -916,6 +917,12 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) (void) aa_unpack_strdup(e, &disconnected, "disconnected"); profile->disconnected = disconnected; + /* optional */ + (void) aa_unpack_u32(e, &profile->signal, "kill"); + if (profile->signal < 1 && profile->signal > MAXMAPPED_SIG) { + info = "profile kill.signal invalid value"; + goto fail; + } /* per profile debug flags (complain, audit) */ if (!aa_unpack_nameX(e, AA_STRUCT, "flags")) { info = "profile missing flags"; From a9eb185be84e998aa9a99c7760534ccc06216705 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 2 Jan 2024 21:54:30 -0800 Subject: [PATCH 0012/2411] apparmor: fix x_table_lookup when stacking is not the first entry x_table_lookup currently does stacking during label_parse() if the target specifies a stack but its only caller ensures that it will never be used with stacking. 
Refactor to slightly simplify the code in x_to_label(), this also fixes a long standing problem where x_to_labels check on stacking is only on the first element to the table option list, instead of the element that is found and used. Signed-off-by: John Johansen --- security/apparmor/domain.c | 52 +++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 630806573793..b9c299097372 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -509,6 +509,7 @@ static const char *next_name(int xtype, const char *name) * @name: returns: name tested to find label (NOT NULL) * * Returns: refcounted label, or NULL on failure (MAYBE NULL) + * @name will always be set with the last name tried */ struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex, const char **name) @@ -518,6 +519,7 @@ struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex, struct aa_label *label = NULL; u32 xtype = xindex & AA_X_TYPE_MASK; int index = xindex & AA_X_INDEX_MASK; + const char *next; AA_BUG(!name); @@ -525,25 +527,27 @@ struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex, /* TODO: move lookup parsing to unpack time so this is a straight * index into the resultant label */ - for (*name = rules->file->trans.table[index]; !label && *name; - *name = next_name(xtype, *name)) { + for (next = rules->file->trans.table[index]; next; + next = next_name(xtype, next)) { + const char *lookup = (*next == '&') ? 
next + 1 : next; + *name = next; if (xindex & AA_X_CHILD) { - struct aa_profile *new_profile; - /* release by caller */ - new_profile = aa_find_child(profile, *name); - if (new_profile) - label = &new_profile->label; + /* TODO: switich to parse to get stack of child */ + struct aa_profile *new = aa_find_child(profile, lookup); + + if (new) + /* release by caller */ + return &new->label; continue; } - label = aa_label_parse(&profile->label, *name, GFP_KERNEL, + label = aa_label_parse(&profile->label, lookup, GFP_KERNEL, true, false); - if (IS_ERR(label)) - label = NULL; + if (!IS_ERR_OR_NULL(label)) + /* release by caller */ + return label; } - /* released by caller */ - - return label; + return NULL; } /** @@ -568,9 +572,9 @@ static struct aa_label *x_to_label(struct aa_profile *profile, struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); struct aa_label *new = NULL; + struct aa_label *stack = NULL; struct aa_ns *ns = profile->ns; u32 xtype = xindex & AA_X_TYPE_MASK; - const char *stack = NULL; switch (xtype) { case AA_X_NONE: @@ -579,13 +583,14 @@ static struct aa_label *x_to_label(struct aa_profile *profile, break; case AA_X_TABLE: /* TODO: fix when perm mapping done at unload */ - stack = rules->file->trans.table[xindex & AA_X_INDEX_MASK]; - if (*stack != '&') { - /* released by caller */ - new = x_table_lookup(profile, xindex, lookupname); - stack = NULL; + /* released by caller + * if null for both stack and direct want to try fallback + */ + new = x_table_lookup(profile, xindex, lookupname); + if (!new || **lookupname != '&') break; - } + stack = new; + new = NULL; fallthrough; /* to X_NAME */ case AA_X_NAME: if (xindex & AA_X_CHILD) @@ -600,6 +605,7 @@ static struct aa_label *x_to_label(struct aa_profile *profile, break; } + /* fallback transition check */ if (!new) { if (xindex & AA_X_INHERIT) { /* (p|c|n)ix - don't change profile but do @@ -618,12 +624,12 @@ static struct aa_label *x_to_label(struct aa_profile *profile, /* 
base the stack on post domain transition */ struct aa_label *base = new; - new = aa_label_parse(base, stack, GFP_KERNEL, true, false); - if (IS_ERR(new)) - new = NULL; + new = aa_label_merge(base, stack, GFP_KERNEL); + /* null on error */ aa_put_label(base); } + aa_put_label(stack); /* released by caller */ return new; } From ce9e3b3fa25a239f5c80989a1d05719bb2793fd4 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Thu, 4 Jan 2024 09:00:49 -0800 Subject: [PATCH 0013/2411] apparmor: add ability to mediate caps with policy state machine Currently the caps encoding is very limited and can't be used with conditionals. Allow capabilities to be mediated by the state machine. This will allow us to add conditionals to capabilities that aren't possible with the current encoding. This patch only adds support for using the state machine and retains the old encoding lookup as part of the runtime mediation code to support older policy abis. A follow on patch will move backwards compatibility to a mapping function done at policy load time. 
Signed-off-by: John Johansen --- security/apparmor/capability.c | 56 ++++++++++++++++++++++++++ security/apparmor/include/capability.h | 1 + security/apparmor/lsm.c | 11 +++-- 3 files changed, 62 insertions(+), 6 deletions(-) diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c index 7ca489ee1054..25b6219cdeb6 100644 --- a/security/apparmor/capability.c +++ b/security/apparmor/capability.c @@ -27,6 +27,7 @@ struct aa_sfs_entry aa_sfs_entry_caps[] = { AA_SFS_FILE_STRING("mask", AA_SFS_CAPS_MASK), + AA_SFS_FILE_BOOLEAN("extended", 1), { } }; @@ -123,8 +124,31 @@ static int profile_capable(struct aa_profile *profile, int cap, { struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); + aa_state_t state; int error; + state = RULE_MEDIATES(rules, ad->class); + if (state) { + struct aa_perms perms = { }; + u32 request; + + /* caps broken into 256 x 32 bit permission chunks */ + state = aa_dfa_next(rules->policy->dfa, state, cap >> 5); + request = 1 << (cap & 0x1f); + perms = *aa_lookup_perms(rules->policy, state); + aa_apply_modes_to_perms(profile, &perms); + + if (opts & CAP_OPT_NOAUDIT) { + if (perms.complain & request) + ad->info = "optional: no audit"; + else + ad = NULL; + } + return aa_check_perms(profile, &perms, request, ad, + audit_cb); + } + + /* fallback to old caps mediation that doesn't support conditionals */ if (cap_raised(rules->caps.allow, cap) && !cap_raised(rules->caps.denied, cap)) error = 0; @@ -168,3 +192,35 @@ int aa_capable(const struct cred *subj_cred, struct aa_label *label, return error; } + +kernel_cap_t aa_profile_capget(struct aa_profile *profile) +{ + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); + aa_state_t state; + + state = RULE_MEDIATES(rules, AA_CLASS_CAP); + if (state) { + kernel_cap_t caps = CAP_EMPTY_SET; + int i; + + /* caps broken into up to 256, 32 bit permission chunks */ + for (i = 0; i < (CAP_LAST_CAP >> 5); i++) { + struct aa_perms 
perms = { }; + aa_state_t tmp; + + tmp = aa_dfa_next(rules->policy->dfa, state, i); + perms = *aa_lookup_perms(rules->policy, tmp); + aa_apply_modes_to_perms(profile, &perms); + caps.val |= ((u64)(perms.allow)) << (i * 5); + caps.val |= ((u64)(perms.complain)) << (i * 5); + } + return caps; + } + + /* fallback to old caps */ + if (COMPLAIN_MODE(profile)) + return CAP_FULL_SET; + + return rules->caps.allow; +} diff --git a/security/apparmor/include/capability.h b/security/apparmor/include/capability.h index d6dcc604ec0c..1ddcec2d1160 100644 --- a/security/apparmor/include/capability.h +++ b/security/apparmor/include/capability.h @@ -36,6 +36,7 @@ struct aa_caps { extern struct aa_sfs_entry aa_sfs_entry_caps[]; +kernel_cap_t aa_profile_capget(struct aa_profile *profile); int aa_capable(const struct cred *subj_cred, struct aa_label *label, int cap, unsigned int opts); diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 72c3d1536f69..479bfea064af 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -177,14 +177,13 @@ static int apparmor_capget(const struct task_struct *target, kernel_cap_t *effec label_for_each_confined(i, label, profile) { struct aa_ruleset *rules; - if (COMPLAIN_MODE(profile)) - continue; + kernel_cap_t allowed; + rules = list_first_entry(&profile->rules, typeof(*rules), list); - *effective = cap_intersect(*effective, - rules->caps.allow); - *permitted = cap_intersect(*permitted, - rules->caps.allow); + allowed = aa_profile_capget(profile); + *effective = cap_intersect(*effective, allowed); + *permitted = cap_intersect(*permitted, allowed); } } rcu_read_unlock(); From 9045aa25d17cf1d13a1c31fc45ed1f9ca725e30e Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 23 Apr 2024 08:59:33 -0700 Subject: [PATCH 0014/2411] apparmor: remove af_select macro The af_select macro just adds a layer of unnecessary abstraction that makes following what the code is doing harder. 
Signed-off-by: John Johansen --- security/apparmor/include/net.h | 10 ---------- security/apparmor/lsm.c | 35 +++++++++------------------------ 2 files changed, 9 insertions(+), 36 deletions(-) diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h index c42ed8a73f1c..82dc38e4c925 100644 --- a/security/apparmor/include/net.h +++ b/security/apparmor/include/net.h @@ -73,16 +73,6 @@ static inline struct aa_sk_ctx *aa_sock(const struct sock *sk) (SK)->sk_protocol) -#define af_select(FAMILY, FN, DEF_FN) \ -({ \ - int __e; \ - switch ((FAMILY)) { \ - default: \ - __e = DEF_FN; \ - } \ - __e; \ -}) - struct aa_secmark { u8 audit; u8 deny; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 479bfea064af..1246115b7435 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1097,11 +1097,8 @@ static int apparmor_socket_create(int family, int type, int protocol, int kern) label = begin_current_label_crit_section(); if (!(kern || unconfined(label))) - error = af_select(family, - create_perm(label, family, type, protocol), - aa_af_perm(current_cred(), label, - OP_CREATE, AA_MAY_CREATE, - family, type, protocol)); + error = aa_af_perm(current_cred(), label, OP_CREATE, + AA_MAY_CREATE, family, type, protocol); end_current_label_crit_section(label); return error; @@ -1150,9 +1147,7 @@ static int apparmor_socket_bind(struct socket *sock, AA_BUG(!address); AA_BUG(in_interrupt()); - return af_select(sock->sk->sk_family, - bind_perm(sock, address, addrlen), - aa_sk_perm(OP_BIND, AA_MAY_BIND, sock->sk)); + return aa_sk_perm(OP_BIND, AA_MAY_BIND, sock->sk); } static int apparmor_socket_connect(struct socket *sock, @@ -1163,9 +1158,7 @@ static int apparmor_socket_connect(struct socket *sock, AA_BUG(!address); AA_BUG(in_interrupt()); - return af_select(sock->sk->sk_family, - connect_perm(sock, address, addrlen), - aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk)); + return aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk); } static 
int apparmor_socket_listen(struct socket *sock, int backlog) @@ -1174,9 +1167,7 @@ static int apparmor_socket_listen(struct socket *sock, int backlog) AA_BUG(!sock->sk); AA_BUG(in_interrupt()); - return af_select(sock->sk->sk_family, - listen_perm(sock, backlog), - aa_sk_perm(OP_LISTEN, AA_MAY_LISTEN, sock->sk)); + return aa_sk_perm(OP_LISTEN, AA_MAY_LISTEN, sock->sk); } /* @@ -1190,9 +1181,7 @@ static int apparmor_socket_accept(struct socket *sock, struct socket *newsock) AA_BUG(!newsock); AA_BUG(in_interrupt()); - return af_select(sock->sk->sk_family, - accept_perm(sock, newsock), - aa_sk_perm(OP_ACCEPT, AA_MAY_ACCEPT, sock->sk)); + return aa_sk_perm(OP_ACCEPT, AA_MAY_ACCEPT, sock->sk); } static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock, @@ -1203,9 +1192,7 @@ static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock, AA_BUG(!msg); AA_BUG(in_interrupt()); - return af_select(sock->sk->sk_family, - msg_perm(op, request, sock, msg, size), - aa_sk_perm(op, request, sock->sk)); + return aa_sk_perm(op, request, sock->sk); } static int apparmor_socket_sendmsg(struct socket *sock, @@ -1227,9 +1214,7 @@ static int aa_sock_perm(const char *op, u32 request, struct socket *sock) AA_BUG(!sock->sk); AA_BUG(in_interrupt()); - return af_select(sock->sk->sk_family, - sock_perm(op, request, sock), - aa_sk_perm(op, request, sock->sk)); + return aa_sk_perm(op, request, sock->sk); } static int apparmor_socket_getsockname(struct socket *sock) @@ -1250,9 +1235,7 @@ static int aa_sock_opt_perm(const char *op, u32 request, struct socket *sock, AA_BUG(!sock->sk); AA_BUG(in_interrupt()); - return af_select(sock->sk->sk_family, - opt_perm(op, request, sock, level, optname), - aa_sk_perm(op, request, sock->sk)); + return aa_sk_perm(op, request, sock->sk); } static int apparmor_socket_getsockopt(struct socket *sock, int level, From 6cc6a0523dde5b1f001d559d0e034494bc8b0db0 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 10 Apr 2024 14:49:43 
-0700 Subject: [PATCH 0015/2411] apparmor: lift kernel socket check out of critical section There is no need for the kern check to be in the critical section, it only complicates the code and slows down the case where the socket is being created by the kernel. Lifting it out will also allow socket_create to share common template code, with other socket_permission checks. Signed-off-by: John Johansen --- security/apparmor/lsm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 1246115b7435..f7b2d4bb1d97 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1095,10 +1095,14 @@ static int apparmor_socket_create(int family, int type, int protocol, int kern) AA_BUG(in_interrupt()); + if (kern) + return 0; + label = begin_current_label_crit_section(); - if (!(kern || unconfined(label))) + if (!unconfined(label)) { error = aa_af_perm(current_cred(), label, OP_CREATE, AA_MAY_CREATE, family, type, protocol); + } end_current_label_crit_section(label); return error; From b4940d913cc2c67f8f6bf17abbf3e5301f95e260 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 24 Apr 2024 15:54:26 -0700 Subject: [PATCH 0016/2411] apparmor: in preparation for finer networking rules rework match_prot Rework match_prot into a common fn that can be shared by all the networking rules. This will provide compatibility with current socket mediation, via the early bailout permission encoding. 
Signed-off-by: John Johansen --- security/apparmor/include/net.h | 8 +++- security/apparmor/net.c | 81 ++++++++++++++++++++++++++++----- 2 files changed, 75 insertions(+), 14 deletions(-) diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h index 82dc38e4c925..9361ba000398 100644 --- a/security/apparmor/include/net.h +++ b/security/apparmor/include/net.h @@ -82,10 +82,14 @@ struct aa_secmark { extern struct aa_sfs_entry aa_sfs_entry_network[]; +/* passing in state returned by XXX_mediates(class) */ +aa_state_t aa_match_to_prot(struct aa_policydb *policy, aa_state_t state, + u32 request, u16 family, int type, int protocol, + struct aa_perms **p, const char **info); void audit_net_cb(struct audit_buffer *ab, void *va); int aa_profile_af_perm(struct aa_profile *profile, struct apparmor_audit_data *ad, - u32 request, u16 family, int type); + u32 request, u16 family, int type, int protocol); int aa_af_perm(const struct cred *subj_cred, struct aa_label *label, const char *op, u32 request, u16 family, int type, int protocol); @@ -95,7 +99,7 @@ static inline int aa_profile_af_sk_perm(struct aa_profile *profile, struct sock *sk) { return aa_profile_af_perm(profile, ad, request, sk->sk_family, - sk->sk_type); + sk->sk_type, sk->sk_protocol); } int aa_sk_perm(const char *op, u32 request, struct sock *sk); diff --git a/security/apparmor/net.c b/security/apparmor/net.c index 8b7a63c08ba1..c76e0f5dcc93 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -105,16 +105,78 @@ void audit_net_cb(struct audit_buffer *ab, void *va) } } +/* standard permission lookup pattern - supports early bailout */ +static int do_perms(struct aa_profile *profile, struct aa_policydb *policy, + unsigned int state, u32 request, + struct aa_perms *p, struct apparmor_audit_data *ad) +{ + struct aa_perms perms; + + AA_BUG(!profile); + AA_BUG(!policy); + + + if (state || !p) + p = aa_lookup_perms(policy, state); + perms = *p; + aa_apply_modes_to_perms(profile, 
&perms); + return aa_check_perms(profile, &perms, request, ad, + audit_net_cb); +} + +/* only continue match if + * insufficient current perms at current state + * indicates there are more perms in later state + * Returns: perms struct if early match + */ +static struct aa_perms *early_match(struct aa_policydb *policy, + aa_state_t state, u32 request) +{ + struct aa_perms *p; + + p = aa_lookup_perms(policy, state); + if (((p->allow & request) != request) && (p->allow & AA_CONT_MATCH)) + return NULL; + return p; +} + +/* passing in state returned by PROFILE_MEDIATES_AF */ +aa_state_t aa_match_to_prot(struct aa_policydb *policy, aa_state_t state, + u32 request, u16 family, int type, int protocol, + struct aa_perms **p, const char **info) +{ + __be16 buffer; + + buffer = cpu_to_be16(family); + state = aa_dfa_match_len(policy->dfa, state, (char *) &buffer, 2); + if (!state) { + *info = "failed af match"; + return DFA_NOMATCH; + } + buffer = cpu_to_be16((u16)type); + state = aa_dfa_match_len(policy->dfa, state, (char *) &buffer, 2); + if (!state) + *info = "failed type match"; + *p = early_match(policy, state, request); + if (!*p) { + buffer = cpu_to_be16((u16)protocol); + state = aa_dfa_match_len(policy->dfa, state, (char *) &buffer, + 2); + if (!state) + *info = "failed protocol match"; + } + return state; +} + /* Generic af perm */ int aa_profile_af_perm(struct aa_profile *profile, struct apparmor_audit_data *ad, u32 request, u16 family, - int type) + int type, int protocol) { struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); - struct aa_perms perms = { }; + struct aa_perms *p = NULL; aa_state_t state; - __be16 buffer[2]; AA_BUG(family >= AF_MAX); AA_BUG(type < 0 || type >= SOCK_MAX); @@ -124,14 +186,9 @@ int aa_profile_af_perm(struct aa_profile *profile, if (!state) return 0; - buffer[0] = cpu_to_be16(family); - buffer[1] = cpu_to_be16((u16) type); - state = aa_dfa_match_len(rules->policy->dfa, state, (char *) &buffer, - 4); - perms 
= *aa_lookup_perms(rules->policy, state); - aa_apply_modes_to_perms(profile, &perms); - - return aa_check_perms(profile, &perms, request, ad, audit_net_cb); + state = aa_match_to_prot(rules->policy, state, request, family, type, + protocol, &p, &ad->info); + return do_perms(profile, rules->policy, state, request, p, ad); } int aa_af_perm(const struct cred *subj_cred, struct aa_label *label, @@ -142,7 +199,7 @@ int aa_af_perm(const struct cred *subj_cred, struct aa_label *label, return fn_for_each_confined(label, profile, aa_profile_af_perm(profile, &ad, request, family, - type)); + type, protocol)); } static int aa_label_sk_perm(const struct cred *subj_cred, From c05e705812d179f4b85aeacc34a555a42bc4f9ac Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 7 Sep 2022 12:46:30 -0700 Subject: [PATCH 0017/2411] apparmor: add fine grained af_unix mediation Extend af_unix mediation to support fine grained controls based on the type (abstract, anonymous, fs), the address, and the labeling on the socket. This allows for using socket addresses to label the socket and control which subjects can communicate. The unix rule format follows standard apparmor rules except that fs based unix sockets can be mediated by existing file rules. Non-fs unix sockets can be mediated by a unix socket rule. The address of an abstract unix domain socket begins with the @ character, similar to how they are reported (as paths) by netstat -x. The address then follows and may contain pattern matching and any characters including the null character. In apparmor null characters must be specified by using an escape sequence \000 or \x00. The pattern matching is the same as is used by file path matching so * will not match / even though it has no special meaning within an abstract socket name. E.g. allow unix addr=@*, Autobound unix domain sockets have a unix sun_path assigned to them by the kernel, as such specifying a policy based address is not possible.
The autobinding of sockets can be controlled by specifying the special auto keyword. E.g. allow unix addr=auto, to indicate that the rule only applies to auto binding of unix domain sockets. It is important to note this only applies to the bind permission as once the socket is bound to an address it is indistinguishable from a socket that has an addr bound with a specified name. When the auto keyword is used with other permissions or as part of a peer addr it will be replaced with a pattern that can match an autobound socket. E.g. for some kernels allow unix rw addr=auto, It is important to note, this pattern may match abstract sockets that were not autobound but have an addr that fits what is generated by the kernel when autobinding a socket. Anonymous unix domain sockets have no sun_path associated with the socket address, however it can be specified with the special none keyword to indicate the rule only applies to anonymous unix domain sockets. E.g. allow unix addr=none, If the address component of a rule is not specified then the rule applies to autobind, abstract and anonymous sockets. The label on the socket can be compared using the standard label= rule conditional. E.g. allow unix addr=@foo peer=(label=bar), See man apparmor.d for full syntax description.
Signed-off-by: John Johansen --- security/apparmor/Makefile | 2 +- security/apparmor/af_unix.c | 702 +++++++++++++++++++++++++++ security/apparmor/apparmorfs.c | 7 + security/apparmor/file.c | 16 +- security/apparmor/include/af_unix.h | 57 +++ security/apparmor/include/apparmor.h | 1 + security/apparmor/include/file.h | 4 + security/apparmor/include/net.h | 17 +- security/apparmor/include/path.h | 1 + security/apparmor/include/policy.h | 9 +- security/apparmor/lsm.c | 163 ++++++- security/apparmor/net.c | 142 ++++-- 12 files changed, 1063 insertions(+), 58 deletions(-) create mode 100644 security/apparmor/af_unix.c create mode 100644 security/apparmor/include/af_unix.h diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile index b9c5879dd599..be51607f52b6 100644 --- a/security/apparmor/Makefile +++ b/security/apparmor/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o apparmor-y := apparmorfs.o audit.o capability.o task.o ipc.o lib.o match.o \ path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \ resource.o secid.o file.o policy_ns.o label.o mount.o net.o \ - policy_compat.o + policy_compat.o af_unix.o apparmor-$(CONFIG_SECURITY_APPARMOR_HASH) += crypto.o obj-$(CONFIG_SECURITY_APPARMOR_KUNIT_TEST) += apparmor_policy_unpack_test.o diff --git a/security/apparmor/af_unix.c b/security/apparmor/af_unix.c new file mode 100644 index 000000000000..ce7dc9d98fb1 --- /dev/null +++ b/security/apparmor/af_unix.c @@ -0,0 +1,702 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AppArmor security module + * + * This file contains AppArmor af_unix fine grained mediation + * + * Copyright 2023 Canonical Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. 
+ */ + +#include + +#include "include/audit.h" +#include "include/af_unix.h" +#include "include/apparmor.h" +#include "include/file.h" +#include "include/label.h" +#include "include/path.h" +#include "include/policy.h" +#include "include/cred.h" + + +static inline struct sock *aa_unix_sk(struct unix_sock *u) +{ + return &u->sk; +} + +static int unix_fs_perm(const char *op, u32 mask, const struct cred *subj_cred, + struct aa_label *label, struct unix_sock *u) +{ + AA_BUG(!label); + AA_BUG(!u); + AA_BUG(!is_unix_fs(aa_unix_sk(u))); + + if (unconfined(label) || !label_mediates(label, AA_CLASS_FILE)) + return 0; + + mask &= NET_FS_PERMS; + /* if !u->path.dentry socket is being shutdown - implicit delegation + * until obj delegation is supported + */ + if (u->path.dentry) { + /* the sunpath may not be valid for this ns so use the path */ + struct path_cond cond = { u->path.dentry->d_inode->i_uid, + u->path.dentry->d_inode->i_mode + }; + + return aa_path_perm(op, subj_cred, label, &u->path, + PATH_SOCK_COND, mask, &cond); + } /* else implicitly delegated */ + + return 0; +} + +/* match_addr special constants */ +#define ABSTRACT_ADDR "\x00" /* abstract socket addr */ +#define ANONYMOUS_ADDR "\x01" /* anonymous endpoint, no addr */ +#define DISCONNECTED_ADDR "\x02" /* addr is another namespace */ +#define SHUTDOWN_ADDR "\x03" /* path addr is shutdown and cleared */ +#define FS_ADDR "/" /* path addr in fs */ + +static aa_state_t match_addr(struct aa_dfa *dfa, aa_state_t state, + struct sockaddr_un *addr, int addrlen) +{ + if (addr) + /* include leading \0 */ + state = aa_dfa_match_len(dfa, state, addr->sun_path, + unix_addr_len(addrlen)); + else + state = aa_dfa_match_len(dfa, state, ANONYMOUS_ADDR, 1); + /* todo: could change to out of band for cleaner separation */ + state = aa_dfa_null_transition(dfa, state); + + return state; +} + +static aa_state_t match_to_local(struct aa_policydb *policy, + aa_state_t state, u32 request, + int type, int protocol, + struct 
sockaddr_un *addr, int addrlen, + struct aa_perms **p, + const char **info) +{ + state = aa_match_to_prot(policy, state, request, PF_UNIX, type, + protocol, NULL, info); + if (state) { + state = match_addr(policy->dfa, state, addr, addrlen); + if (state) { + /* todo: local label matching */ + state = aa_dfa_null_transition(policy->dfa, state); + if (!state) + *info = "failed local label match"; + } else { + *info = "failed local address match"; + } + } + + return state; +} + +static aa_state_t match_to_sk(struct aa_policydb *policy, + aa_state_t state, u32 request, + struct unix_sock *u, struct aa_perms **p, + const char **info) +{ + struct sockaddr_un *addr = NULL; + int addrlen = 0; + + if (u->addr) { + addr = u->addr->name; + addrlen = u->addr->len; + } + + return match_to_local(policy, state, request, u->sk.sk_type, + u->sk.sk_protocol, addr, addrlen, p, info); +} + +#define CMD_ADDR 1 +#define CMD_LISTEN 2 +#define CMD_OPT 4 + +static aa_state_t match_to_cmd(struct aa_policydb *policy, aa_state_t state, + u32 request, struct unix_sock *u, + char cmd, struct aa_perms **p, + const char **info) +{ + AA_BUG(!p); + + state = match_to_sk(policy, state, request, u, p, info); + if (state && !*p) { + state = aa_dfa_match_len(policy->dfa, state, &cmd, 1); + if (!state) + *info = "failed cmd selection match"; + } + + return state; +} + +static aa_state_t match_to_peer(struct aa_policydb *policy, aa_state_t state, + u32 request, struct unix_sock *u, + struct sockaddr_un *peer_addr, int peer_addrlen, + struct aa_perms **p, const char **info) +{ + AA_BUG(!p); + + state = match_to_cmd(policy, state, request, u, CMD_ADDR, p, info); + if (state && !*p) { + state = match_addr(policy->dfa, state, peer_addr, peer_addrlen); + if (!state) + *info = "failed peer address match"; + } + + return state; +} + +static aa_state_t match_label(struct aa_profile *profile, + struct aa_ruleset *rule, aa_state_t state, + u32 request, struct aa_profile *peer, + struct aa_perms *p, + struct 
apparmor_audit_data *ad) +{ + AA_BUG(!profile); + AA_BUG(!peer); + + ad->peer = &peer->label; + + if (state && !p) { + state = aa_dfa_match(rule->policy->dfa, state, + peer->base.hname); + if (!state) + ad->info = "failed peer label match"; + + } + + return aa_do_perms(profile, rule->policy, state, request, p, ad); +} + + +/* unix sock creation comes before we know if the socket will be an fs + * socket + * v6 - semantics are handled by mapping in profile load + * v7 - semantics require sock create for tasks creating an fs socket. + * v8 - same as v7 + */ +static int profile_create_perm(struct aa_profile *profile, int family, + int type, int protocol, + struct apparmor_audit_data *ad) +{ + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); + aa_state_t state; + + AA_BUG(!profile); + AA_BUG(profile_unconfined(profile)); + + state = RULE_MEDIATES_NET(rules); + if (state) { + state = aa_match_to_prot(rules->policy, state, AA_MAY_CREATE, + PF_UNIX, type, protocol, NULL, + &ad->info); + + return aa_do_perms(profile, rules->policy, state, AA_MAY_CREATE, + NULL, ad); + } + + return aa_profile_af_perm(profile, ad, AA_MAY_CREATE, family, type, + protocol); +} + +static int profile_sk_perm(struct aa_profile *profile, + struct apparmor_audit_data *ad, + u32 request, struct sock *sk) +{ + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), + list); + struct aa_perms *p = NULL; + aa_state_t state; + + AA_BUG(!profile); + AA_BUG(!sk); + AA_BUG(is_unix_fs(sk)); + AA_BUG(profile_unconfined(profile)); + + state = RULE_MEDIATES_NET(rules); + if (state) { + state = match_to_sk(rules->policy, state, request, unix_sk(sk), + &p, &ad->info); + + return aa_do_perms(profile, rules->policy, state, request, p, + ad); + } + + return aa_profile_af_sk_perm(profile, ad, request, sk); +} + +static int profile_bind_perm(struct aa_profile *profile, struct sock *sk, + struct apparmor_audit_data *ad) +{ + struct aa_ruleset *rules = 
list_first_entry(&profile->rules, + typeof(*rules), list); + struct aa_perms *p = NULL; + aa_state_t state; + + AA_BUG(!profile); + AA_BUG(!sk); + AA_BUG(!ad); + AA_BUG(profile_unconfined(profile)); + + state = RULE_MEDIATES_NET(rules); + if (state) { + /* bind for abstract socket */ + state = match_to_local(rules->policy, state, AA_MAY_BIND, + sk->sk_type, sk->sk_protocol, + unix_addr(ad->net.addr), + ad->net.addrlen, + &p, &ad->info); + + return aa_do_perms(profile, rules->policy, state, AA_MAY_BIND, + p, ad); + } + + return aa_profile_af_sk_perm(profile, ad, AA_MAY_BIND, sk); +} + +static int profile_listen_perm(struct aa_profile *profile, struct sock *sk, + int backlog, struct apparmor_audit_data *ad) +{ + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); + struct aa_perms *p = NULL; + aa_state_t state; + + AA_BUG(!profile); + AA_BUG(!sk); + AA_BUG(is_unix_fs(sk)); + AA_BUG(!ad); + AA_BUG(profile_unconfined(profile)); + + state = RULE_MEDIATES_NET(rules); + if (state) { + __be16 b = cpu_to_be16(backlog); + + state = match_to_cmd(rules->policy, state, AA_MAY_LISTEN, + unix_sk(sk), CMD_LISTEN, &p, &ad->info); + if (state && !p) { + state = aa_dfa_match_len(rules->policy->dfa, state, + (char *) &b, 2); + if (!state) + ad->info = "failed listen backlog match"; + } + return aa_do_perms(profile, rules->policy, state, AA_MAY_LISTEN, + p, ad); + } + + return aa_profile_af_sk_perm(profile, ad, AA_MAY_LISTEN, sk); +} + +static int profile_accept_perm(struct aa_profile *profile, + struct sock *sk, + struct apparmor_audit_data *ad) +{ + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); + struct aa_perms *p = NULL; + aa_state_t state; + + AA_BUG(!profile); + AA_BUG(!sk); + AA_BUG(is_unix_fs(sk)); + AA_BUG(!ad); + AA_BUG(profile_unconfined(profile)); + + state = RULE_MEDIATES_NET(rules); + if (state) { + state = match_to_sk(rules->policy, state, AA_MAY_ACCEPT, + unix_sk(sk), &p, &ad->info); + + return 
aa_do_perms(profile, rules->policy, state, AA_MAY_ACCEPT, + p, ad); + } + + return aa_profile_af_sk_perm(profile, ad, AA_MAY_ACCEPT, sk); +} + +static int profile_opt_perm(struct aa_profile *profile, u32 request, + struct sock *sk, int optname, + struct apparmor_audit_data *ad) +{ + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); + struct aa_perms *p = NULL; + aa_state_t state; + + AA_BUG(!profile); + AA_BUG(!sk); + AA_BUG(is_unix_fs(sk)); + AA_BUG(!ad); + AA_BUG(profile_unconfined(profile)); + + state = RULE_MEDIATES_NET(rules); + if (state) { + __be16 b = cpu_to_be16(optname); + + state = match_to_cmd(rules->policy, state, request, unix_sk(sk), + CMD_OPT, &p, &ad->info); + if (state && !p) { + state = aa_dfa_match_len(rules->policy->dfa, state, + (char *) &b, 2); + if (!state) + ad->info = "failed sockopt match"; + } + return aa_do_perms(profile, rules->policy, state, request, p, + ad); + } + + return aa_profile_af_sk_perm(profile, ad, request, sk); +} + +/* null peer_label is allowed, in which case the peer_sk label is used */ +static int profile_peer_perm(struct aa_profile *profile, u32 request, + struct sock *sk, struct sock *peer_sk, + struct aa_label *peer_label, + struct apparmor_audit_data *ad) +{ + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); + struct aa_perms *p = NULL; + aa_state_t state; + + AA_BUG(!profile); + AA_BUG(profile_unconfined(profile)); + AA_BUG(!sk); + AA_BUG(!peer_sk); + AA_BUG(!ad); + AA_BUG(is_unix_fs(peer_sk)); /* currently always calls unix_fs_perm */ + + state = RULE_MEDIATES_NET(rules); + if (state) { + struct aa_sk_ctx *peer_ctx = aa_sock(peer_sk); + struct aa_profile *peerp; + struct sockaddr_un *addr = NULL; + int len = 0; + + if (unix_sk(peer_sk)->addr) { + addr = unix_sk(peer_sk)->addr->name; + len = unix_sk(peer_sk)->addr->len; + } + state = match_to_peer(rules->policy, state, request, + unix_sk(sk), + addr, len, &p, &ad->info); + if (!peer_label) + 
peer_label = peer_ctx->label; + + return fn_for_each_in_ns(peer_label, peerp, + match_label(profile, rules, state, request, + peerp, p, ad)); + } + + return aa_profile_af_sk_perm(profile, ad, request, sk); +} + +/* -------------------------------- */ + +int aa_unix_create_perm(struct aa_label *label, int family, int type, + int protocol) +{ + if (!unconfined(label)) { + struct aa_profile *profile; + DEFINE_AUDIT_NET(ad, OP_CREATE, current_cred(), NULL, family, + type, protocol); + + return fn_for_each_confined(label, profile, + profile_create_perm(profile, family, type, + protocol, &ad)); + } + + return 0; +} + +int aa_unix_label_sk_perm(const struct cred *subj_cred, + struct aa_label *label, const char *op, u32 request, + struct sock *sk) +{ + if (!unconfined(label)) { + struct aa_profile *profile; + DEFINE_AUDIT_SK(ad, op, subj_cred, sk); + + return fn_for_each_confined(label, profile, + profile_sk_perm(profile, &ad, request, sk)); + } + return 0; +} + +static int unix_label_sock_perm(const struct cred *subj_cred, + struct aa_label *label, const char *op, + u32 request, struct socket *sock) +{ + if (unconfined(label)) + return 0; + if (is_unix_fs(sock->sk)) + return unix_fs_perm(op, request, subj_cred, label, + unix_sk(sock->sk)); + + return aa_unix_label_sk_perm(subj_cred, label, op, request, sock->sk); +} + +/* revalidation, get/set attr, shutdown */ +int aa_unix_sock_perm(const char *op, u32 request, struct socket *sock) +{ + struct aa_label *label; + int error; + + label = begin_current_label_crit_section(); + error = unix_label_sock_perm(current_cred(), label, op, request, sock); + end_current_label_crit_section(label); + + return error; +} + +static int valid_addr(struct sockaddr *addr, int addr_len) +{ + struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; + + /* addr_len == offsetof(struct sockaddr_un, sun_path) is autobind */ + if (addr_len < offsetof(struct sockaddr_un, sun_path) || + addr_len > sizeof(*sunaddr)) + return -EINVAL; + return 0; +} + 
+int aa_unix_bind_perm(struct socket *sock, struct sockaddr *addr, + int addrlen) +{ + struct aa_profile *profile; + struct aa_label *label; + int error = 0; + + error = valid_addr(addr, addrlen); + if (error) + return error; + + label = begin_current_label_crit_section(); + /* fs bind is handled by mknod */ + if (!(unconfined(label) || is_unix_addr_fs(addr, addrlen))) { + DEFINE_AUDIT_SK(ad, OP_BIND, current_cred(), sock->sk); + + ad.net.addr = unix_addr(addr); + ad.net.addrlen = addrlen; + + error = fn_for_each_confined(label, profile, + profile_bind_perm(profile, sock->sk, &ad)); + } + end_current_label_crit_section(label); + + return error; +} + +/* + * unix connections are covered by the + * - unix_stream_connect (stream) and unix_may_send hooks (dgram) + * - fs connect is handled by open + * This is just here to document this is not needed for af_unix + * +int aa_unix_connect_perm(struct socket *sock, struct sockaddr *address, + int addrlen) +{ + return 0; +} +*/ + +int aa_unix_listen_perm(struct socket *sock, int backlog) +{ + struct aa_profile *profile; + struct aa_label *label; + int error = 0; + + label = begin_current_label_crit_section(); + if (!(unconfined(label) || is_unix_fs(sock->sk))) { + DEFINE_AUDIT_SK(ad, OP_LISTEN, current_cred(), sock->sk); + + error = fn_for_each_confined(label, profile, + profile_listen_perm(profile, sock->sk, + backlog, &ad)); + } + end_current_label_crit_section(label); + + return error; +} + + +/* ability of sock to connect, not peer address binding */ +int aa_unix_accept_perm(struct socket *sock, struct socket *newsock) +{ + struct aa_profile *profile; + struct aa_label *label; + int error = 0; + + label = begin_current_label_crit_section(); + if (!(unconfined(label) || is_unix_fs(sock->sk))) { + DEFINE_AUDIT_SK(ad, OP_ACCEPT, current_cred(), sock->sk); + + error = fn_for_each_confined(label, profile, + profile_accept_perm(profile, sock->sk, &ad)); + } + end_current_label_crit_section(label); + + return error; +} + + +/* 
+ * dgram handled by unix_may_sendmsg, right to send on stream done at connect + * could do per msg unix_stream here, but connect + socket transfer is + * sufficient. This is just here to document this is not needed for af_unix + * + * sendmsg, recvmsg +int aa_unix_msg_perm(const char *op, u32 request, struct socket *sock, + struct msghdr *msg, int size) +{ + return 0; +} +*/ + +int aa_unix_opt_perm(const char *op, u32 request, struct socket *sock, + int level, int optname) +{ + struct aa_profile *profile; + struct aa_label *label; + int error = 0; + + label = begin_current_label_crit_section(); + if (!(unconfined(label) || is_unix_fs(sock->sk))) { + DEFINE_AUDIT_SK(ad, op, current_cred(), sock->sk); + + error = fn_for_each_confined(label, profile, + profile_opt_perm(profile, request, + sock->sk, optname, &ad)); + } + end_current_label_crit_section(label); + + return error; +} + +/** + * + * Requires: lock held on both @sk and @peer_sk + * called by unix_stream_connect, unix_may_send + */ +int aa_unix_peer_perm(const struct cred *subj_cred, + struct aa_label *label, const char *op, u32 request, + struct sock *sk, struct sock *peer_sk, + struct aa_label *peer_label) +{ + struct unix_sock *peeru = unix_sk(peer_sk); + struct unix_sock *u = unix_sk(sk); + + AA_BUG(!label); + AA_BUG(!sk); + AA_BUG(!peer_sk); + + if (is_unix_fs(aa_unix_sk(peeru))) { + return unix_fs_perm(op, request, subj_cred, label, peeru); + } else if (is_unix_fs(aa_unix_sk(u))) { + return unix_fs_perm(op, request, subj_cred, label, u); + } else if (!unconfined(label)) { + struct aa_profile *profile; + DEFINE_AUDIT_SK(ad, op, subj_cred, sk); + + ad.net.peer_sk = peer_sk; + + return fn_for_each_confined(label, profile, + profile_peer_perm(profile, request, sk, + peer_sk, peer_label, &ad)); + } + + return 0; +} + +static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) +{ + if (unlikely(sk1 == sk2) || !sk2) { + unix_state_lock(sk1); + return; + } + if (sk1 < sk2) { + unix_state_lock(sk1); 
+ unix_state_lock(sk2); + } else { + unix_state_lock(sk2); + unix_state_lock(sk1); + } +} + +static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) +{ + if (unlikely(sk1 == sk2) || !sk2) { + unix_state_unlock(sk1); + return; + } + unix_state_unlock(sk1); + unix_state_unlock(sk2); +} + +/* TODO: examine replacing double lock with cached addr */ + +int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, + const char *op, u32 request, struct file *file) +{ + struct socket *sock = (struct socket *) file->private_data; + struct sock *peer_sk = NULL; + u32 sk_req = request & ~NET_PEER_MASK; + bool is_sk_fs; + int error = 0; + + AA_BUG(!label); + AA_BUG(!sock); + AA_BUG(!sock->sk); + AA_BUG(sock->sk->sk_family != PF_UNIX); + + /* TODO: update sock label with new task label */ + unix_state_lock(sock->sk); + peer_sk = unix_peer(sock->sk); + if (peer_sk) + sock_hold(peer_sk); + + is_sk_fs = is_unix_fs(sock->sk); + if (is_sk_fs && peer_sk) + sk_req = request; + if (sk_req) + error = unix_label_sock_perm(subj_cred, label, op, sk_req, + sock); + unix_state_unlock(sock->sk); + if (!peer_sk) + return error; + + unix_state_double_lock(sock->sk, peer_sk); + if (!is_sk_fs && is_unix_fs(peer_sk)) { + last_error(error, + unix_fs_perm(op, request, subj_cred, label, + unix_sk(peer_sk))); + } else if (!is_sk_fs) { + struct aa_sk_ctx *pctx = aa_sock(peer_sk); + + last_error(error, + xcheck(aa_unix_peer_perm(subj_cred, label, op, + MAY_READ | MAY_WRITE, + sock->sk, peer_sk, NULL), + aa_unix_peer_perm(file->f_cred, pctx->label, op, + MAY_READ | MAY_WRITE, + peer_sk, sock->sk, label))); + } + unix_state_double_unlock(sock->sk, peer_sk); + + sock_put(peer_sk); + + return error; +} diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 3455d223879b..45afd585b52b 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -2387,6 +2387,11 @@ static struct aa_sfs_entry aa_sfs_entry_ns[] = { { } }; +static 
struct aa_sfs_entry aa_sfs_entry_dbus[] = { + AA_SFS_FILE_STRING("mask", "acquire send receive"), + { } +}; + static struct aa_sfs_entry aa_sfs_entry_query_label[] = { AA_SFS_FILE_STRING("perms", "allow deny audit quiet"), AA_SFS_FILE_BOOLEAN("data", 1), @@ -2409,6 +2414,7 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = { AA_SFS_DIR("domain", aa_sfs_entry_domain), AA_SFS_DIR("file", aa_sfs_entry_file), AA_SFS_DIR("network_v8", aa_sfs_entry_network), + AA_SFS_DIR("network", aa_sfs_entry_networkv9), AA_SFS_DIR("mount", aa_sfs_entry_mount), AA_SFS_DIR("namespaces", aa_sfs_entry_ns), AA_SFS_FILE_U64("capability", VFS_CAP_FLAGS_MASK), @@ -2416,6 +2422,7 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = { AA_SFS_DIR("caps", aa_sfs_entry_caps), AA_SFS_DIR("ptrace", aa_sfs_entry_ptrace), AA_SFS_DIR("signal", aa_sfs_entry_signal), + AA_SFS_DIR("dbus", aa_sfs_entry_dbus), AA_SFS_DIR("query", aa_sfs_entry_query), AA_SFS_DIR("io_uring", aa_sfs_entry_io_uring), { } diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 6ce6547301dc..d918b5dc6f59 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -14,6 +14,7 @@ #include #include +#include "include/af_unix.h" #include "include/apparmor.h" #include "include/audit.h" #include "include/cred.h" @@ -217,16 +218,17 @@ aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start, return state; } -static int __aa_path_perm(const char *op, const struct cred *subj_cred, - struct aa_profile *profile, const char *name, - u32 request, struct path_cond *cond, int flags, - struct aa_perms *perms) +int __aa_path_perm(const char *op, const struct cred *subj_cred, + struct aa_profile *profile, const char *name, + u32 request, struct path_cond *cond, int flags, + struct aa_perms *perms) { struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); int e = 0; - if (profile_unconfined(profile)) + if (profile_unconfined(profile) || + ((flags & PATH_SOCK_COND) && 
!RULE_MEDIATES_NET(rules))) return 0; aa_str_perms(rules->file, rules->file->start[AA_CLASS_FILE], name, cond, perms); @@ -549,12 +551,12 @@ static int __file_sock_perm(const char *op, const struct cred *subj_cred, return 0; /* TODO: improve to skip profiles cached in flabel */ - error = aa_sock_file_perm(subj_cred, label, op, request, sock); + error = aa_sock_file_perm(subj_cred, label, op, request, file); if (denied) { /* TODO: improve to skip profiles checked above */ /* check every profile in file label to is cached */ last_error(error, aa_sock_file_perm(subj_cred, flabel, op, - request, sock)); + request, file)); } if (!error) update_file_ctx(file_ctx(file), label, request); diff --git a/security/apparmor/include/af_unix.h b/security/apparmor/include/af_unix.h new file mode 100644 index 000000000000..28390eec3204 --- /dev/null +++ b/security/apparmor/include/af_unix.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * AppArmor security module + * + * This file contains AppArmor af_unix fine grained mediation + * + * Copyright 2023 Canonical Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. 
+ */ +#ifndef __AA_AF_UNIX_H + +#include + +#include "label.h" + +#define unix_addr(A) ((struct sockaddr_un *)(A)) +#define unix_addr_len(L) ((L) - sizeof(sa_family_t)) +#define unix_peer(sk) (unix_sk(sk)->peer) +#define is_unix_addr_abstract_name(B) ((B)[0] == 0) +#define is_unix_addr_anon(A, L) ((A) && unix_addr_len(L) <= 0) +#define is_unix_addr_fs(A, L) (!is_unix_addr_anon(A, L) && \ + !is_unix_addr_abstract_name(unix_addr(A)->sun_path)) + +#define is_unix_anonymous(U) (!unix_sk(U)->addr) +#define is_unix_fs(U) (!is_unix_anonymous(U) && \ + unix_sk(U)->addr->name->sun_path[0]) +#define is_unix_connected(S) ((S)->state == SS_CONNECTED) + + +int aa_unix_peer_perm(const struct cred *subj_cred, + struct aa_label *label, const char *op, u32 request, + struct sock *sk, struct sock *peer_sk, + struct aa_label *peer_label); +int aa_unix_label_sk_perm(const struct cred *subj_cred, + struct aa_label *label, const char *op, u32 request, + struct sock *sk); +int aa_unix_sock_perm(const char *op, u32 request, struct socket *sock); +int aa_unix_create_perm(struct aa_label *label, int family, int type, + int protocol); +int aa_unix_bind_perm(struct socket *sock, struct sockaddr *address, + int addrlen); +int aa_unix_connect_perm(struct socket *sock, struct sockaddr *address, + int addrlen); +int aa_unix_listen_perm(struct socket *sock, int backlog); +int aa_unix_accept_perm(struct socket *sock, struct socket *newsock); +int aa_unix_msg_perm(const char *op, u32 request, struct socket *sock, + struct msghdr *msg, int size); +int aa_unix_opt_perm(const char *op, u32 request, struct socket *sock, int level, + int optname); +int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, + const char *op, u32 request, struct file *file); + +#endif /* __AA_AF_UNIX_H */ diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h index dd12cba8139d..cc6e3df1bc62 100644 --- a/security/apparmor/include/apparmor.h +++ 
b/security/apparmor/include/apparmor.h @@ -28,6 +28,7 @@ #define AA_CLASS_SIGNAL 10 #define AA_CLASS_XMATCH 11 #define AA_CLASS_NET 14 +#define AA_CLASS_NETV9 15 #define AA_CLASS_LABEL 16 #define AA_CLASS_POSIX_MQUEUE 17 #define AA_CLASS_MODULE 19 diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h index 06d9899098a6..eb371dffbce3 100644 --- a/security/apparmor/include/file.h +++ b/security/apparmor/include/file.h @@ -84,6 +84,10 @@ aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start, const char *name, struct path_cond *cond, struct aa_perms *perms); +int __aa_path_perm(const char *op, const struct cred *subj_cred, + struct aa_profile *profile, const char *name, + u32 request, struct path_cond *cond, int flags, + struct aa_perms *perms); int aa_path_perm(const char *op, const struct cred *subj_cred, struct aa_label *label, const struct path *path, int flags, u32 request, struct path_cond *cond); diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h index 9361ba000398..5089e937d550 100644 --- a/security/apparmor/include/net.h +++ b/security/apparmor/include/net.h @@ -56,7 +56,7 @@ static inline struct aa_sk_ctx *aa_sock(const struct sock *sk) return sk->sk_security + apparmor_blob_sizes.lbs_sock; } -#define DEFINE_AUDIT_NET(NAME, OP, SK, F, T, P) \ +#define DEFINE_AUDIT_NET(NAME, OP, CRED, SK, F, T, P) \ struct lsm_network_audit NAME ## _net = { .sk = (SK), \ .family = (F)}; \ DEFINE_AUDIT_DATA(NAME, \ @@ -65,11 +65,12 @@ static inline struct aa_sk_ctx *aa_sock(const struct sock *sk) AA_CLASS_NET, \ OP); \ NAME.common.u.net = &(NAME ## _net); \ + NAME.subj_cred = (CRED); \ NAME.net.type = (T); \ NAME.net.protocol = (P) -#define DEFINE_AUDIT_SK(NAME, OP, SK) \ - DEFINE_AUDIT_NET(NAME, OP, SK, (SK)->sk_family, (SK)->sk_type, \ +#define DEFINE_AUDIT_SK(NAME, OP, CRED, SK) \ + DEFINE_AUDIT_NET(NAME, OP, CRED, SK, (SK)->sk_family, (SK)->sk_type, \ (SK)->sk_protocol) @@ -81,10 +82,14 @@ struct 
aa_secmark { }; extern struct aa_sfs_entry aa_sfs_entry_network[]; +extern struct aa_sfs_entry aa_sfs_entry_networkv9[]; -/* passing in state returned by XXX_mediates(class) */ +int aa_do_perms(struct aa_profile *profile, struct aa_policydb *policy, + aa_state_t state, u32 request, struct aa_perms *p, + struct apparmor_audit_data *ad); +/* passing in state returned by XXX_mediates_AF() */ aa_state_t aa_match_to_prot(struct aa_policydb *policy, aa_state_t state, - u32 request, u16 family, int type, int protocol, + u32 request, u16 af, int type, int protocol, struct aa_perms **p, const char **info); void audit_net_cb(struct audit_buffer *ab, void *va); int aa_profile_af_perm(struct aa_profile *profile, @@ -105,7 +110,7 @@ int aa_sk_perm(const char *op, u32 request, struct sock *sk); int aa_sock_file_perm(const struct cred *subj_cred, struct aa_label *label, const char *op, u32 request, - struct socket *sock); + struct file *file); int apparmor_secmark_check(struct aa_label *label, char *op, u32 request, u32 secid, const struct sock *sk); diff --git a/security/apparmor/include/path.h b/security/apparmor/include/path.h index 343189903dba..8bb915d48dc7 100644 --- a/security/apparmor/include/path.h +++ b/security/apparmor/include/path.h @@ -13,6 +13,7 @@ enum path_flags { PATH_IS_DIR = 0x1, /* path is a directory */ + PATH_SOCK_COND = 0x2, PATH_CONNECT_PATH = 0x4, /* connect disconnected paths to / */ PATH_CHROOT_REL = 0x8, /* do path lookup relative to chroot */ PATH_CHROOT_NSCONNECT = 0x10, /* connect paths that are at ns root */ diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 73cb84ef58f2..5128c5414f04 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -304,14 +304,9 @@ static inline aa_state_t RULE_MEDIATES(struct aa_ruleset *rules, rules->policy->start[0], &class, 1); } -static inline aa_state_t RULE_MEDIATES_AF(struct aa_ruleset *rules, u16 AF) +static inline aa_state_t 
RULE_MEDIATES_NET(struct aa_ruleset *rules) { - aa_state_t state = RULE_MEDIATES(rules, AA_CLASS_NET); - __be16 be_af = cpu_to_be16(AF); - - if (!state) - return DFA_NOMATCH; - return aa_dfa_match_len(rules->policy->dfa, state, (char *) &be_af, 2); + return RULE_MEDIATES(rules, AA_CLASS_NET); } static inline aa_state_t ANY_RULE_MEDIATES(struct list_head *head, diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index f7b2d4bb1d97..0b4f7e2e4135 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -26,6 +26,7 @@ #include #include +#include "include/af_unix.h" #include "include/apparmor.h" #include "include/apparmorfs.h" #include "include/audit.h" @@ -1088,6 +1089,94 @@ static void apparmor_sk_clone_security(const struct sock *sk, new->peer = aa_get_label(ctx->peer); } +static int unix_connect_perm(const struct cred *cred, struct aa_label *label, + struct sock *sk, struct sock *peer_sk) +{ + struct aa_sk_ctx *peer_ctx = aa_sock(peer_sk); + int error; + + error = aa_unix_peer_perm(cred, label, OP_CONNECT, + (AA_MAY_CONNECT | AA_MAY_SEND | AA_MAY_RECEIVE), + sk, peer_sk, NULL); + if (!is_unix_fs(peer_sk)) { + last_error(error, + aa_unix_peer_perm(cred, + peer_ctx->label, OP_CONNECT, + (AA_MAY_ACCEPT | AA_MAY_SEND | AA_MAY_RECEIVE), + peer_sk, sk, label)); + } + + return error; +} + +static void unix_connect_peers(struct aa_sk_ctx *sk_ctx, + struct aa_sk_ctx *peer_ctx) +{ + /* Cross reference the peer labels for SO_PEERSEC */ + aa_put_label(peer_ctx->peer); + aa_put_label(sk_ctx->peer); + + peer_ctx->peer = aa_get_label(sk_ctx->label); + sk_ctx->peer = aa_get_label(peer_ctx->label); +} + +/** + * apparmor_unix_stream_connect - check perms before making unix domain conn + * + * peer is locked when this hook is called + */ +static int apparmor_unix_stream_connect(struct sock *sk, struct sock *peer_sk, + struct sock *newsk) +{ + struct aa_sk_ctx *sk_ctx = aa_sock(sk); + struct aa_sk_ctx *peer_ctx = aa_sock(peer_sk); + struct aa_sk_ctx *new_ctx = 
aa_sock(newsk); + struct aa_label *label; + int error; + + label = __begin_current_label_crit_section(); + error = unix_connect_perm(current_cred(), label, sk, peer_sk); + __end_current_label_crit_section(label); + + if (error) + return error; + + /* newsk doesn't go through post_create */ + AA_BUG(new_ctx->label); + new_ctx->label = aa_get_label(peer_ctx->label); + + /* Cross reference the peer labels for SO_PEERSEC */ + unix_connect_peers(sk_ctx, new_ctx); + + return 0; +} + +/** + * apparmor_unix_may_send - check perms before conn or sending unix dgrams + * + * sock and peer are locked when this hook is called + * + * called by: dgram_connect peer setup but path not copied to newsk + */ +static int apparmor_unix_may_send(struct socket *sock, struct socket *peer) +{ + struct aa_sk_ctx *peer_ctx = aa_sock(peer->sk); + struct aa_label *label; + int error; + + label = __begin_current_label_crit_section(); + error = xcheck(aa_unix_peer_perm(current_cred(), + label, OP_SENDMSG, AA_MAY_SEND, + sock->sk, peer->sk, NULL), + aa_unix_peer_perm(peer->file ? 
peer->file->f_cred : NULL, + peer_ctx->label, OP_SENDMSG, + AA_MAY_RECEIVE, + peer->sk, sock->sk, label)); + __end_current_label_crit_section(label); + + return error; +} + static int apparmor_socket_create(int family, int type, int protocol, int kern) { struct aa_label *label; @@ -1100,8 +1189,13 @@ static int apparmor_socket_create(int family, int type, int protocol, int kern) label = begin_current_label_crit_section(); if (!unconfined(label)) { - error = aa_af_perm(current_cred(), label, OP_CREATE, - AA_MAY_CREATE, family, type, protocol); + if (family == PF_UNIX) + error = aa_unix_create_perm(label, family, type, + protocol); + else + error = aa_af_perm(current_cred(), label, OP_CREATE, + AA_MAY_CREATE, family, type, + protocol); } end_current_label_crit_section(label); @@ -1143,6 +1237,34 @@ static int apparmor_socket_post_create(struct socket *sock, int family, return 0; } +static int apparmor_socket_socketpair(struct socket *socka, + struct socket *sockb) +{ + struct aa_sk_ctx *a_ctx = aa_sock(socka->sk); + struct aa_sk_ctx *b_ctx = aa_sock(sockb->sk); + struct aa_label *label; + int error = 0; + + aa_put_label(a_ctx->label); + aa_put_label(b_ctx->label); + + label = begin_current_label_crit_section(); + a_ctx->label = aa_get_label(label); + b_ctx->label = aa_get_label(label); + + if (socka->sk->sk_family == PF_UNIX) { + /* unix socket pairs by-pass unix_stream_connect */ + if (!error) + unix_connect_peers(a_ctx, b_ctx); + } + end_current_label_crit_section(label); + + return error; +} + +/** + * apparmor_socket_bind - check perms before bind addr to socket + */ static int apparmor_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) { @@ -1151,6 +1273,8 @@ static int apparmor_socket_bind(struct socket *sock, AA_BUG(!address); AA_BUG(in_interrupt()); + if (sock->sk->sk_family == PF_UNIX) + return aa_unix_bind_perm(sock, address, addrlen); return aa_sk_perm(OP_BIND, AA_MAY_BIND, sock->sk); } @@ -1162,6 +1286,9 @@ static int 
apparmor_socket_connect(struct socket *sock, AA_BUG(!address); AA_BUG(in_interrupt()); + /* PF_UNIX goes through unix_stream_connect && unix_may_send */ + if (sock->sk->sk_family == PF_UNIX) + return 0; return aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk); } @@ -1171,6 +1298,8 @@ static int apparmor_socket_listen(struct socket *sock, int backlog) AA_BUG(!sock->sk); AA_BUG(in_interrupt()); + if (sock->sk->sk_family == PF_UNIX) + return aa_unix_listen_perm(sock, backlog); return aa_sk_perm(OP_LISTEN, AA_MAY_LISTEN, sock->sk); } @@ -1185,6 +1314,8 @@ static int apparmor_socket_accept(struct socket *sock, struct socket *newsock) AA_BUG(!newsock); AA_BUG(in_interrupt()); + if (sock->sk->sk_family == PF_UNIX) + return aa_unix_accept_perm(sock, newsock); return aa_sk_perm(OP_ACCEPT, AA_MAY_ACCEPT, sock->sk); } @@ -1196,6 +1327,9 @@ static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock, AA_BUG(!msg); AA_BUG(in_interrupt()); + /* PF_UNIX goes through unix_may_send */ + if (sock->sk->sk_family == PF_UNIX) + return 0; return aa_sk_perm(op, request, sock->sk); } @@ -1218,6 +1352,8 @@ static int aa_sock_perm(const char *op, u32 request, struct socket *sock) AA_BUG(!sock->sk); AA_BUG(in_interrupt()); + if (sock->sk->sk_family == PF_UNIX) + return aa_unix_sock_perm(op, request, sock); return aa_sk_perm(op, request, sock->sk); } @@ -1239,6 +1375,8 @@ static int aa_sock_opt_perm(const char *op, u32 request, struct socket *sock, AA_BUG(!sock->sk); AA_BUG(in_interrupt()); + if (sock->sk->sk_family == PF_UNIX) + return aa_unix_opt_perm(op, request, sock, level, optname); return aa_sk_perm(op, request, sock->sk); } @@ -1292,14 +1430,18 @@ static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) #endif -static struct aa_label *sk_peer_label(struct sock *sk) +static struct aa_label *sk_peer_get_label(struct sock *sk) { struct aa_sk_ctx *ctx = aa_sock(sk); + struct aa_label *label = ERR_PTR(-ENOPROTOOPT); if (ctx->peer) - return ctx->peer; + 
return aa_get_label(ctx->peer); - return ERR_PTR(-ENOPROTOOPT); + if (sk->sk_family != PF_UNIX) + return ERR_PTR(-ENOPROTOOPT); + + return label; } /** @@ -1322,7 +1464,7 @@ static int apparmor_socket_getpeersec_stream(struct socket *sock, struct aa_label *peer; label = begin_current_label_crit_section(); - peer = sk_peer_label(sock->sk); + peer = sk_peer_get_label(sock->sk); if (IS_ERR(peer)) { error = PTR_ERR(peer); goto done; @@ -1333,7 +1475,7 @@ static int apparmor_socket_getpeersec_stream(struct socket *sock, /* don't include terminating \0 in slen, it breaks some apps */ if (slen < 0) { error = -ENOMEM; - goto done; + goto done_put; } if (slen > len) { error = -ERANGE; @@ -1345,6 +1487,9 @@ static int apparmor_socket_getpeersec_stream(struct socket *sock, done_len: if (copy_to_sockptr(optlen, &slen, sizeof(slen))) error = -EFAULT; + +done_put: + aa_put_label(peer); done: end_current_label_crit_section(label); kfree(name); @@ -1456,8 +1601,12 @@ static struct security_hook_list apparmor_hooks[] __ro_after_init = { LSM_HOOK_INIT(sk_free_security, apparmor_sk_free_security), LSM_HOOK_INIT(sk_clone_security, apparmor_sk_clone_security), + LSM_HOOK_INIT(unix_stream_connect, apparmor_unix_stream_connect), + LSM_HOOK_INIT(unix_may_send, apparmor_unix_may_send), + LSM_HOOK_INIT(socket_create, apparmor_socket_create), LSM_HOOK_INIT(socket_post_create, apparmor_socket_post_create), + LSM_HOOK_INIT(socket_socketpair, apparmor_socket_socketpair), LSM_HOOK_INIT(socket_bind, apparmor_socket_bind), LSM_HOOK_INIT(socket_connect, apparmor_socket_connect), LSM_HOOK_INIT(socket_listen, apparmor_socket_listen), diff --git a/security/apparmor/net.c b/security/apparmor/net.c index c76e0f5dcc93..a256a4664826 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -8,6 +8,7 @@ * Copyright 2009-2017 Canonical Ltd. 
*/ +#include "include/af_unix.h" #include "include/apparmor.h" #include "include/audit.h" #include "include/cred.h" @@ -24,6 +25,12 @@ struct aa_sfs_entry aa_sfs_entry_network[] = { { } }; +struct aa_sfs_entry aa_sfs_entry_networkv9[] = { + AA_SFS_FILE_STRING("af_mask", AA_SFS_AF_MASK), + AA_SFS_FILE_BOOLEAN("af_unix", 1), + { } +}; + static const char * const net_mask_names[] = { "unknown", "send", @@ -66,6 +73,37 @@ static const char * const net_mask_names[] = { "unknown", }; +static void audit_unix_addr(struct audit_buffer *ab, const char *str, + struct sockaddr_un *addr, int addrlen) +{ + int len = unix_addr_len(addrlen); + + if (!addr || len <= 0) { + audit_log_format(ab, " %s=none", str); + } else if (addr->sun_path[0]) { + audit_log_format(ab, " %s=", str); + audit_log_untrustedstring(ab, addr->sun_path); + } else { + audit_log_format(ab, " %s=\"@", str); + if (audit_string_contains_control(&addr->sun_path[1], len - 1)) + audit_log_n_hex(ab, &addr->sun_path[1], len - 1); + else + audit_log_format(ab, "%.*s", len - 1, + &addr->sun_path[1]); + audit_log_format(ab, "\""); + } +} + +static void audit_unix_sk_addr(struct audit_buffer *ab, const char *str, + const struct sock *sk) +{ + const struct unix_sock *u = unix_sk(sk); + + if (u && u->addr) + audit_unix_addr(ab, str, u->addr->name, u->addr->len); + else + audit_unix_addr(ab, str, NULL, 0); +} /* audit callback for net specific fields */ void audit_net_cb(struct audit_buffer *ab, void *va) @@ -73,12 +111,12 @@ void audit_net_cb(struct audit_buffer *ab, void *va) struct common_audit_data *sa = va; struct apparmor_audit_data *ad = aad(sa); - if (address_family_names[sa->u.net->family]) + if (address_family_names[ad->common.u.net->family]) audit_log_format(ab, " family=\"%s\"", - address_family_names[sa->u.net->family]); + address_family_names[ad->common.u.net->family]); else audit_log_format(ab, " family=\"unknown(%d)\"", - sa->u.net->family); + ad->common.u.net->family); if (sock_type_names[ad->net.type]) 
audit_log_format(ab, " sock_type=\"%s\"", sock_type_names[ad->net.type]); @@ -98,6 +136,23 @@ void audit_net_cb(struct audit_buffer *ab, void *va) net_mask_names, NET_PERMS_MASK); } } + if (ad->common.u.net->family == PF_UNIX) { + if ((ad->request & ~NET_PEER_MASK) && ad->net.addr) + audit_unix_addr(ab, "addr", + unix_addr(ad->net.addr), + ad->net.addrlen); + else + audit_unix_sk_addr(ab, "addr", ad->common.u.net->sk); + if (ad->request & NET_PEER_MASK) { + if (ad->net.addr) + audit_unix_addr(ab, "peer_addr", + unix_addr(ad->net.addr), + ad->net.addrlen); + else + audit_unix_sk_addr(ab, "peer_addr", + ad->net.peer_sk); + } + } if (ad->peer) { audit_log_format(ab, " peer="); aa_label_xaudit(ab, labels_ns(ad->subj_label), ad->peer, @@ -106,9 +161,9 @@ void audit_net_cb(struct audit_buffer *ab, void *va) } /* standard permission lookup pattern - supports early bailout */ -static int do_perms(struct aa_profile *profile, struct aa_policydb *policy, - unsigned int state, u32 request, - struct aa_perms *p, struct apparmor_audit_data *ad) +int aa_do_perms(struct aa_profile *profile, struct aa_policydb *policy, + aa_state_t state, u32 request, + struct aa_perms *p, struct apparmor_audit_data *ad) { struct aa_perms perms; @@ -140,31 +195,53 @@ static struct aa_perms *early_match(struct aa_policydb *policy, return p; } -/* passing in state returned by PROFILE_MEDIATES_AF */ +static aa_state_t aa_dfa_match_be16(struct aa_dfa *dfa, aa_state_t state, + u16 data) +{ + __be16 buffer = cpu_to_be16(data); + + return aa_dfa_match_len(dfa, state, (char *) &buffer, 2); +} + +/** + * aa_match_to_prot - match the af, type, protocol triplet + * @policy: policy being matched + * @state: state to start in + * @request: permissions being requested, ignored if @p == NULL + * @af: socket address family + * @type: socket type + * @protocol: socket protocol + * @p: output - pointer to permission associated with match + * @info: output - pointer to string describing failure + * + * RETURNS: state 
match stopped in. + * + * If @(p) is assigned a value the returned state will be the + * corresponding state. Will not set @p on failure or if match completes + * only if an early match occurs + */ aa_state_t aa_match_to_prot(struct aa_policydb *policy, aa_state_t state, - u32 request, u16 family, int type, int protocol, + u32 request, u16 af, int type, int protocol, struct aa_perms **p, const char **info) { - __be16 buffer; - - buffer = cpu_to_be16(family); - state = aa_dfa_match_len(policy->dfa, state, (char *) &buffer, 2); + state = aa_dfa_match_be16(policy->dfa, state, (u16)af); if (!state) { *info = "failed af match"; - return DFA_NOMATCH; + return state; } - buffer = cpu_to_be16((u16)type); - state = aa_dfa_match_len(policy->dfa, state, (char *) &buffer, 2); - if (!state) + state = aa_dfa_match_be16(policy->dfa, state, (u16)type); + if (state) { + if (p) + *p = early_match(policy, state, request); + if (!p || !*p) { + state = aa_dfa_match_be16(policy->dfa, state, (u16)protocol); + if (!state) + *info = "failed protocol match"; + } + } else { *info = "failed type match"; - *p = early_match(policy, state, request); - if (!*p) { - buffer = cpu_to_be16((u16)protocol); - state = aa_dfa_match_len(policy->dfa, state, (char *) &buffer, - 2); - if (!state) - *info = "failed protocol match"; } + return state; } @@ -182,20 +259,21 @@ int aa_profile_af_perm(struct aa_profile *profile, AA_BUG(type < 0 || type >= SOCK_MAX); AA_BUG(profile_unconfined(profile)); - state = RULE_MEDIATES(rules, AA_CLASS_NET); + if (profile_unconfined(profile)) + return 0; + state = RULE_MEDIATES_NET(rules); if (!state) return 0; - state = aa_match_to_prot(rules->policy, state, request, family, type, protocol, &p, &ad->info); - return do_perms(profile, rules->policy, state, request, p, ad); + return aa_do_perms(profile, rules->policy, state, request, p, ad); } int aa_af_perm(const struct cred *subj_cred, struct aa_label *label, const char *op, u32 request, u16 family, int type, int protocol) { 
struct aa_profile *profile; - DEFINE_AUDIT_NET(ad, op, NULL, family, type, protocol); + DEFINE_AUDIT_NET(ad, op, subj_cred, NULL, family, type, protocol); return fn_for_each_confined(label, profile, aa_profile_af_perm(profile, &ad, request, family, @@ -215,7 +293,7 @@ static int aa_label_sk_perm(const struct cred *subj_cred, if (ctx->label != kernel_t && !unconfined(label)) { struct aa_profile *profile; - DEFINE_AUDIT_SK(ad, op, sk); + DEFINE_AUDIT_SK(ad, op, subj_cred, sk); ad.subj_cred = subj_cred; error = fn_for_each_confined(label, profile, @@ -243,12 +321,16 @@ int aa_sk_perm(const char *op, u32 request, struct sock *sk) int aa_sock_file_perm(const struct cred *subj_cred, struct aa_label *label, - const char *op, u32 request, struct socket *sock) + const char *op, u32 request, struct file *file) { + struct socket *sock = (struct socket *) file->private_data; + AA_BUG(!label); AA_BUG(!sock); AA_BUG(!sock->sk); + if (sock->sk->sk_family == PF_UNIX) + return aa_unix_file_perm(subj_cred, label, op, request, file); return aa_label_sk_perm(subj_cred, label, op, request, sock->sk); } @@ -313,7 +395,7 @@ int apparmor_secmark_check(struct aa_label *label, char *op, u32 request, u32 secid, const struct sock *sk) { struct aa_profile *profile; - DEFINE_AUDIT_SK(ad, op, sk); + DEFINE_AUDIT_SK(ad, op, NULL, sk); return fn_for_each_confined(label, profile, aa_secmark_perm(profile, request, secid, From dcd7a559411e8e1cd627ad20ac70faee77329380 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sat, 12 Oct 2024 04:43:34 -0700 Subject: [PATCH 0018/2411] apparmor: gate make fine grained unix mediation behind v9 abi Fine grained unix mediation in Ubuntu used ABI v7, and policy using this has propogated onto systems where fine grained unix mediation was not supported. The userspace policy compiler supports downgrading policy so the policy could be shared without changes. 
Unfortunately this had the side effect that policy was not updated for the non-Ubuntu systems and enabling fine grained unix mediation on those systems means that a new kernel can break a system with existing policy that worked with the previous kernel. With fine grained af_unix mediation this regression can easily break the system causing boot to fail, as it affects unix socket files, non-file based unix sockets, and dbus communication. To avoid this regression move fine grained af_unix mediation behind a new abi. This means that the system's userspace and policy must be updated to support the new policy before it takes effect and dropping a new kernel on an existing system will not result in a regression. The abi bump is done in such a way as existing policy can be activated on the system by changing the policy abi declaration and existing unix policy rules will apply. Policy then only needs to be incrementally updated, can even be backported to existing Ubuntu policy. Signed-off-by: John Johansen --- security/apparmor/af_unix.c | 14 +++++++------- security/apparmor/apparmorfs.c | 2 +- security/apparmor/file.c | 2 +- security/apparmor/include/policy.h | 18 +++++++++++++++++- 4 files changed, 26 insertions(+), 10 deletions(-) diff --git a/security/apparmor/af_unix.c b/security/apparmor/af_unix.c index ce7dc9d98fb1..ed4b34b88e38 100644 --- a/security/apparmor/af_unix.c +++ b/security/apparmor/af_unix.c @@ -197,7 +197,7 @@ static int profile_create_perm(struct aa_profile *profile, int family, AA_BUG(!profile); AA_BUG(profile_unconfined(profile)); - state = RULE_MEDIATES_NET(rules); + state = RULE_MEDIATES_v9NET(rules); if (state) { state = aa_match_to_prot(rules->policy, state, AA_MAY_CREATE, PF_UNIX, type, protocol, NULL, @@ -226,7 +226,7 @@ static int profile_sk_perm(struct aa_profile *profile, AA_BUG(is_unix_fs(sk)); AA_BUG(profile_unconfined(profile)); - state = RULE_MEDIATES_NET(rules); + state = RULE_MEDIATES_v9NET(rules); if (state) { state =
match_to_sk(rules->policy, state, request, unix_sk(sk), &p, &ad->info); @@ -251,7 +251,7 @@ static int profile_bind_perm(struct aa_profile *profile, struct sock *sk, AA_BUG(!ad); AA_BUG(profile_unconfined(profile)); - state = RULE_MEDIATES_NET(rules); + state = RULE_MEDIATES_v9NET(rules); if (state) { /* bind for abstract socket */ state = match_to_local(rules->policy, state, AA_MAY_BIND, @@ -281,7 +281,7 @@ static int profile_listen_perm(struct aa_profile *profile, struct sock *sk, AA_BUG(!ad); AA_BUG(profile_unconfined(profile)); - state = RULE_MEDIATES_NET(rules); + state = RULE_MEDIATES_v9NET(rules); if (state) { __be16 b = cpu_to_be16(backlog); @@ -315,7 +315,7 @@ static int profile_accept_perm(struct aa_profile *profile, AA_BUG(!ad); AA_BUG(profile_unconfined(profile)); - state = RULE_MEDIATES_NET(rules); + state = RULE_MEDIATES_v9NET(rules); if (state) { state = match_to_sk(rules->policy, state, AA_MAY_ACCEPT, unix_sk(sk), &p, &ad->info); @@ -342,7 +342,7 @@ static int profile_opt_perm(struct aa_profile *profile, u32 request, AA_BUG(!ad); AA_BUG(profile_unconfined(profile)); - state = RULE_MEDIATES_NET(rules); + state = RULE_MEDIATES_v9NET(rules); if (state) { __be16 b = cpu_to_be16(optname); @@ -379,7 +379,7 @@ static int profile_peer_perm(struct aa_profile *profile, u32 request, AA_BUG(!ad); AA_BUG(is_unix_fs(peer_sk)); /* currently always calls unix_fs_perm */ - state = RULE_MEDIATES_NET(rules); + state = RULE_MEDIATES_v9NET(rules); if (state) { struct aa_sk_ctx *peer_ctx = aa_sock(peer_sk); struct aa_profile *peerp; diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 45afd585b52b..c5c756dda5cf 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -2414,7 +2414,7 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = { AA_SFS_DIR("domain", aa_sfs_entry_domain), AA_SFS_DIR("file", aa_sfs_entry_file), AA_SFS_DIR("network_v8", aa_sfs_entry_network), - AA_SFS_DIR("network", aa_sfs_entry_networkv9), 
+ AA_SFS_DIR("network_v9", aa_sfs_entry_networkv9), AA_SFS_DIR("mount", aa_sfs_entry_mount), AA_SFS_DIR("namespaces", aa_sfs_entry_ns), AA_SFS_FILE_U64("capability", VFS_CAP_FLAGS_MASK), diff --git a/security/apparmor/file.c b/security/apparmor/file.c index d918b5dc6f59..85f89814af1e 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -228,7 +228,7 @@ int __aa_path_perm(const char *op, const struct cred *subj_cred, int e = 0; if (profile_unconfined(profile) || - ((flags & PATH_SOCK_COND) && !RULE_MEDIATES_NET(rules))) + ((flags & PATH_SOCK_COND) && !RULE_MEDIATES_v9NET(rules))) return 0; aa_str_perms(rules->file, rules->file->start[AA_CLASS_FILE], name, cond, perms); diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 5128c5414f04..a6ddf3b7478e 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -304,11 +304,27 @@ static inline aa_state_t RULE_MEDIATES(struct aa_ruleset *rules, rules->policy->start[0], &class, 1); } +static inline aa_state_t RULE_MEDIATES_v9NET(struct aa_ruleset *rules) +{ + return RULE_MEDIATES(rules, AA_CLASS_NETV9); +} + static inline aa_state_t RULE_MEDIATES_NET(struct aa_ruleset *rules) { - return RULE_MEDIATES(rules, AA_CLASS_NET); + /* can not use RULE_MEDIATE_v9AF here, because AF match fail + * can not be distiguished from class match fail, and we only + * fallback to checking older class on class match failure + */ + aa_state_t state = RULE_MEDIATES(rules, AA_CLASS_NETV9); + + /* fallback and check v7/8 if v9 is NOT mediated */ + if (!state) + state = RULE_MEDIATES(rules, AA_CLASS_NET); + + return state; } + static inline aa_state_t ANY_RULE_MEDIATES(struct list_head *head, unsigned char class) { From e6b087676954e36a7b1ed51249362bb499f8c1c2 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 17 Jan 2025 05:02:33 -0800 Subject: [PATCH 0019/2411] apparmor: fix dbus permission queries to v9 ABI dbus permission queries need to be synced 
with fine grained unix mediation to avoid potential policy regressions. To ensure that dbus queries don't result in a case where fine grained unix mediation is not being applied but dbus mediation is, check the ABI supported by the loaded policy and abort the query if the policy doesn't support the v9 ABI. Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index c5c756dda5cf..0b0e24cd4868 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -632,6 +632,14 @@ static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms, } else if (rules->policy->dfa) { if (!RULE_MEDIATES(rules, *match_str)) return; /* no change to current perms */ + /* old user space does not correctly detect dbus mediation + * support so we may get dbus policy and requests when + * the abi doesn't support it. This can cause mediation + * regressions, so explicitly test for this situation. + */ + if (*match_str == AA_CLASS_DBUS && + !RULE_MEDIATES_v9NET(rules)) + return; /* no change to current perms */ state = aa_dfa_match_len(rules->policy->dfa, rules->policy->start[0], match_str, match_len); From 509f8cb2fff927eeb5eb0ebdd410ec6f40430173 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 20 Jan 2025 06:12:01 -0700 Subject: [PATCH 0020/2411] apparmor: Fix checking address of an array in accum_label_info() clang warns: security/apparmor/label.c:206:15: error: address of array 'new->vec' will always evaluate to 'true' [-Werror,-Wpointer-bool-conversion] 206 | AA_BUG(!new->vec); | ~~~~~~^~~ The address of this array can never be NULL because it is not at the beginning of a structure. Convert the assertion to check that the new pointer is not NULL.
Fixes: de4754c801f4 ("apparmor: carry mediation check on label") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501191802.bDp2voTJ-lkp@intel.com/ Signed-off-by: Nathan Chancellor Signed-off-by: John Johansen --- security/apparmor/label.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index afded9996f61..79be2d3d604b 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -203,7 +203,7 @@ static void accum_label_info(struct aa_label *new) long u = FLAG_UNCONFINED; int i; - AA_BUG(!new->vec); + AA_BUG(!new); /* size == 1 is a profile and flags must be set as part of creation */ if (new->size == 1) From aa904fa1182b1a4470bb082f6cddacc1dc4e8032 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 21 Jan 2025 10:44:44 +0800 Subject: [PATCH 0021/2411] apparmor: Modify mismatched function name No functional modification involved. security/apparmor/file.c:184: warning: expecting prototype for aa_lookup_fperms(). Prototype was for aa_lookup_condperms() instead. 
Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=13605 Signed-off-by: Jiapeng Chong Signed-off-by: John Johansen --- security/apparmor/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 85f89814af1e..f113eedbc208 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -169,7 +169,7 @@ static int path_name(const char *op, const struct cred *subj_cred, struct aa_perms default_perms = {}; /** - * aa_lookup_fperms - convert dfa compressed perms to internal perms + * aa_lookup_condperms - convert dfa compressed perms to internal perms * @subj_uid: uid to use for subject owner test * @rules: the aa_policydb to lookup perms for (NOT NULL) * @state: state in dfa From 04fe43104e4ed103a8b55c21d1bc354fac409421 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 21 Jan 2025 10:44:43 +0800 Subject: [PATCH 0022/2411] apparmor: Modify mismatched function name No functional modification involved. security/apparmor/lib.c:93: warning: expecting prototype for aa_mask_to_str(). Prototype was for val_mask_to_str() instead. 
Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=13606 Signed-off-by: Jiapeng Chong Signed-off-by: John Johansen --- security/apparmor/lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index dd5dcbe5daf7..325f26f39a63 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -82,7 +82,7 @@ int aa_parse_debug_params(const char *str) } /** - * aa_mask_to_str - convert a perm mask to its short string + * val_mask_to_str - convert a perm mask to its short string * @str: character buffer to store string in (at least 10 characters) * @str_size: size of the @str buffer * @chrs: NUL-terminated character buffer of permission characters From aabbe6f908d8264cd8aeeef8141665f71668ef36 Mon Sep 17 00:00:00 2001 From: Tanya Agarwal Date: Fri, 24 Jan 2025 00:51:00 +0530 Subject: [PATCH 0023/2411] apparmor: fix typos and spelling errors Fix typos and spelling errors in apparmor module comments that were identified using the codespell tool. No functional changes - documentation only. Signed-off-by: Tanya Agarwal Reviewed-by: Mimi Zohar Ryan Lee Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 6 +++--- security/apparmor/domain.c | 4 ++-- security/apparmor/label.c | 2 +- security/apparmor/lsm.c | 2 +- security/apparmor/policy.c | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 0b0e24cd4868..ecf22251c228 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -43,7 +43,7 @@ * The interface is split into two main components based on their function * a securityfs component: * used for static files that are always available, and which allows - * userspace to specificy the location of the security filesystem. + * userspace to specify the location of the security filesystem. 
* * fns and data are prefixed with * aa_sfs_ @@ -204,7 +204,7 @@ static struct file_system_type aafs_ops = { /** * __aafs_setup_d_inode - basic inode setup for apparmorfs * @dir: parent directory for the dentry - * @dentry: dentry we are seting the inode up for + * @dentry: dentry we are setting the inode up for * @mode: permissions the file should have * @data: data to store on inode.i_private, available in open() * @link: if symlink, symlink target string @@ -2253,7 +2253,7 @@ static void *p_next(struct seq_file *f, void *p, loff_t *pos) /** * p_stop - stop depth first traversal * @f: seq_file we are filling - * @p: the last profile writen + * @p: the last profile written * * Release all locking done by p_start/p_next on namespace tree */ diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index b9c299097372..a7447d976a31 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -762,7 +762,7 @@ static int profile_onexec(const struct cred *subj_cred, /* change_profile on exec already granted */ /* * NOTE: Domain transitions from unconfined are allowed - * even when no_new_privs is set because this aways results + * even when no_new_privs is set because this always results * in a further reduction of permissions. */ return 0; @@ -933,7 +933,7 @@ int apparmor_bprm_creds_for_exec(struct linux_binprm *bprm) * * NOTE: Domain transitions from unconfined and to stacked * subsets are allowed even when no_new_privs is set because this - * aways results in a further reduction of permissions. + * always results in a further reduction of permissions. 
*/ if ((bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS) && !unconfined(label) && diff --git a/security/apparmor/label.c b/security/apparmor/label.c index 79be2d3d604b..913678f199c3 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -1461,7 +1461,7 @@ bool aa_update_label_name(struct aa_ns *ns, struct aa_label *label, gfp_t gfp) /* * cached label name is present and visible - * @label->hname only exists if label is namespace hierachical + * @label->hname only exists if label is namespace hierarchical */ static inline bool use_label_hname(struct aa_ns *ns, struct aa_label *label, int flags) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 0b4f7e2e4135..74e2f31ac2d8 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -2173,7 +2173,7 @@ static int __init alloc_buffers(void) * two should be enough, with more CPUs it is possible that more * buffers will be used simultaneously. The preallocated pool may grow. * This preallocation has also the side-effect that AppArmor will be - * disabled early at boot if aa_g_path_max is extremly high. + * disabled early at boot if aa_g_path_max is extremely high. */ if (num_online_cpus() > 1) num = 4 + RESERVE_COUNT; diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 04222eddd890..1f532fe48a1c 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -488,7 +488,7 @@ static struct aa_policy *__lookup_parent(struct aa_ns *ns, } /** - * __create_missing_ancestors - create place holders for missing ancestores + * __create_missing_ancestors - create place holders for missing ancestors * @ns: namespace to lookup profile in (NOT NULL) * @hname: hierarchical profile name to find parent of (NOT NULL) * @gfp: type of allocation. @@ -1095,7 +1095,7 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label, goto out; /* ensure that profiles are all for the same ns - * TODO: update locking to remove this constaint. 
All profiles in + * TODO: update locking to remove this constraint. All profiles in * the load set must succeed as a set or the load will * fail. Sort ent list and take ns locks in hierarchy order */ From 67e370aa7f968f6a4f3573ed61a77b36d1b26475 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Mon, 27 Jan 2025 21:54:04 +0100 Subject: [PATCH 0024/2411] apparmor: use the condition in AA_BUG_FMT even with debug disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This follows the established practice and fixes a build failure for me: security/apparmor/file.c: In function ‘__file_sock_perm’: security/apparmor/file.c:544:24: error: unused variable ‘sock’ [-Werror=unused-variable] 544 | struct socket *sock = (struct socket *) file->private_data; | ^~~~ Signed-off-by: Mateusz Guzik Signed-off-by: John Johansen --- security/apparmor/include/lib.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index 256f4577c653..d947998262b2 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -60,7 +60,11 @@ do { \ #define AA_BUG_FMT(X, fmt, args...) \ WARN((X), "AppArmor WARN %s: (" #X "): " fmt, __func__, ##args) #else -#define AA_BUG_FMT(X, fmt, args...) no_printk(fmt, ##args) +#define AA_BUG_FMT(X, fmt, args...) 
\ + do { \ + BUILD_BUG_ON_INVALID(X); \ + no_printk(fmt, ##args); \ + } while (0) #endif int aa_parse_debug_params(const char *str); From 3e45553acb14692519db853e4b5be35b45e46ad0 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 20 Jan 2025 06:21:14 -0700 Subject: [PATCH 0025/2411] apparmor: Remove unused variable 'sock' in __file_sock_perm() When CONFIG_SECURITY_APPARMOR_DEBUG_ASSERTS is disabled, there is a warning that sock is unused: security/apparmor/file.c: In function '__file_sock_perm': security/apparmor/file.c:544:24: warning: unused variable 'sock' [-Wunused-variable] 544 | struct socket *sock = (struct socket *) file->private_data; | ^~~~ sock was moved into aa_sock_file_perm(), where the same check is present, so remove sock and the assertion from __file_sock_perm() to fix the warning. Fixes: c05e705812d1 ("apparmor: add fine grained af_unix mediation") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501190757.myuLxLyL-lkp@intel.com/ Signed-off-by: Nathan Chancellor Signed-off-by: John Johansen --- security/apparmor/file.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/security/apparmor/file.c b/security/apparmor/file.c index f113eedbc208..5c984792cbf0 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -541,11 +541,8 @@ static int __file_sock_perm(const char *op, const struct cred *subj_cred, struct aa_label *flabel, struct file *file, u32 request, u32 denied) { - struct socket *sock = (struct socket *) file->private_data; int error; - AA_BUG(!sock); - /* revalidation due to label out of date. 
No revocation at this time */ if (!denied && aa_label_is_subset(flabel, label)) return 0; From 7efa84b5cdd6d473c7e80912638fca9d7167f202 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 4 Apr 2025 15:10:02 -0700 Subject: [PATCH 0026/2411] compiler-gcc.h: Introduce __diag_GCC_all It is not possible to disable a diagnostic for all versions of GCC without hard coding the minimum supported version at the site, as the GCC specific macros require a minimum version to disable the warning for: __diag_ignore(GCC, 5, ...); __diag_ignore_all() does not solve this issue because it disables a diagnostic for all versions of both GCC and clang, not just one or the other. Introduce __diag_GCC_all so that developers can write __diag_ignore(GCC, all, ...); to disable a particular diagnostic for all versions of GCC, while not affecting clang. Closes: https://lore.kernel.org/r/CAHk-=wgfX9nBGE0Ap9GjhOy7Mn=RSy=rx0MvqfYFFDx31KJXqQ@mail.gmail.com Signed-off-by: Nathan Chancellor Tested-by: Andy Shevchenko Reviewed-by: Petr Mladek Link: https://patch.msgid.link/20250404-vsprintf-convert-pragmas-to-__diag-v1-1-5d6c5c55b2bd@kernel.org Signed-off-by: Petr Mladek --- include/linux/compiler-gcc.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index c9b58188ec61..c75a222880f9 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -127,6 +127,8 @@ #define __diag_GCC_8(s) #endif +#define __diag_GCC_all(s) __diag(s) + #define __diag_ignore_all(option, comment) \ __diag(__diag_GCC_ignore option) From b5960a06b90eeba147c50c2d14de57b923371651 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 4 Apr 2025 15:10:03 -0700 Subject: [PATCH 0027/2411] vsprintf: Use __diag macros to disable '-Wsuggest-attribute=format' The GCC specific warning '-Wsuggest-attribute=format' is disabled around va_format() using raw #pragma statements, which includes an '#ifndef __clang__' to avoid a warning about an
unknown warning option from clang (which recognizes '#pragma GCC' for compatibility reasons): lib/vsprintf.c:1703:32: error: unknown warning group '-Wsuggest-attribute=format', ignored [-Werror,-Wunknown-warning-option] 1703 | #pragma GCC diagnostic ignored "-Wsuggest-attribute=format" | ^ While the current solution works, it is not visually appealing. The kernel already has some infrastructure that wraps these #pragma statements to give more specific control over diagnostics without needing #ifdef blocks for different compilers. Convert the existing statements over to the __diag macros. Closes: https://lore.kernel.org/r/CAHk-=wgfX9nBGE0Ap9GjhOy7Mn=RSy=rx0MvqfYFFDx31KJXqQ@mail.gmail.com Signed-off-by: Nathan Chancellor Tested-by: Andy Shevchenko Reviewed-by: Petr Mladek Link: https://patch.msgid.link/20250404-vsprintf-convert-pragmas-to-__diag-v1-2-5d6c5c55b2bd@kernel.org Signed-off-by: Petr Mladek --- lib/vsprintf.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index a2195bc81723..8a6cdee0d4ad 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1699,10 +1699,9 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, return buf; } -#pragma GCC diagnostic push -#ifndef __clang__ -#pragma GCC diagnostic ignored "-Wsuggest-attribute=format" -#endif +__diag_push(); +__diag_ignore(GCC, all, "-Wsuggest-attribute=format", + "Not a valid __printf() conversion candidate."); static char *va_format(char *buf, char *end, struct va_format *va_fmt, struct printf_spec spec) { @@ -1717,7 +1716,7 @@ static char *va_format(char *buf, char *end, struct va_format *va_fmt, return buf; } -#pragma GCC diagnostic pop +__diag_pop(); static noinline_for_stack char *uuid_string(char *buf, char *end, const u8 *addr, From 2b270e2f43d7498ba00117c60d196435983d83d7 Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Fri, 18 Apr 2025 04:52:50 +0000 Subject: [PATCH 0028/2411] security/apparmor: use kfree_sensitive() 
in unpack_secmark() The unpack_secmark() function currently uses kfree() to release memory allocated for secmark structures and their labels. However, if a failure occurs after partially parsing secmark, sensitive data may remain in memory, posing a security risk. To mitigate this, replace kfree() with kfree_sensitive() for freeing secmark structures and their labels, aligning with the approach used in free_ruleset(). I am submitting this as an RFC to seek feedback on whether this change is appropriate and aligns with the subsystem's expectations. If confirmed to be helpful, I will send a formal patch. Signed-off-by: Zilin Guan Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 73139189df0f..459eb878c824 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -599,8 +599,8 @@ static bool unpack_secmark(struct aa_ext *e, struct aa_ruleset *rules) fail: if (rules->secmark) { for (i = 0; i < size; i++) - kfree(rules->secmark[i].label); - kfree(rules->secmark); + kfree_sensitive(rules->secmark[i].label); + kfree_sensitive(rules->secmark); rules->secmark_count = 0; rules->secmark = NULL; } From e9ed1eb8f6217e53843d82ecf2d50f8d1a93e77c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 28 Apr 2025 12:04:30 -0700 Subject: [PATCH 0029/2411] apparmor: use SHA-256 library API instead of crypto_shash API This user of SHA-256 does not support any other algorithm, so the crypto_shash abstraction provides no value. Just use the SHA-256 library API instead, which is much simpler and easier to use.
Signed-off-by: Eric Biggers Signed-off-by: John Johansen --- security/apparmor/Kconfig | 3 +- security/apparmor/crypto.c | 85 ++++++-------------------------------- 2 files changed, 13 insertions(+), 75 deletions(-) diff --git a/security/apparmor/Kconfig b/security/apparmor/Kconfig index 64cc3044a42c..1e3bd44643da 100644 --- a/security/apparmor/Kconfig +++ b/security/apparmor/Kconfig @@ -59,8 +59,7 @@ config SECURITY_APPARMOR_INTROSPECT_POLICY config SECURITY_APPARMOR_HASH bool "Enable introspection of sha256 hashes for loaded profiles" depends on SECURITY_APPARMOR_INTROSPECT_POLICY - select CRYPTO - select CRYPTO_SHA256 + select CRYPTO_LIB_SHA256 default y help This option selects whether introspection of loaded policy diff --git a/security/apparmor/crypto.c b/security/apparmor/crypto.c index aad486b2fca6..40e17e153f1e 100644 --- a/security/apparmor/crypto.c +++ b/security/apparmor/crypto.c @@ -11,113 +11,52 @@ * it should be. */ -#include +#include #include "include/apparmor.h" #include "include/crypto.h" -static unsigned int apparmor_hash_size; - -static struct crypto_shash *apparmor_tfm; - unsigned int aa_hash_size(void) { - return apparmor_hash_size; + return SHA256_DIGEST_SIZE; } char *aa_calc_hash(void *data, size_t len) { - SHASH_DESC_ON_STACK(desc, apparmor_tfm); char *hash; - int error; - if (!apparmor_tfm) - return NULL; - - hash = kzalloc(apparmor_hash_size, GFP_KERNEL); + hash = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); if (!hash) return ERR_PTR(-ENOMEM); - desc->tfm = apparmor_tfm; - - error = crypto_shash_init(desc); - if (error) - goto fail; - error = crypto_shash_update(desc, (u8 *) data, len); - if (error) - goto fail; - error = crypto_shash_final(desc, hash); - if (error) - goto fail; - + sha256(data, len, hash); return hash; - -fail: - kfree(hash); - - return ERR_PTR(error); } int aa_calc_profile_hash(struct aa_profile *profile, u32 version, void *start, size_t len) { - SHASH_DESC_ON_STACK(desc, apparmor_tfm); - int error; + struct sha256_state 
state; __le32 le32_version = cpu_to_le32(version); if (!aa_g_hash_policy) return 0; - if (!apparmor_tfm) - return 0; - - profile->hash = kzalloc(apparmor_hash_size, GFP_KERNEL); + profile->hash = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); if (!profile->hash) return -ENOMEM; - desc->tfm = apparmor_tfm; - - error = crypto_shash_init(desc); - if (error) - goto fail; - error = crypto_shash_update(desc, (u8 *) &le32_version, 4); - if (error) - goto fail; - error = crypto_shash_update(desc, (u8 *) start, len); - if (error) - goto fail; - error = crypto_shash_final(desc, profile->hash); - if (error) - goto fail; - + sha256_init(&state); + sha256_update(&state, (u8 *)&le32_version, 4); + sha256_update(&state, (u8 *)start, len); + sha256_final(&state, profile->hash); return 0; - -fail: - kfree(profile->hash); - profile->hash = NULL; - - return error; } static int __init init_profile_hash(void) { - struct crypto_shash *tfm; - - if (!apparmor_initialized) - return 0; - - tfm = crypto_alloc_shash("sha256", 0, 0); - if (IS_ERR(tfm)) { - int error = PTR_ERR(tfm); - AA_ERROR("failed to setup profile sha256 hashing: %d\n", error); - return error; - } - apparmor_tfm = tfm; - apparmor_hash_size = crypto_shash_digestsize(apparmor_tfm); - - aa_info_message("AppArmor sha256 policy hashing enabled"); - + if (apparmor_initialized) + aa_info_message("AppArmor sha256 policy hashing enabled"); return 0; } - late_initcall(init_profile_hash); From 44fbeeb3087ee2ddce39d261d0a26688c2e22742 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 17 May 2025 01:49:20 -0700 Subject: [PATCH 0030/2411] apparmor: Fix incorrect profile->signal range check The check on profile->signal is always false, the value can never be less than 1 *and* greater than MAXMAPPED_SIG. Fix this by replacing the logical operator && with ||. 
Fixes: 84c455decf27 ("apparmor: add support for profiles to define the kill signal") Signed-off-by: Colin Ian King Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 459eb878c824..588dd1d5d364 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -919,7 +919,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) /* optional */ (void) aa_unpack_u32(e, &profile->signal, "kill"); - if (profile->signal < 1 && profile->signal > MAXMAPPED_SIG) { + if (profile->signal < 1 || profile->signal > MAXMAPPED_SIG) { info = "profile kill.signal invalid value"; goto fail; } From a949b46e7d82ef0fed09aa0590442156d44d39b1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 2 May 2025 21:49:19 -0700 Subject: [PATCH 0031/2411] apparmor: fix some kernel-doc issues in header files Fix kernel-doc warnings in apparmor header files as reported by scripts/kernel-doc: cred.h:128: warning: expecting prototype for end_label_crit_section(). Prototype was for end_current_label_crit_section() instead file.h:108: warning: expecting prototype for aa_map_file_perms(). Prototype was for aa_map_file_to_perms() instead lib.h:159: warning: Function parameter or struct member 'hname' not described in 'basename' lib.h:159: warning: Excess function parameter 'name' description in 'basename' match.h:21: warning: This comment starts with '/**', but isn't a kernel-doc comment. 
Refer Documentation/doc-guide/kernel-doc.rst * The format used for transition tables is based on the GNU flex table perms.h:109: warning: Function parameter or struct member 'accum' not described in 'aa_perms_accum_raw' perms.h:109: warning: Function parameter or struct member 'addend' not described in 'aa_perms_accum_raw' perms.h:136: warning: Function parameter or struct member 'accum' not described in 'aa_perms_accum' perms.h:136: warning: Function parameter or struct member 'addend' not described in 'aa_perms_accum' Signed-off-by: Randy Dunlap Reviewed-by: Ryan Lee Cc: John Johansen Cc: John Johansen Cc: apparmor@lists.ubuntu.com Cc: linux-security-module@vger.kernel.org Cc: Paul Moore Cc: James Morris Cc: "Serge E. Hallyn" Signed-off-by: John Johansen --- security/apparmor/include/cred.h | 2 +- security/apparmor/include/file.h | 2 +- security/apparmor/include/lib.h | 2 +- security/apparmor/include/match.h | 2 +- security/apparmor/include/perms.h | 8 ++++---- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/security/apparmor/include/cred.h b/security/apparmor/include/cred.h index 7265d2f81dd5..674af3175905 100644 --- a/security/apparmor/include/cred.h +++ b/security/apparmor/include/cred.h @@ -117,7 +117,7 @@ static inline struct aa_label *aa_get_current_label(void) #define __end_current_label_crit_section(X) end_current_label_crit_section(X) /** - * end_label_crit_section - put a reference found with begin_current_label.. + * end_current_label_crit_section - put a reference found with begin_current_label.. 
* @label: label reference to put * * Should only be used with a reference obtained with diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h index eb371dffbce3..ef60f99bc5ae 100644 --- a/security/apparmor/include/file.h +++ b/security/apparmor/include/file.h @@ -104,7 +104,7 @@ void aa_inherit_files(const struct cred *cred, struct files_struct *files); /** - * aa_map_file_perms - map file flags to AppArmor permissions + * aa_map_file_to_perms - map file flags to AppArmor permissions * @file: open file to map flags to AppArmor permissions * * Returns: apparmor permission set for the file diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index d947998262b2..e60bfa410e55 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -170,7 +170,7 @@ struct aa_policy { /** * basename - find the last component of an hname - * @name: hname to find the base profile name component of (NOT NULL) + * @hname: hname to find the base profile name component of (NOT NULL) * * Returns: the tail (base profile name) name component of an hname */ diff --git a/security/apparmor/include/match.h b/security/apparmor/include/match.h index 536ce3abd598..01a703fef8e1 100644 --- a/security/apparmor/include/match.h +++ b/security/apparmor/include/match.h @@ -17,7 +17,7 @@ #define DFA_START 1 -/** +/* * The format used for transition tables is based on the GNU flex table * file format (--tables-file option; see Table File Format in the flex * info pages and the flex sources for documentation). 
The magic number diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h index bbaa7d39a39a..37a3781b99a0 100644 --- a/security/apparmor/include/perms.h +++ b/security/apparmor/include/perms.h @@ -101,8 +101,8 @@ extern struct aa_perms allperms; /** * aa_perms_accum_raw - accumulate perms with out masking off overlapping perms - * @accum - perms struct to accumulate into - * @addend - perms struct to add to @accum + * @accum: perms struct to accumulate into + * @addend: perms struct to add to @accum */ static inline void aa_perms_accum_raw(struct aa_perms *accum, struct aa_perms *addend) @@ -128,8 +128,8 @@ static inline void aa_perms_accum_raw(struct aa_perms *accum, /** * aa_perms_accum - accumulate perms, masking off overlapping perms - * @accum - perms struct to accumulate into - * @addend - perms struct to add to @accum + * @accum: perms struct to accumulate into + * @addend: perms struct to add to @accum */ static inline void aa_perms_accum(struct aa_perms *accum, struct aa_perms *addend) From 6c055e62560b958354625604293652753d82bcae Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Thu, 1 May 2025 12:54:38 -0700 Subject: [PATCH 0032/2411] apparmor: ensure WB_HISTORY_SIZE value is a power of 2 WB_HISTORY_SIZE was defined to be a value not a power of 2, despite a comment in the declaration of struct match_workbuf stating it is and a modular arithmetic usage in the inc_wb_pos macro assuming that it is. Bump WB_HISTORY_SIZE's value up to 32 and add a BUILD_BUG_ON_NOT_POWER_OF_2 line to ensure that any future changes to the value of WB_HISTORY_SIZE respect this requirement. 
Fixes: 136db994852a ("apparmor: increase left match history buffer size") Signed-off-by: Ryan Lee Signed-off-by: John Johansen --- security/apparmor/include/match.h | 3 ++- security/apparmor/match.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/security/apparmor/include/match.h b/security/apparmor/include/match.h index 01a703fef8e1..21e049b40824 100644 --- a/security/apparmor/include/match.h +++ b/security/apparmor/include/match.h @@ -137,7 +137,8 @@ aa_state_t aa_dfa_matchn_until(struct aa_dfa *dfa, aa_state_t start, void aa_dfa_free_kref(struct kref *kref); -#define WB_HISTORY_SIZE 24 +/* This needs to be a power of 2 */ +#define WB_HISTORY_SIZE 32 struct match_workbuf { unsigned int count; unsigned int pos; diff --git a/security/apparmor/match.c b/security/apparmor/match.c index f2d9c57f8794..1ceabde550f2 100644 --- a/security/apparmor/match.c +++ b/security/apparmor/match.c @@ -681,6 +681,7 @@ aa_state_t aa_dfa_matchn_until(struct aa_dfa *dfa, aa_state_t start, #define inc_wb_pos(wb) \ do { \ + BUILD_BUG_ON_NOT_POWER_OF_2(WB_HISTORY_SIZE); \ wb->pos = (wb->pos + 1) & (WB_HISTORY_SIZE - 1); \ wb->len = (wb->len + 1) & (WB_HISTORY_SIZE - 1); \ } while (0) From a88db916b8c77552f49f7d9f8744095ea01a268f Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Thu, 1 May 2025 12:54:39 -0700 Subject: [PATCH 0033/2411] apparmor: fix loop detection used in conflicting attachment resolution Conflicting attachment resolution is based on the number of states traversed to reach an accepting state in the attachment DFA, accounting for DFA loops traversed during the matching process. However, the loop counting logic had multiple bugs: - The inc_wb_pos macro increments both position and length, but length is supposed to saturate upon hitting buffer capacity, instead of wrapping around. 
- If no revisited state is found when traversing the history, is_loop would still return true, as if there was a loop found the length of the history buffer, instead of returning false and signalling that no loop was found. As a result, the adjustment step of aa_dfa_leftmatch would sometimes produce negative counts with loop-free DFAs that traversed enough states. - The iteration in the is_loop for loop is supposed to stop before i = wb->len, so the conditional should be < instead of <=. This patch fixes the above bugs as well as the following nits: - The count and size fields in struct match_workbuf were not used, so they can be removed. - The history buffer in match_workbuf semantically stores aa_state_t and not unsigned ints, even if aa_state_t is currently unsigned int. - The local variables in is_loop are counters, and thus should be unsigned ints instead of aa_state_t's. Fixes: 21f606610502 ("apparmor: improve overlapping domain attachment resolution") Signed-off-by: Ryan Lee Co-developed-by: John Johansen Signed-off-by: John Johansen --- security/apparmor/include/match.h | 5 +---- security/apparmor/match.c | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/security/apparmor/include/match.h b/security/apparmor/include/match.h index 21e049b40824..1fbe82f5021b 100644 --- a/security/apparmor/include/match.h +++ b/security/apparmor/include/match.h @@ -140,15 +140,12 @@ void aa_dfa_free_kref(struct kref *kref); /* This needs to be a power of 2 */ #define WB_HISTORY_SIZE 32 struct match_workbuf { - unsigned int count; unsigned int pos; unsigned int len; - unsigned int size; /* power of 2, same as history size */ - unsigned int history[WB_HISTORY_SIZE]; + aa_state_t history[WB_HISTORY_SIZE]; }; #define DEFINE_MATCH_WB(N) \ struct match_workbuf N = { \ - .count = 0, \ .pos = 0, \ .len = 0, \ } diff --git a/security/apparmor/match.c b/security/apparmor/match.c index 1ceabde550f2..c5a91600842a 100644 ---
a/security/apparmor/match.c +++ b/security/apparmor/match.c @@ -679,35 +679,35 @@ aa_state_t aa_dfa_matchn_until(struct aa_dfa *dfa, aa_state_t start, return state; } -#define inc_wb_pos(wb) \ -do { \ +#define inc_wb_pos(wb) \ +do { \ BUILD_BUG_ON_NOT_POWER_OF_2(WB_HISTORY_SIZE); \ wb->pos = (wb->pos + 1) & (WB_HISTORY_SIZE - 1); \ - wb->len = (wb->len + 1) & (WB_HISTORY_SIZE - 1); \ + wb->len = (wb->len + 1) > WB_HISTORY_SIZE ? WB_HISTORY_SIZE : \ + wb->len + 1; \ } while (0) /* For DFAs that don't support extended tagging of states */ +/* adjust is only set if is_loop returns true */ static bool is_loop(struct match_workbuf *wb, aa_state_t state, unsigned int *adjust) { - aa_state_t pos = wb->pos; - aa_state_t i; + int pos = wb->pos; + int i; if (wb->history[pos] < state) return false; - for (i = 0; i <= wb->len; i++) { + for (i = 0; i < wb->len; i++) { if (wb->history[pos] == state) { *adjust = i; return true; } - if (pos == 0) - pos = WB_HISTORY_SIZE; - pos--; + /* -1 wraps to WB_HISTORY_SIZE - 1 */ + pos = (pos - 1) & (WB_HISTORY_SIZE - 1); } - *adjust = i; - return true; + return false; } static aa_state_t leftmatch_fb(struct aa_dfa *dfa, aa_state_t start, From 95ff11895846eec76a19351a109fbabbdd86b417 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 8 Apr 2025 18:02:00 -0700 Subject: [PATCH 0034/2411] apparmor: make all generated string array headers const char *const address_family_names and sock_type_names were created as const char *a[], which declares them as (non-const) pointers to const chars. Since the pointers themselves would not be changed, they should be generated as const char *const a[]. 
Signed-off-by: Ryan Lee Signed-off-by: John Johansen --- security/apparmor/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile index be51607f52b6..12fb419714c0 100644 --- a/security/apparmor/Makefile +++ b/security/apparmor/Makefile @@ -28,7 +28,7 @@ clean-files := capability_names.h rlim_names.h net_names.h # to # #define AA_SFS_AF_MASK "local inet" quiet_cmd_make-af = GEN $@ -cmd_make-af = echo "static const char *address_family_names[] = {" > $@ ;\ +cmd_make-af = echo "static const char *const address_family_names[] = {" > $@ ;\ sed $< >>$@ -r -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e "/AF_ROUTE/d" -e \ 's/^\#define[ \t]+AF_([A-Z0-9_]+)[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\ echo "};" >> $@ ;\ @@ -43,7 +43,7 @@ cmd_make-af = echo "static const char *address_family_names[] = {" > $@ ;\ # to # [1] = "stream", quiet_cmd_make-sock = GEN $@ -cmd_make-sock = echo "static const char *sock_type_names[] = {" >> $@ ;\ +cmd_make-sock = echo "static const char *const sock_type_names[] = {" >> $@ ;\ sed $^ >>$@ -r -n \ -e 's/^\tSOCK_([A-Z0-9_]+)[\t]+=[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\ echo "};" >> $@ From 89a3561e69e5187fcce302eef429acd38aec1277 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Thu, 1 May 2025 17:55:43 -0700 Subject: [PATCH 0035/2411] apparmor: force audit on unconfined exec if info is set by find_attach find_attach may set info if something unusual happens during that process (currently only used to signal conflicting attachments, but this could be expanded in the future). This is information that should be propagated to userspace via an audit message. 
Signed-off-by: Ryan Lee Signed-off-by: John Johansen --- security/apparmor/domain.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index a7447d976a31..4263bb1ee4a8 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -670,6 +670,22 @@ static struct aa_label *profile_transition(const struct cred *subj_cred, if (profile_unconfined(profile)) { new = find_attach(bprm, profile->ns, &profile->ns->base.profiles, name, &info); + /* info set -> something unusual that we should report + * Currently this is only conflicting attachments, but other + * infos added in the future should also be logged by default + * and only excluded on a case-by-case basis + */ + if (info) { + /* Because perms is never used again after this audit + * we don't need to care about clobbering it + */ + perms.audit |= MAY_EXEC; + perms.allow |= MAY_EXEC; + /* Don't cause error if auditing fails */ + (void) aa_audit_file(subj_cred, profile, &perms, + OP_EXEC, MAY_EXEC, name, target, new, cond->uid, + info, error); + } if (new) { AA_DEBUG(DEBUG_DOMAIN, "unconfined attached to new label"); return new; From e76d733b1b1ff0bec6a305341fda3fe937fbf51f Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Thu, 1 May 2025 17:55:44 -0700 Subject: [PATCH 0036/2411] apparmor: move the "conflicting profile attachments" infostr to a const declaration Instead of having a literal, making this a constant will allow for (hacky) detection of conflicting profile attachments from inspection of the info pointer. This is used in the next patch to augment the information provided through domain.c:x_to_label for ix/ux fallback. 
Signed-off-by: Ryan Lee Signed-off-by: John Johansen --- security/apparmor/domain.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 4263bb1ee4a8..ca8cd7ea088b 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -28,6 +28,8 @@ #include "include/policy.h" #include "include/policy_ns.h" +static const char * const CONFLICTING_ATTACH_STR = "conflicting profile attachments"; + /** * may_change_ptraced_domain - check if can change profile on ptraced task * @to_cred: cred of task changing domain @@ -485,7 +487,7 @@ static struct aa_label *find_attach(const struct linux_binprm *bprm, if (!candidate || conflict) { if (conflict) - *info = "conflicting profile attachments"; + *info = CONFLICTING_ATTACH_STR; rcu_read_unlock(); return NULL; } From b824b5f82bbc8ace0982391a1718b04a1f93346e Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Thu, 1 May 2025 17:55:45 -0700 Subject: [PATCH 0037/2411] apparmor: include conflicting attachment info for confined ix/ux fallback Instead of silently overwriting the conflicting profile attachment string, include that information in the ix/ux fallback string that gets set as info instead. Also add a warning print if some other info is set that would be overwritten by the ix/ux fallback string or by the profile not found error. 
Signed-off-by: Ryan Lee Signed-off-by: John Johansen --- security/apparmor/domain.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index ca8cd7ea088b..b5e1defbd4ac 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -29,6 +29,10 @@ #include "include/policy_ns.h" static const char * const CONFLICTING_ATTACH_STR = "conflicting profile attachments"; +static const char * const CONFLICTING_ATTACH_STR_IX = + "conflicting profile attachments - ix fallback"; +static const char * const CONFLICTING_ATTACH_STR_UX = + "conflicting profile attachments - ux fallback"; /** * may_change_ptraced_domain - check if can change profile on ptraced task @@ -577,6 +581,8 @@ static struct aa_label *x_to_label(struct aa_profile *profile, struct aa_label *stack = NULL; struct aa_ns *ns = profile->ns; u32 xtype = xindex & AA_X_TYPE_MASK; + /* Used for info checks during fallback handling */ + const char *old_info = NULL; switch (xtype) { case AA_X_NONE: @@ -613,12 +619,32 @@ static struct aa_label *x_to_label(struct aa_profile *profile, /* (p|c|n)ix - don't change profile but do * use the newest version */ - *info = "ix fallback"; + if (*info == CONFLICTING_ATTACH_STR) { + *info = CONFLICTING_ATTACH_STR_IX; + } else { + old_info = *info; + *info = "ix fallback"; + } /* no profile && no error */ new = aa_get_newest_label(&profile->label); } else if (xindex & AA_X_UNCONFINED) { new = aa_get_newest_label(ns_unconfined(profile->ns)); - *info = "ux fallback"; + if (*info == CONFLICTING_ATTACH_STR) { + *info = CONFLICTING_ATTACH_STR_UX; + } else { + old_info = *info; + *info = "ux fallback"; + } + } + /* We set old_info on the code paths above where overwriting + * could have happened, so now check if info was set by + * find_attach as well (i.e. whether we actually overwrote) + * and warn accordingly. 
+ */ + if (old_info && old_info != CONFLICTING_ATTACH_STR) { + pr_warn_ratelimited( + "AppArmor: find_attach (from profile %s) audit info \"%s\" dropped", + profile->base.hname, old_info); } } @@ -706,6 +732,11 @@ static struct aa_label *profile_transition(const struct cred *subj_cred, /* hack ix fallback - improve how this is detected */ goto audit; } else if (!new) { + if (info) { + pr_warn_ratelimited( + "AppArmor: %s (from profile %s) audit info \"%s\" dropped on missing transition", + __func__, profile->base.hname, info); + } info = "profile transition not found"; /* remove MAY_EXEC to audit as failure or complaint */ perms.allow &= ~MAY_EXEC; From 16916b17b4f80f99aad2ad29ad112313539ad219 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Thu, 1 May 2025 17:55:46 -0700 Subject: [PATCH 0038/2411] apparmor: force auditing of conflicting attachment execs from confined Conflicting attachment paths are an error state that result in the binary in question executing under an unexpected ix/ux fallback. As such, it should be audited to record the occurrence of conflicting attachments. 
Signed-off-by: Ryan Lee Signed-off-by: John Johansen --- security/apparmor/domain.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index b5e1defbd4ac..f9370a63a83c 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -729,6 +729,15 @@ static struct aa_label *profile_transition(const struct cred *subj_cred, new = x_to_label(profile, bprm, name, perms.xindex, &target, &info); if (new && new->proxy == profile->label.proxy && info) { + /* Force audit on conflicting attachment fallback + * Because perms is never used again after this audit + * we don't need to care about clobbering it + */ + if (info == CONFLICTING_ATTACH_STR_IX + || info == CONFLICTING_ATTACH_STR_UX) { + perms.audit |= MAY_EXEC; + perms.allow |= MAY_EXEC; + } /* hack ix fallback - improve how this is detected */ goto audit; } else if (!new) { From 4c0dc425fd613c5de0ca445f29d63150b52efc35 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 17 Feb 2025 01:50:36 -0800 Subject: [PATCH 0039/2411] apparmor: make debug_values_table static The debug_values_table is only referenced from lib.c so it should be static. Signed-off-by: John Johansen --- security/apparmor/lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 325f26f39a63..7cdf430762a8 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -30,7 +30,7 @@ struct val_table_ent { int value; }; -struct val_table_ent debug_values_table[] = { +static struct val_table_ent debug_values_table[] = { { "N", DEBUG_NONE }, { "none", DEBUG_NONE }, { "n", DEBUG_NONE }, From b1f87be7280ff48794f0fe55c9ca6df9d87d62c5 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sun, 16 Feb 2025 03:40:52 -0800 Subject: [PATCH 0040/2411] apparmor: Document that label must be last member in struct aa_profile The label struct is variable length. 
While its use in struct aa_profile is fixed length at 2 entries the variable length member needs to be the last member in the structure. The code already does this but the comment has it in the wrong location. Also add a comment to ensure it stays at the end of the structure. While we are at it, update the documentation for other profile members as well. Signed-off-by: John Johansen --- security/apparmor/include/policy.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index a6ddf3b7478e..a4c0f76fd03d 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -198,7 +198,6 @@ struct aa_attachment { /* struct aa_profile - basic confinement data * @base - base components of the profile (name, refcount, lists, lock ...) - * @label - label this profile is an extension of * @parent: parent of profile * @ns: namespace the profile is in * @rename: optional profile name that this profile renamed @@ -206,13 +205,19 @@ struct aa_attachment { * @audit: the auditing mode of the profile * @mode: the enforcement mode of the profile * @path_flags: flags controlling path generation behavior + * @signal: the signal that should be used when kill is used * @disconnected: what to prepend if attach_disconnected is specified * @attach: attachment rules for the profile * @rules: rules to be enforced * + * learning_cache: the accesses learned in complain mode + * raw_data: rawdata of the loaded profile policy + * hash: cryptographic hash of the profile * @dents: dentries for the profiles file entries in apparmorfs * @dirname: name of the profile dir in apparmorfs + * @dents: set of dentries associated with the profile * @data: hashtable for free-form policy aa_data + * @label - label this profile is an extension of * * The AppArmor profile contains the basic confinement data. Each profile * has a name, and exists in a namespace. 
The @name and @exec_match are @@ -247,6 +252,8 @@ struct aa_profile { char *dirname; struct dentry *dents[AAFS_PROF_SIZEOF]; struct rhashtable *data; + + /* special - variable length must be last entry in profile */ struct aa_label label; }; From da7870162f176ea38bff7380d67222e0430b0e35 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 14 Mar 2025 08:10:09 +0100 Subject: [PATCH 0041/2411] xtensa: Replace __ASSEMBLY__ with __ASSEMBLER__ in uapi headers __ASSEMBLY__ is only defined by the Makefile of the kernel, so this is not really useful for uapi headers (unless the userspace Makefile defines it, too). Let's switch to __ASSEMBLER__ which gets set automatically by the compiler when compiling assembly code. This is a completely mechanical patch (done with a simple "sed -i" statement). Cc: Chris Zankel Cc: Max Filippov Signed-off-by: Thomas Huth Message-Id: <20250314071013.1575167-39-thuth@redhat.com> Signed-off-by: Max Filippov --- arch/xtensa/include/uapi/asm/ptrace.h | 2 +- arch/xtensa/include/uapi/asm/signal.h | 6 +++--- arch/xtensa/include/uapi/asm/types.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/xtensa/include/uapi/asm/ptrace.h b/arch/xtensa/include/uapi/asm/ptrace.h index 9115e86ebc75..6e89ea301438 100644 --- a/arch/xtensa/include/uapi/asm/ptrace.h +++ b/arch/xtensa/include/uapi/asm/ptrace.h @@ -42,7 +42,7 @@ #define PTRACE_GETFDPIC_EXEC 0 #define PTRACE_GETFDPIC_INTERP 1 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct user_pt_regs { __u32 pc; diff --git a/arch/xtensa/include/uapi/asm/signal.h b/arch/xtensa/include/uapi/asm/signal.h index b8c824dd4b74..8060f1914400 100644 --- a/arch/xtensa/include/uapi/asm/signal.h +++ b/arch/xtensa/include/uapi/asm/signal.h @@ -19,7 +19,7 @@ #define _NSIG_BPW 32 #define _NSIG_WORDS (_NSIG / _NSIG_BPW) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -77,7 +77,7 @@ typedef struct { #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ 
#include @@ -106,5 +106,5 @@ typedef struct sigaltstack { __kernel_size_t ss_size; } stack_t; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_XTENSA_SIGNAL_H */ diff --git a/arch/xtensa/include/uapi/asm/types.h b/arch/xtensa/include/uapi/asm/types.h index 12db8ac38750..2e9217a06ebf 100644 --- a/arch/xtensa/include/uapi/asm/types.h +++ b/arch/xtensa/include/uapi/asm/types.h @@ -14,7 +14,7 @@ #include -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ # define __XTENSA_UL(x) (x) # define __XTENSA_UL_CONST(x) x #else @@ -23,7 +23,7 @@ # define __XTENSA_UL_CONST(x) ___XTENSA_UL_CONST(x) #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #endif From 44a4ef59d5506c6dc7599d876a3a1014697ec480 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 14 Mar 2025 08:10:10 +0100 Subject: [PATCH 0042/2411] xtensa: Replace __ASSEMBLY__ with __ASSEMBLER__ in non-uapi headers While the GCC and Clang compilers already define __ASSEMBLER__ automatically when compiling assembly code, __ASSEMBLY__ is a macro that only gets defined by the Makefiles in the kernel. This can be very confusing when switching between userspace and kernelspace coding, or when dealing with uapi headers that rather should use __ASSEMBLER__ instead. So let's standardize on the __ASSEMBLER__ macro that is provided by the compilers now. This is a completely mechanical patch (done with a simple "sed -i" statement). 
Cc: Chris Zankel Cc: Max Filippov Signed-off-by: Thomas Huth Message-Id: <20250314071013.1575167-40-thuth@redhat.com> Signed-off-by: Max Filippov --- arch/xtensa/include/asm/bootparam.h | 2 +- arch/xtensa/include/asm/cmpxchg.h | 4 ++-- arch/xtensa/include/asm/coprocessor.h | 8 ++++---- arch/xtensa/include/asm/current.h | 2 +- arch/xtensa/include/asm/ftrace.h | 8 ++++---- arch/xtensa/include/asm/initialize_mmu.h | 4 ++-- arch/xtensa/include/asm/jump_label.h | 4 ++-- arch/xtensa/include/asm/kasan.h | 2 +- arch/xtensa/include/asm/kmem_layout.h | 2 +- arch/xtensa/include/asm/page.h | 4 ++-- arch/xtensa/include/asm/pgtable.h | 8 ++++---- arch/xtensa/include/asm/processor.h | 4 ++-- arch/xtensa/include/asm/ptrace.h | 6 +++--- arch/xtensa/include/asm/signal.h | 4 ++-- arch/xtensa/include/asm/thread_info.h | 8 ++++---- arch/xtensa/include/asm/tlbflush.h | 4 ++-- 16 files changed, 37 insertions(+), 37 deletions(-) diff --git a/arch/xtensa/include/asm/bootparam.h b/arch/xtensa/include/asm/bootparam.h index 6333bd1eb9d2..a459ffbaf7ab 100644 --- a/arch/xtensa/include/asm/bootparam.h +++ b/arch/xtensa/include/asm/bootparam.h @@ -27,7 +27,7 @@ #define BP_TAG_FIRST 0x7B0B /* first tag with a version number */ #define BP_TAG_LAST 0x7E0B /* last tag */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* All records are aligned to 4 bytes */ diff --git a/arch/xtensa/include/asm/cmpxchg.h b/arch/xtensa/include/asm/cmpxchg.h index 95e33a913962..b6db4838b175 100644 --- a/arch/xtensa/include/asm/cmpxchg.h +++ b/arch/xtensa/include/asm/cmpxchg.h @@ -11,7 +11,7 @@ #ifndef _XTENSA_CMPXCHG_H #define _XTENSA_CMPXCHG_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -220,6 +220,6 @@ __arch_xchg(unsigned long x, volatile void * ptr, int size) } } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _XTENSA_CMPXCHG_H */ diff --git a/arch/xtensa/include/asm/coprocessor.h b/arch/xtensa/include/asm/coprocessor.h index 3b1a0d5d2169..e0447bcc52c5 100644 --- 
a/arch/xtensa/include/asm/coprocessor.h +++ b/arch/xtensa/include/asm/coprocessor.h @@ -16,7 +16,7 @@ #include #include -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ # include .macro xchal_sa_start a b @@ -69,7 +69,7 @@ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* * XTENSA_HAVE_COPROCESSOR(x) returns 1 if coprocessor x is configured. @@ -87,7 +87,7 @@ #define XTENSA_HAVE_IO_PORTS \ XCHAL_CP_PORT_MASK -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Additional registers. @@ -151,5 +151,5 @@ void local_coprocessors_flush_release_all(void); #endif /* XTENSA_HAVE_COPROCESSORS */ -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _XTENSA_COPROCESSOR_H */ diff --git a/arch/xtensa/include/asm/current.h b/arch/xtensa/include/asm/current.h index df275d554788..7b483538f066 100644 --- a/arch/xtensa/include/asm/current.h +++ b/arch/xtensa/include/asm/current.h @@ -13,7 +13,7 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include diff --git a/arch/xtensa/include/asm/ftrace.h b/arch/xtensa/include/asm/ftrace.h index 0ea4f84cd558..f676d209d110 100644 --- a/arch/xtensa/include/asm/ftrace.h +++ b/arch/xtensa/include/asm/ftrace.h @@ -12,20 +12,20 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern unsigned long return_address(unsigned level); #define ftrace_return_address(n) return_address(n) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_ADDR ((unsigned long)(_mcount)) #define MCOUNT_INSN_SIZE 3 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern void _mcount(void); #define mcount _mcount -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_FUNCTION_TRACER */ #endif /* _XTENSA_FTRACE_H */ diff --git a/arch/xtensa/include/asm/initialize_mmu.h b/arch/xtensa/include/asm/initialize_mmu.h index 574795a20d6f..101bcb87e15b 100644 --- a/arch/xtensa/include/asm/initialize_mmu.h +++ b/arch/xtensa/include/asm/initialize_mmu.h @@ -34,7 +34,7 @@ #define CA_WRITEBACK (0x4) 
#endif -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define XTENSA_HWVERSION_RC_2009_0 230000 @@ -240,6 +240,6 @@ .endm -#endif /*__ASSEMBLY__*/ +#endif /*__ASSEMBLER__*/ #endif /* _XTENSA_INITIALIZE_MMU_H */ diff --git a/arch/xtensa/include/asm/jump_label.h b/arch/xtensa/include/asm/jump_label.h index 46c8596259d2..38e3e2a9b0fb 100644 --- a/arch/xtensa/include/asm/jump_label.h +++ b/arch/xtensa/include/asm/jump_label.h @@ -4,7 +4,7 @@ #ifndef _ASM_XTENSA_JUMP_LABEL_H #define _ASM_XTENSA_JUMP_LABEL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -61,5 +61,5 @@ struct jump_entry { jump_label_t key; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/xtensa/include/asm/kasan.h b/arch/xtensa/include/asm/kasan.h index 8d2b4248466f..0da91b64fab9 100644 --- a/arch/xtensa/include/asm/kasan.h +++ b/arch/xtensa/include/asm/kasan.h @@ -2,7 +2,7 @@ #ifndef __ASM_KASAN_H #define __ASM_KASAN_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_KASAN diff --git a/arch/xtensa/include/asm/kmem_layout.h b/arch/xtensa/include/asm/kmem_layout.h index 6fc05cba61a2..6949724625a0 100644 --- a/arch/xtensa/include/asm/kmem_layout.h +++ b/arch/xtensa/include/asm/kmem_layout.h @@ -80,7 +80,7 @@ #if (!XCHAL_HAVE_PTP_MMU || XCHAL_HAVE_SPANNING_WAY) && defined(CONFIG_USE_OF) #define XCHAL_KIO_PADDR xtensa_get_kio_paddr() -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern unsigned long xtensa_kio_paddr; static inline unsigned long xtensa_get_kio_paddr(void) diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h index 644413792bf3..20655174b111 100644 --- a/arch/xtensa/include/asm/page.h +++ b/arch/xtensa/include/asm/page.h @@ -80,7 +80,7 @@ #endif -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define __pgprot(x) (x) @@ -172,7 +172,7 @@ static inline unsigned long ___pa(unsigned long va) #define page_to_virt(page) __va(page_to_pfn(page) << PAGE_SHIFT) #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) -#endif /* 
__ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #include #endif /* _XTENSA_PAGE_H */ diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 1647a7cc3fbf..46b634934f13 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -203,7 +203,7 @@ * What follows is the closest we can get by reasonable means.. * See linux/mm/mmap.c for protection_map[] array that uses these definitions. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define pte_ERROR(e) \ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) @@ -372,10 +372,10 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) return pte; } -#endif /* !defined (__ASSEMBLY__) */ +#endif /* !defined (__ASSEMBLER__) */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* Assembly macro _PGD_INDEX is the same as C pgd_index(unsigned long), * _PGD_OFFSET as C pgd_offset(struct mm_struct*, unsigned long), @@ -414,7 +414,7 @@ void update_mmu_tlb_range(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, unsigned int nr); #define update_mmu_tlb_range update_mmu_tlb_range -#endif /* !defined (__ASSEMBLY__) */ +#endif /* !defined (__ASSEMBLER__) */ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG #define __HAVE_ARCH_PTEP_GET_AND_CLEAR diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 47b5df86ab5c..60a211356335 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -105,7 +105,7 @@ #error Unsupported xtensa ABI #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #if defined(__XTENSA_WINDOWED_ABI__) @@ -263,5 +263,5 @@ static inline unsigned long get_er(unsigned long addr) #endif /* XCHAL_HAVE_EXTERN_REGS */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _XTENSA_PROCESSOR_H */ diff --git a/arch/xtensa/include/asm/ptrace.h b/arch/xtensa/include/asm/ptrace.h index 86c70117371b..f0f5e7c224c9 100644 --- a/arch/xtensa/include/asm/ptrace.h +++ 
b/arch/xtensa/include/asm/ptrace.h @@ -41,7 +41,7 @@ #define NO_SYSCALL (-1) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -109,11 +109,11 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) int do_syscall_trace_enter(struct pt_regs *regs); void do_syscall_trace_leave(struct pt_regs *regs); -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ # include #define PT_REGS_OFFSET (KERNEL_STACK_SIZE - PT_USER_SIZE) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _XTENSA_PTRACE_H */ diff --git a/arch/xtensa/include/asm/signal.h b/arch/xtensa/include/asm/signal.h index de169b4eaeef..d301e68573cc 100644 --- a/arch/xtensa/include/asm/signal.h +++ b/arch/xtensa/include/asm/signal.h @@ -14,10 +14,10 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __ARCH_HAS_SA_RESTORER #include -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _XTENSA_SIGNAL_H */ diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h index e0dffcc43b9e..5b74dfc35ef9 100644 --- a/arch/xtensa/include/asm/thread_info.h +++ b/arch/xtensa/include/asm/thread_info.h @@ -16,7 +16,7 @@ #define CURRENT_SHIFT KERNEL_STACK_SHIFT -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ # include #endif @@ -28,7 +28,7 @@ * must also be changed */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #if XTENSA_HAVE_COPROCESSORS @@ -80,7 +80,7 @@ struct thread_info { * macros/functions for gaining access to the thread information structure */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define INIT_THREAD_INFO(tsk) \ { \ @@ -99,7 +99,7 @@ static __always_inline struct thread_info *current_thread_info(void) return ti; } -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ /* how to get the thread information struct from ASM */ #define GET_THREAD_INFO(reg,sp) \ diff --git a/arch/xtensa/include/asm/tlbflush.h b/arch/xtensa/include/asm/tlbflush.h index 573df8cea200..3edaebeef423 100644 --- 
a/arch/xtensa/include/asm/tlbflush.h +++ b/arch/xtensa/include/asm/tlbflush.h @@ -20,7 +20,7 @@ #define ITLB_HIT_BIT 3 #define DTLB_HIT_BIT 4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* TLB flushing: * @@ -201,5 +201,5 @@ static inline unsigned long read_itlb_translation (int way) return tmp; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _XTENSA_TLBFLUSH_H */ From c403db6f00de4ecbde869ca79dd53513e9f7af0f Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 14 Mar 2025 08:09:53 +0100 Subject: [PATCH 0043/2411] openrisc: Replace __ASSEMBLY__ with __ASSEMBLER__ in uapi headers __ASSEMBLY__ is only defined by the Makefile of the kernel, so this is not really useful for uapi headers (unless the userspace Makefile defines it, too). Let's switch to __ASSEMBLER__ which gets set automatically by the compiler when compiling assembly code. This is a completely mechanical patch (done with a simple "sed -i" statement). Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Stafford Horne Cc: linux-openrisc@vger.kernel.org Signed-off-by: Thomas Huth Signed-off-by: Stafford Horne --- arch/openrisc/include/uapi/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/openrisc/include/uapi/asm/ptrace.h b/arch/openrisc/include/uapi/asm/ptrace.h index a77cc9915ca8..1f12a60d5a06 100644 --- a/arch/openrisc/include/uapi/asm/ptrace.h +++ b/arch/openrisc/include/uapi/asm/ptrace.h @@ -20,7 +20,7 @@ #ifndef _UAPI__ASM_OPENRISC_PTRACE_H #define _UAPI__ASM_OPENRISC_PTRACE_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This is the layout of the regset returned by the GETREGSET ptrace call */ From f0eedcf22581ca1cc438fb38a479ff41ab882d51 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 14 Mar 2025 08:09:54 +0100 Subject: [PATCH 0044/2411] openrisc: Replace __ASSEMBLY__ with __ASSEMBLER__ in non-uapi headers While the GCC and Clang compilers already define __ASSEMBLER__ automatically when compiling assembly code, __ASSEMBLY__ is a macro that 
only gets defined by the Makefiles in the kernel. This can be very confusing when switching between userspace and kernelspace coding, or when dealing with uapi headers that rather should use __ASSEMBLER__ instead. So let's standardize on the __ASSEMBLER__ macro that is provided by the compilers now. This is a completely mechanical patch (done with a simple "sed -i" statement). Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Stafford Horne Cc: linux-openrisc@vger.kernel.org Signed-off-by: Thomas Huth Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/mmu.h | 2 +- arch/openrisc/include/asm/page.h | 8 ++++---- arch/openrisc/include/asm/pgtable.h | 4 ++-- arch/openrisc/include/asm/processor.h | 4 ++-- arch/openrisc/include/asm/ptrace.h | 4 ++-- arch/openrisc/include/asm/setup.h | 2 +- arch/openrisc/include/asm/thread_info.h | 8 ++++---- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/openrisc/include/asm/mmu.h b/arch/openrisc/include/asm/mmu.h index eb720110f3a2..e7826a681bc4 100644 --- a/arch/openrisc/include/asm/mmu.h +++ b/arch/openrisc/include/asm/mmu.h @@ -15,7 +15,7 @@ #ifndef __ASM_OPENRISC_MMU_H #define __ASM_OPENRISC_MMU_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef unsigned long mm_context_t; #endif diff --git a/arch/openrisc/include/asm/page.h b/arch/openrisc/include/asm/page.h index c589e96035e1..85797f94d1d7 100644 --- a/arch/openrisc/include/asm/page.h +++ b/arch/openrisc/include/asm/page.h @@ -25,7 +25,7 @@ */ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define clear_page(page) memset((page), 0, PAGE_SIZE) #define copy_page(to, from) memcpy((to), (from), PAGE_SIZE) @@ -55,10 +55,10 @@ typedef struct page *pgtable_t; #define __pgd(x) ((pgd_t) { (x) }) #define __pgprot(x) ((pgprot_t) { (x) }) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET)) #define __pa(x) ((unsigned long) (x) - PAGE_OFFSET) @@ -73,7 
+73,7 @@ static inline unsigned long virt_to_pfn(const void *kaddr) #define virt_addr_valid(kaddr) (pfn_valid(virt_to_pfn(kaddr))) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #include #include diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 60c6ce7ff2dc..cd979bd28ab3 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -23,7 +23,7 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -432,5 +432,5 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) typedef pte_t *pte_addr_t; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_OPENRISC_PGTABLE_H */ diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h index e05d1b59e24e..3ff893a67c13 100644 --- a/arch/openrisc/include/asm/processor.h +++ b/arch/openrisc/include/asm/processor.h @@ -39,7 +39,7 @@ */ #define TASK_UNMAPPED_BASE (TASK_SIZE / 8 * 3) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct task_struct; @@ -78,5 +78,5 @@ void show_registers(struct pt_regs *regs); #define cpu_relax() barrier() -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_OPENRISC_PROCESSOR_H */ diff --git a/arch/openrisc/include/asm/ptrace.h b/arch/openrisc/include/asm/ptrace.h index e5a282b67075..28facf2f3e00 100644 --- a/arch/openrisc/include/asm/ptrace.h +++ b/arch/openrisc/include/asm/ptrace.h @@ -27,7 +27,7 @@ * they share a cacheline (not done yet, though... future optimization). */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This struct describes how the registers are laid out on the kernel stack * during a syscall or other kernel entry. @@ -147,7 +147,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, return *(unsigned long *)((unsigned long)regs + offset); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* * Offsets used by 'ptrace' system call interface. 
diff --git a/arch/openrisc/include/asm/setup.h b/arch/openrisc/include/asm/setup.h index 9acbc5deda69..dce9f4d3b378 100644 --- a/arch/openrisc/include/asm/setup.h +++ b/arch/openrisc/include/asm/setup.h @@ -8,7 +8,7 @@ #include #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ void __init or1k_early_setup(void *fdt); #endif diff --git a/arch/openrisc/include/asm/thread_info.h b/arch/openrisc/include/asm/thread_info.h index 4af3049c34c2..e338fff7efb0 100644 --- a/arch/openrisc/include/asm/thread_info.h +++ b/arch/openrisc/include/asm/thread_info.h @@ -17,7 +17,7 @@ #ifdef __KERNEL__ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include #endif @@ -38,7 +38,7 @@ * - if the contents of this structure are changed, the assembly constants * must also be changed */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct thread_info { struct task_struct *task; /* main task structure */ @@ -58,7 +58,7 @@ struct thread_info { * * preempt_count needs to be 1 initially, until the scheduler is functional. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ @@ -75,7 +75,7 @@ register struct thread_info *current_thread_info_reg asm("r10"); #define get_thread_info(ti) get_task_struct((ti)->task) #define put_thread_info(ti) put_task_struct((ti)->task) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * thread information flags From 088eb0f161c58969bc7544e5d6c1451ed18d2ddb Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 9 Jun 2025 08:38:06 +0900 Subject: [PATCH 0045/2411] firewire: ohci: correct code comments about bus_reset tasklet The tasklet for bus reset event has been replaced with work item, while some code comments still address to the tasklet. This commit corrects them. 
Fixes: 2d7a36e23300 ("firewire: ohci: Move code from the bus reset tasklet into a workqueue") Link: https://lore.kernel.org/r/20250608233808.202355-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/ohci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index edaedd156a6d..27e3e998e6fc 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -2528,7 +2528,7 @@ static int ohci_enable(struct fw_card *card, * They shouldn't do that in this initial case where the link * isn't enabled. This means we have to use the same * workaround here, setting the bus header to 0 and then write - * the right values in the bus reset tasklet. + * the right values in the bus reset work item. */ if (config_rom) { @@ -2617,7 +2617,7 @@ static int ohci_set_config_rom(struct fw_card *card, * during the atomic update, even on little endian * architectures. The workaround we use is to put a 0 in the * header quadlet; 0 is endian agnostic and means that the - * config rom isn't ready yet. In the bus reset tasklet we + * config rom isn't ready yet. In the bus reset work item we * then set up the real values for the two registers. * * We use ohci->lock to avoid racing with the code that sets @@ -2659,7 +2659,7 @@ static int ohci_set_config_rom(struct fw_card *card, /* * Now initiate a bus reset to have the changes take * effect. We clean up the old config rom memory and DMA - * mappings in the bus reset tasklet, since the OHCI + * mappings in the bus reset work item, since the OHCI * controller could need to access it before the bus reset * takes effect. 
*/ From 8ffef793bb6d62046472033d6fb1dfb435681a83 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 9 Jun 2025 08:38:07 +0900 Subject: [PATCH 0046/2411] firewire: ohci: use from_work() macro to expand parent structure of work_struct A commit 60b2ebf48526 ("workqueue: Introduce from_work() helper for cleaner callback declarations") introduces a new macro to retrieve a pointer for the parent structure of the work item. It is convenient to reduce input text. This commit uses the macro in PCI driver for 1394 OHCI. Link: https://lore.kernel.org/r/20250608233808.202355-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/ohci.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 27e3e998e6fc..40313a3ec63e 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -1190,7 +1190,7 @@ static void context_tasklet(unsigned long data) static void ohci_isoc_context_work(struct work_struct *work) { - struct fw_iso_context *base = container_of(work, struct fw_iso_context, work); + struct fw_iso_context *base = from_work(base, work, work); struct iso_context *isoc_ctx = container_of(base, struct iso_context, base); context_retire_descriptors(&isoc_ctx->context); @@ -2028,8 +2028,7 @@ static int find_and_insert_self_id(struct fw_ohci *ohci, int self_id_count) static void bus_reset_work(struct work_struct *work) { - struct fw_ohci *ohci = - container_of(work, struct fw_ohci, bus_reset_work); + struct fw_ohci *ohci = from_work(ohci, work, bus_reset_work); int self_id_count, generation, new_generation, i, j; u32 reg, quadlet; void *free_rom = NULL; From f657a680f84e29ed7c17edf3b14d637e0527270c Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 9 Jun 2025 08:38:08 +0900 Subject: [PATCH 0047/2411] firewire: core: use from_work() macro to expand parent structure of work_struct A commit 60b2ebf48526 ("workqueue: Introduce from_work() helper for cleaner callback
declarations") introduces a new macro to retrieve a pointer for the parent structure of the work item. It is convenient to reduce input text. This commit uses the macro in core functionalities. Link: https://lore.kernel.org/r/20250608233808.202355-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-card.c | 4 ++-- drivers/firewire/core-cdev.c | 3 +-- drivers/firewire/core-device.c | 15 +++++---------- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 01354b9de8b2..2b6ad47b6d57 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -237,7 +237,7 @@ EXPORT_SYMBOL(fw_schedule_bus_reset); static void br_work(struct work_struct *work) { - struct fw_card *card = container_of(work, struct fw_card, br_work.work); + struct fw_card *card = from_work(card, work, br_work.work); /* Delay for 2s after last reset per IEEE 1394 clause 8.2.1. */ if (card->reset_jiffies != 0 && @@ -286,7 +286,7 @@ void fw_schedule_bm_work(struct fw_card *card, unsigned long delay) static void bm_work(struct work_struct *work) { - struct fw_card *card = container_of(work, struct fw_card, bm_work.work); + struct fw_card *card = from_work(card, work, bm_work.work); struct fw_device *root_device, *irm_device; struct fw_node *root_node; int root_id, new_root_id, irm_id, bm_id, local_id; diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index bd04980009a4..78b10c6ef7fe 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1313,8 +1313,7 @@ static int ioctl_get_cycle_timer(struct client *client, union ioctl_arg *arg) static void iso_resource_work(struct work_struct *work) { struct iso_resource_event *e; - struct iso_resource *r = - container_of(work, struct iso_resource, work.work); + struct iso_resource *r = from_work(r, work, work.work); struct client *client = r->client; unsigned long index = r->resource.handle; int
generation, channel, bandwidth, todo; diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index ec3e21ad2025..aeacd4cfd694 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -853,8 +853,7 @@ static void fw_schedule_device_work(struct fw_device *device, static void fw_device_shutdown(struct work_struct *work) { - struct fw_device *device = - container_of(work, struct fw_device, work.work); + struct fw_device *device = from_work(device, work, work.work); if (time_before64(get_jiffies_64(), device->card->reset_jiffies + SHUTDOWN_DELAY) @@ -921,8 +920,7 @@ static int update_unit(struct device *dev, void *data) static void fw_device_update(struct work_struct *work) { - struct fw_device *device = - container_of(work, struct fw_device, work.work); + struct fw_device *device = from_work(device, work, work.work); fw_device_cdev_update(device); device_for_each_child(&device->device, NULL, update_unit); @@ -1002,8 +1000,7 @@ static int compare_configuration_rom(struct device *dev, const void *data) static void fw_device_init(struct work_struct *work) { - struct fw_device *device = - container_of(work, struct fw_device, work.work); + struct fw_device *device = from_work(device, work, work.work); struct fw_card *card = device->card; struct device *found; u32 minor; @@ -1184,8 +1181,7 @@ static int reread_config_rom(struct fw_device *device, int generation, static void fw_device_refresh(struct work_struct *work) { - struct fw_device *device = - container_of(work, struct fw_device, work.work); + struct fw_device *device = from_work(device, work, work.work); struct fw_card *card = device->card; int ret, node_id = device->node_id; bool changed; @@ -1251,8 +1247,7 @@ static void fw_device_refresh(struct work_struct *work) static void fw_device_workfn(struct work_struct *work) { - struct fw_device *device = container_of(to_delayed_work(work), - struct fw_device, work); + struct fw_device *device = from_work(device, 
to_delayed_work(work), work); device->workfn(work); } From de1c831a7898f164c1c2703c6b2b9e4fb4bebefc Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 15 Apr 2025 10:02:33 -0700 Subject: [PATCH 0048/2411] slab: Decouple slab_debug and no_hash_pointers Some system owners use slab_debug=FPZ (or similar) as a hardening option, but do not want to be forced into having kernel addresses exposed due to the implicit "no_hash_pointers" boot param setting.[1] Introduce the "hash_pointers" boot param, which defaults to "auto" (the current behavior), but also includes "always" (forcing on hashing even when "slab_debug=..." is defined), and "never". The existing "no_hash_pointers" boot param becomes an alias for "hash_pointers=never". This makes it possible to boot with "slab_debug=FPZ hash_pointers=always". Link: https://github.com/KSPP/linux/issues/368 [1] Fixes: 792702911f58 ("slub: force on no_hash_pointers when slub_debug is enabled") Co-developed-by: Sergio Perez Gonzalez Signed-off-by: Sergio Perez Gonzalez Acked-by: Vlastimil Babka Acked-by: David Rientjes Reviewed-by: Bagas Sanjaya Signed-off-by: Kees Cook Reviewed-by: Harry Yoo Acked-by: Rafael Aquini Tested-by: Petr Mladek Reviewed-by: Petr Mladek Link: https://patch.msgid.link/20250415170232.it.467-kees@kernel.org [kees@kernel.org: Add note about hash_pointers into slab_debug kernel parameter documentation.] Signed-off-by: Petr Mladek --- .../admin-guide/kernel-parameters.txt | 38 ++++++++---- include/linux/sprintf.h | 2 +- lib/vsprintf.c | 61 +++++++++++++++++-- mm/slub.c | 5 +- 4 files changed, 86 insertions(+), 20 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3f35d5b8c296..0dd5cd17e87e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1798,6 +1798,27 @@ backtraces on all cpus. 
Format: 0 | 1 + hash_pointers= + [KNL,EARLY] + By default, when pointers are printed to the console + or buffers via the %p format string, that pointer is + "hashed", i.e. obscured by hashing the pointer value. + This is a security feature that hides actual kernel + addresses from unprivileged users, but it also makes + debugging the kernel more difficult since unequal + pointers can no longer be compared. The choices are: + Format: { auto | always | never } + Default: auto + + auto - Hash pointers unless slab_debug is enabled. + always - Always hash pointers (even if slab_debug is + enabled). + never - Never hash pointers. This option should only + be specified when debugging the kernel. Do + not use on production kernels. The boot + param "no_hash_pointers" is an alias for + this mode. + hashdist= [KNL,NUMA] Large hashes allocated during boot are distributed across NUMA nodes. Defaults on for 64-bit NUMA, off otherwise. @@ -4120,18 +4141,7 @@ no_hash_pointers [KNL,EARLY] - Force pointers printed to the console or buffers to be - unhashed. By default, when a pointer is printed via %p - format string, that pointer is "hashed", i.e. obscured - by hashing the pointer value. This is a security feature - that hides actual kernel addresses from unprivileged - users, but it also makes debugging the kernel more - difficult since unequal pointers can no longer be - compared. However, if this command-line option is - specified, then all normal pointers will have their true - value printed. This option should only be specified when - debugging the kernel. Please do not use on production - kernels. + Alias for "hash_pointers=never". nohibernate [HIBERNATION] Disable hibernation and resume. @@ -6481,6 +6491,10 @@ Documentation/mm/slub.rst. (slub_debug legacy name also accepted for now) + Using this option implies the "no_hash_pointers" + option which can be undone by adding the + "hash_pointers=always" option. 
+ slab_max_order= [MM] Determines the maximum allowed order for slabs. A high setting may cause OOMs due to memory diff --git a/include/linux/sprintf.h b/include/linux/sprintf.h index 51cab2def9ec..521bb2cd2648 100644 --- a/include/linux/sprintf.h +++ b/include/linux/sprintf.h @@ -22,7 +22,7 @@ __scanf(2, 0) int vsscanf(const char *, const char *, va_list); /* These are for specific cases, do not use without real need */ extern bool no_hash_pointers; -int no_hash_pointers_enable(char *str); +void hash_pointers_finalize(bool slub_debug); /* Used for Rust formatting ('%pA') */ char *rust_fmt_argument(char *buf, char *end, const void *ptr); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 01699852f30c..22cbd75266ef 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -60,6 +60,20 @@ bool no_hash_pointers __ro_after_init; EXPORT_SYMBOL_GPL(no_hash_pointers); +/* + * Hashed pointers policy selected by "hash_pointers=..." boot param + * + * `auto` - Hashed pointers enabled unless disabled by slub_debug_enabled=true + * `always` - Hashed pointers enabled unconditionally + * `never` - Hashed pointers disabled unconditionally + */ +enum hash_pointers_policy { + HASH_PTR_AUTO = 0, + HASH_PTR_ALWAYS, + HASH_PTR_NEVER +}; +static enum hash_pointers_policy hash_pointers_mode __initdata; + noinline static unsigned long long simple_strntoull(const char *startp, char **endp, unsigned int base, size_t max_chars) { @@ -2271,12 +2285,23 @@ char *resource_or_range(const char *fmt, char *buf, char *end, void *ptr, return resource_string(buf, end, ptr, spec, fmt); } -int __init no_hash_pointers_enable(char *str) +void __init hash_pointers_finalize(bool slub_debug) { - if (no_hash_pointers) - return 0; + switch (hash_pointers_mode) { + case HASH_PTR_ALWAYS: + no_hash_pointers = false; + break; + case HASH_PTR_NEVER: + no_hash_pointers = true; + break; + case HASH_PTR_AUTO: + default: + no_hash_pointers = slub_debug; + break; + } - no_hash_pointers = true; + if (!no_hash_pointers) + 
return; pr_warn("**********************************************************\n"); pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); @@ -2289,11 +2314,39 @@ int __init no_hash_pointers_enable(char *str) pr_warn("** the kernel, report this immediately to your system **\n"); pr_warn("** administrator! **\n"); pr_warn("** **\n"); + pr_warn("** Use hash_pointers=always to force this mode off **\n"); + pr_warn("** **\n"); pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); pr_warn("**********************************************************\n"); +} + +static int __init hash_pointers_mode_parse(char *str) +{ + if (!str) { + pr_warn("Hash pointers mode empty; falling back to auto.\n"); + hash_pointers_mode = HASH_PTR_AUTO; + } else if (strncmp(str, "auto", 4) == 0) { + pr_info("Hash pointers mode set to auto.\n"); + hash_pointers_mode = HASH_PTR_AUTO; + } else if (strncmp(str, "never", 5) == 0) { + pr_info("Hash pointers mode set to never.\n"); + hash_pointers_mode = HASH_PTR_NEVER; + } else if (strncmp(str, "always", 6) == 0) { + pr_info("Hash pointers mode set to always.\n"); + hash_pointers_mode = HASH_PTR_ALWAYS; + } else { + pr_warn("Unknown hash_pointers mode '%s' specified; assuming auto.\n", str); + hash_pointers_mode = HASH_PTR_AUTO; + } return 0; } +early_param("hash_pointers", hash_pointers_mode_parse); + +static int __init no_hash_pointers_enable(char *str) +{ + return hash_pointers_mode_parse("never"); +} early_param("no_hash_pointers", no_hash_pointers_enable); /* diff --git a/mm/slub.c b/mm/slub.c index b46f87662e71..f3d61b330a76 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6314,9 +6314,8 @@ void __init kmem_cache_init(void) if (debug_guardpage_minorder()) slub_max_order = 0; - /* Print slub debugging pointers without hashing */ - if (__slub_debug_enabled()) - no_hash_pointers_enable(NULL); + /* Inform pointer hashing choice about slub debugging state. 
*/ + hash_pointers_finalize(__slub_debug_enabled()); kmem_cache_node = &boot_kmem_cache_node; kmem_cache = &boot_kmem_cache; From cf5543870186d6f99b631faaeca27beaa996d52f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 4 Jun 2025 16:20:45 +0200 Subject: [PATCH 0049/2411] printk: Allow to use the printk kthread immediately even for 1st nbcon The kthreads for nbcon consoles are created by nbcon_alloc() at the beginning of the console registration. But it currently works only for the 2nd or later nbcon console because the code checks @printk_kthreads_running. The kthread for the 1st registered nbcon console is created at the very end of register_console() by printk_kthreads_check_locked(). As a result, the entire log is replayed synchronously when the "enabled" message gets printed. It might block the boot for a long time with a slow serial console. Prevent the synchronous flush by creating the kthread even for the 1st nbcon console when it is safe (kthreads ready and no boot consoles). Also inform printk() to use the kthread by setting @printk_kthreads_running. Note that the kthreads already must be running when it is safe and this is not the 1st nbcon console. Symmetrically, clear @printk_kthreads_running when the last nbcon console was unregistered by nbcon_free(). This requires updating @have_nbcon_console before nbcon_free() gets called. Note that there is _no_ problem when the 1st nbcon console replaces boot consoles. In this case, the kthread will be started at the end of registration after the boot consoles are removed. But the console does not replay the entire log buffer in this case. Note that the flag CON_PRINTBUFFER is always cleared when the boot consoles are removed and vice versa. 
Closes: https://lore.kernel.org/r/20250514173514.2117832-1-mcobb@thegoodpenguin.co.uk Tested-by: Michael Cobb Reviewed-by: John Ogness Link: https://patch.msgid.link/20250604142045.253301-1-pmladek@suse.com Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 2 ++ kernel/printk/nbcon.c | 26 ++++++++++++++++++++++++-- kernel/printk/printk.c | 20 +++++++++++--------- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 48a24e7b309d..567c9e100d47 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -64,6 +64,7 @@ struct dev_printk_info; extern struct printk_ringbuffer *prb; extern bool printk_kthreads_running; +extern bool printk_kthreads_ready; extern bool debug_non_panic_cpus; __printf(4, 0) @@ -180,6 +181,7 @@ static inline void nbcon_kthread_wake(struct console *con) #define PRINTKRB_RECORD_MAX 0 #define printk_kthreads_running (false) +#define printk_kthreads_ready (false) /* * In !PRINTK builds we still export console_sem diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index fd12efcc4aed..d60596777d27 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -1671,6 +1671,9 @@ bool nbcon_alloc(struct console *con) { struct nbcon_state state = { }; + /* Synchronize the kthread start. */ + lockdep_assert_console_list_lock_held(); + /* The write_thread() callback is mandatory. */ if (WARN_ON(!con->write_thread)) return false; @@ -1701,12 +1704,15 @@ bool nbcon_alloc(struct console *con) return false; } - if (printk_kthreads_running) { + if (printk_kthreads_ready && !have_boot_console) { if (!nbcon_kthread_create(con)) { kfree(con->pbufs); con->pbufs = NULL; return false; } + + /* Might be the first kthread. 
*/ + printk_kthreads_running = true; } } @@ -1716,14 +1722,30 @@ bool nbcon_alloc(struct console *con) /** * nbcon_free - Free and cleanup the nbcon console specific data * @con: Console to free/cleanup nbcon data + * + * Important: @have_nbcon_console must be updated before calling + * this function. In particular, it can be set only when there + * is still another nbcon console registered. */ void nbcon_free(struct console *con) { struct nbcon_state state = { }; - if (printk_kthreads_running) + /* Synchronize the kthread stop. */ + lockdep_assert_console_list_lock_held(); + + if (printk_kthreads_running) { nbcon_kthread_stop(con); + /* Might be the last nbcon console. + * + * Do not rely on printk_kthreads_check_locked(). It is not + * called in some code paths, see nbcon_free() callers. + */ + if (!have_nbcon_console) + printk_kthreads_running = false; + } + nbcon_state_set(con, &state); /* Boot consoles share global printk buffers. */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 1eea80d0648e..0efbcdda9aab 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3574,7 +3574,7 @@ EXPORT_SYMBOL(console_resume); static int unregister_console_locked(struct console *console); /* True when system boot is far enough to create printer threads. */ -static bool printk_kthreads_ready __ro_after_init; +bool printk_kthreads_ready __ro_after_init; static struct task_struct *printk_legacy_kthread; @@ -3713,6 +3713,7 @@ static void printk_kthreads_check_locked(void) if (!printk_kthreads_ready) return; + /* Start or stop the legacy kthread when needed. 
*/ if (have_legacy_console || have_boot_console) { if (!printk_legacy_kthread && force_legacy_kthread() && @@ -4204,14 +4205,6 @@ static int unregister_console_locked(struct console *console) */ synchronize_srcu(&console_srcu); - if (console->flags & CON_NBCON) - nbcon_free(console); - - console_sysfs_notify(); - - if (console->exit) - res = console->exit(console); - /* * With this console gone, the global flags tracking registered * console types may have changed. Update them. @@ -4232,6 +4225,15 @@ static int unregister_console_locked(struct console *console) if (!found_nbcon_con) have_nbcon_console = found_nbcon_con; + /* @have_nbcon_console must be updated before calling nbcon_free(). */ + if (console->flags & CON_NBCON) + nbcon_free(console); + + console_sysfs_notify(); + + if (console->exit) + res = console->exit(console); + /* Changed console list, may require printer threads to start/stop. */ printk_kthreads_check_locked(); From 99d4a6e5c24fc05fc56a33d9d24e89720bfd5665 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 9 Jun 2025 15:30:39 +0100 Subject: [PATCH 0050/2411] MAINTAINERS: Remove Sanyog Kale as reviewer on SoundWire The given email address for Sanyog is no longer valid and bounces, so remove as a reviewer for now and he can add back with a new email if needed. 
Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20250609143041.495049-2-ckeepax@opensource.cirrus.com Signed-off-by: Vinod Koul --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..93511f54492d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23279,7 +23279,6 @@ SOUNDWIRE SUBSYSTEM M: Vinod Koul M: Bard Liao R: Pierre-Louis Bossart -R: Sanyog Kale L: linux-sound@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/soundwire.git From ccb7bb13c00bcc3178d270da052635c56148bc16 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 9 Jun 2025 15:30:40 +0100 Subject: [PATCH 0051/2411] soundwire: Move handle_nested_irq outside of sdw_dev_lock The sdw_dev_lock protects the SoundWire driver callbacks against the probed flag, which is used to skip the callbacks if the driver gets removed. For more information see commit bd29c00edd0a ("soundwire: revisit driver bind/unbind and callbacks"). However, this lock is a frequent source of mutex inversions. Many audio operations eventually hit the hardware resulting in a SoundWire callback, this means that typically the driver has the locking order ALSA/ASoC locks -> sdw_dev_lock. Conversely, the IRQ comes in directly from the SoundWire hardware, but then will often want to access ALSA/ASoC, such as updating something in DAPM or an ALSA control. This gives the other lock order sdw_dev_lock -> ALSA/ASoC locks. When the IRQ handling was initially added to SoundWire this was through a callback mechanism. As such it required being covered by the lock because the callbacks are part of the sdw_driver structure and are thus present regardless of if the driver is currently probed. Since then a newer mechanism using the IRQ framework has been added, which is currently covered by the same lock but this isn't actually required. 
Handlers for the IRQ framework are registered in probe and should be released during remove, thus the IRQ framework will have already unbound the IRQ before the slave driver is removed. Avoid the aforementioned mutex inversion by moving the handle_nested_irq call outside of the sdw_dev_lock. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20250609143041.495049-3-ckeepax@opensource.cirrus.com Signed-off-by: Vinod Koul --- drivers/soundwire/bus.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index 68db4b67a86f..4fd5cac799c5 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -1753,15 +1753,15 @@ static int sdw_handle_slave_alerts(struct sdw_slave *slave) /* Update the Slave driver */ if (slave_notify) { + if (slave->prop.use_domain_irq && slave->irq) + handle_nested_irq(slave->irq); + mutex_lock(&slave->sdw_dev_lock); if (slave->probed) { struct device *dev = &slave->dev; struct sdw_driver *drv = drv_to_sdw_driver(dev->driver); - if (slave->prop.use_domain_irq && slave->irq) - handle_nested_irq(slave->irq); - if (drv->ops && drv->ops->interrupt_callback) { slave_intr.sdca_cascade = sdca_cascade; slave_intr.control_port = clear; From 0cbce868fffaf115a26d6cb45516627cf13cc3d2 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 9 Jun 2025 15:30:41 +0100 Subject: [PATCH 0052/2411] ASoC: cs42l43: Remove unnecessary work functions Now the SoundWire IRQ lock has been changed in the core, it is no longer necessary to use a bunch of work functions to dodge mutex inversions. 
Signed-off-by: Charles Keepax Acked-by: Mark Brown Link: https://lore.kernel.org/r/20250609143041.495049-4-ckeepax@opensource.cirrus.com Signed-off-by: Vinod Koul --- sound/soc/codecs/cs42l43-jack.c | 46 +++++++++++++-------------------- sound/soc/codecs/cs42l43.c | 24 ++++------------- sound/soc/codecs/cs42l43.h | 5 ---- 3 files changed, 23 insertions(+), 52 deletions(-) diff --git a/sound/soc/codecs/cs42l43-jack.c b/sound/soc/codecs/cs42l43-jack.c index 6165ac16c3a9..72a4150709de 100644 --- a/sound/soc/codecs/cs42l43-jack.c +++ b/sound/soc/codecs/cs42l43-jack.c @@ -362,14 +362,15 @@ static void cs42l43_stop_button_detect(struct cs42l43_codec *priv) priv->button_detect_running = false; } +#define CS42L43_BUTTON_COMB_US 11000 #define CS42L43_BUTTON_COMB_MAX 512 #define CS42L43_BUTTON_ROUT 2210 -void cs42l43_button_press_work(struct work_struct *work) +irqreturn_t cs42l43_button_press(int irq, void *data) { - struct cs42l43_codec *priv = container_of(work, struct cs42l43_codec, - button_press_work.work); + struct cs42l43_codec *priv = data; struct cs42l43 *cs42l43 = priv->core; + irqreturn_t iret = IRQ_NONE; unsigned int buttons = 0; unsigned int val = 0; int i, ret; @@ -377,7 +378,7 @@ void cs42l43_button_press_work(struct work_struct *work) ret = pm_runtime_resume_and_get(priv->dev); if (ret) { dev_err(priv->dev, "Failed to resume for button press: %d\n", ret); - return; + return iret; } mutex_lock(&priv->jack_lock); @@ -387,6 +388,9 @@ void cs42l43_button_press_work(struct work_struct *work) goto error; } + // Wait for 2 full cycles of comb filter to ensure good reading + usleep_range(2 * CS42L43_BUTTON_COMB_US, 2 * CS42L43_BUTTON_COMB_US + 50); + regmap_read(cs42l43->regmap, CS42L43_DETECT_STATUS_1, &val); /* Bail if jack removed, the button is irrelevant and likely invalid */ @@ -420,34 +424,27 @@ void cs42l43_button_press_work(struct work_struct *work) snd_soc_jack_report(priv->jack_hp, buttons, CS42L43_JACK_BUTTONS); + iret = IRQ_HANDLED; + error: 
mutex_unlock(&priv->jack_lock); pm_runtime_mark_last_busy(priv->dev); pm_runtime_put_autosuspend(priv->dev); + + return iret; } -irqreturn_t cs42l43_button_press(int irq, void *data) +irqreturn_t cs42l43_button_release(int irq, void *data) { struct cs42l43_codec *priv = data; - - // Wait for 2 full cycles of comb filter to ensure good reading - queue_delayed_work(system_wq, &priv->button_press_work, - msecs_to_jiffies(20)); - - return IRQ_HANDLED; -} - -void cs42l43_button_release_work(struct work_struct *work) -{ - struct cs42l43_codec *priv = container_of(work, struct cs42l43_codec, - button_release_work); + irqreturn_t iret = IRQ_NONE; int ret; ret = pm_runtime_resume_and_get(priv->dev); if (ret) { dev_err(priv->dev, "Failed to resume for button release: %d\n", ret); - return; + return iret; } mutex_lock(&priv->jack_lock); @@ -456,6 +453,8 @@ void cs42l43_button_release_work(struct work_struct *work) dev_dbg(priv->dev, "Button release IRQ\n"); snd_soc_jack_report(priv->jack_hp, 0, CS42L43_JACK_BUTTONS); + + iret = IRQ_HANDLED; } else { dev_dbg(priv->dev, "Spurious button release IRQ\n"); } @@ -464,15 +463,8 @@ void cs42l43_button_release_work(struct work_struct *work) pm_runtime_mark_last_busy(priv->dev); pm_runtime_put_autosuspend(priv->dev); -} -irqreturn_t cs42l43_button_release(int irq, void *data) -{ - struct cs42l43_codec *priv = data; - - queue_work(system_wq, &priv->button_release_work); - - return IRQ_HANDLED; + return iret; } void cs42l43_bias_sense_timeout(struct work_struct *work) @@ -787,8 +779,6 @@ irqreturn_t cs42l43_tip_sense(int irq, void *data) cancel_delayed_work(&priv->bias_sense_timeout); cancel_delayed_work(&priv->tip_sense_work); - cancel_delayed_work(&priv->button_press_work); - cancel_work(&priv->button_release_work); // Ensure delay after suspend is long enough to avoid false detection if (priv->suspend_jack_debounce) diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c index ea84ac64c775..41a0f4529ea1 100644 --- 
a/sound/soc/codecs/cs42l43.c +++ b/sound/soc/codecs/cs42l43.c @@ -167,13 +167,14 @@ static void cs42l43_hp_ilimit_clear_work(struct work_struct *work) snd_soc_dapm_mutex_unlock(dapm); } -static void cs42l43_hp_ilimit_work(struct work_struct *work) +static irqreturn_t cs42l43_hp_ilimit(int irq, void *data) { - struct cs42l43_codec *priv = container_of(work, struct cs42l43_codec, - hp_ilimit_work); + struct cs42l43_codec *priv = data; struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(priv->component); struct cs42l43 *cs42l43 = priv->core; + dev_dbg(priv->dev, "headphone ilimit IRQ\n"); + snd_soc_dapm_mutex_lock(dapm); if (priv->hp_ilimit_count < CS42L43_HP_ILIMIT_MAX_COUNT) { @@ -183,7 +184,7 @@ static void cs42l43_hp_ilimit_work(struct work_struct *work) priv->hp_ilimit_count++; snd_soc_dapm_mutex_unlock(dapm); - return; + return IRQ_HANDLED; } dev_err(priv->dev, "Disabling headphone for %dmS, due to frequent current limit\n", @@ -218,15 +219,6 @@ static void cs42l43_hp_ilimit_work(struct work_struct *work) priv->hp_ilimited = false; snd_soc_dapm_mutex_unlock(dapm); -} - -static irqreturn_t cs42l43_hp_ilimit(int irq, void *data) -{ - struct cs42l43_codec *priv = data; - - dev_dbg(priv->dev, "headphone ilimit IRQ\n"); - - queue_work(system_long_wq, &priv->hp_ilimit_work); return IRQ_HANDLED; } @@ -2159,10 +2151,7 @@ static void cs42l43_component_remove(struct snd_soc_component *component) cancel_delayed_work_sync(&priv->bias_sense_timeout); cancel_delayed_work_sync(&priv->tip_sense_work); - cancel_delayed_work_sync(&priv->button_press_work); - cancel_work_sync(&priv->button_release_work); - cancel_work_sync(&priv->hp_ilimit_work); cancel_delayed_work_sync(&priv->hp_ilimit_clear_work); priv->component = NULL; @@ -2314,10 +2303,7 @@ static int cs42l43_codec_probe(struct platform_device *pdev) INIT_DELAYED_WORK(&priv->tip_sense_work, cs42l43_tip_sense_work); INIT_DELAYED_WORK(&priv->bias_sense_timeout, cs42l43_bias_sense_timeout); - 
INIT_DELAYED_WORK(&priv->button_press_work, cs42l43_button_press_work); INIT_DELAYED_WORK(&priv->hp_ilimit_clear_work, cs42l43_hp_ilimit_clear_work); - INIT_WORK(&priv->button_release_work, cs42l43_button_release_work); - INIT_WORK(&priv->hp_ilimit_work, cs42l43_hp_ilimit_work); pm_runtime_set_autosuspend_delay(priv->dev, 100); pm_runtime_use_autosuspend(priv->dev); diff --git a/sound/soc/codecs/cs42l43.h b/sound/soc/codecs/cs42l43.h index 1cd9d8a71c43..3ea36362b11a 100644 --- a/sound/soc/codecs/cs42l43.h +++ b/sound/soc/codecs/cs42l43.h @@ -88,8 +88,6 @@ struct cs42l43_codec { struct delayed_work tip_sense_work; struct delayed_work bias_sense_timeout; - struct delayed_work button_press_work; - struct work_struct button_release_work; struct completion type_detect; struct completion load_detect; @@ -99,7 +97,6 @@ struct cs42l43_codec { int jack_override; bool suspend_jack_debounce; - struct work_struct hp_ilimit_work; struct delayed_work hp_ilimit_clear_work; bool hp_ilimited; int hp_ilimit_count; @@ -134,8 +131,6 @@ int cs42l43_set_jack(struct snd_soc_component *component, struct snd_soc_jack *jack, void *d); void cs42l43_bias_sense_timeout(struct work_struct *work); void cs42l43_tip_sense_work(struct work_struct *work); -void cs42l43_button_press_work(struct work_struct *work); -void cs42l43_button_release_work(struct work_struct *work); irqreturn_t cs42l43_bias_detect_clamp(int irq, void *data); irqreturn_t cs42l43_button_press(int irq, void *data); irqreturn_t cs42l43_button_release(int irq, void *data); From 5b8c1f39b5e46505cf9cf7775759a9e9c2bfc2d9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 23 May 2025 10:53:17 +0200 Subject: [PATCH 0053/2411] soundwire: qcom: demote probe registration printk Drivers should generally be quiet on successful probe. 
Demote the Qualcomm controller registration info message to debug level to make boot logs a little less noisy: qcom-soundwire 6ab0000.soundwire: Qualcomm Soundwire controller v2.0.0 Registered qcom-soundwire 6ad0000.soundwire: Qualcomm Soundwire controller v2.0.0 Registered qcom-soundwire 6b10000.soundwire: Qualcomm Soundwire controller v2.0.0 Registered qcom-soundwire 6d30000.soundwire: Qualcomm Soundwire controller v2.0.0 Registered Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Acked-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20250523085317.11439-1-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/soundwire/qcom.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c index 295a46dc2be7..3265c39e6b51 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -1648,9 +1648,9 @@ static int qcom_swrm_probe(struct platform_device *pdev) if (ret) goto err_master_add; - dev_info(dev, "Qualcomm Soundwire controller v%x.%x.%x Registered\n", - (ctrl->version >> 24) & 0xff, (ctrl->version >> 16) & 0xff, - ctrl->version & 0xffff); + dev_dbg(dev, "Qualcomm Soundwire controller v%x.%x.%x registered\n", + (ctrl->version >> 24) & 0xff, (ctrl->version >> 16) & 0xff, + ctrl->version & 0xffff); pm_runtime_set_autosuspend_delay(dev, 3000); pm_runtime_use_autosuspend(dev); From 03837341790039d6f1cbf7a1ae7dfa2cb77ef0a4 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 30 May 2025 11:13:40 +0530 Subject: [PATCH 0054/2411] soundwire: amd: serialize amd manager resume sequence during pm_prepare During pm_prepare callback, pm_request_resume() delays SoundWire manager D0 entry sequence. Synchronize runtime resume sequence for amd_manager instance prior to invoking child devices resume sequence for both the amd power modes(ClockStop Mode and Power off mode). Change the power_mode_mask check and use pm_runtime_resume() in amd_pm_prepare() callback. 
Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20250530054447.1645807-3-Vijendar.Mukunda@amd.com Signed-off-by: Vinod Koul --- drivers/soundwire/amd_manager.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index a12c68b93b1c..c833b3096255 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -1178,10 +1178,10 @@ static int __maybe_unused amd_pm_prepare(struct device *dev) * device is not in runtime suspend state, observed that device alerts are missing * without pm_prepare on AMD platforms in clockstop mode0. */ - if (amd_manager->power_mode_mask & AMD_SDW_CLK_STOP_MODE) { - ret = pm_request_resume(dev); + if (amd_manager->power_mode_mask) { + ret = pm_runtime_resume(dev); if (ret < 0) { - dev_err(bus->dev, "pm_request_resume failed: %d\n", ret); + dev_err(bus->dev, "pm_runtime_resume failed: %d\n", ret); return 0; } } From f93b697ed98e3c85d1973ea170d4f4e7a6b2b45d Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 30 May 2025 11:13:41 +0530 Subject: [PATCH 0055/2411] soundwire: amd: cancel pending slave status handling workqueue during remove sequence During remove sequence, cancel the pending slave status update workqueue. 
Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20250530054447.1645807-4-Vijendar.Mukunda@amd.com Signed-off-by: Vinod Koul --- drivers/soundwire/amd_manager.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index c833b3096255..9a767704b603 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -1074,6 +1074,7 @@ static void amd_sdw_manager_remove(struct platform_device *pdev) int ret; pm_runtime_disable(&pdev->dev); + cancel_work_sync(&amd_manager->amd_sdw_work); amd_disable_sdw_interrupts(amd_manager); sdw_bus_master_delete(&amd_manager->bus); ret = amd_disable_sdw_manager(amd_manager); From fdf5596103455c62ab84293ddd95d9bf16f6519a Mon Sep 17 00:00:00 2001 From: Yumeng Fang Date: Fri, 23 May 2025 14:19:10 +0800 Subject: [PATCH 0056/2411] soundwire: intel_ace2.x: Use str_read_write() helper Remove hard-coded strings by using the str_read_write() helper. Signed-off-by: Yumeng Fang Signed-off-by: Yunjian Long Reviewed-by: Bard Liao Link: https://lore.kernel.org/r/20250523141910793yUFpjomfu0byK_yFddHQu@zte.com.cn Signed-off-by: Vinod Koul --- drivers/soundwire/intel_ace2x.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/soundwire/intel_ace2x.c b/drivers/soundwire/intel_ace2x.c index 5b31e1f69591..7e893e4f48d6 100644 --- a/drivers/soundwire/intel_ace2x.c +++ b/drivers/soundwire/intel_ace2x.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -183,7 +184,7 @@ static int intel_ace2x_bpt_open_stream(struct sdw_intel *sdw, struct sdw_slave * return 0; dev_err(cdns->dev, "%s: sdw_prepare_%s_dma_buffer failed %d\n", - __func__, command ? 
"read" : "write", ret); + __func__, str_read_write(command), ret); ret1 = hda_sdw_bpt_close(cdns->dev->parent, /* PCI device */ sdw->bpt_ctx.bpt_tx_stream, &sdw->bpt_ctx.dmab_tx_bdl, From 393350c1691f1cbf3a0436f2a12c2b4347c4e953 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Tue, 29 Apr 2025 20:23:37 +0800 Subject: [PATCH 0057/2411] soundwire: update Intel BPT message length limitation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The limitation of "must be multiples of 32 bytes" does not fit the requirement of current Intel platforms. Update it to meet the requirement. Signed-off-by: Bard Liao Reviewed-by: Ranjani Sridharan Reviewed-by: Péter Ujfalusi Link: https://lore.kernel.org/r/20250429122337.142551-1-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- Documentation/driver-api/soundwire/bra.rst | 2 +- drivers/soundwire/intel_ace2x.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/driver-api/soundwire/bra.rst b/Documentation/driver-api/soundwire/bra.rst index 8500253fa3e8..c08ab2591496 100644 --- a/Documentation/driver-api/soundwire/bra.rst +++ b/Documentation/driver-api/soundwire/bra.rst @@ -333,4 +333,4 @@ FIFO sizes to avoid xruns. Alignment requirements are currently not enforced at the core level but at the platform-level, e.g. for Intel the data sizes must be -multiples of 32 bytes. +equal to or larger than 16 bytes. 
diff --git a/drivers/soundwire/intel_ace2x.c b/drivers/soundwire/intel_ace2x.c index 7e893e4f48d6..5d08364ad6d1 100644 --- a/drivers/soundwire/intel_ace2x.c +++ b/drivers/soundwire/intel_ace2x.c @@ -246,7 +246,7 @@ static void intel_ace2x_bpt_close_stream(struct sdw_intel *sdw, struct sdw_slave cdns->bus.bpt_stream = NULL; } -#define INTEL_BPT_MSG_BYTE_ALIGNMENT 32 +#define INTEL_BPT_MSG_BYTE_MIN 16 static int intel_ace2x_bpt_send_async(struct sdw_intel *sdw, struct sdw_slave *slave, struct sdw_bpt_msg *msg) @@ -254,9 +254,9 @@ static int intel_ace2x_bpt_send_async(struct sdw_intel *sdw, struct sdw_slave *s struct sdw_cdns *cdns = &sdw->cdns; int ret; - if (msg->len % INTEL_BPT_MSG_BYTE_ALIGNMENT) { - dev_err(cdns->dev, "BPT message length %d is not a multiple of %d bytes\n", - msg->len, INTEL_BPT_MSG_BYTE_ALIGNMENT); + if (msg->len < INTEL_BPT_MSG_BYTE_MIN) { + dev_err(cdns->dev, "BPT message length %d is less than the minimum bytes %d\n", + msg->len, INTEL_BPT_MSG_BYTE_MIN); return -EINVAL; } From 189a977e4dc011b05aa1fee044d1a98cf904341b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 4 Jun 2025 10:48:35 -0700 Subject: [PATCH 0058/2411] perf bpf-filter: Improve error messages The BPF filter needs libbpf/BPF-skeleton support and root privilege. Add error messages to help users understand the problem easily. When it's not built with BPF support (make BUILD_BPF_SKEL=0). $ sudo perf record -e cycles --filter "pid != 0" true Error: BPF filter is requested but perf is not built with BPF. Please make sure to build with libbpf and BPF skeleton. Usage: perf record [] [] or: perf record [] -- [] --filter event filter When it supports BPF but runs without root or CAP_BPF. Note that it also checks pinned BPF filters. $ perf record -e cycles --filter "pid != 0" -o /dev/null true Error: BPF filter only works for users with the CAP_BPF capability! Please run 'perf record --setup-filter pin' as root first. 
Usage: perf record [] [] or: perf record [] -- [] --filter event filter Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250604174835.1852481-1-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/util/bpf-filter.c | 28 ++++++++++++++++++++++++++++ tools/perf/util/bpf-filter.h | 3 +++ tools/perf/util/cap.c | 1 - tools/perf/util/cap.h | 5 +++++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c index a4fdf6911ec1..92e2f054b45e 100644 --- a/tools/perf/util/bpf-filter.c +++ b/tools/perf/util/bpf-filter.c @@ -52,6 +52,7 @@ #include #include +#include "util/cap.h" #include "util/debug.h" #include "util/evsel.h" #include "util/target.h" @@ -618,11 +619,38 @@ struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(enum perf_bpf_filter_term return expr; } +static bool check_bpf_filter_capable(void) +{ + bool used_root; + + if (perf_cap__capable(CAP_BPF, &used_root)) + return true; + + if (!used_root) { + /* Check if root already pinned the filter programs and maps */ + int fd = get_pinned_fd("filters"); + + if (fd >= 0) { + close(fd); + return true; + } + } + + pr_err("Error: BPF filter only works for %s!\n" + "\tPlease run 'perf record --setup-filter pin' as root first.\n", + used_root ? 
"root" : "users with the CAP_BPF capability"); + + return false; +} + int perf_bpf_filter__parse(struct list_head *expr_head, const char *str) { YY_BUFFER_STATE buffer; int ret; + if (!check_bpf_filter_capable()) + return -EPERM; + buffer = perf_bpf_filter__scan_string(str); ret = perf_bpf_filter_parse(expr_head); diff --git a/tools/perf/util/bpf-filter.h b/tools/perf/util/bpf-filter.h index 916ed7770b73..122477f2de44 100644 --- a/tools/perf/util/bpf-filter.h +++ b/tools/perf/util/bpf-filter.h @@ -5,6 +5,7 @@ #include #include "bpf_skel/sample-filter.h" +#include "util/debug.h" struct perf_bpf_filter_expr { struct list_head list; @@ -38,6 +39,8 @@ int perf_bpf_filter__unpin(void); static inline int perf_bpf_filter__parse(struct list_head *expr_head __maybe_unused, const char *str __maybe_unused) { + pr_err("Error: BPF filter is requested but perf is not built with BPF.\n" + "\tPlease make sure to build with libbpf and BPF skeleton.\n"); return -EOPNOTSUPP; } static inline int perf_bpf_filter__prepare(struct evsel *evsel __maybe_unused, diff --git a/tools/perf/util/cap.c b/tools/perf/util/cap.c index 69d9a2bcd40b..24a0ea7e6d97 100644 --- a/tools/perf/util/cap.c +++ b/tools/perf/util/cap.c @@ -7,7 +7,6 @@ #include "debug.h" #include #include -#include #include #include diff --git a/tools/perf/util/cap.h b/tools/perf/util/cap.h index 0c6a1ff55f07..c1b8ac033ccc 100644 --- a/tools/perf/util/cap.h +++ b/tools/perf/util/cap.h @@ -3,6 +3,7 @@ #define __PERF_CAP_H #include +#include /* For older systems */ #ifndef CAP_SYSLOG @@ -13,6 +14,10 @@ #define CAP_PERFMON 38 #endif +#ifndef CAP_BPF +#define CAP_BPF 39 +#endif + /* Query if a capability is supported, used_root is set if the fallback root check was used. 
*/ bool perf_cap__capable(int cap, bool *used_root); From 8b99e2f7a95297da80b0b7167a8c8327b65c019e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 4 Jun 2025 10:45:35 -0700 Subject: [PATCH 0059/2411] perf parse-events filter: Use evsel__find_pmu Rather than manually scanning PMUs, use evsel__find_pmu that can use the PMU set during event parsing. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250604174545.2853620-2-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/parse-events.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 2380de56a207..d96adf23dc94 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2564,9 +2564,8 @@ foreach_evsel_in_last_glob(struct evlist *evlist, static int set_filter(struct evsel *evsel, const void *arg) { const char *str = arg; - bool found = false; int nr_addr_filters = 0; - struct perf_pmu *pmu = NULL; + struct perf_pmu *pmu; if (evsel == NULL) { fprintf(stderr, @@ -2584,16 +2583,11 @@ static int set_filter(struct evsel *evsel, const void *arg) return 0; } - while ((pmu = perf_pmus__scan(pmu)) != NULL) - if (pmu->type == evsel->core.attr.type) { - found = true; - break; - } - - if (found) + pmu = evsel__find_pmu(evsel); + if (pmu) { perf_pmu__scan_file(pmu, "nr_addr_filters", "%d", &nr_addr_filters); - + } if (!nr_addr_filters) return perf_bpf_filter__parse(&evsel->bpf_filters, str); From 5ddf4c3a17dc499fcbaf35692bc894340062dee8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 4 Jun 2025 10:45:36 -0700 Subject: [PATCH 0060/2411] perf target: Separate parse_uid into its own function Allow parse_uid to be called without a struct target. Rather than have two errors, remove TARGET_ERRNO__USER_NOT_FOUND and use TARGET_ERRNO__INVALID_UID as the handling is identical. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250604174545.2853620-3-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/target.c | 22 ++++++++++++---------- tools/perf/util/target.h | 3 ++- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c index 0f383418e3df..f3ad59ccfa99 100644 --- a/tools/perf/util/target.c +++ b/tools/perf/util/target.c @@ -94,15 +94,13 @@ enum target_errno target__validate(struct target *target) return ret; } -enum target_errno target__parse_uid(struct target *target) +uid_t parse_uid(const char *str) { struct passwd pwd, *result; char buf[1024]; - const char *str = target->uid_str; - target->uid = UINT_MAX; if (str == NULL) - return TARGET_ERRNO__SUCCESS; + return UINT_MAX; /* Try user name first */ getpwnam_r(str, &pwd, buf, sizeof(buf), &result); @@ -115,16 +113,22 @@ enum target_errno target__parse_uid(struct target *target) int uid = strtol(str, &endptr, 10); if (*endptr != '\0') - return TARGET_ERRNO__INVALID_UID; + return UINT_MAX; getpwuid_r(uid, &pwd, buf, sizeof(buf), &result); if (result == NULL) - return TARGET_ERRNO__USER_NOT_FOUND; + return UINT_MAX; } - target->uid = result->pw_uid; - return TARGET_ERRNO__SUCCESS; + return result->pw_uid; +} + +enum target_errno target__parse_uid(struct target *target) +{ + target->uid = parse_uid(target->uid_str); + + return target->uid != UINT_MAX ? 
TARGET_ERRNO__SUCCESS : TARGET_ERRNO__INVALID_UID; } /* @@ -142,7 +146,6 @@ static const char *target__error_str[] = { "BPF switch overriding UID", "BPF switch overriding THREAD", "Invalid User: %s", - "Problems obtaining information for user %s", }; int target__strerror(struct target *target, int errnum, @@ -171,7 +174,6 @@ int target__strerror(struct target *target, int errnum, break; case TARGET_ERRNO__INVALID_UID: - case TARGET_ERRNO__USER_NOT_FOUND: snprintf(buf, buflen, msg, target->uid_str); break; diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index 2ee2cc30340f..e082bda990fb 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -48,12 +48,13 @@ enum target_errno { /* for target__parse_uid() */ TARGET_ERRNO__INVALID_UID, - TARGET_ERRNO__USER_NOT_FOUND, __TARGET_ERRNO__END, }; enum target_errno target__validate(struct target *target); + +uid_t parse_uid(const char *str); enum target_errno target__parse_uid(struct target *target); int target__strerror(struct target *target, int errnum, char *buf, size_t buflen); From 466db4275edd35b7a9af7c82575bcb3289f2c9c0 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 4 Jun 2025 10:45:37 -0700 Subject: [PATCH 0061/2411] perf parse-events: Add parse_uid_filter helper Add parse_uid_filter as a helper to parse_filter that constructs a uid filter string. As uid filters don't work with tracepoint filters, add an is_possible_tp_filter function so the tracepoint filter isn't attempted for tracepoint evsels.
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250604174545.2853620-4-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/parse-events.c | 33 ++++++++++++++++++++++++++++++++- tools/perf/util/parse-events.h | 2 ++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d96adf23dc94..7f34e602fc08 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -25,6 +25,7 @@ #include "pmu.h" #include "pmus.h" #include "asm/bug.h" +#include "ui/ui.h" #include "util/parse-branch-options.h" #include "util/evsel_config.h" #include "util/event.h" @@ -2561,6 +2562,12 @@ foreach_evsel_in_last_glob(struct evlist *evlist, return 0; } +/* Will a tracepoint filter work for str or should a BPF filter be used? */ +static bool is_possible_tp_filter(const char *str) +{ + return strstr(str, "uid") == NULL; +} + static int set_filter(struct evsel *evsel, const void *arg) { const char *str = arg; @@ -2573,7 +2580,7 @@ static int set_filter(struct evsel *evsel, const void *arg) return -1; } - if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) { + if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT && is_possible_tp_filter(str)) { if (evsel__append_tp_filter(evsel, str) < 0) { fprintf(stderr, "not enough memory to hold filter string\n"); @@ -2609,6 +2616,30 @@ int parse_filter(const struct option *opt, const char *str, (const void *)str); } +int parse_uid_filter(struct evlist *evlist, uid_t uid) +{ + struct option opt = { + .value = &evlist, + }; + char buf[128]; + int ret; + + snprintf(buf, sizeof(buf), "uid == %d", uid); + ret = parse_filter(&opt, buf, /*unset=*/0); + if (ret) { + if (use_browser >= 1) { + /* + * Use ui__warning so a pop up appears above the + * underlying BPF error message. 
+ */ + ui__warning("Failed to add UID filtering that uses BPF filtering.\n"); + } else { + fprintf(stderr, "Failed to add UID filtering that uses BPF filtering.\n"); + } + } + return ret; +} + static int add_exclude_perf_filter(struct evsel *evsel, const void *arg __maybe_unused) { diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index ab242f671031..1c20ed0879aa 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -11,6 +11,7 @@ #include #include #include +#include struct evsel; struct evlist; @@ -45,6 +46,7 @@ static inline int parse_events(struct evlist *evlist, const char *str, int parse_event(struct evlist *evlist, const char *str); int parse_filter(const struct option *opt, const char *str, int unset); +int parse_uid_filter(struct evlist *evlist, uid_t uid); int exclude_perf(const struct option *opt, const char *arg, int unset); enum parse_events__term_val_type { From 1151208e702267ad1ce2f24aa9d21deb47fa17f9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 4 Jun 2025 10:45:38 -0700 Subject: [PATCH 0062/2411] perf record: Switch user option to use BPF filter Finding user processes by scanning /proc is inherently racy and results in perf_event_open failures. Use a BPF filter to drop samples where the uid doesn't match. Ensure adding the BPF filter forces system-wide. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250604174545.2853620-5-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-record.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 8059bce85a51..0b566f300569 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -175,6 +175,7 @@ struct record { bool timestamp_boundary; bool off_cpu; const char *filter_action; + const char *uid_str; struct switch_output switch_output; unsigned long long samples; unsigned long output_max_size; /* = 0: unlimited */ @@ -3513,8 +3514,7 @@ static struct option __record_options[] = { "or ranges of time to enable events e.g. '-D 10-20,30-40'", record__parse_event_enable_time), OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), - OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", - "user to profile"), + OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"), OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, "branch any", "sample any taken branches", @@ -4256,19 +4256,24 @@ int cmd_record(int argc, const char **argv) ui__warning("%s\n", errbuf); } - err = target__parse_uid(&rec->opts.target); - if (err) { - int saved_errno = errno; + if (rec->uid_str) { + uid_t uid = parse_uid(rec->uid_str); - target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); - ui__error("%s", errbuf); + if (uid == UINT_MAX) { + ui__error("Invalid User: %s", rec->uid_str); + err = -EINVAL; + goto out; + } + err = parse_uid_filter(rec->evlist, uid); + if (err) + goto out; - err = -saved_errno; - goto out; + /* User ID filtering implies system wide. */ + rec->opts.target.system_wide = true; } - /* Enable ignoring missing threads when -u/-p option is defined. 
*/ - rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; + /* Enable ignoring missing threads when -p option is defined. */ + rec->opts.ignore_missing_thread = rec->opts.target.pid; evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list); From c54e2f82721aadd59d2a354ae2b5cc32d32047d9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 4 Jun 2025 10:45:39 -0700 Subject: [PATCH 0063/2411] perf tests record: Add basic uid filtering test Based on the system-wide test with changes around how failure is handled as BPF permissions are a bigger issue than perf event paranoia. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250604174545.2853620-6-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/tests/shell/record.sh | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 587f62e34414..2022a4f739be 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -231,6 +231,31 @@ test_cgroup() { echo "Cgroup sampling test [Success]" } +test_uid() { + echo "Uid sampling test" + if ! perf record -aB --synth=no --uid "$(id -u)" -o "${perfdata}" ${testprog} \ + > "${script_output}" 2>&1 + then + if grep -q "libbpf.*EPERM" "${script_output}" + then + echo "Uid sampling [Skipped permissions]" + return + else + echo "Uid sampling [Failed to record]" + err=1 + # cat "${script_output}" + return + fi + fi + if ! perf report -i "${perfdata}" -q | grep -q "${testsym}" + then + echo "Uid sampling [Failed missing output]" + err=1 + return + fi + echo "Uid sampling test [Success]" +} + test_leader_sampling() { echo "Basic leader sampling test" if ! 
perf record -o "${perfdata}" -e "{cycles,cycles}:Su" -- \ @@ -345,6 +370,7 @@ test_system_wide test_workload test_branch_counter test_cgroup +test_uid test_leader_sampling test_topdown_leader_sampling test_precise_max From 38f83cc9ab8f74732de66044d1a126ca46347eea Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 4 Jun 2025 10:45:40 -0700 Subject: [PATCH 0064/2411] perf top: Switch user option to use BPF filter Finding user processes by scanning /proc is inherently racy and results in perf_event_open failures. Use a BPF filter to drop samples where the uid doesn't match. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250604174545.2853620-7-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-top.c | 22 ++++++++++++---------- tools/perf/util/top.c | 4 ++-- tools/perf/util/top.h | 1 + 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 7b6cde87d2af..051ded5ba9ba 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -643,7 +643,7 @@ static void *display_thread_tui(void *arg) */ evlist__for_each_entry(top->evlist, pos) { struct hists *hists = evsel__hists(pos); - hists->uid_filter_str = top->record_opts.target.uid_str; + hists->uid_filter_str = top->uid_str; } ret = evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent, @@ -1571,7 +1571,7 @@ int cmd_top(int argc, const char **argv) "Add prefix to source file path names in programs (with --prefix-strip)"), OPT_STRING(0, "prefix-strip", &annotate_opts.prefix_strip, "N", "Strip first N entries of source file path name in programs (with --prefix)"), - OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"), + OPT_STRING('u', "uid", &top.uid_str, "user", "user to profile"), OPT_CALLBACK(0, "percent-limit", &top, "percent", "Don't show entries under that percent", parse_percent_limit), OPT_CALLBACK(0, "percentage", NULL, "relative|absolute", @@ -1762,15 +1762,17 @@ int cmd_top(int 
argc, const char **argv) ui__warning("%s\n", errbuf); } - status = target__parse_uid(target); - if (status) { - int saved_errno = errno; + if (top.uid_str) { + uid_t uid = parse_uid(top.uid_str); - target__strerror(target, status, errbuf, BUFSIZ); - ui__error("%s\n", errbuf); - - status = -saved_errno; - goto out_delete_evlist; + if (uid == UINT_MAX) { + ui__error("Invalid User: %s", top.uid_str); + status = -EINVAL; + goto out_delete_evlist; + } + status = parse_uid_filter(top.evlist, uid); + if (status) + goto out_delete_evlist; } if (target__none(target)) diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 4db3d1bd686c..b06e10a116bb 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -88,9 +88,9 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) else if (target->tid) ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s", target->tid); - else if (target->uid_str != NULL) + else if (top->uid_str != NULL) ret += SNPRINTF(bf + ret, size - ret, " (uid: %s", - target->uid_str); + top->uid_str); else ret += SNPRINTF(bf + ret, size - ret, " (all"); diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 4c5588dbb131..04ff926846be 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -48,6 +48,7 @@ struct perf_top { const char *sym_filter; float min_percent; unsigned int nr_threads_synthesize; + const char *uid_str; struct { struct ordered_events *in; From bf1976dd28b4ec611d4f0bf5b0de40b1dd21b253 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 4 Jun 2025 10:45:41 -0700 Subject: [PATCH 0065/2411] perf trace: Switch user option to use BPF filter Finding user processes by scanning /proc is inherently racy and results in perf_event_open failures. Use a BPF filter to drop samples where the uid doesn't match. Ensure adding the BPF filter forces system-wide. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250604174545.2853620-8-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-trace.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 2ab1b8e05ad3..4bb062b96f51 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -236,6 +236,7 @@ struct trace { struct ordered_events data; u64 last; } oe; + const char *uid_str; }; static void trace__load_vmlinux_btf(struct trace *trace __maybe_unused) @@ -4412,8 +4413,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv) evlist__add(evlist, pgfault_min); } - /* Enable ignoring missing threads when -u/-p option is defined. */ - trace->opts.ignore_missing_thread = trace->opts.target.uid != UINT_MAX || trace->opts.target.pid; + /* Enable ignoring missing threads when -p option is defined. */ + trace->opts.ignore_missing_thread = trace->opts.target.pid; if (trace->sched && evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime)) @@ -5445,8 +5446,7 @@ int cmd_trace(int argc, const char **argv) "child tasks do not inherit counters"), OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages", "number of mmap data pages", evlist__parse_mmap_pages), - OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user", - "user to profile"), + OPT_STRING('u', "uid", &trace.uid_str, "user", "user to profile"), OPT_CALLBACK(0, "duration", &trace, "float", "show only events with duration > N.M ms", trace__set_duration), @@ -5804,11 +5804,19 @@ int cmd_trace(int argc, const char **argv) goto out_close; } - err = target__parse_uid(&trace.opts.target); - if (err) { - target__strerror(&trace.opts.target, err, bf, sizeof(bf)); - fprintf(trace.output, "%s", bf); - goto out_close; + if (trace.uid_str) { + uid_t uid = parse_uid(trace.uid_str); + + if (uid == UINT_MAX) { + ui__error("Invalid User: %s", 
trace.uid_str); + err = -EINVAL; + goto out_close; + } + err = parse_uid_filter(trace.evlist, uid); + if (err) + goto out_close; + + trace.opts.target.system_wide = true; } if (!argc && target__none(&trace.opts.target)) From 278538ddf1af9f7a7fc0a983a23771083feda7f9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 4 Jun 2025 10:45:42 -0700 Subject: [PATCH 0066/2411] perf bench evlist-open-close: Switch user option to use BPF filter Finding user processes by scanning /proc is inherently racy and results in perf_event_open failures. Use a BPF filter to drop samples where the uid doesn't match. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250604174545.2853620-9-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/bench/evlist-open-close.c | 36 ++++++++++++++++------------ 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c index 79cedcf94a39..bfaf50e4e519 100644 --- a/tools/perf/bench/evlist-open-close.c +++ b/tools/perf/bench/evlist-open-close.c @@ -57,7 +57,7 @@ static int evlist__count_evsel_fds(struct evlist *evlist) return cnt; } -static struct evlist *bench__create_evlist(char *evstr) +static struct evlist *bench__create_evlist(char *evstr, const char *uid_str) { struct parse_events_error err; struct evlist *evlist = evlist__new(); @@ -78,6 +78,18 @@ static struct evlist *bench__create_evlist(char *evstr) goto out_delete_evlist; } parse_events_error__exit(&err); + if (uid_str) { + uid_t uid = parse_uid(uid_str); + + if (uid == UINT_MAX) { + pr_err("Invalid User: %s", uid_str); + ret = -EINVAL; + goto out_delete_evlist; + } + ret = parse_uid_filter(evlist, uid); + if (ret) + goto out_delete_evlist; + } ret = evlist__create_maps(evlist, &opts.target); if (ret < 0) { pr_err("Not enough memory to create thread/cpu maps\n"); @@ -117,10 +129,10 @@ static int bench__do_evlist_open_close(struct evlist *evlist) return 0; } -static int 
bench_evlist_open_close__run(char *evstr) +static int bench_evlist_open_close__run(char *evstr, const char *uid_str) { // used to print statistics only - struct evlist *evlist = bench__create_evlist(evstr); + struct evlist *evlist = bench__create_evlist(evstr, uid_str); double time_average, time_stddev; struct timeval start, end, diff; struct stats time_stats; @@ -142,7 +154,7 @@ static int bench_evlist_open_close__run(char *evstr) for (i = 0; i < iterations; i++) { pr_debug("Started iteration %d\n", i); - evlist = bench__create_evlist(evstr); + evlist = bench__create_evlist(evstr, uid_str); if (!evlist) return -ENOMEM; @@ -206,6 +218,7 @@ static char *bench__repeat_event_string(const char *evstr, int n) int bench_evlist_open_close(int argc, const char **argv) { + const char *uid_str = NULL; const struct option options[] = { OPT_STRING('e', "event", &event_string, "event", "event selector. use 'perf list' to list available events"), @@ -221,7 +234,7 @@ int bench_evlist_open_close(int argc, const char **argv) "record events on existing process id"), OPT_STRING('t', "tid", &opts.target.tid, "tid", "record events on existing thread id"), - OPT_STRING('u', "uid", &opts.target.uid_str, "user", "user to profile"), + OPT_STRING('u', "uid", &uid_str, "user", "user to profile"), OPT_BOOLEAN(0, "per-thread", &opts.target.per_thread, "use per-thread mmaps"), OPT_END() }; @@ -245,15 +258,8 @@ int bench_evlist_open_close(int argc, const char **argv) goto out; } - err = target__parse_uid(&opts.target); - if (err) { - target__strerror(&opts.target, err, errbuf, sizeof(errbuf)); - pr_err("%s", errbuf); - goto out; - } - - /* Enable ignoring missing threads when -u/-p option is defined. */ - opts.ignore_missing_thread = opts.target.uid != UINT_MAX || opts.target.pid; + /* Enable ignoring missing threads when -p option is defined. 
*/ + opts.ignore_missing_thread = opts.target.pid; evstr = bench__repeat_event_string(event_string, nr_events); if (!evstr) { @@ -261,7 +267,7 @@ int bench_evlist_open_close(int argc, const char **argv) goto out; } - err = bench_evlist_open_close__run(evstr); + err = bench_evlist_open_close__run(evstr, uid_str); free(evstr); out: From b4c658d4d63d6149f4ba57c9c5c84b8a61aafa6f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 4 Jun 2025 10:45:43 -0700 Subject: [PATCH 0067/2411] perf target: Remove uid from target Gathering threads with a uid by scanning /proc is inherently racy leading to perf_event_open failures that quit perf. All users of the functionality now use BPF filters, so remove uid and uid_str from target. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250604174545.2853620-10-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-ftrace.c | 1 - tools/perf/builtin-kvm.c | 2 - tools/perf/builtin-stat.c | 4 +- tools/perf/builtin-trace.c | 1 - tools/perf/tests/backward-ring-buffer.c | 1 - tools/perf/tests/event-times.c | 4 +- tools/perf/tests/openat-syscall-tp-fields.c | 1 - tools/perf/tests/perf-record.c | 1 - tools/perf/tests/task-exit.c | 1 - tools/perf/util/bpf-filter.c | 2 +- tools/perf/util/evlist.c | 3 +- tools/perf/util/target.c | 46 +-------------------- tools/perf/util/target.h | 12 +----- 13 files changed, 6 insertions(+), 73 deletions(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index bba36ebc2aa7..3a253a1b9f45 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -1663,7 +1663,6 @@ int cmd_ftrace(int argc, const char **argv) int (*cmd_func)(struct perf_ftrace *) = NULL; struct perf_ftrace ftrace = { .tracer = DEFAULT_TRACER, - .target = { .uid = UINT_MAX, }, }; const struct option common_options[] = { OPT_STRING('p', "pid", &ftrace.target.pid, "pid", diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 67fd2b006b0b..d75bd3684980 100644 --- 
a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1871,8 +1871,6 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, kvm->opts.user_interval = 1; kvm->opts.mmap_pages = 512; kvm->opts.target.uses_mmap = false; - kvm->opts.target.uid_str = NULL; - kvm->opts.target.uid = UINT_MAX; symbol__init(NULL); disable_buildid_cache(); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index bf0e5e12d992..50fc53adb7e4 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -108,9 +108,7 @@ static struct parse_events_option_args parse_events_option_args = { static bool all_counters_use_bpf = true; -static struct target target = { - .uid = UINT_MAX, -}; +static struct target target; static volatile sig_atomic_t child_pid = -1; static int detailed_run = 0; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 4bb062b96f51..bf9b5d0630d3 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -5399,7 +5399,6 @@ int cmd_trace(int argc, const char **argv) struct trace trace = { .opts = { .target = { - .uid = UINT_MAX, .uses_mmap = true, }, .user_freq = UINT_MAX, diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 79a980b1e786..c5e7999f2817 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -91,7 +91,6 @@ static int test__backward_ring_buffer(struct test_suite *test __maybe_unused, in struct parse_events_error parse_error; struct record_opts opts = { .target = { - .uid = UINT_MAX, .uses_mmap = true, }, .freq = 0, diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index deefe5003bfc..2148024b4f4a 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -17,9 +17,7 @@ static int attach__enable_on_exec(struct evlist *evlist) { struct evsel *evsel = evlist__last(evlist); - struct target target = { - .uid = UINT_MAX, - }; + struct target target = {}; 
const char *argv[] = { "true", NULL, }; char sbuf[STRERR_BUFSIZE]; int err; diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 0ef4ba7c1571..2a139d2781a8 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -28,7 +28,6 @@ static int test__syscall_openat_tp_fields(struct test_suite *test __maybe_unused { struct record_opts opts = { .target = { - .uid = UINT_MAX, .uses_mmap = true, }, .no_buffering = true, diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 0958c7c8995f..0b3c37e66871 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -45,7 +45,6 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest { struct record_opts opts = { .target = { - .uid = UINT_MAX, .uses_mmap = true, }, .no_buffering = true, diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 8e328bbd509d..4053ff2813bb 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -46,7 +46,6 @@ static int test__task_exit(struct test_suite *test __maybe_unused, int subtest _ struct evsel *evsel; struct evlist *evlist; struct target target = { - .uid = UINT_MAX, .uses_mmap = true, }; const char *argv[] = { "true", NULL }; diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c index 92e2f054b45e..d0e013eeb0f7 100644 --- a/tools/perf/util/bpf-filter.c +++ b/tools/perf/util/bpf-filter.c @@ -450,7 +450,7 @@ int perf_bpf_filter__prepare(struct evsel *evsel, struct target *target) struct bpf_program *prog; struct bpf_link *link; struct perf_bpf_filter_entry *entry; - bool needs_idx_hash = !target__has_cpu(target) && !target->uid_str; + bool needs_idx_hash = !target__has_cpu(target); entry = calloc(MAX_FILTERS, sizeof(*entry)); if (entry == NULL) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index dcd1130502df..bed91bc88510 100644 
--- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1006,8 +1006,7 @@ int evlist__create_maps(struct evlist *evlist, struct target *target) * per-thread data. thread_map__new_str will call * thread_map__new_all_cpus to enumerate all threads. */ - threads = thread_map__new_str(target->pid, target->tid, target->uid, - all_threads); + threads = thread_map__new_str(target->pid, target->tid, UINT_MAX, all_threads); if (!threads) return -1; diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c index f3ad59ccfa99..8cf71bea295a 100644 --- a/tools/perf/util/target.c +++ b/tools/perf/util/target.c @@ -28,20 +28,6 @@ enum target_errno target__validate(struct target *target) ret = TARGET_ERRNO__PID_OVERRIDE_CPU; } - /* UID and PID are mutually exclusive */ - if (target->tid && target->uid_str) { - target->uid_str = NULL; - if (ret == TARGET_ERRNO__SUCCESS) - ret = TARGET_ERRNO__PID_OVERRIDE_UID; - } - - /* UID and CPU are mutually exclusive */ - if (target->uid_str && target->cpu_list) { - target->cpu_list = NULL; - if (ret == TARGET_ERRNO__SUCCESS) - ret = TARGET_ERRNO__UID_OVERRIDE_CPU; - } - /* PID and SYSTEM are mutually exclusive */ if (target->tid && target->system_wide) { target->system_wide = false; @@ -49,13 +35,6 @@ enum target_errno target__validate(struct target *target) ret = TARGET_ERRNO__PID_OVERRIDE_SYSTEM; } - /* UID and SYSTEM are mutually exclusive */ - if (target->uid_str && target->system_wide) { - target->system_wide = false; - if (ret == TARGET_ERRNO__SUCCESS) - ret = TARGET_ERRNO__UID_OVERRIDE_SYSTEM; - } - /* BPF and CPU are mutually exclusive */ if (target->bpf_str && target->cpu_list) { target->cpu_list = NULL; @@ -70,13 +49,6 @@ enum target_errno target__validate(struct target *target) ret = TARGET_ERRNO__BPF_OVERRIDE_PID; } - /* BPF and UID are mutually exclusive */ - if (target->bpf_str && target->uid_str) { - target->uid_str = NULL; - if (ret == TARGET_ERRNO__SUCCESS) - ret = TARGET_ERRNO__BPF_OVERRIDE_UID; - } - /* BPF 
and THREADS are mutually exclusive */ if (target->bpf_str && target->per_thread) { target->per_thread = false; @@ -124,31 +96,19 @@ uid_t parse_uid(const char *str) return result->pw_uid; } -enum target_errno target__parse_uid(struct target *target) -{ - target->uid = parse_uid(target->uid_str); - - return target->uid != UINT_MAX ? TARGET_ERRNO__SUCCESS : TARGET_ERRNO__INVALID_UID; -} - /* * This must have a same ordering as the enum target_errno. */ static const char *target__error_str[] = { "PID/TID switch overriding CPU", - "PID/TID switch overriding UID", - "UID switch overriding CPU", "PID/TID switch overriding SYSTEM", - "UID switch overriding SYSTEM", "SYSTEM/CPU switch overriding PER-THREAD", "BPF switch overriding CPU", "BPF switch overriding PID/TID", - "BPF switch overriding UID", "BPF switch overriding THREAD", - "Invalid User: %s", }; -int target__strerror(struct target *target, int errnum, +int target__strerror(struct target *target __maybe_unused, int errnum, char *buf, size_t buflen) { int idx; @@ -173,10 +133,6 @@ int target__strerror(struct target *target, int errnum, snprintf(buf, buflen, "%s", msg); break; - case TARGET_ERRNO__INVALID_UID: - snprintf(buf, buflen, msg, target->uid_str); - break; - default: /* cannot reach here */ break; diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index e082bda990fb..84ebb9c940c6 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -9,9 +9,7 @@ struct target { const char *pid; const char *tid; const char *cpu_list; - const char *uid_str; const char *bpf_str; - uid_t uid; bool system_wide; bool uses_mmap; bool default_per_cpu; @@ -36,32 +34,24 @@ enum target_errno { /* for target__validate() */ TARGET_ERRNO__PID_OVERRIDE_CPU = __TARGET_ERRNO__START, - TARGET_ERRNO__PID_OVERRIDE_UID, - TARGET_ERRNO__UID_OVERRIDE_CPU, TARGET_ERRNO__PID_OVERRIDE_SYSTEM, - TARGET_ERRNO__UID_OVERRIDE_SYSTEM, TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD, TARGET_ERRNO__BPF_OVERRIDE_CPU, 
TARGET_ERRNO__BPF_OVERRIDE_PID, - TARGET_ERRNO__BPF_OVERRIDE_UID, TARGET_ERRNO__BPF_OVERRIDE_THREAD, - /* for target__parse_uid() */ - TARGET_ERRNO__INVALID_UID, - __TARGET_ERRNO__END, }; enum target_errno target__validate(struct target *target); uid_t parse_uid(const char *str); -enum target_errno target__parse_uid(struct target *target); int target__strerror(struct target *target, int errnum, char *buf, size_t buflen); static inline bool target__has_task(struct target *target) { - return target->tid || target->pid || target->uid_str; + return target->tid || target->pid; } static inline bool target__has_cpu(struct target *target) From 5128492b2b6bb3a2881e135da54fd8e224a5f610 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 4 Jun 2025 10:45:44 -0700 Subject: [PATCH 0068/2411] perf thread_map: Remove uid options Now the target doesn't have a uid, it is handled through BPF filters, remove the uid options to thread_map creation. Tidy up the functions used in tests to avoid passing unused arguments. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250604174545.2853620-11-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/tests/event-times.c | 4 +-- tools/perf/tests/keep-tracking.c | 2 +- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/openat-syscall-all-cpus.c | 2 +- tools/perf/tests/openat-syscall.c | 2 +- tools/perf/tests/perf-time-to-tsc.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/tests/thread-map.c | 2 +- tools/perf/util/evlist.c | 2 +- tools/perf/util/python.c | 10 +++---- tools/perf/util/thread_map.c | 32 ++-------------------- tools/perf/util/thread_map.h | 6 ++-- 12 files changed, 20 insertions(+), 48 deletions(-) diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index 2148024b4f4a..ae3b98bb42cf 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -62,7 +62,7 @@ static int attach__current_disabled(struct evlist *evlist) pr_debug("attaching to current thread as disabled\n"); - threads = thread_map__new(-1, getpid(), UINT_MAX); + threads = thread_map__new_by_tid(getpid()); if (threads == NULL) { pr_debug("thread_map__new\n"); return -1; @@ -88,7 +88,7 @@ static int attach__current_enabled(struct evlist *evlist) pr_debug("attaching to current thread as enabled\n"); - threads = thread_map__new(-1, getpid(), UINT_MAX); + threads = thread_map__new_by_tid(getpid()); if (threads == NULL) { pr_debug("failed to call thread_map__new\n"); return -1; diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 5a3b2bed07f3..eafb49eb0b56 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -78,7 +78,7 @@ static int test__keep_tracking(struct test_suite *test __maybe_unused, int subte int found, err = -1; const char *comm; - threads = thread_map__new(-1, getpid(), UINT_MAX); + threads = thread_map__new_by_tid(getpid()); CHECK_NOT_NULL__(threads); cpus = perf_cpu_map__new_online_cpus(); diff --git 
a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index bd2106628b34..04b547c6bdbe 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -46,7 +46,7 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest char sbuf[STRERR_BUFSIZE]; struct mmap *md; - threads = thread_map__new(-1, getpid(), UINT_MAX); + threads = thread_map__new_by_tid(getpid()); if (threads == NULL) { pr_debug("thread_map__new\n"); return -1; diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index fb114118c876..3644d6f52c07 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -28,7 +28,7 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb struct evsel *evsel; unsigned int nr_openat_calls = 111, i; cpu_set_t cpu_set; - struct perf_thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX); + struct perf_thread_map *threads = thread_map__new_by_tid(getpid()); char sbuf[STRERR_BUFSIZE]; char errbuf[BUFSIZ]; diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index 131b62271bfa..b54cbe5f1808 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -20,7 +20,7 @@ static int test__openat_syscall_event(struct test_suite *test __maybe_unused, int err = TEST_FAIL, fd; struct evsel *evsel; unsigned int nr_openat_calls = 111, i; - struct perf_thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX); + struct perf_thread_map *threads = thread_map__new_by_tid(getpid()); char sbuf[STRERR_BUFSIZE]; char errbuf[BUFSIZ]; diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index d3e40fa5482c..d4437410c99f 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -90,7 +90,7 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su struct mmap *md; 
- threads = thread_map__new(-1, getpid(), UINT_MAX); + threads = thread_map__new_by_tid(getpid()); CHECK_NOT_NULL__(threads); cpus = perf_cpu_map__new_online_cpus(); diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 6b3aac283c37..5be294014d3b 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -351,7 +351,7 @@ static int test__switch_tracking(struct test_suite *test __maybe_unused, int sub const char *comm; int err = -1; - threads = thread_map__new(-1, getpid(), UINT_MAX); + threads = thread_map__new_by_tid(getpid()); if (!threads) { pr_debug("thread_map__new failed!\n"); goto out_err; diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index 1fe521466bf4..54209592168d 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -115,7 +115,7 @@ static int test__thread_map_remove(struct test_suite *test __maybe_unused, int s TEST_ASSERT_VAL("failed to allocate map string", asprintf(&str, "%d,%d", getpid(), getppid()) >= 0); - threads = thread_map__new_str(str, NULL, 0, false); + threads = thread_map__new_str(str, /*tid=*/NULL, /*all_threads=*/false); free(str); TEST_ASSERT_VAL("failed to allocate thread_map", diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index bed91bc88510..5664ebf6bbc6 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1006,7 +1006,7 @@ int evlist__create_maps(struct evlist *evlist, struct target *target) * per-thread data. thread_map__new_str will call * thread_map__new_all_cpus to enumerate all threads. 
*/ - threads = thread_map__new_str(target->pid, target->tid, UINT_MAX, all_threads); + threads = thread_map__new_str(target->pid, target->tid, all_threads); if (!threads) return -1; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 321c333877fa..82666bcd2eda 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -566,14 +566,14 @@ struct pyrf_thread_map { static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = { "pid", "tid", "uid", NULL }; - int pid = -1, tid = -1, uid = UINT_MAX; + static char *kwlist[] = { "pid", "tid", NULL }; + int pid = -1, tid = -1; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii", - kwlist, &pid, &tid, &uid)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", + kwlist, &pid, &tid)) return -1; - pthreads->threads = thread_map__new(pid, tid, uid); + pthreads->threads = thread_map__new(pid, tid); if (pthreads->threads == NULL) return -1; return 0; diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index b5f12390c355..ca193c1374ed 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -72,7 +72,7 @@ struct perf_thread_map *thread_map__new_by_tid(pid_t tid) return threads; } -static struct perf_thread_map *__thread_map__new_all_cpus(uid_t uid) +static struct perf_thread_map *thread_map__new_all_cpus(void) { DIR *proc; int max_threads = 32, items, i; @@ -98,15 +98,6 @@ static struct perf_thread_map *__thread_map__new_all_cpus(uid_t uid) if (*end) /* only interested in proper numerical dirents */ continue; - snprintf(path, sizeof(path), "/proc/%s", dirent->d_name); - - if (uid != UINT_MAX) { - struct stat st; - - if (stat(path, &st) != 0 || st.st_uid != uid) - continue; - } - snprintf(path, sizeof(path), "/proc/%d/task", pid); items = scandir(path, &namelist, filter, NULL); if (items <= 0) { @@ -157,24 +148,11 @@ static struct perf_thread_map *__thread_map__new_all_cpus(uid_t uid) 
goto out_closedir; } -struct perf_thread_map *thread_map__new_all_cpus(void) -{ - return __thread_map__new_all_cpus(UINT_MAX); -} - -struct perf_thread_map *thread_map__new_by_uid(uid_t uid) -{ - return __thread_map__new_all_cpus(uid); -} - -struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid) +struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid) { if (pid != -1) return thread_map__new_by_pid(pid); - if (tid == -1 && uid != UINT_MAX) - return thread_map__new_by_uid(uid); - return thread_map__new_by_tid(tid); } @@ -289,15 +267,11 @@ struct perf_thread_map *thread_map__new_by_tid_str(const char *tid_str) goto out; } -struct perf_thread_map *thread_map__new_str(const char *pid, const char *tid, - uid_t uid, bool all_threads) +struct perf_thread_map *thread_map__new_str(const char *pid, const char *tid, bool all_threads) { if (pid) return thread_map__new_by_pid_str(pid); - if (!tid && uid != UINT_MAX) - return thread_map__new_by_uid(uid); - if (all_threads) return thread_map__new_all_cpus(); diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 00ec05fc1656..fc16d87f32fb 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -11,13 +11,11 @@ struct perf_record_thread_map; struct perf_thread_map *thread_map__new_dummy(void); struct perf_thread_map *thread_map__new_by_pid(pid_t pid); struct perf_thread_map *thread_map__new_by_tid(pid_t tid); -struct perf_thread_map *thread_map__new_by_uid(uid_t uid); -struct perf_thread_map *thread_map__new_all_cpus(void); -struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid); +struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid); struct perf_thread_map *thread_map__new_event(struct perf_record_thread_map *event); struct perf_thread_map *thread_map__new_str(const char *pid, - const char *tid, uid_t uid, bool all_threads); + const char *tid, bool all_threads); struct perf_thread_map *thread_map__new_by_tid_str(const char *tid_str); 
From 5ae6a303c22a07234108430b5fba869d5d1697e3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 3 Jun 2025 15:13:58 -0700 Subject: [PATCH 0069/2411] tools/build: Remove some unused libbpf pre-1.0 feature test logic Commit 76a97cf2e169 ("perf build: Remove libbpf pre-1.0 feature tests") removed the libbpf feature test logic used by perf in favor of using LIBBPF_MAJOR_VERSION. Remove some build targets that should have been removed as part of that clean up. Fixes: 76a97cf2e169 ("perf build: Remove libbpf pre-1.0 feature tests") Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250603221358.2562167-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/build/Makefile.feature | 6 ------ tools/build/feature/Makefile | 21 --------------------- 2 files changed, 27 deletions(-) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 57bd995ce6af..3a1fddd38db0 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -126,12 +126,6 @@ FEATURE_TESTS_EXTRA := \ llvm \ clang \ libbpf \ - libbpf-btf__load_from_kernel_by_id \ - libbpf-bpf_prog_load \ - libbpf-bpf_object__next_program \ - libbpf-bpf_object__next_map \ - libbpf-bpf_program__set_insns \ - libbpf-bpf_create_map \ libpfm4 \ libdebuginfod \ clang-bpf-co-re \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index b8b5fb183dd4..4aa166d3eab6 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -339,27 +339,6 @@ $(OUTPUT)test-bpf.bin: $(OUTPUT)test-libbpf.bin: $(BUILD) -lbpf -$(OUTPUT)test-libbpf-btf__load_from_kernel_by_id.bin: - $(BUILD) -lbpf - -$(OUTPUT)test-libbpf-bpf_prog_load.bin: - $(BUILD) -lbpf - -$(OUTPUT)test-libbpf-bpf_map_create.bin: - $(BUILD) -lbpf - -$(OUTPUT)test-libbpf-bpf_object__next_program.bin: - $(BUILD) -lbpf - -$(OUTPUT)test-libbpf-bpf_object__next_map.bin: - $(BUILD) -lbpf - -$(OUTPUT)test-libbpf-bpf_program__set_insns.bin: - $(BUILD) -lbpf - -$(OUTPUT)test-libbpf-btf__raw_data.bin: - 
$(BUILD) -lbpf - $(OUTPUT)test-sdt.bin: $(BUILD) From 46e34646ae3e0e38da2454e2205ab49c6f97c578 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Sun, 1 Jun 2025 10:32:52 -0700 Subject: [PATCH 0070/2411] perf trace: Remove --map-dump documentation The --map-dump option was removed in 5e6da6be3082 ("perf trace: Migrate BPF augmentation to use a skeleton"), this patch removes its remaining documentation. Fixes: 5e6da6be3082 ("perf trace: Migrate BPF augmentation to use a skeleton") Signed-off-by: Howard Chu Link: https://lore.kernel.org/r/20250601173252.717780-1-howardchu95@gmail.com Signed-off-by: Namhyung Kim --- tools/perf/Documentation/perf-trace.txt | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index c1fb6056a0d3..973fede403a0 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -238,14 +238,6 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. the same beautifiers used in the strace-like enter+exit lines to augment the tracepoint arguments. ---map-dump:: - Dump BPF maps setup by events passed via -e, for instance the augmented_raw_syscalls - living in tools/perf/examples/bpf/augmented_raw_syscalls.c. For now this - dumps just boolean map values and integer keys, in time this will print in hex - by default and use BTF when available, as well as use functions to do pretty - printing using the existing 'perf trace' syscall arg beautifiers to map integer - arguments to strings (pid to comm, syscall id to syscall name, etc). - --force-btf:: Use btf_dump to pretty print syscall argument data, instead of using hand-crafted pretty printers. This option is intended for testing BTF integration in perf trace. 
btf_dump-based From 6612d4d4910d45b15dee4a989b1aa2ddce8cc617 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Wed, 28 May 2025 12:11:43 -0700 Subject: [PATCH 0071/2411] perf test trace: Use shell's -f flag to check if vmlinux exists To match the style of the existing codebase, no functional changes were applied. Signed-off-by: Howard Chu Tested-by: Namhyung Kim Link: https://lore.kernel.org/r/20250528191148.89118-2-howardchu95@gmail.com Signed-off-by: Namhyung Kim --- tools/perf/tests/shell/trace_btf_enum.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/trace_btf_enum.sh b/tools/perf/tests/shell/trace_btf_enum.sh index f0b49f7fb57d..b3775209a0b1 100755 --- a/tools/perf/tests/shell/trace_btf_enum.sh +++ b/tools/perf/tests/shell/trace_btf_enum.sh @@ -17,7 +17,7 @@ skip_if_no_perf_trace || exit 2 check_vmlinux() { echo "Checking if vmlinux exists" - if ! ls /sys/kernel/btf/vmlinux 1>/dev/null 2>&1 + if [ ! -f /sys/kernel/btf/vmlinux ] then echo "trace+enum test [Skipped missing vmlinux BTF support]" err=2 From 78fc8bfe44bf4326fd295572ca2a6b01489459e6 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Wed, 28 May 2025 12:11:44 -0700 Subject: [PATCH 0072/2411] perf test trace: Remove set -e and print trace test's error messages Currently perf test utilizes the set -e option in shell that exit immediately if a command exits with a non-zero status, this prevents further error handling and introduces ambiguity. This patch removes set -e and prints the error message after invoking perf trace during perf tests. In my case, the command that exits with a non-zero status is perf trace instead of grep, because it can't find the 'timer:hrtimer_setup' tracepoint, see below. 
Before: $ sudo /tmp/perf test enum -vv 107: perf trace enum augmentation tests: 107: perf trace enum augmentation tests : Running --- start --- test child forked, pid 783533 Checking if vmlinux exists Tracing syscall landlock_add_rule Tracing non-syscall tracepoint syscall ---- end(-1) ---- 107: perf trace enum augmentation tests : FAILED! After: $ sudo /tmp/perf test enum -vv 107: perf trace enum augmentation tests: 107: perf trace enum augmentation tests : Running --- start --- test child forked, pid 851658 Checking if vmlinux exists Tracing syscall landlock_add_rule Tracing non-syscall tracepoint timer:hrtimer_setup,timer:hrtimer_start [tracepoint failure] Failed to trace tracepoint timer:hrtimer_setup,timer:hrtimer_start, output: event syntax error: 'timer:hrtimer_setup,timer:hrtimer_start' \___ unknown tracepoint Error: File /sys/kernel/tracing//events/timer/hrtimer_setup not found. Hint: Perhaps this kernel misses some CONFIG_ setting to enable this feature?. Run 'perf list' for a list of valid events Usage: perf trace [] [] or: perf trace [] -- [] or: perf trace record [] [] or: perf trace record [] -- [] -e, --event event/syscall selector. use 'perf list' to list available events---- end(-1) ---- 107: perf trace enum augmentation tests : FAILED! 
Signed-off-by: Howard Chu Tested-by: Namhyung Kim Link: https://lore.kernel.org/r/20250528191148.89118-3-howardchu95@gmail.com Signed-off-by: Namhyung Kim --- tools/perf/tests/shell/trace_btf_enum.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/perf/tests/shell/trace_btf_enum.sh b/tools/perf/tests/shell/trace_btf_enum.sh index b3775209a0b1..f59ba34fac4c 100755 --- a/tools/perf/tests/shell/trace_btf_enum.sh +++ b/tools/perf/tests/shell/trace_btf_enum.sh @@ -3,7 +3,6 @@ # SPDX-License-Identifier: GPL-2.0 err=0 -set -e syscall="landlock_add_rule" non_syscall="timer:hrtimer_setup,timer:hrtimer_start" @@ -34,22 +33,24 @@ trace_landlock() { return fi - if perf trace -e $syscall $TESTPROG 2>&1 | \ - grep -q -E ".*landlock_add_rule\(ruleset_fd: 11, rule_type: (LANDLOCK_RULE_PATH_BENEATH|LANDLOCK_RULE_NET_PORT), rule_attr: 0x[a-f0-9]+, flags: 45\) = -1.*" + output="$(perf trace -e $syscall $TESTPROG 2>&1)" + if echo "$output" | grep -q -E ".*landlock_add_rule\(ruleset_fd: 11, rule_type: (LANDLOCK_RULE_PATH_BENEATH|LANDLOCK_RULE_NET_PORT), rule_attr: 0x[a-f0-9]+, flags: 45\) = -1.*" then err=0 else + printf "[syscall failure] Failed to trace syscall $syscall, output:\n$output\n" err=1 fi } trace_non_syscall() { - echo "Tracing non-syscall tracepoint ${non-syscall}" - if perf trace -e $non_syscall --max-events=1 2>&1 | \ - grep -q -E '.*timer:hrtimer_.*\(.*mode: HRTIMER_MODE_.*\)$' + echo "Tracing non-syscall tracepoint ${non_syscall}" + output="$(perf trace -e $non_syscall --max-events=1 2>&1)" + if echo "$output" | grep -q -E '.*timer:hrtimer_.*\(.*mode: HRTIMER_MODE_.*\)$' then err=0 else + printf "[tracepoint failure] Failed to trace tracepoint $non_syscall, output:\n$output\n" err=1 fi } From fc4a0ae7e19ed1d921202414b525aa275e831b64 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Wed, 28 May 2025 12:11:45 -0700 Subject: [PATCH 0073/2411] perf test trace: Stop tracing hrtimer_setup event in trace enum test The event 
'timer:hrtimer_setup' is relatively new, for older kernels, perf trace enum tests won't run as the event 'timer:hrtimer_setup' cannot be found. It was originally called 'timer:hrtimer_init', before being renamed in: commit 244132c4e577 ("tracing/timers: Rename the hrtimer_init event to hrtimer_setup") Using timer:hrtimer_start should be enough for current testing, and hopefully 'start' won't be renamed in the future. Before: $ sudo /tmp/perf test enum -vv 107: perf trace enum augmentation tests: 107: perf trace enum augmentation tests : Running --- start --- test child forked, pid 786187 Checking if vmlinux exists Tracing syscall landlock_add_rule Tracing non-syscall tracepoint timer:hrtimer_setup,timer:hrtimer_start [tracepoint failure] Failed to trace timer:hrtimer_setup,timer:hrtimer_start tracepoint, output: event syntax error: 'timer:hrtimer_setup,timer:hrtimer_start' \___ unknown tracepoint Error: File /sys/kernel/tracing//events/timer/hrtimer_setup not found. Hint: Perhaps this kernel misses some CONFIG_ setting to enable this feature?. Run 'perf list' for a list of valid events Usage: perf trace [] [] or: perf trace [] -- [] or: perf trace record [] [] or: perf trace record [] -- [] -e, --event event/syscall selector. use 'perf list' to list available events ---- end(-1) ---- 107: perf trace enum augmentation tests : FAILED! 
After: $ sudo /tmp/perf test enum -vv 107: perf trace enum augmentation tests: 107: perf trace enum augmentation tests : Running --- start --- test child forked, pid 808547 Checking if vmlinux exists Tracing syscall landlock_add_rule Tracing non-syscall tracepoint timer:hrtimer_start ---- end(0) ---- 107: perf trace enum augmentation tests : Ok Signed-off-by: Howard Chu Tested-by: Namhyung Kim Link: https://lore.kernel.org/r/20250528191148.89118-4-howardchu95@gmail.com Signed-off-by: Namhyung Kim --- tools/perf/tests/shell/trace_btf_enum.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/trace_btf_enum.sh b/tools/perf/tests/shell/trace_btf_enum.sh index f59ba34fac4c..c37017bfeb5e 100755 --- a/tools/perf/tests/shell/trace_btf_enum.sh +++ b/tools/perf/tests/shell/trace_btf_enum.sh @@ -5,7 +5,7 @@ err=0 syscall="landlock_add_rule" -non_syscall="timer:hrtimer_setup,timer:hrtimer_start" +non_syscall="timer:hrtimer_start" TESTPROG="perf test -w landlock" From d796c51ee52a10413435816ebdae8a0aa8df8f93 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Wed, 28 May 2025 12:11:46 -0700 Subject: [PATCH 0074/2411] perf test trace: Remove set -e for BTF general tests Remove set -e and print error messages in BTF general tests. Before: $ sudo /tmp/perf test btf -vv 108: perf trace BTF general tests: 108: perf trace BTF general tests : Running --- start --- test child forked, pid 889299 Checking if vmlinux BTF exists Testing perf trace's string augmentation String augmentation test failed ---- end(-1) ---- 108: perf trace BTF general tests : FAILED! 
After: $ sudo /tmp/perf test btf -vv 108: perf trace BTF general tests: 108: perf trace BTF general tests : Running --- start --- test child forked, pid 886551 Checking if vmlinux BTF exists Testing perf trace's string augmentation String augmentation test failed, output: :886566/886566 renameat2(CWD, "/tmp/file1_RcMa", CWD, "/tmp/file2_RcMa", NOREPLACE) = 0---- end(-1) ---- 108: perf trace BTF general tests : FAILED! Signed-off-by: Howard Chu Tested-by: Namhyung Kim Link: https://lore.kernel.org/r/20250528191148.89118-5-howardchu95@gmail.com Signed-off-by: Namhyung Kim --- tools/perf/tests/shell/trace_btf_general.sh | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tools/perf/tests/shell/trace_btf_general.sh b/tools/perf/tests/shell/trace_btf_general.sh index a25d8744695e..5fa50d815203 100755 --- a/tools/perf/tests/shell/trace_btf_general.sh +++ b/tools/perf/tests/shell/trace_btf_general.sh @@ -3,7 +3,6 @@ # SPDX-License-Identifier: GPL-2.0 err=0 -set -e # shellcheck source=lib/probe.sh . "$(dirname $0)"/lib/probe.sh @@ -28,10 +27,10 @@ check_vmlinux() { trace_test_string() { echo "Testing perf trace's string augmentation" - if ! perf trace -e renameat* --max-events=1 -- mv ${file1} ${file2} 2>&1 | \ - grep -q -E "^mv/[0-9]+ renameat(2)?\(.*, \"${file1}\", .*, \"${file2}\", .*\) += +[0-9]+$" + output="$(perf trace -e renameat* --max-events=1 -- mv ${file1} ${file2} 2>&1)" + if ! echo "$output" | grep -q -E "^mv/[0-9]+ renameat(2)?\(.*, \"${file1}\", .*, \"${file2}\", .*\) += +[0-9]+$" then - echo "String augmentation test failed" + printf "String augmentation test failed, output:\n$output\n" err=1 fi } @@ -39,20 +38,20 @@ trace_test_string() { trace_test_buffer() { echo "Testing perf trace's buffer augmentation" # echo will insert a newline (\10) at the end of the buffer - if ! 
perf trace -e write --max-events=1 -- echo "${buffer}" 2>&1 | \ - grep -q -E "^echo/[0-9]+ write\([0-9]+, ${buffer}.*, [0-9]+\) += +[0-9]+$" + output="$(perf trace -e write --max-events=1 -- echo "${buffer}" 2>&1)" + if ! echo "$output" | grep -q -E "^echo/[0-9]+ write\([0-9]+, ${buffer}.*, [0-9]+\) += +[0-9]+$" then - echo "Buffer augmentation test failed" + printf "Buffer augmentation test failed, output:\n$output\n" err=1 fi } trace_test_struct_btf() { echo "Testing perf trace's struct augmentation" - if ! perf trace -e clock_nanosleep --force-btf --max-events=1 -- sleep 1 2>&1 | \ - grep -q -E "^sleep/[0-9]+ clock_nanosleep\(0, 0, \{1,\}, 0x[0-9a-f]+\) += +[0-9]+$" + output="$(perf trace -e clock_nanosleep --force-btf --max-events=1 -- sleep 1 2>&1)" + if ! echo "$output" | grep -q -E "^sleep/[0-9]+ clock_nanosleep\(0, 0, \{1,\}, 0x[0-9a-f]+\) += +[0-9]+$" then - echo "BTF struct augmentation test failed" + printf "BTF struct augmentation test failed, output:\n$output\n" err=1 fi } From 77e11efedba606af21224ee5ed5305aebbd029da Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Wed, 28 May 2025 12:11:47 -0700 Subject: [PATCH 0075/2411] perf test trace: Use --sort-events in BTF general tests Without the '--sort-events' flag, perf trace doesn't receive and process events based on their arrival time, thus PERF_RECORD_COMM event that assigns the correct comm to a PID, may be delivered and processed after regular samples, causing trace outputs not having a 'comm', e.g. 'mv', instead, having the default PID placeholder, e.g. ':14514'. Hopefully this answers Namhyung's question in [1]. You can simply justify the statement with this diff: [2]. 
Now, simply run this command multiple times: $ touch /tmp/file1 && sudo /tmp/perf trace -e renameat* -- mv /tmp/file1 /tmp/file2 && rm -f /tmp/file2 And you should see two types of results: $ touch /tmp/file1 && sudo /tmp/perf trace -e renameat* -- mv /tmp/file1 /tmp/file2 && rm -f /tmp/file2 [debug] deliver [debug] machine__process_comm_event [OVERRIDE] old :1221169 new mv str mv [debug] deliver [debug] deliver [debug] deliver [debug] deliver [debug] deliver [debug] deliver [debug] deliver [debug] deliver [debug] deliver [debug] deliver 0.000 ( 0.013 ms): mv/1221169 renameat2(olddfd: CWD, oldname: "/tmp/file1", newdfd: CWD, newname: "/tmp/file2", flags: NOREPLACE) = 0 [debug] deliver $ touch /tmp/file1 && sudo /tmp/perf trace -e renameat* -- mv /tmp/file1 /tmp/file2 && rm -f /tmp/file2 [debug] deliver [debug] deliver [debug] deliver [debug] deliver [debug] deliver [debug] deliver [debug] deliver 0.000 ( 0.014 ms): :1221398/1221398 renameat2(olddfd: CWD, oldname: "/tmp/file1", newdfd: CWD, newname: "/tmp/file2", flags: NOREPLACE) = 0 [debug] deliver [debug] deliver [debug] machine__process_comm_event [OVERRIDE] old :1221398 new mv str mv [debug] deliver [debug] deliver [debug] deliver Anyway, use --sort-events in BTF general tests to avoid :PID, a comm is preferred. 
[1]: https://lore.kernel.org/linux-perf-users/Z_AeswETE5xLcPT8@google.com/ [2]: https://gist.githubusercontent.com/Sberm/6b72b2a1cf1c62244f1f996481769baf/raw/529667bd74a2e7e1953bbd4be545bf875da8a3e7/unsorted.patch Signed-off-by: Howard Chu Tested-by: Namhyung Kim Link: https://lore.kernel.org/r/20250528191148.89118-6-howardchu95@gmail.com Signed-off-by: Namhyung Kim --- tools/perf/tests/shell/trace_btf_general.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/shell/trace_btf_general.sh b/tools/perf/tests/shell/trace_btf_general.sh index 5fa50d815203..30cd3a53f868 100755 --- a/tools/perf/tests/shell/trace_btf_general.sh +++ b/tools/perf/tests/shell/trace_btf_general.sh @@ -27,7 +27,7 @@ check_vmlinux() { trace_test_string() { echo "Testing perf trace's string augmentation" - output="$(perf trace -e renameat* --max-events=1 -- mv ${file1} ${file2} 2>&1)" + output="$(perf trace --sort-events -e renameat* --max-events=1 -- mv ${file1} ${file2} 2>&1)" if ! echo "$output" | grep -q -E "^mv/[0-9]+ renameat(2)?\(.*, \"${file1}\", .*, \"${file2}\", .*\) += +[0-9]+$" then printf "String augmentation test failed, output:\n$output\n" @@ -38,7 +38,7 @@ trace_test_string() { trace_test_buffer() { echo "Testing perf trace's buffer augmentation" # echo will insert a newline (\10) at the end of the buffer - output="$(perf trace -e write --max-events=1 -- echo "${buffer}" 2>&1)" + output="$(perf trace --sort-events -e write --max-events=1 -- echo "${buffer}" 2>&1)" if ! echo "$output" | grep -q -E "^echo/[0-9]+ write\([0-9]+, ${buffer}.*, [0-9]+\) += +[0-9]+$" then printf "Buffer augmentation test failed, output:\n$output\n" @@ -48,7 +48,7 @@ trace_test_buffer() { trace_test_struct_btf() { echo "Testing perf trace's struct augmentation" - output="$(perf trace -e clock_nanosleep --force-btf --max-events=1 -- sleep 1 2>&1)" + output="$(perf trace --sort-events -e clock_nanosleep --force-btf --max-events=1 -- sleep 1 2>&1)" if ! 
echo "$output" | grep -q -E "^sleep/[0-9]+ clock_nanosleep\(0, 0, \{1,\}, 0x[0-9a-f]+\) += +[0-9]+$" then printf "BTF struct augmentation test failed, output:\n$output\n" From 63e37590cd73b0aaf0dbee3c8bdb00c3ff77c8da Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Wed, 28 May 2025 12:11:48 -0700 Subject: [PATCH 0076/2411] perf test trace: Change the regex pattern in the struct test Ian mentioned a reliably occurring failure in the trace_btf_general test where he obtained trace output of: sleep/279619 clock_nanosleep(0, 0, {1,1,}, 0x7ffcd47b6450) = 0 But the regex pattern used for verification is "^sleep/[0-9]+ clock_nanosleep\(0, 0, \{1,\}, ..." This led to a mismatch. The reason is, different sleep commands use different timespec data to call clock_nanosleep, on my machine, the value of tv_nsec is 0. ~~~ $ sudo /tmp/perf/perf trace -e clock_nanosleep -- sleep 1 0.000 (1000.196 ms): sleep/54261 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7ffe13529550) = 0 ~~~ While Ian had this trace log: ~~~ $ sudo /tmp/perf/perf trace -e clock_nanosleep -- sleep 1 0.000 (1000.208 ms): sleep/1710732 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 1 }, rmtp: 0x7ffc091f4090) = 0 ~~~ Because sleep's behavior of setting 'tv_nsec' is not certain, and tv_sec is most definitely 1, this patch relaxes the key regex pattern to '\{1,.*\}' for a better chance of matching.
Signed-off-by: Howard Chu Tested-by: Namhyung Kim Link: https://lore.kernel.org/r/20250528191148.89118-7-howardchu95@gmail.com Reported-by: Ian Rogers Signed-off-by: Namhyung Kim --- tools/perf/tests/shell/trace_btf_general.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/trace_btf_general.sh b/tools/perf/tests/shell/trace_btf_general.sh index 30cd3a53f868..ef2da806be6b 100755 --- a/tools/perf/tests/shell/trace_btf_general.sh +++ b/tools/perf/tests/shell/trace_btf_general.sh @@ -49,7 +49,7 @@ trace_test_buffer() { trace_test_struct_btf() { echo "Testing perf trace's struct augmentation" output="$(perf trace --sort-events -e clock_nanosleep --force-btf --max-events=1 -- sleep 1 2>&1)" - if ! echo "$output" | grep -q -E "^sleep/[0-9]+ clock_nanosleep\(0, 0, \{1,\}, 0x[0-9a-f]+\) += +[0-9]+$" + if ! echo "$output" | grep -q -E "^sleep/[0-9]+ clock_nanosleep\(0, 0, \{1,.*\}, 0x[0-9a-f]+\) += +[0-9]+$" then printf "BTF struct augmentation test failed, output:\n$output\n" err=1 From 5d939fbdd480cdf276eccc01eda3ed41e37d3f8a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Mon, 9 Jun 2025 23:34:04 -0400 Subject: [PATCH 0077/2411] tools/power turbostat: regression fix: --show C1E% The new default idle counter groupings broke "--show C1E%" (or any other C-state %) Also delete a stray debug printf from the same offending commit. 
Reported-by: Zhang Rui Fixes: ec4acd3166d8 ("tools/power turbostat: disable "cpuidle" invocation counters, by default") Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 5230e072e414..33a54a9e0781 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2429,7 +2429,6 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) } if (i == MAX_BIC) { - fprintf(stderr, "deferred %s\n", name_list); if (mode == SHOW_LIST) { deferred_add_names[deferred_add_index++] = name_list; if (deferred_add_index >= MAX_DEFERRED) { @@ -10537,9 +10536,6 @@ void probe_cpuidle_residency(void) int min_state = 1024, max_state = 0; char *sp; - if (!DO_BIC(BIC_pct_idle)) - return; - for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); From ea04fe1b90cbb42966b471a4982bc52215b62857 Mon Sep 17 00:00:00 2001 From: Aditya Bodkhe Date: Tue, 29 Apr 2025 12:21:32 +0530 Subject: [PATCH 0078/2411] perf script: perf script tests fails with segfault perf script tests fail with a segmentation fault as below: 92: perf script tests: --- start --- test child forked, pid 103769 DB test [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB /tmp/perf-test-script.7rbftEpOzX/perf.data (9 samples) ] /usr/libexec/perf-core/tests/shell/script.sh: line 35: 103780 Segmentation fault (core dumped) perf script -i "${perfdatafile}" -s "${db_test}" --- Cleaning up --- ---- end(-1) ---- 92: perf script tests : FAILED!
Backtrace pointed to: #0 0x0000000010247dd0 in maps.machine () #1 0x00000000101d178c in db_export.sample () #2 0x00000000103412c8 in python_process_event () #3 0x000000001004eb28 in process_sample_event () #4 0x000000001024fcd0 in machines.deliver_event () #5 0x000000001025005c in perf_session.deliver_event () #6 0x00000000102568b0 in __ordered_events__flush.part.0 () #7 0x0000000010251618 in perf_session.process_events () #8 0x0000000010053620 in cmd_script () #9 0x00000000100b5a28 in run_builtin () #10 0x00000000100b5f94 in handle_internal_command () #11 0x0000000010011114 in main () Further investigation reveals that this occurs in the `perf script tests`, because it uses the `db_test.py` script. This script sets `perf_db_export_mode = True`. With `perf_db_export_mode` enabled, if a sample originates from a hypervisor, perf doesn't set maps for the "[H]" sample in the code. Consequently, `al->maps` remains NULL when `maps__machine(al->maps)` is called from `db_export__sample`. As al->maps can be NULL in the case of hypervisor samples, use thread->maps, because even for a hypervisor sample the machine should exist. If we don't have a machine for some reason, return -1 to avoid a segmentation fault.
Reported-by: Disha Goel Signed-off-by: Aditya Bodkhe Reviewed-by: Adrian Hunter Tested-by: Disha Goel Link: https://lore.kernel.org/r/20250429065132.36839-1-adityab1@linux.ibm.com Suggested-by: Adrian Hunter Signed-off-by: Namhyung Kim --- tools/perf/util/db-export.c | 11 ++++++++--- .../perf/util/scripting-engines/trace-event-python.c | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 50f916374d87..8f52e8cefcf3 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -181,7 +181,7 @@ static int db_ids_from_al(struct db_export *dbe, struct addr_location *al, if (al->map) { struct dso *dso = map__dso(al->map); - err = db_export__dso(dbe, dso, maps__machine(al->maps)); + err = db_export__dso(dbe, dso, maps__machine(thread__maps(al->thread))); if (err) return err; *dso_db_id = dso__db_id(dso); @@ -256,6 +256,7 @@ static struct call_path *call_path_from_sample(struct db_export *dbe, al.map = map__get(node->ms.map); al.maps = maps__get(thread__maps(thread)); al.addr = node->ip; + al.thread = thread__get(thread); if (al.map && !al.sym) al.sym = dso__find_symbol(map__dso(al.map), al.addr); @@ -358,14 +359,18 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, }; struct thread *main_thread; struct comm *comm = NULL; - struct machine *machine; + struct machine *machine = NULL; int err; + if (thread__maps(thread)) + machine = maps__machine(thread__maps(thread)); + if (!machine) + return -1; + err = db_export__evsel(dbe, evsel); if (err) return err; - machine = maps__machine(al->maps); err = db_export__machine(dbe, machine); if (err) return err; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 520729e78965..00f2c6c5114d 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1306,7 +1306,7 @@ 
static void python_export_sample_table(struct db_export *dbe, tuple_set_d64(t, 0, es->db_id); tuple_set_d64(t, 1, es->evsel->db_id); - tuple_set_d64(t, 2, maps__machine(es->al->maps)->db_id); + tuple_set_d64(t, 2, maps__machine(thread__maps(es->al->thread))->db_id); tuple_set_d64(t, 3, thread__db_id(es->al->thread)); tuple_set_d64(t, 4, es->comm_db_id); tuple_set_d64(t, 5, es->dso_db_id); From 1190410772090a68995a758c979ba44b986e2df2 Mon Sep 17 00:00:00 2001 From: Yuzhuo Jing Date: Wed, 4 Jun 2025 10:36:32 -0700 Subject: [PATCH 0079/2411] perf: Fix libjvmti.c sign compare error Fix the compile errors when compiling with -Werror=sign-compare. This is a follow-up patch to a previous patch series for a separate issue. Link: https://lore.kernel.org/lkml/aC9lXhPFcs5fkHWH@x1/ Signed-off-by: Yuzhuo Jing Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250604173632.2362759-1-yuzhuo@google.com Signed-off-by: Namhyung Kim --- tools/perf/jvmti/libjvmti.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c index fcca275e5bf9..82514e6532b8 100644 --- a/tools/perf/jvmti/libjvmti.c +++ b/tools/perf/jvmti/libjvmti.c @@ -141,11 +141,11 @@ copy_class_filename(const char * class_sign, const char * file_name, char * resu * Assume path name is class hierarchy, this is a common practice with Java programs */ if (*class_sign == 'L') { - int j, i = 0; + size_t j, i = 0; char *p = strrchr(class_sign, '/'); if (p) { /* drop the 'L' prefix and copy up to the final '/' */ - for (i = 0; i < (p - class_sign); i++) + for (i = 0; i < (size_t)(p - class_sign); i++) result[i] = class_sign[i+1]; } /* From 317eb8b3d7b87c583867f752c72913e06c53c2e1 Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Tue, 27 May 2025 13:23:28 +0800 Subject: [PATCH 0080/2411] dt-bindings: pinctl: amlogic,pinctrl-a4: Add compatible string for S7/S7D/S6 Update dt-binding document for pinctrl of Amlogic S7/S7D/S6. 
Signed-off-by: Xianwei Zhao Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250527-s6-s7-pinctrl-v3-1-44f6a0451519@amlogic.com Signed-off-by: Linus Walleij --- .../devicetree/bindings/pinctrl/amlogic,pinctrl-a4.yaml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/pinctrl/amlogic,pinctrl-a4.yaml b/Documentation/devicetree/bindings/pinctrl/amlogic,pinctrl-a4.yaml index a6ef4797e5c5..6ba66c2033b4 100644 --- a/Documentation/devicetree/bindings/pinctrl/amlogic,pinctrl-a4.yaml +++ b/Documentation/devicetree/bindings/pinctrl/amlogic,pinctrl-a4.yaml @@ -15,11 +15,18 @@ allOf: properties: compatible: oneOf: - - const: amlogic,pinctrl-a4 + - enum: + - amlogic,pinctrl-a4 + - amlogic,pinctrl-s6 + - amlogic,pinctrl-s7 - items: - enum: - amlogic,pinctrl-a5 - const: amlogic,pinctrl-a4 + - items: + - enum: + - amlogic,pinctrl-s7d + - const: amlogic,pinctrl-s7 "#address-cells": const: 2 From cfdedf7392e16f7c077b02ec13961a1b28e4f0a7 Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Tue, 27 May 2025 13:23:29 +0800 Subject: [PATCH 0081/2411] pinctrl: meson: a4: remove special data processing According to the data specifications of Amlogic's existing SoCs, the function register offset and the bit offset are the same value among various chips. Therefore, general processing can be carried out without the need for private data modification. Drop special data processing. 
Signed-off-by: Xianwei Zhao Link: https://lore.kernel.org/20250527-s6-s7-pinctrl-v3-2-44f6a0451519@amlogic.com Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-amlogic-a4.c | 33 ++-------------------- 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c index 385cc619df13..11f68224342e 100644 --- a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c +++ b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c @@ -50,15 +50,8 @@ struct aml_pio_control { u32 bit_offset[AML_NUM_REG]; }; -struct aml_reg_bit { - u32 bank_id; - u32 reg_offs[AML_NUM_REG]; - u32 bit_offs[AML_NUM_REG]; -}; - struct aml_pctl_data { unsigned int number; - struct aml_reg_bit rb_offs[]; }; struct aml_pmx_func { @@ -829,31 +822,11 @@ static const struct gpio_chip aml_gpio_template = { static void init_bank_register_bit(struct aml_pinctrl *info, struct aml_gpio_bank *bank) { - const struct aml_pctl_data *data = info->data; - const struct aml_reg_bit *aml_rb; - bool def_offs = true; int i; - if (data) { - for (i = 0; i < data->number; i++) { - aml_rb = &data->rb_offs[i]; - if (bank->bank_id == aml_rb->bank_id) { - def_offs = false; - break; - } - } - } - - if (def_offs) { - for (i = 0; i < AML_NUM_REG; i++) { - bank->pc.reg_offset[i] = aml_def_regoffs[i]; - bank->pc.bit_offset[i] = 0; - } - } else { - for (i = 0; i < AML_NUM_REG; i++) { - bank->pc.reg_offset[i] = aml_rb->reg_offs[i]; - bank->pc.bit_offset[i] = aml_rb->bit_offs[i]; - } + for (i = 0; i < AML_NUM_REG; i++) { + bank->pc.reg_offset[i] = aml_def_regoffs[i]; + bank->pc.bit_offset[i] = 0; } } From 1f8e5dfddaa794c97a80b2a9952be368d8fdee6e Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Tue, 27 May 2025 13:23:30 +0800 Subject: [PATCH 0082/2411] pinctrl: meson: support amlogic S6/S7/S7D SoC In some Amlogic SoCs, to save register space or due to some abnormal arrangements, two sets of pins share one mux register. 
A group starting from pin0 is the main pin group, which acquires the register address through DTS and has management permissions, but the register bit offset is undetermined. Another GPIO group acts as a subordinate group: some of its pins' mux settings use the shared register, with the bit offset starting from bit0, but this group does not have register management permissions. This patch implements support for this situation. Signed-off-by: Xianwei Zhao Link: https://lore.kernel.org/20250527-s6-s7-pinctrl-v3-3-44f6a0451519@amlogic.com Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-amlogic-a4.c | 101 ++++++++++++++++++++- 1 file changed, 99 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c index 11f68224342e..2541c864086d 100644 --- a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c +++ b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c @@ -50,8 +50,23 @@ struct aml_pio_control { u32 bit_offset[AML_NUM_REG]; }; +/* + * partial bank(subordinate) pins mux config use other bank(main) mux registgers + * m_bank_id: the main bank which pin_id from 0, but register bit not from bit 0 + * m_bit_offs: bit offset the main bank mux register + * sid: start pin_id of subordinate bank + * eid: end pin_id of subordinate bank + */ +struct multi_mux { + unsigned int m_bank_id; + unsigned int m_bit_offs; + unsigned int sid; + unsigned int eid; +}; + struct aml_pctl_data { unsigned int number; + const struct multi_mux *p_mux; }; struct aml_pmx_func { @@ -71,10 +86,12 @@ struct aml_gpio_bank { struct gpio_chip gpio_chip; struct aml_pio_control pc; u32 bank_id; + u32 mux_bit_offs; unsigned int pin_base; struct regmap *reg_mux; struct regmap *reg_gpio; struct regmap *reg_ds; + const struct multi_mux *p_mux; }; struct aml_pinctrl { @@ -106,13 +123,46 @@ static const char *aml_bank_name[31] = { "GPIOCC", "TEST_N", "ANALOG" }; +const struct multi_mux multi_mux_s7[] = { + { + .m_bank_id = AMLOGIC_GPIO_CC, + .m_bit_offs = 24, + .sid = (AMLOGIC_GPIO_X << 8) +
16, + .eid = (AMLOGIC_GPIO_X << 8) + 19, + }, +}; + +const struct aml_pctl_data s7_priv_data = { + .number = ARRAY_SIZE(multi_mux_s7), + .p_mux = multi_mux_s7, +}; + +const struct multi_mux multi_mux_s6[] = { + { + .m_bank_id = AMLOGIC_GPIO_CC, + .m_bit_offs = 24, + .sid = (AMLOGIC_GPIO_X << 8) + 16, + .eid = (AMLOGIC_GPIO_X << 8) + 19, + }, { + .m_bank_id = AMLOGIC_GPIO_F, + .m_bit_offs = 4, + .sid = (AMLOGIC_GPIO_D << 8) + 6, + .eid = (AMLOGIC_GPIO_D << 8) + 6, + }, +}; + +const struct aml_pctl_data s6_priv_data = { + .number = ARRAY_SIZE(multi_mux_s6), + .p_mux = multi_mux_s6, +}; + static int aml_pmx_calc_reg_and_offset(struct pinctrl_gpio_range *range, unsigned int pin, unsigned int *reg, unsigned int *offset) { unsigned int shift; - shift = (pin - range->pin_base) << 2; + shift = ((pin - range->pin_base) << 2) + *offset; *reg = (shift / 32) * 4; *offset = shift % 32; @@ -124,9 +174,36 @@ static int aml_pctl_set_function(struct aml_pinctrl *info, int pin_id, int func) { struct aml_gpio_bank *bank = gpio_chip_to_bank(range->gc); + unsigned int shift; int reg; - int offset; + int i; + unsigned int offset = bank->mux_bit_offs; + const struct multi_mux *p_mux; + /* peculiar mux reg set */ + if (bank->p_mux) { + p_mux = bank->p_mux; + if (pin_id >= p_mux->sid && pin_id <= p_mux->eid) { + bank = NULL; + for (i = 0; i < info->nbanks; i++) { + if (info->banks[i].bank_id == p_mux->m_bank_id) { + bank = &info->banks[i]; + break; + } + } + + if (!bank || !bank->reg_mux) + return -EINVAL; + + shift = (pin_id - p_mux->sid) << 2; + reg = (shift / 32) * 4; + offset = shift % 32; + return regmap_update_bits(bank->reg_mux, reg, + 0xf << offset, (func & 0xf) << offset); + } + } + + /* normal mux reg set */ if (!bank->reg_mux) return 0; @@ -822,12 +899,30 @@ static const struct gpio_chip aml_gpio_template = { static void init_bank_register_bit(struct aml_pinctrl *info, struct aml_gpio_bank *bank) { + const struct aml_pctl_data *data = info->data; + const struct multi_mux *p_mux; 
int i; for (i = 0; i < AML_NUM_REG; i++) { bank->pc.reg_offset[i] = aml_def_regoffs[i]; bank->pc.bit_offset[i] = 0; } + + bank->mux_bit_offs = 0; + + if (data) { + for (i = 0; i < data->number; i++) { + p_mux = &data->p_mux[i]; + if (bank->bank_id == p_mux->m_bank_id) { + bank->mux_bit_offs = p_mux->m_bit_offs; + break; + } + if (p_mux->sid >> 8 == bank->bank_id) { + bank->p_mux = p_mux; + break; + } + } + } } static int aml_gpiolib_register_bank(struct aml_pinctrl *info, @@ -994,6 +1089,8 @@ static int aml_pctl_probe(struct platform_device *pdev) static const struct of_device_id aml_pctl_of_match[] = { { .compatible = "amlogic,pinctrl-a4", }, + { .compatible = "amlogic,pinctrl-s7", .data = &s7_priv_data, }, + { .compatible = "amlogic,pinctrl-s6", .data = &s6_priv_data, }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, aml_pctl_dt_match); From 86491c2b99e5adbb56d76286d6668effb36d3c90 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Mon, 2 Jun 2025 18:19:12 +0200 Subject: [PATCH 0083/2411] dt-bindings: pinctrl: rockchip: increase max amount of device functions With the introduction of the RK3576, the maximum device function ID used increased to 14, as anyone can easily verify for themselves with: rg -g '*-pinctrl.dtsi' '<\d+\s+RK_P..\s+(?\d+)\s.*>;$' --trim \ -NI -r '$func' arch/arm64/boot/dts/rockchip/ | sort -g | uniq Unfortunately, this wasn't caught by dt-validate as those pins are omit-if-no-ref and we had no reference to them in any tree so far. Once again kick the can down the road by increasing the limit to 14. 
Acked-by: Conor Dooley Signed-off-by: Nicolas Frattaroli Reviewed-by: Linus Walleij Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/20250602-rk3576-pwm-v2-1-a6434b0ce60c@collabora.com Signed-off-by: Linus Walleij --- Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.yaml index 960758dc417f..125af766b992 100644 --- a/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.yaml @@ -135,7 +135,7 @@ additionalProperties: description: Pin bank index. - minimum: 0 - maximum: 13 + maximum: 14 description: Mux 0 means GPIO and mux 1 to N means the specific device function. From 90256033c11028a57437b145449c0dab196183b9 Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Mon, 9 Jun 2025 13:51:15 +0200 Subject: [PATCH 0084/2411] pinctrl: cirrus: madera-core: Use devm_pinctrl_register_mappings() Use devm_pinctrl_register_mappings(), so the mappings are automatically unregistered by the core. If pinctrl_enable() failed during the probe, pinctrl_mappings were not freed. Now it is done by the core. 
Fixes: 218d72a77b0b ("pinctrl: madera: Add driver for Cirrus Logic Madera codecs") Signed-off-by: Thomas Richard Reviewed-by: Richard Fitzgerald Link: https://lore.kernel.org/20250609-pinctrl-madera-devm-pinctrl-register-mappings-v1-1-ba2c2822cf6c@bootlin.com Signed-off-by: Linus Walleij --- drivers/pinctrl/cirrus/pinctrl-madera-core.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/pinctrl/cirrus/pinctrl-madera-core.c b/drivers/pinctrl/cirrus/pinctrl-madera-core.c index 73ec5b9beb49..d19ef13224cc 100644 --- a/drivers/pinctrl/cirrus/pinctrl-madera-core.c +++ b/drivers/pinctrl/cirrus/pinctrl-madera-core.c @@ -1061,8 +1061,9 @@ static int madera_pin_probe(struct platform_device *pdev) /* if the configuration is provided through pdata, apply it */ if (pdata->gpio_configs) { - ret = pinctrl_register_mappings(pdata->gpio_configs, - pdata->n_gpio_configs); + ret = devm_pinctrl_register_mappings(priv->dev, + pdata->gpio_configs, + pdata->n_gpio_configs); if (ret) return dev_err_probe(priv->dev, ret, "Failed to register pdata mappings\n"); @@ -1081,17 +1082,8 @@ static int madera_pin_probe(struct platform_device *pdev) return 0; } -static void madera_pin_remove(struct platform_device *pdev) -{ - struct madera_pin_private *priv = platform_get_drvdata(pdev); - - if (priv->madera->pdata.gpio_configs) - pinctrl_unregister_mappings(priv->madera->pdata.gpio_configs); -} - static struct platform_driver madera_pin_driver = { .probe = madera_pin_probe, - .remove = madera_pin_remove, .driver = { .name = "madera-pinctrl", }, From 2c0c883f895f16fd9d367ec2e64bccab907d8d87 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 5 Jun 2025 17:17:47 -0500 Subject: [PATCH 0085/2411] remoteproc: qcom: pas: Conclude the rename from adsp The change that renamed the driver from "adsp" to "pas" didn't change any of the implementation. The result is an aesthetic eyesore, and confusing to many. 
Conclude the rename of the driver, by updating function, structures and variable names to match what the driver actually is. The "Hexagon v5" is also dropped from the name and Kconfig, as this isn't correct either. No functional change. Fixes: 9e004f97161d ("remoteproc: qcom: Rename Hexagon v5 PAS driver") Signed-off-by: Bjorn Andersson Reviewed-by: Wasim Nazir Link: https://lore.kernel.org/r/20250605-pas-rename-v2-1-f1c89e49e691@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- drivers/remoteproc/Kconfig | 11 +- drivers/remoteproc/qcom_q6v5_pas.c | 615 ++++++++++++++--------------- 2 files changed, 310 insertions(+), 316 deletions(-) diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index 83962a114dc9..48a0d3a69ed0 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -214,7 +214,7 @@ config QCOM_Q6V5_MSS handled by QCOM_Q6V5_PAS driver. config QCOM_Q6V5_PAS - tristate "Qualcomm Hexagon v5 Peripheral Authentication Service support" + tristate "Qualcomm Peripheral Authentication Service support" depends on OF && ARCH_QCOM depends on QCOM_SMEM depends on RPMSG_QCOM_SMD || RPMSG_QCOM_SMD=n @@ -229,11 +229,10 @@ config QCOM_Q6V5_PAS select QCOM_RPROC_COMMON select QCOM_SCM help - Say y here to support the TrustZone based Peripheral Image Loader - for the Qualcomm Hexagon v5 based remote processors. This is commonly - used to control subsystems such as ADSP (Audio DSP), - CDSP (Compute DSP), MPSS (Modem Peripheral SubSystem), and - SLPI (Sensor Low Power Island). + Say y here to support the TrustZone based Peripheral Image Loader for + the Qualcomm remote processors. This is commonly used to control + subsystems such as ADSP (Audio DSP), CDSP (Compute DSP), MPSS (Modem + Peripheral SubSystem), and SLPI (Sensor Low Power Island). 
config QCOM_Q6V5_WCSS tristate "Qualcomm Hexagon based WCSS Peripheral Image Loader" diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c index b306f223127c..02e29171cbbe 100644 --- a/drivers/remoteproc/qcom_q6v5_pas.c +++ b/drivers/remoteproc/qcom_q6v5_pas.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Qualcomm ADSP/SLPI Peripheral Image Loader for MSM8974 and MSM8996 + * Qualcomm Peripheral Authentication Service remoteproc driver * * Copyright (C) 2016 Linaro Ltd * Copyright (C) 2014 Sony Mobile Communications AB @@ -31,11 +31,11 @@ #include "qcom_q6v5.h" #include "remoteproc_internal.h" -#define ADSP_DECRYPT_SHUTDOWN_DELAY_MS 100 +#define QCOM_PAS_DECRYPT_SHUTDOWN_DELAY_MS 100 #define MAX_ASSIGN_COUNT 3 -struct adsp_data { +struct qcom_pas_data { int crash_reason_smem; const char *firmware_name; const char *dtb_firmware_name; @@ -60,7 +60,7 @@ struct adsp_data { int region_assign_vmid; }; -struct qcom_adsp { +struct qcom_pas { struct device *dev; struct rproc *rproc; @@ -119,36 +119,37 @@ struct qcom_adsp { struct qcom_scm_pas_metadata dtb_pas_metadata; }; -static void adsp_segment_dump(struct rproc *rproc, struct rproc_dump_segment *segment, - void *dest, size_t offset, size_t size) +static void qcom_pas_segment_dump(struct rproc *rproc, + struct rproc_dump_segment *segment, + void *dest, size_t offset, size_t size) { - struct qcom_adsp *adsp = rproc->priv; + struct qcom_pas *pas = rproc->priv; int total_offset; - total_offset = segment->da + segment->offset + offset - adsp->mem_phys; - if (total_offset < 0 || total_offset + size > adsp->mem_size) { - dev_err(adsp->dev, + total_offset = segment->da + segment->offset + offset - pas->mem_phys; + if (total_offset < 0 || total_offset + size > pas->mem_size) { + dev_err(pas->dev, "invalid copy request for segment %pad with offset %zu and size %zu)\n", &segment->da, offset, size); memset(dest, 0xff, size); return; } - memcpy_fromio(dest, adsp->mem_region + 
total_offset, size); + memcpy_fromio(dest, pas->mem_region + total_offset, size); } -static void adsp_minidump(struct rproc *rproc) +static void qcom_pas_minidump(struct rproc *rproc) { - struct qcom_adsp *adsp = rproc->priv; + struct qcom_pas *pas = rproc->priv; if (rproc->dump_conf == RPROC_COREDUMP_DISABLED) return; - qcom_minidump(rproc, adsp->minidump_id, adsp_segment_dump); + qcom_minidump(rproc, pas->minidump_id, qcom_pas_segment_dump); } -static int adsp_pds_enable(struct qcom_adsp *adsp, struct device **pds, - size_t pd_count) +static int qcom_pas_pds_enable(struct qcom_pas *pas, struct device **pds, + size_t pd_count) { int ret; int i; @@ -174,8 +175,8 @@ static int adsp_pds_enable(struct qcom_adsp *adsp, struct device **pds, return ret; }; -static void adsp_pds_disable(struct qcom_adsp *adsp, struct device **pds, - size_t pd_count) +static void qcom_pas_pds_disable(struct qcom_pas *pas, struct device **pds, + size_t pd_count) { int i; @@ -185,65 +186,65 @@ static void adsp_pds_disable(struct qcom_adsp *adsp, struct device **pds, } } -static int adsp_shutdown_poll_decrypt(struct qcom_adsp *adsp) +static int qcom_pas_shutdown_poll_decrypt(struct qcom_pas *pas) { unsigned int retry_num = 50; int ret; do { - msleep(ADSP_DECRYPT_SHUTDOWN_DELAY_MS); - ret = qcom_scm_pas_shutdown(adsp->pas_id); + msleep(QCOM_PAS_DECRYPT_SHUTDOWN_DELAY_MS); + ret = qcom_scm_pas_shutdown(pas->pas_id); } while (ret == -EINVAL && --retry_num); return ret; } -static int adsp_unprepare(struct rproc *rproc) +static int qcom_pas_unprepare(struct rproc *rproc) { - struct qcom_adsp *adsp = rproc->priv; + struct qcom_pas *pas = rproc->priv; /* - * adsp_load() did pass pas_metadata to the SCM driver for storing + * qcom_pas_load() did pass pas_metadata to the SCM driver for storing * metadata context. It might have been released already if * auth_and_reset() was successful, but in other cases clean it up * here. 
*/ - qcom_scm_pas_metadata_release(&adsp->pas_metadata); - if (adsp->dtb_pas_id) - qcom_scm_pas_metadata_release(&adsp->dtb_pas_metadata); + qcom_scm_pas_metadata_release(&pas->pas_metadata); + if (pas->dtb_pas_id) + qcom_scm_pas_metadata_release(&pas->dtb_pas_metadata); return 0; } -static int adsp_load(struct rproc *rproc, const struct firmware *fw) +static int qcom_pas_load(struct rproc *rproc, const struct firmware *fw) { - struct qcom_adsp *adsp = rproc->priv; + struct qcom_pas *pas = rproc->priv; int ret; - /* Store firmware handle to be used in adsp_start() */ - adsp->firmware = fw; + /* Store firmware handle to be used in qcom_pas_start() */ + pas->firmware = fw; - if (adsp->lite_pas_id) - ret = qcom_scm_pas_shutdown(adsp->lite_pas_id); + if (pas->lite_pas_id) + ret = qcom_scm_pas_shutdown(pas->lite_pas_id); - if (adsp->dtb_pas_id) { - ret = request_firmware(&adsp->dtb_firmware, adsp->dtb_firmware_name, adsp->dev); + if (pas->dtb_pas_id) { + ret = request_firmware(&pas->dtb_firmware, pas->dtb_firmware_name, pas->dev); if (ret) { - dev_err(adsp->dev, "request_firmware failed for %s: %d\n", - adsp->dtb_firmware_name, ret); + dev_err(pas->dev, "request_firmware failed for %s: %d\n", + pas->dtb_firmware_name, ret); return ret; } - ret = qcom_mdt_pas_init(adsp->dev, adsp->dtb_firmware, adsp->dtb_firmware_name, - adsp->dtb_pas_id, adsp->dtb_mem_phys, - &adsp->dtb_pas_metadata); + ret = qcom_mdt_pas_init(pas->dev, pas->dtb_firmware, pas->dtb_firmware_name, + pas->dtb_pas_id, pas->dtb_mem_phys, + &pas->dtb_pas_metadata); if (ret) goto release_dtb_firmware; - ret = qcom_mdt_load_no_init(adsp->dev, adsp->dtb_firmware, adsp->dtb_firmware_name, - adsp->dtb_pas_id, adsp->dtb_mem_region, - adsp->dtb_mem_phys, adsp->dtb_mem_size, - &adsp->dtb_mem_reloc); + ret = qcom_mdt_load_no_init(pas->dev, pas->dtb_firmware, pas->dtb_firmware_name, + pas->dtb_pas_id, pas->dtb_mem_region, + pas->dtb_mem_phys, pas->dtb_mem_size, + &pas->dtb_mem_reloc); if (ret) goto 
release_dtb_metadata; } @@ -251,248 +252,246 @@ static int adsp_load(struct rproc *rproc, const struct firmware *fw) return 0; release_dtb_metadata: - qcom_scm_pas_metadata_release(&adsp->dtb_pas_metadata); + qcom_scm_pas_metadata_release(&pas->dtb_pas_metadata); release_dtb_firmware: - release_firmware(adsp->dtb_firmware); + release_firmware(pas->dtb_firmware); return ret; } -static int adsp_start(struct rproc *rproc) +static int qcom_pas_start(struct rproc *rproc) { - struct qcom_adsp *adsp = rproc->priv; + struct qcom_pas *pas = rproc->priv; int ret; - ret = qcom_q6v5_prepare(&adsp->q6v5); + ret = qcom_q6v5_prepare(&pas->q6v5); if (ret) return ret; - ret = adsp_pds_enable(adsp, adsp->proxy_pds, adsp->proxy_pd_count); + ret = qcom_pas_pds_enable(pas, pas->proxy_pds, pas->proxy_pd_count); if (ret < 0) goto disable_irqs; - ret = clk_prepare_enable(adsp->xo); + ret = clk_prepare_enable(pas->xo); if (ret) goto disable_proxy_pds; - ret = clk_prepare_enable(adsp->aggre2_clk); + ret = clk_prepare_enable(pas->aggre2_clk); if (ret) goto disable_xo_clk; - if (adsp->cx_supply) { - ret = regulator_enable(adsp->cx_supply); + if (pas->cx_supply) { + ret = regulator_enable(pas->cx_supply); if (ret) goto disable_aggre2_clk; } - if (adsp->px_supply) { - ret = regulator_enable(adsp->px_supply); + if (pas->px_supply) { + ret = regulator_enable(pas->px_supply); if (ret) goto disable_cx_supply; } - if (adsp->dtb_pas_id) { - ret = qcom_scm_pas_auth_and_reset(adsp->dtb_pas_id); + if (pas->dtb_pas_id) { + ret = qcom_scm_pas_auth_and_reset(pas->dtb_pas_id); if (ret) { - dev_err(adsp->dev, + dev_err(pas->dev, "failed to authenticate dtb image and release reset\n"); goto disable_px_supply; } } - ret = qcom_mdt_pas_init(adsp->dev, adsp->firmware, rproc->firmware, adsp->pas_id, - adsp->mem_phys, &adsp->pas_metadata); + ret = qcom_mdt_pas_init(pas->dev, pas->firmware, rproc->firmware, pas->pas_id, + pas->mem_phys, &pas->pas_metadata); if (ret) goto disable_px_supply; - ret = 
qcom_mdt_load_no_init(adsp->dev, adsp->firmware, rproc->firmware, adsp->pas_id, - adsp->mem_region, adsp->mem_phys, adsp->mem_size, - &adsp->mem_reloc); + ret = qcom_mdt_load_no_init(pas->dev, pas->firmware, rproc->firmware, pas->pas_id, + pas->mem_region, pas->mem_phys, pas->mem_size, + &pas->mem_reloc); if (ret) goto release_pas_metadata; - qcom_pil_info_store(adsp->info_name, adsp->mem_phys, adsp->mem_size); + qcom_pil_info_store(pas->info_name, pas->mem_phys, pas->mem_size); - ret = qcom_scm_pas_auth_and_reset(adsp->pas_id); + ret = qcom_scm_pas_auth_and_reset(pas->pas_id); if (ret) { - dev_err(adsp->dev, + dev_err(pas->dev, "failed to authenticate image and release reset\n"); goto release_pas_metadata; } - ret = qcom_q6v5_wait_for_start(&adsp->q6v5, msecs_to_jiffies(5000)); + ret = qcom_q6v5_wait_for_start(&pas->q6v5, msecs_to_jiffies(5000)); if (ret == -ETIMEDOUT) { - dev_err(adsp->dev, "start timed out\n"); - qcom_scm_pas_shutdown(adsp->pas_id); + dev_err(pas->dev, "start timed out\n"); + qcom_scm_pas_shutdown(pas->pas_id); goto release_pas_metadata; } - qcom_scm_pas_metadata_release(&adsp->pas_metadata); - if (adsp->dtb_pas_id) - qcom_scm_pas_metadata_release(&adsp->dtb_pas_metadata); + qcom_scm_pas_metadata_release(&pas->pas_metadata); + if (pas->dtb_pas_id) + qcom_scm_pas_metadata_release(&pas->dtb_pas_metadata); - /* Remove pointer to the loaded firmware, only valid in adsp_load() & adsp_start() */ - adsp->firmware = NULL; + /* firmware reference is passed from qcom_pas_load() to qcom_pas_start(), drop it now */ + pas->firmware = NULL; return 0; release_pas_metadata: - qcom_scm_pas_metadata_release(&adsp->pas_metadata); - if (adsp->dtb_pas_id) - qcom_scm_pas_metadata_release(&adsp->dtb_pas_metadata); + qcom_scm_pas_metadata_release(&pas->pas_metadata); + if (pas->dtb_pas_id) + qcom_scm_pas_metadata_release(&pas->dtb_pas_metadata); disable_px_supply: - if (adsp->px_supply) - regulator_disable(adsp->px_supply); + if (pas->px_supply) + regulator_disable(pas->px_supply);
disable_cx_supply: - if (adsp->cx_supply) - regulator_disable(adsp->cx_supply); + if (pas->cx_supply) + regulator_disable(pas->cx_supply); disable_aggre2_clk: - clk_disable_unprepare(adsp->aggre2_clk); + clk_disable_unprepare(pas->aggre2_clk); disable_xo_clk: - clk_disable_unprepare(adsp->xo); + clk_disable_unprepare(pas->xo); disable_proxy_pds: - adsp_pds_disable(adsp, adsp->proxy_pds, adsp->proxy_pd_count); + qcom_pas_pds_disable(pas, pas->proxy_pds, pas->proxy_pd_count); disable_irqs: - qcom_q6v5_unprepare(&adsp->q6v5); + qcom_q6v5_unprepare(&pas->q6v5); - /* Remove pointer to the loaded firmware, only valid in adsp_load() & adsp_start() */ - adsp->firmware = NULL; + /* firmware reference is passed from qcom_pas_load() to qcom_pas_start(), drop it now */ + pas->firmware = NULL; return ret; } static void qcom_pas_handover(struct qcom_q6v5 *q6v5) { - struct qcom_adsp *adsp = container_of(q6v5, struct qcom_adsp, q6v5); + struct qcom_pas *pas = container_of(q6v5, struct qcom_pas, q6v5); - if (adsp->px_supply) - regulator_disable(adsp->px_supply); - if (adsp->cx_supply) - regulator_disable(adsp->cx_supply); - clk_disable_unprepare(adsp->aggre2_clk); - clk_disable_unprepare(adsp->xo); - adsp_pds_disable(adsp, adsp->proxy_pds, adsp->proxy_pd_count); + if (pas->px_supply) + regulator_disable(pas->px_supply); + if (pas->cx_supply) + regulator_disable(pas->cx_supply); + clk_disable_unprepare(pas->aggre2_clk); + clk_disable_unprepare(pas->xo); + qcom_pas_pds_disable(pas, pas->proxy_pds, pas->proxy_pd_count); } -static int adsp_stop(struct rproc *rproc) +static int qcom_pas_stop(struct rproc *rproc) { - struct qcom_adsp *adsp = rproc->priv; + struct qcom_pas *pas = rproc->priv; int handover; int ret; - ret = qcom_q6v5_request_stop(&adsp->q6v5, adsp->sysmon); + ret = qcom_q6v5_request_stop(&pas->q6v5, pas->sysmon); if (ret == -ETIMEDOUT) - dev_err(adsp->dev, "timed out on wait\n"); + dev_err(pas->dev, "timed out on wait\n"); - ret = qcom_scm_pas_shutdown(adsp->pas_id); - if (ret &&
adsp->decrypt_shutdown) - ret = adsp_shutdown_poll_decrypt(adsp); + ret = qcom_scm_pas_shutdown(pas->pas_id); + if (ret && pas->decrypt_shutdown) + ret = qcom_pas_shutdown_poll_decrypt(pas); if (ret) - dev_err(adsp->dev, "failed to shutdown: %d\n", ret); + dev_err(pas->dev, "failed to shutdown: %d\n", ret); - if (adsp->dtb_pas_id) { - ret = qcom_scm_pas_shutdown(adsp->dtb_pas_id); + if (pas->dtb_pas_id) { + ret = qcom_scm_pas_shutdown(pas->dtb_pas_id); if (ret) - dev_err(adsp->dev, "failed to shutdown dtb: %d\n", ret); + dev_err(pas->dev, "failed to shutdown dtb: %d\n", ret); } - handover = qcom_q6v5_unprepare(&adsp->q6v5); + handover = qcom_q6v5_unprepare(&pas->q6v5); if (handover) - qcom_pas_handover(&adsp->q6v5); + qcom_pas_handover(&pas->q6v5); - if (adsp->smem_host_id) - ret = qcom_smem_bust_hwspin_lock_by_host(adsp->smem_host_id); + if (pas->smem_host_id) + ret = qcom_smem_bust_hwspin_lock_by_host(pas->smem_host_id); return ret; } -static void *adsp_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem) +static void *qcom_pas_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem) { - struct qcom_adsp *adsp = rproc->priv; + struct qcom_pas *pas = rproc->priv; int offset; - offset = da - adsp->mem_reloc; - if (offset < 0 || offset + len > adsp->mem_size) + offset = da - pas->mem_reloc; + if (offset < 0 || offset + len > pas->mem_size) return NULL; if (is_iomem) *is_iomem = true; - return adsp->mem_region + offset; + return pas->mem_region + offset; } -static unsigned long adsp_panic(struct rproc *rproc) +static unsigned long qcom_pas_panic(struct rproc *rproc) { - struct qcom_adsp *adsp = rproc->priv; + struct qcom_pas *pas = rproc->priv; - return qcom_q6v5_panic(&adsp->q6v5); + return qcom_q6v5_panic(&pas->q6v5); } -static const struct rproc_ops adsp_ops = { - .unprepare = adsp_unprepare, - .start = adsp_start, - .stop = adsp_stop, - .da_to_va = adsp_da_to_va, +static const struct rproc_ops qcom_pas_ops = { + .unprepare = 
qcom_pas_unprepare, + .start = qcom_pas_start, + .stop = qcom_pas_stop, + .da_to_va = qcom_pas_da_to_va, .parse_fw = qcom_register_dump_segments, - .load = adsp_load, - .panic = adsp_panic, + .load = qcom_pas_load, + .panic = qcom_pas_panic, }; -static const struct rproc_ops adsp_minidump_ops = { - .unprepare = adsp_unprepare, - .start = adsp_start, - .stop = adsp_stop, - .da_to_va = adsp_da_to_va, +static const struct rproc_ops qcom_pas_minidump_ops = { + .unprepare = qcom_pas_unprepare, + .start = qcom_pas_start, + .stop = qcom_pas_stop, + .da_to_va = qcom_pas_da_to_va, .parse_fw = qcom_register_dump_segments, - .load = adsp_load, - .panic = adsp_panic, - .coredump = adsp_minidump, + .load = qcom_pas_load, + .panic = qcom_pas_panic, + .coredump = qcom_pas_minidump, }; -static int adsp_init_clock(struct qcom_adsp *adsp) +static int qcom_pas_init_clock(struct qcom_pas *pas) { - adsp->xo = devm_clk_get(adsp->dev, "xo"); - if (IS_ERR(adsp->xo)) - return dev_err_probe(adsp->dev, PTR_ERR(adsp->xo), + pas->xo = devm_clk_get(pas->dev, "xo"); + if (IS_ERR(pas->xo)) + return dev_err_probe(pas->dev, PTR_ERR(pas->xo), "failed to get xo clock"); - - adsp->aggre2_clk = devm_clk_get_optional(adsp->dev, "aggre2"); - if (IS_ERR(adsp->aggre2_clk)) - return dev_err_probe(adsp->dev, PTR_ERR(adsp->aggre2_clk), + pas->aggre2_clk = devm_clk_get_optional(pas->dev, "aggre2"); + if (IS_ERR(pas->aggre2_clk)) + return dev_err_probe(pas->dev, PTR_ERR(pas->aggre2_clk), "failed to get aggre2 clock"); return 0; } -static int adsp_init_regulator(struct qcom_adsp *adsp) +static int qcom_pas_init_regulator(struct qcom_pas *pas) { - adsp->cx_supply = devm_regulator_get_optional(adsp->dev, "cx"); - if (IS_ERR(adsp->cx_supply)) { - if (PTR_ERR(adsp->cx_supply) == -ENODEV) - adsp->cx_supply = NULL; + pas->cx_supply = devm_regulator_get_optional(pas->dev, "cx"); + if (IS_ERR(pas->cx_supply)) { + if (PTR_ERR(pas->cx_supply) == -ENODEV) + pas->cx_supply = NULL; else - return PTR_ERR(adsp->cx_supply); + 
return PTR_ERR(pas->cx_supply); } - if (adsp->cx_supply) - regulator_set_load(adsp->cx_supply, 100000); + if (pas->cx_supply) + regulator_set_load(pas->cx_supply, 100000); - adsp->px_supply = devm_regulator_get_optional(adsp->dev, "px"); - if (IS_ERR(adsp->px_supply)) { - if (PTR_ERR(adsp->px_supply) == -ENODEV) - adsp->px_supply = NULL; + pas->px_supply = devm_regulator_get_optional(pas->dev, "px"); + if (IS_ERR(pas->px_supply)) { + if (PTR_ERR(pas->px_supply) == -ENODEV) + pas->px_supply = NULL; else - return PTR_ERR(adsp->px_supply); + return PTR_ERR(pas->px_supply); } return 0; } -static int adsp_pds_attach(struct device *dev, struct device **devs, - char **pd_names) +static int qcom_pas_pds_attach(struct device *dev, struct device **devs, char **pd_names) { size_t num_pds = 0; int ret; @@ -528,10 +527,9 @@ static int adsp_pds_attach(struct device *dev, struct device **devs, return ret; }; -static void adsp_pds_detach(struct qcom_adsp *adsp, struct device **pds, - size_t pd_count) +static void qcom_pas_pds_detach(struct qcom_pas *pas, struct device **pds, size_t pd_count) { - struct device *dev = adsp->dev; + struct device *dev = pas->dev; int i; /* Handle single power domain */ @@ -544,62 +542,62 @@ static void adsp_pds_detach(struct qcom_adsp *adsp, struct device **pds, dev_pm_domain_detach(pds[i], false); } -static int adsp_alloc_memory_region(struct qcom_adsp *adsp) +static int qcom_pas_alloc_memory_region(struct qcom_pas *pas) { struct reserved_mem *rmem; struct device_node *node; - node = of_parse_phandle(adsp->dev->of_node, "memory-region", 0); + node = of_parse_phandle(pas->dev->of_node, "memory-region", 0); if (!node) { - dev_err(adsp->dev, "no memory-region specified\n"); + dev_err(pas->dev, "no memory-region specified\n"); return -EINVAL; } rmem = of_reserved_mem_lookup(node); of_node_put(node); if (!rmem) { - dev_err(adsp->dev, "unable to resolve memory-region\n"); + dev_err(pas->dev, "unable to resolve memory-region\n"); return -EINVAL; } - 
adsp->mem_phys = adsp->mem_reloc = rmem->base; - adsp->mem_size = rmem->size; - adsp->mem_region = devm_ioremap_wc(adsp->dev, adsp->mem_phys, adsp->mem_size); - if (!adsp->mem_region) { - dev_err(adsp->dev, "unable to map memory region: %pa+%zx\n", - &rmem->base, adsp->mem_size); + pas->mem_phys = pas->mem_reloc = rmem->base; + pas->mem_size = rmem->size; + pas->mem_region = devm_ioremap_wc(pas->dev, pas->mem_phys, pas->mem_size); + if (!pas->mem_region) { + dev_err(pas->dev, "unable to map memory region: %pa+%zx\n", + &rmem->base, pas->mem_size); return -EBUSY; } - if (!adsp->dtb_pas_id) + if (!pas->dtb_pas_id) return 0; - node = of_parse_phandle(adsp->dev->of_node, "memory-region", 1); + node = of_parse_phandle(pas->dev->of_node, "memory-region", 1); if (!node) { - dev_err(adsp->dev, "no dtb memory-region specified\n"); + dev_err(pas->dev, "no dtb memory-region specified\n"); return -EINVAL; } rmem = of_reserved_mem_lookup(node); of_node_put(node); if (!rmem) { - dev_err(adsp->dev, "unable to resolve dtb memory-region\n"); + dev_err(pas->dev, "unable to resolve dtb memory-region\n"); return -EINVAL; } - adsp->dtb_mem_phys = adsp->dtb_mem_reloc = rmem->base; - adsp->dtb_mem_size = rmem->size; - adsp->dtb_mem_region = devm_ioremap_wc(adsp->dev, adsp->dtb_mem_phys, adsp->dtb_mem_size); - if (!adsp->dtb_mem_region) { - dev_err(adsp->dev, "unable to map dtb memory region: %pa+%zx\n", - &rmem->base, adsp->dtb_mem_size); + pas->dtb_mem_phys = pas->dtb_mem_reloc = rmem->base; + pas->dtb_mem_size = rmem->size; + pas->dtb_mem_region = devm_ioremap_wc(pas->dev, pas->dtb_mem_phys, pas->dtb_mem_size); + if (!pas->dtb_mem_region) { + dev_err(pas->dev, "unable to map dtb memory region: %pa+%zx\n", + &rmem->base, pas->dtb_mem_size); return -EBUSY; } return 0; } -static int adsp_assign_memory_region(struct qcom_adsp *adsp) +static int qcom_pas_assign_memory_region(struct qcom_pas *pas) { struct qcom_scm_vmperm perm[MAX_ASSIGN_COUNT]; struct device_node *node; @@ -607,45 +605,45 
@@ static int adsp_assign_memory_region(struct qcom_adsp *adsp) int offset; int ret; - if (!adsp->region_assign_idx) + if (!pas->region_assign_idx) return 0; - for (offset = 0; offset < adsp->region_assign_count; ++offset) { + for (offset = 0; offset < pas->region_assign_count; ++offset) { struct reserved_mem *rmem = NULL; - node = of_parse_phandle(adsp->dev->of_node, "memory-region", - adsp->region_assign_idx + offset); + node = of_parse_phandle(pas->dev->of_node, "memory-region", + pas->region_assign_idx + offset); if (node) rmem = of_reserved_mem_lookup(node); of_node_put(node); if (!rmem) { - dev_err(adsp->dev, "unable to resolve shareable memory-region index %d\n", + dev_err(pas->dev, "unable to resolve shareable memory-region index %d\n", offset); return -EINVAL; } - if (adsp->region_assign_shared) { + if (pas->region_assign_shared) { perm[0].vmid = QCOM_SCM_VMID_HLOS; perm[0].perm = QCOM_SCM_PERM_RW; - perm[1].vmid = adsp->region_assign_vmid; + perm[1].vmid = pas->region_assign_vmid; perm[1].perm = QCOM_SCM_PERM_RW; perm_size = 2; } else { - perm[0].vmid = adsp->region_assign_vmid; + perm[0].vmid = pas->region_assign_vmid; perm[0].perm = QCOM_SCM_PERM_RW; perm_size = 1; } - adsp->region_assign_phys[offset] = rmem->base; - adsp->region_assign_size[offset] = rmem->size; - adsp->region_assign_owners[offset] = BIT(QCOM_SCM_VMID_HLOS); + pas->region_assign_phys[offset] = rmem->base; + pas->region_assign_size[offset] = rmem->size; + pas->region_assign_owners[offset] = BIT(QCOM_SCM_VMID_HLOS); - ret = qcom_scm_assign_mem(adsp->region_assign_phys[offset], - adsp->region_assign_size[offset], - &adsp->region_assign_owners[offset], + ret = qcom_scm_assign_mem(pas->region_assign_phys[offset], + pas->region_assign_size[offset], + &pas->region_assign_owners[offset], perm, perm_size); if (ret < 0) { - dev_err(adsp->dev, "assign memory %d failed\n", offset); + dev_err(pas->dev, "assign memory %d failed\n", offset); return ret; } } @@ -653,35 +651,35 @@ static int 
adsp_assign_memory_region(struct qcom_adsp *adsp) return 0; } -static void adsp_unassign_memory_region(struct qcom_adsp *adsp) +static void qcom_pas_unassign_memory_region(struct qcom_pas *pas) { struct qcom_scm_vmperm perm; int offset; int ret; - if (!adsp->region_assign_idx || adsp->region_assign_shared) + if (!pas->region_assign_idx || pas->region_assign_shared) return; - for (offset = 0; offset < adsp->region_assign_count; ++offset) { + for (offset = 0; offset < pas->region_assign_count; ++offset) { perm.vmid = QCOM_SCM_VMID_HLOS; perm.perm = QCOM_SCM_PERM_RW; - ret = qcom_scm_assign_mem(adsp->region_assign_phys[offset], - adsp->region_assign_size[offset], - &adsp->region_assign_owners[offset], + ret = qcom_scm_assign_mem(pas->region_assign_phys[offset], + pas->region_assign_size[offset], + &pas->region_assign_owners[offset], &perm, 1); if (ret < 0) - dev_err(adsp->dev, "unassign memory %d failed\n", offset); + dev_err(pas->dev, "unassign memory %d failed\n", offset); } } -static int adsp_probe(struct platform_device *pdev) +static int qcom_pas_probe(struct platform_device *pdev) { - const struct adsp_data *desc; - struct qcom_adsp *adsp; + const struct qcom_pas_data *desc; + struct qcom_pas *pas; struct rproc *rproc; const char *fw_name, *dtb_fw_name = NULL; - const struct rproc_ops *ops = &adsp_ops; + const struct rproc_ops *ops = &qcom_pas_ops; int ret; desc = of_device_get_match_data(&pdev->dev); @@ -706,9 +704,9 @@ static int adsp_probe(struct platform_device *pdev) } if (desc->minidump_id) - ops = &adsp_minidump_ops; + ops = &qcom_pas_minidump_ops; - rproc = devm_rproc_alloc(&pdev->dev, desc->sysmon_name, ops, fw_name, sizeof(*adsp)); + rproc = devm_rproc_alloc(&pdev->dev, desc->sysmon_name, ops, fw_name, sizeof(*pas)); if (!rproc) { dev_err(&pdev->dev, "unable to allocate remoteproc\n"); @@ -718,68 +716,65 @@ static int adsp_probe(struct platform_device *pdev) rproc->auto_boot = desc->auto_boot; rproc_coredump_set_elf_info(rproc, ELFCLASS32, EM_NONE); - 
adsp = rproc->priv; - adsp->dev = &pdev->dev; - adsp->rproc = rproc; - adsp->minidump_id = desc->minidump_id; - adsp->pas_id = desc->pas_id; - adsp->lite_pas_id = desc->lite_pas_id; - adsp->info_name = desc->sysmon_name; - adsp->smem_host_id = desc->smem_host_id; - adsp->decrypt_shutdown = desc->decrypt_shutdown; - adsp->region_assign_idx = desc->region_assign_idx; - adsp->region_assign_count = min_t(int, MAX_ASSIGN_COUNT, desc->region_assign_count); - adsp->region_assign_vmid = desc->region_assign_vmid; - adsp->region_assign_shared = desc->region_assign_shared; + pas = rproc->priv; + pas->dev = &pdev->dev; + pas->rproc = rproc; + pas->minidump_id = desc->minidump_id; + pas->pas_id = desc->pas_id; + pas->lite_pas_id = desc->lite_pas_id; + pas->info_name = desc->sysmon_name; + pas->smem_host_id = desc->smem_host_id; + pas->decrypt_shutdown = desc->decrypt_shutdown; + pas->region_assign_idx = desc->region_assign_idx; + pas->region_assign_count = min_t(int, MAX_ASSIGN_COUNT, desc->region_assign_count); + pas->region_assign_vmid = desc->region_assign_vmid; + pas->region_assign_shared = desc->region_assign_shared; if (dtb_fw_name) { - adsp->dtb_firmware_name = dtb_fw_name; - adsp->dtb_pas_id = desc->dtb_pas_id; + pas->dtb_firmware_name = dtb_fw_name; + pas->dtb_pas_id = desc->dtb_pas_id; } - platform_set_drvdata(pdev, adsp); + platform_set_drvdata(pdev, pas); - ret = device_init_wakeup(adsp->dev, true); + ret = device_init_wakeup(pas->dev, true); if (ret) goto free_rproc; - ret = adsp_alloc_memory_region(adsp); + ret = qcom_pas_alloc_memory_region(pas); if (ret) goto free_rproc; - ret = adsp_assign_memory_region(adsp); + ret = qcom_pas_assign_memory_region(pas); if (ret) goto free_rproc; - ret = adsp_init_clock(adsp); + ret = qcom_pas_init_clock(pas); if (ret) goto unassign_mem; - ret = adsp_init_regulator(adsp); + ret = qcom_pas_init_regulator(pas); if (ret) goto unassign_mem; - ret = adsp_pds_attach(&pdev->dev, adsp->proxy_pds, - desc->proxy_pd_names); + ret = 
qcom_pas_pds_attach(&pdev->dev, pas->proxy_pds, desc->proxy_pd_names); if (ret < 0) goto unassign_mem; - adsp->proxy_pd_count = ret; + pas->proxy_pd_count = ret; - ret = qcom_q6v5_init(&adsp->q6v5, pdev, rproc, desc->crash_reason_smem, desc->load_state, - qcom_pas_handover); + ret = qcom_q6v5_init(&pas->q6v5, pdev, rproc, desc->crash_reason_smem, + desc->load_state, qcom_pas_handover); if (ret) goto detach_proxy_pds; - qcom_add_glink_subdev(rproc, &adsp->glink_subdev, desc->ssr_name); - qcom_add_smd_subdev(rproc, &adsp->smd_subdev); - qcom_add_pdm_subdev(rproc, &adsp->pdm_subdev); - adsp->sysmon = qcom_add_sysmon_subdev(rproc, - desc->sysmon_name, - desc->ssctl_id); - if (IS_ERR(adsp->sysmon)) { - ret = PTR_ERR(adsp->sysmon); + qcom_add_glink_subdev(rproc, &pas->glink_subdev, desc->ssr_name); + qcom_add_smd_subdev(rproc, &pas->smd_subdev); + qcom_add_pdm_subdev(rproc, &pas->pdm_subdev); + pas->sysmon = qcom_add_sysmon_subdev(rproc, desc->sysmon_name, desc->ssctl_id); + if (IS_ERR(pas->sysmon)) { + ret = PTR_ERR(pas->sysmon); goto deinit_remove_pdm_smd_glink; } - qcom_add_ssr_subdev(rproc, &adsp->ssr_subdev, desc->ssr_name); + qcom_add_ssr_subdev(rproc, &pas->ssr_subdev, desc->ssr_name); ret = rproc_add(rproc); if (ret) goto remove_ssr_sysmon; @@ -787,41 +782,41 @@ static int adsp_probe(struct platform_device *pdev) return 0; remove_ssr_sysmon: - qcom_remove_ssr_subdev(rproc, &adsp->ssr_subdev); - qcom_remove_sysmon_subdev(adsp->sysmon); + qcom_remove_ssr_subdev(rproc, &pas->ssr_subdev); + qcom_remove_sysmon_subdev(pas->sysmon); deinit_remove_pdm_smd_glink: - qcom_remove_pdm_subdev(rproc, &adsp->pdm_subdev); - qcom_remove_smd_subdev(rproc, &adsp->smd_subdev); - qcom_remove_glink_subdev(rproc, &adsp->glink_subdev); - qcom_q6v5_deinit(&adsp->q6v5); + qcom_remove_pdm_subdev(rproc, &pas->pdm_subdev); + qcom_remove_smd_subdev(rproc, &pas->smd_subdev); + qcom_remove_glink_subdev(rproc, &pas->glink_subdev); + qcom_q6v5_deinit(&pas->q6v5); detach_proxy_pds: - 
adsp_pds_detach(adsp, adsp->proxy_pds, adsp->proxy_pd_count); + qcom_pas_pds_detach(pas, pas->proxy_pds, pas->proxy_pd_count); unassign_mem: - adsp_unassign_memory_region(adsp); + qcom_pas_unassign_memory_region(pas); free_rproc: - device_init_wakeup(adsp->dev, false); + device_init_wakeup(pas->dev, false); return ret; } -static void adsp_remove(struct platform_device *pdev) +static void qcom_pas_remove(struct platform_device *pdev) { - struct qcom_adsp *adsp = platform_get_drvdata(pdev); + struct qcom_pas *pas = platform_get_drvdata(pdev); - rproc_del(adsp->rproc); + rproc_del(pas->rproc); - qcom_q6v5_deinit(&adsp->q6v5); - adsp_unassign_memory_region(adsp); - qcom_remove_glink_subdev(adsp->rproc, &adsp->glink_subdev); - qcom_remove_sysmon_subdev(adsp->sysmon); - qcom_remove_smd_subdev(adsp->rproc, &adsp->smd_subdev); - qcom_remove_pdm_subdev(adsp->rproc, &adsp->pdm_subdev); - qcom_remove_ssr_subdev(adsp->rproc, &adsp->ssr_subdev); - adsp_pds_detach(adsp, adsp->proxy_pds, adsp->proxy_pd_count); - device_init_wakeup(adsp->dev, false); + qcom_q6v5_deinit(&pas->q6v5); + qcom_pas_unassign_memory_region(pas); + qcom_remove_glink_subdev(pas->rproc, &pas->glink_subdev); + qcom_remove_sysmon_subdev(pas->sysmon); + qcom_remove_smd_subdev(pas->rproc, &pas->smd_subdev); + qcom_remove_pdm_subdev(pas->rproc, &pas->pdm_subdev); + qcom_remove_ssr_subdev(pas->rproc, &pas->ssr_subdev); + qcom_pas_pds_detach(pas, pas->proxy_pds, pas->proxy_pd_count); + device_init_wakeup(pas->dev, false); } -static const struct adsp_data adsp_resource_init = { +static const struct qcom_pas_data adsp_resource_init = { .crash_reason_smem = 423, .firmware_name = "adsp.mdt", .pas_id = 1, @@ -831,7 +826,7 @@ static const struct adsp_data adsp_resource_init = { .ssctl_id = 0x14, }; -static const struct adsp_data sa8775p_adsp_resource = { +static const struct qcom_pas_data sa8775p_adsp_resource = { .crash_reason_smem = 423, .firmware_name = "adsp.mbn", .pas_id = 1, @@ -848,7 +843,7 @@ static const struct 
adsp_data sa8775p_adsp_resource = { .ssctl_id = 0x14, }; -static const struct adsp_data sdm845_adsp_resource_init = { +static const struct qcom_pas_data sdm845_adsp_resource_init = { .crash_reason_smem = 423, .firmware_name = "adsp.mdt", .pas_id = 1, @@ -859,7 +854,7 @@ static const struct adsp_data sdm845_adsp_resource_init = { .ssctl_id = 0x14, }; -static const struct adsp_data sm6350_adsp_resource = { +static const struct qcom_pas_data sm6350_adsp_resource = { .crash_reason_smem = 423, .firmware_name = "adsp.mdt", .pas_id = 1, @@ -875,7 +870,7 @@ static const struct adsp_data sm6350_adsp_resource = { .ssctl_id = 0x14, }; -static const struct adsp_data sm6375_mpss_resource = { +static const struct qcom_pas_data sm6375_mpss_resource = { .crash_reason_smem = 421, .firmware_name = "modem.mdt", .pas_id = 4, @@ -890,7 +885,7 @@ static const struct adsp_data sm6375_mpss_resource = { .ssctl_id = 0x12, }; -static const struct adsp_data sm8150_adsp_resource = { +static const struct qcom_pas_data sm8150_adsp_resource = { .crash_reason_smem = 423, .firmware_name = "adsp.mdt", .pas_id = 1, @@ -905,7 +900,7 @@ static const struct adsp_data sm8150_adsp_resource = { .ssctl_id = 0x14, }; -static const struct adsp_data sm8250_adsp_resource = { +static const struct qcom_pas_data sm8250_adsp_resource = { .crash_reason_smem = 423, .firmware_name = "adsp.mdt", .pas_id = 1, @@ -922,7 +917,7 @@ static const struct adsp_data sm8250_adsp_resource = { .ssctl_id = 0x14, }; -static const struct adsp_data sm8350_adsp_resource = { +static const struct qcom_pas_data sm8350_adsp_resource = { .crash_reason_smem = 423, .firmware_name = "adsp.mdt", .pas_id = 1, @@ -938,7 +933,7 @@ static const struct adsp_data sm8350_adsp_resource = { .ssctl_id = 0x14, }; -static const struct adsp_data msm8996_adsp_resource = { +static const struct qcom_pas_data msm8996_adsp_resource = { .crash_reason_smem = 423, .firmware_name = "adsp.mdt", .pas_id = 1, @@ -952,7 +947,7 @@ static const struct adsp_data 
msm8996_adsp_resource = { .ssctl_id = 0x14, }; -static const struct adsp_data cdsp_resource_init = { +static const struct qcom_pas_data cdsp_resource_init = { .crash_reason_smem = 601, .firmware_name = "cdsp.mdt", .pas_id = 18, @@ -962,7 +957,7 @@ static const struct adsp_data cdsp_resource_init = { .ssctl_id = 0x17, }; -static const struct adsp_data sa8775p_cdsp0_resource = { +static const struct qcom_pas_data sa8775p_cdsp0_resource = { .crash_reason_smem = 601, .firmware_name = "cdsp0.mbn", .pas_id = 18, @@ -980,7 +975,7 @@ static const struct adsp_data sa8775p_cdsp0_resource = { .ssctl_id = 0x17, }; -static const struct adsp_data sa8775p_cdsp1_resource = { +static const struct qcom_pas_data sa8775p_cdsp1_resource = { .crash_reason_smem = 633, .firmware_name = "cdsp1.mbn", .pas_id = 30, @@ -998,7 +993,7 @@ static const struct adsp_data sa8775p_cdsp1_resource = { .ssctl_id = 0x20, }; -static const struct adsp_data sdm845_cdsp_resource_init = { +static const struct qcom_pas_data sdm845_cdsp_resource_init = { .crash_reason_smem = 601, .firmware_name = "cdsp.mdt", .pas_id = 18, @@ -1009,7 +1004,7 @@ static const struct adsp_data sdm845_cdsp_resource_init = { .ssctl_id = 0x17, }; -static const struct adsp_data sm6350_cdsp_resource = { +static const struct qcom_pas_data sm6350_cdsp_resource = { .crash_reason_smem = 601, .firmware_name = "cdsp.mdt", .pas_id = 18, @@ -1025,7 +1020,7 @@ static const struct adsp_data sm6350_cdsp_resource = { .ssctl_id = 0x17, }; -static const struct adsp_data sm8150_cdsp_resource = { +static const struct qcom_pas_data sm8150_cdsp_resource = { .crash_reason_smem = 601, .firmware_name = "cdsp.mdt", .pas_id = 18, @@ -1040,7 +1035,7 @@ static const struct adsp_data sm8150_cdsp_resource = { .ssctl_id = 0x17, }; -static const struct adsp_data sm8250_cdsp_resource = { +static const struct qcom_pas_data sm8250_cdsp_resource = { .crash_reason_smem = 601, .firmware_name = "cdsp.mdt", .pas_id = 18, @@ -1055,7 +1050,7 @@ static const struct adsp_data 
sm8250_cdsp_resource = { .ssctl_id = 0x17, }; -static const struct adsp_data sc8280xp_nsp0_resource = { +static const struct qcom_pas_data sc8280xp_nsp0_resource = { .crash_reason_smem = 601, .firmware_name = "cdsp.mdt", .pas_id = 18, @@ -1069,7 +1064,7 @@ static const struct adsp_data sc8280xp_nsp0_resource = { .ssctl_id = 0x17, }; -static const struct adsp_data sc8280xp_nsp1_resource = { +static const struct qcom_pas_data sc8280xp_nsp1_resource = { .crash_reason_smem = 633, .firmware_name = "cdsp.mdt", .pas_id = 30, @@ -1083,7 +1078,7 @@ static const struct adsp_data sc8280xp_nsp1_resource = { .ssctl_id = 0x20, }; -static const struct adsp_data x1e80100_adsp_resource = { +static const struct qcom_pas_data x1e80100_adsp_resource = { .crash_reason_smem = 423, .firmware_name = "adsp.mdt", .dtb_firmware_name = "adsp_dtb.mdt", @@ -1103,7 +1098,7 @@ static const struct adsp_data x1e80100_adsp_resource = { .ssctl_id = 0x14, }; -static const struct adsp_data x1e80100_cdsp_resource = { +static const struct qcom_pas_data x1e80100_cdsp_resource = { .crash_reason_smem = 601, .firmware_name = "cdsp.mdt", .dtb_firmware_name = "cdsp_dtb.mdt", @@ -1123,7 +1118,7 @@ static const struct adsp_data x1e80100_cdsp_resource = { .ssctl_id = 0x17, }; -static const struct adsp_data sm8350_cdsp_resource = { +static const struct qcom_pas_data sm8350_cdsp_resource = { .crash_reason_smem = 601, .firmware_name = "cdsp.mdt", .pas_id = 18, @@ -1140,7 +1135,7 @@ static const struct adsp_data sm8350_cdsp_resource = { .ssctl_id = 0x17, }; -static const struct adsp_data sa8775p_gpdsp0_resource = { +static const struct qcom_pas_data sa8775p_gpdsp0_resource = { .crash_reason_smem = 640, .firmware_name = "gpdsp0.mbn", .pas_id = 39, @@ -1157,7 +1152,7 @@ static const struct adsp_data sa8775p_gpdsp0_resource = { .ssctl_id = 0x21, }; -static const struct adsp_data sa8775p_gpdsp1_resource = { +static const struct qcom_pas_data sa8775p_gpdsp1_resource = { .crash_reason_smem = 641, .firmware_name = 
"gpdsp1.mbn", .pas_id = 40, @@ -1174,7 +1169,7 @@ static const struct adsp_data sa8775p_gpdsp1_resource = { .ssctl_id = 0x22, }; -static const struct adsp_data mpss_resource_init = { +static const struct qcom_pas_data mpss_resource_init = { .crash_reason_smem = 421, .firmware_name = "modem.mdt", .pas_id = 4, @@ -1191,7 +1186,7 @@ static const struct adsp_data mpss_resource_init = { .ssctl_id = 0x12, }; -static const struct adsp_data sc8180x_mpss_resource = { +static const struct qcom_pas_data sc8180x_mpss_resource = { .crash_reason_smem = 421, .firmware_name = "modem.mdt", .pas_id = 4, @@ -1206,7 +1201,7 @@ static const struct adsp_data sc8180x_mpss_resource = { .ssctl_id = 0x12, }; -static const struct adsp_data msm8996_slpi_resource_init = { +static const struct qcom_pas_data msm8996_slpi_resource_init = { .crash_reason_smem = 424, .firmware_name = "slpi.mdt", .pas_id = 12, @@ -1220,7 +1215,7 @@ static const struct adsp_data msm8996_slpi_resource_init = { .ssctl_id = 0x16, }; -static const struct adsp_data sdm845_slpi_resource_init = { +static const struct qcom_pas_data sdm845_slpi_resource_init = { .crash_reason_smem = 424, .firmware_name = "slpi.mdt", .pas_id = 12, @@ -1236,7 +1231,7 @@ static const struct adsp_data sdm845_slpi_resource_init = { .ssctl_id = 0x16, }; -static const struct adsp_data wcss_resource_init = { +static const struct qcom_pas_data wcss_resource_init = { .crash_reason_smem = 421, .firmware_name = "wcnss.mdt", .pas_id = 6, @@ -1246,7 +1241,7 @@ static const struct adsp_data wcss_resource_init = { .ssctl_id = 0x12, }; -static const struct adsp_data sdx55_mpss_resource = { +static const struct qcom_pas_data sdx55_mpss_resource = { .crash_reason_smem = 421, .firmware_name = "modem.mdt", .pas_id = 4, @@ -1261,7 +1256,7 @@ static const struct adsp_data sdx55_mpss_resource = { .ssctl_id = 0x22, }; -static const struct adsp_data sm8450_mpss_resource = { +static const struct qcom_pas_data sm8450_mpss_resource = { .crash_reason_smem = 421, 
.firmware_name = "modem.mdt", .pas_id = 4, @@ -1279,7 +1274,7 @@ static const struct adsp_data sm8450_mpss_resource = { .ssctl_id = 0x12, }; -static const struct adsp_data sm8550_adsp_resource = { +static const struct qcom_pas_data sm8550_adsp_resource = { .crash_reason_smem = 423, .firmware_name = "adsp.mdt", .dtb_firmware_name = "adsp_dtb.mdt", @@ -1299,7 +1294,7 @@ static const struct adsp_data sm8550_adsp_resource = { .smem_host_id = 2, }; -static const struct adsp_data sm8550_cdsp_resource = { +static const struct qcom_pas_data sm8550_cdsp_resource = { .crash_reason_smem = 601, .firmware_name = "cdsp.mdt", .dtb_firmware_name = "cdsp_dtb.mdt", @@ -1320,7 +1315,7 @@ static const struct adsp_data sm8550_cdsp_resource = { .smem_host_id = 5, }; -static const struct adsp_data sm8550_mpss_resource = { +static const struct qcom_pas_data sm8550_mpss_resource = { .crash_reason_smem = 421, .firmware_name = "modem.mdt", .dtb_firmware_name = "modem_dtb.mdt", @@ -1344,7 +1339,7 @@ static const struct adsp_data sm8550_mpss_resource = { .region_assign_vmid = QCOM_SCM_VMID_MSS_MSA, }; -static const struct adsp_data sc7280_wpss_resource = { +static const struct qcom_pas_data sc7280_wpss_resource = { .crash_reason_smem = 626, .firmware_name = "wpss.mdt", .pas_id = 6, @@ -1361,7 +1356,7 @@ static const struct adsp_data sc7280_wpss_resource = { .ssctl_id = 0x19, }; -static const struct adsp_data sm8650_cdsp_resource = { +static const struct qcom_pas_data sm8650_cdsp_resource = { .crash_reason_smem = 601, .firmware_name = "cdsp.mdt", .dtb_firmware_name = "cdsp_dtb.mdt", @@ -1386,7 +1381,7 @@ static const struct adsp_data sm8650_cdsp_resource = { .region_assign_vmid = QCOM_SCM_VMID_CDSP, }; -static const struct adsp_data sm8650_mpss_resource = { +static const struct qcom_pas_data sm8650_mpss_resource = { .crash_reason_smem = 421, .firmware_name = "modem.mdt", .dtb_firmware_name = "modem_dtb.mdt", @@ -1410,7 +1405,7 @@ static const struct adsp_data sm8650_mpss_resource = { 
.region_assign_vmid = QCOM_SCM_VMID_MSS_MSA, }; -static const struct adsp_data sm8750_mpss_resource = { +static const struct qcom_pas_data sm8750_mpss_resource = { .crash_reason_smem = 421, .firmware_name = "modem.mdt", .dtb_firmware_name = "modem_dtb.mdt", @@ -1434,7 +1429,7 @@ static const struct adsp_data sm8750_mpss_resource = { .region_assign_vmid = QCOM_SCM_VMID_MSS_MSA, }; -static const struct of_device_id adsp_of_match[] = { +static const struct of_device_id qcom_pas_of_match[] = { { .compatible = "qcom,msm8226-adsp-pil", .data = &msm8996_adsp_resource}, { .compatible = "qcom,msm8953-adsp-pil", .data = &msm8996_adsp_resource}, { .compatible = "qcom,msm8974-adsp-pil", .data = &adsp_resource_init}, @@ -1504,17 +1499,17 @@ static const struct of_device_id adsp_of_match[] = { { .compatible = "qcom,x1e80100-cdsp-pas", .data = &x1e80100_cdsp_resource}, { }, }; -MODULE_DEVICE_TABLE(of, adsp_of_match); +MODULE_DEVICE_TABLE(of, qcom_pas_of_match); -static struct platform_driver adsp_driver = { - .probe = adsp_probe, - .remove = adsp_remove, +static struct platform_driver qcom_pas_driver = { + .probe = qcom_pas_probe, + .remove = qcom_pas_remove, .driver = { .name = "qcom_q6v5_pas", - .of_match_table = adsp_of_match, + .of_match_table = qcom_pas_of_match, }, }; -module_platform_driver(adsp_driver); -MODULE_DESCRIPTION("Qualcomm Hexagon v5 Peripheral Authentication Service driver"); +module_platform_driver(qcom_pas_driver); +MODULE_DESCRIPTION("Qualcomm Peripheral Authentication Service remoteproc driver"); MODULE_LICENSE("GPL v2"); From f9262233b7710fae246784e2be7fa0a1275fe445 Mon Sep 17 00:00:00 2001 From: Lijuan Gao Date: Mon, 26 May 2025 13:21:47 +0800 Subject: [PATCH 0086/2411] dt-bindings: remoteproc: qcom,sm8150-pas: Document QCS615 remoteproc Document the components used to boot the ADSP and CDSP on the Qualcomm QCS615 SoC. Use fallback to indicate the compatibility of the remoteproc on the QCS615 with that on the SM8150. 
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Lijuan Gao Link: https://lore.kernel.org/r/20250526-add_qcs615_remoteproc_support-v4-1-06a7d8bed0b5@quicinc.com Signed-off-by: Bjorn Andersson --- .../bindings/remoteproc/qcom,sm8150-pas.yaml | 65 +++++++++++-------- 1 file changed, 38 insertions(+), 27 deletions(-) diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,sm8150-pas.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,sm8150-pas.yaml index 5dcc2a32c080..a8cddf7e2fe1 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,sm8150-pas.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,sm8150-pas.yaml @@ -15,17 +15,26 @@ description: properties: compatible: - enum: - - qcom,sc8180x-adsp-pas - - qcom,sc8180x-cdsp-pas - - qcom,sc8180x-slpi-pas - - qcom,sm8150-adsp-pas - - qcom,sm8150-cdsp-pas - - qcom,sm8150-mpss-pas - - qcom,sm8150-slpi-pas - - qcom,sm8250-adsp-pas - - qcom,sm8250-cdsp-pas - - qcom,sm8250-slpi-pas + oneOf: + - items: + - enum: + - qcom,qcs615-adsp-pas + - const: qcom,sm8150-adsp-pas + - items: + - enum: + - qcom,qcs615-cdsp-pas + - const: qcom,sm8150-cdsp-pas + - enum: + - qcom,sc8180x-adsp-pas + - qcom,sc8180x-cdsp-pas + - qcom,sc8180x-slpi-pas + - qcom,sm8150-adsp-pas + - qcom,sm8150-cdsp-pas + - qcom,sm8150-mpss-pas + - qcom,sm8150-slpi-pas + - qcom,sm8250-adsp-pas + - qcom,sm8250-cdsp-pas + - qcom,sm8250-slpi-pas reg: maxItems: 1 @@ -62,16 +71,17 @@ allOf: - if: properties: compatible: - enum: - - qcom,sc8180x-adsp-pas - - qcom,sc8180x-cdsp-pas - - qcom,sc8180x-slpi-pas - - qcom,sm8150-adsp-pas - - qcom,sm8150-cdsp-pas - - qcom,sm8150-slpi-pas - - qcom,sm8250-adsp-pas - - qcom,sm8250-cdsp-pas - - qcom,sm8250-slpi-pas + contains: + enum: + - qcom,sc8180x-adsp-pas + - qcom,sc8180x-cdsp-pas + - qcom,sc8180x-slpi-pas + - qcom,sm8150-adsp-pas + - qcom,sm8150-cdsp-pas + - qcom,sm8150-slpi-pas + - qcom,sm8250-adsp-pas + - qcom,sm8250-cdsp-pas + - qcom,sm8250-slpi-pas then: properties: interrupts: @@ -88,12 +98,13 @@ 
allOf: - if: properties: compatible: - enum: - - qcom,sc8180x-adsp-pas - - qcom,sc8180x-cdsp-pas - - qcom,sm8150-adsp-pas - - qcom,sm8150-cdsp-pas - - qcom,sm8250-cdsp-pas + contains: + enum: + - qcom,sc8180x-adsp-pas + - qcom,sc8180x-cdsp-pas + - qcom,sm8150-adsp-pas + - qcom,sm8150-cdsp-pas + - qcom,sm8250-cdsp-pas then: properties: power-domains: From 4863d750fdc0d29b68530355b216dfc6d7b52c9a Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 10 Jun 2025 14:58:47 +0200 Subject: [PATCH 0087/2411] pinctrl: baytrail: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Acked-by: Mika Westerberg Signed-off-by: Andy Shevchenko --- drivers/pinctrl/intel/pinctrl-baytrail.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 969137c4cb06..6eb649f1ffd6 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -1045,7 +1045,7 @@ static int byt_gpio_get(struct gpio_chip *chip, unsigned int offset) return !!(val & BYT_LEVEL); } -static void byt_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) +static int byt_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) { struct intel_pinctrl *vg = gpiochip_get_data(chip); void __iomem *reg; @@ -1053,7 +1053,7 @@ static void byt_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) reg = byt_gpio_reg(vg, offset, BYT_VAL_REG); if (!reg) - return; + return -EINVAL; guard(raw_spinlock_irqsave)(&byt_lock); @@ -1062,6 +1062,8 @@ static void byt_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) writel(old_val | BYT_LEVEL, reg); else writel(old_val & ~BYT_LEVEL, reg); + + return 0; } static int byt_gpio_get_direction(struct gpio_chip *chip, unsigned int 
offset) @@ -1229,7 +1231,7 @@ static const struct gpio_chip byt_gpio_chip = { .direction_input = byt_gpio_direction_input, .direction_output = byt_gpio_direction_output, .get = byt_gpio_get, - .set = byt_gpio_set, + .set_rv = byt_gpio_set, .set_config = gpiochip_generic_config, .dbg_show = byt_gpio_dbg_show, }; From 83ab731cb26b5ec3a37be6a672e0b3bdc0ae84c7 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 10 Jun 2025 14:58:48 +0200 Subject: [PATCH 0088/2411] pinctrl: cherryview: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Acked-by: Mika Westerberg Signed-off-by: Andy Shevchenko --- drivers/pinctrl/intel/pinctrl-cherryview.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 69b18ce0f685..769e8c4102a5 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1112,7 +1112,7 @@ static int chv_gpio_get(struct gpio_chip *chip, unsigned int offset) return !!(ctrl0 & CHV_PADCTRL0_GPIORXSTATE); } -static void chv_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) +static int chv_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) { struct intel_pinctrl *pctrl = gpiochip_get_data(chip); u32 ctrl0; @@ -1127,6 +1127,8 @@ static void chv_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) ctrl0 &= ~CHV_PADCTRL0_GPIOTXSTATE; chv_writel(pctrl, offset, CHV_PADCTRL0, ctrl0); + + return 0; } static int chv_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) @@ -1166,7 +1168,7 @@ static const struct gpio_chip chv_gpio_chip = { .direction_input = chv_gpio_direction_input, .direction_output = chv_gpio_direction_output, .get = chv_gpio_get, - .set = chv_gpio_set, + .set_rv = 
chv_gpio_set, }; static void chv_gpio_irq_ack(struct irq_data *d) From 241d79f02538fa19ad3441d27c0ee9e18aa9fd3b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 10 Jun 2025 14:58:49 +0200 Subject: [PATCH 0089/2411] pinctrl: intel: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Acked-by: Mika Westerberg Signed-off-by: Andy Shevchenko --- drivers/pinctrl/intel/pinctrl-intel.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index d889c7c878e2..846b25ed1cc4 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -1033,8 +1033,8 @@ static int intel_gpio_get(struct gpio_chip *chip, unsigned int offset) return !!(padcfg0 & PADCFG0_GPIORXSTATE); } -static void intel_gpio_set(struct gpio_chip *chip, unsigned int offset, - int value) +static int intel_gpio_set(struct gpio_chip *chip, unsigned int offset, + int value) { struct intel_pinctrl *pctrl = gpiochip_get_data(chip); void __iomem *reg; @@ -1043,11 +1043,11 @@ static void intel_gpio_set(struct gpio_chip *chip, unsigned int offset, pin = intel_gpio_to_pin(pctrl, offset, NULL, NULL); if (pin < 0) - return; + return -EINVAL; reg = intel_get_padcfg(pctrl, pin, PADCFG0); if (!reg) - return; + return -EINVAL; guard(raw_spinlock_irqsave)(&pctrl->lock); @@ -1057,6 +1057,8 @@ static void intel_gpio_set(struct gpio_chip *chip, unsigned int offset, else padcfg0 &= ~PADCFG0_GPIOTXSTATE; writel(padcfg0, reg); + + return 0; } static int intel_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) @@ -1094,7 +1096,12 @@ static int intel_gpio_direction_input(struct gpio_chip *chip, unsigned int offse static int intel_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, 
int value) { - intel_gpio_set(chip, offset, value); + int ret; + + ret = intel_gpio_set(chip, offset, value); + if (ret) + return ret; + return pinctrl_gpio_direction_output(chip, offset); } @@ -1106,7 +1113,7 @@ static const struct gpio_chip intel_gpio_chip = { .direction_input = intel_gpio_direction_input, .direction_output = intel_gpio_direction_output, .get = intel_gpio_get, - .set = intel_gpio_set, + .set_rv = intel_gpio_set, .set_config = gpiochip_generic_config, }; From 20e62271fbc80290f543579f3ba8149805d8e06c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 10 Jun 2025 14:58:50 +0200 Subject: [PATCH 0090/2411] pinctrl: lynxpoint: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Acked-by: Mika Westerberg Signed-off-by: Andy Shevchenko --- drivers/pinctrl/intel/pinctrl-lynxpoint.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-lynxpoint.c b/drivers/pinctrl/intel/pinctrl-lynxpoint.c index ac5459a4c63e..5d4a5dd493d1 100644 --- a/drivers/pinctrl/intel/pinctrl-lynxpoint.c +++ b/drivers/pinctrl/intel/pinctrl-lynxpoint.c @@ -503,7 +503,7 @@ static int lp_gpio_get(struct gpio_chip *chip, unsigned int offset) return !!(ioread32(reg) & IN_LVL_BIT); } -static void lp_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) +static int lp_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) { struct intel_pinctrl *lg = gpiochip_get_data(chip); void __iomem *reg = lp_gpio_reg(chip, offset, LP_CONFIG1); @@ -514,6 +514,8 @@ static void lp_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) iowrite32(ioread32(reg) | OUT_LVL_BIT, reg); else iowrite32(ioread32(reg) & ~OUT_LVL_BIT, reg); + + return 0; } static int lp_gpio_direction_input(struct gpio_chip *chip, unsigned int offset) @@ -775,7 +777,7 @@ 
static int lp_gpio_probe(struct platform_device *pdev) gc->direction_input = lp_gpio_direction_input; gc->direction_output = lp_gpio_direction_output; gc->get = lp_gpio_get; - gc->set = lp_gpio_set; + gc->set_rv = lp_gpio_set; gc->set_config = gpiochip_generic_config; gc->get_direction = lp_gpio_get_direction; gc->base = -1; From 47d8101924b58e03bfd065c972172e6b69331397 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 10 Jun 2025 10:31:56 +0000 Subject: [PATCH 0091/2411] rust: vec: impl Default for Vec with any allocator The implementation of Default is restricted to only work with kmalloc vectors for no good reason. This means I have to use mem::replace(&mut my_vec, KVVec::new()) in Rust Binder instead of `mem::take(&mut my_vec)`. Thus, expand the impl of Default to work with any allocator including kvmalloc. Signed-off-by: Alice Ryhl Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250610-vec-default-v1-1-7bb2c97d75a0@google.com Signed-off-by: Danilo Krummrich --- rust/kernel/alloc/kvec.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs index 1a0dd852a468..606616fc0e59 100644 --- a/rust/kernel/alloc/kvec.rs +++ b/rust/kernel/alloc/kvec.rs @@ -851,7 +851,7 @@ fn drop(&mut self) { } } -impl Default for KVec { +impl Default for Vec { #[inline] fn default() -> Self { Self::new() From 58cebd68882edd407c7f65ebb4a42034bc1ffc6d Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Fri, 23 May 2025 14:54:11 +0200 Subject: [PATCH 0092/2411] rust: pin-init: examples, tests: add conditional compilation in order to compile under any feature combination In the CI, all examples & tests should be run under all feature combinations. Currently several examples & tests use `std` without conditionally enabling it. Thus make them all compile under any feature combination by conditionally disabling the code that uses e.g. `std`. 
Link: https://github.com/Rust-for-Linux/pin-init/pull/50/commits/fdfb70efddbc711b4543c850ee38a2f5a8d17cb6 Link: https://lore.kernel.org/all/20250523125424.192843-2-lossin@kernel.org Signed-off-by: Benno Lossin --- rust/pin-init/examples/big_struct_in_place.rs | 26 ++--- rust/pin-init/examples/linked_list.rs | 10 +- rust/pin-init/examples/mutex.rs | 97 +++++++++++-------- rust/pin-init/examples/pthread_mutex.rs | 3 + rust/pin-init/examples/static_init.rs | 75 +++++++------- 5 files changed, 121 insertions(+), 90 deletions(-) diff --git a/rust/pin-init/examples/big_struct_in_place.rs b/rust/pin-init/examples/big_struct_in_place.rs index 30d44a334ffd..b0ee793a0a0c 100644 --- a/rust/pin-init/examples/big_struct_in_place.rs +++ b/rust/pin-init/examples/big_struct_in_place.rs @@ -4,6 +4,7 @@ // Struct with size over 1GiB #[derive(Debug)] +#[allow(dead_code)] pub struct BigStruct { buf: [u8; 1024 * 1024 * 1024], a: u64, @@ -25,15 +26,18 @@ pub fn new() -> impl Init { } fn main() { - // we want to initialize the struct in-place, otherwise we would get a stackoverflow - let buf: Box = Box::init(init!(BigStruct { - buf <- zeroed(), - a: 7, - b: 186, - c: 7789, - d: 34, - managed_buf <- ManagedBuf::new(), - })) - .unwrap(); - println!("{}", core::mem::size_of_val(&*buf)); + #[cfg(any(feature = "std", feature = "alloc"))] + { + // we want to initialize the struct in-place, otherwise we would get a stackoverflow + let buf: Box = Box::init(init!(BigStruct { + buf <- zeroed(), + a: 7, + b: 186, + c: 7789, + d: 34, + managed_buf <- ManagedBuf::new(), + })) + .unwrap(); + println!("{}", core::mem::size_of_val(&*buf)); + } } diff --git a/rust/pin-init/examples/linked_list.rs b/rust/pin-init/examples/linked_list.rs index 0bbc7b8d83a1..f9e117c7dfe0 100644 --- a/rust/pin-init/examples/linked_list.rs +++ b/rust/pin-init/examples/linked_list.rs @@ -14,8 +14,9 @@ use pin_init::*; -#[expect(unused_attributes)] +#[allow(unused_attributes)] mod error; +#[allow(unused_imports)] use 
error::Error; #[pin_data(PinnedDrop)] @@ -39,6 +40,7 @@ pub fn new() -> impl PinInit { } #[inline] + #[allow(dead_code)] pub fn insert_next(list: &ListHead) -> impl PinInit + '_ { try_pin_init!(&this in Self { prev: list.next.prev().replace(unsafe { Link::new_unchecked(this)}), @@ -112,6 +114,7 @@ fn next(&self) -> &Link { } #[inline] + #[allow(dead_code)] fn prev(&self) -> &Link { unsafe { &(*self.0.get().as_ptr()).prev } } @@ -137,8 +140,13 @@ fn set(&self, val: &Link) { } } +#[allow(dead_code)] +#[cfg(not(any(feature = "std", feature = "alloc")))] +fn main() {} + #[allow(dead_code)] #[cfg_attr(test, test)] +#[cfg(any(feature = "std", feature = "alloc"))] fn main() -> Result<(), Error> { let a = Box::pin_init(ListHead::new())?; stack_pin_init!(let b = ListHead::insert_next(&a)); diff --git a/rust/pin-init/examples/mutex.rs b/rust/pin-init/examples/mutex.rs index 3e3630780c96..9f295226cd64 100644 --- a/rust/pin-init/examples/mutex.rs +++ b/rust/pin-init/examples/mutex.rs @@ -12,14 +12,15 @@ pin::Pin, sync::atomic::{AtomicBool, Ordering}, }; +#[cfg(feature = "std")] use std::{ sync::Arc, - thread::{self, park, sleep, Builder, Thread}, + thread::{self, sleep, Builder, Thread}, time::Duration, }; use pin_init::*; -#[expect(unused_attributes)] +#[allow(unused_attributes)] #[path = "./linked_list.rs"] pub mod linked_list; use linked_list::*; @@ -36,6 +37,7 @@ pub fn acquire(&self) -> SpinLockGuard<'_> { .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed) .is_err() { + #[cfg(feature = "std")] while self.inner.load(Ordering::Relaxed) { thread::yield_now(); } @@ -94,7 +96,8 @@ pub fn lock(&self) -> Pin> { // println!("wait list length: {}", self.wait_list.size()); while self.locked.get() { drop(sguard); - park(); + #[cfg(feature = "std")] + thread::park(); sguard = self.spin_lock.acquire(); } // This does have an effect, as the ListHead inside wait_entry implements Drop! 
@@ -131,8 +134,11 @@ fn drop(&mut self) { let sguard = self.mtx.spin_lock.acquire(); self.mtx.locked.set(false); if let Some(list_field) = self.mtx.wait_list.next() { - let wait_entry = list_field.as_ptr().cast::(); - unsafe { (*wait_entry).thread.unpark() }; + let _wait_entry = list_field.as_ptr().cast::(); + #[cfg(feature = "std")] + unsafe { + (*_wait_entry).thread.unpark() + }; } drop(sguard); } @@ -159,52 +165,61 @@ fn deref_mut(&mut self) -> &mut Self::Target { struct WaitEntry { #[pin] wait_list: ListHead, + #[cfg(feature = "std")] thread: Thread, } impl WaitEntry { #[inline] fn insert_new(list: &ListHead) -> impl PinInit + '_ { - pin_init!(Self { - thread: thread::current(), - wait_list <- ListHead::insert_prev(list), - }) + #[cfg(feature = "std")] + { + pin_init!(Self { + thread: thread::current(), + wait_list <- ListHead::insert_prev(list), + }) + } + #[cfg(not(feature = "std"))] + { + pin_init!(Self { + wait_list <- ListHead::insert_prev(list), + }) + } } } -#[cfg(not(any(feature = "std", feature = "alloc")))] -fn main() {} - -#[allow(dead_code)] #[cfg_attr(test, test)] -#[cfg(any(feature = "std", feature = "alloc"))] +#[allow(dead_code)] fn main() { - let mtx: Pin>> = Arc::pin_init(CMutex::new(0)).unwrap(); - let mut handles = vec![]; - let thread_count = 20; - let workload = if cfg!(miri) { 100 } else { 1_000 }; - for i in 0..thread_count { - let mtx = mtx.clone(); - handles.push( - Builder::new() - .name(format!("worker #{i}")) - .spawn(move || { - for _ in 0..workload { - *mtx.lock() += 1; - } - println!("{i} halfway"); - sleep(Duration::from_millis((i as u64) * 10)); - for _ in 0..workload { - *mtx.lock() += 1; - } - println!("{i} finished"); - }) - .expect("should not fail"), - ); + #[cfg(feature = "std")] + { + let mtx: Pin>> = Arc::pin_init(CMutex::new(0)).unwrap(); + let mut handles = vec![]; + let thread_count = 20; + let workload = if cfg!(miri) { 100 } else { 1_000 }; + for i in 0..thread_count { + let mtx = mtx.clone(); + handles.push( + 
Builder::new() + .name(format!("worker #{i}")) + .spawn(move || { + for _ in 0..workload { + *mtx.lock() += 1; + } + println!("{i} halfway"); + sleep(Duration::from_millis((i as u64) * 10)); + for _ in 0..workload { + *mtx.lock() += 1; + } + println!("{i} finished"); + }) + .expect("should not fail"), + ); + } + for h in handles { + h.join().expect("thread panicked"); + } + println!("{:?}", &*mtx.lock()); + assert_eq!(*mtx.lock(), workload * thread_count * 2); } - for h in handles { - h.join().expect("thread panicked"); - } - println!("{:?}", &*mtx.lock()); - assert_eq!(*mtx.lock(), workload * thread_count * 2); } diff --git a/rust/pin-init/examples/pthread_mutex.rs b/rust/pin-init/examples/pthread_mutex.rs index 5acc5108b954..c709dabba7eb 100644 --- a/rust/pin-init/examples/pthread_mutex.rs +++ b/rust/pin-init/examples/pthread_mutex.rs @@ -44,6 +44,7 @@ fn drop(self: Pin<&mut Self>) { pub enum Error { #[allow(dead_code)] IO(std::io::Error), + #[allow(dead_code)] Alloc, } @@ -61,6 +62,7 @@ fn from(_: AllocError) -> Self { } impl PThreadMutex { + #[allow(dead_code)] pub fn new(data: T) -> impl PinInit { fn init_raw() -> impl PinInit, Error> { let init = |slot: *mut UnsafeCell| { @@ -103,6 +105,7 @@ fn init_raw() -> impl PinInit, Error> { }? 
Error) } + #[allow(dead_code)] pub fn lock(&self) -> PThreadMutexGuard<'_, T> { // SAFETY: raw is always initialized unsafe { libc::pthread_mutex_lock(self.raw.get()) }; diff --git a/rust/pin-init/examples/static_init.rs b/rust/pin-init/examples/static_init.rs index 48531413ab94..0e165daa9798 100644 --- a/rust/pin-init/examples/static_init.rs +++ b/rust/pin-init/examples/static_init.rs @@ -3,6 +3,7 @@ #![allow(clippy::undocumented_unsafe_blocks)] #![cfg_attr(feature = "alloc", feature(allocator_api))] #![cfg_attr(not(RUSTC_LINT_REASONS_IS_STABLE), feature(lint_reasons))] +#![allow(unused_imports)] use core::{ cell::{Cell, UnsafeCell}, @@ -12,12 +13,13 @@ time::Duration, }; use pin_init::*; +#[cfg(feature = "std")] use std::{ sync::Arc, thread::{sleep, Builder}, }; -#[expect(unused_attributes)] +#[allow(unused_attributes)] mod mutex; use mutex::*; @@ -82,42 +84,41 @@ unsafe fn __pinned_init( pub static COUNT: StaticInit, CountInit> = StaticInit::new(CountInit); -#[cfg(not(any(feature = "std", feature = "alloc")))] -fn main() {} - -#[cfg(any(feature = "std", feature = "alloc"))] fn main() { - let mtx: Pin>> = Arc::pin_init(CMutex::new(0)).unwrap(); - let mut handles = vec![]; - let thread_count = 20; - let workload = 1_000; - for i in 0..thread_count { - let mtx = mtx.clone(); - handles.push( - Builder::new() - .name(format!("worker #{i}")) - .spawn(move || { - for _ in 0..workload { - *COUNT.lock() += 1; - std::thread::sleep(std::time::Duration::from_millis(10)); - *mtx.lock() += 1; - std::thread::sleep(std::time::Duration::from_millis(10)); - *COUNT.lock() += 1; - } - println!("{i} halfway"); - sleep(Duration::from_millis((i as u64) * 10)); - for _ in 0..workload { - std::thread::sleep(std::time::Duration::from_millis(10)); - *mtx.lock() += 1; - } - println!("{i} finished"); - }) - .expect("should not fail"), - ); + #[cfg(feature = "std")] + { + let mtx: Pin>> = Arc::pin_init(CMutex::new(0)).unwrap(); + let mut handles = vec![]; + let thread_count = 20; + let 
workload = 1_000; + for i in 0..thread_count { + let mtx = mtx.clone(); + handles.push( + Builder::new() + .name(format!("worker #{i}")) + .spawn(move || { + for _ in 0..workload { + *COUNT.lock() += 1; + std::thread::sleep(std::time::Duration::from_millis(10)); + *mtx.lock() += 1; + std::thread::sleep(std::time::Duration::from_millis(10)); + *COUNT.lock() += 1; + } + println!("{i} halfway"); + sleep(Duration::from_millis((i as u64) * 10)); + for _ in 0..workload { + std::thread::sleep(std::time::Duration::from_millis(10)); + *mtx.lock() += 1; + } + println!("{i} finished"); + }) + .expect("should not fail"), + ); + } + for h in handles { + h.join().expect("thread panicked"); + } + println!("{:?}, {:?}", &*mtx.lock(), &*COUNT.lock()); + assert_eq!(*mtx.lock(), workload * thread_count * 2); } - for h in handles { - h.join().expect("thread panicked"); - } - println!("{:?}, {:?}", &*mtx.lock(), &*COUNT.lock()); - assert_eq!(*mtx.lock(), workload * thread_count * 2); } From 2408678d700c4db6c54749a272d42a964f5f3418 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Fri, 23 May 2025 14:54:12 +0200 Subject: [PATCH 0093/2411] rust: pin-init: examples: pthread_mutex: disable the main test for miri `miri` takes a long time to execute the test, so disable it. 
Link: https://github.com/Rust-for-Linux/pin-init/pull/50/commits/e717a9eec85024c11e79e8bd9dcb664ad0de8f94 Link: https://lore.kernel.org/all/20250523125424.192843-3-lossin@kernel.org Signed-off-by: Benno Lossin --- rust/pin-init/examples/pthread_mutex.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/pin-init/examples/pthread_mutex.rs b/rust/pin-init/examples/pthread_mutex.rs index c709dabba7eb..6c4d18238956 100644 --- a/rust/pin-init/examples/pthread_mutex.rs +++ b/rust/pin-init/examples/pthread_mutex.rs @@ -139,7 +139,7 @@ fn deref_mut(&mut self) -> &mut Self::Target { } } -#[cfg_attr(test, test)] +#[cfg_attr(all(test, not(miri)), test)] fn main() { #[cfg(all(any(feature = "std", feature = "alloc"), not(windows)))] { From b3b4f760ccf2d08ff3db0f094c32ce70bba2eb15 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Mon, 9 Jun 2025 16:17:35 +0200 Subject: [PATCH 0094/2411] rust: pin-init: feature-gate the `stack_init_reuse` test on the `std` feature When trying to run `cargo check --all-targets --no-default-features`, an error is reported by the test, as it cannot find the `std` crate. This is to be expected, since the `--no-default-features` flag enables the `no-std` behavior of the crate. Thus exclude the test in that scenario. Link: https://github.com/Rust-for-Linux/pin-init/pull/50/commits/2813729ccacdedee9dbfcab1ed285b8721a0391b Link: https://lore.kernel.org/all/20250523125424.192843-4-lossin@kernel.org [ Changed my author email address to @kernel.org. 
- Benno ] Signed-off-by: Benno Lossin --- rust/pin-init/src/__internal.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/pin-init/src/__internal.rs b/rust/pin-init/src/__internal.rs index 557b5948cddc..90f18e9a2912 100644 --- a/rust/pin-init/src/__internal.rs +++ b/rust/pin-init/src/__internal.rs @@ -188,6 +188,7 @@ pub fn init(self: Pin<&mut Self>, init: impl PinInit) -> Result Date: Fri, 23 May 2025 16:50:57 +0200 Subject: [PATCH 0095/2411] rust: pin-init: rename `zeroed` to `init_zeroed` The name `zeroed` is a much better fit for a function that returns the type by-value. Link: https://github.com/Rust-for-Linux/pin-init/pull/56/commits/7dbe38682c9725405bab91dcabe9c4d8893d2f5e [ also rename uses in `rust/kernel/init.rs` - Benno] Link: https://lore.kernel.org/all/20250523145125.523275-2-lossin@kernel.org [ Fix wrong replacement of `mem::zeroed` in the definition of `trait Zeroable`. - Benno ] [ Also change occurrences of `zeroed` in `configfs.rs` - Benno ] Acked-by: Andreas Hindborg Signed-off-by: Benno Lossin --- rust/kernel/configfs.rs | 4 +-- rust/kernel/init.rs | 8 +++--- rust/pin-init/README.md | 2 +- rust/pin-init/examples/big_struct_in_place.rs | 4 +-- rust/pin-init/src/lib.rs | 26 +++++++++---------- rust/pin-init/src/macros.rs | 16 ++++++------ 6 files changed, 30 insertions(+), 30 deletions(-) diff --git a/rust/kernel/configfs.rs b/rust/kernel/configfs.rs index 34d0bea4f9a5..6d566a8bde74 100644 --- a/rust/kernel/configfs.rs +++ b/rust/kernel/configfs.rs @@ -151,7 +151,7 @@ pub fn new( data: impl PinInit, ) -> impl PinInit { try_pin_init!(Self { - subsystem <- pin_init::zeroed().chain( + subsystem <- pin_init::init_zeroed().chain( |place: &mut Opaque| { // SAFETY: We initialized the required fields of `place.group` above. 
unsafe { @@ -261,7 +261,7 @@ pub fn new( data: impl PinInit, ) -> impl PinInit { try_pin_init!(Self { - group <- pin_init::zeroed().chain(|v: &mut Opaque| { + group <- pin_init::init_zeroed().chain(|v: &mut Opaque| { let place = v.get(); let name = name.as_bytes_with_nul().as_ptr(); // SAFETY: It is safe to initialize a group once it has been zeroed. diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 8d228c237954..15a1c5e397d8 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -206,7 +206,7 @@ fn init(init: impl Init, flags: Flags) -> error::Result /// /// ```rust /// use kernel::error::Error; -/// use pin_init::zeroed; +/// use pin_init::init_zeroed; /// struct BigBuf { /// big: KBox<[u8; 1024 * 1024 * 1024]>, /// small: [u8; 1024 * 1024], @@ -215,7 +215,7 @@ fn init(init: impl Init, flags: Flags) -> error::Result /// impl BigBuf { /// fn new() -> impl Init { /// try_init!(Self { -/// big: KBox::init(zeroed(), GFP_KERNEL)?, +/// big: KBox::init(init_zeroed(), GFP_KERNEL)?, /// small: [0; 1024 * 1024], /// }? Error) /// } @@ -264,7 +264,7 @@ macro_rules! try_init { /// ```rust /// # #![feature(new_uninit)] /// use kernel::error::Error; -/// use pin_init::zeroed; +/// use pin_init::init_zeroed; /// #[pin_data] /// struct BigBuf { /// big: KBox<[u8; 1024 * 1024 * 1024]>, @@ -275,7 +275,7 @@ macro_rules! try_init { /// impl BigBuf { /// fn new() -> impl PinInit { /// try_pin_init!(Self { -/// big: KBox::init(zeroed(), GFP_KERNEL)?, +/// big: KBox::init(init_zeroed(), GFP_KERNEL)?, /// small: [0; 1024 * 1024], /// ptr: core::ptr::null_mut(), /// }? Error) diff --git a/rust/pin-init/README.md b/rust/pin-init/README.md index 2d0cda961d45..a4c01a8d78b2 100644 --- a/rust/pin-init/README.md +++ b/rust/pin-init/README.md @@ -125,7 +125,7 @@ impl DriverData { fn new() -> impl PinInit { try_pin_init!(Self { status <- CMutex::new(0), - buffer: Box::init(pin_init::zeroed())?, + buffer: Box::init(pin_init::init_zeroed())?, }? 
Error) } } diff --git a/rust/pin-init/examples/big_struct_in_place.rs b/rust/pin-init/examples/big_struct_in_place.rs index b0ee793a0a0c..c05139927486 100644 --- a/rust/pin-init/examples/big_struct_in_place.rs +++ b/rust/pin-init/examples/big_struct_in_place.rs @@ -21,7 +21,7 @@ pub struct ManagedBuf { impl ManagedBuf { pub fn new() -> impl Init { - init!(ManagedBuf { buf <- zeroed() }) + init!(ManagedBuf { buf <- init_zeroed() }) } } @@ -30,7 +30,7 @@ fn main() { { // we want to initialize the struct in-place, otherwise we would get a stackoverflow let buf: Box = Box::init(init!(BigStruct { - buf <- zeroed(), + buf <- init_zeroed(), a: 7, b: 186, c: 7789, diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs index f4e034497cdd..2f7ca94451e6 100644 --- a/rust/pin-init/src/lib.rs +++ b/rust/pin-init/src/lib.rs @@ -148,7 +148,7 @@ //! fn new() -> impl PinInit { //! try_pin_init!(Self { //! status <- CMutex::new(0), -//! buffer: Box::init(pin_init::zeroed())?, +//! buffer: Box::init(pin_init::init_zeroed())?, //! }? Error) //! } //! } @@ -742,7 +742,7 @@ macro_rules! stack_try_pin_init { /// - Fields that you want to initialize in-place have to use `<-` instead of `:`. /// - In front of the initializer you can write `&this in` to have access to a [`NonNull`] /// pointer named `this` inside of the initializer. -/// - Using struct update syntax one can place `..Zeroable::zeroed()` at the very end of the +/// - Using struct update syntax one can place `..Zeroable::init_zeroed()` at the very end of the /// struct, this initializes every field with 0 and then runs all initializers specified in the /// body. This can only be done if [`Zeroable`] is implemented for the struct. /// @@ -769,7 +769,7 @@ macro_rules! stack_try_pin_init { /// }); /// let init = pin_init!(Buf { /// buf: [1; 64], -/// ..Zeroable::zeroed() +/// ..Zeroable::init_zeroed() /// }); /// ``` /// @@ -805,7 +805,7 @@ macro_rules! 
pin_init { /// ```rust /// # #![feature(allocator_api)] /// # #[path = "../examples/error.rs"] mod error; use error::Error; -/// use pin_init::{pin_data, try_pin_init, PinInit, InPlaceInit, zeroed}; +/// use pin_init::{pin_data, try_pin_init, PinInit, InPlaceInit, init_zeroed}; /// /// #[pin_data] /// struct BigBuf { @@ -817,7 +817,7 @@ macro_rules! pin_init { /// impl BigBuf { /// fn new() -> impl PinInit { /// try_pin_init!(Self { -/// big: Box::init(zeroed())?, +/// big: Box::init(init_zeroed())?, /// small: [0; 1024 * 1024], /// ptr: core::ptr::null_mut(), /// }? Error) @@ -866,7 +866,7 @@ macro_rules! try_pin_init { /// # #[path = "../examples/error.rs"] mod error; use error::Error; /// # #[path = "../examples/mutex.rs"] mod mutex; use mutex::*; /// # use pin_init::InPlaceInit; -/// use pin_init::{init, Init, zeroed}; +/// use pin_init::{init, Init, init_zeroed}; /// /// struct BigBuf { /// small: [u8; 1024 * 1024], @@ -875,7 +875,7 @@ macro_rules! try_pin_init { /// impl BigBuf { /// fn new() -> impl Init { /// init!(Self { -/// small <- zeroed(), +/// small <- init_zeroed(), /// }) /// } /// } @@ -913,7 +913,7 @@ macro_rules! init { /// # #![feature(allocator_api)] /// # use core::alloc::AllocError; /// # use pin_init::InPlaceInit; -/// use pin_init::{try_init, Init, zeroed}; +/// use pin_init::{try_init, Init, init_zeroed}; /// /// struct BigBuf { /// big: Box<[u8; 1024 * 1024 * 1024]>, @@ -923,7 +923,7 @@ macro_rules! init { /// impl BigBuf { /// fn new() -> impl Init { /// try_init!(Self { -/// big: Box::init(zeroed())?, +/// big: Box::init(init_zeroed())?, /// small: [0; 1024 * 1024], /// }? 
AllocError) /// } @@ -1170,7 +1170,7 @@ pub unsafe trait Init: PinInit { /// /// ```rust /// # #![expect(clippy::disallowed_names)] - /// use pin_init::{init, zeroed, Init}; + /// use pin_init::{init, init_zeroed, Init}; /// /// struct Foo { /// buf: [u8; 1_000_000], @@ -1183,7 +1183,7 @@ pub unsafe trait Init: PinInit { /// } /// /// let foo = init!(Foo { - /// buf <- zeroed() + /// buf <- init_zeroed() /// }).chain(|foo| { /// foo.setup(); /// Ok(()) @@ -1508,11 +1508,11 @@ pub unsafe trait ZeroableOption {} // SAFETY: by the safety requirement of `ZeroableOption`, this is valid. unsafe impl Zeroable for Option {} -/// Create a new zeroed T. +/// Create an initializer for a zeroed `T`. /// /// The returned initializer will write `0x00` to every byte of the given `slot`. #[inline] -pub fn zeroed() -> impl Init { +pub fn init_zeroed() -> impl Init { // SAFETY: Because `T: Zeroable`, all bytes zero is a valid bit pattern for `T` // and because we write all zeroes, the memory is initialized. unsafe { diff --git a/rust/pin-init/src/macros.rs b/rust/pin-init/src/macros.rs index 935d77745d1d..9ced630737b8 100644 --- a/rust/pin-init/src/macros.rs +++ b/rust/pin-init/src/macros.rs @@ -1030,7 +1030,7 @@ impl<$($impl_generics)*> $pin_data<$($ty_generics)*> /// /// This macro has multiple internal call configurations, these are always the very first ident: /// - nothing: this is the base case and called by the `{try_}{pin_}init!` macros. -/// - `with_update_parsed`: when the `..Zeroable::zeroed()` syntax has been handled. +/// - `with_update_parsed`: when the `..Zeroable::init_zeroed()` syntax has been handled. /// - `init_slot`: recursively creates the code that initializes all fields in `slot`. /// - `make_initializer`: recursively create the struct initializer that guarantees that every /// field has been initialized exactly once. @@ -1059,7 +1059,7 @@ macro_rules! 
__init_internal { @data($data, $($use_data)?), @has_data($has_data, $get_data), @construct_closure($construct_closure), - @zeroed(), // Nothing means default behavior. + @init_zeroed(), // Nothing means default behavior. ) }; ( @@ -1074,7 +1074,7 @@ macro_rules! __init_internal { @has_data($has_data:ident, $get_data:ident), // `pin_init_from_closure` or `init_from_closure`. @construct_closure($construct_closure:ident), - @munch_fields(..Zeroable::zeroed()), + @munch_fields(..Zeroable::init_zeroed()), ) => { $crate::__init_internal!(with_update_parsed: @this($($this)?), @@ -1084,7 +1084,7 @@ macro_rules! __init_internal { @data($data, $($use_data)?), @has_data($has_data, $get_data), @construct_closure($construct_closure), - @zeroed(()), // `()` means zero all fields not mentioned. + @init_zeroed(()), // `()` means zero all fields not mentioned. ) }; ( @@ -1124,7 +1124,7 @@ macro_rules! __init_internal { @has_data($has_data:ident, $get_data:ident), // `pin_init_from_closure` or `init_from_closure`. @construct_closure($construct_closure:ident), - @zeroed($($init_zeroed:expr)?), + @init_zeroed($($init_zeroed:expr)?), ) => {{ // We do not want to allow arbitrary returns, so we declare this type as the `Ok` return // type and shadow it later when we insert the arbitrary user code. That way there will be @@ -1196,7 +1196,7 @@ fn assert_zeroable(_: *mut T) {} @data($data:ident), @slot($slot:ident), @guards($($guards:ident,)*), - @munch_fields($(..Zeroable::zeroed())? $(,)?), + @munch_fields($(..Zeroable::init_zeroed())? $(,)?), ) => { // Endpoint of munching, no fields are left. If execution reaches this point, all fields // have been initialized. Therefore we can now dismiss the guards by forgetting them. 
@@ -1300,11 +1300,11 @@ fn assert_zeroable(_: *mut T) {} (make_initializer: @slot($slot:ident), @type_name($t:path), - @munch_fields(..Zeroable::zeroed() $(,)?), + @munch_fields(..Zeroable::init_zeroed() $(,)?), @acc($($acc:tt)*), ) => { // Endpoint, nothing more to munch, create the initializer. Since the users specified - // `..Zeroable::zeroed()`, the slot will already have been zeroed and all field that have + // `..Zeroable::init_zeroed()`, the slot will already have been zeroed and all field that have // not been overwritten are thus zero and initialized. We still check that all fields are // actually accessible by using the struct update syntax ourselves. // We are inside of a closure that is never executed and thus we can abuse `slot` to From c47024ba198b01cab6bb6e3e5a69b73ed2f2aa16 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Fri, 23 May 2025 16:50:58 +0200 Subject: [PATCH 0096/2411] rust: pin-init: add `Zeroable::init_zeroed` The trait function delegates to the already existing `init_zeroed` function that returns a zeroing initializer for `Self`. The syntax `..Zeroable::init_zeroed()` is already used by the initialization macros to initialize all fields that are not mentioned in the initializer with zero. Therefore it is expected that the function also exists on the trait. 
Link: https://github.com/Rust-for-Linux/pin-init/pull/56/commits/a424a6c9af5a4418a8e5e986a3db26a4432e2f1a Link: https://lore.kernel.org/all/20250523145125.523275-3-lossin@kernel.org Signed-off-by: Benno Lossin --- rust/pin-init/src/lib.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs index 2f7ca94451e6..ef7e5a1e1c48 100644 --- a/rust/pin-init/src/lib.rs +++ b/rust/pin-init/src/lib.rs @@ -1495,7 +1495,18 @@ pub unsafe trait PinnedDrop: __internal::HasPinData { /// ```rust,ignore /// let val: Self = unsafe { core::mem::zeroed() }; /// ``` -pub unsafe trait Zeroable {} +pub unsafe trait Zeroable { + /// Create a new zeroed `Self`. + /// + /// The returned initializer will write `0x00` to every byte of the given `slot`. + #[inline] + fn init_zeroed() -> impl Init + where + Self: Sized, + { + init_zeroed() + } +} /// Marker trait for types that allow `Option` to be set to all zeroes in order to write /// `None` to that location. From d67b37012080cf1978b5fd36f040a53f92152243 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Fri, 23 May 2025 16:50:59 +0200 Subject: [PATCH 0097/2411] rust: pin-init: add `zeroed()` & `Zeroable::zeroed()` functions `zeroed()` returns a zeroed out value of a sized type implementing `Zeroable`. The function is added as a free standing function, in addition to an associated function on `Zeroable`, because then it can be marked `const` (functions in traits can't be const at the moment). 
Link: https://github.com/Rust-for-Linux/pin-init/pull/56/commits/809e4ec160579c1601dce5d78b432a5b6c8e4e40 Link: https://lore.kernel.org/all/20250523145125.523275-4-lossin@kernel.org Signed-off-by: Benno Lossin --- rust/pin-init/src/lib.rs | 52 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs index ef7e5a1e1c48..a5bb3939b58b 100644 --- a/rust/pin-init/src/lib.rs +++ b/rust/pin-init/src/lib.rs @@ -1506,6 +1506,33 @@ fn init_zeroed() -> impl Init { init_zeroed() } + + /// Create a `Self` consisting of all zeroes. + /// + /// Whenever a type implements [`Zeroable`], this function should be preferred over + /// [`core::mem::zeroed()`] or using `MaybeUninit::zeroed().assume_init()`. + /// + /// # Examples + /// + /// ``` + /// use pin_init::{Zeroable, zeroed}; + /// + /// #[derive(Zeroable)] + /// struct Point { + /// x: u32, + /// y: u32, + /// } + /// + /// let point: Point = zeroed(); + /// assert_eq!(point.x, 0); + /// assert_eq!(point.y, 0); + /// ``` + fn zeroed() -> Self + where + Self: Sized, + { + zeroed() + } } /// Marker trait for types that allow `Option` to be set to all zeroes in order to write @@ -1534,6 +1561,31 @@ pub fn init_zeroed() -> impl Init { } } +/// Create a `T` consisting of all zeroes. +/// +/// Whenever a type implements [`Zeroable`], this function should be preferred over +/// [`core::mem::zeroed()`] or using `MaybeUninit::zeroed().assume_init()`. +/// +/// # Examples +/// +/// ``` +/// use pin_init::{Zeroable, zeroed}; +/// +/// #[derive(Zeroable)] +/// struct Point { +/// x: u32, +/// y: u32, +/// } +/// +/// let point: Point = zeroed(); +/// assert_eq!(point.x, 0); +/// assert_eq!(point.y, 0); +/// ``` +pub const fn zeroed() -> T { + // SAFETY:By the type invariants of `Zeroable`, all zeroes is a valid bit pattern for `T`. + unsafe { core::mem::zeroed() } +} + macro_rules! impl_zeroable { ($($({$($generics:tt)*})? 
$t:ty, )*) => { // SAFETY: Safety comments written in the macro invocation. From e93a238605348bc40fed77ba5582e311376d113b Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Fri, 23 May 2025 16:51:00 +0200 Subject: [PATCH 0098/2411] rust: pin-init: implement `ZeroableOption` for `&T` and `&mut T` `Option<&T>` and `Option<&mut T>` are documented [1] to have the `None` variant be all zeroes. Link: https://doc.rust-lang.org/stable/std/option/index.html#representation [1] Link: https://github.com/Rust-for-Linux/pin-init/pull/56/commits/5ef1638c79e019d3dc0c62db5905601644c2e60a Link: https://lore.kernel.org/all/20250523145125.523275-5-lossin@kernel.org Signed-off-by: Benno Lossin --- rust/pin-init/src/lib.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs index a5bb3939b58b..298a3e675b7f 100644 --- a/rust/pin-init/src/lib.rs +++ b/rust/pin-init/src/lib.rs @@ -1546,6 +1546,13 @@ pub unsafe trait ZeroableOption {} // SAFETY: by the safety requirement of `ZeroableOption`, this is valid. unsafe impl Zeroable for Option {} +// SAFETY: `Option<&T>` is part of the option layout optimization guarantee: +// . +unsafe impl ZeroableOption for &T {} +// SAFETY: `Option<&mut T>` is part of the option layout optimization guarantee: +// . +unsafe impl ZeroableOption for &mut T {} + /// Create an initializer for a zeroed `T`. /// /// The returned initializer will write `0x00` to every byte of the given `slot`. From 9f473538706b9fb5e82c9864b04089d35e4f93d5 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Fri, 23 May 2025 16:51:01 +0200 Subject: [PATCH 0099/2411] rust: pin-init: change `impl Zeroable for Option>` to `ZeroableOption for NonNull` This brings it in line with references. It too is listed in [1]. 
Link: https://doc.rust-lang.org/stable/std/option/index.html#representation Link: https://github.com/Rust-for-Linux/pin-init/pull/56/commits/8e52bf56ddc2190ce901d2f7c008ab8a64f653a9 Link: https://lore.kernel.org/all/20250523145125.523275-6-lossin@kernel.org Signed-off-by: Benno Lossin --- rust/pin-init/src/lib.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs index 298a3e675b7f..a4656e7976c7 100644 --- a/rust/pin-init/src/lib.rs +++ b/rust/pin-init/src/lib.rs @@ -1552,6 +1552,9 @@ unsafe impl ZeroableOption for &T {} // SAFETY: `Option<&mut T>` is part of the option layout optimization guarantee: // . unsafe impl ZeroableOption for &mut T {} +// SAFETY: `Option>` is part of the option layout optimization guarantee: +// . +unsafe impl ZeroableOption for NonNull {} /// Create an initializer for a zeroed `T`. /// @@ -1630,7 +1633,6 @@ macro_rules! impl_zeroable { Option, Option, Option, Option, Option, Option, Option, Option, - {} Option>, // SAFETY: `null` pointer is valid. // From ec87ec35ca8bd61bfc1200224d332b4573b9dafa Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Fri, 23 May 2025 16:51:02 +0200 Subject: [PATCH 0100/2411] rust: pin-init: implement `ZeroableOption` for function pointers with up to 20 arguments `Option<[unsafe] [extern "abi"] fn(...args...) -> ret>` is documented [1] to also have the `None` variant equal all zeroes. 
Link: https://doc.rust-lang.org/stable/std/option/index.html#representation [1] Link: https://github.com/Rust-for-Linux/pin-init/pull/56/commits/b6c1ab4fb3699765f81ae512ecac5a2f032d8d51 Link: https://lore.kernel.org/all/20250523145125.523275-7-lossin@kernel.org Signed-off-by: Benno Lossin --- rust/pin-init/src/lib.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs index a4656e7976c7..3e5fe84ae547 100644 --- a/rust/pin-init/src/lib.rs +++ b/rust/pin-init/src/lib.rs @@ -1662,6 +1662,22 @@ unsafe impl<$first: Zeroable, $($t: Zeroable),*> Zeroable for ($first, $($t),*) impl_tuple_zeroable!(A, B, C, D, E, F, G, H, I, J); +macro_rules! impl_fn_zeroable_option { + ([$($abi:literal),* $(,)?] $args:tt) => { + $(impl_fn_zeroable_option!({extern $abi} $args);)* + $(impl_fn_zeroable_option!({unsafe extern $abi} $args);)* + }; + ({$($prefix:tt)*} {$(,)?}) => {}; + ({$($prefix:tt)*} {$ret:ident, $($rest:ident),* $(,)?}) => { + // SAFETY: function pointers are part of the option layout optimization: + // . + unsafe impl<$ret, $($rest),*> ZeroableOption for $($prefix)* fn($($rest),*) -> $ret {} + impl_fn_zeroable_option!({$($prefix)*} {$($rest),*,}); + }; +} + +impl_fn_zeroable_option!(["Rust", "C"] { A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U }); + /// This trait allows creating an instance of `Self` which contains exactly one /// [structurally pinned value](https://doc.rust-lang.org/std/pin/index.html#projections-and-structural-pinning). 
/// From d2b7313fa21bbe7ce3c4147d84c1ccbc6d69b9db Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 26 May 2025 17:29:13 +0200 Subject: [PATCH 0101/2411] rust: init: re-enable doctests Commit a30e94c29673 ("rust: init: make doctests compilable/testable") made these tests buildable among others, but eventually the pin-init crate was made into its own crate [1] and the tests were marked as `ignore` in commit 206dea39e559 ("rust: init: disable doctests"). A few other bits got changed in that reorganization, e.g. the `clippy::missing_safety_doc` was removed and the `expect` use. Since there is no reason not to build/test them, re-enable them. In order to do so, tweak a few bits to keep the build clean, and also use again `expect` since this is one of those places where we can actually do so. Link: https://lore.kernel.org/all/20250308110339.2997091-1-benno.lossin@proton.me/ [1] Signed-off-by: Miguel Ojeda Link: https://lore.kernel.org/all/20250526152914.2453949-1-ojeda@kernel.org Signed-off-by: Benno Lossin --- rust/kernel/init.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 15a1c5e397d8..49b949720886 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -29,15 +29,15 @@ //! //! ## General Examples //! -//! ```rust,ignore -//! # #![allow(clippy::disallowed_names)] +//! ```rust +//! # #![expect(clippy::disallowed_names, clippy::undocumented_unsafe_blocks)] //! use kernel::types::Opaque; //! use pin_init::pin_init_from_closure; //! //! // assume we have some `raw_foo` type in C: //! #[repr(C)] //! struct RawFoo([u8; 16]); -//! extern { +//! extern "C" { //! fn init_foo(_: *mut RawFoo); //! } //! @@ -66,12 +66,12 @@ //! }); //! ``` //! -//! ```rust,ignore -//! # #![allow(unreachable_pub, clippy::disallowed_names)] +//! ```rust +//! # #![expect(unreachable_pub, clippy::disallowed_names)] //! use kernel::{prelude::*, types::Opaque}; //! 
use core::{ptr::addr_of_mut, marker::PhantomPinned, pin::Pin}; //! # mod bindings { -//! # #![allow(non_camel_case_types)] +//! # #![expect(non_camel_case_types, clippy::missing_safety_doc)] //! # pub struct foo; //! # pub unsafe fn init_foo(_ptr: *mut foo) {} //! # pub unsafe fn destroy_foo(_ptr: *mut foo) {} From f744a5b68eead2cc73691e91182522c7d800245e Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 26 May 2025 17:29:14 +0200 Subject: [PATCH 0102/2411] rust: init: remove doctest's `Error::from_errno` workaround Since commit 5ed147473458 ("rust: error: make conversion functions public"), `Error::from_errno` is public. Thus remove the workaround added in commit a30e94c29673 ("rust: init: make doctests compilable/testable"). Suggested-by: Benno Lossin Signed-off-by: Miguel Ojeda Link: https://lore.kernel.org/all/20250526152914.2453949-2-ojeda@kernel.org Signed-off-by: Benno Lossin --- rust/kernel/init.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 49b949720886..49a61fa3dee8 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -77,14 +77,6 @@ //! # pub unsafe fn destroy_foo(_ptr: *mut foo) {} //! # pub unsafe fn enable_foo(_ptr: *mut foo, _flags: u32) -> i32 { 0 } //! # } -//! # // `Error::from_errno` is `pub(crate)` in the `kernel` crate, thus provide a workaround. -//! # trait FromErrno { -//! # fn from_errno(errno: core::ffi::c_int) -> Error { -//! # // Dummy error that can be constructed outside the `kernel` crate. -//! # Error::from(core::fmt::Error) -//! # } -//! # } -//! # impl FromErrno for Error {} //! /// # Invariants //! /// //! /// `foo` is always initialized From fc3870dc5cadb701b4122e4a8daa85f9fa2f57b9 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Thu, 5 Jun 2025 17:52:54 +0200 Subject: [PATCH 0103/2411] rust: pin-init: examples, tests: use `ignore` instead of conditionally compiling tests Change `#[cfg(cond)]` to `#[cfg_attr(not(cond), ignore)]` on tests. 
Ignoring tests instead of disabling them still makes them appear in the test list, but with `ignored`. It also still compiles the code in those cases. Some tests still need to be ignored, because they use types that are not present when the condition is false. For example the condition is `feature = std` and then it uses `std::thread::Thread`. Suggested-by: Alice Ryhl Link: https://lore.kernel.org/all/aDC9y829vZZBzZ2p@google.com Link: https://github.com/Rust-for-Linux/pin-init/pull/58/commits/b004dd8e64d4cbe219a4eff0d25f0a5f5bc750ca Reviewed-by: Christian Schrefl Link: https://lore.kernel.org/all/20250605155258.573391-1-lossin@kernel.org Signed-off-by: Benno Lossin --- rust/pin-init/examples/pthread_mutex.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rust/pin-init/examples/pthread_mutex.rs b/rust/pin-init/examples/pthread_mutex.rs index 6c4d18238956..49b004c8c137 100644 --- a/rust/pin-init/examples/pthread_mutex.rs +++ b/rust/pin-init/examples/pthread_mutex.rs @@ -139,7 +139,8 @@ fn deref_mut(&mut self) -> &mut Self::Target { } } -#[cfg_attr(all(test, not(miri)), test)] +#[cfg_attr(test, test)] +#[cfg_attr(all(test, miri), ignore)] fn main() { #[cfg(all(any(feature = "std", feature = "alloc"), not(windows)))] { From ae0756933e879a703e1a5deb701d9ec88b032ba3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 29 May 2025 12:22:06 -0700 Subject: [PATCH 0104/2411] perf thread: Ensure comm_lock held for comm_list Add thread safety annotations for comm_list and add locking for two instances where the list is accessed without the lock held (in contradiction to ____thread__set_comm()).
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250529192206.971199-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/comm.c | 2 ++ tools/perf/util/thread.c | 26 ++++++++++++++++++++++---- tools/perf/util/thread.h | 11 ++++++----- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 8aa456d7c2cd..9880247a2c33 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -24,6 +24,7 @@ static struct comm_strs { static void comm_strs__remove_if_last(struct comm_str *cs); static void comm_strs__init(void) + NO_THREAD_SAFETY_ANALYSIS /* Inherently single threaded due to pthread_once. */ { init_rwsem(&_comm_strs.lock); _comm_strs.capacity = 16; @@ -119,6 +120,7 @@ static void comm_strs__remove_if_last(struct comm_str *cs) } static struct comm_str *__comm_strs__find(struct comm_strs *comm_strs, const char *str) + SHARED_LOCKS_REQUIRED(comm_strs->lock) { struct comm_str **result; diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index ffb48cc2103f..aa9c58bbf9d3 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -41,6 +41,7 @@ int thread__init_maps(struct thread *thread, struct machine *machine) } struct thread *thread__new(pid_t pid, pid_t tid) + NO_THREAD_SAFETY_ANALYSIS /* Allocation/creation is inherently single threaded. 
*/ { RC_STRUCT(thread) *_thread = zalloc(sizeof(*_thread)); struct thread *thread; @@ -200,7 +201,8 @@ int thread__set_namespaces(struct thread *thread, u64 timestamp, return ret; } -struct comm *thread__comm(struct thread *thread) +static struct comm *__thread__comm(struct thread *thread) + SHARED_LOCKS_REQUIRED(thread__comm_lock(thread)) { if (list_empty(thread__comm_list(thread))) return NULL; @@ -208,16 +210,30 @@ struct comm *thread__comm(struct thread *thread) return list_first_entry(thread__comm_list(thread), struct comm, list); } +struct comm *thread__comm(struct thread *thread) +{ + struct comm *res = NULL; + + down_read(thread__comm_lock(thread)); + res = __thread__comm(thread); + up_read(thread__comm_lock(thread)); + return res; +} + struct comm *thread__exec_comm(struct thread *thread) { struct comm *comm, *last = NULL, *second_last = NULL; + down_read(thread__comm_lock(thread)); list_for_each_entry(comm, thread__comm_list(thread), list) { - if (comm->exec) + if (comm->exec) { + up_read(thread__comm_lock(thread)); return comm; + } second_last = last; last = comm; } + up_read(thread__comm_lock(thread)); /* * 'last' with no start time might be the parent's comm of a synthesized @@ -233,8 +249,9 @@ struct comm *thread__exec_comm(struct thread *thread) static int ____thread__set_comm(struct thread *thread, const char *str, u64 timestamp, bool exec) + EXCLUSIVE_LOCKS_REQUIRED(thread__comm_lock(thread)) { - struct comm *new, *curr = thread__comm(thread); + struct comm *new, *curr = __thread__comm(thread); /* Override the default :tid entry */ if (!thread__comm_set(thread)) { @@ -285,8 +302,9 @@ int thread__set_comm_from_proc(struct thread *thread) } static const char *__thread__comm_str(struct thread *thread) + SHARED_LOCKS_REQUIRED(thread__comm_lock(thread)) { - const struct comm *comm = thread__comm(thread); + const struct comm *comm = __thread__comm(thread); if (!comm) return NULL; diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 
2b90bbed7a61..310eaea344bb 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -236,16 +236,17 @@ static inline struct rw_semaphore *thread__namespaces_lock(struct thread *thread return &RC_CHK_ACCESS(thread)->namespaces_lock; } -static inline struct list_head *thread__comm_list(struct thread *thread) -{ - return &RC_CHK_ACCESS(thread)->comm_list; -} - static inline struct rw_semaphore *thread__comm_lock(struct thread *thread) { return &RC_CHK_ACCESS(thread)->comm_lock; } +static inline struct list_head *thread__comm_list(struct thread *thread) + SHARED_LOCKS_REQUIRED(thread__comm_lock(thread)) +{ + return &RC_CHK_ACCESS(thread)->comm_list; +} + static inline u64 thread__db_id(const struct thread *thread) { return RC_CHK_ACCESS(thread)->db_id; From 38815c43786b899a62bf655d58a58f6d95d04bbf Mon Sep 17 00:00:00 2001 From: Guilherme Giacomo Simoes Date: Tue, 10 Jun 2025 23:21:23 -0300 Subject: [PATCH 0105/2411] PCI: hotplug: Remove TODO about unused .get_power(), .hardware_test() Remove TODO note resolved by 8ff4574cf73d ("PCI: cpcihp: Remove unused .get_power() and .set_power()") and 5b036cada481 ("PCI: cpcihp: Remove unused struct cpci_hp_controller_ops.hardware_test"). Signed-off-by: Guilherme Giacomo Simoes Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250611022123.201839-1-trintaeoitogc@gmail.com --- drivers/pci/hotplug/TODO | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/pci/hotplug/TODO b/drivers/pci/hotplug/TODO index 92e6e20e8595..7397374af171 100644 --- a/drivers/pci/hotplug/TODO +++ b/drivers/pci/hotplug/TODO @@ -2,10 +2,6 @@ Contributions are solicited in particular to remedy the following issues: cpcihp: -* There are no implementations of the ->hardware_test, ->get_power and - ->set_power callbacks in struct cpci_hp_controller_ops. Why were they - introduced? Can they be removed from the struct? - * Returned code from pci_hp_add_bridge() is not checked. 
cpqphp: From 3b4408038da935be7b1efb7589cc1badb6d10a67 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 11 Jun 2025 21:09:56 +0300 Subject: [PATCH 0106/2411] pinctrl: intel: fix build warnings about export.h After commit a934a57a42f64a4 ("scripts/misc-check: check missing #include <linux/export.h> when W=1") and 7d95680d64ac8e836c ("scripts/misc-check: check unnecessary #include <linux/export.h> when W=1"), we get some build warnings with W=1: pinctrl-intel.c: warning: EXPORT_SYMBOL() is used, but #include <linux/export.h> is missing So fix these build warnings for the driver code. Acked-by: Mika Westerberg Signed-off-by: Andy Shevchenko --- drivers/pinctrl/intel/pinctrl-intel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 846b25ed1cc4..f2ff71e5ea6f 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -9,6 +9,7 @@ #include #include +#include <linux/export.h> #include #include #include From fcc5f586c4edbcc10de23fb9b8c0972a84e945cd Mon Sep 17 00:00:00 2001 From: Hans Zhang <18255117159@163.com> Date: Sun, 8 Jun 2025 00:01:59 +0800 Subject: [PATCH 0107/2411] PCI: rockchip-host: Fix "Unexpected Completion" log message Fix the debug message for the PCIE_CORE_INT_UCR interrupt to clearly indicate "Unexpected Completion" instead of a duplicate "malformed TLP" message.
Fixes: e77f847df54c ("PCI: rockchip: Add Rockchip PCIe controller support") Signed-off-by: Hans Zhang <18255117159@163.com> [mani: added fixes tag] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Manivannan Sadhasivam Acked-by: Shawn Lin Link: https://patch.msgid.link/20250607160201.807043-2-18255117159@163.com --- drivers/pci/controller/pcie-rockchip-host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c index b9e7a8710cf0..648b6fcb93b0 100644 --- a/drivers/pci/controller/pcie-rockchip-host.c +++ b/drivers/pci/controller/pcie-rockchip-host.c @@ -439,7 +439,7 @@ static irqreturn_t rockchip_pcie_subsys_irq_handler(int irq, void *arg) dev_dbg(dev, "malformed TLP received from the link\n"); if (sub_reg & PCIE_CORE_INT_UCR) - dev_dbg(dev, "malformed TLP received from the link\n"); + dev_dbg(dev, "Unexpected Completion received from the link\n"); if (sub_reg & PCIE_CORE_INT_FCE) dev_dbg(dev, "an error was observed in the flow control advertisements from the other side\n"); From 917600e630218ce61aa0551079592cb541391668 Mon Sep 17 00:00:00 2001 From: Hans Zhang <18255117159@163.com> Date: Sun, 8 Jun 2025 00:02:00 +0800 Subject: [PATCH 0108/2411] PCI: rockchip-host: Correct non-fatal error log message Correct the debug message for PCIE_CLIENT_INT_NFATAL_ERR from "no fatal error" to "non fatal error interrupt received" to match the actual interrupt semantics. This avoids confusion in log interpretation. 
Signed-off-by: Hans Zhang <18255117159@163.com> Signed-off-by: Manivannan Sadhasivam Reviewed-by: Manivannan Sadhasivam Acked-by: Shawn Lin Link: https://patch.msgid.link/20250607160201.807043-3-18255117159@163.com --- drivers/pci/controller/pcie-rockchip-host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c index 648b6fcb93b0..63a6012d8937 100644 --- a/drivers/pci/controller/pcie-rockchip-host.c +++ b/drivers/pci/controller/pcie-rockchip-host.c @@ -489,7 +489,7 @@ static irqreturn_t rockchip_pcie_client_irq_handler(int irq, void *arg) dev_dbg(dev, "fatal error interrupt received\n"); if (reg & PCIE_CLIENT_INT_NFATAL_ERR) - dev_dbg(dev, "no fatal error interrupt received\n"); + dev_dbg(dev, "non fatal error interrupt received\n"); if (reg & PCIE_CLIENT_INT_CORR_ERR) dev_dbg(dev, "correctable error interrupt received\n"); From 1fdb13f92388dfc936624b0a0d6abae362b0ace3 Mon Sep 17 00:00:00 2001 From: Hans Zhang <18255117159@163.com> Date: Sun, 8 Jun 2025 00:02:01 +0800 Subject: [PATCH 0109/2411] PCI: rockchip-host: Remove unused header includes Clean up the driver by removing unnecessary header includes that are no longer referenced after the recent refactoring. 
Signed-off-by: Hans Zhang <18255117159@163.com> [mani: commit message rewording] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Manivannan Sadhasivam Acked-by: Shawn Lin Link: https://patch.msgid.link/20250607160201.807043-4-18255117159@163.com --- drivers/pci/controller/pcie-rockchip-host.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c index 63a6012d8937..9b5a4e1de73d 100644 --- a/drivers/pci/controller/pcie-rockchip-host.c +++ b/drivers/pci/controller/pcie-rockchip-host.c @@ -12,26 +12,17 @@ */ #include -#include -#include #include -#include #include #include #include #include #include -#include -#include #include #include #include -#include -#include #include #include -#include -#include #include "../pci.h" #include "pcie-rockchip.h" From a202f09e3e30622fdcae7d740dbf87fb0f032dd5 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Sun, 4 May 2025 08:44:18 +0800 Subject: [PATCH 0110/2411] dt-bindings: pci: Add Sophgo SG2044 PCIe host The PCIe controller on the SG2044 is Designware based with custom app registers. Add binding document for SG2044 PCIe host controller. 
Signed-off-by: Inochi Amaoto Signed-off-by: Manivannan Sadhasivam Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20250504004420.202685-2-inochiama@gmail.com --- .../bindings/pci/sophgo,sg2044-pcie.yaml | 122 ++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 Documentation/devicetree/bindings/pci/sophgo,sg2044-pcie.yaml diff --git a/Documentation/devicetree/bindings/pci/sophgo,sg2044-pcie.yaml b/Documentation/devicetree/bindings/pci/sophgo,sg2044-pcie.yaml new file mode 100644 index 000000000000..ff1133bae3ba --- /dev/null +++ b/Documentation/devicetree/bindings/pci/sophgo,sg2044-pcie.yaml @@ -0,0 +1,122 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pci/sophgo,sg2044-pcie.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: DesignWare based PCIe Root Complex controller on Sophgo SoCs + +maintainers: + - Inochi Amaoto + +description: + SG2044 SoC PCIe Root Complex controller is based on the Synopsys DesignWare + PCIe IP and thus inherits all the common properties defined in + snps,dw-pcie.yaml. + +allOf: + - $ref: /schemas/pci/pci-host-bridge.yaml# + - $ref: /schemas/pci/snps,dw-pcie.yaml# + +properties: + compatible: + const: sophgo,sg2044-pcie + + reg: + items: + - description: Data Bus Interface (DBI) registers + - description: iATU registers + - description: Config registers + - description: Sophgo designed configuration registers + + reg-names: + items: + - const: dbi + - const: atu + - const: config + - const: app + + clocks: + items: + - description: core clk + + clock-names: + items: + - const: core + + interrupt-controller: + description: Interrupt controller node for handling legacy PCI interrupts. 
+ type: object + + properties: + "#address-cells": + const: 0 + + "#interrupt-cells": + const: 1 + + interrupt-controller: true + + interrupts: + items: + - description: combined legacy interrupt + + required: + - "#address-cells" + - "#interrupt-cells" + - interrupt-controller + - interrupts + + additionalProperties: false + + msi-parent: true + + ranges: + maxItems: 5 + +required: + - compatible + - reg + - clocks + +unevaluatedProperties: false + +examples: + - | + #include + + soc { + #address-cells = <2>; + #size-cells = <2>; + + pcie@6c00400000 { + compatible = "sophgo,sg2044-pcie"; + reg = <0x6c 0x00400000 0x0 0x00001000>, + <0x6c 0x00700000 0x0 0x00004000>, + <0x40 0x00000000 0x0 0x00001000>, + <0x6c 0x00780c00 0x0 0x00000400>; + reg-names = "dbi", "atu", "config", "app"; + #address-cells = <3>; + #size-cells = <2>; + bus-range = <0x00 0xff>; + clocks = <&clk 0>; + clock-names = "core"; + device_type = "pci"; + linux,pci-domain = <0>; + msi-parent = <&msi>; + ranges = <0x01000000 0x0 0x00000000 0x40 0x10000000 0x0 0x00200000>, + <0x42000000 0x0 0x00000000 0x0 0x00000000 0x0 0x04000000>, + <0x02000000 0x0 0x04000000 0x0 0x04000000 0x0 0x04000000>, + <0x43000000 0x42 0x00000000 0x42 0x00000000 0x2 0x00000000>, + <0x03000000 0x41 0x00000000 0x41 0x00000000 0x1 0x00000000>; + + interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + interrupt-parent = <&intc>; + interrupts = <64 IRQ_TYPE_LEVEL_HIGH>; + }; + }; + }; +... From 2c75dc82ed96852138ba09908d5f82cadb8ecfff Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 11 Jun 2025 09:27:52 +0200 Subject: [PATCH 0111/2411] pinctrl: renesas: gpio: Use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. 
Signed-off-by: Bartosz Golaszewski Reviewed-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20250611-gpiochip-set-rv-pinctrl-renesas-v1-1-ad169a794ef0@linaro.org Signed-off-by: Geert Uytterhoeven --- drivers/pinctrl/renesas/gpio.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/renesas/gpio.c b/drivers/pinctrl/renesas/gpio.c index a5136dacaaf2..8efbdc1b0078 100644 --- a/drivers/pinctrl/renesas/gpio.c +++ b/drivers/pinctrl/renesas/gpio.c @@ -189,9 +189,11 @@ static int gpio_pin_get(struct gpio_chip *gc, unsigned offset) return (gpio_read_data_reg(chip, reg->info) >> pos) & 1; } -static void gpio_pin_set(struct gpio_chip *gc, unsigned offset, int value) +static int gpio_pin_set(struct gpio_chip *gc, unsigned int offset, int value) { gpio_pin_set_value(gpiochip_get_data(gc), offset, value); + + return 0; } static int gpio_pin_to_irq(struct gpio_chip *gc, unsigned offset) @@ -232,7 +234,7 @@ static int gpio_pin_setup(struct sh_pfc_chip *chip) gc->direction_input = gpio_pin_direction_input; gc->get = gpio_pin_get; gc->direction_output = gpio_pin_direction_output; - gc->set = gpio_pin_set; + gc->set_rv = gpio_pin_set; gc->to_irq = gpio_pin_to_irq; gc->label = pfc->info->name; From 146ea9380f0bba7ff453317c25cdcb0f2a1ce1e9 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 11 Jun 2025 09:27:53 +0200 Subject: [PATCH 0112/2411] pinctrl: renesas: rzg2l: Use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. 
Signed-off-by: Bartosz Golaszewski Reviewed-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20250611-gpiochip-set-rv-pinctrl-renesas-v1-2-ad169a794ef0@linaro.org Signed-off-by: Geert Uytterhoeven --- drivers/pinctrl/renesas/pinctrl-rzg2l.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c index 78fa08ff0faa..59c32a0d87f1 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c +++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c @@ -1758,8 +1758,8 @@ static int rzg2l_gpio_direction_input(struct gpio_chip *chip, return 0; } -static void rzg2l_gpio_set(struct gpio_chip *chip, unsigned int offset, - int value) +static int rzg2l_gpio_set(struct gpio_chip *chip, unsigned int offset, + int value) { struct rzg2l_pinctrl *pctrl = gpiochip_get_data(chip); const struct pinctrl_pin_desc *pin_desc = &pctrl->desc.pins[offset]; @@ -1779,6 +1779,8 @@ static void rzg2l_gpio_set(struct gpio_chip *chip, unsigned int offset, writeb(reg8 & ~BIT(bit), pctrl->base + P(off)); spin_unlock_irqrestore(&pctrl->lock, flags); + + return 0; } static int rzg2l_gpio_direction_output(struct gpio_chip *chip, @@ -2788,7 +2790,7 @@ static int rzg2l_gpio_register(struct rzg2l_pinctrl *pctrl) chip->direction_input = rzg2l_gpio_direction_input; chip->direction_output = rzg2l_gpio_direction_output; chip->get = rzg2l_gpio_get; - chip->set = rzg2l_gpio_set; + chip->set_rv = rzg2l_gpio_set; chip->label = name; chip->parent = pctrl->dev; chip->owner = THIS_MODULE; From c5eab2dfdb671383f685627fac156c1a245e5474 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 11 Jun 2025 09:27:54 +0200 Subject: [PATCH 0113/2411] pinctrl: renesas: rza1: Use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. 
Signed-off-by: Bartosz Golaszewski Reviewed-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20250611-gpiochip-set-rv-pinctrl-renesas-v1-3-ad169a794ef0@linaro.org Signed-off-by: Geert Uytterhoeven --- drivers/pinctrl/renesas/pinctrl-rza1.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/renesas/pinctrl-rza1.c b/drivers/pinctrl/renesas/pinctrl-rza1.c index b1058504e0bb..3d8492c91710 100644 --- a/drivers/pinctrl/renesas/pinctrl-rza1.c +++ b/drivers/pinctrl/renesas/pinctrl-rza1.c @@ -830,12 +830,13 @@ static int rza1_gpio_get(struct gpio_chip *chip, unsigned int gpio) return rza1_pin_get(port, gpio); } -static void rza1_gpio_set(struct gpio_chip *chip, unsigned int gpio, - int value) +static int rza1_gpio_set(struct gpio_chip *chip, unsigned int gpio, int value) { struct rza1_port *port = gpiochip_get_data(chip); rza1_pin_set(port, gpio, value); + + return 0; } static const struct gpio_chip rza1_gpiochip_template = { @@ -845,7 +846,7 @@ static const struct gpio_chip rza1_gpiochip_template = { .direction_input = rza1_gpio_direction_input, .direction_output = rza1_gpio_direction_output, .get = rza1_gpio_get, - .set = rza1_gpio_set, + .set_rv = rza1_gpio_set, }; /* ---------------------------------------------------------------------------- * pinctrl operations From acffb7ccd238cd533f15029b5b6d067300903644 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 11 Jun 2025 09:27:55 +0200 Subject: [PATCH 0114/2411] pinctrl: renesas: rzv2m: Use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. 
Signed-off-by: Bartosz Golaszewski Reviewed-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20250611-gpiochip-set-rv-pinctrl-renesas-v1-4-ad169a794ef0@linaro.org Signed-off-by: Geert Uytterhoeven --- drivers/pinctrl/renesas/pinctrl-rzv2m.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/renesas/pinctrl-rzv2m.c b/drivers/pinctrl/renesas/pinctrl-rzv2m.c index 8c7169db4fcc..a17b68b4c466 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzv2m.c +++ b/drivers/pinctrl/renesas/pinctrl-rzv2m.c @@ -790,14 +790,16 @@ static int rzv2m_gpio_direction_input(struct gpio_chip *chip, return 0; } -static void rzv2m_gpio_set(struct gpio_chip *chip, unsigned int offset, - int value) +static int rzv2m_gpio_set(struct gpio_chip *chip, unsigned int offset, + int value) { struct rzv2m_pinctrl *pctrl = gpiochip_get_data(chip); u32 port = RZV2M_PIN_ID_TO_PORT(offset); u8 bit = RZV2M_PIN_ID_TO_PIN(offset); rzv2m_writel_we(pctrl->base + DO(port), bit, !!value); + + return 0; } static int rzv2m_gpio_direction_output(struct gpio_chip *chip, @@ -955,7 +957,7 @@ static int rzv2m_gpio_register(struct rzv2m_pinctrl *pctrl) chip->direction_input = rzv2m_gpio_direction_input; chip->direction_output = rzv2m_gpio_direction_output; chip->get = rzv2m_gpio_get; - chip->set = rzv2m_gpio_set; + chip->set_rv = rzv2m_gpio_set; chip->label = name; chip->parent = pctrl->dev; chip->owner = THIS_MODULE; From d2fb02624020767f1ee53be0f0f30ef964dbd845 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 11 Jun 2025 09:27:56 +0200 Subject: [PATCH 0115/2411] pinctrl: renesas: rza2: Use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. 
Signed-off-by: Bartosz Golaszewski Reviewed-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20250611-gpiochip-set-rv-pinctrl-renesas-v1-5-ad169a794ef0@linaro.org Signed-off-by: Geert Uytterhoeven --- drivers/pinctrl/renesas/pinctrl-rza2.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/renesas/pinctrl-rza2.c b/drivers/pinctrl/renesas/pinctrl-rza2.c index 3b5812963850..7a0b268d3eb9 100644 --- a/drivers/pinctrl/renesas/pinctrl-rza2.c +++ b/drivers/pinctrl/renesas/pinctrl-rza2.c @@ -172,8 +172,7 @@ static int rza2_chip_get(struct gpio_chip *chip, unsigned int offset) return !!(readb(priv->base + RZA2_PIDR(port)) & BIT(pin)); } -static void rza2_chip_set(struct gpio_chip *chip, unsigned int offset, - int value) +static int rza2_chip_set(struct gpio_chip *chip, unsigned int offset, int value) { struct rza2_pinctrl_priv *priv = gpiochip_get_data(chip); u8 port = RZA2_PIN_ID_TO_PORT(offset); @@ -188,6 +187,8 @@ static void rza2_chip_set(struct gpio_chip *chip, unsigned int offset, new_value &= ~BIT(pin); writeb(new_value, priv->base + RZA2_PODR(port)); + + return 0; } static int rza2_chip_direction_output(struct gpio_chip *chip, @@ -236,7 +237,7 @@ static struct gpio_chip chip = { .direction_input = rza2_chip_direction_input, .direction_output = rza2_chip_direction_output, .get = rza2_chip_get, - .set = rza2_chip_set, + .set_rv = rza2_chip_set, }; static int rza2_gpio_register(struct rza2_pinctrl_priv *priv) From 8ffcb7560b4a15faf821df95e3ab532b2b020f8c Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 13 Jun 2025 19:06:26 -0500 Subject: [PATCH 0116/2411] ipmi: Fix strcpy source and destination the same The source and destination of some strcpy operations was the same. Split out the part of the operations that needed to be done for those particular calls so the unnecessary copy wasn't done. 
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202506140756.EFXXvIP4-lkp@intel.com/ Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_watchdog.c | 59 ++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index ab759b492fdd..a013ddbf1466 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -1146,14 +1146,8 @@ static struct ipmi_smi_watcher smi_watcher = { .smi_gone = ipmi_smi_gone }; -static int action_op(const char *inval, char *outval) +static int action_op_set_val(const char *inval) { - if (outval) - strcpy(outval, action); - - if (!inval) - return 0; - if (strcmp(inval, "reset") == 0) action_val = WDOG_TIMEOUT_RESET; else if (strcmp(inval, "none") == 0) @@ -1164,18 +1158,26 @@ static int action_op(const char *inval, char *outval) action_val = WDOG_TIMEOUT_POWER_DOWN; else return -EINVAL; - strcpy(action, inval); return 0; } -static int preaction_op(const char *inval, char *outval) +static int action_op(const char *inval, char *outval) { + int rv; + if (outval) - strcpy(outval, preaction); + strcpy(outval, action); if (!inval) return 0; + rv = action_op_set_val(inval); + if (!rv) + strcpy(action, inval); + return rv; +} +static int preaction_op_set_val(const char *inval) +{ if (strcmp(inval, "pre_none") == 0) preaction_val = WDOG_PRETIMEOUT_NONE; else if (strcmp(inval, "pre_smi") == 0) @@ -1188,18 +1190,26 @@ static int preaction_op(const char *inval, char *outval) preaction_val = WDOG_PRETIMEOUT_MSG_INT; else return -EINVAL; - strcpy(preaction, inval); return 0; } -static int preop_op(const char *inval, char *outval) +static int preaction_op(const char *inval, char *outval) { + int rv; + if (outval) - strcpy(outval, preop); + strcpy(outval, preaction); if (!inval) return 0; + rv = preaction_op_set_val(inval); + if (!rv) + strcpy(preaction, inval); + return 0; +} +static int 
preop_op_set_val(const char *inval) +{ if (strcmp(inval, "preop_none") == 0) preop_val = WDOG_PREOP_NONE; else if (strcmp(inval, "preop_panic") == 0) @@ -1208,7 +1218,22 @@ static int preop_op(const char *inval, char *outval) preop_val = WDOG_PREOP_GIVE_DATA; else return -EINVAL; - strcpy(preop, inval); + return 0; +} + +static int preop_op(const char *inval, char *outval) +{ + int rv; + + if (outval) + strcpy(outval, preop); + + if (!inval) + return 0; + + rv = preop_op_set_val(inval); + if (!rv) + strcpy(preop, inval); return 0; } @@ -1245,18 +1270,18 @@ static int __init ipmi_wdog_init(void) { int rv; - if (action_op(action, NULL)) { + if (action_op_set_val(action)) { action_op("reset", NULL); pr_info("Unknown action '%s', defaulting to reset\n", action); } - if (preaction_op(preaction, NULL)) { + if (preaction_op_set_val(preaction)) { preaction_op("pre_none", NULL); pr_info("Unknown preaction '%s', defaulting to none\n", preaction); } - if (preop_op(preop, NULL)) { + if (preop_op_set_val(preop)) { preop_op("preop_none", NULL); pr_info("Unknown preop '%s', defaulting to none\n", preop); } From 9f77d234c1f41e78a99f124bf6fee59dc2e3d46f Mon Sep 17 00:00:00 2001 From: Igor Belwon Date: Thu, 15 May 2025 16:43:01 +0200 Subject: [PATCH 0117/2411] dt-bindings: phy: samsung,usb3-drd-phy: Add exynos990 compatible Add a compatible for the exynos990-usbdrd-phy. The PHY is compatible with the older exynos5420 design (two clocks) when running in highspeed mode. 
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Igor Belwon Link: https://lore.kernel.org/r/20250515-usb-resends-may-15-v3-1-ad33a85b6cee@mentallysanemainliners.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml b/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml index cc60d2f6f70e..e906403208c0 100644 --- a/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml +++ b/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml @@ -33,6 +33,7 @@ properties: - samsung,exynos7-usbdrd-phy - samsung,exynos7870-usbdrd-phy - samsung,exynos850-usbdrd-phy + - samsung,exynos990-usbdrd-phy clocks: minItems: 1 @@ -217,6 +218,7 @@ allOf: - samsung,exynos5420-usbdrd-phy - samsung,exynos7870-usbdrd-phy - samsung,exynos850-usbdrd-phy + - samsung,exynos990-usbdrd-phy then: properties: clocks: From 385a766bed48c5bcf620061f24e864dafeca671a Mon Sep 17 00:00:00 2001 From: Igor Belwon Date: Thu, 15 May 2025 16:43:02 +0200 Subject: [PATCH 0118/2411] phy: exynos5-usbdrd: Add support for the Exynos990 usbdrd phy The Exynos990 usbdrd PHY is a combo PHY which supports USB SS, HS and DisplayPort outputs. This commit adds support only for UTMI+ (USB HS). 
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Igor Belwon Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250515-usb-resends-may-15-v3-2-ad33a85b6cee@mentallysanemainliners.org Signed-off-by: Vinod Koul --- drivers/phy/samsung/phy-exynos5-usbdrd.c | 32 +++++++++++++++++++++ include/linux/soc/samsung/exynos-regs-pmu.h | 3 ++ 2 files changed, 35 insertions(+) diff --git a/drivers/phy/samsung/phy-exynos5-usbdrd.c b/drivers/phy/samsung/phy-exynos5-usbdrd.c index 917a76d584f0..dd660ebe8045 100644 --- a/drivers/phy/samsung/phy-exynos5-usbdrd.c +++ b/drivers/phy/samsung/phy-exynos5-usbdrd.c @@ -2025,6 +2025,35 @@ static const struct exynos5_usbdrd_phy_drvdata exynos850_usbdrd_phy = { .n_regulators = ARRAY_SIZE(exynos5_regulator_names), }; +static const struct exynos5_usbdrd_phy_tuning exynos990_tunes_utmi_postinit[] = { + PHY_TUNING_ENTRY_PHY(EXYNOS850_DRD_HSPPARACON, + (HSPPARACON_TXVREF | + HSPPARACON_TXPREEMPAMP | HSPPARACON_SQRX | + HSPPARACON_COMPDIS), + (FIELD_PREP_CONST(HSPPARACON_TXVREF, 7) | + FIELD_PREP_CONST(HSPPARACON_TXPREEMPAMP, 3) | + FIELD_PREP_CONST(HSPPARACON_SQRX, 5) | + FIELD_PREP_CONST(HSPPARACON_COMPDIS, 7))), + PHY_TUNING_ENTRY_LAST +}; + +static const struct exynos5_usbdrd_phy_tuning *exynos990_tunes[PTS_MAX] = { + [PTS_UTMI_POSTINIT] = exynos990_tunes_utmi_postinit, +}; + +static const struct exynos5_usbdrd_phy_drvdata exynos990_usbdrd_phy = { + .phy_cfg = phy_cfg_exynos850, + .phy_ops = &exynos850_usbdrd_phy_ops, + .phy_tunes = exynos990_tunes, + .pmu_offset_usbdrd0_phy = EXYNOS990_PHY_CTRL_USB20, + .clk_names = exynos5_clk_names, + .n_clks = ARRAY_SIZE(exynos5_clk_names), + .core_clk_names = exynos5_core_clk_names, + .n_core_clks = ARRAY_SIZE(exynos5_core_clk_names), + .regulator_names = exynos5_regulator_names, + .n_regulators = ARRAY_SIZE(exynos5_regulator_names), +}; + static const struct exynos5_usbdrd_phy_config phy_cfg_gs101[] = { { .id = EXYNOS5_DRDPHY_UTMI, @@ -2228,6 +2257,9 @@ static const struct of_device_id 
exynos5_usbdrd_phy_of_match[] = { }, { .compatible = "samsung,exynos850-usbdrd-phy", .data = &exynos850_usbdrd_phy + }, { + .compatible = "samsung,exynos990-usbdrd-phy", + .data = &exynos990_usbdrd_phy }, { }, }; diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index 1a2c0e0838f9..7754697e5810 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -662,6 +662,9 @@ #define EXYNOS5433_PAD_RETENTION_UFS_OPTION (0x3268) #define EXYNOS5433_PAD_RETENTION_FSYSGENIO_OPTION (0x32A8) +/* For Exynos990 */ +#define EXYNOS990_PHY_CTRL_USB20 (0x72C) + /* For Tensor GS101 */ /* PMU ALIVE */ #define GS101_SYSIP_DAT0 (0x810) From 72bf1441231ab421a380771e37a5c595493db178 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 15 Jun 2025 22:32:51 +0900 Subject: [PATCH 0119/2411] firewire: core: allocate workqueue for AR/AT request/response contexts Some tasklets (softIRQs) are still used as bottom-halves to handle events for 1394 OHCI AR/AT contexts. However, using softIRQs for IRQ bottom halves is generally discouraged today. This commit adds a per-fw_card workqueue to accommodate the behaviour specified by the 1394 OHCI specification. According to the 1394 OHCI specification, system memory pages are reserved for each asynchronous DMA context. This allows concurrent operation across contexts. In the 1394 OHCI PCI driver implementation, the hardware generates IRQs either upon receiving asynchronous packets from other nodes (incoming) or after completing transmission to them (outgoing). These independent events can occur in the same transmission cycle, therefore the max_active parameter for the workqueue is set to the total number of AR/AT contexts (=4). The WQ_UNBOUND flag is used to allow the work to be scheduled on any available core, since there is little CPU cache affinity benefit for the data. 
Each DMA context uses a circular descriptor list in system memory, allowing deferred data processing in software as long as buffer overruns are avoided. Since the overall operation is sleepable except for small atomic regions, WQ_BH is not used. As the descriptors contain timestamps, WQ_HIGHPRI is specified to support semi-real-time processing. The asynchronous context is also used by the SCSI over IEEE 1394 protocol implementation (sbp2), which can be part of memory reclaim paths. Therefore, WQ_MEM_RECLAIM is required. To allow users to adjust CPU affinity according to workload, WQ_SYSFS is specified so that workqueue attributes are exposed to user space. Link: https://lore.kernel.org/r/20250615133253.433057-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-card.c | 48 +++++++++++++++++++++++++----------- include/linux/firewire.h | 1 + 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 2b6ad47b6d57..b3e48ca516fe 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -574,7 +574,6 @@ EXPORT_SYMBOL(fw_card_initialize); int fw_card_add(struct fw_card *card, u32 max_receive, u32 link_speed, u64 guid, unsigned int supported_isoc_contexts) { - struct workqueue_struct *isoc_wq; int ret; // This workqueue should be: @@ -589,29 +588,48 @@ int fw_card_add(struct fw_card *card, u32 max_receive, u32 link_speed, u64 guid, // * == WQ_SYSFS Parameters are available via sysfs. // * max_active == n_it + n_ir A hardIRQ could notify events for multiple isochronous // contexts if they are scheduled to the same cycle.
- isoc_wq = alloc_workqueue("firewire-isoc-card%u", - WQ_UNBOUND | WQ_FREEZABLE | WQ_HIGHPRI | WQ_SYSFS, - supported_isoc_contexts, card->index); - if (!isoc_wq) + card->isoc_wq = alloc_workqueue("firewire-isoc-card%u", + WQ_UNBOUND | WQ_FREEZABLE | WQ_HIGHPRI | WQ_SYSFS, + supported_isoc_contexts, card->index); + if (!card->isoc_wq) return -ENOMEM; + // This workqueue should be: + // * != WQ_BH Sleepable. + // * == WQ_UNBOUND Any core can process data for asynchronous context. + // * == WQ_MEM_RECLAIM Used for any backend of block device. + // * == WQ_FREEZABLE The target device would not be available when being freezed. + // * == WQ_HIGHPRI High priority to process semi-realtime timestamped data. + // * == WQ_SYSFS Parameters are available via sysfs. + // * max_active == 4 A hardIRQ could notify events for a pair of requests and + // response AR/AT contexts. + card->async_wq = alloc_workqueue("firewire-async-card%u", + WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_HIGHPRI | WQ_SYSFS, + 4, card->index); + if (!card->async_wq) { + ret = -ENOMEM; + goto err_isoc; + } + card->max_receive = max_receive; card->link_speed = link_speed; card->guid = guid; - guard(mutex)(&card_mutex); + scoped_guard(mutex, &card_mutex) { + generate_config_rom(card, tmp_config_rom); + ret = card->driver->enable(card, tmp_config_rom, config_rom_length); + if (ret < 0) + goto err_async; - generate_config_rom(card, tmp_config_rom); - ret = card->driver->enable(card, tmp_config_rom, config_rom_length); - if (ret < 0) { - destroy_workqueue(isoc_wq); - return ret; + list_add_tail(&card->link, &card_list); } - card->isoc_wq = isoc_wq; - list_add_tail(&card->link, &card_list); - return 0; +err_async: + destroy_workqueue(card->async_wq); +err_isoc: + destroy_workqueue(card->isoc_wq); + return ret; } EXPORT_SYMBOL(fw_card_add); @@ -744,6 +762,7 @@ void fw_core_remove_card(struct fw_card *card) dummy_driver.stop_iso = card->driver->stop_iso; card->driver = &dummy_driver; 
drain_workqueue(card->isoc_wq); + drain_workqueue(card->async_wq); scoped_guard(spinlock_irqsave, &card->lock) fw_destroy_nodes(card); @@ -753,6 +772,7 @@ void fw_core_remove_card(struct fw_card *card) wait_for_completion(&card->done); destroy_workqueue(card->isoc_wq); + destroy_workqueue(card->async_wq); WARN_ON(!list_empty(&card->transaction_list)); } diff --git a/include/linux/firewire.h b/include/linux/firewire.h index b632eec3ab52..c55b8e30e700 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -136,6 +136,7 @@ struct fw_card { __be32 maint_utility_register; struct workqueue_struct *isoc_wq; + struct workqueue_struct *async_wq; }; static inline struct fw_card *fw_card_get(struct fw_card *card) From 57e6d9f85fff3a71e667628474063c1bbb2fad20 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 15 Jun 2025 22:32:52 +0900 Subject: [PATCH 0120/2411] firewire: ohci: use workqueue to handle events of AR request/response contexts This commit adds a work item to handle events of 1394 OHCI AR request/response contexts, and queues the item to the specific workqueue. The call of struct fw_address_handler.address_callback() is done in the workqueue when receiving any requests from the remote nodes. Additionally, the call of struct fw_packet.callback() is done in the workqueue too when receiving acknowledge to the asynchronous packet for the response subaction of split transaction to the remote nodes.
Link: https://lore.kernel.org/r/20250615133253.433057-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 7 ++++--- drivers/firewire/ohci.c | 27 +++++++++++---------------- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 2bd5deb9054e..d28477d84697 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -557,9 +557,10 @@ const struct fw_address_region fw_unit_space_region = * * region->start, ->end, and handler->length have to be quadlet-aligned. * - * When a request is received that falls within the specified address range, - * the specified callback is invoked. The parameters passed to the callback - * give the details of the particular request. + * When a request is received that falls within the specified address range, the specified callback + * is invoked. The parameters passed to the callback give the details of the particular request. + * The callback is invoked in the workqueue context in most cases. However, if the request is + * initiated by the local node, the callback is invoked in the initiator's context. * * To be called in process context. * Return value: 0 on success, non-zero otherwise. 
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 40313a3ec63e..68317b5a64a7 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -101,7 +101,7 @@ struct ar_context { void *pointer; unsigned int last_buffer_index; u32 regs; - struct tasklet_struct tasklet; + struct work_struct work; }; struct context; @@ -1016,9 +1016,9 @@ static void ar_recycle_buffers(struct ar_context *ctx, unsigned int end_buffer) } } -static void ar_context_tasklet(unsigned long data) +static void ohci_ar_context_work(struct work_struct *work) { - struct ar_context *ctx = (struct ar_context *)data; + struct ar_context *ctx = from_work(ctx, work, work); unsigned int end_buffer_index, end_buffer_offset; void *p, *end; @@ -1026,23 +1026,19 @@ static void ar_context_tasklet(unsigned long data) if (!p) return; - end_buffer_index = ar_search_last_active_buffer(ctx, - &end_buffer_offset); + end_buffer_index = ar_search_last_active_buffer(ctx, &end_buffer_offset); ar_sync_buffers_for_cpu(ctx, end_buffer_index, end_buffer_offset); end = ctx->buffer + end_buffer_index * PAGE_SIZE + end_buffer_offset; if (end_buffer_index < ar_first_buffer_index(ctx)) { - /* - * The filled part of the overall buffer wraps around; handle - * all packets up to the buffer end here. If the last packet - * wraps around, its tail will be visible after the buffer end - * because the buffer start pages are mapped there again. - */ + // The filled part of the overall buffer wraps around; handle all packets up to the + // buffer end here. If the last packet wraps around, its tail will be visible after + // the buffer end because the buffer start pages are mapped there again. 
void *buffer_end = ctx->buffer + AR_BUFFERS * PAGE_SIZE; p = handle_ar_packets(ctx, p, buffer_end); if (p < buffer_end) goto error; - /* adjust p to point back into the actual buffer */ + // adjust p to point back into the actual buffer p -= AR_BUFFERS * PAGE_SIZE; } @@ -1057,7 +1053,6 @@ static void ar_context_tasklet(unsigned long data) ar_recycle_buffers(ctx, end_buffer_index); return; - error: ctx->pointer = NULL; } @@ -1073,7 +1068,7 @@ static int ar_context_init(struct ar_context *ctx, struct fw_ohci *ohci, ctx->regs = regs; ctx->ohci = ohci; - tasklet_init(&ctx->tasklet, ar_context_tasklet, (unsigned long)ctx); + INIT_WORK(&ctx->work, ohci_ar_context_work); for (i = 0; i < AR_BUFFERS; i++) { ctx->pages[i] = dma_alloc_pages(dev, PAGE_SIZE, &dma_addr, @@ -2238,10 +2233,10 @@ static irqreturn_t irq_handler(int irq, void *data) } if (event & OHCI1394_RQPkt) - tasklet_schedule(&ohci->ar_request_ctx.tasklet); + queue_work(ohci->card.async_wq, &ohci->ar_request_ctx.work); if (event & OHCI1394_RSPkt) - tasklet_schedule(&ohci->ar_response_ctx.tasklet); + queue_work(ohci->card.async_wq, &ohci->ar_response_ctx.work); if (event & OHCI1394_reqTxComplete) tasklet_schedule(&ohci->at_request_ctx.tasklet); From aef6bcc0f278eba408751f8b3e0beae992e9faec Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 15 Jun 2025 22:32:53 +0900 Subject: [PATCH 0121/2411] firewire: ohci: use workqueue to handle events of AT request/response contexts This commit adds a work item to handle events of 1394 OHCI AT request/response contexts, and queues the item to the specific workqueue. The call of struct fw_packet.callback() is done in the workqueue when receiving acknowledgement to the asynchronous packet transferred to remote node.
Link: https://lore.kernel.org/r/20250615133253.433057-4-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/net.c | 4 ++-- drivers/firewire/ohci.c | 40 ++++++++++++++++++++++++---------------- include/linux/firewire.h | 11 +++++++++-- 3 files changed, 35 insertions(+), 20 deletions(-) diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 1bf0e15c1540..6d6446713539 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -1007,7 +1007,7 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask) spin_lock_irqsave(&dev->lock, flags); - /* If the AT tasklet already ran, we may be last user. */ + /* If the AT work item already ran, we may be last user. */ free = (ptask->outstanding_pkts == 0 && !ptask->enqueued); if (!free) ptask->enqueued = true; @@ -1026,7 +1026,7 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask) spin_lock_irqsave(&dev->lock, flags); - /* If the AT tasklet already ran, we may be last user. */ + /* If the AT work item already ran, we may be last user. 
*/ free = (ptask->outstanding_pkts == 0 && !ptask->enqueued); if (!free) ptask->enqueued = true; diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 68317b5a64a7..709a714fd5c8 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -158,7 +158,7 @@ struct context { descriptor_callback_t callback; - struct tasklet_struct tasklet; + struct work_struct work; }; struct iso_context { @@ -1176,9 +1176,9 @@ static void context_retire_descriptors(struct context *ctx) } } -static void context_tasklet(unsigned long data) +static void ohci_at_context_work(struct work_struct *work) { - struct context *ctx = (struct context *) data; + struct context *ctx = from_work(ctx, work, work); context_retire_descriptors(ctx); } @@ -1243,7 +1243,6 @@ static int context_init(struct context *ctx, struct fw_ohci *ohci, ctx->buffer_tail = list_entry(ctx->buffer_list.next, struct descriptor_buffer, list); - tasklet_init(&ctx->tasklet, context_tasklet, (unsigned long)ctx); ctx->callback = callback; /* @@ -1524,13 +1523,17 @@ static int at_context_queue_packet(struct context *ctx, static void at_context_flush(struct context *ctx) { - tasklet_disable(&ctx->tasklet); + // Avoid dead lock due to programming mistake. + if (WARN_ON_ONCE(current_work() == &ctx->work)) + return; - ctx->flushing = true; - context_tasklet((unsigned long)ctx); - ctx->flushing = false; + disable_work_sync(&ctx->work); - tasklet_enable(&ctx->tasklet); + WRITE_ONCE(ctx->flushing, true); + ohci_at_context_work(&ctx->work); + WRITE_ONCE(ctx->flushing, false); + + enable_work(&ctx->work); } static int handle_at_packet(struct context *context, @@ -1542,7 +1545,7 @@ static int handle_at_packet(struct context *context, struct fw_ohci *ohci = context->ohci; int evt; - if (last->transfer_status == 0 && !context->flushing) + if (last->transfer_status == 0 && !READ_ONCE(context->flushing)) /* This descriptor isn't done yet, stop iteration. 
*/ return 0; @@ -1576,7 +1579,7 @@ static int handle_at_packet(struct context *context, break; case OHCI1394_evt_missing_ack: - if (context->flushing) + if (READ_ONCE(context->flushing)) packet->ack = RCODE_GENERATION; else { /* @@ -1598,7 +1601,7 @@ static int handle_at_packet(struct context *context, break; case OHCI1394_evt_no_status: - if (context->flushing) { + if (READ_ONCE(context->flushing)) { packet->ack = RCODE_GENERATION; break; } @@ -2239,10 +2242,10 @@ static irqreturn_t irq_handler(int irq, void *data) queue_work(ohci->card.async_wq, &ohci->ar_response_ctx.work); if (event & OHCI1394_reqTxComplete) - tasklet_schedule(&ohci->at_request_ctx.tasklet); + queue_work(ohci->card.async_wq, &ohci->at_request_ctx.work); if (event & OHCI1394_respTxComplete) - tasklet_schedule(&ohci->at_response_ctx.tasklet); + queue_work(ohci->card.async_wq, &ohci->at_response_ctx.work); if (event & OHCI1394_isochRx) { iso_event = reg_read(ohci, OHCI1394_IsoRecvIntEventClear); @@ -2684,7 +2687,10 @@ static int ohci_cancel_packet(struct fw_card *card, struct fw_packet *packet) struct driver_data *driver_data = packet->driver_data; int ret = -ENOENT; - tasklet_disable_in_atomic(&ctx->tasklet); + // Avoid dead lock due to programming mistake. 
+ if (WARN_ON_ONCE(current_work() == &ctx->work)) + return 0; + disable_work_sync(&ctx->work); if (packet->ack != 0) goto out; @@ -2703,7 +2709,7 @@ static int ohci_cancel_packet(struct fw_card *card, struct fw_packet *packet) packet->callback(packet, &ohci->card, packet->ack); ret = 0; out: - tasklet_enable(&ctx->tasklet); + enable_work(&ctx->work); return ret; } @@ -3765,11 +3771,13 @@ static int pci_probe(struct pci_dev *dev, OHCI1394_AsReqTrContextControlSet, handle_at_packet); if (err < 0) return err; + INIT_WORK(&ohci->at_request_ctx.work, ohci_at_context_work); err = context_init(&ohci->at_response_ctx, ohci, OHCI1394_AsRspTrContextControlSet, handle_at_packet); if (err < 0) return err; + INIT_WORK(&ohci->at_response_ctx.work, ohci_at_context_work); reg_write(ohci, OHCI1394_IsoRecvIntMaskSet, ~0); ohci->ir_context_channels = ~0ULL; diff --git a/include/linux/firewire.h b/include/linux/firewire.h index c55b8e30e700..cceb70415ed2 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -308,8 +308,7 @@ struct fw_packet { * For successful transmission, the status code is the ack received * from the destination. Otherwise it is one of the juju-specific * rcodes: RCODE_SEND_ERROR, _CANCELLED, _BUSY, _GENERATION, _NO_ACK. - * The callback can be called from tasklet context and thus - * must never block. + * The callback can be called from workqueue and thus must never block. */ fw_packet_callback_t callback; int ack; @@ -382,6 +381,10 @@ void __fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode * * A variation of __fw_send_request() to generate callback for response subaction without time * stamp. + * + * The callback is invoked in the workqueue context in most cases. However, if an error is detected + * before queueing or the destination address refers to the local node, it is invoked in the + * current context instead. 
*/ static inline void fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode, int destination_id, int generation, int speed, @@ -411,6 +414,10 @@ static inline void fw_send_request(struct fw_card *card, struct fw_transaction * * @callback_data: data to be passed to the transaction completion callback * * A variation of __fw_send_request() to generate callback for response subaction with time stamp. + * + * The callback is invoked in the workqueue context in most cases. However, if an error is detected + * before queueing or the destination address refers to the local node, it is invoked in the current + * context instead. */ static inline void fw_send_request_with_tstamp(struct fw_card *card, struct fw_transaction *t, int tcode, int destination_id, int generation, int speed, unsigned long long offset, From 2ac5840594b2cc2b41116f708241a2a61d9108bd Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Thu, 12 Jun 2025 20:39:29 +0530 Subject: [PATCH 0122/2411] dt-bindings: phy: samsung,mipi-video-phy: document exynos7870 MIPI phy The Exynos7870 MIPI PHY device contains one DSIM PHY block and three CSIS PHY blocks. It also requires two sysregs, one for display, and the other for cameras. Document this device. 
Signed-off-by: Kaustabh Chakraborty Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250612-exynos7870-mipi-phy-v1-1-3fff0b62d9d3@disroot.org Signed-off-by: Vinod Koul --- .../bindings/phy/samsung,mipi-video-phy.yaml | 29 ++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/samsung,mipi-video-phy.yaml b/Documentation/devicetree/bindings/phy/samsung,mipi-video-phy.yaml index b2250e4a6b1b..16967ef8e9ec 100644 --- a/Documentation/devicetree/bindings/phy/samsung,mipi-video-phy.yaml +++ b/Documentation/devicetree/bindings/phy/samsung,mipi-video-phy.yaml @@ -29,6 +29,7 @@ properties: - samsung,s5pv210-mipi-video-phy - samsung,exynos5420-mipi-video-phy - samsung,exynos5433-mipi-video-phy + - samsung,exynos7870-mipi-video-phy "#phy-cells": const: 1 @@ -46,19 +47,20 @@ properties: deprecated: true description: Phandle to PMU system controller interface, valid for - samsung,exynos5433-mipi-video-phy (if not a child of PMU). + samsung,exynos5433-mipi-video-phy and samsung,exynos7870-mipi-video-phy + (if not a child of PMU). samsung,disp-sysreg: $ref: /schemas/types.yaml#/definitions/phandle description: Phandle to DISP system controller interface, valid for - samsung,exynos5433-mipi-video-phy. + samsung,exynos5433-mipi-video-phy and samsung,exynos7870-mipi-video-phy. samsung,cam0-sysreg: $ref: /schemas/types.yaml#/definitions/phandle description: Phandle to CAM0 system controller interface, valid for - samsung,exynos5433-mipi-video-phy. + samsung,exynos5433-mipi-video-phy and samsung,exynos7870-mipi-video-phy. 
samsung,cam1-sysreg: $ref: /schemas/types.yaml#/definitions/phandle @@ -84,7 +86,13 @@ allOf: samsung,disp-sysreg: false samsung,cam0-sysreg: false samsung,cam1-sysreg: false - else: + + - if: + properties: + compatible: + contains: + const: samsung,exynos5433-mipi-video-phy + then: properties: syscon: false required: @@ -92,6 +100,19 @@ allOf: - samsung,cam0-sysreg - samsung,cam1-sysreg + - if: + properties: + compatible: + contains: + const: samsung,exynos7870-mipi-video-phy + then: + properties: + syscon: false + samsung,cam1-sysreg: false + required: + - samsung,disp-sysreg + - samsung,cam0-sysreg + additionalProperties: false examples: From 543f5e314282c4c2e5114f88ddecc9aeaf0985e2 Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Thu, 12 Jun 2025 20:39:30 +0530 Subject: [PATCH 0123/2411] phy: exynos-mipi-video: introduce support for exynos7870 Add support for Exynos7870 in the existing MIPI CSIS/DSIM driver. The SoC has one DSIM phy and three CSIS phys. Signed-off-by: Kaustabh Chakraborty Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250612-exynos7870-mipi-phy-v1-2-3fff0b62d9d3@disroot.org Signed-off-by: Vinod Koul --- drivers/phy/samsung/phy-exynos-mipi-video.c | 52 +++++++++++++++++++++ include/linux/soc/samsung/exynos-regs-pmu.h | 5 ++ 2 files changed, 57 insertions(+) diff --git a/drivers/phy/samsung/phy-exynos-mipi-video.c b/drivers/phy/samsung/phy-exynos-mipi-video.c index f6756a609a9a..b184923b9b40 100644 --- a/drivers/phy/samsung/phy-exynos-mipi-video.c +++ b/drivers/phy/samsung/phy-exynos-mipi-video.c @@ -213,6 +213,55 @@ static const struct mipi_phy_device_desc exynos5433_mipi_phy = { }, }; +static const struct mipi_phy_device_desc exynos7870_mipi_phy = { + .num_regmaps = 3, + .regmap_names = { + "samsung,pmu-syscon", + "samsung,disp-sysreg", + "samsung,cam-sysreg" + }, + .num_phys = 4, + .phys = { + { + /* EXYNOS_MIPI_PHY_ID_CSIS0 */ + .coupled_phy_id = EXYNOS_MIPI_PHY_ID_DSIM0, + .enable_val = EXYNOS4_PHY_ENABLE, + 
.enable_reg = EXYNOS7870_MIPI_PHY_CONTROL0, + .enable_map = EXYNOS_MIPI_REGMAP_PMU, + .resetn_val = BIT(0), + .resetn_reg = 0, + .resetn_map = EXYNOS_MIPI_REGMAP_CAM0, + }, { + /* EXYNOS_MIPI_PHY_ID_DSIM0 */ + .coupled_phy_id = EXYNOS_MIPI_PHY_ID_CSIS0, + .enable_val = EXYNOS4_PHY_ENABLE, + .enable_reg = EXYNOS7870_MIPI_PHY_CONTROL0, + .enable_map = EXYNOS_MIPI_REGMAP_PMU, + .resetn_val = BIT(0), + .resetn_reg = 0, + .resetn_map = EXYNOS_MIPI_REGMAP_DISP, + }, { + /* EXYNOS_MIPI_PHY_ID_CSIS1 */ + .coupled_phy_id = EXYNOS_MIPI_PHY_ID_NONE, + .enable_val = EXYNOS4_PHY_ENABLE, + .enable_reg = EXYNOS7870_MIPI_PHY_CONTROL1, + .enable_map = EXYNOS_MIPI_REGMAP_PMU, + .resetn_val = BIT(1), + .resetn_reg = 0, + .resetn_map = EXYNOS_MIPI_REGMAP_CAM0, + }, { + /* EXYNOS_MIPI_PHY_ID_CSIS2 */ + .coupled_phy_id = EXYNOS_MIPI_PHY_ID_NONE, + .enable_val = EXYNOS4_PHY_ENABLE, + .enable_reg = EXYNOS7870_MIPI_PHY_CONTROL2, + .enable_map = EXYNOS_MIPI_REGMAP_PMU, + .resetn_val = BIT(2), + .resetn_reg = 0, + .resetn_map = EXYNOS_MIPI_REGMAP_CAM0, + }, + }, +}; + struct exynos_mipi_video_phy { struct regmap *regmaps[EXYNOS_MIPI_REGMAPS_NUM]; int num_phys; @@ -351,6 +400,9 @@ static const struct of_device_id exynos_mipi_video_phy_of_match[] = { }, { .compatible = "samsung,exynos5433-mipi-video-phy", .data = &exynos5433_mipi_phy, + }, { + .compatible = "samsung,exynos7870-mipi-video-phy", + .data = &exynos7870_mipi_phy, }, { /* sentinel */ }, }; diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index 7754697e5810..fa28a8784d65 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -665,6 +665,11 @@ /* For Exynos990 */ #define EXYNOS990_PHY_CTRL_USB20 (0x72C) +/* For Exynos7870 */ +#define EXYNOS7870_MIPI_PHY_CONTROL0 (0x070c) +#define EXYNOS7870_MIPI_PHY_CONTROL1 (0x0714) +#define EXYNOS7870_MIPI_PHY_CONTROL2 (0x0734) + /* For Tensor GS101 */ /* PMU ALIVE */ #define GS101_SYSIP_DAT0 
(0x810) From 6767df73f2d36e7d1cf0eb3c4d9469c7e9fe9824 Mon Sep 17 00:00:00 2001 From: Swapnil Jakhade Date: Tue, 10 Jun 2025 18:31:33 +0530 Subject: [PATCH 0124/2411] phy: cadence: Sierra: Add PCIe + USB PHY multilink configuration Add register sequences for PCIe + USB multilink configuration for Sierra PHY. Signed-off-by: Swapnil Jakhade Signed-off-by: Siddharth Vadapalli Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250610130133.2102196-1-s-vadapalli@ti.com Signed-off-by: Vinod Koul --- drivers/phy/cadence/phy-cadence-sierra.c | 180 +++++++++++++++++++++++ 1 file changed, 180 insertions(+) diff --git a/drivers/phy/cadence/phy-cadence-sierra.c b/drivers/phy/cadence/phy-cadence-sierra.c index 45a5c00843bf..74613382ccb0 100644 --- a/drivers/phy/cadence/phy-cadence-sierra.c +++ b/drivers/phy/cadence/phy-cadence-sierra.c @@ -58,8 +58,11 @@ #define SIERRA_CMN_PLLLC1_GEN_PREG 0xC2 #define SIERRA_CMN_PLLLC1_FBDIV_INT_PREG 0xC3 #define SIERRA_CMN_PLLLC1_DCOCAL_CTRL_PREG 0xC5 +#define SIERRA_CMN_PLLLC1_MODE_PREG 0xC8 +#define SIERRA_CMN_PLLLC1_LF_COEFF_MODE1_PREG 0xC9 #define SIERRA_CMN_PLLLC1_LF_COEFF_MODE0_PREG 0xCA #define SIERRA_CMN_PLLLC1_CLK0_PREG 0xCE +#define SIERRA_CMN_PLLLC1_BWCAL_MODE1_PREG 0xCF #define SIERRA_CMN_PLLLC1_BWCAL_MODE0_PREG 0xD0 #define SIERRA_CMN_PLLLC1_SS_TIME_STEPSIZE_MODE_PREG 0xE2 @@ -1541,6 +1544,137 @@ static void cdns_sierra_phy_remove(struct platform_device *pdev) cdns_sierra_clk_unregister(phy); } +/* USB refclk 100MHz, 20b, SuperSpeed opt2, ext ssc, PLL LC1, multilink */ +static const struct cdns_reg_pairs usb_100_ext_ssc_plllc1_cmn_regs[] = { + {0x002D, SIERRA_CMN_PLLLC1_FBDIV_INT_PREG}, + {0x2086, SIERRA_CMN_PLLLC1_LF_COEFF_MODE1_PREG}, + {0x2086, SIERRA_CMN_PLLLC1_LF_COEFF_MODE0_PREG}, + {0x1005, SIERRA_CMN_PLLLC1_CLK0_PREG}, + {0x0000, SIERRA_CMN_PLLLC1_BWCAL_MODE1_PREG}, + {0x0000, SIERRA_CMN_PLLLC1_BWCAL_MODE0_PREG}, + {0x0000, SIERRA_CMN_PLLLC1_SS_TIME_STEPSIZE_MODE_PREG} +}; + +/* USB refclk 100MHz, 20b, SuperSpeed 
opt2, int ssc, PLL LC1, multilink */ +static const struct cdns_reg_pairs usb_100_int_ssc_plllc1_cmn_regs[] = { + {0x002D, SIERRA_CMN_PLLLC1_FBDIV_INT_PREG}, + {0x000E, SIERRA_CMN_PLLLC1_MODE_PREG}, + {0x1005, SIERRA_CMN_PLLLC1_CLK0_PREG} +}; + +static const struct cdns_reg_pairs usb_100_ml_ln_regs[] = { + {0xFE0A, SIERRA_DET_STANDEC_A_PREG}, + {0x000F, SIERRA_DET_STANDEC_B_PREG}, + {0x55A5, SIERRA_DET_STANDEC_C_PREG}, + {0x69AD, SIERRA_DET_STANDEC_D_PREG}, + {0x0241, SIERRA_DET_STANDEC_E_PREG}, + {0x0010, SIERRA_PSM_LANECAL_DLY_A1_RESETS_PREG}, + {0x0014, SIERRA_PSM_A0IN_TMR_PREG}, + {0x001D, SIERRA_PSM_A3IN_TMR_PREG}, + {0x0004, SIERRA_PSC_LN_A3_PREG}, + {0x0004, SIERRA_PSC_LN_IDLE_PREG}, + {0x001F, SIERRA_PSC_TX_A0_PREG}, + {0x0007, SIERRA_PSC_TX_A1_PREG}, + {0x0003, SIERRA_PSC_TX_A2_PREG}, + {0x0003, SIERRA_PSC_TX_A3_PREG}, + {0x0FFF, SIERRA_PSC_RX_A0_PREG}, + {0x0619, SIERRA_PSC_RX_A1_PREG}, + {0x0003, SIERRA_PSC_RX_A2_PREG}, + {0x0001, SIERRA_PSC_RX_A3_PREG}, + {0x0606, SIERRA_PLLCTRL_FBDIV_MODE01_PREG}, + {0x0001, SIERRA_PLLCTRL_SUBRATE_PREG}, + {0x0003, SIERRA_PLLCTRL_GEN_A_PREG}, + {0x0406, SIERRA_PLLCTRL_GEN_D_PREG}, + {0x5211, SIERRA_PLLCTRL_CPGAIN_MODE_PREG}, + {0x00CA, SIERRA_CLKPATH_BIASTRIM_PREG}, + {0x2512, SIERRA_DFE_BIASTRIM_PREG}, + {0x0000, SIERRA_DRVCTRL_ATTEN_PREG}, + {0x823E, SIERRA_CLKPATHCTRL_TMR_PREG}, + {0x078F, SIERRA_RX_CREQ_FLTR_A_MODE1_PREG}, + {0x078F, SIERRA_RX_CREQ_FLTR_A_MODE0_PREG}, + {0x7B3C, SIERRA_CREQ_CCLKDET_MODE01_PREG}, + {0x023F, SIERRA_RX_CTLE_MAINTENANCE_PREG}, + {0x3232, SIERRA_CREQ_FSMCLK_SEL_PREG}, + {0x0000, SIERRA_CREQ_EQ_CTRL_PREG}, + {0xCC44, SIERRA_CREQ_EQ_OPEN_EYE_THRESH_PREG}, + {0x8452, SIERRA_CTLELUT_CTRL_PREG}, + {0x4121, SIERRA_DFE_ECMP_RATESEL_PREG}, + {0x4121, SIERRA_DFE_SMP_RATESEL_PREG}, + {0x0002, SIERRA_DEQ_PHALIGN_CTRL}, + {0x3200, SIERRA_DEQ_CONCUR_CTRL1_PREG}, + {0x5064, SIERRA_DEQ_CONCUR_CTRL2_PREG}, + {0x0030, SIERRA_DEQ_EPIPWR_CTRL2_PREG}, + {0x5A5A, SIERRA_DEQ_ERRCMP_CTRL_PREG}, + {0x02F5, 
SIERRA_DEQ_OFFSET_CTRL_PREG}, + {0x02F5, SIERRA_DEQ_GAIN_CTRL_PREG}, + {0xA9A9, SIERRA_DEQ_VGATUNE_CTRL_PREG}, + {0x0014, SIERRA_DEQ_GLUT0}, + {0x0014, SIERRA_DEQ_GLUT1}, + {0x0014, SIERRA_DEQ_GLUT2}, + {0x0014, SIERRA_DEQ_GLUT3}, + {0x0014, SIERRA_DEQ_GLUT4}, + {0x0014, SIERRA_DEQ_GLUT5}, + {0x0014, SIERRA_DEQ_GLUT6}, + {0x0014, SIERRA_DEQ_GLUT7}, + {0x0014, SIERRA_DEQ_GLUT8}, + {0x0014, SIERRA_DEQ_GLUT9}, + {0x0014, SIERRA_DEQ_GLUT10}, + {0x0014, SIERRA_DEQ_GLUT11}, + {0x0014, SIERRA_DEQ_GLUT12}, + {0x0014, SIERRA_DEQ_GLUT13}, + {0x0014, SIERRA_DEQ_GLUT14}, + {0x0014, SIERRA_DEQ_GLUT15}, + {0x0014, SIERRA_DEQ_GLUT16}, + {0x0BAE, SIERRA_DEQ_ALUT0}, + {0x0AEB, SIERRA_DEQ_ALUT1}, + {0x0A28, SIERRA_DEQ_ALUT2}, + {0x0965, SIERRA_DEQ_ALUT3}, + {0x08A2, SIERRA_DEQ_ALUT4}, + {0x07DF, SIERRA_DEQ_ALUT5}, + {0x071C, SIERRA_DEQ_ALUT6}, + {0x0659, SIERRA_DEQ_ALUT7}, + {0x0596, SIERRA_DEQ_ALUT8}, + {0x0514, SIERRA_DEQ_ALUT9}, + {0x0492, SIERRA_DEQ_ALUT10}, + {0x0410, SIERRA_DEQ_ALUT11}, + {0x038E, SIERRA_DEQ_ALUT12}, + {0x030C, SIERRA_DEQ_ALUT13}, + {0x03F4, SIERRA_DEQ_DFETAP_CTRL_PREG}, + {0x0001, SIERRA_DFE_EN_1010_IGNORE_PREG}, + {0x3C01, SIERRA_DEQ_TAU_CTRL1_FAST_MAINT_PREG}, + {0x3C40, SIERRA_DEQ_TAU_CTRL1_SLOW_MAINT_PREG}, + {0x1C08, SIERRA_DEQ_TAU_CTRL2_PREG}, + {0x0033, SIERRA_DEQ_PICTRL_PREG}, + {0x0330, SIERRA_CPICAL_TMRVAL_MODE0_PREG}, + {0x01FF, SIERRA_CPICAL_PICNT_MODE1_PREG}, + {0x0009, SIERRA_CPI_OUTBUF_RATESEL_PREG}, + {0x3232, SIERRA_CPICAL_RES_STARTCODE_MODE23_PREG}, + {0x0005, SIERRA_LFPSDET_SUPPORT_PREG}, + {0x000F, SIERRA_LFPSFILT_NS_PREG}, + {0x0009, SIERRA_LFPSFILT_RD_PREG}, + {0x0001, SIERRA_LFPSFILT_MP_PREG}, + {0x8013, SIERRA_SDFILT_H2L_A_PREG}, + {0x8009, SIERRA_SDFILT_L2H_PREG}, + {0x0024, SIERRA_RXBUFFER_CTLECTRL_PREG}, + {0x0020, SIERRA_RXBUFFER_RCDFECTRL_PREG}, + {0x4243, SIERRA_RXBUFFER_DFECTRL_PREG} +}; + +static const struct cdns_sierra_vals usb_100_ext_ssc_plllc1_cmn_vals = { + .reg_pairs = usb_100_ext_ssc_plllc1_cmn_regs, + .num_regs = 
ARRAY_SIZE(usb_100_ext_ssc_plllc1_cmn_regs), +}; + +static const struct cdns_sierra_vals usb_100_int_ssc_plllc1_cmn_vals = { + .reg_pairs = usb_100_int_ssc_plllc1_cmn_regs, + .num_regs = ARRAY_SIZE(usb_100_int_ssc_plllc1_cmn_regs), +}; + +static const struct cdns_sierra_vals usb_100_ml_ln_vals = { + .reg_pairs = usb_100_ml_ln_regs, + .num_regs = ARRAY_SIZE(usb_100_ml_ln_regs), +}; + /* SGMII PHY PMA lane configuration */ static const struct cdns_reg_pairs sgmii_phy_pma_ln_regs[] = { {0x9010, SIERRA_PHY_PMA_XCVR_CTRL} @@ -2513,6 +2647,11 @@ static const struct cdns_sierra_data cdns_map_sierra = { [EXTERNAL_SSC] = &pcie_phy_pcs_cmn_vals, [INTERNAL_SSC] = &pcie_phy_pcs_cmn_vals, }, + [TYPE_USB] = { + [NO_SSC] = &pcie_phy_pcs_cmn_vals, + [EXTERNAL_SSC] = &pcie_phy_pcs_cmn_vals, + [INTERNAL_SSC] = &pcie_phy_pcs_cmn_vals, + }, }, }, .pma_cmn_vals = { @@ -2532,11 +2671,20 @@ static const struct cdns_sierra_data cdns_map_sierra = { [EXTERNAL_SSC] = &pcie_100_ext_ssc_plllc_cmn_vals, [INTERNAL_SSC] = &pcie_100_int_ssc_plllc_cmn_vals, }, + [TYPE_USB] = { + [NO_SSC] = &pcie_100_no_ssc_plllc_cmn_vals, + [EXTERNAL_SSC] = &pcie_100_ext_ssc_plllc_cmn_vals, + [INTERNAL_SSC] = &pcie_100_int_ssc_plllc_cmn_vals, + }, }, [TYPE_USB] = { [TYPE_NONE] = { [EXTERNAL_SSC] = &usb_100_ext_ssc_cmn_vals, }, + [TYPE_PCIE] = { + [EXTERNAL_SSC] = &usb_100_ext_ssc_plllc1_cmn_vals, + [INTERNAL_SSC] = &usb_100_int_ssc_plllc1_cmn_vals, + }, }, [TYPE_SGMII] = { [TYPE_NONE] = { @@ -2573,11 +2721,20 @@ static const struct cdns_sierra_data cdns_map_sierra = { [EXTERNAL_SSC] = &ml_pcie_100_ext_ssc_ln_vals, [INTERNAL_SSC] = &ml_pcie_100_int_ssc_ln_vals, }, + [TYPE_USB] = { + [NO_SSC] = &ml_pcie_100_no_ssc_ln_vals, + [EXTERNAL_SSC] = &ml_pcie_100_ext_ssc_ln_vals, + [INTERNAL_SSC] = &ml_pcie_100_int_ssc_ln_vals, + }, }, [TYPE_USB] = { [TYPE_NONE] = { [EXTERNAL_SSC] = &usb_100_ext_ssc_ln_vals, }, + [TYPE_PCIE] = { + [EXTERNAL_SSC] = &usb_100_ml_ln_vals, + [INTERNAL_SSC] = &usb_100_ml_ln_vals, + }, }, 
[TYPE_SGMII] = { [TYPE_NONE] = { @@ -2620,6 +2777,11 @@ static const struct cdns_sierra_data cdns_ti_map_sierra = { [EXTERNAL_SSC] = &pcie_phy_pcs_cmn_vals, [INTERNAL_SSC] = &pcie_phy_pcs_cmn_vals, }, + [TYPE_USB] = { + [NO_SSC] = &pcie_phy_pcs_cmn_vals, + [EXTERNAL_SSC] = &pcie_phy_pcs_cmn_vals, + [INTERNAL_SSC] = &pcie_phy_pcs_cmn_vals, + }, }, }, .phy_pma_ln_vals = { @@ -2655,11 +2817,20 @@ static const struct cdns_sierra_data cdns_ti_map_sierra = { [EXTERNAL_SSC] = &pcie_100_ext_ssc_plllc_cmn_vals, [INTERNAL_SSC] = &pcie_100_int_ssc_plllc_cmn_vals, }, + [TYPE_USB] = { + [NO_SSC] = &pcie_100_no_ssc_plllc_cmn_vals, + [EXTERNAL_SSC] = &pcie_100_ext_ssc_plllc_cmn_vals, + [INTERNAL_SSC] = &pcie_100_int_ssc_plllc_cmn_vals, + }, }, [TYPE_USB] = { [TYPE_NONE] = { [EXTERNAL_SSC] = &usb_100_ext_ssc_cmn_vals, }, + [TYPE_PCIE] = { + [EXTERNAL_SSC] = &usb_100_ext_ssc_plllc1_cmn_vals, + [INTERNAL_SSC] = &usb_100_int_ssc_plllc1_cmn_vals, + }, }, [TYPE_SGMII] = { [TYPE_PCIE] = { @@ -2693,11 +2864,20 @@ static const struct cdns_sierra_data cdns_ti_map_sierra = { [EXTERNAL_SSC] = &ti_ml_pcie_100_ext_ssc_ln_vals, [INTERNAL_SSC] = &ti_ml_pcie_100_int_ssc_ln_vals, }, + [TYPE_USB] = { + [NO_SSC] = &ti_ml_pcie_100_no_ssc_ln_vals, + [EXTERNAL_SSC] = &ti_ml_pcie_100_ext_ssc_ln_vals, + [INTERNAL_SSC] = &ti_ml_pcie_100_int_ssc_ln_vals, + }, }, [TYPE_USB] = { [TYPE_NONE] = { [EXTERNAL_SSC] = &usb_100_ext_ssc_ln_vals, }, + [TYPE_PCIE] = { + [EXTERNAL_SSC] = &usb_100_ml_ln_vals, + [INTERNAL_SSC] = &usb_100_ml_ln_vals, + }, }, [TYPE_SGMII] = { [TYPE_PCIE] = { From 399c75b6a9ed2fd609f9ad4c22cdd6364bc9d441 Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Mon, 26 May 2025 21:08:13 +0530 Subject: [PATCH 0125/2411] scsi: ufs: qcom: add a new phy calibrate API call Introduce a new phy calibrate API call in the UFS Qualcomm driver to separate phy calibration from phy power-on. This change is a precursor to the successive commits in this series, which requires these two operations to be distinct. 
Reviewed-by: Dmitry Baryshkov Reviewed-by: Manivannan Sadhasivam Signed-off-by: Nitin Rawat Acked-by: Martin K. Petersen Link: https://lore.kernel.org/r/20250526153821.7918-3-quic_nitirawa@quicinc.com Signed-off-by: Vinod Koul --- drivers/ufs/host/ufs-qcom.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 18a978452001..b764055c1854 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -532,6 +532,12 @@ static int ufs_qcom_power_up_sequence(struct ufs_hba *hba) goto out_disable_phy; } + ret = phy_calibrate(phy); + if (ret) { + dev_err(hba->dev, "Failed to calibrate PHY: %d\n", ret); + goto out_disable_phy; + } + ufs_qcom_select_unipro_mode(host); return 0; From dbd20821946a74e803208a25dddfafe1ae2e34e6 Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Mon, 26 May 2025 21:08:14 +0530 Subject: [PATCH 0126/2411] phy: qcom-qmp-ufs: Rename qmp_ufs_enable and qmp_ufs_power_on Rename qmp_ufs_enable to qmp_ufs_power_on and qmp_ufs_power_on to qmp_ufs_phy_calibrate to better reflect their functionality. Also update function calls and structure assignments accordingly. 
Reviewed-by: Manivannan Sadhasivam Reviewed-by: Dmitry Baryshkov Co-developed-by: Ram Kumar Dwivedi Signed-off-by: Ram Kumar Dwivedi Signed-off-by: Nitin Rawat Link: https://lore.kernel.org/r/20250526153821.7918-4-quic_nitirawa@quicinc.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index b33e2e2b5014..a67cf0a64f74 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1838,7 +1838,7 @@ static int qmp_ufs_init(struct phy *phy) return 0; } -static int qmp_ufs_power_on(struct phy *phy) +static int qmp_ufs_phy_calibrate(struct phy *phy) { struct qmp_ufs *qmp = phy_get_drvdata(phy); const struct qmp_phy_cfg *cfg = qmp->cfg; @@ -1899,7 +1899,7 @@ static int qmp_ufs_exit(struct phy *phy) return 0; } -static int qmp_ufs_enable(struct phy *phy) +static int qmp_ufs_power_on(struct phy *phy) { int ret; @@ -1907,7 +1907,7 @@ static int qmp_ufs_enable(struct phy *phy) if (ret) return ret; - ret = qmp_ufs_power_on(phy); + ret = qmp_ufs_phy_calibrate(phy); if (ret) qmp_ufs_exit(phy); @@ -1941,7 +1941,7 @@ static int qmp_ufs_set_mode(struct phy *phy, enum phy_mode mode, int submode) } static const struct phy_ops qcom_qmp_ufs_phy_ops = { - .power_on = qmp_ufs_enable, + .power_on = qmp_ufs_power_on, .power_off = qmp_ufs_disable, .set_mode = qmp_ufs_set_mode, .owner = THIS_MODULE, From cbfd6c124f27ad2b4c0f617dc40ad8a08a063463 Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Mon, 26 May 2025 21:08:15 +0530 Subject: [PATCH 0127/2411] phy: qcom-qmp-ufs: Refactor phy_power_on and phy_calibrate callbacks Commit 052553af6a31 ("ufs/phy: qcom: Refactor to use phy_init call") puts enabling regulators & clks, calibrating UFS PHY, starting serdes and polling PCS ready status into phy_power_on. 
In the current code, enabling regulators, enabling clks, calibrating the UFS PHY, start_serdes and polling PCS_ready_status are all part of phy_power_on. UFS PHY registers are retained after power collapse, meaning calibrating the UFS PHY, start_serdes and polling PCS_ready_status can be done only when the hba is powered on, and are not needed every time phy_power_on is called during resume. Hence keep the code which enables the PHY's regulators & clks in phy_power_on and move the remaining steps into the phy_calibrate function. Refactor the code to retain PHY regulators & clks in phy_power_on and move the rest of the code out to the new phy_calibrate function. Also move reset_control_assert to qmp_ufs_phy_calibrate to align with the hardware programming guide. Reviewed-by: Manivannan Sadhasivam Reviewed-by: Dmitry Baryshkov Co-developed-by: Can Guo Signed-off-by: Can Guo Signed-off-by: Nitin Rawat Link: https://lore.kernel.org/r/20250526153821.7918-5-quic_nitirawa@quicinc.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 26 ++++++------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index a67cf0a64f74..ade8e9c4b9ae 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1797,7 +1797,7 @@ static int qmp_ufs_com_exit(struct qmp_ufs *qmp) return 0; } -static int qmp_ufs_init(struct phy *phy) +static int qmp_ufs_power_on(struct phy *phy) { struct qmp_ufs *qmp = phy_get_drvdata(phy); const struct qmp_phy_cfg *cfg = qmp->cfg; @@ -1825,10 +1825,6 @@ static int qmp_ufs_init(struct phy *phy) return ret; } } - - ret = reset_control_assert(qmp->ufs_reset); - if (ret) - return ret; } ret = qmp_ufs_com_init(qmp); @@ -1847,6 +1843,10 @@ static int qmp_ufs_phy_calibrate(struct phy *phy) unsigned int val; int ret; + ret = reset_control_assert(qmp->ufs_reset); + if (ret) + return ret; + qmp_ufs_init_registers(qmp, cfg); ret =
reset_control_deassert(qmp->ufs_reset); @@ -1899,21 +1899,6 @@ static int qmp_ufs_exit(struct phy *phy) return 0; } -static int qmp_ufs_power_on(struct phy *phy) -{ - int ret; - - ret = qmp_ufs_init(phy); - if (ret) - return ret; - - ret = qmp_ufs_phy_calibrate(phy); - if (ret) - qmp_ufs_exit(phy); - - return ret; -} - static int qmp_ufs_disable(struct phy *phy) { int ret; @@ -1943,6 +1928,7 @@ static int qmp_ufs_set_mode(struct phy *phy, enum phy_mode mode, int submode) static const struct phy_ops qcom_qmp_ufs_phy_ops = { .power_on = qmp_ufs_power_on, .power_off = qmp_ufs_disable, + .calibrate = qmp_ufs_phy_calibrate, .set_mode = qmp_ufs_set_mode, .owner = THIS_MODULE, }; From d58b9ff47775042acc501d0a892af8bd08128a65 Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Mon, 26 May 2025 21:08:16 +0530 Subject: [PATCH 0128/2411] phy: qcom-qmp-ufs: Refactor UFS PHY reset Refactor the UFS PHY reset handling to parse the reset logic only once during initialization, instead of every resume. As part of this change, move the UFS PHY reset parsing logic from qmp_phy_power_on to the new qmp_ufs_phy_init function introduced as part of phy_ops::init callback. 
Co-developed-by: Ram Kumar Dwivedi Signed-off-by: Ram Kumar Dwivedi Signed-off-by: Nitin Rawat Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250526153821.7918-6-quic_nitirawa@quicinc.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 59 +++++++++++++------------ 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index ade8e9c4b9ae..33d238cf49aa 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1800,38 +1800,11 @@ static int qmp_ufs_com_exit(struct qmp_ufs *qmp) static int qmp_ufs_power_on(struct phy *phy) { struct qmp_ufs *qmp = phy_get_drvdata(phy); - const struct qmp_phy_cfg *cfg = qmp->cfg; int ret; dev_vdbg(qmp->dev, "Initializing QMP phy\n"); - if (cfg->no_pcs_sw_reset) { - /* - * Get UFS reset, which is delayed until now to avoid a - * circular dependency where UFS needs its PHY, but the PHY - * needs this UFS reset. - */ - if (!qmp->ufs_reset) { - qmp->ufs_reset = - devm_reset_control_get_exclusive(qmp->dev, - "ufsphy"); - - if (IS_ERR(qmp->ufs_reset)) { - ret = PTR_ERR(qmp->ufs_reset); - dev_err(qmp->dev, - "failed to get UFS reset: %d\n", - ret); - - qmp->ufs_reset = NULL; - return ret; - } - } - } - ret = qmp_ufs_com_init(qmp); - if (ret) - return ret; - - return 0; + return ret; } static int qmp_ufs_phy_calibrate(struct phy *phy) @@ -1925,7 +1898,37 @@ static int qmp_ufs_set_mode(struct phy *phy, enum phy_mode mode, int submode) return 0; } +static int qmp_ufs_phy_init(struct phy *phy) +{ + struct qmp_ufs *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; + int ret; + + if (!cfg->no_pcs_sw_reset) + return 0; + + /* + * Get UFS reset, which is delayed until now to avoid a + * circular dependency where UFS needs its PHY, but the PHY + * needs this UFS reset. 
+ */ + if (!qmp->ufs_reset) { + qmp->ufs_reset = + devm_reset_control_get_exclusive(qmp->dev, "ufsphy"); + + if (IS_ERR(qmp->ufs_reset)) { + ret = PTR_ERR(qmp->ufs_reset); + dev_err(qmp->dev, "failed to get PHY reset: %d\n", ret); + qmp->ufs_reset = NULL; + return ret; + } + } + + return 0; +} + static const struct phy_ops qcom_qmp_ufs_phy_ops = { + .init = qmp_ufs_phy_init, .power_on = qmp_ufs_power_on, .power_off = qmp_ufs_disable, .calibrate = qmp_ufs_phy_calibrate, From 7bcf4936aac6ec8d6fafbfd6f4f62302e5296a0d Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Mon, 26 May 2025 21:08:17 +0530 Subject: [PATCH 0129/2411] phy: qcom-qmp-ufs: Remove qmp_ufs_com_init() The qmp_ufs_power_on() function acts as a wrapper, solely invoking qmp_ufs_com_init(). Additionally, the code within qmp_ufs_com_init() does not correspond well with its name. Therefore, to enhance the readability and eliminate unnecessary function call inline qmp_ufs_com_init() into qmp_ufs_power_on(). There is no change to the functionality. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Nitin Rawat Link: https://lore.kernel.org/r/20250526153821.7918-7-quic_nitirawa@quicinc.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 45 ++++++++++--------------- 1 file changed, 17 insertions(+), 28 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 33d238cf49aa..eda0a59918ea 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1758,32 +1758,6 @@ static void qmp_ufs_init_registers(struct qmp_ufs *qmp, const struct qmp_phy_cfg qmp_ufs_init_all(qmp, &cfg->tbls_hs_b); } -static int qmp_ufs_com_init(struct qmp_ufs *qmp) -{ - const struct qmp_phy_cfg *cfg = qmp->cfg; - void __iomem *pcs = qmp->pcs; - int ret; - - ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs); - if (ret) { - dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret); - return ret; - } - - ret = clk_bulk_prepare_enable(qmp->num_clks, qmp->clks); - if (ret) - goto err_disable_regulators; - - qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); - - return 0; - -err_disable_regulators: - regulator_bulk_disable(cfg->num_vregs, qmp->vregs); - - return ret; -} - static int qmp_ufs_com_exit(struct qmp_ufs *qmp) { const struct qmp_phy_cfg *cfg = qmp->cfg; @@ -1800,10 +1774,25 @@ static int qmp_ufs_com_exit(struct qmp_ufs *qmp) static int qmp_ufs_power_on(struct phy *phy) { struct qmp_ufs *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *pcs = qmp->pcs; int ret; - dev_vdbg(qmp->dev, "Initializing QMP phy\n"); - ret = qmp_ufs_com_init(qmp); + ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs); + if (ret) { + dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret); + return ret; + } + + ret = clk_bulk_prepare_enable(qmp->num_clks, qmp->clks); + if (ret) + goto err_disable_regulators; + + qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], 
SW_PWRDN); + return 0; + +err_disable_regulators: + regulator_bulk_disable(cfg->num_vregs, qmp->vregs); return ret; } From acc6b0d73d902d3296d8c77878a9b508c2c6a5bf Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Mon, 26 May 2025 21:08:18 +0530 Subject: [PATCH 0130/2411] phy: qcom-qmp-ufs: Rename qmp_ufs_power_off Rename qmp_ufs_disable to qmp_ufs_power_off to better represent its functionality. Additionally, inline qmp_ufs_exit into qmp_ufs_power_off function to preserve the functionality of .power_off. There is no functional change. Reviewed-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Signed-off-by: Nitin Rawat Link: https://lore.kernel.org/r/20250526153821.7918-8-quic_nitirawa@quicinc.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index eda0a59918ea..e0dc5fa43dee 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1849,28 +1849,11 @@ static int qmp_ufs_power_off(struct phy *phy) qphy_clrbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); - return 0; -} - -static int qmp_ufs_exit(struct phy *phy) -{ - struct qmp_ufs *qmp = phy_get_drvdata(phy); - qmp_ufs_com_exit(qmp); return 0; } -static int qmp_ufs_disable(struct phy *phy) -{ - int ret; - - ret = qmp_ufs_power_off(phy); - if (ret) - return ret; - return qmp_ufs_exit(phy); -} - static int qmp_ufs_set_mode(struct phy *phy, enum phy_mode mode, int submode) { struct qmp_ufs *qmp = phy_get_drvdata(phy); @@ -1919,7 +1902,7 @@ static int qmp_ufs_phy_init(struct phy *phy) static const struct phy_ops qcom_qmp_ufs_phy_ops = { .init = qmp_ufs_phy_init, .power_on = qmp_ufs_power_on, - .power_off = qmp_ufs_disable, + .power_off = qmp_ufs_power_off, .calibrate = qmp_ufs_phy_calibrate, .set_mode = qmp_ufs_set_mode, .owner = THIS_MODULE, From 
7f600f0e193a6638135026c3718ac296ed3f5044 Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Mon, 26 May 2025 21:08:19 +0530 Subject: [PATCH 0131/2411] phy: qcom-qmp-ufs: Remove qmp_ufs_exit() and Inline qmp_ufs_com_exit() qmp_ufs_exit() is a wrapper function. It only calls qmp_ufs_com_exit(). Remove it to simplify the ufs phy driver. Additionally, partially inline qmp_ufs_com_exit() (dropping the reset assert) into the qmp_ufs_power_off function to avoid an unnecessary function call and to align with the PHY programming guide. Reviewed-by: Dmitry Baryshkov Signed-off-by: Nitin Rawat Link: https://lore.kernel.org/r/20250526153821.7918-9-quic_nitirawa@quicinc.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index e0dc5fa43dee..00bde65733cb 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1758,19 +1758,6 @@ static void qmp_ufs_init_registers(struct qmp_ufs *qmp, const struct qmp_phy_cfg qmp_ufs_init_all(qmp, &cfg->tbls_hs_b); } -static int qmp_ufs_com_exit(struct qmp_ufs *qmp) -{ - const struct qmp_phy_cfg *cfg = qmp->cfg; - - reset_control_assert(qmp->ufs_reset); - - clk_bulk_disable_unprepare(qmp->num_clks, qmp->clks); - - regulator_bulk_disable(cfg->num_vregs, qmp->vregs); - - return 0; -} - static int qmp_ufs_power_on(struct phy *phy) { struct qmp_ufs *qmp = phy_get_drvdata(phy); @@ -1849,7 +1836,9 @@ static int qmp_ufs_power_off(struct phy *phy) qphy_clrbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); - qmp_ufs_com_exit(qmp); + clk_bulk_disable_unprepare(qmp->num_clks, qmp->clks); + + regulator_bulk_disable(cfg->num_vregs, qmp->vregs); return 0; } From a079b2d715340482e425ff136b55810ab8279800 Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Mon, 26 May 2025 21:08:20 +0530 Subject: [PATCH 0132/2411] phy: qcom-qmp-ufs:
refactor qmp_ufs_power_off In qmp_ufs_power_off, the PHY is already powered down by asserting QPHY_PCS_POWER_DOWN_CONTROL. Therefore, additional phy_reset and stopping SerDes are unnecessary. Also this approach does not align with the phy HW programming guide. Thus, refactor qmp_ufs_power_off to remove the phy_reset and stop SerDes calls to simplify the code and ensure alignment with the PHY HW programming guide. Signed-off-by: Nitin Rawat Reviewed-by: Dmitry Baryshkov Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20250526153821.7918-10-quic_nitirawa@quicinc.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 00bde65733cb..9c69c77d10c8 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1825,13 +1825,6 @@ static int qmp_ufs_power_off(struct phy *phy) struct qmp_ufs *qmp = phy_get_drvdata(phy); const struct qmp_phy_cfg *cfg = qmp->cfg; - /* PHY reset */ - if (!cfg->no_pcs_sw_reset) - qphy_setbits(qmp->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); - - /* stop SerDes */ - qphy_clrbits(qmp->pcs, cfg->regs[QPHY_START_CTRL], SERDES_START); - /* Put PHY into POWER DOWN state: active low */ qphy_clrbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); From 77d2fa54a94574f767d5fb296b6b8e011eba0c8e Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Mon, 26 May 2025 21:08:21 +0530 Subject: [PATCH 0133/2411] scsi: ufs: qcom : Refactor phy_power_on/off calls Commit 3f6d1767b1a0 ("phy: ufs-qcom: Refactor all init steps into phy_poweron") moved the phy_power_on/off from ufs_qcom_setup_clocks to suspend/resume func. To have a better power saving, remove the phy_power_on/off calls from resume/suspend path and put them back to ufs_qcom_setup_clocks, so that PHY regulators & clks can be turned on/off along with UFS's clocks. 
Since phy_power_on is now separated out from phy calibrate, make separate phy_power_on calls from the ufs qcom driver. Co-developed-by: Can Guo Signed-off-by: Can Guo Signed-off-by: Nitin Rawat Link: https://lore.kernel.org/r/20250526153821.7918-11-quic_nitirawa@quicinc.com Signed-off-by: Vinod Koul --- drivers/ufs/host/ufs-qcom.c | 58 +++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index b764055c1854..ba4b2880279c 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -711,53 +711,29 @@ static int ufs_qcom_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op, enum ufs_notify_change_status status) { struct ufs_qcom_host *host = ufshcd_get_variant(hba); - struct phy *phy = host->generic_phy; if (status == PRE_CHANGE) return 0; - if (ufs_qcom_is_link_off(hba)) { - /* - * Disable the tx/rx lane symbol clocks before PHY is - * powered down as the PLL source should be disabled - * after downstream clocks are disabled.
- */ + if (!ufs_qcom_is_link_active(hba)) ufs_qcom_disable_lane_clks(host); - phy_power_off(phy); - /* reset the connected UFS device during power down */ + + /* reset the connected UFS device during power down */ + if (ufs_qcom_is_link_off(hba) && host->device_reset) ufs_qcom_device_reset_ctrl(hba, true); - } else if (!ufs_qcom_is_link_active(hba)) { - ufs_qcom_disable_lane_clks(host); - } - return ufs_qcom_ice_suspend(host); } static int ufs_qcom_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) { struct ufs_qcom_host *host = ufshcd_get_variant(hba); - struct phy *phy = host->generic_phy; int err; - if (ufs_qcom_is_link_off(hba)) { - err = phy_power_on(phy); - if (err) { - dev_err(hba->dev, "%s: failed PHY power on: %d\n", - __func__, err); - return err; - } - - err = ufs_qcom_enable_lane_clks(host); - if (err) - return err; - - } else if (!ufs_qcom_is_link_active(hba)) { - err = ufs_qcom_enable_lane_clks(host); - if (err) - return err; - } + err = ufs_qcom_enable_lane_clks(host); + if (err) + return err; return ufs_qcom_ice_resume(host); } @@ -1136,12 +1112,20 @@ static void ufs_qcom_set_caps(struct ufs_hba *hba) * @on: If true, enable clocks else disable them. * @status: PRE_CHANGE or POST_CHANGE notify * + * There are certain clocks which comes from the PHY so it needs + * to be managed together along with controller clocks which also + * provides a better power saving. Hence keep phy_power_off/on calls + * in ufs_qcom_setup_clocks, so that PHY's regulators & clks can be + * turned on/off along with UFS's clocks. + * * Return: 0 on success, non-zero on failure. */ static int ufs_qcom_setup_clocks(struct ufs_hba *hba, bool on, enum ufs_notify_change_status status) { struct ufs_qcom_host *host = ufshcd_get_variant(hba); + struct phy *phy = host->generic_phy; + int err; /* * In case ufs_qcom_init() is not yet done, simply ignore. 
@@ -1160,10 +1144,22 @@ static int ufs_qcom_setup_clocks(struct ufs_hba *hba, bool on, /* disable device ref_clk */ ufs_qcom_dev_ref_clk_ctrl(host, false); } + + err = phy_power_off(phy); + if (err) { + dev_err(hba->dev, "phy power off failed, ret=%d\n", err); + return err; + } } break; case POST_CHANGE: if (on) { + err = phy_power_on(phy); + if (err) { + dev_err(hba->dev, "phy power on failed, ret = %d\n", err); + return err; + } + /* enable the device ref clock for HS mode*/ if (ufshcd_is_hs_mode(&hba->pwr_info)) ufs_qcom_dev_ref_clk_ctrl(host, true); From 65ad0d068c426c2f3477b1241f34ad82d1197e80 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:24:23 -0500 Subject: [PATCH 0134/2411] dt-bindings: phy: Convert apm,xgene-phy to DT schema Convert the APM X-Gene PHY binding to DT schema format. It's a straight forward conversion. Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250607212424.739972-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../bindings/phy/apm,xgene-phy.yaml | 169 ++++++++++++++++++ .../devicetree/bindings/phy/apm-xgene-phy.txt | 76 -------- 2 files changed, 169 insertions(+), 76 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/apm,xgene-phy.yaml delete mode 100644 Documentation/devicetree/bindings/phy/apm-xgene-phy.txt diff --git a/Documentation/devicetree/bindings/phy/apm,xgene-phy.yaml b/Documentation/devicetree/bindings/phy/apm,xgene-phy.yaml new file mode 100644 index 000000000000..d1e6b112b6de --- /dev/null +++ b/Documentation/devicetree/bindings/phy/apm,xgene-phy.yaml @@ -0,0 +1,169 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/apm,xgene-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: APM X-Gene 15Gbps Multi-purpose PHY + +maintainers: + - Khuong Dinh + +description: + PHY nodes are defined to describe on-chip 15Gbps Multi-purpose PHY. 
Each + PHY (pair of lanes) has its own node. + +properties: + compatible: + items: + - const: apm,xgene-phy + + reg: + maxItems: 1 + + '#phy-cells': + description: + Possible values are 0 (SATA), 1 (SGMII), 2 (PCIe), 3 (USB), and 4 (XFI). + const: 1 + + clocks: + maxItems: 1 + + apm,tx-eye-tuning: + description: + Manual control to fine tune the capture of the serial bit lines from the + automatic calibrated position. Two set of 3-tuple setting for each + supported link speed on the host. Range from 0 to 127 in unit of one bit + period. + $ref: /schemas/types.yaml#/definitions/uint32-matrix + minItems: 2 + maxItems: 2 + items: + minItems: 3 + maxItems: 3 + items: + minimum: 0 + maximum: 127 + default: 10 + + apm,tx-eye-direction: + description: + Eye tuning manual control direction. 0 means sample data earlier than the + nominal sampling point. 1 means sample data later than the nominal + sampling point. Two set of 3-tuple setting for each supported link speed + on the host. + $ref: /schemas/types.yaml#/definitions/uint32-matrix + minItems: 2 + maxItems: 2 + items: + minItems: 3 + maxItems: 3 + items: + enum: [0, 1] + default: 0 + + apm,tx-boost-gain: + description: + Frequency boost AC (LSB 3-bit) and DC (2-bit) gain control. Two set of + 3-tuple setting for each supported link speed on the host. Range is + between 0 to 31 in unit of dB. Default is 3. + $ref: /schemas/types.yaml#/definitions/uint32-matrix + minItems: 2 + maxItems: 2 + items: + minItems: 3 + maxItems: 3 + items: + minimum: 0 + maximum: 31 + + apm,tx-amplitude: + description: + Amplitude control. Two set of 3-tuple setting for each supported link + speed on the host. Range is between 0 to 199500 in unit of uV. + $ref: /schemas/types.yaml#/definitions/uint32-matrix + minItems: 2 + maxItems: 2 + items: + minItems: 3 + maxItems: 3 + items: + minimum: 0 + maximum: 199500 + default: 199500 + + apm,tx-pre-cursor1: + description: + 1st pre-cursor emphasis taps control. 
Two set of 3-tuple setting for + each supported link speed on the host. Range is 0 to 273000 in unit of + uV. + $ref: /schemas/types.yaml#/definitions/uint32-matrix + minItems: 2 + maxItems: 2 + items: + minItems: 3 + maxItems: 3 + items: + minimum: 0 + maximum: 273000 + default: 0 + + apm,tx-pre-cursor2: + description: + 2nd pre-cursor emphasis taps control. Two set of 3-tuple setting for + each supported link speed on the host. Range is 0 to 127400 in unit uV. + $ref: /schemas/types.yaml#/definitions/uint32-matrix + minItems: 2 + maxItems: 2 + items: + minItems: 3 + maxItems: 3 + items: + minimum: 0 + maximum: 127400 + default: 0 + + apm,tx-post-cursor: + description: | + Post-cursor emphasis taps control. Two set of 3-tuple setting for Gen1, + Gen2, and Gen3 link speeds. Range is between 0 to 31 in unit of 18.2mV. + $ref: /schemas/types.yaml#/definitions/uint32-matrix + minItems: 2 + maxItems: 2 + items: + minItems: 3 + maxItems: 3 + items: + minimum: 0 + maximum: 31 + default: 0xf + + apm,tx-speed: + description: > + Tx operating speed. One set of 3-tuple for each supported link speed on + the host: + + 0 = 1-2Gbps + 1 = 2-4Gbps (1st tuple default) + 2 = 4-8Gbps + 3 = 8-15Gbps (2nd tuple default) + 4 = 2.5-4Gbps + 5 = 4-5Gbps + 6 = 5-6Gbps + 7 = 6-16Gbps (3rd tuple default). + + $ref: /schemas/types.yaml#/definitions/uint32-array + minItems: 3 + maxItems: 3 + items: + maximum: 7 + +additionalProperties: false + +examples: + - | + phy@1f21a000 { + compatible = "apm,xgene-phy"; + reg = <0x1f21a000 0x100>; + #phy-cells = <1>; + }; diff --git a/Documentation/devicetree/bindings/phy/apm-xgene-phy.txt b/Documentation/devicetree/bindings/phy/apm-xgene-phy.txt deleted file mode 100644 index 602cf952b92b..000000000000 --- a/Documentation/devicetree/bindings/phy/apm-xgene-phy.txt +++ /dev/null @@ -1,76 +0,0 @@ -* APM X-Gene 15Gbps Multi-purpose PHY nodes - -PHY nodes are defined to describe on-chip 15Gbps Multi-purpose PHY. Each -PHY (pair of lanes) has its own node. 
- -Required properties: -- compatible : Shall be "apm,xgene-phy". -- reg : PHY memory resource is the SDS PHY access resource. -- #phy-cells : Shall be 1 as it expects one argument for setting - the mode of the PHY. Possible values are 0 (SATA), - 1 (SGMII), 2 (PCIe), 3 (USB), and 4 (XFI). - -Optional properties: -- status : Shall be "ok" if enabled or "disabled" if disabled. - Default is "ok". -- clocks : Reference to the clock entry. -- apm,tx-eye-tuning : Manual control to fine tune the capture of the serial - bit lines from the automatic calibrated position. - Two set of 3-tuple setting for each (up to 3) - supported link speed on the host. Range from 0 to - 127 in unit of one bit period. Default is 10. -- apm,tx-eye-direction : Eye tuning manual control direction. 0 means sample - data earlier than the nominal sampling point. 1 means - sample data later than the nominal sampling point. - Two set of 3-tuple setting for each (up to 3) - supported link speed on the host. Default is 0. -- apm,tx-boost-gain : Frequency boost AC (LSB 3-bit) and DC (2-bit) - gain control. Two set of 3-tuple setting for each - (up to 3) supported link speed on the host. Range is - between 0 to 31 in unit of dB. Default is 3. -- apm,tx-amplitude : Amplitude control. Two set of 3-tuple setting for - each (up to 3) supported link speed on the host. - Range is between 0 to 199500 in unit of uV. - Default is 199500 uV. -- apm,tx-pre-cursor1 : 1st pre-cursor emphasis taps control. Two set of - 3-tuple setting for each (up to 3) supported link - speed on the host. Range is 0 to 273000 in unit of - uV. Default is 0. -- apm,tx-pre-cursor2 : 2nd pre-cursor emphasis taps control. Two set of - 3-tuple setting for each (up to 3) supported link - speed on the host. Range is 0 to 127400 in unit uV. - Default is 0x0. -- apm,tx-post-cursor : Post-cursor emphasis taps control. Two set of - 3-tuple setting for Gen1, Gen2, and Gen3. Range is - between 0 to 0x1f in unit of 18.2mV. Default is 0xf. 
-- apm,tx-speed : Tx operating speed. One set of 3-tuple for each - supported link speed on the host. - 0 = 1-2Gbps - 1 = 2-4Gbps (1st tuple default) - 2 = 4-8Gbps - 3 = 8-15Gbps (2nd tuple default) - 4 = 2.5-4Gbps - 5 = 4-5Gbps - 6 = 5-6Gbps - 7 = 6-16Gbps (3rd tuple default) - -NOTE: PHY override parameters are board specific setting. - -Example: - phy1: phy@1f21a000 { - compatible = "apm,xgene-phy"; - reg = <0x0 0x1f21a000 0x0 0x100>; - #phy-cells = <1>; - }; - - phy2: phy@1f22a000 { - compatible = "apm,xgene-phy"; - reg = <0x0 0x1f22a000 0x0 0x100>; - #phy-cells = <1>; - }; - - phy3: phy@1f23a000 { - compatible = "apm,xgene-phy"; - reg = <0x0 0x1f23a000 0x0 0x100>; - #phy-cells = <1>; - }; From f151f3a6ebe184b5f8c9abe58fe2d63f9950139b Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:24:48 -0500 Subject: [PATCH 0135/2411] dt-bindings: phy: Convert brcm,ns2-drd-phy to DT schema Convert the Broadcom NS2 USB2 PHY binding to DT schema format. It's a straight forward conversion. Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250607212456.740697-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../bindings/phy/brcm,ns2-drd-phy.txt | 30 --------- .../bindings/phy/brcm,ns2-drd-phy.yaml | 62 +++++++++++++++++++ 2 files changed, 62 insertions(+), 30 deletions(-) delete mode 100644 Documentation/devicetree/bindings/phy/brcm,ns2-drd-phy.txt create mode 100644 Documentation/devicetree/bindings/phy/brcm,ns2-drd-phy.yaml diff --git a/Documentation/devicetree/bindings/phy/brcm,ns2-drd-phy.txt b/Documentation/devicetree/bindings/phy/brcm,ns2-drd-phy.txt deleted file mode 100644 index 04f063aa7883..000000000000 --- a/Documentation/devicetree/bindings/phy/brcm,ns2-drd-phy.txt +++ /dev/null @@ -1,30 +0,0 @@ -BROADCOM NORTHSTAR2 USB2 (DUAL ROLE DEVICE) PHY - -Required properties: - - compatible: brcm,ns2-drd-phy - - reg: offset and length of the NS2 PHY related registers. - - reg-names - The below registers must be provided. 
- icfg - for DRD ICFG configurations - rst-ctrl - for DRD IDM reset - crmu-ctrl - for CRMU core vdd, PHY and PHY PLL reset - usb2-strap - for port over current polarity reversal - - #phy-cells: Must be 0. No args required. - - vbus-gpios: vbus gpio binding - - id-gpios: id gpio binding - -Refer to phy/phy-bindings.txt for the generic PHY binding properties - -Example: - usbdrd_phy: phy@66000960 { - #phy-cells = <0>; - compatible = "brcm,ns2-drd-phy"; - reg = <0x66000960 0x24>, - <0x67012800 0x4>, - <0x6501d148 0x4>, - <0x664d0700 0x4>; - reg-names = "icfg", "rst-ctrl", - "crmu-ctrl", "usb2-strap"; - id-gpios = <&gpio_g 30 0>; - vbus-gpios = <&gpio_g 31 0>; - }; diff --git a/Documentation/devicetree/bindings/phy/brcm,ns2-drd-phy.yaml b/Documentation/devicetree/bindings/phy/brcm,ns2-drd-phy.yaml new file mode 100644 index 000000000000..1fab97de5c0d --- /dev/null +++ b/Documentation/devicetree/bindings/phy/brcm,ns2-drd-phy.yaml @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/brcm,ns2-drd-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Broadcom Northstar2 USB2 Dual Role Device PHY + +maintainers: + - Florian Fainelli + - Hauke Mehrtens + - Rafał Miłecki + +properties: + compatible: + const: brcm,ns2-drd-phy + + reg: + items: + - description: DRD ICFG configurations + - description: DRD IDM reset + - description: CRMU core vdd, PHY and PHY PLL reset + - description: Port over current polarity reversal + + reg-names: + items: + - const: icfg + - const: rst-ctrl + - const: crmu-ctrl + - const: usb2-strap + + '#phy-cells': + const: 0 + + id-gpios: + maxItems: 1 + description: ID GPIO line + + vbus-gpios: + maxItems: 1 + description: VBUS GPIO line + +required: + - '#phy-cells' + - compatible + - reg + - reg-names + - id-gpios + - vbus-gpios + +additionalProperties: false + +examples: + - | + phy@66000960 { + #phy-cells = <0>; + compatible = "brcm,ns2-drd-phy"; + 
reg = <0x66000960 0x24>, <0x67012800 0x4>, <0x6501d148 0x4>, <0x664d0700 0x4>; + reg-names = "icfg", "rst-ctrl", "crmu-ctrl", "usb2-strap"; + id-gpios = <&gpio_g 30 0>; + vbus-gpios = <&gpio_g 31 0>; + }; From 6725c334e94a16ac141f23a3aa59cab7eb52cb6b Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:25:07 -0500 Subject: [PATCH 0136/2411] dt-bindings: phy: Convert brcm,sr-pcie-phy to DT schema Convert the Broadcom Stingray PCIe PHY binding to DT schema format. It's a straight forward conversion. Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250607212508.741193-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../bindings/phy/brcm,sr-pcie-phy.txt | 41 ----------------- .../bindings/phy/brcm,sr-pcie-phy.yaml | 46 +++++++++++++++++++ 2 files changed, 46 insertions(+), 41 deletions(-) delete mode 100644 Documentation/devicetree/bindings/phy/brcm,sr-pcie-phy.txt create mode 100644 Documentation/devicetree/bindings/phy/brcm,sr-pcie-phy.yaml diff --git a/Documentation/devicetree/bindings/phy/brcm,sr-pcie-phy.txt b/Documentation/devicetree/bindings/phy/brcm,sr-pcie-phy.txt deleted file mode 100644 index e8d82286beb9..000000000000 --- a/Documentation/devicetree/bindings/phy/brcm,sr-pcie-phy.txt +++ /dev/null @@ -1,41 +0,0 @@ -Broadcom Stingray PCIe PHY - -Required properties: -- compatible: must be "brcm,sr-pcie-phy" -- reg: base address and length of the PCIe SS register space -- brcm,sr-cdru: phandle to the CDRU syscon node -- brcm,sr-mhb: phandle to the MHB syscon node -- #phy-cells: Must be 1, denotes the PHY index - -For PAXB based root complex, one can have a configuration of up to 8 PHYs -PHY index goes from 0 to 7 - -For the internal PAXC based root complex, PHY index is always 8 - -Example: - mhb: syscon@60401000 { - compatible = "brcm,sr-mhb", "syscon"; - reg = <0 0x60401000 0 0x38c>; - }; - - cdru: syscon@6641d000 { - compatible = "brcm,sr-cdru", "syscon"; - reg = <0 0x6641d000 0 0x400>; - }; - - pcie_phy: phy@40000000 
{ - compatible = "brcm,sr-pcie-phy"; - reg = <0 0x40000000 0 0x800>; - brcm,sr-cdru = <&cdru>; - brcm,sr-mhb = <&mhb>; - #phy-cells = <1>; - }; - - /* users of the PCIe PHY */ - - pcie0: pcie@48000000 { - ... - ... - phys = <&pcie_phy 0>; - phy-names = "pcie-phy"; - }; diff --git a/Documentation/devicetree/bindings/phy/brcm,sr-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/brcm,sr-pcie-phy.yaml new file mode 100644 index 000000000000..60ccc0813ed5 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/brcm,sr-pcie-phy.yaml @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/brcm,sr-pcie-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Broadcom Stingray PCIe PHY + +maintainers: + - Ray Jui + +description: > + For PAXB based root complex, one can have a configuration of up to 8 PHYs. + PHY index goes from 0 to 7. + + For the internal PAXC based root complex, PHY index is always 8. + +properties: + compatible: + const: brcm,sr-pcie-phy + + reg: + maxItems: 1 + + '#phy-cells': + const: 1 + + brcm,sr-cdru: + description: phandle to the CDRU syscon node + $ref: /schemas/types.yaml#/definitions/phandle + + brcm,sr-mhb: + description: phandle to the MHB syscon node + $ref: /schemas/types.yaml#/definitions/phandle + +additionalProperties: false + +examples: + - | + phy@40000000 { + compatible = "brcm,sr-pcie-phy"; + reg = <0x40000000 0x800>; + brcm,sr-cdru = <&cdru>; + brcm,sr-mhb = <&mhb>; + #phy-cells = <1>; + }; From 1fac100a4dec0fd96dc404561d1418aa20de441f Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:25:18 -0500 Subject: [PATCH 0137/2411] dt-bindings: phy: Convert hisilicon,hix5hd2-sata-phy to DT schema Convert the HiSilicon HIX5HD2 SATA PHY binding to DT schema format. It's a straight forward conversion. 
Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250607212520.741588-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../phy/hisilicon,hix5hd2-sata-phy.yaml | 48 +++++++++++++++++++ .../devicetree/bindings/phy/hix5hd2-phy.txt | 22 --------- 2 files changed, 48 insertions(+), 22 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/hisilicon,hix5hd2-sata-phy.yaml delete mode 100644 Documentation/devicetree/bindings/phy/hix5hd2-phy.txt diff --git a/Documentation/devicetree/bindings/phy/hisilicon,hix5hd2-sata-phy.yaml b/Documentation/devicetree/bindings/phy/hisilicon,hix5hd2-sata-phy.yaml new file mode 100644 index 000000000000..2993dd6b40a8 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/hisilicon,hix5hd2-sata-phy.yaml @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/hisilicon,hix5hd2-sata-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: HiSilicon hix5hd2 SATA PHY + +maintainers: + - Jiancheng Xue + +properties: + compatible: + const: hisilicon,hix5hd2-sata-phy + + reg: + maxItems: 1 + + '#phy-cells': + const: 0 + + hisilicon,peripheral-syscon: + description: Phandle of syscon used to control peripheral + $ref: /schemas/types.yaml#/definitions/phandle + + hisilicon,power-reg: + description: Offset and bit number within peripheral-syscon register controlling SATA power supply + $ref: /schemas/types.yaml#/definitions/uint32-array + items: + - description: Offset within peripheral-syscon register + - description: Bit number controlling SATA power supply + +required: + - compatible + - reg + - '#phy-cells' + +additionalProperties: false + +examples: + - | + phy@f9900000 { + compatible = "hisilicon,hix5hd2-sata-phy"; + reg = <0xf9900000 0x10000>; + #phy-cells = <0>; + hisilicon,peripheral-syscon = <&peripheral_ctrl>; + hisilicon,power-reg = <0x8 10>; + }; diff --git a/Documentation/devicetree/bindings/phy/hix5hd2-phy.txt 
b/Documentation/devicetree/bindings/phy/hix5hd2-phy.txt deleted file mode 100644 index 296168b74d24..000000000000 --- a/Documentation/devicetree/bindings/phy/hix5hd2-phy.txt +++ /dev/null @@ -1,22 +0,0 @@ -Hisilicon hix5hd2 SATA PHY ------------------------ - -Required properties: -- compatible: should be "hisilicon,hix5hd2-sata-phy" -- reg: offset and length of the PHY registers -- #phy-cells: must be 0 -Refer to phy/phy-bindings.txt for the generic PHY binding properties - -Optional Properties: -- hisilicon,peripheral-syscon: phandle of syscon used to control peripheral. -- hisilicon,power-reg: offset and bit number within peripheral-syscon, - register of controlling sata power supply. - -Example: - sata_phy: phy@f9900000 { - compatible = "hisilicon,hix5hd2-sata-phy"; - reg = <0xf9900000 0x10000>; - #phy-cells = <0>; - hisilicon,peripheral-syscon = <&peripheral_ctrl>; - hisilicon,power-reg = <0x8 10>; - }; From 40f1d8214257c4a2eaa07ed4fd3217c5c3cbfc70 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:25:23 -0500 Subject: [PATCH 0138/2411] dt-bindings: phy: Convert hisilicon,hi6220-usb-phy to DT schema Convert the HiSilicon HI6220 USB PHY binding to DT schema format. It's a straight forward conversion. 
Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250607212524.741770-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../phy/hisilicon,hi6220-usb-phy.yaml | 35 +++++++++++++++++++ .../bindings/phy/phy-hi6220-usb.txt | 16 --------- 2 files changed, 35 insertions(+), 16 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/hisilicon,hi6220-usb-phy.yaml delete mode 100644 Documentation/devicetree/bindings/phy/phy-hi6220-usb.txt diff --git a/Documentation/devicetree/bindings/phy/hisilicon,hi6220-usb-phy.yaml b/Documentation/devicetree/bindings/phy/hisilicon,hi6220-usb-phy.yaml new file mode 100644 index 000000000000..376586a666e7 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/hisilicon,hi6220-usb-phy.yaml @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/hisilicon,hi6220-usb-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Hisilicon hi6220 USB PHY + +maintainers: + - Zhangfei Gao + +properties: + compatible: + const: hisilicon,hi6220-usb-phy + + '#phy-cells': + const: 0 + + phy-supply: + description: PHY power supply. + + hisilicon,peripheral-syscon: + description: Phandle to the system controller for PHY control. 
+ $ref: /schemas/types.yaml#/definitions/phandle + +additionalProperties: false + +examples: + - | + usbphy { + compatible = "hisilicon,hi6220-usb-phy"; + #phy-cells = <0>; + phy-supply = <&fixed_5v_hub>; + hisilicon,peripheral-syscon = <&sys_ctrl>; + }; diff --git a/Documentation/devicetree/bindings/phy/phy-hi6220-usb.txt b/Documentation/devicetree/bindings/phy/phy-hi6220-usb.txt deleted file mode 100644 index f17a56e2152f..000000000000 --- a/Documentation/devicetree/bindings/phy/phy-hi6220-usb.txt +++ /dev/null @@ -1,16 +0,0 @@ -Hisilicon hi6220 usb PHY ------------------------ - -Required properties: -- compatible: should be "hisilicon,hi6220-usb-phy" -- #phy-cells: must be 0 -- hisilicon,peripheral-syscon: phandle of syscon used to control phy. -Refer to phy/phy-bindings.txt for the generic PHY binding properties - -Example: - usb_phy: usbphy { - compatible = "hisilicon,hi6220-usb-phy"; - #phy-cells = <0>; - phy-supply = <&fixed_5v_hub>; - hisilicon,peripheral-syscon = <&sys_ctrl>; - }; From 7cc5efcd948f3ea768facd7f8472d302466422e8 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:25:26 -0500 Subject: [PATCH 0139/2411] dt-bindings: phy: Convert hisilicon,inno-usb2-phy to DT schema Convert the HiSilicon INNO USB2 PHY binding to DT schema format. It's a straight forward conversion. Add the undocumented "hisilicon,hi3798mv100-usb2-phy" compatible. 
Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250607212527.741915-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../bindings/phy/hisilicon,inno-usb2-phy.yaml | 93 +++++++++++++++++++ .../bindings/phy/phy-hisi-inno-usb2.txt | 71 -------------- 2 files changed, 93 insertions(+), 71 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/hisilicon,inno-usb2-phy.yaml delete mode 100644 Documentation/devicetree/bindings/phy/phy-hisi-inno-usb2.txt diff --git a/Documentation/devicetree/bindings/phy/hisilicon,inno-usb2-phy.yaml b/Documentation/devicetree/bindings/phy/hisilicon,inno-usb2-phy.yaml new file mode 100644 index 000000000000..51ea0e54ce35 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/hisilicon,inno-usb2-phy.yaml @@ -0,0 +1,93 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/hisilicon,inno-usb2-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: HiSilicon INNO USB2 PHY + +maintainers: + - Pengcheng Li + +description: + The INNO USB2 PHY device should be a child node of peripheral controller that + contains the PHY configuration register, and each device supports up to 2 PHY + ports which are represented as child nodes of INNO USB2 PHY device. 
+ +properties: + compatible: + enum: + - hisilicon,hi3798cv200-usb2-phy + - hisilicon,hi3798mv100-usb2-phy + - hisilicon,inno-usb2-phy + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + resets: + maxItems: 1 + + "#address-cells": + const: 1 + + "#size-cells": + const: 0 + +patternProperties: + "^phy@[0-1]$": + description: PHY port subnode + type: object + additionalProperties: false + + properties: + reg: + maximum: 1 + + "#phy-cells": + const: 0 + + resets: + maxItems: 1 + + required: + - reg + - "#phy-cells" + - resets + +required: + - compatible + - reg + - clocks + - resets + - "#address-cells" + - "#size-cells" + +additionalProperties: false + +examples: + - | + #include <dt-bindings/clock/histb-clock.h> + + usb2-phy@120 { + compatible = "hisilicon,hi3798cv200-usb2-phy"; + reg = <0x120 0x4>; + clocks = <&crg HISTB_USB2_PHY1_REF_CLK>; + resets = <&crg 0xbc 4>; + #address-cells = <1>; + #size-cells = <0>; + + phy@0 { + reg = <0>; + #phy-cells = <0>; + resets = <&crg 0xbc 8>; + }; + + phy@1 { + reg = <1>; + #phy-cells = <0>; + resets = <&crg 0xbc 9>; + }; + }; diff --git a/Documentation/devicetree/bindings/phy/phy-hisi-inno-usb2.txt b/Documentation/devicetree/bindings/phy/phy-hisi-inno-usb2.txt deleted file mode 100644 index 104953e849e7..000000000000 --- a/Documentation/devicetree/bindings/phy/phy-hisi-inno-usb2.txt +++ /dev/null @@ -1,71 +0,0 @@ -Device tree bindings for HiSilicon INNO USB2 PHY - -Required properties: -- compatible: Should be one of the following strings: - "hisilicon,inno-usb2-phy", - "hisilicon,hi3798cv200-usb2-phy". -- reg: Should be the address space for PHY configuration register in peripheral - controller, e.g. PERI_USB0 for USB 2.0 PHY01 on Hi3798CV200 SoC. -- clocks: The phandle and clock specifier pair for INNO USB2 PHY device - reference clock. -- resets: The phandle and reset specifier pair for INNO USB2 PHY device reset - signal. -- #address-cells: Must be 1. -- #size-cells: Must be 0. 
- -The INNO USB2 PHY device should be a child node of peripheral controller that -contains the PHY configuration register, and each device supports up to 2 PHY -ports which are represented as child nodes of INNO USB2 PHY device. - -Required properties for PHY port node: -- reg: The PHY port instance number. -- #phy-cells: Defined by generic PHY bindings. Must be 0. -- resets: The phandle and reset specifier pair for PHY port reset signal. - -Refer to phy/phy-bindings.txt for the generic PHY binding properties - -Example: - -perictrl: peripheral-controller@8a20000 { - compatible = "hisilicon,hi3798cv200-perictrl", "simple-mfd"; - reg = <0x8a20000 0x1000>; - #address-cells = <1>; - #size-cells = <1>; - ranges = <0x0 0x8a20000 0x1000>; - - usb2_phy1: usb2-phy@120 { - compatible = "hisilicon,hi3798cv200-usb2-phy"; - reg = <0x120 0x4>; - clocks = <&crg HISTB_USB2_PHY1_REF_CLK>; - resets = <&crg 0xbc 4>; - #address-cells = <1>; - #size-cells = <0>; - - usb2_phy1_port0: phy@0 { - reg = <0>; - #phy-cells = <0>; - resets = <&crg 0xbc 8>; - }; - - usb2_phy1_port1: phy@1 { - reg = <1>; - #phy-cells = <0>; - resets = <&crg 0xbc 9>; - }; - }; - - usb2_phy2: usb2-phy@124 { - compatible = "hisilicon,hi3798cv200-usb2-phy"; - reg = <0x124 0x4>; - clocks = <&crg HISTB_USB2_PHY2_REF_CLK>; - resets = <&crg 0xbc 6>; - #address-cells = <1>; - #size-cells = <0>; - - usb2_phy2_port0: phy@0 { - reg = <0>; - #phy-cells = <0>; - resets = <&crg 0xbc 10>; - }; - }; -}; From 66acaf8f6b0bcc273f8356b2a77baa90b177014c Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:25:30 -0500 Subject: [PATCH 0140/2411] dt-bindings: phy: Convert img,pistachio-usb-phy to DT schema Convert the Imagination Pistachio USB PHY binding to DT schema format. It's a straight forward conversion. 
Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250607212531.742082-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../bindings/phy/img,pistachio-usb-phy.yaml | 62 +++++++++++++++++++ .../bindings/phy/pistachio-usb-phy.txt | 29 --------- 2 files changed, 62 insertions(+), 29 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/img,pistachio-usb-phy.yaml delete mode 100644 Documentation/devicetree/bindings/phy/pistachio-usb-phy.txt diff --git a/Documentation/devicetree/bindings/phy/img,pistachio-usb-phy.yaml b/Documentation/devicetree/bindings/phy/img,pistachio-usb-phy.yaml new file mode 100644 index 000000000000..bcc19bc68297 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/img,pistachio-usb-phy.yaml @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/img,pistachio-usb-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Imagination Pistachio USB PHY + +maintainers: + - Andrew Bresticker + +properties: + compatible: + const: img,pistachio-usb-phy + + clocks: + maxItems: 1 + + clock-names: + items: + - const: usb_phy + + '#phy-cells': + const: 0 + + phy-supply: + description: USB VBUS supply. Must supply 5.0V. + + img,refclk: + description: + Reference clock source for the USB PHY. See + <dt-bindings/phy/phy-pistachio-usb.h> for valid values. + $ref: /schemas/types.yaml#/definitions/uint32 + + img,cr-top: + description: CR_TOP syscon phandle. 
+ $ref: /schemas/types.yaml#/definitions/phandle + +required: + - compatible + - clocks + - clock-names + - '#phy-cells' + - img,refclk + - img,cr-top + +additionalProperties: false + +examples: + - | + #include <dt-bindings/clock/pistachio-clk.h> + #include <dt-bindings/phy/phy-pistachio-usb.h> + + usb-phy { + compatible = "img,pistachio-usb-phy"; + clocks = <&clk_core CLK_USB_PHY>; + clock-names = "usb_phy"; + #phy-cells = <0>; + phy-supply = <&usb_vbus>; + img,refclk = <REFCLK_CLK_CORE>; + img,cr-top = <&cr_top>; + }; diff --git a/Documentation/devicetree/bindings/phy/pistachio-usb-phy.txt b/Documentation/devicetree/bindings/phy/pistachio-usb-phy.txt deleted file mode 100644 index c7970c07ee32..000000000000 --- a/Documentation/devicetree/bindings/phy/pistachio-usb-phy.txt +++ /dev/null @@ -1,29 +0,0 @@ -IMG Pistachio USB PHY -===================== - -Required properties: --------------------- - - compatible: Must be "img,pistachio-usb-phy". - - #phy-cells: Must be 0. See ./phy-bindings.txt for details. - - clocks: Must contain an entry for each entry in clock-names. - See ../clock/clock-bindings.txt for details. - - clock-names: Must include "usb_phy". - - img,cr-top: Must contain a phandle to the CR_TOP syscon node. - - img,refclk: Indicates the reference clock source for the USB PHY. - See <dt-bindings/phy/phy-pistachio-usb.h> for a list of valid values. - -Optional properties: --------------------- - - phy-supply: USB VBUS supply. Must supply 5.0V. - -Example: --------- -usb_phy: usb-phy { - compatible = "img,pistachio-usb-phy"; - clocks = <&clk_core CLK_USB_PHY>; - clock-names = "usb_phy"; - phy-supply = <&usb_vbus>; - img,refclk = <REFCLK_CLK_CORE>; - img,cr-top = <&cr_top>; - #phy-cells = <0>; -}; From 85d6af3b73d4741b2a0d101e6bfac4bfd529e5b5 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:25:36 -0500 Subject: [PATCH 0141/2411] dt-bindings: phy: Convert lantiq,ase-usb2-phy to DT schema Convert the Lantiq XWAY USB PHY binding to DT schema format. It's a straight forward conversion. 
Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250607212537.742287-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../bindings/phy/lantiq,ase-usb2-phy.yaml | 71 +++++++++++++++++++ .../bindings/phy/phy-lantiq-rcu-usb2.txt | 40 ----------- 2 files changed, 71 insertions(+), 40 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/lantiq,ase-usb2-phy.yaml delete mode 100644 Documentation/devicetree/bindings/phy/phy-lantiq-rcu-usb2.txt diff --git a/Documentation/devicetree/bindings/phy/lantiq,ase-usb2-phy.yaml b/Documentation/devicetree/bindings/phy/lantiq,ase-usb2-phy.yaml new file mode 100644 index 000000000000..99b5da705ca4 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/lantiq,ase-usb2-phy.yaml @@ -0,0 +1,71 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/lantiq,ase-usb2-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Lantiq XWAY SoC RCU USB 1.1/2.0 PHY + +maintainers: + - Hauke Mehrtens + +description: + This node has to be a sub node of the Lantiq RCU block. 
+ +properties: + compatible: + items: + - enum: + - lantiq,ase-usb2-phy + - lantiq,danube-usb2-phy + - lantiq,xrx100-usb2-phy + - lantiq,xrx200-usb2-phy + - lantiq,xrx300-usb2-phy + + reg: + items: + - description: Offset of the USB PHY configuration register + - description: Offset of the USB Analog configuration register + + clocks: + maxItems: 1 + + clock-names: + items: + - const: phy + + resets: + minItems: 1 + maxItems: 2 + + reset-names: + minItems: 1 + items: + - enum: [ phy, ctrl ] + - const: ctrl + + '#phy-cells': + const: 0 + +required: + - compatible + - reg + - clocks + - clock-names + - resets + - reset-names + - '#phy-cells' + +additionalProperties: false + +examples: + - | + usb2-phy@18 { + compatible = "lantiq,xrx200-usb2-phy"; + reg = <0x18 4>, <0x38 4>; + clocks = <&pmu 1>; + clock-names = "phy"; + resets = <&reset1 4 4>, <&reset0 4 4>; + reset-names = "phy", "ctrl"; + #phy-cells = <0>; + }; diff --git a/Documentation/devicetree/bindings/phy/phy-lantiq-rcu-usb2.txt b/Documentation/devicetree/bindings/phy/phy-lantiq-rcu-usb2.txt deleted file mode 100644 index 643948b6b576..000000000000 --- a/Documentation/devicetree/bindings/phy/phy-lantiq-rcu-usb2.txt +++ /dev/null @@ -1,40 +0,0 @@ -Lantiq XWAY SoC RCU USB 1.1/2.0 PHY binding -=========================================== - -This binding describes the USB PHY hardware provided by the RCU module on the -Lantiq XWAY SoCs. - -This node has to be a sub node of the Lantiq RCU block. 
- -------------------------------------------------------------------------------- -Required properties (controller (parent) node): -- compatible : Should be one of - "lantiq,ase-usb2-phy" - "lantiq,danube-usb2-phy" - "lantiq,xrx100-usb2-phy" - "lantiq,xrx200-usb2-phy" - "lantiq,xrx300-usb2-phy" -- reg : Defines the following sets of registers in the parent - syscon device - - Offset of the USB PHY configuration register - - Offset of the USB Analog configuration - register (only for xrx200 and xrx200) -- clocks : References to the (PMU) "phy" clk gate. -- clock-names : Must be "phy" -- resets : References to the RCU USB configuration reset bits. -- reset-names : Must be one of the following: - "phy" (optional) - "ctrl" (shared) - -------------------------------------------------------------------------------- -Example for the USB PHYs on an xRX200 SoC: - usb_phy0: usb2-phy@18 { - compatible = "lantiq,xrx200-usb2-phy"; - reg = <0x18 4>, <0x38 4>; - - clocks = <&pmu PMU_GATE_USB0_PHY>; - clock-names = "phy"; - resets = <&reset1 4 4>, <&reset0 4 4>; - reset-names = "phy", "ctrl"; - #phy-cells = <0>; - }; From f4b522ce6ac602bb584c721724b626e64e0abcc1 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:25:44 -0500 Subject: [PATCH 0142/2411] dt-bindings: phy: Convert marvell,berlin2-sata-phy to DT schema Convert the Marvell Berlin2 SATA PHY binding to DT schema format. It's a straight forward conversion. 
Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250607212545.742617-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../bindings/phy/berlin-sata-phy.txt | 36 --------- .../phy/marvell,berlin2-sata-phy.yaml | 76 +++++++++++++++++++ 2 files changed, 76 insertions(+), 36 deletions(-) delete mode 100644 Documentation/devicetree/bindings/phy/berlin-sata-phy.txt create mode 100644 Documentation/devicetree/bindings/phy/marvell,berlin2-sata-phy.yaml diff --git a/Documentation/devicetree/bindings/phy/berlin-sata-phy.txt b/Documentation/devicetree/bindings/phy/berlin-sata-phy.txt deleted file mode 100644 index c0155f842f62..000000000000 --- a/Documentation/devicetree/bindings/phy/berlin-sata-phy.txt +++ /dev/null @@ -1,36 +0,0 @@ -Berlin SATA PHY ---------------- - -Required properties: -- compatible: should be one of - "marvell,berlin2-sata-phy" - "marvell,berlin2q-sata-phy" -- address-cells: should be 1 -- size-cells: should be 0 -- phy-cells: from the generic PHY bindings, must be 1 -- reg: address and length of the register -- clocks: reference to the clock entry - -Sub-nodes: -Each PHY should be represented as a sub-node. 
- -Sub-nodes required properties: -- reg: the PHY number - -Example: - sata_phy: phy@f7e900a0 { - compatible = "marvell,berlin2q-sata-phy"; - reg = <0xf7e900a0 0x200>; - clocks = <&chip CLKID_SATA>; - #address-cells = <1>; - #size-cells = <0>; - #phy-cells = <1>; - - sata-phy@0 { - reg = <0>; - }; - - sata-phy@1 { - reg = <1>; - }; - }; diff --git a/Documentation/devicetree/bindings/phy/marvell,berlin2-sata-phy.yaml b/Documentation/devicetree/bindings/phy/marvell,berlin2-sata-phy.yaml new file mode 100644 index 000000000000..6fc9ff96e682 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/marvell,berlin2-sata-phy.yaml @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/marvell,berlin2-sata-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Marvell Berlin SATA PHY + +maintainers: + - Antoine Tenart + +properties: + compatible: + enum: + - marvell,berlin2-sata-phy + - marvell,berlin2q-sata-phy + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + '#address-cells': + const: 1 + + '#size-cells': + const: 0 + + '#phy-cells': + const: 1 + +patternProperties: + '^sata-phy@[0-1]$': + description: A SATA PHY sub-node. + type: object + additionalProperties: false + + properties: + reg: + maximum: 1 + description: PHY index number. 
+ + required: + - reg + +required: + - compatible + - reg + - clocks + - '#address-cells' + - '#size-cells' + - '#phy-cells' + +additionalProperties: false + +examples: + - | + #include <dt-bindings/clock/berlin2q.h> + + phy@f7e900a0 { + compatible = "marvell,berlin2q-sata-phy"; + reg = <0xf7e900a0 0x200>; + clocks = <&chip CLKID_SATA>; + #address-cells = <1>; + #size-cells = <0>; + #phy-cells = <1>; + + sata-phy@0 { + reg = <0>; + }; + + sata-phy@1 { + reg = <1>; + }; + }; From 08a9bc357aa06ae7ca286eef698ba02c2396c5c9 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:25:53 -0500 Subject: [PATCH 0143/2411] dt-bindings: phy: Convert marvell,berlin2-usb-phy to DT schema Convert the Marvell Berlin2 USB PHY binding to DT schema format. It's a straight forward conversion. Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250607212554.742884-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../bindings/phy/berlin-usb-phy.txt | 16 ------- .../bindings/phy/marvell,berlin2-usb-phy.yaml | 42 +++++++++++++++++++ 2 files changed, 42 insertions(+), 16 deletions(-) delete mode 100644 Documentation/devicetree/bindings/phy/berlin-usb-phy.txt create mode 100644 Documentation/devicetree/bindings/phy/marvell,berlin2-usb-phy.yaml diff --git a/Documentation/devicetree/bindings/phy/berlin-usb-phy.txt b/Documentation/devicetree/bindings/phy/berlin-usb-phy.txt deleted file mode 100644 index be33780f668e..000000000000 --- a/Documentation/devicetree/bindings/phy/berlin-usb-phy.txt +++ /dev/null @@ -1,16 +0,0 @@ -* Marvell Berlin USB PHY - -Required properties: -- compatible: "marvell,berlin2-usb-phy" or "marvell,berlin2cd-usb-phy" -- reg: base address and length of the registers -- #phys-cells: should be 0 -- resets: reference to the reset controller - -Example: - - usb-phy@f774000 { - compatible = "marvell,berlin2-usb-phy"; - reg = <0xf774000 0x128>; - #phy-cells = <0>; - resets = <&chip 0x104 14>; - }; diff --git
a/Documentation/devicetree/bindings/phy/marvell,berlin2-usb-phy.yaml b/Documentation/devicetree/bindings/phy/marvell,berlin2-usb-phy.yaml new file mode 100644 index 000000000000..b401e12a600c --- /dev/null +++ b/Documentation/devicetree/bindings/phy/marvell,berlin2-usb-phy.yaml @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/marvell,berlin2-usb-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Marvell Berlin USB PHY + +maintainers: + - Antoine Tenart + +properties: + compatible: + enum: + - marvell,berlin2-usb-phy + - marvell,berlin2cd-usb-phy + + reg: + maxItems: 1 + + "#phy-cells": + const: 0 + + resets: + maxItems: 1 + +required: + - compatible + - reg + - "#phy-cells" + - resets + +additionalProperties: false + +examples: + - | + usb-phy@f774000 { + compatible = "marvell,berlin2-usb-phy"; + reg = <0xf774000 0x128>; + #phy-cells = <0>; + resets = <&chip 0x104 14>; + }; From 50355ac70d4f104e2f82bfbd0658c129027ebb37 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:26:03 -0500 Subject: [PATCH 0144/2411] dt-bindings: phy: Convert marvell,comphy-cp110 to DT schema Convert the Marvell CP110 combo PHY binding to DT schema format. It's a straight forward conversion. 
Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250607212605.743176-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../bindings/phy/marvell,comphy-cp110.yaml | 154 ++++++++++++++++++ .../bindings/phy/phy-mvebu-comphy.txt | 94 ----------- MAINTAINERS | 2 +- 3 files changed, 155 insertions(+), 95 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/marvell,comphy-cp110.yaml delete mode 100644 Documentation/devicetree/bindings/phy/phy-mvebu-comphy.txt diff --git a/Documentation/devicetree/bindings/phy/marvell,comphy-cp110.yaml b/Documentation/devicetree/bindings/phy/marvell,comphy-cp110.yaml new file mode 100644 index 000000000000..d9501df42886 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/marvell,comphy-cp110.yaml @@ -0,0 +1,154 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/marvell,comphy-cp110.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Marvell MVEBU COMPHY Controller + +maintainers: + - Miquel Raynal + +description: > + COMPHY controllers can be found on the following Marvell MVEBU SoCs: + + * Armada 7k/8k (on the CP110) + * Armada 3700 + + It provides a number of shared PHYs used by various interfaces (network, SATA, + USB, PCIe...). 
+ +properties: + compatible: + enum: + - marvell,comphy-cp110 + - marvell,comphy-a3700 + + reg: + minItems: 1 + items: + - description: Generic COMPHY registers + - description: Lane 1 (PCIe/GbE) registers (Armada 3700) + - description: Lane 0 (USB3/GbE) registers (Armada 3700) + - description: Lane 2 (SATA/USB3) registers (Armada 3700) + + reg-names: + minItems: 1 + items: + - const: comphy + - const: lane1_pcie_gbe + - const: lane0_usb3_gbe + - const: lane2_sata_usb3 + + '#address-cells': + const: 1 + + '#size-cells': + const: 0 + + clocks: + maxItems: 3 + description: Reference clocks for CP110; MG clock, MG Core clock, AXI clock + + clock-names: + items: + - const: mg_clk + - const: mg_core_clk + - const: axi_clk + + marvell,system-controller: + description: Phandle to the Marvell system controller (CP110 only) + $ref: /schemas/types.yaml#/definitions/phandle + +patternProperties: + '^phy@[0-2]$': + description: A COMPHY lane child node + type: object + additionalProperties: false + + properties: + reg: + description: COMPHY lane number + + '#phy-cells': + const: 1 + + required: + - reg + - '#phy-cells' + +required: + - compatible + - reg + +additionalProperties: false + +allOf: + - if: + properties: + compatible: + const: marvell,comphy-a3700 + + then: + properties: + clocks: false + clock-names: false + + required: + - reg-names + + else: + required: + - marvell,system-controller + +examples: + - | + phy@120000 { + compatible = "marvell,comphy-cp110"; + reg = <0x120000 0x6000>; + clocks = <&clk 1 5>, <&clk 1 6>, <&clk 1 18>; + clock-names = "mg_clk", "mg_core_clk", "axi_clk"; + #address-cells = <1>; + #size-cells = <0>; + marvell,system-controller = <&syscon0>; + + phy@0 { + reg = <0>; + #phy-cells = <1>; + }; + + phy@1 { + reg = <1>; + #phy-cells = <1>; + }; + }; + + - | + phy@18300 { + compatible = "marvell,comphy-a3700"; + reg = <0x18300 0x300>, + <0x1F000 0x400>, + <0x5C000 0x400>, + <0xe0178 0x8>; + reg-names = "comphy", + "lane1_pcie_gbe", + 
"lane0_usb3_gbe", + "lane2_sata_usb3"; + #address-cells = <1>; + #size-cells = <0>; + + comphy0: phy@0 { + reg = <0>; + #phy-cells = <1>; + }; + + comphy1: phy@1 { + reg = <1>; + #phy-cells = <1>; + }; + + comphy2: phy@2 { + reg = <2>; + #phy-cells = <1>; + }; + }; diff --git a/Documentation/devicetree/bindings/phy/phy-mvebu-comphy.txt b/Documentation/devicetree/bindings/phy/phy-mvebu-comphy.txt deleted file mode 100644 index 5ffd0f55d010..000000000000 --- a/Documentation/devicetree/bindings/phy/phy-mvebu-comphy.txt +++ /dev/null @@ -1,94 +0,0 @@ -MVEBU comphy drivers --------------------- - -COMPHY controllers can be found on the following Marvell MVEBU SoCs: -* Armada 7k/8k (on the CP110) -* Armada 3700 -It provides a number of shared PHYs used by various interfaces (network, SATA, -USB, PCIe...). - -Required properties: - -- compatible: should be one of: - * "marvell,comphy-cp110" for Armada 7k/8k - * "marvell,comphy-a3700" for Armada 3700 -- reg: should contain the COMPHY register(s) location(s) and length(s). - * 1 entry for Armada 7k/8k - * 4 entries for Armada 3700 along with the corresponding reg-names - properties, memory areas are: - * Generic COMPHY registers - * Lane 1 (PCIe/GbE) - * Lane 0 (USB3/GbE) - * Lane 2 (SATA/USB3) -- marvell,system-controller: should contain a phandle to the system - controller node (only for Armada 7k/8k) -- #address-cells: should be 1. -- #size-cells: should be 0. - -Optional properlties: - -- clocks: pointers to the reference clocks for this device (CP110 only), - consequently: MG clock, MG Core clock, AXI clock. -- clock-names: names of used clocks for CP110 only, must be : - "mg_clk", "mg_core_clk" and "axi_clk". - -A sub-node is required for each comphy lane provided by the comphy. - -Required properties (child nodes): - -- reg: COMPHY lane number. -- #phy-cells : from the generic PHY bindings, must be 1. Defines the - input port to use for a given comphy lane. 
- -Examples: - - CP11X_LABEL(comphy): phy@120000 { - compatible = "marvell,comphy-cp110"; - reg = <0x120000 0x6000>; - marvell,system-controller = <&CP11X_LABEL(syscon0)>; - clocks = <&CP11X_LABEL(clk) 1 5>, <&CP11X_LABEL(clk) 1 6>, - <&CP11X_LABEL(clk) 1 18>; - clock-names = "mg_clk", "mg_core_clk", "axi_clk"; - #address-cells = <1>; - #size-cells = <0>; - - CP11X_LABEL(comphy0): phy@0 { - reg = <0>; - #phy-cells = <1>; - }; - - CP11X_LABEL(comphy1): phy@1 { - reg = <1>; - #phy-cells = <1>; - }; - }; - - comphy: phy@18300 { - compatible = "marvell,comphy-a3700"; - reg = <0x18300 0x300>, - <0x1F000 0x400>, - <0x5C000 0x400>, - <0xe0178 0x8>; - reg-names = "comphy", - "lane1_pcie_gbe", - "lane0_usb3_gbe", - "lane2_sata_usb3"; - #address-cells = <1>; - #size-cells = <0>; - - - comphy0: phy@0 { - reg = <0>; - #phy-cells = <1>; - }; - - comphy1: phy@1 { - reg = <1>; - #phy-cells = <1>; - }; - - comphy2: phy@2 { - reg = <2>; - #phy-cells = <1>; - }; - }; diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..8b8828b5ac1b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14495,7 +14495,7 @@ MARVELL ARMADA 3700 PHY DRIVERS M: Miquel Raynal S: Maintained F: Documentation/devicetree/bindings/phy/marvell,armada-3700-utmi-phy.yaml -F: Documentation/devicetree/bindings/phy/phy-mvebu-comphy.txt +F: Documentation/devicetree/bindings/phy/marvell,comphy-cp110.yaml F: drivers/phy/marvell/phy-mvebu-a3700-comphy.c F: drivers/phy/marvell/phy-mvebu-a3700-utmi.c From fbcc4937636385208561c60a9a51ecb550528cc8 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:26:11 -0500 Subject: [PATCH 0145/2411] dt-bindings: phy: Convert marvell,mmp2-usb-phy to DT schema Convert the Marvell MMP2 USB PHY binding to DT schema format. It's a straight forward conversion. 
Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250607212613.743515-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../bindings/phy/marvell,mmp2-usb-phy.yaml | 38 +++++++++++++++++++ .../devicetree/bindings/phy/phy-pxa-usb.txt | 18 --------- 2 files changed, 38 insertions(+), 18 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/marvell,mmp2-usb-phy.yaml delete mode 100644 Documentation/devicetree/bindings/phy/phy-pxa-usb.txt diff --git a/Documentation/devicetree/bindings/phy/marvell,mmp2-usb-phy.yaml b/Documentation/devicetree/bindings/phy/marvell,mmp2-usb-phy.yaml new file mode 100644 index 000000000000..2441c5fae550 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/marvell,mmp2-usb-phy.yaml @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/marvell,mmp2-usb-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Marvell MMP2/PXA USB PHY + +maintainers: + - Lubomir Rintel + +properties: + compatible: + enum: + - marvell,mmp2-usb-phy + - marvell,pxa910-usb-phy + - marvell,pxa168-usb-phy + + reg: + maxItems: 1 + + "#phy-cells": + const: 0 + +required: + - compatible + - "#phy-cells" + +additionalProperties: false + +examples: + - | + usbphy@d4207000 { + compatible = "marvell,mmp2-usb-phy"; + reg = <0xd4207000 0x40>; + #phy-cells = <0>; + status = "okay"; + }; diff --git a/Documentation/devicetree/bindings/phy/phy-pxa-usb.txt b/Documentation/devicetree/bindings/phy/phy-pxa-usb.txt deleted file mode 100644 index d80e36a77ec5..000000000000 --- a/Documentation/devicetree/bindings/phy/phy-pxa-usb.txt +++ /dev/null @@ -1,18 +0,0 @@ -Marvell PXA USB PHY -------------------- - -Required properties: -- compatible: one of: "marvell,mmp2-usb-phy", "marvell,pxa910-usb-phy", - "marvell,pxa168-usb-phy", -- #phy-cells: must be 0 - -Example: - usb-phy: usbphy@d4207000 { - compatible = "marvell,mmp2-usb-phy"; - reg = <0xd4207000 0x40>; 
- #phy-cells = <0>; - status = "okay"; - }; - -This document explains the device tree binding. For general -information about PHY subsystem refer to Documentation/driver-api/phy/phy.rst From 351d6b70c1c98c560c614b85c24e68cdb1ec8b1e Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:26:15 -0500 Subject: [PATCH 0146/2411] dt-bindings: phy: Convert motorola,cpcap-usb-phy to DT schema Convert the Motorola CPCAP PMIC USB PHY binding to DT schema format. It's a straight forward conversion. Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250607212616.743674-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../bindings/mfd/motorola-cpcap.txt | 2 +- .../bindings/phy/motorola,cpcap-usb-phy.yaml | 107 ++++++++++++++++++ .../devicetree/bindings/phy/phy-cpcap-usb.txt | 40 ------- 3 files changed, 108 insertions(+), 41 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/motorola,cpcap-usb-phy.yaml delete mode 100644 Documentation/devicetree/bindings/phy/phy-cpcap-usb.txt diff --git a/Documentation/devicetree/bindings/mfd/motorola-cpcap.txt b/Documentation/devicetree/bindings/mfd/motorola-cpcap.txt index f00827c9b67f..18c3fc26ca93 100644 --- a/Documentation/devicetree/bindings/mfd/motorola-cpcap.txt +++ b/Documentation/devicetree/bindings/mfd/motorola-cpcap.txt @@ -19,7 +19,7 @@ which are described in the following files: - Documentation/devicetree/bindings/power/supply/cpcap-battery.yaml - Documentation/devicetree/bindings/power/supply/cpcap-charger.yaml - Documentation/devicetree/bindings/regulator/cpcap-regulator.txt -- Documentation/devicetree/bindings/phy/phy-cpcap-usb.txt +- Documentation/devicetree/bindings/phy/motorola,cpcap-usb-phy.yaml - Documentation/devicetree/bindings/input/cpcap-pwrbutton.txt - Documentation/devicetree/bindings/rtc/cpcap-rtc.txt - Documentation/devicetree/bindings/leds/leds-cpcap.txt diff --git a/Documentation/devicetree/bindings/phy/motorola,cpcap-usb-phy.yaml 
b/Documentation/devicetree/bindings/phy/motorola,cpcap-usb-phy.yaml new file mode 100644 index 000000000000..0febd04a61f4 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/motorola,cpcap-usb-phy.yaml @@ -0,0 +1,107 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/motorola,cpcap-usb-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Motorola CPCAP PMIC USB PHY + +maintainers: + - Tony Lindgren + +properties: + compatible: + enum: + - motorola,cpcap-usb-phy + - motorola,mapphone-cpcap-usb-phy + + '#phy-cells': + const: 0 + + interrupts: + description: CPCAP PMIC interrupts used by the USB PHY + items: + - description: id_ground interrupt + - description: id_float interrupt + - description: se0conn interrupt + - description: vbusvld interrupt + - description: sessvld interrupt + - description: sessend interrupt + - description: se1 interrupt + - description: dm interrupt + - description: dp interrupt + + interrupt-names: + description: Interrupt names + items: + - const: id_ground + - const: id_float + - const: se0conn + - const: vbusvld + - const: sessvld + - const: sessend + - const: se1 + - const: dm + - const: dp + + io-channels: + description: IIO ADC channels used by the USB PHY + items: + - description: vbus channel + - description: id channel + + io-channel-names: + items: + - const: vbus + - const: id + + vusb-supply: true + + pinctrl-names: + items: + - const: default + - const: ulpi + - const: utmi + - const: uart + + mode-gpios: + description: Optional GPIOs for configuring alternate modes + items: + - description: "mode selection GPIO #0" + - description: "mode selection GPIO #1" + +required: + - compatible + - '#phy-cells' + - interrupts-extended + - interrupt-names + - io-channels + - io-channel-names + - vusb-supply + +additionalProperties: false + +examples: + - | + #include <dt-bindings/gpio/gpio.h> + + phy { + compatible = "motorola,mapphone-cpcap-usb-phy"; + #phy-cells =
<0>; + interrupts-extended = < + &cpcap 15 0 &cpcap 14 0 &cpcap 28 0 &cpcap 19 0 + &cpcap 18 0 &cpcap 17 0 &cpcap 16 0 &cpcap 49 0 + &cpcap 48 1 + >; + interrupt-names = "id_ground", "id_float", "se0conn", "vbusvld", + "sessvld", "sessend", "se1", "dm", "dp"; + io-channels = <&cpcap_adc 2>, <&cpcap_adc 7>; + io-channel-names = "vbus", "id"; + vusb-supply = <&vusb>; + pinctrl-0 = <&usb_gpio_mux_sel1 &usb_gpio_mux_sel2>; + pinctrl-1 = <&usb_ulpi_pins>; + pinctrl-2 = <&usb_utmi_pins>; + pinctrl-3 = <&uart3_pins>; + pinctrl-names = "default", "ulpi", "utmi", "uart"; + mode-gpios = <&gpio2 28 GPIO_ACTIVE_HIGH>, <&gpio1 0 GPIO_ACTIVE_HIGH>; + }; diff --git a/Documentation/devicetree/bindings/phy/phy-cpcap-usb.txt b/Documentation/devicetree/bindings/phy/phy-cpcap-usb.txt deleted file mode 100644 index 2eb9b2b69037..000000000000 --- a/Documentation/devicetree/bindings/phy/phy-cpcap-usb.txt +++ /dev/null @@ -1,40 +0,0 @@ -Motorola CPCAP PMIC USB PHY binding - -Required properties: -compatible: Shall be either "motorola,cpcap-usb-phy" or - "motorola,mapphone-cpcap-usb-phy" -#phy-cells: Shall be 0 -interrupts: CPCAP PMIC interrupts used by the USB PHY -interrupt-names: Interrupt names -io-channels: IIO ADC channels used by the USB PHY -io-channel-names: IIO ADC channel names -vusb-supply: Regulator for the PHY - -Optional properties: -pinctrl: Optional alternate pin modes for the PHY -pinctrl-names: Names for optional pin modes -mode-gpios: Optional GPIOs for configuring alternate modes - -Example: -cpcap_usb2_phy: phy { - compatible = "motorola,mapphone-cpcap-usb-phy"; - pinctrl-0 = <&usb_gpio_mux_sel1 &usb_gpio_mux_sel2>; - pinctrl-1 = <&usb_ulpi_pins>; - pinctrl-2 = <&usb_utmi_pins>; - pinctrl-3 = <&uart3_pins>; - pinctrl-names = "default", "ulpi", "utmi", "uart"; - #phy-cells = <0>; - interrupts-extended = < - &cpcap 15 0 &cpcap 14 0 &cpcap 28 0 &cpcap 19 0 - &cpcap 18 0 &cpcap 17 0 &cpcap 16 0 &cpcap 49 0 - &cpcap 48 1 - >; - interrupt-names = - "id_ground", "id_float", 
"se0conn", "vbusvld", - "sessvld", "sessend", "se1", "dm", "dp"; - mode-gpios = <&gpio2 28 GPIO_ACTIVE_HIGH - &gpio1 0 GPIO_ACTIVE_HIGH>; - io-channels = <&cpcap_adc 2>, <&cpcap_adc 7>; - io-channel-names = "vbus", "id"; - vusb-supply = <&vusb>; -}; From 90647aa7e6babffb42e8dac5394991498c46def8 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:26:20 -0500 Subject: [PATCH 0147/2411] dt-bindings: phy: Convert motorola,mapphone-mdm6600 to DT schema Convert the Motorola Mapphone MDM6600 USB PHY binding to DT schema format. It's a straight forward conversion. Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250607212621.743859-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../phy/motorola,mapphone-mdm6600.yaml | 81 +++++++++++++++++++ .../bindings/phy/phy-mapphone-mdm6600.txt | 29 ------- 2 files changed, 81 insertions(+), 29 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/motorola,mapphone-mdm6600.yaml delete mode 100644 Documentation/devicetree/bindings/phy/phy-mapphone-mdm6600.txt diff --git a/Documentation/devicetree/bindings/phy/motorola,mapphone-mdm6600.yaml b/Documentation/devicetree/bindings/phy/motorola,mapphone-mdm6600.yaml new file mode 100644 index 000000000000..cb6544b3478d --- /dev/null +++ b/Documentation/devicetree/bindings/phy/motorola,mapphone-mdm6600.yaml @@ -0,0 +1,81 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/motorola,mapphone-mdm6600.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Motorola Mapphone MDM6600 USB PHY + +maintainers: + - Tony Lindgren + +properties: + compatible: + items: + - const: motorola,mapphone-mdm6600 + + enable-gpios: + description: GPIO to enable the USB PHY + maxItems: 1 + + power-gpios: + description: GPIO to power on the device + maxItems: 1 + + reset-gpios: + description: GPIO to reset the device + maxItems: 1 + + motorola,mode-gpios: + description: Two GPIOs 
to configure MDM6600 USB start-up mode for normal mode versus USB flashing mode + items: + - description: normal mode select GPIO + - description: USB flashing mode select GPIO + + motorola,cmd-gpios: + description: Three GPIOs to control the power state of the MDM6600 + items: + - description: power state control GPIO 0 + - description: power state control GPIO 1 + - description: power state control GPIO 2 + + motorola,status-gpios: + description: Three GPIOs to read the power state of the MDM6600 + items: + - description: power state read GPIO 0 + - description: power state read GPIO 1 + - description: power state read GPIO 2 + + '#phy-cells': + const: 0 + +required: + - compatible + - enable-gpios + - power-gpios + - reset-gpios + - motorola,mode-gpios + - motorola,cmd-gpios + - motorola,status-gpios + +additionalProperties: false + +examples: + - | + #include <dt-bindings/gpio/gpio.h> + + usb-phy { + compatible = "motorola,mapphone-mdm6600"; + enable-gpios = <&gpio3 31 GPIO_ACTIVE_LOW>; + power-gpios = <&gpio2 22 GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpio2 17 GPIO_ACTIVE_HIGH>; + motorola,mode-gpios = <&gpio5 20 GPIO_ACTIVE_HIGH>, + <&gpio5 21 GPIO_ACTIVE_HIGH>; + motorola,cmd-gpios = <&gpio4 7 GPIO_ACTIVE_HIGH>, + <&gpio4 8 GPIO_ACTIVE_HIGH>, + <&gpio5 14 GPIO_ACTIVE_HIGH>; + motorola,status-gpios = <&gpio2 20 GPIO_ACTIVE_HIGH>, + <&gpio2 21 GPIO_ACTIVE_HIGH>, + <&gpio2 23 GPIO_ACTIVE_HIGH>; + #phy-cells = <0>; + }; diff --git a/Documentation/devicetree/bindings/phy/phy-mapphone-mdm6600.txt b/Documentation/devicetree/bindings/phy/phy-mapphone-mdm6600.txt deleted file mode 100644 index 29427d4f047a..000000000000 --- a/Documentation/devicetree/bindings/phy/phy-mapphone-mdm6600.txt +++ /dev/null @@ -1,29 +0,0 @@ -Device tree binding documentation for Motorola Mapphone MDM6600 USB PHY - -Required properties: -- compatible Must be "motorola,mapphone-mdm6600" -- enable-gpios GPIO to enable the USB PHY -- power-gpios GPIO to power on the device -- reset-gpios GPIO to reset the device -
motorola,mode-gpios Two GPIOs to configure MDM6600 USB start-up mode for - normal mode versus USB flashing mode -- motorola,cmd-gpios Three GPIOs to control the power state of the MDM6600 -- motorola,status-gpios Three GPIOs to read the power state of the MDM6600 - -Example: - -usb-phy { - compatible = "motorola,mapphone-mdm6600"; - enable-gpios = <&gpio3 31 GPIO_ACTIVE_LOW>; - power-gpios = <&gpio2 22 GPIO_ACTIVE_HIGH>; - reset-gpios = <&gpio2 17 GPIO_ACTIVE_HIGH>; - motorola,mode-gpios = <&gpio5 20 GPIO_ACTIVE_HIGH>, - <&gpio5 21 GPIO_ACTIVE_HIGH>; - motorola,cmd-gpios = <&gpio4 7 GPIO_ACTIVE_HIGH>, - <&gpio4 8 GPIO_ACTIVE_HIGH>, - <&gpio5 14 GPIO_ACTIVE_HIGH>; - motorola,status-gpios = <&gpio2 20 GPIO_ACTIVE_HIGH>, - <&gpio2 21 GPIO_ACTIVE_HIGH>, - <&gpio2 23 GPIO_ACTIVE_HIGH>; - #phy-cells = <0>; -}; From 3ed7be12756d0ad8ebe34b2cfcfd8f84cfbb2678 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:26:23 -0500 Subject: [PATCH 0148/2411] dt-bindings: phy: Convert qca,ar7100-usb-phy to DT schema Convert the Qualcomm-Atheros AR7100 USB PHY binding to DT schema format. It's a straight forward conversion. 
Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250607212625.744008-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/phy-ath79-usb.txt | 18 ------- .../bindings/phy/qca,ar7100-usb-phy.yaml | 49 +++++++++++++++++++ MAINTAINERS | 2 +- 3 files changed, 50 insertions(+), 19 deletions(-) delete mode 100644 Documentation/devicetree/bindings/phy/phy-ath79-usb.txt create mode 100644 Documentation/devicetree/bindings/phy/qca,ar7100-usb-phy.yaml diff --git a/Documentation/devicetree/bindings/phy/phy-ath79-usb.txt b/Documentation/devicetree/bindings/phy/phy-ath79-usb.txt deleted file mode 100644 index c3a29c5feea3..000000000000 --- a/Documentation/devicetree/bindings/phy/phy-ath79-usb.txt +++ /dev/null @@ -1,18 +0,0 @@ -* Atheros AR71XX/9XXX USB PHY - -Required properties: -- compatible: "qca,ar7100-usb-phy" -- #phys-cells: should be 0 -- reset-names: "phy"[, "suspend-override"] -- resets: references to the reset controllers - -Example: - - usb-phy { - compatible = "qca,ar7100-usb-phy"; - - reset-names = "phy", "suspend-override"; - resets = <&rst 4>, <&rst 3>; - - #phy-cells = <0>; - }; diff --git a/Documentation/devicetree/bindings/phy/qca,ar7100-usb-phy.yaml b/Documentation/devicetree/bindings/phy/qca,ar7100-usb-phy.yaml new file mode 100644 index 000000000000..029665530829 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/qca,ar7100-usb-phy.yaml @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/qca,ar7100-usb-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Atheros AR71XX/9XXX USB PHY + +maintainers: + - Alban Bedel + +properties: + compatible: + items: + - const: qca,ar7100-usb-phy + + reset-names: + description: Names of reset lines in order. + minItems: 1 + items: + - const: phy + - const: suspend-override + + resets: + description: References to the reset controllers. 
+ minItems: 1 + items: + - description: Reset controller for phy + - description: Reset controller for suspend-override + + '#phy-cells': + const: 0 + +required: + - compatible + - reset-names + - resets + - '#phy-cells' + +additionalProperties: false + +examples: + - | + usb-phy { + compatible = "qca,ar7100-usb-phy"; + reset-names = "phy", "suspend-override"; + resets = <&rst 4>, <&rst 3>; + #phy-cells = <0>; + }; diff --git a/MAINTAINERS b/MAINTAINERS index 8b8828b5ac1b..04cda64989c5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3823,7 +3823,7 @@ M: Alban Bedel S: Maintained W: https://github.com/AlbanBedel/linux T: git git://github.com/AlbanBedel/linux -F: Documentation/devicetree/bindings/phy/phy-ath79-usb.txt +F: Documentation/devicetree/bindings/phy/qca,ar7100-usb-phy.yaml F: drivers/phy/qualcomm/phy-ath79-usb.c ATHEROS ATH GENERIC UTILITIES From ea54c9d157c705df5e9399ba50fa38edcabd37b1 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:26:28 -0500 Subject: [PATCH 0149/2411] dt-bindings: phy: Convert st,spear1310-miphy to DT schema Convert the ST SPEAr MIPHY PHY binding to DT schema format. It's a straight forward conversion. 
Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250607212629.744191-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../bindings/phy/st,spear1310-miphy.yaml | 53 +++++++++++++++++++ .../bindings/phy/st-spear-miphy.txt | 15 ------ 2 files changed, 53 insertions(+), 15 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/st,spear1310-miphy.yaml delete mode 100644 Documentation/devicetree/bindings/phy/st-spear-miphy.txt diff --git a/Documentation/devicetree/bindings/phy/st,spear1310-miphy.yaml b/Documentation/devicetree/bindings/phy/st,spear1310-miphy.yaml new file mode 100644 index 000000000000..32f81615ddad --- /dev/null +++ b/Documentation/devicetree/bindings/phy/st,spear1310-miphy.yaml @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/st,spear1310-miphy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ST SPEAr miphy + +maintainers: + - Pratyush Anand + +description: + ST Microelectronics SPEAr miphy is a phy controller supporting PCIe and SATA. + +properties: + compatible: + enum: + - st,spear1310-miphy + - st,spear1340-miphy + + reg: + maxItems: 1 + + misc: + description: Phandle for the syscon node to access misc registers. + $ref: /schemas/types.yaml#/definitions/phandle + + '#phy-cells': + description: > + Cell[0] indicates interface type: 0 = SATA, 1 = PCIe. + const: 1 + + phy-id: + description: Instance id of the phy. Required when multiple PHYs are present. 
+ $ref: /schemas/types.yaml#/definitions/uint32 + +required: + - compatible + - reg + - misc + - '#phy-cells' + +additionalProperties: false + +examples: + - | + miphy@1000 { + compatible = "st,spear1310-miphy"; + reg = <0x1000 0x100>; + misc = <&syscon>; + #phy-cells = <1>; + phy-id = <0>; + }; diff --git a/Documentation/devicetree/bindings/phy/st-spear-miphy.txt b/Documentation/devicetree/bindings/phy/st-spear-miphy.txt deleted file mode 100644 index 2a6bfdcc09b3..000000000000 --- a/Documentation/devicetree/bindings/phy/st-spear-miphy.txt +++ /dev/null @@ -1,15 +0,0 @@ -ST SPEAr miphy DT details -========================= - -ST Microelectronics SPEAr miphy is a phy controller supporting PCIe and SATA. - -Required properties: -- compatible : should be "st,spear1310-miphy" or "st,spear1340-miphy" -- reg : offset and length of the PHY register set. -- misc: phandle for the syscon node to access misc registers -- #phy-cells : from the generic PHY bindings, must be 1. - - cell[1]: 0 if phy used for SATA, 1 for PCIe. - -Optional properties: -- phy-id: Instance id of the phy. Only required when there are multiple phys - present on a implementation. From 35b629b28afd72a14ed573f1b180dc4ab1bf7e19 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:26:33 -0500 Subject: [PATCH 0150/2411] dt-bindings: phy: Convert ti,dm816x-usb-phy to DT schema Convert the TI DM816x USB PHY binding to DT schema format. It's a straightforward conversion.
Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250607212634.744373-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/dm816x-phy.txt | 24 -------- .../bindings/phy/ti,dm8168-usb-phy.yaml | 58 +++++++++++++++++++ 2 files changed, 58 insertions(+), 24 deletions(-) delete mode 100644 Documentation/devicetree/bindings/phy/dm816x-phy.txt create mode 100644 Documentation/devicetree/bindings/phy/ti,dm8168-usb-phy.yaml diff --git a/Documentation/devicetree/bindings/phy/dm816x-phy.txt b/Documentation/devicetree/bindings/phy/dm816x-phy.txt deleted file mode 100644 index 2fe3d11d063d..000000000000 --- a/Documentation/devicetree/bindings/phy/dm816x-phy.txt +++ /dev/null @@ -1,24 +0,0 @@ -Device tree binding documentation for am816x USB PHY -========================= - -Required properties: -- compatible : should be "ti,dm816x-usb-phy" -- reg : offset and length of the PHY register set. -- reg-names : name for the phy registers -- clocks : phandle to the clock -- clock-names : name of the clock -- syscon: phandle for the syscon node to access misc registers -- #phy-cells : from the generic PHY bindings, must be 1 -- syscon: phandle for the syscon node to access misc registers - -Example: - -usb_phy0: usb-phy@20 { - compatible = "ti,dm8168-usb-phy"; - reg = <0x20 0x8>; - reg-names = "phy"; - clocks = <&main_fapll 6>; - clock-names = "refclk"; - #phy-cells = <0>; - syscon = <&scm_conf>; -}; diff --git a/Documentation/devicetree/bindings/phy/ti,dm8168-usb-phy.yaml b/Documentation/devicetree/bindings/phy/ti,dm8168-usb-phy.yaml new file mode 100644 index 000000000000..673dc1d37dcb --- /dev/null +++ b/Documentation/devicetree/bindings/phy/ti,dm8168-usb-phy.yaml @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/ti,dm8168-usb-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: TI DM8168 USB PHY + +maintainers: + - Tony Lindgren + 
+properties: + compatible: + const: ti,dm8168-usb-phy + + reg: + maxItems: 1 + + reg-names: + items: + - const: phy + + clocks: + maxItems: 1 + + clock-names: + items: + - const: refclk + + '#phy-cells': + const: 0 + + syscon: + $ref: /schemas/types.yaml#/definitions/phandle + description: Phandle for the syscon node to access misc registers. + +required: + - compatible + - reg + - reg-names + - clocks + - clock-names + - '#phy-cells' + - syscon + +additionalProperties: false + +examples: + - | + usb-phy@20 { + compatible = "ti,dm8168-usb-phy"; + reg = <0x20 0x8>; + reg-names = "phy"; + clocks = <&main_fapll 6>; + clock-names = "refclk"; + #phy-cells = <0>; + syscon = <&scm_conf>; + }; From 222bb02ee691237f1e9393d31226faa35097e9ab Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:26:40 -0500 Subject: [PATCH 0151/2411] dt-bindings: phy: Convert ti,keystone-usbphy to DT schema Convert the TI Keystone USB PHY binding to DT schema format. Drop the "#address-cells" and "#size-cells" properties which don't make sense without any child nodes. Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250607212641.744683-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../bindings/phy/keystone-usb-phy.txt | 19 ---------- .../bindings/phy/ti,keystone-usbphy.yaml | 37 +++++++++++++++++++ 2 files changed, 37 insertions(+), 19 deletions(-) delete mode 100644 Documentation/devicetree/bindings/phy/keystone-usb-phy.txt create mode 100644 Documentation/devicetree/bindings/phy/ti,keystone-usbphy.yaml diff --git a/Documentation/devicetree/bindings/phy/keystone-usb-phy.txt b/Documentation/devicetree/bindings/phy/keystone-usb-phy.txt deleted file mode 100644 index 300830dda0bf..000000000000 --- a/Documentation/devicetree/bindings/phy/keystone-usb-phy.txt +++ /dev/null @@ -1,19 +0,0 @@ -TI Keystone USB PHY - -Required properties: - - compatible: should be "ti,keystone-usbphy". 
- - #address-cells, #size-cells : should be '1' if the device has sub-nodes - with 'reg' property. - - reg : Address and length of the usb phy control register set. - -The main purpose of this PHY driver is to enable the USB PHY reference clock -gate on the Keystone SOC for both the USB2 and USB3 PHY. Otherwise it is just -an NOP PHY driver. Hence this node is referenced as both the usb2 and usb3 -phy node in the USB Glue layer driver node. - -usb_phy: usb_phy@2620738 { - compatible = "ti,keystone-usbphy"; - #address-cells = <1>; - #size-cells = <1>; - reg = <0x2620738 32>; -}; diff --git a/Documentation/devicetree/bindings/phy/ti,keystone-usbphy.yaml b/Documentation/devicetree/bindings/phy/ti,keystone-usbphy.yaml new file mode 100644 index 000000000000..08dc18e7feea --- /dev/null +++ b/Documentation/devicetree/bindings/phy/ti,keystone-usbphy.yaml @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/ti,keystone-usbphy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: TI Keystone USB PHY + +maintainers: + - Nishanth Menon + - Santosh Shilimkar + +description: + The main purpose of this PHY driver is to enable the USB PHY reference clock + gate on the Keystone SOC for both the USB2 and USB3 PHY. Otherwise it is just + an NOP PHY driver. Hence this node is referenced as both the usb2 and usb3 + phy node in the USB Glue layer driver node. 
+ +properties: + compatible: + const: ti,keystone-usbphy + + reg: + maxItems: 1 + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + usb-phy@2620738 { + compatible = "ti,keystone-usbphy"; + reg = <0x2620738 32>; + }; From a5aa04619e715adda36ca5a97cae4c48bad8d65b Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:25:40 -0500 Subject: [PATCH 0152/2411] dt-bindings: phy: Convert marvell,armada-380-comphy to DT schema Convert the Marvell Armada 38x combo PHY binding to DT schema format. It's a straightforward conversion. Signed-off-by: Rob Herring (Arm) Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20250607212541.742427-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../phy/marvell,armada-380-comphy.yaml | 83 +++++++++++++++++++ .../bindings/phy/phy-armada38x-comphy.txt | 48 ----------- 2 files changed, 83 insertions(+), 48 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/marvell,armada-380-comphy.yaml delete mode 100644 Documentation/devicetree/bindings/phy/phy-armada38x-comphy.txt diff --git a/Documentation/devicetree/bindings/phy/marvell,armada-380-comphy.yaml b/Documentation/devicetree/bindings/phy/marvell,armada-380-comphy.yaml new file mode 100644 index 000000000000..dcb4c0007832 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/marvell,armada-380-comphy.yaml @@ -0,0 +1,83 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/marvell,armada-380-comphy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Marvell Armada 38x COMPHY controller + +maintainers: + - Andrew Lunn + - Gregory Clement + +description: + This comphy controller can be found on Marvell Armada 38x. It provides a + number of shared PHYs used by various interfaces (network, sata, usb, + PCIe...).
+ +properties: + compatible: + items: + - const: marvell,armada-380-comphy + + reg: + items: + - description: COMPHY register location and length + - description: Configuration register location and length + + reg-names: + items: + - const: comphy + - const: conf + + '#address-cells': + const: 1 + + '#size-cells': + const: 0 + +patternProperties: + '^phy@[0-5]$': + description: A COMPHY lane + type: object + additionalProperties: false + + properties: + reg: + maximum: 1 + + '#phy-cells': + description: Input port index for the PHY lane + const: 1 + + required: + - reg + - '#phy-cells' + +required: + - compatible + - reg + - '#address-cells' + - '#size-cells' + +additionalProperties: false + +examples: + - | + comphy: phy@18300 { + compatible = "marvell,armada-380-comphy"; + reg = <0x18300 0x100>, <0x18460 4>; + reg-names = "comphy", "conf"; + #address-cells = <1>; + #size-cells = <0>; + + cpm_comphy0: phy@0 { + reg = <0>; + #phy-cells = <1>; + }; + + cpm_comphy1: phy@1 { + reg = <1>; + #phy-cells = <1>; + }; + }; diff --git a/Documentation/devicetree/bindings/phy/phy-armada38x-comphy.txt b/Documentation/devicetree/bindings/phy/phy-armada38x-comphy.txt deleted file mode 100644 index 8b5a7a28a35b..000000000000 --- a/Documentation/devicetree/bindings/phy/phy-armada38x-comphy.txt +++ /dev/null @@ -1,48 +0,0 @@ -mvebu armada 38x comphy driver ------------------------------- - -This comphy controller can be found on Marvell Armada 38x. It provides a -number of shared PHYs used by various interfaces (network, sata, usb, -PCIe...). - -Required properties: - -- compatible: should be "marvell,armada-380-comphy" -- reg: should contain the comphy register location and length. -- #address-cells: should be 1. -- #size-cells: should be 0. - -Optional properties: - -- reg-names: must be "comphy" as the first name, and "conf". -- reg: must contain the comphy register location and length as the first - pair, followed by an optional configuration register address and - length pair. 
- -A sub-node is required for each comphy lane provided by the comphy. - -Required properties (child nodes): - -- reg: comphy lane number. -- #phy-cells : from the generic phy bindings, must be 1. Defines the - input port to use for a given comphy lane. - -Example: - - comphy: phy@18300 { - compatible = "marvell,armada-380-comphy"; - reg-names = "comphy", "conf"; - reg = <0x18300 0x100>, <0x18460 4>; - #address-cells = <1>; - #size-cells = <0>; - - cpm_comphy0: phy@0 { - reg = <0>; - #phy-cells = <1>; - }; - - cpm_comphy1: phy@1 { - reg = <1>; - #phy-cells = <1>; - }; - }; From 4dcf1632d617262f16608f3bd0f6dc00eede8d4e Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sat, 7 Jun 2025 16:26:07 -0500 Subject: [PATCH 0153/2411] dt-bindings: phy: Convert Marvell MVEBU PHYs to DT schema Convert the Marvell Armada-375 USB and MVEBU SATA PHY binding to DT schema format. It's a straightforward conversion. Signed-off-by: Rob Herring (Arm) Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20250607212609.743346-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../phy/marvell,armada-375-usb-cluster.yaml | 40 ++++++++++++++++ .../bindings/phy/marvell,mvebu-sata-phy.yaml | 47 +++++++++++++++++++ .../devicetree/bindings/phy/phy-mvebu.txt | 42 ----------------- 3 files changed, 87 insertions(+), 42 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/marvell,armada-375-usb-cluster.yaml create mode 100644 Documentation/devicetree/bindings/phy/marvell,mvebu-sata-phy.yaml delete mode 100644 Documentation/devicetree/bindings/phy/phy-mvebu.txt diff --git a/Documentation/devicetree/bindings/phy/marvell,armada-375-usb-cluster.yaml b/Documentation/devicetree/bindings/phy/marvell,armada-375-usb-cluster.yaml new file mode 100644 index 000000000000..1706c31644e1 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/marvell,armada-375-usb-cluster.yaml @@ -0,0 +1,40 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id:
http://devicetree.org/schemas/phy/marvell,armada-375-usb-cluster.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Armada 375 USB Cluster + +maintainers: + - Andrew Lunn + - Gregory Clement + +description: + Control register for the Armada 375 USB cluster, managing USB2 and USB3 features. + +properties: + compatible: + const: marvell,armada-375-usb-cluster + + reg: + maxItems: 1 + + '#phy-cells': + description: Number of PHY cells in specifier. 1 for USB2, 2 for USB3. + const: 1 + +required: + - compatible + - reg + - '#phy-cells' + +additionalProperties: false + +examples: + - | + usbcluster: usb-cluster@18400 { + compatible = "marvell,armada-375-usb-cluster"; + reg = <0x18400 0x4>; + #phy-cells = <1>; + }; diff --git a/Documentation/devicetree/bindings/phy/marvell,mvebu-sata-phy.yaml b/Documentation/devicetree/bindings/phy/marvell,mvebu-sata-phy.yaml new file mode 100644 index 000000000000..81e942428911 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/marvell,mvebu-sata-phy.yaml @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/marvell,mvebu-sata-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Marvell MVEBU SATA PHY + +maintainers: + - Andrew Lunn + - Gregory Clement + +properties: + compatible: + const: marvell,mvebu-sata-phy + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-names: + items: + - const: sata + + '#phy-cells': + const: 0 + +required: + - compatible + - reg + - clocks + - clock-names + - '#phy-cells' + +additionalProperties: false + +examples: + - | + sata-phy@84000 { + compatible = "marvell,mvebu-sata-phy"; + reg = <0x84000 0x0334>; + clocks = <&gate_clk 15>; + clock-names = "sata"; + #phy-cells = <0>; + }; diff --git a/Documentation/devicetree/bindings/phy/phy-mvebu.txt b/Documentation/devicetree/bindings/phy/phy-mvebu.txt deleted file mode 100644 index 64afdd13d91d..000000000000 --- 
a/Documentation/devicetree/bindings/phy/phy-mvebu.txt +++ /dev/null @@ -1,42 +0,0 @@ -* Marvell MVEBU SATA PHY - -Power control for the SATA phy found on Marvell MVEBU SoCs. - -This document extends the binding described in phy-bindings.txt - -Required properties : - - - reg : Offset and length of the register set for the SATA device - - compatible : Should be "marvell,mvebu-sata-phy" - - clocks : phandle of clock and specifier that supplies the device - - clock-names : Should be "sata" - -Example: - sata-phy@84000 { - compatible = "marvell,mvebu-sata-phy"; - reg = <0x84000 0x0334>; - clocks = <&gate_clk 15>; - clock-names = "sata"; - #phy-cells = <0>; - }; - -Armada 375 USB cluster ----------------------- - -Armada 375 comes with an USB2 host and device controller and an USB3 -controller. The USB cluster control register allows to manage common -features of both USB controllers. - -Required properties: - -- compatible: "marvell,armada-375-usb-cluster" -- reg: Should contain usb cluster register location and length. -- #phy-cells : from the generic phy bindings, must be 1. Possible -values are 1 (USB2), 2 (USB3). - -Example: - usbcluster: usb-cluster@18400 { - compatible = "marvell,armada-375-usb-cluster"; - reg = <0x18400 0x4>; - #phy-cells = <1> - }; From 00399bbe02d2bb6fd8d6eb90573ec305616449f4 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Wed, 28 May 2025 14:38:58 +0100 Subject: [PATCH 0154/2411] dt-bindings: phy: renesas,usb2-phy: Document RZ/V2N SoC support Document support for the USB2.0 phy found on the Renesas RZ/V2N (R9A09G056) SoC. The USB2.0 phy is functionally identical to that on the RZ/V2H(P) SoC, so no driver changes are needed. The existing `renesas,usb2-phy-r9a09g057` compatible will be used as a fallback for the RZ/V2N SoC. 
Signed-off-by: Lad Prabhakar Acked-by: Conor Dooley Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20250528133858.168582-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/phy/renesas,usb2-phy.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/renesas,usb2-phy.yaml b/Documentation/devicetree/bindings/phy/renesas,usb2-phy.yaml index 2822dce8d9f4..f45c5f039ae8 100644 --- a/Documentation/devicetree/bindings/phy/renesas,usb2-phy.yaml +++ b/Documentation/devicetree/bindings/phy/renesas,usb2-phy.yaml @@ -40,6 +40,10 @@ properties: - renesas,usb2-phy-r9a07g054 # RZ/V2L - const: renesas,rzg2l-usb2-phy + - items: + - const: renesas,usb2-phy-r9a09g056 # RZ/V2N + - const: renesas,usb2-phy-r9a09g057 + reg: maxItems: 1 From 439cdb309c3cf630b11661872ace09e1a7c5d630 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Tue, 27 May 2025 14:04:37 -0700 Subject: [PATCH 0155/2411] dt-bindings: phy: qcom,sc8280xp-qmp-usb43dp-phy: Add SM8750 to QMP PHY Add an entry to the compatible field for SM8750 for the QMP combo PHY. This handles the USB3 path for SM8750. 
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Wesley Cheng Signed-off-by: Melody Olvera Link: https://lore.kernel.org/r/20250527-sm8750_usb_master-v6-1-d58de3b41d34@oss.qualcomm.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml index 358a6736a951..38ce04c35d94 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml @@ -29,6 +29,7 @@ properties: - qcom,sm8450-qmp-usb3-dp-phy - qcom,sm8550-qmp-usb3-dp-phy - qcom,sm8650-qmp-usb3-dp-phy + - qcom,sm8750-qmp-usb3-dp-phy - qcom,x1e80100-qmp-usb3-dp-phy reg: @@ -133,6 +134,7 @@ allOf: - qcom,sm6350-qmp-usb3-dp-phy - qcom,sm8550-qmp-usb3-dp-phy - qcom,sm8650-qmp-usb3-dp-phy + - qcom,sm8750-qmp-usb3-dp-phy - qcom,x1e80100-qmp-usb3-dp-phy then: required: From 1166a2ca0900beafbe5b6d1bb357bc26a87490f1 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Tue, 27 May 2025 14:04:38 -0700 Subject: [PATCH 0156/2411] dt-bindings: phy: Add the M31 based eUSB2 PHY bindings On SM8750, the M31 eUSB2 PHY is being used to support USB2. Add the binding definition for the PHY driver. 
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Wesley Cheng Signed-off-by: Melody Olvera Link: https://lore.kernel.org/r/20250527-sm8750_usb_master-v6-2-d58de3b41d34@oss.qualcomm.com Signed-off-by: Vinod Koul --- .../bindings/phy/qcom,m31-eusb2-phy.yaml | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 Documentation/devicetree/bindings/phy/qcom,m31-eusb2-phy.yaml diff --git a/Documentation/devicetree/bindings/phy/qcom,m31-eusb2-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,m31-eusb2-phy.yaml new file mode 100644 index 000000000000..c84c62d0e8cb --- /dev/null +++ b/Documentation/devicetree/bindings/phy/qcom,m31-eusb2-phy.yaml @@ -0,0 +1,79 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/qcom,m31-eusb2-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm M31 eUSB2 phy + +maintainers: + - Wesley Cheng + +description: + M31 based eUSB2 controller, which supports LS/FS/HS usb connectivity + on Qualcomm chipsets. It is paired with a eUSB2 repeater. + +properties: + compatible: + items: + - enum: + - qcom,sm8750-m31-eusb2-phy + + reg: + maxItems: 1 + + "#phy-cells": + const: 0 + + clocks: + items: + - description: reference clock + + clock-names: + items: + - const: ref + + resets: + maxItems: 1 + + phys: + maxItems: 1 + description: + Phandle to eUSB2 repeater + + vdd-supply: + description: + Phandle to 0.88V regulator supply to PHY digital circuit. + + vdda12-supply: + description: + Phandle to 1.2V regulator supply to PHY refclk pll block. 
+ +required: + - compatible + - reg + - "#phy-cells" + - clocks + - clock-names + - resets + - vdd-supply + - vdda12-supply + +additionalProperties: false + +examples: + - | + usb_1_hsphy: phy@88e3000 { + compatible = "qcom,sm8750-m31-eusb2-phy"; + reg = <0x88e3000 0x29c>; + + clocks = <&tcsrcc_usb2_clkref_en>; + clock-names = "ref"; + + resets = <&gcc_qusb2phy_prim_bcr>; + + #phy-cells = <0>; + + vdd-supply = <&vreg_l2d_0p88>; + vdda12-supply = <&vreg_l3g_1p2>; + }; From c4364048baf4878c270e94aa224bb114b445704d Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Tue, 27 May 2025 14:04:40 -0700 Subject: [PATCH 0157/2411] phy: qcom: qmp-combo: Add new PHY sequences for SM8750 Add new register offsets and PHY values for SM8750. Some of the previous definitions can be leveraged from older PHY versions as offsets within registers have not changed. This also adds the required PHY sequence that is recommended after running hardware characterization. Signed-off-by: Wesley Cheng Signed-off-by: Melody Olvera Reviewed-by: Dmitry Baryshkov Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250527-sm8750_usb_master-v6-4-d58de3b41d34@oss.qualcomm.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 221 ++++++++++++++++++ .../phy/qualcomm/phy-qcom-qmp-pcs-usb-v8.h | 38 +++ drivers/phy/qualcomm/phy-qcom-qmp-pcs-v8.h | 32 +++ .../qualcomm/phy-qcom-qmp-qserdes-com-v8.h | 64 +++++ .../qualcomm/phy-qcom-qmp-qserdes-txrx-v8.h | 68 ++++++ drivers/phy/qualcomm/phy-qcom-qmp.h | 5 + 6 files changed, 428 insertions(+) create mode 100644 drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v8.h create mode 100644 drivers/phy/qualcomm/phy-qcom-qmp-pcs-v8.h create mode 100644 drivers/phy/qualcomm/phy-qcom-qmp-qserdes-com-v8.h create mode 100644 drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v8.h diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index b09fa00e9fe7..8b9710a9654a 100644 --- 
a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -32,6 +32,7 @@ #include "phy-qcom-qmp-pcs-usb-v4.h" #include "phy-qcom-qmp-pcs-usb-v5.h" #include "phy-qcom-qmp-pcs-usb-v6.h" +#include "phy-qcom-qmp-pcs-usb-v8.h" #include "phy-qcom-qmp-dp-com-v3.h" @@ -212,6 +213,28 @@ static const unsigned int qmp_v6_n4_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_TX_TRANSCEIVER_BIAS_EN] = QSERDES_V6_N4_TX_TRANSCEIVER_BIAS_EN, }; +static const unsigned int qmp_v8_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { + [QPHY_SW_RESET] = QPHY_V8_PCS_SW_RESET, + [QPHY_START_CTRL] = QPHY_V8_PCS_START_CONTROL, + [QPHY_PCS_STATUS] = QPHY_V8_PCS_PCS_STATUS1, + [QPHY_PCS_POWER_DOWN_CONTROL] = QPHY_V8_PCS_POWER_DOWN_CONTROL, + + /* In PCS_USB */ + [QPHY_PCS_AUTONOMOUS_MODE_CTRL] = QPHY_V8_PCS_USB_AUTONOMOUS_MODE_CTRL, + [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR] = QPHY_V8_PCS_USB_LFPS_RXTERM_IRQ_CLEAR, + + [QPHY_COM_RESETSM_CNTRL] = QSERDES_V8_COM_RESETSM_CNTRL, + [QPHY_COM_C_READY_STATUS] = QSERDES_V8_COM_C_READY_STATUS, + [QPHY_COM_CMN_STATUS] = QSERDES_V8_COM_CMN_STATUS, + [QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V8_COM_BIAS_EN_CLKBUFLR_EN, + + [QPHY_TX_TX_POL_INV] = QSERDES_V8_TX_TX_POL_INV, + [QPHY_TX_TX_DRV_LVL] = QSERDES_V8_TX_TX_DRV_LVL, + [QPHY_TX_TX_EMP_POST1_LVL] = QSERDES_V8_TX_TX_EMP_POST1_LVL, + [QPHY_TX_HIGHZ_DRVR_EN] = QSERDES_V8_TX_HIGHZ_DRVR_EN, + [QPHY_TX_TRANSCEIVER_BIAS_EN] = QSERDES_V8_TX_TRANSCEIVER_BIAS_EN, +}; + static const struct qmp_phy_init_tbl qmp_v3_usb3_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_IVCO, 0x07), QMP_PHY_INIT_CFG(QSERDES_V3_COM_SYSCLK_EN_SEL, 0x14), @@ -1471,6 +1494,139 @@ static const struct qmp_phy_init_tbl x1e80100_usb43dp_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_EQ_CONFIG5, 0x10), }; +static const struct qmp_phy_init_tbl sm8750_usb3_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V8_COM_SSC_STEP_SIZE1_MODE1, 0xc0), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_SSC_STEP_SIZE2_MODE1, 0x01), + 
QMP_PHY_INIT_CFG(QSERDES_V8_COM_CP_CTRL_MODE1, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_PLL_RCTRL_MODE1, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_PLL_CCTRL_MODE1, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_CORECLK_DIV_MODE1, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_LOCK_CMP1_MODE1, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_LOCK_CMP2_MODE1, 0x41), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_DEC_START_MODE1, 0x41), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_DEC_START_MSB_MODE1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_DIV_FRAC_START1_MODE1, 0x55), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_DIV_FRAC_START2_MODE1, 0x75), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_DIV_FRAC_START3_MODE1, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_HSCLK_SEL_1, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_VCO_TUNE1_MODE1, 0x25), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_VCO_TUNE2_MODE1, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_BIN_VCOCAL_CMP_CODE1_MODE1, 0x5c), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_BIN_VCOCAL_CMP_CODE2_MODE1, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0x5c), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_SSC_STEP_SIZE1_MODE0, 0xc0), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_SSC_STEP_SIZE2_MODE0, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_CP_CTRL_MODE0, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_PLL_RCTRL_MODE0, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_PLL_CCTRL_MODE0, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_LOCK_CMP1_MODE0, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_LOCK_CMP2_MODE0, 0x1a), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_DEC_START_MODE0, 0x41), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_DEC_START_MSB_MODE0, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_DIV_FRAC_START1_MODE0, 0x55), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_DIV_FRAC_START2_MODE0, 0x75), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_DIV_FRAC_START3_MODE0, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_VCO_TUNE1_MODE0, 0x25), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_VCO_TUNE2_MODE0, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_BG_TIMER, 0x0a), + 
QMP_PHY_INIT_CFG(QSERDES_V8_COM_SSC_EN_CENTER, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_SSC_PER1, 0x62), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_SSC_PER2, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_SYSCLK_BUF_ENABLE, 0x0c), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_SYSCLK_EN_SEL, 0x1a), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_LOCK_CMP_CFG, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_VCO_TUNE_MAP, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_CORE_CLK_EN, 0x20), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_CMN_CONFIG_1, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_AUTO_GAIN_ADJ_CTRL_1, 0xb6), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_AUTO_GAIN_ADJ_CTRL_2, 0x4a), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_AUTO_GAIN_ADJ_CTRL_3, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V8_COM_ADDITIONAL_MISC, 0x0c), +}; + +static const struct qmp_phy_init_tbl sm8750_usb3_tx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V8_TX_RES_CODE_LANE_TX, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V8_TX_RES_CODE_LANE_RX, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V8_TX_RES_CODE_LANE_OFFSET_TX, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_V8_TX_RES_CODE_LANE_OFFSET_RX, 0x09), + QMP_PHY_INIT_CFG(QSERDES_V8_TX_LANE_MODE_1, 0xf5), + QMP_PHY_INIT_CFG(QSERDES_V8_TX_LANE_MODE_3, 0x11), + QMP_PHY_INIT_CFG(QSERDES_V8_TX_LANE_MODE_4, 0x31), + QMP_PHY_INIT_CFG(QSERDES_V8_TX_LANE_MODE_5, 0x5f), + QMP_PHY_INIT_CFG(QSERDES_V8_TX_RCV_DETECT_LVL_2, 0x12), + QMP_PHY_INIT_CFG_LANE(QSERDES_V8_TX_PI_QEC_CTRL, 0x21, 1), + QMP_PHY_INIT_CFG_LANE(QSERDES_V8_TX_PI_QEC_CTRL, 0x05, 2), +}; + +static const struct qmp_phy_init_tbl sm8750_usb3_rx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V8_RX_UCDR_FO_GAIN, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_UCDR_SO_GAIN, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_UCDR_FASTLOCK_FO_GAIN, 0x2f), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x7f), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_UCDR_FASTLOCK_COUNT_LOW, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_UCDR_PI_CONTROLS, 0x99), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_UCDR_SB2_THRESH1, 
0x08), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_UCDR_SB2_THRESH2, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_UCDR_SB2_GAIN1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_UCDR_SB2_GAIN2, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_AUX_DATA_TCOARSE_TFINE, 0x20), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_VGA_CAL_CNTRL1, 0x54), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_VGA_CAL_CNTRL2, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_GM_CAL, 0x13), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_RX_EQU_ADAPTOR_CNTRL2, 0x0e), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_RX_EQU_ADAPTOR_CNTRL3, 0x4a), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_RX_EQU_ADAPTOR_CNTRL4, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_RX_IDAC_TSETTLE_LOW, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_RX_IDAC_TSETTLE_HIGH, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x27), + + QMP_PHY_INIT_CFG(QSERDES_V8_RX_SIGDET_ENABLES, 0x0c), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_SIGDET_CNTRL, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_SIGDET_DEGLITCH_CNTRL, 0x0e), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_RX_MODE_00_LOW, 0x3f), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_RX_MODE_00_HIGH, 0xbf), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_RX_MODE_00_HIGH2, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_RX_MODE_00_HIGH3, 0xdf), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_RX_MODE_00_HIGH4, 0xed), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_RX_MODE_01_LOW, 0x19), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_RX_MODE_01_HIGH, 0x09), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_RX_MODE_01_HIGH2, 0x91), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_RX_MODE_01_HIGH3, 0xb7), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_RX_MODE_01_HIGH4, 0xaa), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_DFE_EN_TIMER, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_DCC_CTRL1, 0x0c), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_VTH_CODE, 0x10), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_SIGDET_CAL_CTRL1, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V8_RX_SIGDET_CAL_TRIM, 0x08), +}; + +static const struct qmp_phy_init_tbl sm8750_usb3_pcs_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V8_PCS_LOCK_DETECT_CONFIG1, 0xc4), + 
QMP_PHY_INIT_CFG(QPHY_V8_PCS_LOCK_DETECT_CONFIG2, 0x89), + QMP_PHY_INIT_CFG(QPHY_V8_PCS_LOCK_DETECT_CONFIG3, 0x20), + QMP_PHY_INIT_CFG(QPHY_V8_PCS_LOCK_DETECT_CONFIG6, 0x13), + QMP_PHY_INIT_CFG(QPHY_V8_PCS_REFGEN_REQ_CONFIG1, 0x21), + QMP_PHY_INIT_CFG(QPHY_V8_PCS_RX_SIGDET_LVL, 0x55), + QMP_PHY_INIT_CFG(QPHY_V8_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7), + QMP_PHY_INIT_CFG(QPHY_V8_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03), + QMP_PHY_INIT_CFG(QPHY_V8_PCS_CDR_RESET_TIME, 0x0a), + QMP_PHY_INIT_CFG(QPHY_V8_PCS_ALIGN_DETECT_CONFIG1, 0x88), + QMP_PHY_INIT_CFG(QPHY_V8_PCS_ALIGN_DETECT_CONFIG2, 0x13), + QMP_PHY_INIT_CFG(QPHY_V8_PCS_PCS_TX_RX_CONFIG, 0x0c), + QMP_PHY_INIT_CFG(QPHY_V8_PCS_EQ_CONFIG1, 0x4b), + QMP_PHY_INIT_CFG(QPHY_V8_PCS_EQ_CONFIG5, 0x10), +}; + +static const struct qmp_phy_init_tbl sm8750_usb3_pcs_usb_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V8_PCS_USB_LFPS_DET_HIGH_COUNT_VAL, 0xf8), + QMP_PHY_INIT_CFG(QPHY_V8_PCS_USB_RXEQTRAINING_DFE_TIME_S2, 0x07), + QMP_PHY_INIT_CFG(QPHY_V8_PCS_USB_RCVR_DTCT_DLY_U3_L, 0x40), + QMP_PHY_INIT_CFG(QPHY_V8_PCS_USB_RCVR_DTCT_DLY_U3_H, 0x00), +}; + static const struct qmp_phy_init_tbl x1e80100_usb43dp_pcs_usb_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V6_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8), QMP_PHY_INIT_CFG(QPHY_V6_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), @@ -1781,6 +1937,22 @@ static const struct qmp_combo_offsets qmp_combo_offsets_v5 = { .dp_dp_phy = 0x2200, }; +static const struct qmp_combo_offsets qmp_combo_offsets_v8 = { + .com = 0x0000, + .txa = 0x1400, + .rxa = 0x1600, + .txb = 0x1800, + .rxb = 0x1a00, + .usb3_serdes = 0x1000, + .usb3_pcs_misc = 0x1c00, + .usb3_pcs = 0x1e00, + .usb3_pcs_usb = 0x2100, + .dp_serdes = 0x3000, + .dp_txa = 0x3400, + .dp_txb = 0x3800, + .dp_dp_phy = 0x3c00, +}; + static const struct qmp_phy_cfg sar2130p_usb3dpphy_cfg = { .offsets = &qmp_combo_offsets_v3, @@ -2280,6 +2452,51 @@ static const struct qmp_phy_cfg sm8650_usb3dpphy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), }; +static const struct qmp_phy_cfg 
sm8750_usb3dpphy_cfg = { + .offsets = &qmp_combo_offsets_v8, + + .serdes_tbl = sm8750_usb3_serdes_tbl, + .serdes_tbl_num = ARRAY_SIZE(sm8750_usb3_serdes_tbl), + .tx_tbl = sm8750_usb3_tx_tbl, + .tx_tbl_num = ARRAY_SIZE(sm8750_usb3_tx_tbl), + .rx_tbl = sm8750_usb3_rx_tbl, + .rx_tbl_num = ARRAY_SIZE(sm8750_usb3_rx_tbl), + .pcs_tbl = sm8750_usb3_pcs_tbl, + .pcs_tbl_num = ARRAY_SIZE(sm8750_usb3_pcs_tbl), + .pcs_usb_tbl = sm8750_usb3_pcs_usb_tbl, + .pcs_usb_tbl_num = ARRAY_SIZE(sm8750_usb3_pcs_usb_tbl), + + .dp_serdes_tbl = qmp_v6_dp_serdes_tbl, + .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl), + .dp_tx_tbl = qmp_v6_dp_tx_tbl, + .dp_tx_tbl_num = ARRAY_SIZE(qmp_v6_dp_tx_tbl), + + .serdes_tbl_rbr = qmp_v6_dp_serdes_tbl_rbr, + .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_rbr), + .serdes_tbl_hbr = qmp_v6_dp_serdes_tbl_hbr, + .serdes_tbl_hbr_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_hbr), + .serdes_tbl_hbr2 = qmp_v6_dp_serdes_tbl_hbr2, + .serdes_tbl_hbr2_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_hbr2), + .serdes_tbl_hbr3 = qmp_v6_dp_serdes_tbl_hbr3, + .serdes_tbl_hbr3_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_hbr3), + + .swing_hbr_rbr = &qmp_dp_v6_voltage_swing_hbr_rbr, + .pre_emphasis_hbr_rbr = &qmp_dp_v6_pre_emphasis_hbr_rbr, + .swing_hbr3_hbr2 = &qmp_dp_v5_voltage_swing_hbr3_hbr2, + .pre_emphasis_hbr3_hbr2 = &qmp_dp_v5_pre_emphasis_hbr3_hbr2, + + .dp_aux_init = qmp_v4_dp_aux_init, + .configure_dp_tx = qmp_v4_configure_dp_tx, + .configure_dp_phy = qmp_v4_configure_dp_phy, + .calibrate_dp_phy = qmp_v4_calibrate_dp_phy, + + .regs = qmp_v8_usb3phy_regs_layout, + .reset_list = msm8996_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), +}; + static int qmp_combo_dp_serdes_init(struct qmp_combo *qmp) { const struct qmp_phy_cfg *cfg = qmp->cfg; @@ -3915,6 +4132,10 @@ static const struct of_device_id qmp_combo_of_match_table[] = { .compatible = "qcom,sm8650-qmp-usb3-dp-phy", .data = 
&sm8650_usb3dpphy_cfg, }, + { + .compatible = "qcom,sm8750-qmp-usb3-dp-phy", + .data = &sm8750_usb3dpphy_cfg, + }, { .compatible = "qcom,x1e80100-qmp-usb3-dp-phy", .data = &x1e80100_usb3dpphy_cfg, diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v8.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v8.h new file mode 100644 index 000000000000..89ace8024bc0 --- /dev/null +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v8.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef QCOM_PHY_QMP_PCS_USB_V8_H_ +#define QCOM_PHY_QMP_PCS_USB_V8_H_ + +#define QPHY_V8_PCS_USB_POWER_STATE_CONFIG1 0x00 +#define QPHY_V8_PCS_USB_AUTONOMOUS_MODE_STATUS 0x04 +#define QPHY_V8_PCS_USB_AUTONOMOUS_MODE_CTRL 0x08 +#define QPHY_V8_PCS_USB_AUTONOMOUS_MODE_CTRL2 0x0c +#define QPHY_V8_PCS_USB_LFPS_RXTERM_IRQ_SOURCE_STATUS 0x10 +#define QPHY_V8_PCS_USB_LFPS_RXTERM_IRQ_CLEAR 0x14 +#define QPHY_V8_PCS_USB_LFPS_DET_HIGH_COUNT_VAL 0x18 +#define QPHY_V8_PCS_USB_LFPS_TX_ECSTART 0x1c +#define QPHY_V8_PCS_USB_LFPS_PER_TIMER_VAL 0x20 +#define QPHY_V8_PCS_USB_LFPS_TX_END_CNT_U3_START 0x24 +#define QPHY_V8_PCS_USB_LFPS_CONFIG1 0x28 +#define QPHY_V8_PCS_USB_RXEQTRAINING_LOCK_TIME 0x2c +#define QPHY_V8_PCS_USB_RXEQTRAINING_WAIT_TIME 0x30 +#define QPHY_V8_PCS_USB_RXEQTRAINING_CTLE_TIME 0x34 +#define QPHY_V8_PCS_USB_RXEQTRAINING_WAIT_TIME_S2 0x38 +#define QPHY_V8_PCS_USB_RXEQTRAINING_DFE_TIME_S2 0x3c +#define QPHY_V8_PCS_USB_RCVR_DTCT_DLY_U3_L 0x40 +#define QPHY_V8_PCS_USB_RCVR_DTCT_DLY_U3_H 0x44 +#define QPHY_V8_PCS_USB_ARCVR_DTCT_EN_PERIOD 0x48 +#define QPHY_V8_PCS_USB_ARCVR_DTCT_CM_DLY 0x4c +#define QPHY_V8_PCS_USB_TXONESZEROS_RUN_LENGTH 0x50 +#define QPHY_V8_PCS_USB_ALFPS_DEGLITCH_VAL 0x54 +#define QPHY_V8_PCS_USB_SIGDET_STARTUP_TIMER_VAL 0x58 +#define QPHY_V8_PCS_USB_TEST_CONTROL 0x5c +#define QPHY_V8_PCS_USB_RXTERMINATION_DLY_SEL 0x60 +#define QPHY_V8_PCS_USB_POWER_STATE_CONFIG2 0x64 +#define 
QPHY_V8_PCS_USB_POWER_STATE_CONFIG3 0x68 +#define QPHY_V8_PCS_USB_POWER_STATE_CONFIG4 0x6c + +#endif diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v8.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v8.h new file mode 100644 index 000000000000..169fd5de7474 --- /dev/null +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v8.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef QCOM_PHY_QMP_PCS_V8_H_ +#define QCOM_PHY_QMP_PCS_V8_H_ + +/* Only for QMP V8 PHY - USB/PCIe PCS registers */ +#define QPHY_V8_PCS_SW_RESET 0x000 +#define QPHY_V8_PCS_PCS_STATUS1 0x014 +#define QPHY_V8_PCS_POWER_DOWN_CONTROL 0x040 +#define QPHY_V8_PCS_START_CONTROL 0x044 +#define QPHY_V8_PCS_POWER_STATE_CONFIG1 0x090 +#define QPHY_V8_PCS_LOCK_DETECT_CONFIG1 0x0c4 +#define QPHY_V8_PCS_LOCK_DETECT_CONFIG2 0x0c8 +#define QPHY_V8_PCS_LOCK_DETECT_CONFIG3 0x0cc +#define QPHY_V8_PCS_LOCK_DETECT_CONFIG6 0x0d8 +#define QPHY_V8_PCS_REFGEN_REQ_CONFIG1 0x0dc +#define QPHY_V8_PCS_RX_SIGDET_LVL 0x188 +#define QPHY_V8_PCS_RCVR_DTCT_DLY_P1U2_L 0x190 +#define QPHY_V8_PCS_RCVR_DTCT_DLY_P1U2_H 0x194 +#define QPHY_V8_PCS_RATE_SLEW_CNTRL1 0x198 +#define QPHY_V8_PCS_CDR_RESET_TIME 0x1b0 +#define QPHY_V8_PCS_ALIGN_DETECT_CONFIG1 0x1c0 +#define QPHY_V8_PCS_ALIGN_DETECT_CONFIG2 0x1c4 +#define QPHY_V8_PCS_PCS_TX_RX_CONFIG 0x1d0 +#define QPHY_V8_PCS_EQ_CONFIG1 0x1dc +#define QPHY_V8_PCS_EQ_CONFIG2 0x1e0 +#define QPHY_V8_PCS_EQ_CONFIG5 0x1ec + +#endif diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-com-v8.h b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-com-v8.h new file mode 100644 index 000000000000..d3b2292257bc --- /dev/null +++ b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-com-v8.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ + +#ifndef QCOM_PHY_QMP_QSERDES_COM_V8_H_ +#define QCOM_PHY_QMP_QSERDES_COM_V8_H_ + +/* Only for QMP V8 PHY - QSERDES COM registers */ +#define QSERDES_V8_COM_SSC_STEP_SIZE1_MODE1 0x000 +#define QSERDES_V8_COM_SSC_STEP_SIZE2_MODE1 0x004 +#define QSERDES_V8_COM_SSC_STEP_SIZE3_MODE1 0x008 +#define QSERDES_V8_COM_CP_CTRL_MODE1 0x010 +#define QSERDES_V8_COM_PLL_RCTRL_MODE1 0x014 +#define QSERDES_V8_COM_PLL_CCTRL_MODE1 0x018 +#define QSERDES_V8_COM_CORECLK_DIV_MODE1 0x01c +#define QSERDES_V8_COM_LOCK_CMP1_MODE1 0x020 +#define QSERDES_V8_COM_LOCK_CMP2_MODE1 0x024 +#define QSERDES_V8_COM_DEC_START_MODE1 0x028 +#define QSERDES_V8_COM_DEC_START_MSB_MODE1 0x02c +#define QSERDES_V8_COM_DIV_FRAC_START1_MODE1 0x030 +#define QSERDES_V8_COM_DIV_FRAC_START2_MODE1 0x034 +#define QSERDES_V8_COM_DIV_FRAC_START3_MODE1 0x038 +#define QSERDES_V8_COM_HSCLK_SEL_1 0x03c +#define QSERDES_V8_COM_VCO_TUNE1_MODE1 0x048 +#define QSERDES_V8_COM_VCO_TUNE2_MODE1 0x04c +#define QSERDES_V8_COM_BIN_VCOCAL_CMP_CODE1_MODE1 0x050 +#define QSERDES_V8_COM_BIN_VCOCAL_CMP_CODE2_MODE1 0x054 +#define QSERDES_V8_COM_BIN_VCOCAL_CMP_CODE1_MODE0 0x058 +#define QSERDES_V8_COM_BIN_VCOCAL_CMP_CODE2_MODE0 0x05c +#define QSERDES_V8_COM_SSC_STEP_SIZE1_MODE0 0x060 +#define QSERDES_V8_COM_SSC_STEP_SIZE2_MODE0 0x064 +#define QSERDES_V8_COM_CP_CTRL_MODE0 0x070 +#define QSERDES_V8_COM_PLL_RCTRL_MODE0 0x074 +#define QSERDES_V8_COM_PLL_CCTRL_MODE0 0x078 +#define QSERDES_V8_COM_LOCK_CMP1_MODE0 0x080 +#define QSERDES_V8_COM_LOCK_CMP2_MODE0 0x084 +#define QSERDES_V8_COM_DEC_START_MODE0 0x088 +#define QSERDES_V8_COM_DEC_START_MSB_MODE0 0x08c +#define QSERDES_V8_COM_DIV_FRAC_START1_MODE0 0x090 +#define QSERDES_V8_COM_DIV_FRAC_START2_MODE0 0x094 +#define QSERDES_V8_COM_DIV_FRAC_START3_MODE0 0x098 +#define QSERDES_V8_COM_VCO_TUNE1_MODE0 0x0a8 +#define QSERDES_V8_COM_VCO_TUNE2_MODE0 0x0ac +#define QSERDES_V8_COM_BG_TIMER 0x0bc +#define QSERDES_V8_COM_SSC_EN_CENTER 0x0c0 +#define QSERDES_V8_COM_SSC_PER1 0x0cc +#define 
QSERDES_V8_COM_SSC_PER2 0x0d0 +#define QSERDES_V8_COM_BIAS_EN_CLKBUFLR_EN 0x0dc +#define QSERDES_V8_COM_SYSCLK_BUF_ENABLE 0x0e8 +#define QSERDES_V8_COM_SYSCLK_EN_SEL 0x110 +#define QSERDES_V8_COM_RESETSM_CNTRL 0x118 +#define QSERDES_V8_COM_LOCK_CMP_CFG 0x124 +#define QSERDES_V8_COM_VCO_TUNE_MAP 0x140 +#define QSERDES_V8_COM_CORE_CLK_EN 0x170 +#define QSERDES_V8_COM_CMN_CONFIG_1 0x174 +#define QSERDES_V8_COM_AUTO_GAIN_ADJ_CTRL_1 0x1a4 +#define QSERDES_V8_COM_AUTO_GAIN_ADJ_CTRL_2 0x1a8 +#define QSERDES_V8_COM_AUTO_GAIN_ADJ_CTRL_3 0x1ac +#define QSERDES_V8_COM_ADDITIONAL_MISC 0x1b4 +#define QSERDES_V8_COM_CMN_STATUS 0x2c8 +#define QSERDES_V8_COM_C_READY_STATUS 0x2f0 + +#endif diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v8.h b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v8.h new file mode 100644 index 000000000000..4cb8b1708607 --- /dev/null +++ b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v8.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ + +#ifndef QCOM_PHY_QMP_QSERDES_TXRX_V8_H_ +#define QCOM_PHY_QMP_QSERDES_TXRX_V8_H_ + +#define QSERDES_V8_TX_TX_EMP_POST1_LVL 0x00c +#define QSERDES_V8_TX_TX_DRV_LVL 0x014 +#define QSERDES_V8_TX_RES_CODE_LANE_TX 0x034 +#define QSERDES_V8_TX_RES_CODE_LANE_RX 0x038 +#define QSERDES_V8_TX_RES_CODE_LANE_OFFSET_TX 0x03c +#define QSERDES_V8_TX_RES_CODE_LANE_OFFSET_RX 0x040 +#define QSERDES_V8_TX_TRANSCEIVER_BIAS_EN 0x054 +#define QSERDES_V8_TX_HIGHZ_DRVR_EN 0x058 +#define QSERDES_V8_TX_TX_POL_INV 0x05c +#define QSERDES_V8_TX_LANE_MODE_1 0x084 +#define QSERDES_V8_TX_LANE_MODE_2 0x088 +#define QSERDES_V8_TX_LANE_MODE_3 0x08c +#define QSERDES_V8_TX_LANE_MODE_4 0x090 +#define QSERDES_V8_TX_LANE_MODE_5 0x094 +#define QSERDES_V8_TX_RCV_DETECT_LVL_2 0x0a4 +#define QSERDES_V8_TX_PI_QEC_CTRL 0x0e4 + +#define QSERDES_V8_RX_UCDR_FO_GAIN 0x008 +#define QSERDES_V8_RX_UCDR_SO_GAIN 0x014 +#define QSERDES_V8_RX_UCDR_SVS_FO_GAIN 0x020 +#define QSERDES_V8_RX_UCDR_FASTLOCK_FO_GAIN 0x030 +#define QSERDES_V8_RX_UCDR_SO_SATURATION_AND_ENABLE 0x034 +#define QSERDES_V8_RX_UCDR_FASTLOCK_COUNT_LOW 0x03c +#define QSERDES_V8_RX_UCDR_FASTLOCK_COUNT_HIGH 0x040 +#define QSERDES_V8_RX_UCDR_PI_CONTROLS 0x044 +#define QSERDES_V8_RX_UCDR_SB2_THRESH1 0x04c +#define QSERDES_V8_RX_UCDR_SB2_THRESH2 0x050 +#define QSERDES_V8_RX_UCDR_SB2_GAIN1 0x054 +#define QSERDES_V8_RX_UCDR_SB2_GAIN2 0x058 +#define QSERDES_V8_RX_AUX_DATA_TCOARSE_TFINE 0x060 +#define QSERDES_V8_RX_VGA_CAL_CNTRL1 0x0d4 +#define QSERDES_V8_RX_VGA_CAL_CNTRL2 0x0d8 +#define QSERDES_V8_RX_GM_CAL 0x0dc +#define QSERDES_V8_RX_RX_EQU_ADAPTOR_CNTRL2 0x0ec +#define QSERDES_V8_RX_RX_EQU_ADAPTOR_CNTRL3 0x0f0 +#define QSERDES_V8_RX_RX_EQU_ADAPTOR_CNTRL4 0x0f4 +#define QSERDES_V8_RX_RX_IDAC_TSETTLE_LOW 0x0f8 +#define QSERDES_V8_RX_RX_IDAC_TSETTLE_HIGH 0x0fc +#define QSERDES_V8_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1 0x110 +#define QSERDES_V8_RX_SIGDET_ENABLES 0x118 +#define QSERDES_V8_RX_SIGDET_CNTRL 0x11c +#define QSERDES_V8_RX_SIGDET_DEGLITCH_CNTRL 0x124 
+#define QSERDES_V8_RX_RX_MODE_00_LOW 0x15c +#define QSERDES_V8_RX_RX_MODE_00_HIGH 0x160 +#define QSERDES_V8_RX_RX_MODE_00_HIGH2 0x164 +#define QSERDES_V8_RX_RX_MODE_00_HIGH3 0x168 +#define QSERDES_V8_RX_RX_MODE_00_HIGH4 0x16c +#define QSERDES_V8_RX_RX_MODE_01_LOW 0x170 +#define QSERDES_V8_RX_RX_MODE_01_HIGH 0x174 +#define QSERDES_V8_RX_RX_MODE_01_HIGH2 0x178 +#define QSERDES_V8_RX_RX_MODE_01_HIGH3 0x17c +#define QSERDES_V8_RX_RX_MODE_01_HIGH4 0x180 +#define QSERDES_V8_RX_DFE_EN_TIMER 0x1a0 +#define QSERDES_V8_RX_DFE_CTLE_POST_CAL_OFFSET 0x1a4 +#define QSERDES_V8_RX_DCC_CTRL1 0x1a8 +#define QSERDES_V8_RX_VTH_CODE 0x1b0 +#define QSERDES_V8_RX_SIGDET_CAL_CTRL1 0x1e4 +#define QSERDES_V8_RX_SIGDET_CAL_TRIM 0x1f8 + +#endif diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.h b/drivers/phy/qualcomm/phy-qcom-qmp.h index d0f41e4aaa85..8148853ff275 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp.h @@ -30,6 +30,9 @@ #include "phy-qcom-qmp-qserdes-com-v7.h" #include "phy-qcom-qmp-qserdes-txrx-v7.h" +#include "phy-qcom-qmp-qserdes-com-v8.h" +#include "phy-qcom-qmp-qserdes-txrx-v8.h" + #include "phy-qcom-qmp-qserdes-pll.h" #include "phy-qcom-qmp-pcs-v2.h" @@ -52,6 +55,8 @@ #include "phy-qcom-qmp-pcs-v7.h" +#include "phy-qcom-qmp-pcs-v8.h" + /* QPHY_SW_RESET bit */ #define SW_RESET BIT(0) /* QPHY_POWER_DOWN_CONTROL */ From b0d8d731b4b0fc83bb4826a2c805f4c877c98cc1 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Tue, 27 May 2025 14:04:41 -0700 Subject: [PATCH 0158/2411] phy: qcom: Update description for QCOM based eUSB2 repeater The eUSB2 repeater that exists in the QCOM PMICs are utilized for several different eUSB2 PHY vendors, such as M31 or Synopsys. Hence, the wording needs to be updated to remove associations to a specific vendor. 
Reviewed-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Signed-off-by: Wesley Cheng Signed-off-by: Melody Olvera Link: https://lore.kernel.org/r/20250527-sm8750_usb_master-v6-5-d58de3b41d34@oss.qualcomm.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/phy/qualcomm/Kconfig b/drivers/phy/qualcomm/Kconfig index ef14f4e33973..85581b40e598 100644 --- a/drivers/phy/qualcomm/Kconfig +++ b/drivers/phy/qualcomm/Kconfig @@ -126,12 +126,12 @@ config PHY_QCOM_QUSB2 USB IPs on MSM SOCs. config PHY_QCOM_EUSB2_REPEATER - tristate "Qualcomm SNPS eUSB2 Repeater Driver" + tristate "Qualcomm PMIC eUSB2 Repeater Driver" depends on OF && (ARCH_QCOM || COMPILE_TEST) select GENERIC_PHY help - Enable support for the USB high-speed SNPS eUSB2 repeater on Qualcomm - PMICs. The repeater is paired with a Synopsys eUSB2 Phy + Enable support for the USB high-speed eUSB2 repeater on Qualcomm + PMICs. The repeater is paired with a Synopsys or M31 eUSB2 Phy on Qualcomm SOCs. config PHY_QCOM_M31_USB From 9c8504861cc4102463f31fe1f5e120a6deb15c15 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Tue, 27 May 2025 14:04:42 -0700 Subject: [PATCH 0159/2411] phy: qcom: Add M31 based eUSB2 PHY driver SM8750 utilizes an eUSB2 PHY from M31. Add the initialization sequences to bring it out of reset and into an operational state. This differs to the M31 USB driver, in that the M31 eUSB2 driver will require a connection to an eUSB2 repeater. This PHY driver will handle the initialization of the associated eUSB2 repeater when required. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Wesley Cheng Signed-off-by: Melody Olvera Link: https://lore.kernel.org/r/20250527-sm8750_usb_master-v6-6-d58de3b41d34@oss.qualcomm.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/Kconfig | 10 + drivers/phy/qualcomm/Makefile | 1 + drivers/phy/qualcomm/phy-qcom-m31-eusb2.c | 325 ++++++++++++++++++++++ 3 files changed, 336 insertions(+) create mode 100644 drivers/phy/qualcomm/phy-qcom-m31-eusb2.c diff --git a/drivers/phy/qualcomm/Kconfig b/drivers/phy/qualcomm/Kconfig index 85581b40e598..60a0ead127fa 100644 --- a/drivers/phy/qualcomm/Kconfig +++ b/drivers/phy/qualcomm/Kconfig @@ -158,6 +158,16 @@ config PHY_QCOM_UNIPHY_PCIE_28LP handles PHY initialization, clock management required after resetting the hardware and power management. +config PHY_QCOM_M31_EUSB + tristate "Qualcomm M31 eUSB2 PHY driver support" + depends on USB && (ARCH_QCOM || COMPILE_TEST) + select GENERIC_PHY + help + Enable this to support M31 EUSB2 PHY transceivers on Qualcomm + chips with DWC3 USB core. It supports initializing and cleaning + up of the associated USB repeater that is paired with the eUSB2 + PHY. 
+ config PHY_QCOM_USB_HS tristate "Qualcomm USB HS PHY module" depends on USB_ULPI_BUS diff --git a/drivers/phy/qualcomm/Makefile b/drivers/phy/qualcomm/Makefile index 3851e28a212d..b71a6a0bed3f 100644 --- a/drivers/phy/qualcomm/Makefile +++ b/drivers/phy/qualcomm/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_PHY_QCOM_EDP) += phy-qcom-edp.o obj-$(CONFIG_PHY_QCOM_IPQ4019_USB) += phy-qcom-ipq4019-usb.o obj-$(CONFIG_PHY_QCOM_IPQ806X_SATA) += phy-qcom-ipq806x-sata.o obj-$(CONFIG_PHY_QCOM_M31_USB) += phy-qcom-m31.o +obj-$(CONFIG_PHY_QCOM_M31_EUSB) += phy-qcom-m31-eusb2.o obj-$(CONFIG_PHY_QCOM_PCIE2) += phy-qcom-pcie2.o obj-$(CONFIG_PHY_QCOM_QMP_COMBO) += phy-qcom-qmp-combo.o phy-qcom-qmp-usbc.o diff --git a/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c b/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c new file mode 100644 index 000000000000..9f02b8a78f6e --- /dev/null +++ b/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define USB_PHY_UTMI_CTRL0 (0x3c) +#define SLEEPM BIT(0) + +#define USB_PHY_UTMI_CTRL5 (0x50) +#define POR BIT(1) + +#define USB_PHY_HS_PHY_CTRL_COMMON0 (0x54) +#define SIDDQ_SEL BIT(1) +#define SIDDQ BIT(2) +#define FSEL GENMASK(6, 4) +#define FSEL_38_4_MHZ_VAL (0x6) + +#define USB_PHY_HS_PHY_CTRL2 (0x64) +#define USB2_SUSPEND_N BIT(2) +#define USB2_SUSPEND_N_SEL BIT(3) + +#define USB_PHY_CFG0 (0x94) +#define UTMI_PHY_CMN_CTRL_OVERRIDE_EN BIT(1) + +#define USB_PHY_CFG1 (0x154) +#define PLL_EN BIT(0) + +#define USB_PHY_FSEL_SEL (0xb8) +#define FSEL_SEL BIT(0) + +#define USB_PHY_XCFGI_39_32 (0x16c) +#define HSTX_PE GENMASK(3, 2) + +#define USB_PHY_XCFGI_71_64 (0x17c) +#define HSTX_SWING GENMASK(3, 0) + +#define USB_PHY_XCFGI_31_24 (0x168) +#define HSTX_SLEW GENMASK(2, 0) + +#define USB_PHY_XCFGI_7_0 (0x15c) +#define PLL_LOCK_TIME GENMASK(1, 0) + +#define M31_EUSB_PHY_INIT_CFG(o, b, v) \ +{ \ + .off = o, \ + .mask = b, \ + .val = v, \ +} + +struct m31_phy_tbl_entry { + u32 off; + u32 mask; + u32 val; +}; + +struct m31_eusb2_priv_data { + const struct m31_phy_tbl_entry *setup_seq; + unsigned int setup_seq_nregs; + const struct m31_phy_tbl_entry *override_seq; + unsigned int override_seq_nregs; + const struct m31_phy_tbl_entry *reset_seq; + unsigned int reset_seq_nregs; + unsigned int fsel; +}; + +static const struct m31_phy_tbl_entry m31_eusb2_setup_tbl[] = { + M31_EUSB_PHY_INIT_CFG(USB_PHY_CFG0, UTMI_PHY_CMN_CTRL_OVERRIDE_EN, 1), + M31_EUSB_PHY_INIT_CFG(USB_PHY_UTMI_CTRL5, POR, 1), + M31_EUSB_PHY_INIT_CFG(USB_PHY_CFG1, PLL_EN, 1), + M31_EUSB_PHY_INIT_CFG(USB_PHY_FSEL_SEL, FSEL_SEL, 1), +}; + +static const struct m31_phy_tbl_entry m31_eusb_phy_override_tbl[] = { + M31_EUSB_PHY_INIT_CFG(USB_PHY_XCFGI_39_32, HSTX_PE, 0), + M31_EUSB_PHY_INIT_CFG(USB_PHY_XCFGI_71_64, HSTX_SWING, 7), + M31_EUSB_PHY_INIT_CFG(USB_PHY_XCFGI_31_24, HSTX_SLEW, 0), 
+ M31_EUSB_PHY_INIT_CFG(USB_PHY_XCFGI_7_0, PLL_LOCK_TIME, 0), +}; + +static const struct m31_phy_tbl_entry m31_eusb_phy_reset_tbl[] = { + M31_EUSB_PHY_INIT_CFG(USB_PHY_HS_PHY_CTRL2, USB2_SUSPEND_N_SEL, 1), + M31_EUSB_PHY_INIT_CFG(USB_PHY_HS_PHY_CTRL2, USB2_SUSPEND_N, 1), + M31_EUSB_PHY_INIT_CFG(USB_PHY_UTMI_CTRL0, SLEEPM, 1), + M31_EUSB_PHY_INIT_CFG(USB_PHY_HS_PHY_CTRL_COMMON0, SIDDQ_SEL, 1), + M31_EUSB_PHY_INIT_CFG(USB_PHY_HS_PHY_CTRL_COMMON0, SIDDQ, 0), + M31_EUSB_PHY_INIT_CFG(USB_PHY_UTMI_CTRL5, POR, 0), + M31_EUSB_PHY_INIT_CFG(USB_PHY_HS_PHY_CTRL2, USB2_SUSPEND_N_SEL, 0), + M31_EUSB_PHY_INIT_CFG(USB_PHY_CFG0, UTMI_PHY_CMN_CTRL_OVERRIDE_EN, 0), +}; + +static const struct regulator_bulk_data m31_eusb_phy_vregs[] = { + { .supply = "vdd" }, + { .supply = "vdda12" }, +}; + +#define M31_EUSB_NUM_VREGS ARRAY_SIZE(m31_eusb_phy_vregs) + +struct m31eusb2_phy { + struct phy *phy; + void __iomem *base; + const struct m31_eusb2_priv_data *data; + enum phy_mode mode; + + struct regulator_bulk_data *vregs; + struct clk *clk; + struct reset_control *reset; + + struct phy *repeater; +}; + +static int m31eusb2_phy_write_readback(void __iomem *base, u32 offset, + const u32 mask, u32 val) +{ + u32 write_val; + u32 tmp; + + tmp = readl(base + offset); + tmp &= ~mask; + write_val = tmp | val; + + writel(write_val, base + offset); + + tmp = readl(base + offset); + tmp &= mask; + + if (tmp != val) { + pr_err("write: %x to offset: %x FAILED\n", val, offset); + return -EINVAL; + } + + return 0; +} + +static int m31eusb2_phy_write_sequence(struct m31eusb2_phy *phy, + const struct m31_phy_tbl_entry *tbl, + int num) +{ + int i; + int ret; + + for (i = 0 ; i < num; i++, tbl++) { + dev_dbg(&phy->phy->dev, "Offset:%x BitMask:%x Value:%x", + tbl->off, tbl->mask, tbl->val); + + ret = m31eusb2_phy_write_readback(phy->base, + tbl->off, tbl->mask, + tbl->val << __ffs(tbl->mask)); + if (ret < 0) + return ret; + } + + return 0; +} + +static int m31eusb2_phy_set_mode(struct phy *uphy, enum phy_mode 
mode, int submode) +{ + struct m31eusb2_phy *phy = phy_get_drvdata(uphy); + + phy->mode = mode; + + return phy_set_mode_ext(phy->repeater, mode, submode); +} + +static int m31eusb2_phy_init(struct phy *uphy) +{ + struct m31eusb2_phy *phy = phy_get_drvdata(uphy); + const struct m31_eusb2_priv_data *data = phy->data; + int ret; + + ret = regulator_bulk_enable(M31_EUSB_NUM_VREGS, phy->vregs); + if (ret) { + dev_err(&uphy->dev, "failed to enable regulator, %d\n", ret); + return ret; + } + + ret = phy_init(phy->repeater); + if (ret) { + dev_err(&uphy->dev, "repeater init failed. %d\n", ret); + goto disable_vreg; + } + + ret = clk_prepare_enable(phy->clk); + if (ret) { + dev_err(&uphy->dev, "failed to enable cfg ahb clock, %d\n", ret); + goto disable_repeater; + } + + /* Perform phy reset */ + reset_control_assert(phy->reset); + udelay(5); + reset_control_deassert(phy->reset); + + m31eusb2_phy_write_sequence(phy, data->setup_seq, data->setup_seq_nregs); + m31eusb2_phy_write_readback(phy->base, + USB_PHY_HS_PHY_CTRL_COMMON0, FSEL, + FIELD_PREP(FSEL, data->fsel)); + m31eusb2_phy_write_sequence(phy, data->override_seq, data->override_seq_nregs); + m31eusb2_phy_write_sequence(phy, data->reset_seq, data->reset_seq_nregs); + + return 0; + +disable_repeater: + phy_exit(phy->repeater); +disable_vreg: + regulator_bulk_disable(M31_EUSB_NUM_VREGS, phy->vregs); + + return ret; +} + +static int m31eusb2_phy_exit(struct phy *uphy) +{ + struct m31eusb2_phy *phy = phy_get_drvdata(uphy); + + clk_disable_unprepare(phy->clk); + regulator_bulk_disable(M31_EUSB_NUM_VREGS, phy->vregs); + phy_exit(phy->repeater); + + return 0; +} + +static const struct phy_ops m31eusb2_phy_gen_ops = { + .init = m31eusb2_phy_init, + .exit = m31eusb2_phy_exit, + .set_mode = m31eusb2_phy_set_mode, + .owner = THIS_MODULE, +}; + +static int m31eusb2_phy_probe(struct platform_device *pdev) +{ + struct phy_provider *phy_provider; + const struct m31_eusb2_priv_data *data; + struct device *dev = &pdev->dev; + struct
m31eusb2_phy *phy; + int ret; + + phy = devm_kzalloc(dev, sizeof(*phy), GFP_KERNEL); + if (!phy) + return -ENOMEM; + + data = device_get_match_data(dev); + if (!data) + return -EINVAL; + phy->data = data; + + phy->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(phy->base)) + return PTR_ERR(phy->base); + + phy->reset = devm_reset_control_get_exclusive(dev, NULL); + if (IS_ERR(phy->reset)) + return PTR_ERR(phy->reset); + + phy->clk = devm_clk_get(dev, NULL); + if (IS_ERR(phy->clk)) + return dev_err_probe(dev, PTR_ERR(phy->clk), + "failed to get clk\n"); + + phy->phy = devm_phy_create(dev, NULL, &m31eusb2_phy_gen_ops); + if (IS_ERR(phy->phy)) + return dev_err_probe(dev, PTR_ERR(phy->phy), + "failed to create phy\n"); + + ret = devm_regulator_bulk_get_const(dev, M31_EUSB_NUM_VREGS, + m31_eusb_phy_vregs, &phy->vregs); + if (ret) + return dev_err_probe(dev, ret, + "failed to get regulator supplies\n"); + + phy_set_drvdata(phy->phy, phy); + + phy->repeater = devm_of_phy_get_by_index(dev, dev->of_node, 0); + if (IS_ERR(phy->repeater)) + return dev_err_probe(dev, PTR_ERR(phy->repeater), + "failed to get repeater\n"); + + phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); + if (!IS_ERR(phy_provider)) + dev_info(dev, "Registered M31 USB phy\n"); + + return PTR_ERR_OR_ZERO(phy_provider); +} + +static const struct m31_eusb2_priv_data m31_eusb_v1_data = { + .setup_seq = m31_eusb2_setup_tbl, + .setup_seq_nregs = ARRAY_SIZE(m31_eusb2_setup_tbl), + .override_seq = m31_eusb_phy_override_tbl, + .override_seq_nregs = ARRAY_SIZE(m31_eusb_phy_override_tbl), + .reset_seq = m31_eusb_phy_reset_tbl, + .reset_seq_nregs = ARRAY_SIZE(m31_eusb_phy_reset_tbl), + .fsel = FSEL_38_4_MHZ_VAL, +}; + +static const struct of_device_id m31eusb2_phy_id_table[] = { + { .compatible = "qcom,sm8750-m31-eusb2-phy", .data = &m31_eusb_v1_data }, + { }, +}; +MODULE_DEVICE_TABLE(of, m31eusb2_phy_id_table); + +static struct platform_driver m31eusb2_phy_driver = { + .probe =
m31eusb2_phy_probe, + .driver = { + .name = "qcom-m31eusb2-phy", + .of_match_table = m31eusb2_phy_id_table, + }, +}; + +module_platform_driver(m31eusb2_phy_driver); + +MODULE_AUTHOR("Wesley Cheng "); +MODULE_DESCRIPTION("eUSB2 Qualcomm M31 HSPHY driver"); +MODULE_LICENSE("GPL"); From 641fa5b515a6900b1452cc92d30d1ab391e04414 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 23 May 2025 10:48:33 +0200 Subject: [PATCH 0160/2411] phy: phy-snps-eusb2: fix clock imbalance on phy_exit() Make sure to disable all clocks enabled at phy_init() also on phy_exit(). Fixes: c4098f3e6134 ("phy: phy-snps-eusb2: add support for exynos2200") Cc: Ivaylo Ivanov Signed-off-by: Johan Hovold Reviewed-by: Neil Armstrong Acked-by: Ivaylo Ivanov Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20250523084839.11015-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/phy-snps-eusb2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/phy-snps-eusb2.c b/drivers/phy/phy-snps-eusb2.c index b73a1d7e57b3..19af3f99692c 100644 --- a/drivers/phy/phy-snps-eusb2.c +++ b/drivers/phy/phy-snps-eusb2.c @@ -504,7 +504,7 @@ static int snps_eusb2_hsphy_exit(struct phy *p) { struct snps_eusb2_hsphy *phy = phy_get_drvdata(p); - clk_disable_unprepare(phy->ref_clk); + clk_bulk_disable_unprepare(phy->data->num_clks, phy->clks); regulator_bulk_disable(ARRAY_SIZE(phy->vregs), phy->vregs); From 3232a6b0d834569f71aa898401288af6b4ab781d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 23 May 2025 10:48:34 +0200 Subject: [PATCH 0161/2411] phy: phy-snps-eusb2: fix repeater imbalance on phy_init() failure Make sure to disable the repeater PHY also on phy_init() failure. 
Fixes: 3584f6392f09 ("phy: qcom: phy-qcom-snps-eusb2: Add support for eUSB2 repeater") Cc: Abel Vesa Cc: Neil Armstrong Signed-off-by: Johan Hovold Reviewed-by: Neil Armstrong Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20250523084839.11015-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/phy-snps-eusb2.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/phy/phy-snps-eusb2.c b/drivers/phy/phy-snps-eusb2.c index 19af3f99692c..a799c3d2bcfb 100644 --- a/drivers/phy/phy-snps-eusb2.c +++ b/drivers/phy/phy-snps-eusb2.c @@ -468,7 +468,7 @@ static int snps_eusb2_hsphy_init(struct phy *p) ret = clk_bulk_prepare_enable(phy->data->num_clks, phy->clks); if (ret) { dev_err(&p->dev, "failed to enable ref clock, %d\n", ret); - goto disable_vreg; + goto exit_repeater; } ret = reset_control_assert(phy->phy_reset); @@ -493,7 +493,8 @@ static int snps_eusb2_hsphy_init(struct phy *p) disable_ref_clk: clk_bulk_disable_unprepare(phy->data->num_clks, phy->clks); - +exit_repeater: + phy_exit(phy->repeater); disable_vreg: regulator_bulk_disable(ARRAY_SIZE(phy->vregs), phy->vregs); From 4f333990841e06059c3cd7251791017d4c9e9028 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 23 May 2025 10:48:35 +0200 Subject: [PATCH 0162/2411] phy: phy-snps-eusb2: rename phy_init() clock error label Rename the clock error label which is now used to disable all clocks and not just the ref clock on phy_init() errors. 
Signed-off-by: Johan Hovold Reviewed-by: Neil Armstrong Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20250523084839.11015-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/phy-snps-eusb2.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/phy/phy-snps-eusb2.c b/drivers/phy/phy-snps-eusb2.c index a799c3d2bcfb..c8f14f8fb9ac 100644 --- a/drivers/phy/phy-snps-eusb2.c +++ b/drivers/phy/phy-snps-eusb2.c @@ -474,7 +474,7 @@ static int snps_eusb2_hsphy_init(struct phy *p) ret = reset_control_assert(phy->phy_reset); if (ret) { dev_err(&p->dev, "failed to assert phy_reset, %d\n", ret); - goto disable_ref_clk; + goto disable_clks; } usleep_range(100, 150); @@ -482,16 +482,16 @@ static int snps_eusb2_hsphy_init(struct phy *p) ret = reset_control_deassert(phy->phy_reset); if (ret) { dev_err(&p->dev, "failed to de-assert phy_reset, %d\n", ret); - goto disable_ref_clk; + goto disable_clks; } ret = phy->data->phy_init(p); if (ret) - goto disable_ref_clk; + goto disable_clks; return 0; -disable_ref_clk: +disable_clks: clk_bulk_disable_unprepare(phy->data->num_clks, phy->clks); exit_repeater: phy_exit(phy->repeater); From b7996f8e9473cf8a594af1fa1bb799f8f28c0670 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 23 May 2025 10:48:36 +0200 Subject: [PATCH 0163/2411] phy: phy-snps-eusb2: clean up error messages Clean up the error messages by using a consistent format with a colon followed by a space and an errno (or unsupported frequency). 
Signed-off-by: Johan Hovold Reviewed-by: Neil Armstrong Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20250523084839.11015-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/phy-snps-eusb2.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/phy/phy-snps-eusb2.c b/drivers/phy/phy-snps-eusb2.c index c8f14f8fb9ac..232c5e8a554f 100644 --- a/drivers/phy/phy-snps-eusb2.c +++ b/drivers/phy/phy-snps-eusb2.c @@ -256,7 +256,7 @@ static int exynos_eusb2_ref_clk_init(struct snps_eusb2_hsphy *phy) } if (!config) { - dev_err(&phy->phy->dev, "unsupported ref_clk_freq:%lu\n", ref_clk_freq); + dev_err(&phy->phy->dev, "unsupported ref_clk_freq: %lu\n", ref_clk_freq); return -EINVAL; } @@ -293,7 +293,7 @@ static int qcom_eusb2_ref_clk_init(struct snps_eusb2_hsphy *phy) } if (!config) { - dev_err(&phy->phy->dev, "unsupported ref_clk_freq:%lu\n", ref_clk_freq); + dev_err(&phy->phy->dev, "unsupported ref_clk_freq: %lu\n", ref_clk_freq); return -EINVAL; } @@ -461,19 +461,19 @@ static int snps_eusb2_hsphy_init(struct phy *p) ret = phy_init(phy->repeater); if (ret) { - dev_err(&p->dev, "repeater init failed. 
%d\n", ret); + dev_err(&p->dev, "repeater init failed: %d\n", ret); goto disable_vreg; } ret = clk_bulk_prepare_enable(phy->data->num_clks, phy->clks); if (ret) { - dev_err(&p->dev, "failed to enable ref clock, %d\n", ret); + dev_err(&p->dev, "failed to enable ref clock: %d\n", ret); goto exit_repeater; } ret = reset_control_assert(phy->phy_reset); if (ret) { - dev_err(&p->dev, "failed to assert phy_reset, %d\n", ret); + dev_err(&p->dev, "failed to assert phy_reset: %d\n", ret); goto disable_clks; } @@ -481,7 +481,7 @@ static int snps_eusb2_hsphy_init(struct phy *p) ret = reset_control_deassert(phy->phy_reset); if (ret) { - dev_err(&p->dev, "failed to de-assert phy_reset, %d\n", ret); + dev_err(&p->dev, "failed to de-assert phy_reset: %d\n", ret); goto disable_clks; } @@ -588,7 +588,7 @@ static int snps_eusb2_hsphy_probe(struct platform_device *pdev) generic_phy = devm_phy_create(dev, NULL, &snps_eusb2_hsphy_ops); if (IS_ERR(generic_phy)) { - dev_err(dev, "failed to create phy %d\n", ret); + dev_err(dev, "failed to create phy: %ld\n", PTR_ERR(generic_phy)); return PTR_ERR(generic_phy); } From f21b9bea6bc29de88b885cecd5e4f0ada60d4700 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 23 May 2025 10:48:37 +0200 Subject: [PATCH 0164/2411] phy: phy-snps-eusb2: fix optional phy lookup parameter The devm_of_phy_optional_get() takes an optional name argument as its third parameter and not an index like the recently replaced devm_of_phy_get_by_index(). Replace 0 with an explicit NULL for consistency and readability.
Signed-off-by: Johan Hovold Reviewed-by: Neil Armstrong Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20250523084839.11015-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/phy-snps-eusb2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/phy-snps-eusb2.c b/drivers/phy/phy-snps-eusb2.c index 232c5e8a554f..328e67ebfe03 100644 --- a/drivers/phy/phy-snps-eusb2.c +++ b/drivers/phy/phy-snps-eusb2.c @@ -581,7 +581,7 @@ static int snps_eusb2_hsphy_probe(struct platform_device *pdev) return dev_err_probe(dev, ret, "failed to get regulator supplies\n"); - phy->repeater = devm_of_phy_optional_get(dev, np, 0); + phy->repeater = devm_of_phy_optional_get(dev, np, NULL); if (IS_ERR(phy->repeater)) return dev_err_probe(dev, PTR_ERR(phy->repeater), "failed to get repeater\n"); From d2d0ae723ba3fca2c54dfbc758b368d3009e79a7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 23 May 2025 10:48:38 +0200 Subject: [PATCH 0165/2411] phy: phy-snps-eusb2: drop unnecessary loop index declarations There is already a loop index variable declared at function scope so drop the unnecessary overloaded loop declarations. 
Signed-off-by: Johan Hovold Reviewed-by: Neil Armstrong Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20250523084839.11015-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/phy-snps-eusb2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/phy-snps-eusb2.c b/drivers/phy/phy-snps-eusb2.c index 328e67ebfe03..c67712bd8bba 100644 --- a/drivers/phy/phy-snps-eusb2.c +++ b/drivers/phy/phy-snps-eusb2.c @@ -552,7 +552,7 @@ static int snps_eusb2_hsphy_probe(struct platform_device *pdev) if (!phy->clks) return -ENOMEM; - for (int i = 0; i < phy->data->num_clks; ++i) + for (i = 0; i < phy->data->num_clks; ++i) phy->clks[i].id = phy->data->clk_names[i]; ret = devm_clk_bulk_get(dev, phy->data->num_clks, phy->clks); @@ -561,7 +561,7 @@ static int snps_eusb2_hsphy_probe(struct platform_device *pdev) "failed to get phy clock(s)\n"); phy->ref_clk = NULL; - for (int i = 0; i < phy->data->num_clks; ++i) { + for (i = 0; i < phy->data->num_clks; ++i) { if (!strcmp(phy->clks[i].id, "ref")) { phy->ref_clk = phy->clks[i].clk; break; From 47311eaa0a3be575f7835d99e3767f5ee5940b45 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 23 May 2025 10:48:39 +0200 Subject: [PATCH 0166/2411] phy: phy-snps-eusb2: clean up id table sentinel Use a more common format for the id table sentinel for consistency and symmetry. 
Signed-off-by: Johan Hovold Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20250523084839.11015-8-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/phy-snps-eusb2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/phy/phy-snps-eusb2.c b/drivers/phy/phy-snps-eusb2.c index c67712bd8bba..cf62f2221366 100644 --- a/drivers/phy/phy-snps-eusb2.c +++ b/drivers/phy/phy-snps-eusb2.c @@ -611,7 +611,9 @@ static const struct of_device_id snps_eusb2_hsphy_of_match_table[] = { }, { .compatible = "samsung,exynos2200-eusb2-phy", .data = &exynos2200_snps_eusb2_phy, - }, { }, + }, { + /* sentinel */ + } }; MODULE_DEVICE_TABLE(of, snps_eusb2_hsphy_of_match_table); From 1b7bbd5975279a1cf8d907fbc719f350031194c2 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 2 May 2025 09:45:24 +0900 Subject: [PATCH 0167/2411] rust: time: Avoid 64-bit integer division on 32-bit architectures Avoid 64-bit integer division that 32-bit architectures don't implement generally. This uses ktime_to_us() and ktime_to_ms() instead. The time abstraction needs i64 / u32 division so C's div_s64() can be used but ktime_to_us() and ktime_to_ms() provide a simpler solution for this time abstraction problem on 32-bit architectures. 32-bit ARM is the only 32-bit architecture currently supported by Rust. Using the cfg attribute, only 32-bit architectures will call ktime_to_us() and ktime_to_ms(), while the other 64-bit architectures will continue to use the current code as-is to avoid the overhead. One downside of calling the C's functions is that the as_micros/millis methods can no longer be const fn. We stick with the simpler approach unless there's a compelling need for a const fn. 
Suggested-by: Arnd Bergmann Suggested-by: Boqun Feng Signed-off-by: FUJITA Tomonori Reviewed-by: Andreas Hindborg Link: https://lore.kernel.org/r/20250502004524.230553-1-fujita.tomonori@gmail.com Signed-off-by: Andreas Hindborg --- rust/helpers/helpers.c | 1 + rust/helpers/time.c | 13 +++++++++++++ rust/kernel/time.rs | 26 ++++++++++++++++++++++---- 3 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 rust/helpers/time.c diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index 16fa9bca5949..1a05bb0dd420 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -40,6 +40,7 @@ #include "spinlock.c" #include "sync.c" #include "task.c" +#include "time.c" #include "uaccess.c" #include "vmalloc.c" #include "wait.c" diff --git a/rust/helpers/time.c b/rust/helpers/time.c new file mode 100644 index 000000000000..3d31473bce08 --- /dev/null +++ b/rust/helpers/time.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/ktime.h> + +s64 rust_helper_ktime_to_us(const ktime_t kt) +{ + return ktime_to_us(kt); +} + +s64 rust_helper_ktime_to_ms(const ktime_t kt) +{ + return ktime_to_ms(kt); +} diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs index a8089a98da9e..b0a8f3c0ba49 100644 --- a/rust/kernel/time.rs +++ b/rust/kernel/time.rs @@ -228,13 +228,31 @@ pub const fn as_nanos(self) -> i64 { /// Return the smallest number of microseconds greater than or equal /// to the value in the [`Delta`]. #[inline] - pub const fn as_micros_ceil(self) -> i64 { - self.as_nanos().saturating_add(NSEC_PER_USEC - 1) / NSEC_PER_USEC + pub fn as_micros_ceil(self) -> i64 { + #[cfg(CONFIG_64BIT)] + { + self.as_nanos().saturating_add(NSEC_PER_USEC - 1) / NSEC_PER_USEC + } + + #[cfg(not(CONFIG_64BIT))] + // SAFETY: It is always safe to call `ktime_to_us()` with any value. + unsafe { + bindings::ktime_to_us(self.as_nanos().saturating_add(NSEC_PER_USEC - 1)) + } } /// Return the number of milliseconds in the [`Delta`].
#[inline] - pub const fn as_millis(self) -> i64 { - self.as_nanos() / NSEC_PER_MSEC + pub fn as_millis(self) -> i64 { + #[cfg(CONFIG_64BIT)] + { + self.as_nanos() / NSEC_PER_MSEC + } + + #[cfg(not(CONFIG_64BIT))] + // SAFETY: It is always safe to call `ktime_to_ms()` with any value. + unsafe { + bindings::ktime_to_ms(self.as_nanos()) + } } } From 1664a671be46a0b0daf5250eb124d94a5501a64c Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Jun 2025 18:32:54 +0900 Subject: [PATCH 0168/2411] rust: time: Replace ClockId enum with ClockSource trait Replace the ClockId enum with a trait-based abstraction called ClockSource. This change enables expressing clock sources as types and leveraging the Rust type system to enforce clock correctness at compile time. This also sets the stage for future generic abstractions over Instant types such as Instant<Monotonic>. Reviewed-by: Andreas Hindborg Reviewed-by: Boqun Feng Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250610093258.3435874-2-fujita.tomonori@gmail.com Signed-off-by: Andreas Hindborg --- rust/kernel/time.rs | 147 ++++++++++++++++++++---------------- rust/kernel/time/hrtimer.rs | 6 +- 2 files changed, 84 insertions(+), 69 deletions(-) diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs index b0a8f3c0ba49..1d2600288ed1 100644 --- a/rust/kernel/time.rs +++ b/rust/kernel/time.rs @@ -49,6 +49,87 @@ pub fn msecs_to_jiffies(msecs: Msecs) -> Jiffies { unsafe { bindings::__msecs_to_jiffies(msecs) } } +/// Trait for clock sources. +/// +/// Selection of the clock source depends on the use case. In some cases the usage of a +/// particular clock is mandatory, e.g. in network protocols, filesystems. In other +/// cases the user of the clock has to decide which clock is best suited for the +/// purpose. In most scenarios clock [`Monotonic`] is the best choice as it +/// provides an accurate monotonic notion of time (leap second smearing ignored).
+pub trait ClockSource { + /// The kernel clock ID associated with this clock source. + /// + /// This constant corresponds to the C side `clockid_t` value. + const ID: bindings::clockid_t; +} + +/// A monotonically increasing clock. +/// +/// A nonsettable system-wide clock that represents monotonic time since as +/// described by POSIX, "some unspecified point in the past". On Linux, that +/// point corresponds to the number of seconds that the system has been +/// running since it was booted. +/// +/// The CLOCK_MONOTONIC clock is not affected by discontinuous jumps in the +/// CLOCK_REALTIME (e.g., if the system administrator manually changes the +/// clock), but is affected by frequency adjustments. This clock does not +/// count time that the system is suspended. +pub struct Monotonic; + +impl ClockSource for Monotonic { + const ID: bindings::clockid_t = bindings::CLOCK_MONOTONIC as bindings::clockid_t; +} + +/// A settable system-wide clock that measures real (i.e., wall-clock) time. +/// +/// Setting this clock requires appropriate privileges. This clock is +/// affected by discontinuous jumps in the system time (e.g., if the system +/// administrator manually changes the clock), and by frequency adjustments +/// performed by NTP and similar applications via adjtime(3), adjtimex(2), +/// clock_adjtime(2), and ntp_adjtime(3). This clock normally counts the +/// number of seconds since 1970-01-01 00:00:00 Coordinated Universal Time +/// (UTC) except that it ignores leap seconds; near a leap second it may be +/// adjusted by leap second smearing to stay roughly in sync with UTC. Leap +/// second smearing applies frequency adjustments to the clock to speed up +/// or slow down the clock to account for the leap second without +/// discontinuities in the clock. If leap second smearing is not applied, +/// the clock will experience discontinuity around leap second adjustment.
+pub struct RealTime; + +impl ClockSource for RealTime { + const ID: bindings::clockid_t = bindings::CLOCK_REALTIME as bindings::clockid_t; +} + +/// A monotonic that ticks while system is suspended. +/// +/// A nonsettable system-wide clock that is identical to CLOCK_MONOTONIC, +/// except that it also includes any time that the system is suspended. This +/// allows applications to get a suspend-aware monotonic clock without +/// having to deal with the complications of CLOCK_REALTIME, which may have +/// discontinuities if the time is changed using settimeofday(2) or similar. +pub struct BootTime; + +impl ClockSource for BootTime { + const ID: bindings::clockid_t = bindings::CLOCK_BOOTTIME as bindings::clockid_t; +} + +/// International Atomic Time. +/// +/// A system-wide clock derived from wall-clock time but counting leap seconds. +/// +/// This clock is coupled to CLOCK_REALTIME and will be set when CLOCK_REALTIME is +/// set, or when the offset to CLOCK_REALTIME is changed via adjtimex(2). This +/// usually happens during boot and **should** not happen during normal operations. +/// However, if NTP or another application adjusts CLOCK_REALTIME by leap second +/// smearing, this clock will not be precise during leap second smearing. +/// +/// The acronym TAI refers to International Atomic Time. +pub struct Tai; + +impl ClockSource for Tai { + const ID: bindings::clockid_t = bindings::CLOCK_TAI as bindings::clockid_t; +} + /// A specific point in time. /// /// # Invariants @@ -91,72 +172,6 @@ fn sub(self, other: Instant) -> Delta { } } -/// An identifier for a clock. Used when specifying clock sources. -/// -/// -/// Selection of the clock depends on the use case. In some cases the usage of a -/// particular clock is mandatory, e.g. in network protocols, filesystems.In other -/// cases the user of the clock has to decide which clock is best suited for the -/// purpose. 
In most scenarios clock [`ClockId::Monotonic`] is the best choice as it -/// provides a accurate monotonic notion of time (leap second smearing ignored). -#[derive(Clone, Copy, PartialEq, Eq, Debug)] -#[repr(u32)] -pub enum ClockId { - /// A settable system-wide clock that measures real (i.e., wall-clock) time. - /// - /// Setting this clock requires appropriate privileges. This clock is - /// affected by discontinuous jumps in the system time (e.g., if the system - /// administrator manually changes the clock), and by frequency adjustments - /// performed by NTP and similar applications via adjtime(3), adjtimex(2), - /// clock_adjtime(2), and ntp_adjtime(3). This clock normally counts the - /// number of seconds since 1970-01-01 00:00:00 Coordinated Universal Time - /// (UTC) except that it ignores leap seconds; near a leap second it may be - /// adjusted by leap second smearing to stay roughly in sync with UTC. Leap - /// second smearing applies frequency adjustments to the clock to speed up - /// or slow down the clock to account for the leap second without - /// discontinuities in the clock. If leap second smearing is not applied, - /// the clock will experience discontinuity around leap second adjustment. - RealTime = bindings::CLOCK_REALTIME, - /// A monotonically increasing clock. - /// - /// A nonsettable system-wide clock that represents monotonic time since—as - /// described by POSIX—"some unspecified point in the past". On Linux, that - /// point corresponds to the number of seconds that the system has been - /// running since it was booted. - /// - /// The CLOCK_MONOTONIC clock is not affected by discontinuous jumps in the - /// CLOCK_REAL (e.g., if the system administrator manually changes the - /// clock), but is affected by frequency adjustments. This clock does not - /// count time that the system is suspended. - Monotonic = bindings::CLOCK_MONOTONIC, - /// A monotonic that ticks while system is suspended. 
- /// - /// A nonsettable system-wide clock that is identical to CLOCK_MONOTONIC, - /// except that it also includes any time that the system is suspended. This - /// allows applications to get a suspend-aware monotonic clock without - /// having to deal with the complications of CLOCK_REALTIME, which may have - /// discontinuities if the time is changed using settimeofday(2) or similar. - BootTime = bindings::CLOCK_BOOTTIME, - /// International Atomic Time. - /// - /// A system-wide clock derived from wall-clock time but counting leap seconds. - /// - /// This clock is coupled to CLOCK_REALTIME and will be set when CLOCK_REALTIME is - /// set, or when the offset to CLOCK_REALTIME is changed via adjtimex(2). This - /// usually happens during boot and **should** not happen during normal operations. - /// However, if NTP or another application adjusts CLOCK_REALTIME by leap second - /// smearing, this clock will not be precise during leap second smearing. - /// - /// The acronym TAI refers to International Atomic Time. - TAI = bindings::CLOCK_TAI, -} - -impl ClockId { - fn into_c(self) -> bindings::clockid_t { - self as bindings::clockid_t - } -} - /// A span of time. /// /// This struct represents a span of time, with its value stored as nanoseconds. diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs index 36e1290cd079..20b87a4d65ae 100644 --- a/rust/kernel/time/hrtimer.rs +++ b/rust/kernel/time/hrtimer.rs @@ -67,7 +67,7 @@ //! A `restart` operation on a timer in the **stopped** state is equivalent to a //! `start` operation. -use super::ClockId; +use super::ClockSource; use crate::{prelude::*, types::Opaque}; use core::marker::PhantomData; use pin_init::PinInit; @@ -112,7 +112,7 @@ unsafe impl Sync for HrTimer {} impl HrTimer { /// Return an initializer for a new timer instance. 
- pub fn new(mode: HrTimerMode, clock: ClockId) -> impl PinInit<Self> + pub fn new<U: ClockSource>(mode: HrTimerMode) -> impl PinInit<Self> where T: HrTimerCallback, { @@ -126,7 +126,7 @@ pub fn new(mode: HrTimerMode, clock: ClockId) -> impl PinInit<Self> bindings::hrtimer_setup( place, Some(T::Pointer::run), - clock.into_c(), + U::ID, mode.into_c(), ); } From 768dfbfc98e26cfad45f7165a1801d188f3cbd81 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Jun 2025 18:32:55 +0900 Subject: [PATCH 0169/2411] rust: time: Make Instant generic over ClockSource Refactor the Instant type to be generic over a ClockSource type parameter, enabling static enforcement of clock correctness across APIs that deal with time. Previously, the clock source was implicitly fixed (typically CLOCK_MONOTONIC), and developers had to ensure compatibility manually. This design eliminates runtime mismatches between clock sources, and enables stronger type-level guarantees throughout the timer subsystem. Reviewed-by: Andreas Hindborg Reviewed-by: Boqun Feng Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250610093258.3435874-3-fujita.tomonori@gmail.com Signed-off-by: Andreas Hindborg --- rust/kernel/time.rs | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs index 1d2600288ed1..3bc76f75bfd0 100644 --- a/rust/kernel/time.rs +++ b/rust/kernel/time.rs @@ -24,6 +24,8 @@ //! C header: [`include/linux/jiffies.h`](srctree/include/linux/jiffies.h). //! C header: [`include/linux/ktime.h`](srctree/include/linux/ktime.h). +use core::marker::PhantomData; + pub mod hrtimer; /// The number of nanoseconds per microsecond. @@ -136,12 +138,21 @@ impl ClockSource for Tai { /// /// The `inner` value is in the range from 0 to `KTIME_MAX`.
#[repr(transparent)] -#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord)] -pub struct Instant { +#[derive(PartialEq, PartialOrd, Eq, Ord)] +pub struct Instant<C: ClockSource> { inner: bindings::ktime_t, + _c: PhantomData<C>, } -impl Instant { +impl<C: ClockSource> Clone for Instant<C> { + fn clone(&self) -> Self { + *self + } +} + +impl<C: ClockSource> Copy for Instant<C> {} + +impl<C: ClockSource> Instant<C> { /// Get the current time using `CLOCK_MONOTONIC`. #[inline] pub fn now() -> Self { @@ -150,6 +161,7 @@ pub fn now() -> Self { Self { // SAFETY: It is always safe to call `ktime_get()` outside of NMI context. inner: unsafe { bindings::ktime_get() }, + _c: PhantomData, } } @@ -160,12 +172,12 @@ pub fn elapsed(&self) -> Delta { } } -impl core::ops::Sub for Instant { +impl<C: ClockSource> core::ops::Sub for Instant<C> { type Output = Delta; // By the type invariant, it never overflows. #[inline] - fn sub(self, other: Instant) -> Delta { + fn sub(self, other: Instant<C>) -> Delta { Delta { nanos: self.inner - other.inner, } From cc6d1098b4cca6ec8e659de8361457c59a90b583 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Jun 2025 18:32:56 +0900 Subject: [PATCH 0170/2411] rust: time: Add ktime_get() to ClockSource trait Introduce the ktime_get() associated function to the ClockSource trait, allowing each clock source to specify how it retrieves the current time. This enables Instant::now() to be implemented generically using the type-level ClockSource abstraction. This change enhances the type safety and extensibility of timekeeping by statically associating time retrieval mechanisms with their respective clock types. It also reduces the reliance on hardcoded clock logic within Instant.
Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250610093258.3435874-4-fujita.tomonori@gmail.com Signed-off-by: Andreas Hindborg --- rust/helpers/time.c | 16 ++++++++++++++++ rust/kernel/time.rs | 32 ++++++++++++++++++++++++++++---- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/rust/helpers/time.c b/rust/helpers/time.c index 3d31473bce08..08755db43fc2 100644 --- a/rust/helpers/time.c +++ b/rust/helpers/time.c @@ -1,6 +1,22 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/ktime.h> +#include <linux/timekeeping.h> + +ktime_t rust_helper_ktime_get_real(void) +{ + return ktime_get_real(); +} + +ktime_t rust_helper_ktime_get_boottime(void) +{ + return ktime_get_boottime(); +} + +ktime_t rust_helper_ktime_get_clocktai(void) +{ + return ktime_get_clocktai(); +} s64 rust_helper_ktime_to_us(const ktime_t kt) { diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs index 3bc76f75bfd0..1be5ecd814d3 100644 --- a/rust/kernel/time.rs +++ b/rust/kernel/time.rs @@ -63,6 +63,11 @@ pub trait ClockSource { /// /// This constant corresponds to the C side `clockid_t` value. const ID: bindings::clockid_t; + + /// Get the current time from the clock source. + /// + /// The function must return a value in the range from 0 to `KTIME_MAX`. + fn ktime_get() -> bindings::ktime_t; } /// A monotonically increasing clock. @@ -80,6 +85,11 @@ pub trait ClockSource { impl ClockSource for Monotonic { const ID: bindings::clockid_t = bindings::CLOCK_MONOTONIC as bindings::clockid_t; + + fn ktime_get() -> bindings::ktime_t { + // SAFETY: It is always safe to call `ktime_get()` outside of NMI context. + unsafe { bindings::ktime_get() } + } } /// A settable system-wide clock that measures real (i.e., wall-clock) time. @@ -100,6 +110,11 @@ impl ClockSource for Monotonic { impl ClockSource for RealTime { const ID: bindings::clockid_t = bindings::CLOCK_REALTIME as bindings::clockid_t; + + fn ktime_get() -> bindings::ktime_t { + // SAFETY: It is always safe to call `ktime_get_real()` outside of NMI context.
+ unsafe { bindings::ktime_get_real() } + } } /// A monotonic that ticks while system is suspended. @@ -113,6 +128,11 @@ impl ClockSource for RealTime { impl ClockSource for BootTime { const ID: bindings::clockid_t = bindings::CLOCK_BOOTTIME as bindings::clockid_t; + + fn ktime_get() -> bindings::ktime_t { + // SAFETY: It is always safe to call `ktime_get_boottime()` outside of NMI context. + unsafe { bindings::ktime_get_boottime() } + } } /// International Atomic Time. @@ -130,6 +150,11 @@ impl ClockSource for BootTime { impl ClockSource for Tai { const ID: bindings::clockid_t = bindings::CLOCK_TAI as bindings::clockid_t; + + fn ktime_get() -> bindings::ktime_t { + // SAFETY: It is always safe to call `ktime_get_clocktai()` outside of NMI context. + unsafe { bindings::ktime_get_clocktai() } + } } /// A specific point in time. @@ -153,14 +178,13 @@ fn clone(&self) -> Self { impl<C: ClockSource> Copy for Instant<C> {} impl<C: ClockSource> Instant<C> { - /// Get the current time using `CLOCK_MONOTONIC`. + /// Get the current time from the clock source. #[inline] pub fn now() -> Self { - // INVARIANT: The `ktime_get()` function returns a value in the range + // INVARIANT: The `ClockSource::ktime_get()` function returns a value in the range // from 0 to `KTIME_MAX`. Self { - // SAFETY: It is always safe to call `ktime_get()` outside of NMI context. - inner: unsafe { bindings::ktime_get() }, + inner: C::ktime_get(), _c: PhantomData, } } From 240ef19ad78b12e40ec8808694a0b81e6a3a2c2d Mon Sep 17 00:00:00 2001 From: Swapnil Jakhade Date: Mon, 16 Jun 2025 12:17:04 +0530 Subject: [PATCH 0171/2411] phy: cadence-torrent: Add PCIe multilink configuration for 100 MHz refclk Add register sequences to support PCIe multilink configuration for 100MHz reference clock. Maximum two PCIe links are supported.
Signed-off-by: Swapnil Jakhade Signed-off-by: Siddharth Vadapalli Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20250616064705.3225758-2-s-vadapalli@ti.com Signed-off-by: Vinod Koul --- drivers/phy/cadence/phy-cadence-torrent.c | 130 +++++++++++++++++++++- 1 file changed, 129 insertions(+), 1 deletion(-) diff --git a/drivers/phy/cadence/phy-cadence-torrent.c b/drivers/phy/cadence/phy-cadence-torrent.c index a281c0dfae97..1c5e96e2a188 100644 --- a/drivers/phy/cadence/phy-cadence-torrent.c +++ b/drivers/phy/cadence/phy-cadence-torrent.c @@ -197,6 +197,7 @@ #define RX_SDCAL1_INIT_TMR 0x004CU #define RX_SDCAL1_ITER_TMR 0x004DU #define RX_CDRLF_CNFG 0x0080U +#define RX_CDRLF_CNFG2 0x0081U #define RX_CDRLF_CNFG3 0x0082U #define RX_SIGDET_HL_FILT_TMR 0x0090U #define RX_REE_GCSM1_CTRL 0x0108U @@ -204,6 +205,7 @@ #define RX_REE_GCSM1_EQENM_PH2 0x010AU #define RX_REE_GCSM2_CTRL 0x0110U #define RX_REE_PERGCSM_CTRL 0x0118U +#define RX_REE_PEAK_UTHR 0x0142U #define RX_REE_ATTEN_THR 0x0149U #define RX_REE_TAP1_CLIP 0x0171U #define RX_REE_TAP2TON_CLIP 0x0172U @@ -212,6 +214,7 @@ #define RX_DIAG_DFE_CTRL 0x01E0U #define RX_DIAG_DFE_AMP_TUNE_2 0x01E2U #define RX_DIAG_DFE_AMP_TUNE_3 0x01E3U +#define RX_DIAG_REE_DAC_CTRL 0x01E4U #define RX_DIAG_NQST_CTRL 0x01E5U #define RX_DIAG_SIGDET_TUNE 0x01E8U #define RX_DIAG_PI_RATE 0x01F4U @@ -3131,6 +3134,101 @@ static void cdns_torrent_phy_remove(struct platform_device *pdev) cdns_torrent_clk_cleanup(cdns_phy); } +/* Multi link PCIe configuration */ +static const struct cdns_reg_pairs ml_pcie_link_cmn_regs[] = { + {0x0002, PHY_PLL_CFG}, + {0x0601, CMN_PDIAG_PLL0_CLK_SEL_M0} +}; + +static const struct cdns_reg_pairs ml_pcie_xcvr_diag_ln_regs[] = { + {0x0100, XCVR_DIAG_HSCLK_SEL}, + {0x0001, XCVR_DIAG_HSCLK_DIV}, + {0x0812, XCVR_DIAG_PLLDRC_CTRL} +}; + +static const struct cdns_torrent_vals ml_pcie_link_cmn_vals = { + .reg_pairs = ml_pcie_link_cmn_regs, + .num_regs = ARRAY_SIZE(ml_pcie_link_cmn_regs), +}; + +static const struct 
cdns_torrent_vals ml_pcie_xcvr_diag_ln_vals = { + .reg_pairs = ml_pcie_xcvr_diag_ln_regs, + .num_regs = ARRAY_SIZE(ml_pcie_xcvr_diag_ln_regs), +}; + +/* Multi link PCIe, 100 MHz Ref clk, no SSC */ +static const struct cdns_reg_pairs ml_pcie_100_no_ssc_cmn_regs[] = { + {0x0003, CMN_PLL0_VCOCAL_TCTRL}, + {0x0003, CMN_PLL1_VCOCAL_TCTRL} +}; + +static const struct cdns_reg_pairs ml_pcie_100_no_ssc_rx_ln_regs[] = { + {0x0019, RX_REE_TAP1_CLIP}, + {0x0019, RX_REE_TAP2TON_CLIP}, + {0x0008, RX_REE_PEAK_UTHR}, + {0x018E, RX_CDRLF_CNFG}, + {0x2E33, RX_CDRLF_CNFG2}, + {0x0001, RX_DIAG_ACYA}, + {0x0C21, RX_DIAG_DFE_AMP_TUNE_2}, + {0x0002, RX_DIAG_DFE_AMP_TUNE_3}, + {0x0005, RX_DIAG_REE_DAC_CTRL} +}; + +static const struct cdns_torrent_vals ml_pcie_100_no_ssc_cmn_vals = { + .reg_pairs = ml_pcie_100_no_ssc_cmn_regs, + .num_regs = ARRAY_SIZE(ml_pcie_100_no_ssc_cmn_regs), +}; + +static const struct cdns_torrent_vals ml_pcie_100_no_ssc_rx_ln_vals = { + .reg_pairs = ml_pcie_100_no_ssc_rx_ln_regs, + .num_regs = ARRAY_SIZE(ml_pcie_100_no_ssc_rx_ln_regs), +}; + +/* Multi link PCIe, 100 MHz Ref clk, internal SSC */ +static const struct cdns_reg_pairs ml_pcie_100_int_ssc_cmn_regs[] = { + {0x0004, CMN_PLL0_DSM_DIAG_M0}, + {0x0004, CMN_PLL1_DSM_DIAG_M0}, + {0x0509, CMN_PDIAG_PLL0_CP_PADJ_M0}, + {0x0509, CMN_PDIAG_PLL1_CP_PADJ_M0}, + {0x0F00, CMN_PDIAG_PLL0_CP_IADJ_M0}, + {0x0F00, CMN_PDIAG_PLL1_CP_IADJ_M0}, + {0x0F08, CMN_PDIAG_PLL0_FILT_PADJ_M0}, + {0x0F08, CMN_PDIAG_PLL1_FILT_PADJ_M0}, + {0x0064, CMN_PLL0_INTDIV_M0}, + {0x0050, CMN_PLL1_INTDIV_M0}, + {0x0002, CMN_PLL0_FRACDIVH_M0}, + {0x0002, CMN_PLL1_FRACDIVH_M0}, + {0x0044, CMN_PLL0_HIGH_THR_M0}, + {0x0036, CMN_PLL1_HIGH_THR_M0}, + {0x0002, CMN_PDIAG_PLL0_CTRL_M0}, + {0x0002, CMN_PDIAG_PLL1_CTRL_M0}, + {0x0001, CMN_PLL0_SS_CTRL1_M0}, + {0x0001, CMN_PLL1_SS_CTRL1_M0}, + {0x011B, CMN_PLL0_SS_CTRL2_M0}, + {0x011B, CMN_PLL1_SS_CTRL2_M0}, + {0x006E, CMN_PLL0_SS_CTRL3_M0}, + {0x0058, CMN_PLL1_SS_CTRL3_M0}, + {0x000E, CMN_PLL0_SS_CTRL4_M0}, + 
{0x0012, CMN_PLL1_SS_CTRL4_M0}, + {0x0C5E, CMN_PLL0_VCOCAL_REFTIM_START}, + {0x0C5E, CMN_PLL1_VCOCAL_REFTIM_START}, + {0x0C56, CMN_PLL0_VCOCAL_PLLCNT_START}, + {0x0C56, CMN_PLL1_VCOCAL_PLLCNT_START}, + {0x0003, CMN_PLL0_VCOCAL_TCTRL}, + {0x0003, CMN_PLL1_VCOCAL_TCTRL}, + {0x00C7, CMN_PLL0_LOCK_REFCNT_START}, + {0x00C7, CMN_PLL1_LOCK_REFCNT_START}, + {0x00C7, CMN_PLL0_LOCK_PLLCNT_START}, + {0x00C7, CMN_PLL1_LOCK_PLLCNT_START}, + {0x0005, CMN_PLL0_LOCK_PLLCNT_THR}, + {0x0005, CMN_PLL1_LOCK_PLLCNT_THR} +}; + +static const struct cdns_torrent_vals ml_pcie_100_int_ssc_cmn_vals = { + .reg_pairs = ml_pcie_100_int_ssc_cmn_regs, + .num_regs = ARRAY_SIZE(ml_pcie_100_int_ssc_cmn_regs), +}; + /* SGMII and QSGMII link configuration */ static const struct cdns_reg_pairs sgmii_qsgmii_link_cmn_regs[] = { {0x0002, PHY_PLL_CFG} @@ -4531,7 +4629,7 @@ static const struct cdns_torrent_vals sl_sgmii_xcvr_diag_ln_vals = { .num_regs = ARRAY_SIZE(sl_sgmii_xcvr_diag_ln_regs), }; -/* Multi link PCIe, 100 MHz Ref clk, internal SSC */ +/* For PCIe (with some other protocol), 100 MHz Ref clk, internal SSC */ static const struct cdns_reg_pairs pcie_100_int_ssc_cmn_regs[] = { {0x0004, CMN_PLL0_DSM_DIAG_M0}, {0x0004, CMN_PLL0_DSM_DIAG_M1}, @@ -4670,6 +4768,7 @@ static const struct cdns_torrent_vals_entry link_cmn_vals_entries[] = { {CDNS_TORRENT_KEY_ANYCLK(TYPE_DP, TYPE_USB), &usb_dp_link_cmn_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_NONE), NULL}, + {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_PCIE), &ml_pcie_link_cmn_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_SGMII), &pcie_sgmii_link_cmn_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_QSGMII), &pcie_sgmii_link_cmn_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_USB), &pcie_usb_link_cmn_vals}, @@ -4706,6 +4805,7 @@ static const struct cdns_torrent_vals_entry xcvr_diag_vals_entries[] = { {CDNS_TORRENT_KEY_ANYCLK(TYPE_DP, TYPE_USB), &dp_usb_xcvr_diag_ln_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_NONE), NULL}, + 
{CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_PCIE), &ml_pcie_xcvr_diag_ln_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_SGMII), &pcie_sgmii_xcvr_diag_ln_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_QSGMII), &pcie_sgmii_xcvr_diag_ln_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_USB), &pcie_usb_xcvr_diag_ln_vals}, @@ -4756,6 +4856,10 @@ static const struct cdns_torrent_vals_entry cmn_vals_entries[] = { {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, EXTERNAL_SSC), NULL}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, INTERNAL_SSC), &sl_pcie_100_int_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, NO_SSC), &ml_pcie_100_no_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, EXTERNAL_SSC), &ml_pcie_100_no_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, INTERNAL_SSC), &ml_pcie_100_int_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, NO_SSC), &pcie_100_no_ssc_cmn_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, EXTERNAL_SSC), &pcie_100_no_ssc_cmn_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, INTERNAL_SSC), &pcie_100_int_ssc_cmn_vals}, @@ -4838,6 +4942,10 @@ static const struct cdns_torrent_vals_entry cdns_tx_ln_vals_entries[] = { {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, EXTERNAL_SSC), NULL}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, INTERNAL_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, NO_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, EXTERNAL_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, INTERNAL_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, NO_SSC), NULL}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, EXTERNAL_SSC), NULL}, 
{CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, INTERNAL_SSC), NULL}, @@ -4920,6 +5028,10 @@ static const struct cdns_torrent_vals_entry cdns_rx_ln_vals_entries[] = { {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, EXTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, INTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, NO_SSC), &ml_pcie_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, EXTERNAL_SSC), &ml_pcie_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, INTERNAL_SSC), &ml_pcie_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, NO_SSC), &pcie_100_no_ssc_rx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, EXTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, INTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals}, @@ -5038,6 +5150,10 @@ static const struct cdns_torrent_vals_entry ti_tx_ln_vals_entries[] = { {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, EXTERNAL_SSC), NULL}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, INTERNAL_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, NO_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, EXTERNAL_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, INTERNAL_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, NO_SSC), NULL}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, EXTERNAL_SSC), NULL}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, INTERNAL_SSC), NULL}, @@ -5154,6 +5270,10 @@ static const struct cdns_torrent_vals_entry ti_j7200_cmn_vals_entries[] = { 
{CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, EXTERNAL_SSC), NULL}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, INTERNAL_SSC), &sl_pcie_100_int_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, NO_SSC), &ml_pcie_100_no_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, EXTERNAL_SSC), &ml_pcie_100_no_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, INTERNAL_SSC), &ml_pcie_100_int_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, NO_SSC), &pcie_100_no_ssc_cmn_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, EXTERNAL_SSC), &pcie_100_no_ssc_cmn_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, INTERNAL_SSC), &pcie_100_int_ssc_cmn_vals}, @@ -5236,6 +5356,10 @@ static const struct cdns_torrent_vals_entry ti_j7200_tx_ln_vals_entries[] = { {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, EXTERNAL_SSC), NULL}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, INTERNAL_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, NO_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, EXTERNAL_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, INTERNAL_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, NO_SSC), NULL}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, EXTERNAL_SSC), NULL}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, INTERNAL_SSC), NULL}, @@ -5318,6 +5442,10 @@ static const struct cdns_torrent_vals_entry ti_j7200_rx_ln_vals_entries[] = { {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, EXTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, INTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals}, + 
{CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, NO_SSC), &pcie_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, EXTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_PCIE, INTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, NO_SSC), &pcie_100_no_ssc_rx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, EXTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, INTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals}, From 351e07e6b2ecc16ef8669713b14b6f67518c945d Mon Sep 17 00:00:00 2001 From: Swapnil Jakhade Date: Mon, 16 Jun 2025 12:17:05 +0530 Subject: [PATCH 0172/2411] phy: cadence-torrent: Add PCIe multilink + USB with same SSC register config for 100 MHz refclk Add register sequences and support for PCIe multilink + USB configuration for 100MHz reference clock. The same SSC is used for both PCIe and USB. 
Signed-off-by: Swapnil Jakhade Co-developed-by: Siddharth Vadapalli Signed-off-by: Siddharth Vadapalli Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20250616064705.3225758-3-s-vadapalli@ti.com Signed-off-by: Vinod Koul --- drivers/phy/cadence/phy-cadence-torrent.c | 158 ++++++++++++++++++++-- 1 file changed, 150 insertions(+), 8 deletions(-) diff --git a/drivers/phy/cadence/phy-cadence-torrent.c b/drivers/phy/cadence/phy-cadence-torrent.c index 1c5e96e2a188..37fa4bad6bd7 100644 --- a/drivers/phy/cadence/phy-cadence-torrent.c +++ b/drivers/phy/cadence/phy-cadence-torrent.c @@ -206,6 +206,7 @@ #define RX_REE_GCSM2_CTRL 0x0110U #define RX_REE_PERGCSM_CTRL 0x0118U #define RX_REE_PEAK_UTHR 0x0142U +#define RX_REE_PEAK_LTHR 0x0143U #define RX_REE_ATTEN_THR 0x0149U #define RX_REE_TAP1_CLIP 0x0171U #define RX_REE_TAP2TON_CLIP 0x0172U @@ -298,6 +299,7 @@ enum cdns_torrent_phy_type { TYPE_QSGMII, TYPE_USB, TYPE_USXGMII, + TYPE_PCIE_ML, }; enum cdns_torrent_ref_clk { @@ -696,6 +698,7 @@ static const char *cdns_torrent_get_phy_type(enum cdns_torrent_phy_type phy_type case TYPE_DP: return "DisplayPort"; case TYPE_PCIE: + case TYPE_PCIE_ML: return "PCIe"; case TYPE_SGMII: return "SGMII"; @@ -2481,6 +2484,7 @@ int cdns_torrent_phy_configure_multilink(struct cdns_torrent_phy *cdns_phy) enum cdns_torrent_ssc_mode ssc; struct regmap *regmap; u32 num_regs, num_protocols, protocol; + u32 num_pcie_links = 0; num_protocols = hweight32(cdns_phy->protocol_bitmask); /* Maximum 2 protocols are supported */ @@ -2513,6 +2517,44 @@ int cdns_torrent_phy_configure_multilink(struct cdns_torrent_phy *cdns_phy) phy_t1 = fns(cdns_phy->protocol_bitmask, 0); phy_t2 = fns(cdns_phy->protocol_bitmask, 1); + + /* + * PCIe Multilink configuration can be supported along with a + * non-PCIe protocol. 
The existing limitation associated with + * the standalone PCIe Multilink configuration still remains, + * implying that there can be only two links (subnodes) of the + * PHY type PCIe which constitute the PCIe Multilink. + * + * Such configurations are handled by introducing a new protocol + * namely TYPE_PCIE_ML. Both of the PCIe links which have the + * protocol as TYPE_PCIE shall be treated as though the protocol + * corresponding to them is TYPE_PCIE_ML only for the sake of + * configuring the SERDES. + * + * PCIe Multilink configuration can be identified by checking if + * there are exactly two links with phy_type set to TYPE_PCIE. + * phy_t1 and phy_t2 are modified in such cases to support the + * PCIe Multilink configuration with a non-PCIe protocol. + */ + for (node = 0; node < cdns_phy->nsubnodes; node++) { + if (cdns_phy->phys[node].phy_type == TYPE_PCIE) + num_pcie_links++; + } + + if (num_pcie_links > 2) { + dev_err(dev, "cannot support PCIe Multilink with %u PCIe links\n", + num_pcie_links); + return -EINVAL; + } else if (num_pcie_links == 2) { + phy_t1 = TYPE_PCIE_ML; + for (node = 0; node < cdns_phy->nsubnodes; node++) { + if (cdns_phy->phys[node].phy_type == TYPE_PCIE) { + cdns_phy->phys[node].phy_type = TYPE_PCIE_ML; + continue; + } + phy_t2 = cdns_phy->phys[node].phy_type; + } + } } /** @@ -2679,6 +2721,11 @@ int cdns_torrent_phy_configure_multilink(struct cdns_torrent_phy *cdns_phy) } } + /* Restore TYPE_PCIE_ML to TYPE_PCIE to be compatible with suspend-resume */ + for (node = 0; node < cdns_phy->nsubnodes; node++) + if (cdns_phy->phys[node].phy_type == TYPE_PCIE_ML) + cdns_phy->phys[node].phy_type = TYPE_PCIE; + /* Take the PHY out of reset */ ret = reset_control_deassert(cdns_phy->phy_rst); if (ret) @@ -3091,15 +3138,14 @@ static int cdns_torrent_phy_probe(struct platform_device *pdev) } if (cdns_phy->nsubnodes > 1) - dev_dbg(dev, "Multi-link: %s (%d lanes) & %s (%d lanes)", - cdns_torrent_get_phy_type(cdns_phy->phys[0].phy_type), - 
cdns_phy->phys[0].num_lanes, - cdns_torrent_get_phy_type(cdns_phy->phys[1].phy_type), - cdns_phy->phys[1].num_lanes); + dev_dbg(dev, "Multi link configuration:\n"); else - dev_dbg(dev, "Single link: %s (%d lanes)", - cdns_torrent_get_phy_type(cdns_phy->phys[0].phy_type), - cdns_phy->phys[0].num_lanes); + dev_dbg(dev, "Single link configuration:\n"); + + for (i = 0; i < cdns_phy->nsubnodes; i++) + dev_dbg(dev, "%s (%d lanes)", + cdns_torrent_get_phy_type(cdns_phy->phys[i].phy_type), + cdns_phy->phys[i].num_lanes); return 0; @@ -3134,6 +3180,37 @@ static void cdns_torrent_phy_remove(struct platform_device *pdev) cdns_torrent_clk_cleanup(cdns_phy); } +/* Multilink PCIe and USB Same SSC link configuration */ +static const struct cdns_reg_pairs ml_pcie_usb_link_cmn_regs[] = { + {0x0002, PHY_PLL_CFG}, + {0x8600, CMN_PDIAG_PLL0_CLK_SEL_M0} +}; + +static const struct cdns_reg_pairs ml_pcie_usb_xcvr_diag_ln_regs[] = { + {0x0100, XCVR_DIAG_HSCLK_SEL}, + {0x0013, XCVR_DIAG_HSCLK_DIV}, + {0x0812, XCVR_DIAG_PLLDRC_CTRL} +}; + +static const struct cdns_reg_pairs usb_ml_pcie_xcvr_diag_ln_regs[] = { + {0x0041, XCVR_DIAG_PLLDRC_CTRL}, +}; + +static const struct cdns_torrent_vals ml_pcie_usb_link_cmn_vals = { + .reg_pairs = ml_pcie_usb_link_cmn_regs, + .num_regs = ARRAY_SIZE(ml_pcie_usb_link_cmn_regs), +}; + +static const struct cdns_torrent_vals ml_pcie_usb_xcvr_diag_ln_vals = { + .reg_pairs = ml_pcie_usb_xcvr_diag_ln_regs, + .num_regs = ARRAY_SIZE(ml_pcie_usb_xcvr_diag_ln_regs), +}; + +static const struct cdns_torrent_vals usb_ml_pcie_xcvr_diag_ln_vals = { + .reg_pairs = usb_ml_pcie_xcvr_diag_ln_regs, + .num_regs = ARRAY_SIZE(usb_ml_pcie_xcvr_diag_ln_regs), +}; + /* Multi link PCIe configuration */ static const struct cdns_reg_pairs ml_pcie_link_cmn_regs[] = { {0x0002, PHY_PLL_CFG}, @@ -4140,6 +4217,8 @@ static const struct cdns_reg_pairs usb_100_no_ssc_rx_ln_regs[] = { {0x0C02, RX_REE_ATTEN_THR}, {0x0330, RX_REE_SMGM_CTRL1}, {0x0300, RX_REE_SMGM_CTRL2}, + {0x0000, 
RX_REE_PEAK_UTHR}, + {0x01F5, RX_REE_PEAK_LTHR}, {0x0019, RX_REE_TAP1_CLIP}, {0x0019, RX_REE_TAP2TON_CLIP}, {0x1004, RX_DIAG_SIGDET_TUNE}, @@ -4775,6 +4854,8 @@ static const struct cdns_torrent_vals_entry link_cmn_vals_entries[] = { {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_DP), &pcie_dp_link_cmn_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_USXGMII), &pcie_usxgmii_link_cmn_vals}, + {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE_ML, TYPE_USB), &ml_pcie_usb_link_cmn_vals}, + {CDNS_TORRENT_KEY_ANYCLK(TYPE_SGMII, TYPE_NONE), &sl_sgmii_link_cmn_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_SGMII, TYPE_PCIE), &pcie_sgmii_link_cmn_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_SGMII, TYPE_QSGMII), &sgmii_qsgmii_link_cmn_vals}, @@ -4789,6 +4870,7 @@ static const struct cdns_torrent_vals_entry link_cmn_vals_entries[] = { {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_NONE), &sl_usb_link_cmn_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_PCIE), &pcie_usb_link_cmn_vals}, + {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_PCIE_ML), &ml_pcie_usb_link_cmn_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_SGMII), &usb_sgmii_link_cmn_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_QSGMII), &usb_sgmii_link_cmn_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_DP), &usb_dp_link_cmn_vals}, @@ -4812,6 +4894,8 @@ static const struct cdns_torrent_vals_entry xcvr_diag_vals_entries[] = { {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_DP), &pcie_dp_xcvr_diag_ln_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_USXGMII), &pcie_usxgmii_xcvr_diag_ln_vals}, + {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE_ML, TYPE_USB), &ml_pcie_usb_xcvr_diag_ln_vals}, + {CDNS_TORRENT_KEY_ANYCLK(TYPE_SGMII, TYPE_NONE), &sl_sgmii_xcvr_diag_ln_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_SGMII, TYPE_PCIE), &sgmii_pcie_xcvr_diag_ln_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_SGMII, TYPE_QSGMII), &sgmii_qsgmii_xcvr_diag_ln_vals}, @@ -4826,6 +4910,7 @@ static const struct cdns_torrent_vals_entry xcvr_diag_vals_entries[] = { {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_NONE), &sl_usb_xcvr_diag_ln_vals}, 
{CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_PCIE), &usb_pcie_xcvr_diag_ln_vals}, + {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_PCIE_ML), &usb_ml_pcie_xcvr_diag_ln_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_SGMII), &usb_sgmii_xcvr_diag_ln_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_QSGMII), &usb_sgmii_xcvr_diag_ln_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_DP), &usb_dp_xcvr_diag_ln_vals}, @@ -4839,6 +4924,7 @@ static const struct cdns_torrent_vals_entry xcvr_diag_vals_entries[] = { static const struct cdns_torrent_vals_entry pcs_cmn_vals_entries[] = { {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_NONE), &usb_phy_pcs_cmn_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_PCIE), &usb_phy_pcs_cmn_vals}, + {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_PCIE_ML), &usb_phy_pcs_cmn_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_SGMII), &usb_phy_pcs_cmn_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_QSGMII), &usb_phy_pcs_cmn_vals}, {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_DP), &usb_phy_pcs_cmn_vals}, @@ -4874,6 +4960,10 @@ static const struct cdns_torrent_vals_entry cmn_vals_entries[] = { {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_DP, NO_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, NO_SSC), &ml_pcie_100_no_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, EXTERNAL_SSC), &ml_pcie_100_no_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, INTERNAL_SSC), &ml_pcie_100_int_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_NONE, NO_SSC), &sl_sgmii_100_no_ssc_cmn_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_PCIE, NO_SSC), &sgmii_100_no_ssc_cmn_vals}, @@ -4906,6 +4996,10 @@ static const struct cdns_torrent_vals_entry cmn_vals_entries[] = { {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, EXTERNAL_SSC), &usb_100_no_ssc_cmn_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, INTERNAL_SSC), 
&usb_100_int_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, NO_SSC), &usb_100_no_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, EXTERNAL_SSC), &usb_100_no_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, INTERNAL_SSC), &usb_100_no_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, NO_SSC), &sl_usb_100_no_ssc_cmn_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, EXTERNAL_SSC), &sl_usb_100_no_ssc_cmn_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, INTERNAL_SSC), &sl_usb_100_int_ssc_cmn_vals}, @@ -4960,6 +5054,10 @@ static const struct cdns_torrent_vals_entry cdns_tx_ln_vals_entries[] = { {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_DP, NO_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, NO_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, EXTERNAL_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, INTERNAL_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_NONE, NO_SSC), &sgmii_100_no_ssc_tx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_PCIE, NO_SSC), &sgmii_100_no_ssc_tx_ln_vals}, @@ -4992,6 +5090,10 @@ static const struct cdns_torrent_vals_entry cdns_tx_ln_vals_entries[] = { {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, EXTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, INTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, NO_SSC), &usb_100_no_ssc_tx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, EXTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, INTERNAL_SSC), 
&usb_100_no_ssc_tx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, NO_SSC), &usb_100_no_ssc_tx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, EXTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, INTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals}, @@ -5046,6 +5148,10 @@ static const struct cdns_torrent_vals_entry cdns_rx_ln_vals_entries[] = { {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_DP, NO_SSC), &pcie_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, NO_SSC), &ml_pcie_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, EXTERNAL_SSC), &ml_pcie_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, INTERNAL_SSC), &ml_pcie_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_NONE, NO_SSC), &sgmii_100_no_ssc_rx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_PCIE, NO_SSC), &sgmii_100_no_ssc_rx_ln_vals}, @@ -5078,6 +5184,10 @@ static const struct cdns_torrent_vals_entry cdns_rx_ln_vals_entries[] = { {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, EXTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, INTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, NO_SSC), &usb_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, EXTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, INTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, NO_SSC), &usb_100_no_ssc_rx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, EXTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals}, 
{CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, INTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals}, @@ -5168,6 +5278,10 @@ static const struct cdns_torrent_vals_entry ti_tx_ln_vals_entries[] = { {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_DP, NO_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, NO_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, EXTERNAL_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, INTERNAL_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_NONE, NO_SSC), &ti_sgmii_100_no_ssc_tx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_PCIE, NO_SSC), &ti_sgmii_100_no_ssc_tx_ln_vals}, @@ -5200,6 +5314,10 @@ static const struct cdns_torrent_vals_entry ti_tx_ln_vals_entries[] = { {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, EXTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, INTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, NO_SSC), &usb_100_no_ssc_tx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, EXTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, INTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, NO_SSC), &usb_100_no_ssc_tx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, EXTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, INTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals}, @@ -5288,6 +5406,10 @@ static const struct cdns_torrent_vals_entry ti_j7200_cmn_vals_entries[] = { {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_DP, NO_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, 
TYPE_USB, NO_SSC), &ml_pcie_100_no_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, EXTERNAL_SSC), &ml_pcie_100_no_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, INTERNAL_SSC), &ml_pcie_100_int_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_NONE, NO_SSC), &sl_sgmii_100_no_ssc_cmn_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_PCIE, NO_SSC), &sgmii_100_no_ssc_cmn_vals}, @@ -5320,6 +5442,10 @@ static const struct cdns_torrent_vals_entry ti_j7200_cmn_vals_entries[] = { {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, EXTERNAL_SSC), &usb_100_no_ssc_cmn_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, INTERNAL_SSC), &usb_100_int_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, NO_SSC), &usb_100_no_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, EXTERNAL_SSC), &usb_100_no_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, INTERNAL_SSC), &usb_100_no_ssc_cmn_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, NO_SSC), &sl_usb_100_no_ssc_cmn_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, EXTERNAL_SSC), &sl_usb_100_no_ssc_cmn_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, INTERNAL_SSC), &sl_usb_100_int_ssc_cmn_vals}, @@ -5374,6 +5500,10 @@ static const struct cdns_torrent_vals_entry ti_j7200_tx_ln_vals_entries[] = { {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_DP, NO_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, NO_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, EXTERNAL_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, INTERNAL_SSC), NULL}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_NONE, 
NO_SSC), &ti_sgmii_100_no_ssc_tx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_PCIE, NO_SSC), &ti_sgmii_100_no_ssc_tx_ln_vals}, @@ -5406,6 +5536,10 @@ static const struct cdns_torrent_vals_entry ti_j7200_tx_ln_vals_entries[] = { {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, EXTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, INTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, NO_SSC), &usb_100_no_ssc_tx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, EXTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, INTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, NO_SSC), &usb_100_no_ssc_tx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, EXTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, INTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals}, @@ -5460,6 +5594,10 @@ static const struct cdns_torrent_vals_entry ti_j7200_rx_ln_vals_entries[] = { {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_DP, NO_SSC), &pcie_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, NO_SSC), &pcie_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, EXTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE_ML, TYPE_USB, INTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_NONE, NO_SSC), &sgmii_100_no_ssc_rx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_PCIE, NO_SSC), &sgmii_100_no_ssc_rx_ln_vals}, @@ -5492,6 +5630,10 @@ static const struct cdns_torrent_vals_entry ti_j7200_rx_ln_vals_entries[] = { 
{CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, EXTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, INTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, NO_SSC), &usb_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, EXTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE_ML, INTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals}, + {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, NO_SSC), &usb_100_no_ssc_rx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, EXTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals}, {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, INTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals}, From 95463cbb4fe6489921fb8c72890113dca54ce83f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 23 May 2025 10:51:12 +0200 Subject: [PATCH 0173/2411] phy: drop probe registration printks Drivers should generally be quiet on successful probe, but this is not followed by some PHY drivers, for example: snps-eusb2-hsphy 88e1000.phy: Registered Snps-eUSB2 phy qcom-eusb2-repeater c432000.spmi:pmic@7:phy@fd00: Registered Qcom-eUSB2 repeater qcom-eusb2-repeater c432000.spmi:pmic@a:phy@fd00: Registered Qcom-eUSB2 repeater qcom-eusb2-repeater c432000.spmi:pmic@b:phy@fd00: Registered Qcom-eUSB2 repeater snps-eusb2-hsphy fd3000.phy: Registered Snps-eUSB2 phy snps-eusb2-hsphy fd9000.phy: Registered Snps-eUSB2 phy snps-eusb2-hsphy fde000.phy: Registered Snps-eUSB2 phy snps-eusb2-hsphy 88e0000.phy: Registered Snps-eUSB2 phy snps-eusb2-hsphy 88e2000.phy: Registered Snps-eUSB2 phy Drop (or demote to debug level) unnecessary registration info messages to make boot logs a little less noisy. 
Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20250523085112.11287-1-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/broadcom/phy-bcm-ns2-pcie.c | 2 -- drivers/phy/broadcom/phy-bcm-ns2-usbdrd.c | 1 - drivers/phy/broadcom/phy-bcm-sr-pcie.c | 2 -- drivers/phy/broadcom/phy-brcm-sata.c | 2 +- drivers/phy/marvell/phy-pxa-usb.c | 1 - drivers/phy/phy-snps-eusb2.c | 2 -- drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c | 2 -- drivers/phy/qualcomm/phy-qcom-m31.c | 2 -- drivers/phy/qualcomm/phy-qcom-qusb2.c | 4 +--- drivers/phy/st/phy-stih407-usb.c | 2 -- drivers/phy/st/phy-stm32-usbphyc.c | 4 ++-- drivers/phy/ti/phy-twl4030-usb.c | 1 - 12 files changed, 4 insertions(+), 21 deletions(-) diff --git a/drivers/phy/broadcom/phy-bcm-ns2-pcie.c b/drivers/phy/broadcom/phy-bcm-ns2-pcie.c index 2eaa41f8fc70..67a6ae5ecba0 100644 --- a/drivers/phy/broadcom/phy-bcm-ns2-pcie.c +++ b/drivers/phy/broadcom/phy-bcm-ns2-pcie.c @@ -61,8 +61,6 @@ static int ns2_pci_phy_probe(struct mdio_device *mdiodev) return PTR_ERR(provider); } - dev_info(dev, "%s PHY registered\n", dev_name(dev)); - return 0; } diff --git a/drivers/phy/broadcom/phy-bcm-ns2-usbdrd.c b/drivers/phy/broadcom/phy-bcm-ns2-usbdrd.c index 36ad02c33ac5..8473fa574529 100644 --- a/drivers/phy/broadcom/phy-bcm-ns2-usbdrd.c +++ b/drivers/phy/broadcom/phy-bcm-ns2-usbdrd.c @@ -395,7 +395,6 @@ static int ns2_drd_phy_probe(struct platform_device *pdev) platform_set_drvdata(pdev, driver); - dev_info(dev, "Registered NS2 DRD Phy device\n"); queue_delayed_work(system_power_efficient_wq, &driver->wq_extcon, driver->debounce_jiffies); diff --git a/drivers/phy/broadcom/phy-bcm-sr-pcie.c b/drivers/phy/broadcom/phy-bcm-sr-pcie.c index ff9b3862bf7a..706e1d83b4ce 100644 --- a/drivers/phy/broadcom/phy-bcm-sr-pcie.c +++ b/drivers/phy/broadcom/phy-bcm-sr-pcie.c @@ -277,8 +277,6 @@ static int sr_pcie_phy_probe(struct platform_device *pdev) return PTR_ERR(provider); } - dev_info(dev, "Stingray PCIe PHY driver initialized\n"); 
- return 0; } diff --git a/drivers/phy/broadcom/phy-brcm-sata.c b/drivers/phy/broadcom/phy-brcm-sata.c index 228100357054..d52dd065e862 100644 --- a/drivers/phy/broadcom/phy-brcm-sata.c +++ b/drivers/phy/broadcom/phy-brcm-sata.c @@ -832,7 +832,7 @@ static int brcm_sata_phy_probe(struct platform_device *pdev) return PTR_ERR(provider); } - dev_info(dev, "registered %d port(s)\n", count); + dev_dbg(dev, "registered %d port(s)\n", count); return 0; } diff --git a/drivers/phy/marvell/phy-pxa-usb.c b/drivers/phy/marvell/phy-pxa-usb.c index 6c98eb9608e9..c0bb71f80c04 100644 --- a/drivers/phy/marvell/phy-pxa-usb.c +++ b/drivers/phy/marvell/phy-pxa-usb.c @@ -325,7 +325,6 @@ static int pxa_usb_phy_probe(struct platform_device *pdev) phy_create_lookup(pxa_usb_phy->phy, "usb", "mv-otg"); } - dev_info(dev, "Marvell PXA USB PHY"); return 0; } diff --git a/drivers/phy/phy-snps-eusb2.c b/drivers/phy/phy-snps-eusb2.c index cf62f2221366..87f323e758d6 100644 --- a/drivers/phy/phy-snps-eusb2.c +++ b/drivers/phy/phy-snps-eusb2.c @@ -599,8 +599,6 @@ static int snps_eusb2_hsphy_probe(struct platform_device *pdev) if (IS_ERR(phy_provider)) return PTR_ERR(phy_provider); - dev_info(dev, "Registered Snps-eUSB2 phy\n"); - return 0; } diff --git a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c index 6bd1b3c75c77..260894b6932c 100644 --- a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c +++ b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c @@ -264,8 +264,6 @@ static int eusb2_repeater_probe(struct platform_device *pdev) if (IS_ERR(phy_provider)) return PTR_ERR(phy_provider); - dev_info(dev, "Registered Qcom-eUSB2 repeater\n"); - return 0; } diff --git a/drivers/phy/qualcomm/phy-qcom-m31.c b/drivers/phy/qualcomm/phy-qcom-m31.c index 20d4c020a83c..7caeea1b109e 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31.c +++ b/drivers/phy/qualcomm/phy-qcom-m31.c @@ -305,8 +305,6 @@ static int m31usb_phy_probe(struct platform_device *pdev) 
phy_set_drvdata(qphy->phy, qphy); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); - if (!IS_ERR(phy_provider)) - dev_info(dev, "Registered M31 USB phy\n"); return PTR_ERR_OR_ZERO(phy_provider); } diff --git a/drivers/phy/qualcomm/phy-qcom-qusb2.c b/drivers/phy/qualcomm/phy-qcom-qusb2.c index 49c37c53b38e..b5514a32ff8f 100644 --- a/drivers/phy/qualcomm/phy-qcom-qusb2.c +++ b/drivers/phy/qualcomm/phy-qcom-qusb2.c @@ -1114,9 +1114,7 @@ static int qusb2_phy_probe(struct platform_device *pdev) phy_set_drvdata(generic_phy, qphy); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); - if (!IS_ERR(phy_provider)) - dev_info(dev, "Registered Qcom-QUSB2 phy\n"); - else + if (IS_ERR(phy_provider)) pm_runtime_disable(dev); return PTR_ERR_OR_ZERO(phy_provider); diff --git a/drivers/phy/st/phy-stih407-usb.c b/drivers/phy/st/phy-stih407-usb.c index ebb1d0858aa3..7a3e4584895c 100644 --- a/drivers/phy/st/phy-stih407-usb.c +++ b/drivers/phy/st/phy-stih407-usb.c @@ -139,8 +139,6 @@ static int stih407_usb2_picophy_probe(struct platform_device *pdev) if (IS_ERR(phy_provider)) return PTR_ERR(phy_provider); - dev_info(dev, "STiH407 USB Generic picoPHY driver probed!"); - return 0; } diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c index b917cd413de7..27fe92f73f33 100644 --- a/drivers/phy/st/phy-stm32-usbphyc.c +++ b/drivers/phy/st/phy-stm32-usbphyc.c @@ -757,8 +757,8 @@ static int stm32_usbphyc_probe(struct platform_device *pdev) } version = readl_relaxed(usbphyc->base + STM32_USBPHYC_VERSION); - dev_info(dev, "registered rev:%lu.%lu\n", - FIELD_GET(MAJREV, version), FIELD_GET(MINREV, version)); + dev_dbg(dev, "registered rev: %lu.%lu\n", + FIELD_GET(MAJREV, version), FIELD_GET(MINREV, version)); return 0; diff --git a/drivers/phy/ti/phy-twl4030-usb.c b/drivers/phy/ti/phy-twl4030-usb.c index 6f12b38cd894..a26aec3ab29e 100644 --- a/drivers/phy/ti/phy-twl4030-usb.c +++ b/drivers/phy/ti/phy-twl4030-usb.c @@ -784,7 
+784,6 @@ static int twl4030_usb_probe(struct platform_device *pdev) pm_runtime_mark_last_busy(&pdev->dev); pm_runtime_put_autosuspend(twl->dev); - dev_info(&pdev->dev, "Initialized TWL4030 USB module\n"); return 0; } From 81456710391d3e55e623b387f01830a50747fd75 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 17 Jun 2025 09:43:20 +0900 Subject: [PATCH 0174/2411] firewire: core: minor code refactoring to localize table of gap count The table for gap count is accessed by a single function. In this case, it can be localized to the function. Link: https://lore.kernel.org/r/20250617004320.477421-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-card.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index b3e48ca516fe..aae774e7a5c3 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -273,10 +273,6 @@ static void allocate_broadcast_channel(struct fw_card *card, int generation) fw_device_set_broadcast_channel); } -static const char gap_count_table[] = { - 63, 5, 7, 8, 10, 13, 16, 18, 21, 24, 26, 29, 32, 35, 37, 40 -}; - void fw_schedule_bm_work(struct fw_card *card, unsigned long delay) { fw_card_get(card); @@ -286,6 +282,9 @@ void fw_schedule_bm_work(struct fw_card *card, unsigned long delay) static void bm_work(struct work_struct *work) { + static const char gap_count_table[] = { + 63, 5, 7, 8, 10, 13, 16, 18, 21, 24, 26, 29, 32, 35, 37, 40 + }; struct fw_card *card = from_work(card, work, bm_work.work); struct fw_device *root_device, *irm_device; struct fw_node *root_node; From f0185cd5942569ea9872bf85dce621d0a7fa401e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 16 Jun 2025 20:42:49 -0700 Subject: [PATCH 0175/2411] phy: qcom: add linux/bitfield.h header to fix a build error Add the header to prevent a build error: drivers/phy/qualcomm/phy-qcom-m31-eusb2.c: In function 'm31eusb2_phy_init': 
drivers/phy/qualcomm/phy-qcom-m31-eusb2.c:210:37: error: implicit declaration of function 'FIELD_PREP' [-Wimplicit-function-declaration] 210 | FIELD_PREP(FSEL, data->fsel)); Fixes: 9c8504861cc4 ("phy: qcom: Add M31 based eUSB2 PHY driver") Signed-off-by: Randy Dunlap Cc: Wesley Cheng Cc: Melody Olvera Cc: Vinod Koul Cc: Kishon Vijay Abraham I Cc: linux-phy@lists.infradead.org Cc: linux-arm-msm@vger.kernel.org Link: https://lore.kernel.org/r/20250617034249.2067135-1-rdunlap@infradead.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-m31-eusb2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c b/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c index 9f02b8a78f6e..7b7120e4214f 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c +++ b/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c @@ -3,6 +3,7 @@ * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ +#include #include #include #include From 994b5709f9f83c48f607e9a52912c912b8149421 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Wed, 11 Jun 2025 16:09:58 +0800 Subject: [PATCH 0176/2411] dt-bindings: dmaengine: Add dma multiplexer for CV18XX/SG200X series SoC The DMA IP of Sophgo CV18XX/SG200X is based on a DW AXI CORE, with an additional channel remap register located in the top system control area. The DMA channel is exclusive to each core. In addition, the DMA multiplexer is a subdevice of system controller, so this binding only contains necessary properties for the multiplexer itself. Add the dmamux binding for CV18XX/SG200X series SoC. 
Signed-off-by: Inochi Amaoto Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20250611081000.1187374-2-inochiama@gmail.com Signed-off-by: Vinod Koul --- .../bindings/dma/sophgo,cv1800b-dmamux.yaml | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 Documentation/devicetree/bindings/dma/sophgo,cv1800b-dmamux.yaml diff --git a/Documentation/devicetree/bindings/dma/sophgo,cv1800b-dmamux.yaml b/Documentation/devicetree/bindings/dma/sophgo,cv1800b-dmamux.yaml new file mode 100644 index 000000000000..011002942235 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/sophgo,cv1800b-dmamux.yaml @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dma/sophgo,cv1800b-dmamux.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Sophgo CV1800/SG200 Series DMA multiplexer + +maintainers: + - Inochi Amaoto + +description: + The DMA multiplexer of CV1800 is a subdevice of the system + controller. It supports mapping 8 channels, but each channel + can be mapped only once. + +allOf: + - $ref: dma-router.yaml# + +properties: + compatible: + const: sophgo,cv1800b-dmamux + + reg: + items: + - description: DMA channel remapping register + - description: DMA channel interrupt mapping register + + '#dma-cells': + const: 2 + description: + The first cell is the device id. The second one is the cpu id.
+ + dma-masters: + maxItems: 1 + +required: + - reg + - '#dma-cells' + - dma-masters + +additionalProperties: false + +examples: + - | + dma-router@154 { + compatible = "sophgo,cv1800b-dmamux"; + reg = <0x154 0x8>, <0x298 0x4>; + #dma-cells = <2>; + dma-masters = <&dmac>; + }; From db7d07b5add4d839df74adab9940cf9da488313f Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Wed, 11 Jun 2025 16:09:59 +0800 Subject: [PATCH 0177/2411] dmaengine: add driver for Sophgo CV18XX/SG200X dmamux Sophgo CV18XX/SG200X use DW AXI CORE with a multiplexer for remapping its request lines. The multiplexer supports at most 8 request lines. Add driver for Sophgo CV18XX/SG200X DMA multiplexer. Signed-off-by: Inochi Amaoto Tested-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20250611081000.1187374-3-inochiama@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 9 ++ drivers/dma/Makefile | 1 + drivers/dma/cv1800b-dmamux.c | 259 +++++++++++++++++++++++++++++++++++ 3 files changed, 269 insertions(+) create mode 100644 drivers/dma/cv1800b-dmamux.c diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index db87dd2a07f7..5d81e34f8e1f 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -572,6 +572,15 @@ config PLX_DMA These are exposed via extra functions on the switch's upstream port. Each function exposes one DMA channel. +config SOPHGO_CV1800B_DMAMUX + tristate "Sophgo CV1800/SG2000 series SoC DMA multiplexer support" + depends on MFD_SYSCON + depends on ARCH_SOPHGO || COMPILE_TEST + help + Support for the DMA multiplexer on Sophgo CV1800/SG2000 + series SoCs. + Say Y here if your board has this SoC.
+ config STE_DMA40 bool "ST-Ericsson DMA40 support" depends on ARCH_U8500 diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index ba9732644752..a54d7688392b 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -71,6 +71,7 @@ obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/ obj-$(CONFIG_PXA_DMA) += pxa_dma.o obj-$(CONFIG_RENESAS_DMA) += sh/ obj-$(CONFIG_SF_PDMA) += sf-pdma/ +obj-$(CONFIG_SOPHGO_CV1800B_DMAMUX) += cv1800b-dmamux.o obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o obj-$(CONFIG_SPRD_DMA) += sprd-dma.o obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o diff --git a/drivers/dma/cv1800b-dmamux.c b/drivers/dma/cv1800b-dmamux.c new file mode 100644 index 000000000000..e900d6595617 --- /dev/null +++ b/drivers/dma/cv1800b-dmamux.c @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2025 Inochi Amaoto + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define REG_DMA_CHANNEL_REMAP0 0x154 +#define REG_DMA_CHANNEL_REMAP1 0x158 +#define REG_DMA_INT_MUX 0x298 + +#define DMAMUX_NCELLS 2 +#define MAX_DMA_MAPPING_ID 42 +#define MAX_DMA_CPU_ID 2 +#define MAX_DMA_CH_ID 7 + +#define DMAMUX_INTMUX_REGISTER_LEN 4 +#define DMAMUX_NR_CH_PER_REGISTER 4 +#define DMAMUX_BIT_PER_CH 8 +#define DMAMUX_CH_MASk GENMASK(5, 0) +#define DMAMUX_INT_BIT_PER_CPU 10 +#define DMAMUX_CH_UPDATE_BIT BIT(31) + +#define DMAMUX_CH_REGPOS(chid) \ + ((chid) / DMAMUX_NR_CH_PER_REGISTER) +#define DMAMUX_CH_REGOFF(chid) \ + ((chid) % DMAMUX_NR_CH_PER_REGISTER) +#define DMAMUX_CH_REG(chid) \ + ((DMAMUX_CH_REGPOS(chid) * sizeof(u32)) + \ + REG_DMA_CHANNEL_REMAP0) +#define DMAMUX_CH_SET(chid, val) \ + (((val) << (DMAMUX_CH_REGOFF(chid) * DMAMUX_BIT_PER_CH)) | \ + DMAMUX_CH_UPDATE_BIT) +#define DMAMUX_CH_MASK(chid) \ + DMAMUX_CH_SET(chid, DMAMUX_CH_MASk) + +#define DMAMUX_INT_BIT(chid, cpuid) \ + BIT((cpuid) * DMAMUX_INT_BIT_PER_CPU + (chid)) +#define DMAMUX_INTEN_BIT(cpuid) \ + DMAMUX_INT_BIT(8, cpuid) +#define 
DMAMUX_INT_CH_BIT(chid, cpuid) \ + (DMAMUX_INT_BIT(chid, cpuid) | DMAMUX_INTEN_BIT(cpuid)) +#define DMAMUX_INT_MASK(chid) \ + (DMAMUX_INT_BIT(chid, 0) | \ + DMAMUX_INT_BIT(chid, 1) | \ + DMAMUX_INT_BIT(chid, 2)) +#define DMAMUX_INT_CH_MASK(chid, cpuid) \ + (DMAMUX_INT_MASK(chid) | DMAMUX_INTEN_BIT(cpuid)) + +struct cv1800_dmamux_data { + struct dma_router dmarouter; + struct regmap *regmap; + spinlock_t lock; + struct llist_head free_maps; + struct llist_head reserve_maps; + DECLARE_BITMAP(mapped_peripherals, MAX_DMA_MAPPING_ID); +}; + +struct cv1800_dmamux_map { + struct llist_node node; + unsigned int channel; + unsigned int peripheral; + unsigned int cpu; +}; + +static void cv1800_dmamux_free(struct device *dev, void *route_data) +{ + struct cv1800_dmamux_data *dmamux = dev_get_drvdata(dev); + struct cv1800_dmamux_map *map = route_data; + + guard(spinlock_irqsave)(&dmamux->lock); + + regmap_update_bits(dmamux->regmap, + DMAMUX_CH_REG(map->channel), + DMAMUX_CH_MASK(map->channel), + DMAMUX_CH_UPDATE_BIT); + + regmap_update_bits(dmamux->regmap, REG_DMA_INT_MUX, + DMAMUX_INT_CH_MASK(map->channel, map->cpu), + DMAMUX_INTEN_BIT(map->cpu)); + + dev_dbg(dev, "free channel %u for req %u (cpu %u)\n", + map->channel, map->peripheral, map->cpu); +} + +static void *cv1800_dmamux_route_allocate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct platform_device *pdev = of_find_device_by_node(ofdma->of_node); + struct cv1800_dmamux_data *dmamux = platform_get_drvdata(pdev); + struct cv1800_dmamux_map *map; + struct llist_node *node; + unsigned long flags; + unsigned int chid, devid, cpuid; + int ret; + + if (dma_spec->args_count != DMAMUX_NCELLS) { + dev_err(&pdev->dev, "invalid number of dma mux args\n"); + return ERR_PTR(-EINVAL); + } + + devid = dma_spec->args[0]; + cpuid = dma_spec->args[1]; + dma_spec->args_count = 1; + + if (devid > MAX_DMA_MAPPING_ID) { + dev_err(&pdev->dev, "invalid device id: %u\n", devid); + return ERR_PTR(-EINVAL); + } + + if 
(cpuid > MAX_DMA_CPU_ID) { + dev_err(&pdev->dev, "invalid cpu id: %u\n", cpuid); + return ERR_PTR(-EINVAL); + } + + dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); + if (!dma_spec->np) { + dev_err(&pdev->dev, "can't get dma master\n"); + return ERR_PTR(-EINVAL); + } + + spin_lock_irqsave(&dmamux->lock, flags); + + if (test_bit(devid, dmamux->mapped_peripherals)) { + llist_for_each_entry(map, dmamux->reserve_maps.first, node) { + if (map->peripheral == devid && map->cpu == cpuid) + goto found; + } + + ret = -EINVAL; + goto failed; + } else { + node = llist_del_first(&dmamux->free_maps); + if (!node) { + ret = -ENODEV; + goto failed; + } + + map = llist_entry(node, struct cv1800_dmamux_map, node); + llist_add(&map->node, &dmamux->reserve_maps); + set_bit(devid, dmamux->mapped_peripherals); + } + +found: + chid = map->channel; + map->peripheral = devid; + map->cpu = cpuid; + + regmap_set_bits(dmamux->regmap, + DMAMUX_CH_REG(chid), + DMAMUX_CH_SET(chid, devid)); + + regmap_update_bits(dmamux->regmap, REG_DMA_INT_MUX, + DMAMUX_INT_CH_MASK(chid, cpuid), + DMAMUX_INT_CH_BIT(chid, cpuid)); + + spin_unlock_irqrestore(&dmamux->lock, flags); + + dma_spec->args[0] = chid; + + dev_dbg(&pdev->dev, "register channel %u for req %u (cpu %u)\n", + chid, devid, cpuid); + + return map; + +failed: + spin_unlock_irqrestore(&dmamux->lock, flags); + of_node_put(dma_spec->np); + dev_err(&pdev->dev, "errno %d\n", ret); + return ERR_PTR(ret); +} + +static int cv1800_dmamux_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *mux_node = dev->of_node; + struct cv1800_dmamux_data *data; + struct cv1800_dmamux_map *tmp; + struct device *parent = dev->parent; + struct regmap *regmap = NULL; + unsigned int i; + + if (!parent) + return -ENODEV; + + regmap = device_node_to_regmap(parent->of_node); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + 
spin_lock_init(&data->lock); + init_llist_head(&data->free_maps); + init_llist_head(&data->reserve_maps); + + for (i = 0; i <= MAX_DMA_CH_ID; i++) { + tmp = devm_kmalloc(dev, sizeof(*tmp), GFP_KERNEL); + if (!tmp) { + /* It is OK for not allocating all channel */ + dev_warn(dev, "can not allocate channel %u\n", i); + continue; + } + + init_llist_node(&tmp->node); + tmp->channel = i; + llist_add(&tmp->node, &data->free_maps); + } + + /* if no channel is allocated, the probe must fail */ + if (llist_empty(&data->free_maps)) + return -ENOMEM; + + data->regmap = regmap; + data->dmarouter.dev = dev; + data->dmarouter.route_free = cv1800_dmamux_free; + + platform_set_drvdata(pdev, data); + + return of_dma_router_register(mux_node, + cv1800_dmamux_route_allocate, + &data->dmarouter); +} + +static void cv1800_dmamux_remove(struct platform_device *pdev) +{ + of_dma_controller_free(pdev->dev.of_node); +} + +static const struct of_device_id cv1800_dmamux_ids[] = { + { .compatible = "sophgo,cv1800b-dmamux", }, + { } +}; +MODULE_DEVICE_TABLE(of, cv1800_dmamux_ids); + +static struct platform_driver cv1800_dmamux_driver = { + .probe = cv1800_dmamux_probe, + .remove = cv1800_dmamux_remove, + .driver = { + .name = "cv1800-dmamux", + .of_match_table = cv1800_dmamux_ids, + }, +}; +module_platform_driver(cv1800_dmamux_driver); + +MODULE_AUTHOR("Inochi Amaoto "); +MODULE_DESCRIPTION("Sophgo CV1800/SG2000 Series SoC DMAMUX driver"); +MODULE_LICENSE("GPL"); From de266931dd996fc2cb8ee8b5d12e39ea463e3f36 Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Thu, 12 Jun 2025 21:11:33 +0000 Subject: [PATCH 0178/2411] dmaengine: apple-admac: Drop default ARCH_APPLE in Kconfig When the first driver for Apple Silicon was upstreamed we accidentally included `default ARCH_APPLE` in its Kconfig which then spread to almost every subsequent driver. As soon as ARCH_APPLE is set to y this will pull in many drivers as built-ins which is not what we want. Thus, drop `default ARCH_APPLE` from Kconfig. 
Signed-off-by: Sven Peter Reviewed-by: Janne Grunau Link: https://lore.kernel.org/r/20250612-apple-kconfig-defconfig-v1-9-0e6f9cb512c1@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 5d81e34f8e1f..3bc79f320540 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -89,7 +89,6 @@ config APPLE_ADMAC tristate "Apple ADMAC support" depends on ARCH_APPLE || COMPILE_TEST select DMA_ENGINE - default ARCH_APPLE help Enable support for Audio DMA Controller found on Apple Silicon SoCs. From 571c1ea91a73db56bd94054fabecd0f070dc90db Mon Sep 17 00:00:00 2001 From: John Ogness Date: Fri, 6 Jun 2025 21:01:49 +0206 Subject: [PATCH 0179/2411] printk: nbcon: Allow reacquire during panic If a console printer is interrupted during panic, it will never be able to reacquire ownership in order to perform any cleanup. That in itself is not a problem, since the non-panic CPU will simply quiesce in an endless loop within nbcon_reacquire_nobuf(). However, in this state, platforms that do not support a true NMI to interrupt the quiesced CPU will not be able to shut down that CPU from within panic(). This then causes problems such as being unable to load and run a kdump kernel. Fix this by allowing non-panic CPUs to reacquire ownership using a direct acquire. Then the non-panic CPUs can successfully exit the nbcon_reacquire_nobuf() loop and the console driver can perform any necessary cleanup. But more importantly, the CPU is no longer quiesced and is free to process any interrupts necessary for panic() to shut down the CPU. All other forms of acquire are still not allowed for non-panic CPUs since it is safer to have them avoid gaining console ownership that is not strictly necessary.
Reported-by: Michael Kelley Closes: https://lore.kernel.org/r/SN6PR02MB4157A4C5E8CB219A75263A17D46DA@SN6PR02MB4157.namprd02.prod.outlook.com Signed-off-by: John Ogness Reviewed-by: Petr Mladek Tested-by: Michael Kelley Link: https://patch.msgid.link/20250606185549.900611-1-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/nbcon.c | 63 ++++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 22 deletions(-) diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index fd12efcc4aed..e7a3af81b173 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -214,8 +214,9 @@ static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) /** * nbcon_context_try_acquire_direct - Try to acquire directly - * @ctxt: The context of the caller - * @cur: The current console state + * @ctxt: The context of the caller + * @cur: The current console state + * @is_reacquire: This acquire is a reacquire * * Acquire the console when it is released. Also acquire the console when * the current owner has a lower priority and the console is in a safe state. @@ -225,17 +226,17 @@ static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) * * Errors: * - * -EPERM: A panic is in progress and this is not the panic CPU. - * Or the current owner or waiter has the same or higher - * priority. No acquire method can be successful in - * this case. + * -EPERM: A panic is in progress and this is neither the panic + * CPU nor is this a reacquire. Or the current owner or + * waiter has the same or higher priority. No acquire + * method can be successful in these cases. * * -EBUSY: The current owner has a lower priority but the console * in an unsafe state. The caller should try using * the handover acquire method. 
*/ static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt, - struct nbcon_state *cur) + struct nbcon_state *cur, bool is_reacquire) { unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; @@ -243,14 +244,20 @@ static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt, do { /* - * Panic does not imply that the console is owned. However, it - * is critical that non-panic CPUs during panic are unable to - * acquire ownership in order to satisfy the assumptions of - * nbcon_waiter_matches(). In particular, the assumption that - * lower priorities are ignored during panic. + * Panic does not imply that the console is owned. However, + * since all non-panic CPUs are stopped during panic(), it + * is safer to have them avoid gaining console ownership. + * + * If this acquire is a reacquire (and an unsafe takeover + * has not previously occurred) then it is allowed to attempt + * a direct acquire in panic. This gives console drivers an + * opportunity to perform any necessary cleanup if they were + * interrupted by the panic CPU while printing. */ - if (other_cpu_in_panic()) + if (other_cpu_in_panic() && + (!is_reacquire || cur->unsafe_takeover)) { return -EPERM; + } if (ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio) return -EPERM; @@ -301,8 +308,9 @@ static bool nbcon_waiter_matches(struct nbcon_state *cur, int expected_prio) * Event #1 implies this context is EMERGENCY. * Event #2 implies the new context is PANIC. * Event #3 occurs when panic() has flushed the console. - * Events #4 and #5 are not possible due to the other_cpu_in_panic() - * check in nbcon_context_try_acquire_direct(). + * Event #4 occurs when a non-panic CPU reacquires. + * Event #5 is not possible due to the other_cpu_in_panic() check + * in nbcon_context_try_acquire_handover(). 
*/ return (cur->req_prio == expected_prio); @@ -431,6 +439,16 @@ static int nbcon_context_try_acquire_handover(struct nbcon_context *ctxt, WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio); WARN_ON_ONCE(!cur->unsafe); + /* + * Panic does not imply that the console is owned. However, it + * is critical that non-panic CPUs during panic are unable to + * wait for a handover in order to satisfy the assumptions of + * nbcon_waiter_matches(). In particular, the assumption that + * lower priorities are ignored during panic. + */ + if (other_cpu_in_panic()) + return -EPERM; + /* Handover is not possible on the same CPU. */ if (cur->cpu == cpu) return -EBUSY; @@ -558,7 +576,8 @@ static struct printk_buffers panic_nbcon_pbufs; /** * nbcon_context_try_acquire - Try to acquire nbcon console - * @ctxt: The context of the caller + * @ctxt: The context of the caller + * @is_reacquire: This acquire is a reacquire * * Context: Under @ctxt->con->device_lock() or local_irq_save(). * Return: True if the console was acquired. False otherwise. @@ -568,7 +587,7 @@ static struct printk_buffers panic_nbcon_pbufs; * in an unsafe state. Otherwise, on success the caller may assume * the console is not in an unsafe state. 
*/ -static bool nbcon_context_try_acquire(struct nbcon_context *ctxt) +static bool nbcon_context_try_acquire(struct nbcon_context *ctxt, bool is_reacquire) { unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; @@ -577,7 +596,7 @@ static bool nbcon_context_try_acquire(struct nbcon_context *ctxt) nbcon_state_read(con, &cur); try_again: - err = nbcon_context_try_acquire_direct(ctxt, &cur); + err = nbcon_context_try_acquire_direct(ctxt, &cur, is_reacquire); if (err != -EBUSY) goto out; @@ -913,7 +932,7 @@ void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); - while (!nbcon_context_try_acquire(ctxt)) + while (!nbcon_context_try_acquire(ctxt, true)) cpu_relax(); nbcon_write_context_set_buf(wctxt, NULL, 0); @@ -1101,7 +1120,7 @@ static bool nbcon_emit_one(struct nbcon_write_context *wctxt, bool use_atomic) cant_migrate(); } - if (!nbcon_context_try_acquire(ctxt)) + if (!nbcon_context_try_acquire(ctxt, false)) goto out; /* @@ -1486,7 +1505,7 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, ctxt->prio = nbcon_get_default_prio(); ctxt->allow_unsafe_takeover = allow_unsafe_takeover; - if (!nbcon_context_try_acquire(ctxt)) + if (!nbcon_context_try_acquire(ctxt, false)) return -EPERM; while (nbcon_seq_read(con) < stop_seq) { @@ -1762,7 +1781,7 @@ bool nbcon_device_try_acquire(struct console *con) ctxt->console = con; ctxt->prio = NBCON_PRIO_NORMAL; - if (!nbcon_context_try_acquire(ctxt)) + if (!nbcon_context_try_acquire(ctxt, false)) return false; if (!nbcon_context_enter_unsafe(ctxt)) From b0dc512ab7ba177442cfec4a65b2681355d7cb64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 11 Jun 2025 12:03:06 +0200 Subject: [PATCH 0180/2411] remoteproc: Don't use %pK through printk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the past %pK was preferable to %p as it would not leak raw 
pointer values into the kernel log. Since commit ad67b74d2469 ("printk: hash addresses printed with %p") the regular %p has been improved to avoid this issue. Furthermore, restricted pointers ("%pK") were never meant to be used through printk(). They can still unintentionally leak raw pointers or acquire sleeping locks in atomic contexts. Switch to the regular pointer formatting which is safer and easier to reason about. There are still a few users of %pK left, but these use it through seq_file, for which its usage is safe. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250611-restricted-pointers-remoteproc-v1-1-f059097ba663@linutronix.de Signed-off-by: Bjorn Andersson --- drivers/remoteproc/omap_remoteproc.c | 2 +- drivers/remoteproc/pru_rproc.c | 2 +- drivers/remoteproc/remoteproc_core.c | 2 +- drivers/remoteproc/remoteproc_virtio.c | 2 +- drivers/remoteproc/st_slim_rproc.c | 2 +- drivers/remoteproc/ti_k3_common.c | 4 ++-- drivers/remoteproc/ti_k3_r5_remoteproc.c | 2 +- drivers/rpmsg/virtio_rpmsg_bus.c | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/remoteproc/omap_remoteproc.c b/drivers/remoteproc/omap_remoteproc.c index 9c7182b3b038..9c9e9c3cf378 100644 --- a/drivers/remoteproc/omap_remoteproc.c +++ b/drivers/remoteproc/omap_remoteproc.c @@ -1211,7 +1211,7 @@ static int omap_rproc_of_get_internal_memories(struct platform_device *pdev, oproc->mem[i].dev_addr = data->mems[i].dev_addr; oproc->mem[i].size = resource_size(res); - dev_dbg(dev, "memory %8s: bus addr %pa size 0x%x va %pK da 0x%x\n", + dev_dbg(dev, "memory %8s: bus addr %pa size 0x%x va %p da 0x%x\n", data->mems[i].name, &oproc->mem[i].bus_addr, oproc->mem[i].size, oproc->mem[i].cpu_addr, oproc->mem[i].dev_addr); diff --git a/drivers/remoteproc/pru_rproc.c b/drivers/remoteproc/pru_rproc.c index 4a4eb9c0b133..842e4b6cc5f9 100644 --- a/drivers/remoteproc/pru_rproc.c +++ b/drivers/remoteproc/pru_rproc.c @@ -1055,7 +1055,7 @@ static int pru_rproc_probe(struct 
platform_device *pdev) pru->mem_regions[i].pa = res->start; pru->mem_regions[i].size = resource_size(res); - dev_dbg(dev, "memory %8s: pa %pa size 0x%zx va %pK\n", + dev_dbg(dev, "memory %8s: pa %pa size 0x%zx va %p\n", mem_names[i], &pru->mem_regions[i].pa, pru->mem_regions[i].size, pru->mem_regions[i].va); } diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 81b2ccf988e8..825672100528 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -699,7 +699,7 @@ static int rproc_alloc_carveout(struct rproc *rproc, return -ENOMEM; } - dev_dbg(dev, "carveout va %pK, dma %pad, len 0x%zx\n", + dev_dbg(dev, "carveout va %p, dma %pad, len 0x%zx\n", va, &dma, mem->len); if (mem->da != FW_RSC_ADDR_ANY && !rproc->domain) { diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index 25a655f33ec0..c5d46a878149 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -136,7 +136,7 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev, size = vring_size(num, rvring->align); memset(addr, 0, size); - dev_dbg(dev, "vring%d: va %pK qsz %d notifyid %d\n", + dev_dbg(dev, "vring%d: va %p qsz %d notifyid %d\n", id, addr, num, rvring->notifyid); /* diff --git a/drivers/remoteproc/st_slim_rproc.c b/drivers/remoteproc/st_slim_rproc.c index 5412beb0a692..d083ecf02f5c 100644 --- a/drivers/remoteproc/st_slim_rproc.c +++ b/drivers/remoteproc/st_slim_rproc.c @@ -190,7 +190,7 @@ static void *slim_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool * } } - dev_dbg(&rproc->dev, "da = 0x%llx len = 0x%zx va = 0x%pK\n", + dev_dbg(&rproc->dev, "da = 0x%llx len = 0x%zx va = 0x%p\n", da, len, va); return va; diff --git a/drivers/remoteproc/ti_k3_common.c b/drivers/remoteproc/ti_k3_common.c index d5dccc81d460..d4f20900f33b 100644 --- a/drivers/remoteproc/ti_k3_common.c +++ b/drivers/remoteproc/ti_k3_common.c @@ -450,7 
+450,7 @@ int k3_rproc_of_get_memories(struct platform_device *pdev, kproc->mem[i].dev_addr = data->mems[i].dev_addr; kproc->mem[i].size = resource_size(res); - dev_dbg(dev, "memory %8s: bus addr %pa size 0x%zx va %pK da 0x%x\n", + dev_dbg(dev, "memory %8s: bus addr %pa size 0x%zx va %p da 0x%x\n", data->mems[i].name, &kproc->mem[i].bus_addr, kproc->mem[i].size, kproc->mem[i].cpu_addr, kproc->mem[i].dev_addr); @@ -528,7 +528,7 @@ int k3_reserved_mem_init(struct k3_rproc *kproc) return -ENOMEM; } - dev_dbg(dev, "reserved memory%d: bus addr %pa size 0x%zx va %pK da 0x%x\n", + dev_dbg(dev, "reserved memory%d: bus addr %pa size 0x%zx va %p da 0x%x\n", i + 1, &kproc->rmem[i].bus_addr, kproc->rmem[i].size, kproc->rmem[i].cpu_addr, kproc->rmem[i].dev_addr); diff --git a/drivers/remoteproc/ti_k3_r5_remoteproc.c b/drivers/remoteproc/ti_k3_r5_remoteproc.c index e34c04c135fc..ca5ff280d2dc 100644 --- a/drivers/remoteproc/ti_k3_r5_remoteproc.c +++ b/drivers/remoteproc/ti_k3_r5_remoteproc.c @@ -1007,7 +1007,7 @@ static int k3_r5_core_of_get_sram_memories(struct platform_device *pdev, return -ENOMEM; } - dev_dbg(dev, "memory sram%d: bus addr %pa size 0x%zx va %pK da 0x%x\n", + dev_dbg(dev, "memory sram%d: bus addr %pa size 0x%zx va %p da 0x%x\n", i, &core->sram[i].bus_addr, core->sram[i].size, core->sram[i].cpu_addr, core->sram[i].dev_addr); diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 4730b1c8b322..484890b4a6a7 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -901,7 +901,7 @@ static int rpmsg_probe(struct virtio_device *vdev) goto vqs_del; } - dev_dbg(&vdev->dev, "buffers: va %pK, dma %pad\n", + dev_dbg(&vdev->dev, "buffers: va %p, dma %pad\n", bufs_va, &vrp->bufs_dma); /* half of the buffers is dedicated for RX */ From f6588dea0ab2873760b87b3ffbd02316e7826ee0 Mon Sep 17 00:00:00 2001 From: Lijuan Gao Date: Thu, 12 Jun 2025 10:39:32 +0800 Subject: [PATCH 0181/2411] dt-bindings: remoteproc: 
qcom,sa8775p-pas: Correct the interrupt number Correct the interrupt number of ready and handover in the DTS example. Fixes: af5da7b0944c ("dt-bindings: remoteproc: qcom,sa8775p-pas: Document the SA8775p ADSP, CDSP and GPDSP") Signed-off-by: Lijuan Gao Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250612-correct_interrupt_for_remoteproc-v1-1-490ee6d92a1b@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- .../devicetree/bindings/remoteproc/qcom,sa8775p-pas.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,sa8775p-pas.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,sa8775p-pas.yaml index a66007951d58..188a25194000 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,sa8775p-pas.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,sa8775p-pas.yaml @@ -144,8 +144,8 @@ examples: interrupts-extended = <&pdc 6 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>, - <&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>, + <&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 3 IRQ_TYPE_EDGE_RISING>; interrupt-names = "wdog", "fatal", "ready", "handover", "stop-ack"; From 46d221ed970a46dc2089b6747efe7a69239558f4 Mon Sep 17 00:00:00 2001 From: Ziyue Zhang Date: Tue, 27 May 2025 15:20:34 +0800 Subject: [PATCH 0182/2411] dt-bindings: PCI: qcom,pcie-sm8150: Document QCS615 QCS615 is derived from SM8150. Hence, add the callback with SM8150 as the fallback. 
Signed-off-by: Ziyue Zhang [mani: commit message rewording] Signed-off-by: Manivannan Sadhasivam Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20250527072036.3599076-3-quic_ziyuzhan@quicinc.com --- .../devicetree/bindings/pci/qcom,pcie-sm8150.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie-sm8150.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie-sm8150.yaml index a604f2a79de3..2aa012b04d93 100644 --- a/Documentation/devicetree/bindings/pci/qcom,pcie-sm8150.yaml +++ b/Documentation/devicetree/bindings/pci/qcom,pcie-sm8150.yaml @@ -16,7 +16,12 @@ description: properties: compatible: - const: qcom,pcie-sm8150 + oneOf: + - const: qcom,pcie-sm8150 + - items: + - enum: + - qcom,pcie-qcs615 + - const: qcom,pcie-sm8150 reg: minItems: 5 From be84da3e19666da5c43c5c4ad86eff456510bd77 Mon Sep 17 00:00:00 2001 From: Ziyue Zhang Date: Thu, 29 May 2025 11:56:31 +0800 Subject: [PATCH 0183/2411] dt-bindings: PCI: qcom,pcie-sa8775p: Document QCS8300 QCS8300 is derived from SA8775p. Hence, add the callback with SA8775p as the fallback. 
Signed-off-by: Ziyue Zhang Signed-off-by: Manivannan Sadhasivam Reviewed-by: Manivannan Sadhasivam Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20250529035635.4162149-3-quic_ziyuzhan@quicinc.com --- .../devicetree/bindings/pci/qcom,pcie-sa8775p.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie-sa8775p.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie-sa8775p.yaml index e3fa232da2ca..4b91b5608013 100644 --- a/Documentation/devicetree/bindings/pci/qcom,pcie-sa8775p.yaml +++ b/Documentation/devicetree/bindings/pci/qcom,pcie-sa8775p.yaml @@ -16,7 +16,12 @@ description: properties: compatible: - const: qcom,pcie-sa8775p + oneOf: + - const: qcom,pcie-sa8775p + - items: + - enum: + - qcom,pcie-qcs8300 + - const: qcom,pcie-sa8775p reg: minItems: 6 From 35636068ce18b8506a7c6cb475395707b6ef3989 Mon Sep 17 00:00:00 2001 From: Hans Zhang <18255117159@163.com> Date: Thu, 29 May 2025 10:10:25 +0800 Subject: [PATCH 0184/2411] dt-bindings: PCI: pci-ep: Extend max-link-speed to PCIe Gen5/Gen6 Update the PCI Endpoint (EP) device tree binding documentation to include PCIe Gen5 and Gen6 support for the `max-link-speed` property. Similar to the Host Controller binding, the original EP binding limited this value to 1~4 (Gen1~Gen4). With current SoCs requiring Gen5/Gen6 support (e.g., Synopsys/Cadence IP), this change aligns the EP binding with the kernel's PCIe 6.0 capabilities. 
Signed-off-by: Hans Zhang <18255117159@163.com> Signed-off-by: Manivannan Sadhasivam Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20250529021026.475861-3-18255117159@163.com --- Documentation/devicetree/bindings/pci/pci-ep.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/pci/pci-ep.yaml b/Documentation/devicetree/bindings/pci/pci-ep.yaml index 214caa4ec3d5..1868a10d5b10 100644 --- a/Documentation/devicetree/bindings/pci/pci-ep.yaml +++ b/Documentation/devicetree/bindings/pci/pci-ep.yaml @@ -51,7 +51,7 @@ properties: max-link-speed: $ref: /schemas/types.yaml#/definitions/uint32 - enum: [ 1, 2, 3, 4 ] + enum: [ 1, 2, 3, 4, 5, 6 ] msi-map: description: | From c1842b98c9f1ac1aac468253febc437f880bb8b5 Mon Sep 17 00:00:00 2001 From: Hans Zhang <18255117159@163.com> Date: Sat, 17 May 2025 23:49:39 +0800 Subject: [PATCH 0185/2411] PCI/ASPM: Use boolean type for aspm_disabled and aspm_force MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The aspm_disabled and aspm_force variables are used as boolean flags. Change their type from int to bool and update assignments to use true/false instead of 1/0. This improves code clarity. 
Signed-off-by: Hans Zhang <18255117159@163.com> Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/20250517154939.139237-1-18255117159@163.com --- drivers/pci/pcie/aspm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 29fcb0689a91..98b3022802b2 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -245,7 +245,7 @@ struct pcie_link_state { u32 clkpm_disable:1; /* Clock PM disabled */ }; -static int aspm_disabled, aspm_force; +static bool aspm_disabled, aspm_force; static bool aspm_support_enabled = true; static DEFINE_MUTEX(aspm_lock); static LIST_HEAD(link_list); @@ -1712,11 +1712,11 @@ static int __init pcie_aspm_disable(char *str) { if (!strcmp(str, "off")) { aspm_policy = POLICY_DEFAULT; - aspm_disabled = 1; + aspm_disabled = true; aspm_support_enabled = false; pr_info("PCIe ASPM is disabled\n"); } else if (!strcmp(str, "force")) { - aspm_force = 1; + aspm_force = true; pr_info("PCIe ASPM is forcibly enabled\n"); } return 1; @@ -1734,7 +1734,7 @@ void pcie_no_aspm(void) */ if (!aspm_force) { aspm_policy = POLICY_DEFAULT; - aspm_disabled = 1; + aspm_disabled = true; } } From 64fd90ef25206b1b9f232fb192ceadb814e991a4 Mon Sep 17 00:00:00 2001 From: Hans Zhang <18255117159@163.com> Date: Fri, 23 May 2025 00:15:33 +0800 Subject: [PATCH 0186/2411] PCI/ASPM: Consolidate variable declaration and initialization Merge the declaration and initialization of 'val' into a single statement for clarity. This eliminates a redundant assignment operation and improves code readability while maintaining the same functionality. 
Signed-off-by: Hans Zhang <18255117159@163.com> Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250522161533.394689-1-18255117159@163.com --- drivers/pci/pcie/aspm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 98b3022802b2..919a05b97647 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -884,10 +884,9 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) /* Configure the ASPM L1 substates. Caller must disable L1 first. */ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state) { - u32 val; + u32 val = 0; struct pci_dev *child = link->downstream, *parent = link->pdev; - val = 0; if (state & PCIE_LINK_STATE_L1_1) val |= PCI_L1SS_CTL1_ASPM_L1_1; if (state & PCIE_LINK_STATE_L1_2) From 9890dd3fb7f93546b4cd760e8371e63a94b05cd5 Mon Sep 17 00:00:00 2001 From: Hans Zhang Date: Sat, 17 May 2025 00:52:23 +0800 Subject: [PATCH 0187/2411] PCI/AER: Use bool for AER disable state tracking Change pcie_aer_disable variable to bool and update pci_no_aer() to set it to true. Improves code readability and aligns with modern kernel practices. 
Signed-off-by: Hans Zhang Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250516165223.125083-3-18255117159@163.com --- drivers/pci/pcie/aer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 70ac66188367..361957502831 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -116,12 +116,12 @@ struct aer_info { PCI_ERR_ROOT_MULTI_COR_RCV | \ PCI_ERR_ROOT_MULTI_UNCOR_RCV) -static int pcie_aer_disable; +static bool pcie_aer_disable; static pci_ers_result_t aer_root_reset(struct pci_dev *dev); void pci_no_aer(void) { - pcie_aer_disable = 1; + pcie_aer_disable = true; } bool pci_aer_available(void) From 0a1eab129fedb4281e65c845b04be02b53c99f9c Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Tue, 24 Sep 2024 20:47:22 +0200 Subject: [PATCH 0188/2411] kconfig: use memcmp instead of deprecated bcmp bcmp() was removed in POSIX.1-2008. This commit replaces bcmp() with memcmp(). This allows Kconfig to link with C libraries that do not provide bcmp(), such as Android bionic libc. 
Signed-off-by: Thomas Meyer Reviewed-by: Miguel Ojeda Reported-by: Abhigyan Ghosh [masahiro: update commit description] Signed-off-by: Masahiro Yamada --- scripts/kconfig/confdata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index ac95661a1c9d..9599a0408862 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -77,7 +77,7 @@ static bool is_same(const char *file1, const char *file2) if (map2 == MAP_FAILED) goto close2; - if (bcmp(map1, map2, st1.st_size)) + if (memcmp(map1, map2, st1.st_size)) goto close2; ret = true; From 9b71efc450fdd2f70d59917025da34f8b0e81135 Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Tue, 10 Jun 2025 16:30:38 +0200 Subject: [PATCH 0189/2411] pinctrl: stm32: Declare stm32_pmx_get_mode() as static The commit acaa037970f6 ("pinctrl: stm32: Implement .get_direction gpio_chip callback") exported the function stm32_pmx_get_mode() and the struct stm32_gpio_bank, but these were never used outside the file that defines them. Declare the function as static, drop it from the include file and also drop the struct's forward declaration, which is no longer needed outside the file. 
Signed-off-by: Antonio Borneo Link: https://lore.kernel.org/20250610143042.295376-2-antonio.borneo@foss.st.com Signed-off-by: Linus Walleij --- drivers/pinctrl/stm32/pinctrl-stm32.c | 5 +++-- drivers/pinctrl/stm32/pinctrl-stm32.h | 4 ---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index ba49d48c3a1d..3abf47e31c01 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -122,6 +122,8 @@ struct stm32_pinctrl { spinlock_t irqmux_lock; }; +static void stm32_pmx_get_mode(struct stm32_gpio_bank *bank, int pin, u32 *mode, u32 *alt); + static inline int stm32_gpio_pin(int gpio) { return gpio % STM32_GPIO_PINS_PER_BANK; @@ -798,8 +800,7 @@ static int stm32_pmx_set_mode(struct stm32_gpio_bank *bank, return err; } -void stm32_pmx_get_mode(struct stm32_gpio_bank *bank, int pin, u32 *mode, - u32 *alt) +static void stm32_pmx_get_mode(struct stm32_gpio_bank *bank, int pin, u32 *mode, u32 *alt) { u32 val; int alt_shift = (pin % 8) * 4; diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.h b/drivers/pinctrl/stm32/pinctrl-stm32.h index 5e5de92ddd58..8790fef2d58a 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.h +++ b/drivers/pinctrl/stm32/pinctrl-stm32.h @@ -65,11 +65,7 @@ struct stm32_pinctrl_match_data { bool secure_control; }; -struct stm32_gpio_bank; - int stm32_pctl_probe(struct platform_device *pdev); -void stm32_pmx_get_mode(struct stm32_gpio_bank *bank, - int pin, u32 *mode, u32 *alt); int stm32_pinctrl_suspend(struct device *dev); int stm32_pinctrl_resume(struct device *dev); From 4c5cc2f65386e22166ce006efe515c667aa075e4 Mon Sep 17 00:00:00 2001 From: Cheick Traore Date: Tue, 10 Jun 2025 16:30:39 +0200 Subject: [PATCH 0190/2411] pinctrl: stm32: Manage irq affinity settings Trying to set the affinity of the interrupts associated to stm32 pinctrl results in a write error. 
Fill struct irq_chip::irq_set_affinity to use the default helper function. Signed-off-by: Cheick Traore Signed-off-by: Antonio Borneo Link: https://lore.kernel.org/20250610143042.295376-3-antonio.borneo@foss.st.com Signed-off-by: Linus Walleij --- drivers/pinctrl/stm32/pinctrl-stm32.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index 3abf47e31c01..e0e3e649976b 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -413,6 +413,7 @@ static struct irq_chip stm32_gpio_irq_chip = { .irq_set_wake = irq_chip_set_wake_parent, .irq_request_resources = stm32_gpio_irq_request_resources, .irq_release_resources = stm32_gpio_irq_release_resources, + .irq_set_affinity = IS_ENABLED(CONFIG_SMP) ? irq_chip_set_affinity_parent : NULL, }; static int stm32_gpio_domain_translate(struct irq_domain *d, From d6c8fceb33d9cc983a5d050f9c3714fe15a51279 Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Tue, 10 Jun 2025 16:30:40 +0200 Subject: [PATCH 0191/2411] pinctrl: stm32: Add RIF support for stm32mp257 On SoC stm32mp257, GPIO supports security and isolation compliant with the Resource Isolation Framework (RIF). Each GPIO line can be assigned to different security and compartment domains. Add the generic code to handle the RIF configuration set by the secure world and initialize the GPIO valid mask accordingly. Enable the RIF support for stm32mp257. 
Co-developed-by: Gatien Chevallier Signed-off-by: Gatien Chevallier Signed-off-by: Antonio Borneo Link: https://lore.kernel.org/20250610143042.295376-4-antonio.borneo@foss.st.com Signed-off-by: Linus Walleij --- drivers/pinctrl/stm32/pinctrl-stm32.c | 121 ++++++++++++++++++++- drivers/pinctrl/stm32/pinctrl-stm32.h | 1 + drivers/pinctrl/stm32/pinctrl-stm32mp257.c | 4 + 3 files changed, 125 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index e0e3e649976b..e7621f9b0005 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -6,6 +6,7 @@ * * Heavily based on Mediatek's pinctrl driver */ +#include #include #include #include @@ -36,6 +37,8 @@ #include "../pinctrl-utils.h" #include "pinctrl-stm32.h" +#define STM32_GPIO_CID1 1 + #define STM32_GPIO_MODER 0x00 #define STM32_GPIO_TYPER 0x04 #define STM32_GPIO_SPEEDR 0x08 @@ -47,6 +50,8 @@ #define STM32_GPIO_AFRL 0x20 #define STM32_GPIO_AFRH 0x24 #define STM32_GPIO_SECCFGR 0x30 +#define STM32_GPIO_CIDCFGR(x) (0x50 + (0x8 * (x))) +#define STM32_GPIO_SEMCR(x) (0x54 + (0x8 * (x))) /* custom bitfield to backup pin status */ #define STM32_GPIO_BKP_MODE_SHIFT 0 @@ -60,6 +65,14 @@ #define STM32_GPIO_BKP_TYPE 10 #define STM32_GPIO_BKP_VAL 11 +#define STM32_GPIO_CIDCFGR_CFEN BIT(0) +#define STM32_GPIO_CIDCFGR_SEMEN BIT(1) +#define STM32_GPIO_CIDCFGR_SCID_MASK GENMASK(5, 4) +#define STM32_GPIO_CIDCFGR_SEMWL_CID1 BIT(16 + STM32_GPIO_CID1) + +#define STM32_GPIO_SEMCR_SEM_MUTEX BIT(0) +#define STM32_GPIO_SEMCR_SEMCID_MASK GENMASK(5, 4) + #define STM32_GPIO_PINS_PER_BANK 16 #define STM32_GPIO_IRQ_LINE 16 @@ -98,6 +111,7 @@ struct stm32_gpio_bank { u32 pin_backup[STM32_GPIO_PINS_PER_BANK]; u8 irq_type[STM32_GPIO_PINS_PER_BANK]; bool secure_control; + bool rif_control; }; struct stm32_pinctrl { @@ -194,6 +208,80 @@ static void stm32_gpio_backup_bias(struct stm32_gpio_bank *bank, u32 offset, bank->pin_backup[offset] |= bias << 
STM32_GPIO_BKP_PUPD_SHIFT; } +/* RIF functions */ + +static bool stm32_gpio_rif_valid(struct stm32_gpio_bank *bank, unsigned int gpio_nr) +{ + u32 cid; + + cid = readl_relaxed(bank->base + STM32_GPIO_CIDCFGR(gpio_nr)); + + if (!(cid & STM32_GPIO_CIDCFGR_CFEN)) + return true; + + if (!(cid & STM32_GPIO_CIDCFGR_SEMEN)) { + if (FIELD_GET(STM32_GPIO_CIDCFGR_SCID_MASK, cid) == STM32_GPIO_CID1) + return true; + + return false; + } + + if (cid & STM32_GPIO_CIDCFGR_SEMWL_CID1) + return true; + + return false; +} + +static bool stm32_gpio_rif_acquire_semaphore(struct stm32_gpio_bank *bank, unsigned int gpio_nr) +{ + u32 cid, sem; + + cid = readl_relaxed(bank->base + STM32_GPIO_CIDCFGR(gpio_nr)); + + if (!(cid & STM32_GPIO_CIDCFGR_CFEN)) + return true; + + if (!(cid & STM32_GPIO_CIDCFGR_SEMEN)) { + if (FIELD_GET(STM32_GPIO_CIDCFGR_SCID_MASK, cid) == STM32_GPIO_CID1) + return true; + + return false; + } + + if (!(cid & STM32_GPIO_CIDCFGR_SEMWL_CID1)) + return false; + + sem = readl_relaxed(bank->base + STM32_GPIO_SEMCR(gpio_nr)); + if (sem & STM32_GPIO_SEMCR_SEM_MUTEX) { + if (FIELD_GET(STM32_GPIO_SEMCR_SEMCID_MASK, sem) == STM32_GPIO_CID1) + return true; + + return false; + } + + writel_relaxed(STM32_GPIO_SEMCR_SEM_MUTEX, bank->base + STM32_GPIO_SEMCR(gpio_nr)); + + sem = readl_relaxed(bank->base + STM32_GPIO_SEMCR(gpio_nr)); + if (sem & STM32_GPIO_SEMCR_SEM_MUTEX && + FIELD_GET(STM32_GPIO_SEMCR_SEMCID_MASK, sem) == STM32_GPIO_CID1) + return true; + + return false; +} + +static void stm32_gpio_rif_release_semaphore(struct stm32_gpio_bank *bank, unsigned int gpio_nr) +{ + u32 cid; + + cid = readl_relaxed(bank->base + STM32_GPIO_CIDCFGR(gpio_nr)); + + if (!(cid & STM32_GPIO_CIDCFGR_CFEN)) + return; + + if (cid & STM32_GPIO_CIDCFGR_SEMEN) + writel_relaxed(0, bank->base + STM32_GPIO_SEMCR(gpio_nr)); +} + /* GPIO functions */ static inline void __stm32_gpio_set(struct stm32_gpio_bank *bank, @@ -220,9 +308,26 @@ static int stm32_gpio_request(struct gpio_chip *chip, unsigned 
offset) return -EINVAL; } + if (bank->rif_control) { + if (!stm32_gpio_rif_acquire_semaphore(bank, offset)) { + dev_err(pctl->dev, "pin %d not available.\n", pin); + return -EINVAL; + } + } + return pinctrl_gpio_request(chip, offset); } +static void stm32_gpio_free(struct gpio_chip *chip, unsigned int offset) +{ + struct stm32_gpio_bank *bank = gpiochip_get_data(chip); + + pinctrl_gpio_free(chip, offset); + + if (bank->rif_control) + stm32_gpio_rif_release_semaphore(bank, offset); +} + static int stm32_gpio_get(struct gpio_chip *chip, unsigned offset) { struct stm32_gpio_bank *bank = gpiochip_get_data(chip); @@ -306,12 +411,25 @@ static int stm32_gpio_init_valid_mask(struct gpio_chip *chip, } } + if (bank->rif_control) { + for (i = 0; i < ngpios; i++) { + if (!test_bit(i, valid_mask)) + continue; + + if (stm32_gpio_rif_valid(bank, i)) + continue; + + dev_dbg(pctl->dev, "RIF semaphore ownership conflict, GPIO %u", i); + clear_bit(i, valid_mask); + } + } + return 0; } static const struct gpio_chip stm32_gpio_template = { .request = stm32_gpio_request, - .free = pinctrl_gpio_free, + .free = stm32_gpio_free, .get = stm32_gpio_get, .set_rv = stm32_gpio_set, .direction_input = pinctrl_gpio_direction_input, @@ -1350,6 +1468,7 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, struct fwnode bank->bank_nr = bank_nr; bank->bank_ioport_nr = bank_ioport_nr; bank->secure_control = pctl->match_data->secure_control; + bank->rif_control = pctl->match_data->rif_control; spin_lock_init(&bank->lock); if (pctl->domain) { diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.h b/drivers/pinctrl/stm32/pinctrl-stm32.h index 8790fef2d58a..ed525f5bdd7c 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.h +++ b/drivers/pinctrl/stm32/pinctrl-stm32.h @@ -63,6 +63,7 @@ struct stm32_pinctrl_match_data { const struct stm32_desc_pin *pins; const unsigned int npins; bool secure_control; + bool rif_control; }; int stm32_pctl_probe(struct platform_device *pdev); diff --git 
a/drivers/pinctrl/stm32/pinctrl-stm32mp257.c b/drivers/pinctrl/stm32/pinctrl-stm32mp257.c index 23aebd4695e9..984587207956 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32mp257.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32mp257.c @@ -2542,11 +2542,15 @@ static const struct stm32_desc_pin stm32mp257_z_pins[] = { static struct stm32_pinctrl_match_data stm32mp257_match_data = { .pins = stm32mp257_pins, .npins = ARRAY_SIZE(stm32mp257_pins), + .secure_control = true, + .rif_control = true, }; static struct stm32_pinctrl_match_data stm32mp257_z_match_data = { .pins = stm32mp257_z_pins, .npins = ARRAY_SIZE(stm32mp257_z_pins), + .secure_control = true, + .rif_control = true, }; static const struct of_device_id stm32mp257_pctrl_match[] = { From dba0aff2b89bd106601ed88dfda44fdd3218eb53 Mon Sep 17 00:00:00 2001 From: Stephane Danieau Date: Tue, 10 Jun 2025 16:30:41 +0200 Subject: [PATCH 0192/2411] pinctrl: stm32: Allow compile as module for stm32mp257 Add ability to build pinctrl for stm32mp257 as a kernel module. Add kernel-doc to the exported symbols. 
Signed-off-by: Stephane Danieau Signed-off-by: Antonio Borneo Link: https://lore.kernel.org/20250610143042.295376-5-antonio.borneo@foss.st.com Signed-off-by: Linus Walleij --- arch/arm64/Kconfig.platforms | 1 - drivers/pinctrl/stm32/Kconfig | 6 +++--- drivers/pinctrl/stm32/pinctrl-stm32.c | 8 ++++++++ drivers/pinctrl/stm32/pinctrl-stm32.h | 14 ++++++++++++++ drivers/pinctrl/stm32/pinctrl-stm32mp257.c | 11 ++++++----- 5 files changed, 31 insertions(+), 9 deletions(-) diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index a541bb029aa4..d6cfcfad26e4 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -311,7 +311,6 @@ config ARCH_STM32 bool "STMicroelectronics STM32 SoC Family" select GPIOLIB select PINCTRL - select PINCTRL_STM32MP257 select ARM_SMC_MBOX select ARM_SCMI_PROTOCOL select REGULATOR diff --git a/drivers/pinctrl/stm32/Kconfig b/drivers/pinctrl/stm32/Kconfig index 2656d3d3ae40..297a2f088bc1 100644 --- a/drivers/pinctrl/stm32/Kconfig +++ b/drivers/pinctrl/stm32/Kconfig @@ -2,7 +2,7 @@ if ARCH_STM32 || COMPILE_TEST config PINCTRL_STM32 - bool + tristate depends on OF select PINMUX select GENERIC_PINCONF @@ -53,8 +53,8 @@ config PINCTRL_STM32MP157 select PINCTRL_STM32 config PINCTRL_STM32MP257 - bool "STMicroelectronics STM32MP257 pin control" if COMPILE_TEST && !MACH_STM32MP25 + tristate "STMicroelectronics STM32MP257 pin control" depends on OF && HAS_IOMEM - default MACH_STM32MP25 + default MACH_STM32MP25 || (ARCH_STM32 && ARM64) select PINCTRL_STM32 endif diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index e7621f9b0005..ce2a5fee4230 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -8,6 +8,7 @@ */ #include #include +#include #include #include #include @@ -1785,6 +1786,7 @@ int stm32_pctl_probe(struct platform_device *pdev) clk_bulk_disable_unprepare(banks, pctl->clks); return ret; } +EXPORT_SYMBOL(stm32_pctl_probe); 
static int __maybe_unused stm32_pinctrl_restore_gpio_regs( struct stm32_pinctrl *pctl, u32 pin) @@ -1857,6 +1859,7 @@ int __maybe_unused stm32_pinctrl_suspend(struct device *dev) return 0; } +EXPORT_SYMBOL(stm32_pinctrl_suspend); int __maybe_unused stm32_pinctrl_resume(struct device *dev) { @@ -1873,3 +1876,8 @@ int __maybe_unused stm32_pinctrl_resume(struct device *dev) return 0; } +EXPORT_SYMBOL(stm32_pinctrl_resume); + +MODULE_AUTHOR("Alexandre Torgue "); +MODULE_DESCRIPTION("STM32 core pinctrl driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.h b/drivers/pinctrl/stm32/pinctrl-stm32.h index ed525f5bdd7c..9e44ad8c35b3 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.h +++ b/drivers/pinctrl/stm32/pinctrl-stm32.h @@ -66,8 +66,22 @@ struct stm32_pinctrl_match_data { bool rif_control; }; +/** + * stm32_pctl_probe() - Common probe for stm32 pinctrl drivers. + * @pdev: Pinctrl platform device. + */ int stm32_pctl_probe(struct platform_device *pdev); + +/** + * stm32_pinctrl_suspend() - Common suspend for stm32 pinctrl drivers. + * @dev: Pinctrl device. + */ int stm32_pinctrl_suspend(struct device *dev); + +/** + * stm32_pinctrl_resume() - Common resume for stm32 pinctrl drivers. + * @dev: Pinctrl device. + */ int stm32_pinctrl_resume(struct device *dev); #endif /* __PINCTRL_STM32_H */ diff --git a/drivers/pinctrl/stm32/pinctrl-stm32mp257.c b/drivers/pinctrl/stm32/pinctrl-stm32mp257.c index 984587207956..d226de524bfc 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32mp257.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32mp257.c @@ -4,6 +4,7 @@ * Author: Alexandre Torgue for STMicroelectronics. 
*/ #include +#include #include #include @@ -2564,6 +2565,7 @@ static const struct of_device_id stm32mp257_pctrl_match[] = { }, { } }; +MODULE_DEVICE_TABLE(of, stm32mp257_pctrl_match); static const struct dev_pm_ops stm32_pinctrl_dev_pm_ops = { SET_LATE_SYSTEM_SLEEP_PM_OPS(stm32_pinctrl_suspend, stm32_pinctrl_resume) @@ -2577,9 +2579,8 @@ static struct platform_driver stm32mp257_pinctrl_driver = { .pm = &stm32_pinctrl_dev_pm_ops, }, }; +module_platform_driver(stm32mp257_pinctrl_driver); -static int __init stm32mp257_pinctrl_init(void) -{ - return platform_driver_register(&stm32mp257_pinctrl_driver); -} -arch_initcall(stm32mp257_pinctrl_init); +MODULE_AUTHOR("Alexandre Torgue "); +MODULE_DESCRIPTION("STM32MP257 pinctrl driver"); +MODULE_LICENSE("GPL"); From da5b24fbf4b8aac24c20bb948e51850ae9426c87 Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Tue, 10 Jun 2025 16:30:42 +0200 Subject: [PATCH 0193/2411] MAINTAINERS: Add entry for STM32 pinctrl drivers and documentation Add an entry to make myself a maintainer of STM32 pinctrl drivers and documentation. Exclude the HDP driver, already covered by another maintainer. 
Signed-off-by: Antonio Borneo Link: https://lore.kernel.org/20250610143042.295376-6-antonio.borneo@foss.st.com Signed-off-by: Linus Walleij --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..13b1226cc4b9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23471,6 +23471,14 @@ S: Maintained F: Documentation/devicetree/bindings/memory-controllers/st,stm32mp25-omm.yaml F: drivers/memory/stm32_omm.c +ST STM32 PINCTRL DRIVER +M: Antonio Borneo +S: Maintained +F: Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml +F: drivers/pinctrl/stm32/ +F: include/dt-bindings/pinctrl/stm32-pinfunc.h +X: drivers/pinctrl/stm32/pinctrl-stm32-hdp.c + ST STM32 SPI DRIVER M: Alain Volmat L: linux-spi@vger.kernel.org From a88ca6deca4f19c95811433aa23bab7b74d182bb Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Tue, 10 Jun 2025 17:23:05 +0200 Subject: [PATCH 0194/2411] pinctrl: stm32: Handle RSVD pin configuration When a pin is assigned to a coprocessor not running Linux, the pin should be flagged in the DT as reserved and not usable by Linux. Add pin configuration RSVD (Reserved) and instrument the driver to not update the mux setting. 
Signed-off-by: Fabien Dessenne Signed-off-by: Antonio Borneo Link: https://lore.kernel.org/20250610152309.299438-2-antonio.borneo@foss.st.com Signed-off-by: Linus Walleij --- drivers/pinctrl/stm32/pinctrl-stm32.c | 9 +++++++++ drivers/pinctrl/stm32/pinctrl-stm32.h | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index ce2a5fee4230..f47c4e6f12b4 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -91,6 +91,7 @@ static const char * const stm32_gpio_functions[] = { "af8", "af9", "af10", "af11", "af12", "af13", "af14", "af15", "analog", + "reserved", }; struct stm32_pinctrl_group { @@ -663,6 +664,9 @@ static bool stm32_pctrl_is_function_valid(struct stm32_pinctrl *pctl, if (pin->pin.number != pin_num) continue; + if (fnum == STM32_PIN_RSVD) + return true; + for (k = 0; k < STM32_CONFIG_NUM; k++) { if (func->num == fnum) return true; @@ -962,6 +966,11 @@ static int stm32_pmx_set_mux(struct pinctrl_dev *pctldev, return -EINVAL; } + if (function == STM32_PIN_RSVD) { + dev_dbg(pctl->dev, "Reserved pins, skipping HW update.\n"); + return 0; + } + bank = gpiochip_get_data(range->gc); pin = stm32_gpio_pin(g->pin); diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.h b/drivers/pinctrl/stm32/pinctrl-stm32.h index 9e44ad8c35b3..b98a4141bf2c 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.h +++ b/drivers/pinctrl/stm32/pinctrl-stm32.h @@ -17,7 +17,8 @@ #define STM32_PIN_GPIO 0 #define STM32_PIN_AF(x) ((x) + 1) #define STM32_PIN_ANALOG (STM32_PIN_AF(15) + 1) -#define STM32_CONFIG_NUM (STM32_PIN_ANALOG + 1) +#define STM32_PIN_RSVD (STM32_PIN_ANALOG + 1) +#define STM32_CONFIG_NUM (STM32_PIN_RSVD + 1) /* package information */ #define STM32MP_PKG_AA BIT(0) From 4cdf874f67adfdec4f0a288c76f9aba05f9babe2 Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Tue, 10 Jun 2025 17:23:06 +0200 Subject: [PATCH 0195/2411] dt-bindings: pinctrl: stm32: Add 
RSVD mux function Document the RSVD (Reserved) mux function, used to reserve pins for a coprocessor not running Linux. Signed-off-by: Fabien Dessenne Signed-off-by: Antonio Borneo Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250610152309.299438-3-antonio.borneo@foss.st.com Signed-off-by: Linus Walleij --- .../devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml | 8 ++++++++ include/dt-bindings/pinctrl/stm32-pinfunc.h | 1 + 2 files changed, 9 insertions(+) diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml index a28d77748095..5d17d6487ae9 100644 --- a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml @@ -160,9 +160,13 @@ patternProperties: * ... * 16 : Alternate Function 15 * 17 : Analog + * 18 : Reserved To simplify the usage, macro is available to generate "pinmux" field. This macro is available here: - include/dt-bindings/pinctrl/stm32-pinfunc.h + Setting the pinmux's function to the Reserved (RSVD) value is used to inform + the driver that it shall not apply the mux setting. This can be used to + reserve some pins, for example to a co-processor not running Linux. Some examples of using macro: /* GPIO A9 set as alternate function 2 */ ... { @@ -176,6 +180,10 @@ patternProperties: ... { pinmux = ; }; + /* GPIO A9 reserved for co-processor */ + ... 
{ + pinmux = ; + }; bias-disable: type: boolean diff --git a/include/dt-bindings/pinctrl/stm32-pinfunc.h b/include/dt-bindings/pinctrl/stm32-pinfunc.h index 28ad0235086a..af3fd388329a 100644 --- a/include/dt-bindings/pinctrl/stm32-pinfunc.h +++ b/include/dt-bindings/pinctrl/stm32-pinfunc.h @@ -26,6 +26,7 @@ #define AF14 0xf #define AF15 0x10 #define ANALOG 0x11 +#define RSVD 0x12 /* define Pins number*/ #define PIN_NO(port, line) (((port) - 'A') * 0x10 + (line)) From 9b369669452f500fc7334aad62bd8c96a075245f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 11 Jun 2025 08:13:33 +0200 Subject: [PATCH 0196/2411] pinctrl: starfive: Allow compile testing on other platforms Always descend into drivers/pinctrl/starfive/ because limiting it with SOC_STARFIVE is redundant since its Makefile doesn't build anything if no Starfive-specific pin control Kconfig options are enabled. This allows compile testing on other architectures with allyesconfig. Reviewed-by: Geert Uytterhoeven Signed-off-by: Krzysztof Kozlowski Reviewed-by: Emil Renner Berthing Acked-by: Hal Feng Link: https://lore.kernel.org/20250611-pinctrl-const-desc-v2-1-b11c1d650384@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/Makefile b/drivers/pinctrl/Makefile index ac27e88677d1..dcede70b2566 100644 --- a/drivers/pinctrl/Makefile +++ b/drivers/pinctrl/Makefile @@ -82,7 +82,7 @@ obj-y += sophgo/ obj-y += spacemit/ obj-$(CONFIG_PINCTRL_SPEAR) += spear/ obj-y += sprd/ -obj-$(CONFIG_SOC_STARFIVE) += starfive/ +obj-y += starfive/ obj-$(CONFIG_PINCTRL_STM32) += stm32/ obj-y += sunplus/ obj-$(CONFIG_PINCTRL_SUNXI) += sunxi/ From 1982621decaf788d0611fc291fe89b297b6e5510 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 11 Jun 2025 08:13:34 +0200 Subject: [PATCH 0197/2411] pinctrl: Allow compile testing for K210, TB10X and ZYNQ Pinctrl drivers for K210, TB10X and ZYNQ do not reference any machine 
headers, thus can be compile tested for increased build coverage. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250611-pinctrl-const-desc-v2-2-b11c1d650384@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/Kconfig | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig index 33db9104df17..95f032c7ec69 100644 --- a/drivers/pinctrl/Kconfig +++ b/drivers/pinctrl/Kconfig @@ -269,7 +269,8 @@ config PINCTRL_INGENIC config PINCTRL_K210 bool "Pinctrl driver for the Canaan Kendryte K210 SoC" - depends on RISCV && SOC_CANAAN_K210 && OF + depends on RISCV && SOC_CANAAN_K210 || COMPILE_TEST + depends on OF select GENERIC_PINMUX_FUNCTIONS select GENERIC_PINCONF select GPIOLIB @@ -554,8 +555,8 @@ config PINCTRL_SX150X - 16 bits: sx1509q, sx1506q config PINCTRL_TB10X - bool - depends on OF && ARC_PLAT_TB10X + bool "Pinctrl for TB10X" if COMPILE_TEST + depends on OF && ARC_PLAT_TB10X || COMPILE_TEST select GPIOLIB config PINCTRL_TPS6594 @@ -590,7 +591,7 @@ config PINCTRL_TH1520 config PINCTRL_ZYNQ bool "Pinctrl driver for Xilinx Zynq" - depends on ARCH_ZYNQ + depends on ARCH_ZYNQ || COMPILE_TEST select PINMUX select GENERIC_PINCONF help From 91ed43b045119fe47b256af83eb7ad35eb0ba356 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 11 Jun 2025 08:13:35 +0200 Subject: [PATCH 0198/2411] pinctrl: amd: Constify pointers to 'pinctrl_desc' Internal functions obtaining pointers to 'struct pinctrl_desc' do not modify the contents so they can be made pointers to const. This makes code safer, explicit and later allows constifying 'pinctrl_desc' in pinctrl core code. 
Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250611-pinctrl-const-desc-v2-3-b11c1d650384@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 5cf3db6d78b7..85617cfed229 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -872,7 +872,7 @@ static const struct pinconf_ops amd_pinconf_ops = { static void amd_gpio_irq_init(struct amd_gpio *gpio_dev) { - struct pinctrl_desc *desc = gpio_dev->pctrl->desc; + const struct pinctrl_desc *desc = gpio_dev->pctrl->desc; unsigned long flags; u32 pin_reg, mask; int i; @@ -901,7 +901,7 @@ static void amd_gpio_irq_init(struct amd_gpio *gpio_dev) static void amd_gpio_check_pending(void) { struct amd_gpio *gpio_dev = pinctrl_dev; - struct pinctrl_desc *desc = gpio_dev->pctrl->desc; + const struct pinctrl_desc *desc = gpio_dev->pctrl->desc; int i; if (!pm_debug_messages_on) @@ -957,7 +957,7 @@ static bool amd_gpio_should_save(struct amd_gpio *gpio_dev, unsigned int pin) static int amd_gpio_suspend_hibernate_common(struct device *dev, bool is_suspend) { struct amd_gpio *gpio_dev = dev_get_drvdata(dev); - struct pinctrl_desc *desc = gpio_dev->pctrl->desc; + const struct pinctrl_desc *desc = gpio_dev->pctrl->desc; unsigned long flags; int i; u32 wake_mask = is_suspend ? 
WAKE_SOURCE_SUSPEND : WAKE_SOURCE_HIBERNATE; @@ -1001,7 +1001,7 @@ static int amd_gpio_hibernate(struct device *dev) static int amd_gpio_resume(struct device *dev) { struct amd_gpio *gpio_dev = dev_get_drvdata(dev); - struct pinctrl_desc *desc = gpio_dev->pctrl->desc; + const struct pinctrl_desc *desc = gpio_dev->pctrl->desc; unsigned long flags; int i; From 9724e6f1953644cc9a5d102605d624bc79609038 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 11 Jun 2025 08:13:36 +0200 Subject: [PATCH 0199/2411] pinctrl: Constify pointers to 'pinctrl_desc' Pin controller core code only stores the pointer to 'struct pinctrl_desc' and does not modify it anywhere. The pointer can be changed to pointer to const which makes the code safer, explicit and later allows constifying 'pinctrl_desc' allocations in individual drivers. Reviewed-by: Geert Uytterhoeven Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250611-pinctrl-const-desc-v2-4-b11c1d650384@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 13 +++++++------ drivers/pinctrl/core.h | 2 +- include/linux/pinctrl/pinctrl.h | 8 ++++---- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 9046292d1360..73b78d6eac67 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -2062,7 +2062,7 @@ static int pinctrl_check_ops(struct pinctrl_dev *pctldev) * @driver_data: private pin controller data for this pin controller */ static struct pinctrl_dev * -pinctrl_init_controller(struct pinctrl_desc *pctldesc, struct device *dev, +pinctrl_init_controller(const struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data) { struct pinctrl_dev *pctldev; @@ -2132,7 +2132,8 @@ pinctrl_init_controller(struct pinctrl_desc *pctldesc, struct device *dev, return ERR_PTR(ret); } -static void pinctrl_uninit_controller(struct pinctrl_dev *pctldev, struct pinctrl_desc *pctldesc) +static void pinctrl_uninit_controller(struct 
pinctrl_dev *pctldev, + const struct pinctrl_desc *pctldesc) { pinctrl_free_pindescs(pctldev, pctldesc->pins, pctldesc->npins); @@ -2209,7 +2210,7 @@ EXPORT_SYMBOL_GPL(pinctrl_enable); * struct pinctrl_dev handle. To avoid issues later on, please use the * new pinctrl_register_and_init() below instead. */ -struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc, +struct pinctrl_dev *pinctrl_register(const struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data) { struct pinctrl_dev *pctldev; @@ -2239,7 +2240,7 @@ EXPORT_SYMBOL_GPL(pinctrl_register); * Note that pinctrl_enable() still needs to be manually called after * this once the driver is ready. */ -int pinctrl_register_and_init(struct pinctrl_desc *pctldesc, +int pinctrl_register_and_init(const struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data, struct pinctrl_dev **pctldev) { @@ -2330,7 +2331,7 @@ static int devm_pinctrl_dev_match(struct device *dev, void *res, void *data) * The pinctrl device will be automatically released when the device is unbound. */ struct pinctrl_dev *devm_pinctrl_register(struct device *dev, - struct pinctrl_desc *pctldesc, + const struct pinctrl_desc *pctldesc, void *driver_data) { struct pinctrl_dev **ptr, *pctldev; @@ -2364,7 +2365,7 @@ EXPORT_SYMBOL_GPL(devm_pinctrl_register); * The pinctrl device will be automatically released when the device is unbound. 
*/ int devm_pinctrl_register_and_init(struct device *dev, - struct pinctrl_desc *pctldesc, + const struct pinctrl_desc *pctldesc, void *driver_data, struct pinctrl_dev **pctldev) { diff --git a/drivers/pinctrl/core.h b/drivers/pinctrl/core.h index d6c24978e708..fc513a9cdd4f 100644 --- a/drivers/pinctrl/core.h +++ b/drivers/pinctrl/core.h @@ -51,7 +51,7 @@ struct pinctrl_state; */ struct pinctrl_dev { struct list_head node; - struct pinctrl_desc *desc; + const struct pinctrl_desc *desc; struct radix_tree_root pin_desc_tree; #ifdef CONFIG_GENERIC_PINCTRL_GROUPS struct radix_tree_root pin_group_tree; diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 9a8189ffd0f2..d138e1815645 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -165,25 +165,25 @@ struct pinctrl_desc { /* External interface to pin controller */ -extern int pinctrl_register_and_init(struct pinctrl_desc *pctldesc, +extern int pinctrl_register_and_init(const struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data, struct pinctrl_dev **pctldev); extern int pinctrl_enable(struct pinctrl_dev *pctldev); /* Please use pinctrl_register_and_init() and pinctrl_enable() instead */ -extern struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc, +extern struct pinctrl_dev *pinctrl_register(const struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data); extern void pinctrl_unregister(struct pinctrl_dev *pctldev); extern int devm_pinctrl_register_and_init(struct device *dev, - struct pinctrl_desc *pctldesc, + const struct pinctrl_desc *pctldesc, void *driver_data, struct pinctrl_dev **pctldev); /* Please use devm_pinctrl_register_and_init() instead */ extern struct pinctrl_dev *devm_pinctrl_register(struct device *dev, - struct pinctrl_desc *pctldesc, + const struct pinctrl_desc *pctldesc, void *driver_data); extern void devm_pinctrl_unregister(struct device *dev, From e3d0571befa61a2692f6efb5f2c33926e99aea84 Mon 
Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 11 Jun 2025 08:13:37 +0200 Subject: [PATCH 0200/2411] pinctrl: aspeed: Constify static 'pinctrl_desc' The local static 'struct pinctrl_desc' is not modified, so can be made const for code safety. Reviewed-by: Andrew Jeffery Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250611-pinctrl-const-desc-v2-5-b11c1d650384@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinctrl-aspeed-g4.c | 2 +- drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c | 2 +- drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c | 2 +- drivers/pinctrl/aspeed/pinctrl-aspeed.c | 2 +- drivers/pinctrl/aspeed/pinctrl-aspeed.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g4.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g4.c index 774f8d05142f..cb295856dda1 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g4.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g4.c @@ -2653,7 +2653,7 @@ static const struct pinconf_ops aspeed_g4_conf_ops = { .pin_config_group_set = aspeed_pin_config_group_set, }; -static struct pinctrl_desc aspeed_g4_pinctrl_desc = { +static const struct pinctrl_desc aspeed_g4_pinctrl_desc = { .name = "aspeed-g4-pinctrl", .pins = aspeed_g4_pins, .npins = ARRAY_SIZE(aspeed_g4_pins), diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c index 5bb8fd0d1e41..792089628362 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c @@ -2845,7 +2845,7 @@ static const struct pinconf_ops aspeed_g5_conf_ops = { .pin_config_group_set = aspeed_pin_config_group_set, }; -static struct pinctrl_desc aspeed_g5_pinctrl_desc = { +static const struct pinctrl_desc aspeed_g5_pinctrl_desc = { .name = "aspeed-g5-pinctrl", .pins = aspeed_g5_pins, .npins = ARRAY_SIZE(aspeed_g5_pins), diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c index 
5a7cd0a88687..51a63cf92023 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c @@ -2763,7 +2763,7 @@ static const struct pinconf_ops aspeed_g6_conf_ops = { .pin_config_group_set = aspeed_pin_config_group_set, }; -static struct pinctrl_desc aspeed_g6_pinctrl_desc = { +static const struct pinctrl_desc aspeed_g6_pinctrl_desc = { .name = "aspeed-g6-pinctrl", .pins = aspeed_g6_pins, .npins = ARRAY_SIZE(aspeed_g6_pins), diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c index 9c6ee46ac7a0..7e0ebf11af16 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c @@ -441,7 +441,7 @@ int aspeed_gpio_request_enable(struct pinctrl_dev *pctldev, } int aspeed_pinctrl_probe(struct platform_device *pdev, - struct pinctrl_desc *pdesc, + const struct pinctrl_desc *pdesc, struct aspeed_pinctrl_data *pdata) { struct device *parent; diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.h b/drivers/pinctrl/aspeed/pinctrl-aspeed.h index 4dcde3bc29c8..28f3bde25081 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed.h +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.h @@ -102,7 +102,7 @@ int aspeed_gpio_request_enable(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned int offset); int aspeed_pinctrl_probe(struct platform_device *pdev, - struct pinctrl_desc *pdesc, + const struct pinctrl_desc *pdesc, struct aspeed_pinctrl_data *pdata); int aspeed_pin_config_get(struct pinctrl_dev *pctldev, unsigned int offset, unsigned long *config); From 25b306c484947b5f4baebb97e34163d9984dc480 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 11 Jun 2025 08:13:38 +0200 Subject: [PATCH 0201/2411] pinctrl: nuvoton: Constify static 'pinctrl_desc' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The local static 'struct pinctrl_desc' is not modified, so can be made const for code safety. 
Signed-off-by: Krzysztof Kozlowski Reviewed-by: J. Neuschäfer Link: https://lore.kernel.org/20250611-pinctrl-const-desc-v2-6-b11c1d650384@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c | 2 +- drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c | 2 +- drivers/pinctrl/nuvoton/pinctrl-wpcm450.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c index dfd32feb3428..b8872d8f5930 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c +++ b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c @@ -1817,7 +1817,7 @@ static const struct pinconf_ops npcm7xx_pinconf_ops = { }; /* pinctrl_desc */ -static struct pinctrl_desc npcm7xx_pinctrl_desc = { +static const struct pinctrl_desc npcm7xx_pinctrl_desc = { .name = "npcm7xx-pinctrl", .pins = npcm7xx_pins, .npins = ARRAY_SIZE(npcm7xx_pins), diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c index be3db8ab406c..3c3b9d8d3681 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c +++ b/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c @@ -2299,7 +2299,7 @@ static const struct pinconf_ops npcm8xx_pinconf_ops = { }; /* pinctrl_desc */ -static struct pinctrl_desc npcm8xx_pinctrl_desc = { +static const struct pinctrl_desc npcm8xx_pinctrl_desc = { .name = "npcm8xx-pinctrl", .pins = npcm8xx_pins, .npins = ARRAY_SIZE(npcm8xx_pins), diff --git a/drivers/pinctrl/nuvoton/pinctrl-wpcm450.c b/drivers/pinctrl/nuvoton/pinctrl-wpcm450.c index 4264ca749175..8d8314ba0e4c 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-wpcm450.c +++ b/drivers/pinctrl/nuvoton/pinctrl-wpcm450.c @@ -989,7 +989,7 @@ static const struct pinconf_ops wpcm450_pinconf_ops = { .pin_config_set = wpcm450_config_set, }; -static struct pinctrl_desc wpcm450_pinctrl_desc = { +static const struct pinctrl_desc wpcm450_pinctrl_desc = { .name = "wpcm450-pinctrl", .pins = wpcm450_pins, .npins = ARRAY_SIZE(wpcm450_pins), From 
3cbbb91f11a15e88447e6ae78a6d4031ce502b27 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 11 Jun 2025 08:13:39 +0200 Subject: [PATCH 0202/2411] pinctrl: bcm: Constify static 'pinctrl_desc' The local static 'struct pinctrl_desc' is not modified, so can be made const for code safety. Reviewed-by: Florian Fainelli Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250611-pinctrl-const-desc-v2-7-b11c1d650384@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/bcm/pinctrl-bcm4908.c | 2 +- drivers/pinctrl/bcm/pinctrl-ns.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-bcm4908.c b/drivers/pinctrl/bcm/pinctrl-bcm4908.c index f190e0997f1f..12f7a253ea4d 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm4908.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm4908.c @@ -456,7 +456,7 @@ static const struct pinmux_ops bcm4908_pinctrl_pmxops = { * Controller code */ -static struct pinctrl_desc bcm4908_pinctrl_desc = { +static const struct pinctrl_desc bcm4908_pinctrl_desc = { .name = "bcm4908-pinctrl", .pctlops = &bcm4908_pinctrl_ops, .pmxops = &bcm4908_pinctrl_pmxops, diff --git a/drivers/pinctrl/bcm/pinctrl-ns.c b/drivers/pinctrl/bcm/pinctrl-ns.c index 6bb2b461950b..03bd01b4a945 100644 --- a/drivers/pinctrl/bcm/pinctrl-ns.c +++ b/drivers/pinctrl/bcm/pinctrl-ns.c @@ -192,7 +192,7 @@ static const struct pinmux_ops ns_pinctrl_pmxops = { * Controller code */ -static struct pinctrl_desc ns_pinctrl_desc = { +static const struct pinctrl_desc ns_pinctrl_desc = { .name = "pinctrl-ns", .pctlops = &ns_pinctrl_ops, .pmxops = &ns_pinctrl_pmxops, From fa856a2c3fcd0f3e2e65360c3fb2045be5f36f3a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 11 Jun 2025 08:13:40 +0200 Subject: [PATCH 0203/2411] pinctrl: bcm: cygnus-mux: Move fixed assignments to 'pinctrl_desc' definition Assign 'struct pinctrl_desc' .npins member in definition to make clear that number of pins is fixed and have less code in the probe. 
Reviewed-by: Florian Fainelli Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250611-pinctrl-const-desc-v2-8-b11c1d650384@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/bcm/pinctrl-cygnus-mux.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-cygnus-mux.c b/drivers/pinctrl/bcm/pinctrl-cygnus-mux.c index bf9597800954..e9aa99f85e05 100644 --- a/drivers/pinctrl/bcm/pinctrl-cygnus-mux.c +++ b/drivers/pinctrl/bcm/pinctrl-cygnus-mux.c @@ -903,6 +903,7 @@ static struct pinctrl_desc cygnus_pinctrl_desc = { .name = "cygnus-pinmux", .pctlops = &cygnus_pinctrl_ops, .pmxops = &cygnus_pinmux_ops, + .npins = ARRAY_SIZE(cygnus_pins), }; static int cygnus_mux_log_init(struct cygnus_pinctrl *pinctrl) @@ -935,7 +936,6 @@ static int cygnus_pinmux_probe(struct platform_device *pdev) struct cygnus_pinctrl *pinctrl; int i, ret; struct pinctrl_pin_desc *pins; - unsigned num_pins = ARRAY_SIZE(cygnus_pins); pinctrl = devm_kzalloc(&pdev->dev, sizeof(*pinctrl), GFP_KERNEL); if (!pinctrl) @@ -963,11 +963,12 @@ static int cygnus_pinmux_probe(struct platform_device *pdev) return ret; } - pins = devm_kcalloc(&pdev->dev, num_pins, sizeof(*pins), GFP_KERNEL); + pins = devm_kcalloc(&pdev->dev, ARRAY_SIZE(cygnus_pins), sizeof(*pins), + GFP_KERNEL); if (!pins) return -ENOMEM; - for (i = 0; i < num_pins; i++) { + for (i = 0; i < ARRAY_SIZE(cygnus_pins); i++) { pins[i].number = cygnus_pins[i].pin; pins[i].name = cygnus_pins[i].name; pins[i].drv_data = &cygnus_pins[i].gpio_mux; @@ -978,7 +979,6 @@ static int cygnus_pinmux_probe(struct platform_device *pdev) pinctrl->functions = cygnus_pin_functions; pinctrl->num_functions = ARRAY_SIZE(cygnus_pin_functions); cygnus_pinctrl_desc.pins = pins; - cygnus_pinctrl_desc.npins = num_pins; pinctrl->pctl = devm_pinctrl_register(&pdev->dev, &cygnus_pinctrl_desc, pinctrl); From 9b2ae7ca1ac9cf5b6b5ff934bfba900e59477ba3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 11 
Jun 2025 08:13:41 +0200 Subject: [PATCH 0204/2411] pinctrl: bcm: cygnus-ns2: Move fixed assignments to 'pinctrl_desc' definition Assign 'struct pinctrl_desc' .npins member in definition to make clear that number of pins is fixed and have less code in the probe. Reviewed-by: Florian Fainelli Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250611-pinctrl-const-desc-v2-9-b11c1d650384@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/bcm/pinctrl-ns2-mux.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-ns2-mux.c b/drivers/pinctrl/bcm/pinctrl-ns2-mux.c index 04f4fca854cc..23ab3ab064b6 100644 --- a/drivers/pinctrl/bcm/pinctrl-ns2-mux.c +++ b/drivers/pinctrl/bcm/pinctrl-ns2-mux.c @@ -971,6 +971,7 @@ static struct pinctrl_desc ns2_pinctrl_desc = { .pctlops = &ns2_pinctrl_ops, .pmxops = &ns2_pinmux_ops, .confops = &ns2_pinconf_ops, + .npins = ARRAY_SIZE(ns2_pins), }; static int ns2_mux_log_init(struct ns2_pinctrl *pinctrl) @@ -1026,7 +1027,6 @@ static int ns2_pinmux_probe(struct platform_device *pdev) struct resource *res; int i, ret; struct pinctrl_pin_desc *pins; - unsigned int num_pins = ARRAY_SIZE(ns2_pins); pinctrl = devm_kzalloc(&pdev->dev, sizeof(*pinctrl), GFP_KERNEL); if (!pinctrl) @@ -1060,11 +1060,12 @@ static int ns2_pinmux_probe(struct platform_device *pdev) return ret; } - pins = devm_kcalloc(&pdev->dev, num_pins, sizeof(*pins), GFP_KERNEL); + pins = devm_kcalloc(&pdev->dev, ARRAY_SIZE(ns2_pins), sizeof(*pins), + GFP_KERNEL); if (!pins) return -ENOMEM; - for (i = 0; i < num_pins; i++) { + for (i = 0; i < ARRAY_SIZE(ns2_pins); i++) { pins[i].number = ns2_pins[i].pin; pins[i].name = ns2_pins[i].name; pins[i].drv_data = &ns2_pins[i]; @@ -1075,7 +1076,6 @@ static int ns2_pinmux_probe(struct platform_device *pdev) pinctrl->functions = ns2_pin_functions; pinctrl->num_functions = ARRAY_SIZE(ns2_pin_functions); ns2_pinctrl_desc.pins = pins; - ns2_pinctrl_desc.npins = num_pins; 
pinctrl->pctl = pinctrl_register(&ns2_pinctrl_desc, &pdev->dev, pinctrl); From dd12fca44967ce66bf052644e47f99221715204f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 11 Jun 2025 08:13:42 +0200 Subject: [PATCH 0205/2411] pinctrl: bcm: cygnus-nsp: Move fixed assignments to 'pinctrl_desc' definition Assign 'struct pinctrl_desc' .npins member in definition to make clear that number of pins is fixed and have less code in the probe. Reviewed-by: Florian Fainelli Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250611-pinctrl-const-desc-v2-10-b11c1d650384@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/bcm/pinctrl-nsp-mux.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-nsp-mux.c b/drivers/pinctrl/bcm/pinctrl-nsp-mux.c index eb6298507c1d..9b716c0d2b94 100644 --- a/drivers/pinctrl/bcm/pinctrl-nsp-mux.c +++ b/drivers/pinctrl/bcm/pinctrl-nsp-mux.c @@ -525,6 +525,7 @@ static struct pinctrl_desc nsp_pinctrl_desc = { .name = "nsp-pinmux", .pctlops = &nsp_pinctrl_ops, .pmxops = &nsp_pinmux_ops, + .npins = ARRAY_SIZE(nsp_pins), }; static int nsp_mux_log_init(struct nsp_pinctrl *pinctrl) @@ -556,7 +557,6 @@ static int nsp_pinmux_probe(struct platform_device *pdev) struct resource *res; int i, ret; struct pinctrl_pin_desc *pins; - unsigned int num_pins = ARRAY_SIZE(nsp_pins); pinctrl = devm_kzalloc(&pdev->dev, sizeof(*pinctrl), GFP_KERNEL); if (!pinctrl) @@ -589,11 +589,12 @@ static int nsp_pinmux_probe(struct platform_device *pdev) return ret; } - pins = devm_kcalloc(&pdev->dev, num_pins, sizeof(*pins), GFP_KERNEL); + pins = devm_kcalloc(&pdev->dev, ARRAY_SIZE(nsp_pins), sizeof(*pins), + GFP_KERNEL); if (!pins) return -ENOMEM; - for (i = 0; i < num_pins; i++) { + for (i = 0; i < ARRAY_SIZE(nsp_pins); i++) { pins[i].number = nsp_pins[i].pin; pins[i].name = nsp_pins[i].name; pins[i].drv_data = &nsp_pins[i].gpio_select; @@ -604,7 +605,6 @@ static int nsp_pinmux_probe(struct 
platform_device *pdev) pinctrl->functions = nsp_pin_functions; pinctrl->num_functions = ARRAY_SIZE(nsp_pin_functions); nsp_pinctrl_desc.pins = pins; - nsp_pinctrl_desc.npins = num_pins; pinctrl->pctl = devm_pinctrl_register(&pdev->dev, &nsp_pinctrl_desc, pinctrl); From 1bd634e535db29055c106b5f0ce6f1569958ac5b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 11 Jun 2025 08:13:43 +0200 Subject: [PATCH 0206/2411] pinctrl: as3722: Move fixed assignments to 'pinctrl_desc' definition Assign 'struct pinctrl_desc' .pins and .npins members in definition to make clear that number of pins is fixed and have less code in the probe. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250611-pinctrl-const-desc-v2-11-b11c1d650384@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-as3722.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/pinctrl-as3722.c b/drivers/pinctrl/pinctrl-as3722.c index 0d8c75ce20ed..ed7b2c482ff0 100644 --- a/drivers/pinctrl/pinctrl-as3722.c +++ b/drivers/pinctrl/pinctrl-as3722.c @@ -422,6 +422,8 @@ static struct pinctrl_desc as3722_pinctrl_desc = { .pmxops = &as3722_pinmux_ops, .confops = &as3722_pinconf_ops, .owner = THIS_MODULE, + .pins = as3722_pins_desc, + .npins = ARRAY_SIZE(as3722_pins_desc), }; static int as3722_gpio_get(struct gpio_chip *chip, unsigned offset) @@ -550,8 +552,6 @@ static int as3722_pinctrl_probe(struct platform_device *pdev) as_pci->pin_groups = as3722_pingroups; as_pci->num_pin_groups = ARRAY_SIZE(as3722_pingroups); as3722_pinctrl_desc.name = dev_name(&pdev->dev); - as3722_pinctrl_desc.pins = as3722_pins_desc; - as3722_pinctrl_desc.npins = ARRAY_SIZE(as3722_pins_desc); as_pci->pctl = devm_pinctrl_register(&pdev->dev, &as3722_pinctrl_desc, as_pci); if (IS_ERR(as_pci->pctl)) { From ed32213ffc1fae738f3d67c4be70ae14391fe347 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 11 Jun 2025 08:13:44 +0200 Subject: [PATCH 0207/2411] pinctrl: 
max77620: Move fixed assignments to 'pinctrl_desc' definition Assign 'struct pinctrl_desc' .pins, .npins and other members in definition to make clear that number of pins is fixed and have less code in the probe. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250611-pinctrl-const-desc-v2-12-b11c1d650384@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-max77620.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/pinctrl-max77620.c b/drivers/pinctrl/pinctrl-max77620.c index d236daa7c13e..acb945a25743 100644 --- a/drivers/pinctrl/pinctrl-max77620.c +++ b/drivers/pinctrl/pinctrl-max77620.c @@ -543,6 +543,10 @@ static struct pinctrl_desc max77620_pinctrl_desc = { .pctlops = &max77620_pinctrl_ops, .pmxops = &max77620_pinmux_ops, .confops = &max77620_pinconf_ops, + .pins = max77620_pins_desc, + .npins = ARRAY_SIZE(max77620_pins_desc), + .num_custom_params = ARRAY_SIZE(max77620_cfg_params), + .custom_params = max77620_cfg_params, }; static int max77620_pinctrl_probe(struct platform_device *pdev) @@ -569,11 +573,6 @@ static int max77620_pinctrl_probe(struct platform_device *pdev) platform_set_drvdata(pdev, mpci); max77620_pinctrl_desc.name = dev_name(&pdev->dev); - max77620_pinctrl_desc.pins = max77620_pins_desc; - max77620_pinctrl_desc.npins = ARRAY_SIZE(max77620_pins_desc); - max77620_pinctrl_desc.num_custom_params = - ARRAY_SIZE(max77620_cfg_params); - max77620_pinctrl_desc.custom_params = max77620_cfg_params; for (i = 0; i < MAX77620_PIN_NUM; ++i) { mpci->fps_config[i].active_fps_src = -1; From d9ef8eec634c06c50bc96ac8eb556257a34ff13f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 11 Jun 2025 08:13:45 +0200 Subject: [PATCH 0208/2411] pinctrl: palmas: Move fixed assignments to 'pinctrl_desc' definition Assign 'struct pinctrl_desc' .pins and .npins members in definition to make clear that number of pins is fixed and have less code in the probe. 
Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250611-pinctrl-const-desc-v2-13-b11c1d650384@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-palmas.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/pinctrl-palmas.c b/drivers/pinctrl/pinctrl-palmas.c index 9e272f9deb4f..d69f114e4642 100644 --- a/drivers/pinctrl/pinctrl-palmas.c +++ b/drivers/pinctrl/pinctrl-palmas.c @@ -956,6 +956,8 @@ static struct pinctrl_desc palmas_pinctrl_desc = { .pmxops = &palmas_pinmux_ops, .confops = &palmas_pinconf_ops, .owner = THIS_MODULE, + .pins = palmas_pins_desc, + .npins = ARRAY_SIZE(palmas_pins_desc), }; struct palmas_pinctrl_data { @@ -1023,8 +1025,6 @@ static int palmas_pinctrl_probe(struct platform_device *pdev) } palmas_pinctrl_desc.name = dev_name(&pdev->dev); - palmas_pinctrl_desc.pins = palmas_pins_desc; - palmas_pinctrl_desc.npins = ARRAY_SIZE(palmas_pins_desc); pci->pctl = devm_pinctrl_register(&pdev->dev, &palmas_pinctrl_desc, pci); if (IS_ERR(pci->pctl)) { From 10d038214b1b5b351e716e720fa1b4c07d24def8 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 11 Jun 2025 08:13:46 +0200 Subject: [PATCH 0209/2411] pinctrl: renesas: Move fixed assignments to 'pinctrl_desc' definition Assign 'struct pinctrl_desc' .pins and .npins members in definition to make clear that number of pins is fixed and have less code in the probe. 
Reviewed-by: Geert Uytterhoeven Acked-by: Geert Uytterhoeven Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250611-pinctrl-const-desc-v2-14-b11c1d650384@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/renesas/pinctrl-rzn1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/renesas/pinctrl-rzn1.c b/drivers/pinctrl/renesas/pinctrl-rzn1.c index d442d4f9981c..fb874867dbfb 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzn1.c +++ b/drivers/pinctrl/renesas/pinctrl-rzn1.c @@ -680,6 +680,8 @@ static struct pinctrl_desc rzn1_pinctrl_desc = { .pmxops = &rzn1_pmx_ops, .confops = &rzn1_pinconf_ops, .owner = THIS_MODULE, + .pins = rzn1_pins, + .npins = ARRAY_SIZE(rzn1_pins), }; static int rzn1_pinctrl_parse_groups(struct device_node *np, @@ -878,8 +880,6 @@ static int rzn1_pinctrl_probe(struct platform_device *pdev) ipctl->dev = &pdev->dev; rzn1_pinctrl_desc.name = dev_name(&pdev->dev); - rzn1_pinctrl_desc.pins = rzn1_pins; - rzn1_pinctrl_desc.npins = ARRAY_SIZE(rzn1_pins); ret = rzn1_pinctrl_probe_dt(pdev, ipctl); if (ret) { From c98ee6f300d213c1c5f9d39f5af4f944007a7aed Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 11 Jun 2025 08:13:47 +0200 Subject: [PATCH 0210/2411] pinctrl: pistachio: Constify static 'pinctrl_desc' The local static 'struct pinctrl_desc' is not modified, so can be made const for code safety after moving .pins and .npins assignment to definition. 
Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250611-pinctrl-const-desc-v2-15-b11c1d650384@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-pistachio.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c index e7bf60960961..7f8b562c81c9 100644 --- a/drivers/pinctrl/pinctrl-pistachio.c +++ b/drivers/pinctrl/pinctrl-pistachio.c @@ -1156,11 +1156,14 @@ static const struct pinconf_ops pistachio_pinconf_ops = { .is_generic = true, }; -static struct pinctrl_desc pistachio_pinctrl_desc = { +static const struct pinctrl_desc pistachio_pinctrl_desc = { .name = "pistachio-pinctrl", .pctlops = &pistachio_pinctrl_ops, .pmxops = &pistachio_pinmux_ops, .confops = &pistachio_pinconf_ops, + .pins = pistachio_pins, + .npins = ARRAY_SIZE(pistachio_pins), + }; static int pistachio_gpio_get_direction(struct gpio_chip *chip, unsigned offset) @@ -1474,9 +1477,6 @@ static int pistachio_pinctrl_probe(struct platform_device *pdev) pctl->gpio_banks = pistachio_gpio_banks; pctl->nbanks = ARRAY_SIZE(pistachio_gpio_banks); - pistachio_pinctrl_desc.pins = pctl->pins; - pistachio_pinctrl_desc.npins = pctl->npins; - pctl->pctldev = devm_pinctrl_register(&pdev->dev, &pistachio_pinctrl_desc, pctl); if (IS_ERR(pctl->pctldev)) { From 490bfd1ca55640e5f36e5e30245b4ab0f4a05118 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 11 Jun 2025 08:13:48 +0200 Subject: [PATCH 0211/2411] pinctrl: Constify static 'pinctrl_desc' The local static 'struct pinctrl_desc' is not modified, so can be made const for code safety. 
Acked-by: Lorenzo Bianconi Acked-by: Jesper Nilsson Reviewed-by: Charles Keepax Signed-off-by: Krzysztof Kozlowski Reviewed-by: Hal Feng Acked-by: Emil Renner Berthing Acked-by: Vladimir Zapolskiy Link: https://lore.kernel.org/20250611-pinctrl-const-desc-v2-16-b11c1d650384@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/berlin/berlin.c | 2 +- drivers/pinctrl/cirrus/pinctrl-cs42l43.c | 2 +- drivers/pinctrl/mediatek/pinctrl-airoha.c | 2 +- drivers/pinctrl/pinctrl-artpec6.c | 2 +- drivers/pinctrl/pinctrl-bm1880.c | 2 +- drivers/pinctrl/pinctrl-k210.c | 2 +- drivers/pinctrl/pinctrl-lpc18xx.c | 2 +- drivers/pinctrl/pinctrl-mlxbf3.c | 2 +- drivers/pinctrl/pinctrl-tb10x.c | 2 +- drivers/pinctrl/pinctrl-zynq.c | 2 +- drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/pinctrl/berlin/berlin.c b/drivers/pinctrl/berlin/berlin.c index c372a2a24be4..e5a35b803ce6 100644 --- a/drivers/pinctrl/berlin/berlin.c +++ b/drivers/pinctrl/berlin/berlin.c @@ -283,7 +283,7 @@ static int berlin_pinctrl_build_state(struct platform_device *pdev) return 0; } -static struct pinctrl_desc berlin_pctrl_desc = { +static const struct pinctrl_desc berlin_pctrl_desc = { .name = "berlin-pinctrl", .pctlops = &berlin_pinctrl_ops, .pmxops = &berlin_pinmux_ops, diff --git a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c index 628b60ccc2b0..a90beb986f5b 100644 --- a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c +++ b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c @@ -448,7 +448,7 @@ static const struct pinconf_ops cs42l43_pin_conf_ops = { .pin_config_group_set = cs42l43_pin_config_group_set, }; -static struct pinctrl_desc cs42l43_pin_desc = { +static const struct pinctrl_desc cs42l43_pin_desc = { .name = "cs42l43-pinctrl", .owner = THIS_MODULE, diff --git a/drivers/pinctrl/mediatek/pinctrl-airoha.c b/drivers/pinctrl/mediatek/pinctrl-airoha.c index b97b28ebb37a..ccd2b512e836 100644 --- 
a/drivers/pinctrl/mediatek/pinctrl-airoha.c +++ b/drivers/pinctrl/mediatek/pinctrl-airoha.c @@ -2852,7 +2852,7 @@ static const struct pinctrl_ops airoha_pctlops = { .dt_free_map = pinconf_generic_dt_free_map, }; -static struct pinctrl_desc airoha_pinctrl_desc = { +static const struct pinctrl_desc airoha_pinctrl_desc = { .name = KBUILD_MODNAME, .owner = THIS_MODULE, .pctlops = &airoha_pctlops, diff --git a/drivers/pinctrl/pinctrl-artpec6.c b/drivers/pinctrl/pinctrl-artpec6.c index 717f9592b28b..af67057128ff 100644 --- a/drivers/pinctrl/pinctrl-artpec6.c +++ b/drivers/pinctrl/pinctrl-artpec6.c @@ -907,7 +907,7 @@ static const struct pinconf_ops artpec6_pconf_ops = { .pin_config_group_set = artpec6_pconf_group_set, }; -static struct pinctrl_desc artpec6_desc = { +static const struct pinctrl_desc artpec6_desc = { .name = "artpec6-pinctrl", .owner = THIS_MODULE, .pins = artpec6_pins, diff --git a/drivers/pinctrl/pinctrl-bm1880.c b/drivers/pinctrl/pinctrl-bm1880.c index b0000fe5b31d..387798fb09be 100644 --- a/drivers/pinctrl/pinctrl-bm1880.c +++ b/drivers/pinctrl/pinctrl-bm1880.c @@ -1298,7 +1298,7 @@ static const struct pinmux_ops bm1880_pinmux_ops = { .set_mux = bm1880_pinmux_set_mux, }; -static struct pinctrl_desc bm1880_desc = { +static const struct pinctrl_desc bm1880_desc = { .name = "bm1880_pinctrl", .pins = bm1880_pins, .npins = ARRAY_SIZE(bm1880_pins), diff --git a/drivers/pinctrl/pinctrl-k210.c b/drivers/pinctrl/pinctrl-k210.c index eddb01796a83..66c04120c29d 100644 --- a/drivers/pinctrl/pinctrl-k210.c +++ b/drivers/pinctrl/pinctrl-k210.c @@ -879,7 +879,7 @@ static const struct pinctrl_ops k210_pinctrl_ops = { .dt_free_map = pinconf_generic_dt_free_map, }; -static struct pinctrl_desc k210_pinctrl_desc = { +static const struct pinctrl_desc k210_pinctrl_desc = { .name = "k210-pinctrl", .pins = k210_pins, .npins = K210_NPINS, diff --git a/drivers/pinctrl/pinctrl-lpc18xx.c b/drivers/pinctrl/pinctrl-lpc18xx.c index 0f5a7bed2f81..5e0201768323 100644 --- 
a/drivers/pinctrl/pinctrl-lpc18xx.c +++ b/drivers/pinctrl/pinctrl-lpc18xx.c @@ -1257,7 +1257,7 @@ static const struct pinctrl_ops lpc18xx_pctl_ops = { .dt_free_map = pinctrl_utils_free_map, }; -static struct pinctrl_desc lpc18xx_scu_desc = { +static const struct pinctrl_desc lpc18xx_scu_desc = { .name = "lpc18xx/43xx-scu", .pins = lpc18xx_pins, .npins = ARRAY_SIZE(lpc18xx_pins), diff --git a/drivers/pinctrl/pinctrl-mlxbf3.c b/drivers/pinctrl/pinctrl-mlxbf3.c index ffb5dda364dc..fcd9d46de89f 100644 --- a/drivers/pinctrl/pinctrl-mlxbf3.c +++ b/drivers/pinctrl/pinctrl-mlxbf3.c @@ -231,7 +231,7 @@ static const struct pinmux_ops mlxbf3_pmx_ops = { .gpio_request_enable = mlxbf3_gpio_request_enable, }; -static struct pinctrl_desc mlxbf3_pin_desc = { +static const struct pinctrl_desc mlxbf3_pin_desc = { .name = "pinctrl-mlxbf3", .pins = mlxbf3_pins, .npins = ARRAY_SIZE(mlxbf3_pins), diff --git a/drivers/pinctrl/pinctrl-tb10x.c b/drivers/pinctrl/pinctrl-tb10x.c index d6bb8f58978d..2d2e9f697ff9 100644 --- a/drivers/pinctrl/pinctrl-tb10x.c +++ b/drivers/pinctrl/pinctrl-tb10x.c @@ -735,7 +735,7 @@ static const struct pinmux_ops tb10x_pinmux_ops = { .set_mux = tb10x_pctl_set_mux, }; -static struct pinctrl_desc tb10x_pindesc = { +static const struct pinctrl_desc tb10x_pindesc = { .name = "TB10x", .pins = tb10x_pins, .npins = ARRAY_SIZE(tb10x_pins), diff --git a/drivers/pinctrl/pinctrl-zynq.c b/drivers/pinctrl/pinctrl-zynq.c index caa8a2ca3e68..dcde86fed10d 100644 --- a/drivers/pinctrl/pinctrl-zynq.c +++ b/drivers/pinctrl/pinctrl-zynq.c @@ -1143,7 +1143,7 @@ static const struct pinconf_ops zynq_pinconf_ops = { .pin_config_group_set = zynq_pinconf_group_set, }; -static struct pinctrl_desc zynq_desc = { +static const struct pinctrl_desc zynq_desc = { .name = "zynq_pinctrl", .pins = zynq_pins, .npins = ARRAY_SIZE(zynq_pins), diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c index 27f99183d994..aeaa0ded7c1e 100644 --- 
a/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c @@ -898,7 +898,7 @@ static const struct pinconf_ops starfive_pinconf_ops = { .is_generic = true, }; -static struct pinctrl_desc starfive_desc = { +static const struct pinctrl_desc starfive_desc = { .name = DRIVER_NAME, .pins = starfive_pins, .npins = ARRAY_SIZE(starfive_pins), From 5409d619f127cf121e572046aa6e2ed81c98d9bb Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 11 Jun 2025 08:13:49 +0200 Subject: [PATCH 0212/2411] rtc: stm32: Constify static 'pinctrl_desc' The local static 'struct pinctrl_desc' is not modified, so can be made const for code safety. Signed-off-by: Krzysztof Kozlowski Acked-by: Alexandre Belloni Link: https://lore.kernel.org/20250611-pinctrl-const-desc-v2-17-b11c1d650384@linaro.org Signed-off-by: Linus Walleij --- drivers/rtc/rtc-stm32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-stm32.c b/drivers/rtc/rtc-stm32.c index ef8fb88aab48..d4ebf3eb54aa 100644 --- a/drivers/rtc/rtc-stm32.c +++ b/drivers/rtc/rtc-stm32.c @@ -393,7 +393,7 @@ static const struct pinmux_ops stm32_rtc_pinmux_ops = { .strict = true, }; -static struct pinctrl_desc stm32_rtc_pdesc = { +static const struct pinctrl_desc stm32_rtc_pdesc = { .name = DRIVER_NAME, .pins = stm32_rtc_pinctrl_pins, .npins = ARRAY_SIZE(stm32_rtc_pinctrl_pins), From 9576e8d3c46cd7e14ce07981180520275c7152e6 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 11 Jun 2025 12:43:45 +0200 Subject: [PATCH 0213/2411] pinctrl: Use dev_fwnode() irq_domain_create_linear() takes fwnode as the first argument. It can be extracted from the struct device using dev_fwnode() helper instead of using of_node with of_fwnode_handle(). So use the dev_fwnode() helper.
Signed-off-by: Jiri Slaby (SUSE) Acked-by: Chen-Yu Tsai Link: https://lore.kernel.org/20250611104348.192092-17-jirislaby@kernel.org Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/mtk-eint.c | 4 ++-- drivers/pinctrl/pinctrl-at91-pio4.c | 6 +++--- drivers/pinctrl/sunxi/pinctrl-sunxi.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index d906a5e4101f..9f175c73613f 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -561,8 +561,8 @@ int mtk_eint_do_init(struct mtk_eint *eint, struct mtk_eint_pin *eint_pin) goto err_eint; } - eint->domain = irq_domain_create_linear(of_fwnode_handle(eint->dev->of_node), - eint->hw->ap_num, &irq_domain_simple_ops, NULL); + eint->domain = irq_domain_create_linear(dev_fwnode(eint->dev), eint->hw->ap_num, + &irq_domain_simple_ops, NULL); if (!eint->domain) goto err_eint; diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c index ca8a54a43ff5..57f105ac962d 100644 --- a/drivers/pinctrl/pinctrl-at91-pio4.c +++ b/drivers/pinctrl/pinctrl-at91-pio4.c @@ -1212,9 +1212,9 @@ static int atmel_pinctrl_probe(struct platform_device *pdev) dev_dbg(dev, "bank %i: irq=%d\n", i, ret); } - atmel_pioctrl->irq_domain = irq_domain_create_linear(of_fwnode_handle(dev->of_node), - atmel_pioctrl->gpio_chip->ngpio, - &irq_domain_simple_ops, NULL); + atmel_pioctrl->irq_domain = irq_domain_create_linear(dev_fwnode(dev), + atmel_pioctrl->gpio_chip->ngpio, + &irq_domain_simple_ops, NULL); if (!atmel_pioctrl->irq_domain) return dev_err_probe(dev, -ENODEV, "can't add the irq domain\n"); diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index bf8612d72daa..a090d78a3413 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -1646,7 +1646,7 @@ int sunxi_pinctrl_init_with_flags(struct platform_device *pdev, } } - 
pctl->domain = irq_domain_create_linear(of_fwnode_handle(node), + pctl->domain = irq_domain_create_linear(dev_fwnode(&pdev->dev), pctl->desc->irq_banks * IRQ_PER_BANK, &sunxi_pinctrl_irq_domain_ops, pctl); if (!pctl->domain) { From 0a11110bfc5a95ee0416f76500ba0655f62d2baa Mon Sep 17 00:00:00 2001 From: Yulin Lu Date: Thu, 12 Jun 2025 18:48:11 +0800 Subject: [PATCH 0214/2411] dt-bindings: pinctrl: eswin: Document for EIC7700 SoC Add EIC7700 pinctrl device for all configurable pins. For the EIC7700 pinctrl registers, each register (32 bits) controls the characteristics of a single pin. It supports setting function multiplexing, Schmitt trigger, drive strength, pull-up/pull-down, and input enable. Co-developed-by: Samuel Holland Signed-off-by: Samuel Holland Signed-off-by: Yulin Lu Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250612104811.1206-1-luyulin@eswincomputing.com Signed-off-by: Linus Walleij --- .../pinctrl/eswin,eic7700-pinctrl.yaml | 156 ++++++++++++++++++ 1 file changed, 156 insertions(+) create mode 100644 Documentation/devicetree/bindings/pinctrl/eswin,eic7700-pinctrl.yaml diff --git a/Documentation/devicetree/bindings/pinctrl/eswin,eic7700-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/eswin,eic7700-pinctrl.yaml new file mode 100644 index 000000000000..d46e7ee6372d --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/eswin,eic7700-pinctrl.yaml @@ -0,0 +1,156 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pinctrl/eswin,eic7700-pinctrl.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Eswin Eic7700 Pinctrl + +maintainers: + - Yulin Lu + +allOf: + - $ref: pinctrl.yaml# + +description: | + eic7700 pin configuration nodes act as a container for an arbitrary number of + subnodes. Each of these subnodes represents some desired configuration for one or + more pins. 
This configuration can include the mux function to select on those pin(s), + and various pin configuration parameters, such as input-enable, pull-up, etc. + +properties: + compatible: + const: eswin,eic7700-pinctrl + + reg: + maxItems: 1 + + vrgmii-supply: + description: + Regulator supply for the RGMII interface IO power domain. + This property should reference a regulator that provides either 1.8V or 3.3V, + depending on the board-level voltage configuration required by the RGMII interface. + +patternProperties: + '-grp$': + type: object + additionalProperties: false + + patternProperties: + '-pins$': + type: object + + properties: + pins: + description: + For eic7700, specifies the name(s) of one or more pins to be configured by + this node. + items: + enum: [ chip_mode, mode_set0, mode_set1, mode_set2, mode_set3, xin, + rst_out_n, key_reset_n, gpio0, por_sel, jtag0_tck, jtag0_tms, + jtag0_tdi, jtag0_tdo, gpio5, spi2_cs0_n, jtag1_tck, jtag1_tms, + jtag1_tdi, jtag1_tdo, gpio11, spi2_cs1_n, pcie_clkreq_n, + pcie_wake_n, pcie_perst_n, hdmi_scl, hdmi_sda, hdmi_cec, + jtag2_trst, rgmii0_clk_125, rgmii0_txen, rgmii0_txclk, + rgmii0_txd0, rgmii0_txd1, rgmii0_txd2, rgmii0_txd3, i2s0_bclk, + i2s0_wclk, i2s0_sdi, i2s0_sdo, i2s_mclk, rgmii0_rxclk, + rgmii0_rxdv, rgmii0_rxd0, rgmii0_rxd1, rgmii0_rxd2, rgmii0_rxd3, + i2s2_bclk, i2s2_wclk, i2s2_sdi, i2s2_sdo, gpio27, gpio28, gpio29, + rgmii0_mdc, rgmii0_mdio, rgmii0_intb, rgmii1_clk_125, rgmii1_txen, + rgmii1_txclk, rgmii1_txd0, rgmii1_txd1, rgmii1_txd2, rgmii1_txd3, + i2s1_bclk, i2s1_wclk, i2s1_sdi, i2s1_sdo, gpio34, rgmii1_rxclk, + rgmii1_rxdv, rgmii1_rxd0, rgmii1_rxd1, rgmii1_rxd2, rgmii1_rxd3, + spi1_cs0_n, spi1_clk, spi1_d0, spi1_d1, spi1_d2, spi1_d3, spi1_cs1_n, + rgmii1_mdc, rgmii1_mdio, rgmii1_intb, usb0_pwren, usb1_pwren, + i2c0_scl, i2c0_sda, i2c1_scl, i2c1_sda, i2c2_scl, i2c2_sda, + i2c3_scl, i2c3_sda, i2c4_scl, i2c4_sda, i2c5_scl, i2c5_sda, + uart0_tx, uart0_rx, uart1_tx, uart1_rx, uart1_cts, uart1_rts, + 
uart2_tx, uart2_rx, jtag2_tck, jtag2_tms, jtag2_tdi, jtag2_tdo, + fan_pwm, fan_tach, mipi_csi0_xvs, mipi_csi0_xhs, mipi_csi0_mclk, + mipi_csi1_xvs, mipi_csi1_xhs, mipi_csi1_mclk, mipi_csi2_xvs, + mipi_csi2_xhs, mipi_csi2_mclk, mipi_csi3_xvs, mipi_csi3_xhs, + mipi_csi3_mclk, mipi_csi4_xvs, mipi_csi4_xhs, mipi_csi4_mclk, + mipi_csi5_xvs, mipi_csi5_xhs, mipi_csi5_mclk, spi3_cs_n, spi3_clk, + spi3_di, spi3_do, gpio92, gpio93, s_mode, gpio95, spi0_cs_n, + spi0_clk, spi0_d0, spi0_d1, spi0_d2, spi0_d3, i2c10_scl, + i2c10_sda, i2c11_scl, i2c11_sda, gpio106, boot_sel0, boot_sel1, + boot_sel2, boot_sel3, gpio111, lpddr_ref_clk ] + + function: + description: + Specify the alternative function to be configured for the + given pins. + enum: [ disabled, boot_sel, chip_mode, emmc, fan_tach, + gpio, hdmi, i2c, i2s, jtag, ddr_ref_clk_sel, + lpddr_ref_clk, mipi_csi, osc, pcie, pwm, + rgmii, reset, sata, sdio, spi, s_mode, uart, usb ] + + input-schmitt-enable: true + + input-schmitt-disable: true + + bias-disable: true + + bias-pull-down: true + + bias-pull-up: true + + input-enable: true + + input-disable: true + + drive-strength-microamp: true + + required: + - pins + + additionalProperties: false + + allOf: + - $ref: pincfg-node.yaml# + - $ref: pinmux-node.yaml# + + - if: + properties: + pins: + anyOf: + - pattern: '^rgmii' + - const: lpddr_ref_clk + then: + properties: + drive-strength-microamp: + enum: [3000, 6000, 9000, 12000, 15000, 18000, 21000, 24000] + else: + properties: + drive-strength-microamp: + enum: [6000, 9000, 12000, 15000, 18000, 21000, 24000, 27000] + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + pinctrl@51600080 { + compatible = "eswin,eic7700-pinctrl"; + reg = <0x51600080 0x1fff80>; + vrgmii-supply = <&vcc_1v8>; + + dev-active-grp { + /* group node defining 1 standard pin */ + gpio10-pins { + pins = "jtag1_tdo"; + function = "gpio"; + input-enable; + bias-pull-up; + }; + + /* group node defining 2 I2C pins */ + i2c6-pins 
{ + pins = "uart1_cts", "uart1_rts"; + function = "i2c"; + }; + }; + }; From 5b797bcc00ef6ac2d274406db7f6959c25af15e8 Mon Sep 17 00:00:00 2001 From: Yulin Lu Date: Thu, 12 Jun 2025 18:51:59 +0800 Subject: [PATCH 0215/2411] pinctrl: eswin: Add EIC7700 pinctrl driver Add support for the pin controller in ESWIN's EIC7700 SoC, which supports pin multiplexing, pin configuration, and rgmii voltage control. Co-developed-by: Samuel Holland Signed-off-by: Samuel Holland Signed-off-by: Yulin Lu Link: https://lore.kernel.org/20250612105159.1241-1-luyulin@eswincomputing.com Signed-off-by: Linus Walleij --- drivers/pinctrl/Kconfig | 11 + drivers/pinctrl/Makefile | 1 + drivers/pinctrl/pinctrl-eic7700.c | 704 ++++++++++++++++++++++++++++++ 3 files changed, 716 insertions(+) create mode 100644 drivers/pinctrl/pinctrl-eic7700.c diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig index 95f032c7ec69..346003b624ec 100644 --- a/drivers/pinctrl/Kconfig +++ b/drivers/pinctrl/Kconfig @@ -206,6 +206,17 @@ config PINCTRL_DIGICOLOR select PINMUX select GENERIC_PINCONF +config PINCTRL_EIC7700 + tristate "EIC7700 PINCTRL driver" + depends on ARCH_ESWIN || COMPILE_TEST + select PINMUX + select GENERIC_PINCONF + help + This driver supports the pin controller in ESWIN's EIC7700 SoC, + which provides pin multiplexing, pin configuration, and rgmii voltage + control.
+ Say Y here to enable the eic7700 pinctrl driver + config PINCTRL_EP93XX bool depends on ARCH_EP93XX || COMPILE_TEST diff --git a/drivers/pinctrl/Makefile b/drivers/pinctrl/Makefile index dcede70b2566..9d20df57a69e 100644 --- a/drivers/pinctrl/Makefile +++ b/drivers/pinctrl/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_PINCTRL_CY8C95X0) += pinctrl-cy8c95x0.o obj-$(CONFIG_PINCTRL_DA850_PUPD) += pinctrl-da850-pupd.o obj-$(CONFIG_PINCTRL_DA9062) += pinctrl-da9062.o obj-$(CONFIG_PINCTRL_DIGICOLOR) += pinctrl-digicolor.o +obj-$(CONFIG_PINCTRL_EIC7700) += pinctrl-eic7700.o obj-$(CONFIG_PINCTRL_EQUILIBRIUM) += pinctrl-equilibrium.o obj-$(CONFIG_PINCTRL_EP93XX) += pinctrl-ep93xx.o obj-$(CONFIG_PINCTRL_EYEQ5) += pinctrl-eyeq5.o diff --git a/drivers/pinctrl/pinctrl-eic7700.c b/drivers/pinctrl/pinctrl-eic7700.c new file mode 100644 index 000000000000..719cd11e276a --- /dev/null +++ b/drivers/pinctrl/pinctrl-eic7700.c @@ -0,0 +1,704 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ESWIN Pinctrl Controller Platform Device Driver + * + * Copyright 2024, Beijing ESWIN Computing Technology Co., Ltd.. All rights reserved. 
+ * + * Authors: Samuel Holland + * Yulin Lu + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "core.h" +#include "pinmux.h" +#include "pinconf.h" + +#define EIC7700_PIN_REG(i) (4 * (i)) +#define EIC7700_IE BIT(0) +#define EIC7700_PU BIT(1) +#define EIC7700_PD BIT(2) +#define EIC7700_DS GENMASK(6, 3) +#define EIC7700_ST BIT(7) +#define EIC7700_FUNC_SEL GENMASK(18, 16) + +#define EIC7700_BIAS (EIC7700_PD | EIC7700_PU) +#define EIC7700_PINCONF GENMASK(7, 0) + +#define EIC7700_RGMII0_SEL_MODE (0x310 - 0x80) +#define EIC7700_RGMII1_SEL_MODE (0x314 - 0x80) +#define EIC7700_MS GENMASK(1, 0) +#define EIC7700_MS_3V3 0x0 +#define EIC7700_MS_1V8 0x3 + +#define EIC7700_FUNCTIONS_PER_PIN 8 + +struct eic7700_pin { + u8 functions[EIC7700_FUNCTIONS_PER_PIN]; +}; + +struct eic7700_pinctrl { + void __iomem *base; + struct pinctrl_desc desc; + unsigned int functions_count; + struct pinfunction functions[] __counted_by(functions_count); +}; + +enum { + F_DISABLED, + F_BOOT_SEL, + F_CHIP_MODE, + F_EMMC, + F_FAN_TACH, + F_GPIO, + F_HDMI, + F_I2C, + F_I2S, + F_JTAG, + F_DDR_REF_CLK_SEL, + F_LPDDR_REF_CLK, + F_MIPI_CSI, + F_OSC, + F_PCIE, + F_PWM, + F_RGMII, + F_RESET, + F_SATA, + F_SDIO, + F_SPI, + F_S_MODE, + F_UART, + F_USB, + EIC7700_FUNCTIONS_COUNT +}; + +static const char *const eic7700_functions[EIC7700_FUNCTIONS_COUNT] = { + [F_DISABLED] = "disabled", + [F_BOOT_SEL] = "boot_sel", + [F_CHIP_MODE] = "chip_mode", + [F_EMMC] = "emmc", + [F_FAN_TACH] = "fan_tach", + [F_GPIO] = "gpio", + [F_HDMI] = "hdmi", + [F_I2C] = "i2c", + [F_I2S] = "i2s", + [F_JTAG] = "jtag", + [F_DDR_REF_CLK_SEL] = "ddr_ref_clk_sel", + [F_LPDDR_REF_CLK] = "lpddr_ref_clk", + [F_MIPI_CSI] = "mipi_csi", + [F_OSC] = "osc", + [F_PCIE] = "pcie", + [F_PWM] = "pwm", + [F_RGMII] = "rgmii", + [F_RESET] = "reset", + [F_SATA] = "sata", + [F_SDIO] = "sdio", + [F_SPI] = "spi", + [F_S_MODE] = "s_mode", + [F_UART] = "uart", + 
[F_USB] = "usb", +}; + +#define EIC7700_PIN(_number, _name, ...) \ + { \ + .number = _number, \ + .name = _name, \ + .drv_data = (void *)&(struct eic7700_pin) { { __VA_ARGS__ } } \ + } + +static const struct pinctrl_pin_desc eic7700_pins[] = { + EIC7700_PIN(0, "chip_mode", [0] = F_CHIP_MODE), + EIC7700_PIN(1, "mode_set0", [0] = F_SDIO, [2] = F_GPIO), + EIC7700_PIN(2, "mode_set1", [0] = F_SDIO, [2] = F_GPIO), + EIC7700_PIN(3, "mode_set2", [0] = F_SDIO, [2] = F_GPIO), + EIC7700_PIN(4, "mode_set3", [0] = F_SDIO, [2] = F_GPIO), + EIC7700_PIN(5, "xin", [0] = F_OSC), + EIC7700_PIN(6, "rtc_xin", [0] = F_DISABLED), + EIC7700_PIN(7, "rst_out_n", [0] = F_RESET), + EIC7700_PIN(8, "key_reset_n", [0] = F_RESET), + EIC7700_PIN(9, "rst_in_n", [0] = F_DISABLED), + EIC7700_PIN(10, "por_in_n", [0] = F_DISABLED), + EIC7700_PIN(11, "por_out_n", [0] = F_DISABLED), + EIC7700_PIN(12, "gpio0", [0] = F_GPIO), + EIC7700_PIN(13, "por_sel", [0] = F_RESET), + EIC7700_PIN(14, "jtag0_tck", [0] = F_JTAG, [1] = F_SPI, [2] = F_GPIO), + EIC7700_PIN(15, "jtag0_tms", [0] = F_JTAG, [1] = F_SPI, [2] = F_GPIO), + EIC7700_PIN(16, "jtag0_tdi", [0] = F_JTAG, [1] = F_SPI, [2] = F_GPIO), + EIC7700_PIN(17, "jtag0_tdo", [0] = F_JTAG, [1] = F_SPI, [2] = F_GPIO), + EIC7700_PIN(18, "gpio5", [0] = F_GPIO, [1] = F_SPI), + EIC7700_PIN(19, "spi2_cs0_n", [0] = F_SPI, [2] = F_GPIO), + EIC7700_PIN(20, "jtag1_tck", [0] = F_JTAG, [2] = F_GPIO), + EIC7700_PIN(21, "jtag1_tms", [0] = F_JTAG, [2] = F_GPIO), + EIC7700_PIN(22, "jtag1_tdi", [0] = F_JTAG, [2] = F_GPIO), + EIC7700_PIN(23, "jtag1_tdo", [0] = F_JTAG, [2] = F_GPIO), + EIC7700_PIN(24, "gpio11", [0] = F_GPIO), + EIC7700_PIN(25, "spi2_cs1_n", [0] = F_SPI, [2] = F_GPIO), + EIC7700_PIN(26, "pcie_clkreq_n", [0] = F_PCIE), + EIC7700_PIN(27, "pcie_wake_n", [0] = F_PCIE), + EIC7700_PIN(28, "pcie_perst_n", [0] = F_PCIE), + EIC7700_PIN(29, "hdmi_scl", [0] = F_HDMI), + EIC7700_PIN(30, "hdmi_sda", [0] = F_HDMI), + EIC7700_PIN(31, "hdmi_cec", [0] = F_HDMI), + EIC7700_PIN(32, 
"jtag2_trst", [0] = F_JTAG, [2] = F_GPIO), + EIC7700_PIN(33, "rgmii0_clk_125", [0] = F_RGMII), + EIC7700_PIN(34, "rgmii0_txen", [0] = F_RGMII), + EIC7700_PIN(35, "rgmii0_txclk", [0] = F_RGMII), + EIC7700_PIN(36, "rgmii0_txd0", [0] = F_RGMII), + EIC7700_PIN(37, "rgmii0_txd1", [0] = F_RGMII), + EIC7700_PIN(38, "rgmii0_txd2", [0] = F_RGMII), + EIC7700_PIN(39, "rgmii0_txd3", [0] = F_RGMII), + EIC7700_PIN(40, "i2s0_bclk", [0] = F_I2S, [2] = F_GPIO), + EIC7700_PIN(41, "i2s0_wclk", [0] = F_I2S, [2] = F_GPIO), + EIC7700_PIN(42, "i2s0_sdi", [0] = F_I2S, [2] = F_GPIO), + EIC7700_PIN(43, "i2s0_sdo", [0] = F_I2S, [2] = F_GPIO), + EIC7700_PIN(44, "i2s_mclk", [0] = F_I2S, [2] = F_GPIO), + EIC7700_PIN(45, "rgmii0_rxclk", [0] = F_RGMII), + EIC7700_PIN(46, "rgmii0_rxdv", [0] = F_RGMII), + EIC7700_PIN(47, "rgmii0_rxd0", [0] = F_RGMII), + EIC7700_PIN(48, "rgmii0_rxd1", [0] = F_RGMII), + EIC7700_PIN(49, "rgmii0_rxd2", [0] = F_RGMII), + EIC7700_PIN(50, "rgmii0_rxd3", [0] = F_RGMII), + EIC7700_PIN(51, "i2s2_bclk", [0] = F_I2S, [2] = F_GPIO), + EIC7700_PIN(52, "i2s2_wclk", [0] = F_I2S, [2] = F_GPIO), + EIC7700_PIN(53, "i2s2_sdi", [0] = F_I2S, [2] = F_GPIO), + EIC7700_PIN(54, "i2s2_sdo", [0] = F_I2S, [2] = F_GPIO), + EIC7700_PIN(55, "gpio27", [0] = F_GPIO, [1] = F_SATA), + EIC7700_PIN(56, "gpio28", [0] = F_GPIO), + EIC7700_PIN(57, "gpio29", [0] = F_RESET, [1] = F_EMMC, [2] = F_GPIO), + EIC7700_PIN(58, "rgmii0_mdc", [0] = F_RGMII), + EIC7700_PIN(59, "rgmii0_mdio", [0] = F_RGMII), + EIC7700_PIN(60, "rgmii0_intb", [0] = F_RGMII), + EIC7700_PIN(61, "rgmii1_clk_125", [0] = F_RGMII), + EIC7700_PIN(62, "rgmii1_txen", [0] = F_RGMII), + EIC7700_PIN(63, "rgmii1_txclk", [0] = F_RGMII), + EIC7700_PIN(64, "rgmii1_txd0", [0] = F_RGMII), + EIC7700_PIN(65, "rgmii1_txd1", [0] = F_RGMII), + EIC7700_PIN(66, "rgmii1_txd2", [0] = F_RGMII), + EIC7700_PIN(67, "rgmii1_txd3", [0] = F_RGMII), + EIC7700_PIN(68, "i2s1_bclk", [0] = F_I2S, [2] = F_GPIO), + EIC7700_PIN(69, "i2s1_wclk", [0] = F_I2S, [2] = F_GPIO), + 
EIC7700_PIN(70, "i2s1_sdi", [0] = F_I2S, [2] = F_GPIO), + EIC7700_PIN(71, "i2s1_sdo", [0] = F_I2S, [2] = F_GPIO), + EIC7700_PIN(72, "gpio34", [0] = F_RESET, [1] = F_SDIO, [2] = F_GPIO), + EIC7700_PIN(73, "rgmii1_rxclk", [0] = F_RGMII), + EIC7700_PIN(74, "rgmii1_rxdv", [0] = F_RGMII), + EIC7700_PIN(75, "rgmii1_rxd0", [0] = F_RGMII), + EIC7700_PIN(76, "rgmii1_rxd1", [0] = F_RGMII), + EIC7700_PIN(77, "rgmii1_rxd2", [0] = F_RGMII), + EIC7700_PIN(78, "rgmii1_rxd3", [0] = F_RGMII), + EIC7700_PIN(79, "spi1_cs0_n", [0] = F_SPI, [2] = F_GPIO), + EIC7700_PIN(80, "spi1_clk", [0] = F_SPI, [2] = F_GPIO), + EIC7700_PIN(81, "spi1_d0", [0] = F_SPI, [1] = F_I2C, [2] = F_GPIO, [3] = F_UART), + EIC7700_PIN(82, "spi1_d1", [0] = F_SPI, [1] = F_I2C, [2] = F_GPIO, [3] = F_UART), + EIC7700_PIN(83, "spi1_d2", [0] = F_SPI, [1] = F_SDIO, [2] = F_GPIO), + EIC7700_PIN(84, "spi1_d3", [0] = F_SPI, [1] = F_PWM, [2] = F_GPIO), + EIC7700_PIN(85, "spi1_cs1_n", [0] = F_SPI, [1] = F_PWM, [2] = F_GPIO), + EIC7700_PIN(86, "rgmii1_mdc", [0] = F_RGMII), + EIC7700_PIN(87, "rgmii1_mdio", [0] = F_RGMII), + EIC7700_PIN(88, "rgmii1_intb", [0] = F_RGMII), + EIC7700_PIN(89, "usb0_pwren", [0] = F_USB, [2] = F_GPIO), + EIC7700_PIN(90, "usb1_pwren", [0] = F_USB, [2] = F_GPIO), + EIC7700_PIN(91, "i2c0_scl", [0] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(92, "i2c0_sda", [0] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(93, "i2c1_scl", [0] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(94, "i2c1_sda", [0] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(95, "i2c2_scl", [0] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(96, "i2c2_sda", [0] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(97, "i2c3_scl", [0] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(98, "i2c3_sda", [0] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(99, "i2c4_scl", [0] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(100, "i2c4_sda", [0] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(101, "i2c5_scl", [0] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(102, "i2c5_sda", [0] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(103, "uart0_tx", [0] = F_UART, [2] = 
F_GPIO), + EIC7700_PIN(104, "uart0_rx", [0] = F_UART, [2] = F_GPIO), + EIC7700_PIN(105, "uart1_tx", [0] = F_UART, [2] = F_GPIO), + EIC7700_PIN(106, "uart1_rx", [0] = F_UART, [2] = F_GPIO), + EIC7700_PIN(107, "uart1_cts", [0] = F_UART, [1] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(108, "uart1_rts", [0] = F_UART, [1] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(109, "uart2_tx", [0] = F_UART, [1] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(110, "uart2_rx", [0] = F_UART, [1] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(111, "jtag2_tck", [0] = F_JTAG, [2] = F_GPIO), + EIC7700_PIN(112, "jtag2_tms", [0] = F_JTAG, [2] = F_GPIO), + EIC7700_PIN(113, "jtag2_tdi", [0] = F_JTAG, [2] = F_GPIO), + EIC7700_PIN(114, "jtag2_tdo", [0] = F_JTAG, [2] = F_GPIO), + EIC7700_PIN(115, "fan_pwm", [0] = F_PWM, [2] = F_GPIO), + EIC7700_PIN(116, "fan_tach", [0] = F_FAN_TACH, [2] = F_GPIO), + EIC7700_PIN(117, "mipi_csi0_xvs", [0] = F_MIPI_CSI, [2] = F_GPIO), + EIC7700_PIN(118, "mipi_csi0_xhs", [0] = F_MIPI_CSI, [2] = F_GPIO), + EIC7700_PIN(119, "mipi_csi0_mclk", [0] = F_MIPI_CSI, [2] = F_GPIO), + EIC7700_PIN(120, "mipi_csi1_xvs", [0] = F_MIPI_CSI, [2] = F_GPIO), + EIC7700_PIN(121, "mipi_csi1_xhs", [0] = F_MIPI_CSI, [2] = F_GPIO), + EIC7700_PIN(122, "mipi_csi1_mclk", [0] = F_MIPI_CSI, [2] = F_GPIO), + EIC7700_PIN(123, "mipi_csi2_xvs", [0] = F_MIPI_CSI, [2] = F_GPIO), + EIC7700_PIN(124, "mipi_csi2_xhs", [0] = F_MIPI_CSI, [2] = F_GPIO), + EIC7700_PIN(125, "mipi_csi2_mclk", [0] = F_MIPI_CSI, [2] = F_GPIO), + EIC7700_PIN(126, "mipi_csi3_xvs", [0] = F_MIPI_CSI, [2] = F_GPIO), + EIC7700_PIN(127, "mipi_csi3_xhs", [0] = F_MIPI_CSI, [2] = F_GPIO), + EIC7700_PIN(128, "mipi_csi3_mclk", [0] = F_MIPI_CSI, [2] = F_GPIO), + EIC7700_PIN(129, "mipi_csi4_xvs", [0] = F_MIPI_CSI, [2] = F_GPIO), + EIC7700_PIN(130, "mipi_csi4_xhs", [0] = F_MIPI_CSI, [2] = F_GPIO), + EIC7700_PIN(131, "mipi_csi4_mclk", [0] = F_MIPI_CSI, [2] = F_GPIO), + EIC7700_PIN(132, "mipi_csi5_xvs", [0] = F_MIPI_CSI, [2] = F_GPIO), + EIC7700_PIN(133, "mipi_csi5_xhs", [0] = 
F_MIPI_CSI, [2] = F_GPIO), + EIC7700_PIN(134, "mipi_csi5_mclk", [0] = F_MIPI_CSI, [2] = F_GPIO), + EIC7700_PIN(135, "spi3_cs_n", [0] = F_SPI, [2] = F_GPIO), + EIC7700_PIN(136, "spi3_clk", [0] = F_SPI, [2] = F_GPIO), + EIC7700_PIN(137, "spi3_di", [0] = F_SPI, [2] = F_GPIO), + EIC7700_PIN(138, "spi3_do", [0] = F_SPI, [2] = F_GPIO), + EIC7700_PIN(139, "gpio92", [0] = F_I2C, [1] = F_MIPI_CSI, [2] = F_GPIO, [3] = F_UART), + EIC7700_PIN(140, "gpio93", [0] = F_I2C, [1] = F_MIPI_CSI, [2] = F_GPIO, [3] = F_UART), + EIC7700_PIN(141, "s_mode", [0] = F_S_MODE, [2] = F_GPIO), + EIC7700_PIN(142, "gpio95", [0] = F_DDR_REF_CLK_SEL, [2] = F_GPIO), + EIC7700_PIN(143, "spi0_cs_n", [0] = F_SPI, [2] = F_GPIO), + EIC7700_PIN(144, "spi0_clk", [0] = F_SPI, [2] = F_GPIO), + EIC7700_PIN(145, "spi0_d0", [0] = F_SPI, [2] = F_GPIO), + EIC7700_PIN(146, "spi0_d1", [0] = F_SPI, [2] = F_GPIO), + EIC7700_PIN(147, "spi0_d2", [0] = F_SPI, [2] = F_GPIO), + EIC7700_PIN(148, "spi0_d3", [0] = F_SPI, [2] = F_GPIO), + EIC7700_PIN(149, "i2c10_scl", [0] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(150, "i2c10_sda", [0] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(151, "i2c11_scl", [0] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(152, "i2c11_sda", [0] = F_I2C, [2] = F_GPIO), + EIC7700_PIN(153, "gpio106", [0] = F_GPIO), + EIC7700_PIN(154, "boot_sel0", [0] = F_BOOT_SEL, [2] = F_GPIO), + EIC7700_PIN(155, "boot_sel1", [0] = F_BOOT_SEL, [2] = F_GPIO), + EIC7700_PIN(156, "boot_sel2", [0] = F_BOOT_SEL, [2] = F_GPIO), + EIC7700_PIN(157, "boot_sel3", [0] = F_BOOT_SEL, [2] = F_GPIO), + EIC7700_PIN(158, "gpio111", [0] = F_GPIO), + EIC7700_PIN(159, "reserved0", [0] = F_DISABLED), + EIC7700_PIN(160, "reserved1", [0] = F_DISABLED), + EIC7700_PIN(161, "reserved2", [0] = F_DISABLED), + EIC7700_PIN(162, "reserved3", [0] = F_DISABLED), + EIC7700_PIN(163, "lpddr_ref_clk", [0] = F_LPDDR_REF_CLK), +}; + +static int eic7700_get_groups_count(struct pinctrl_dev *pctldev) +{ + struct eic7700_pinctrl *pc = pinctrl_dev_get_drvdata(pctldev); + + return 
pc->desc.npins; +} + +static const char *eic7700_get_group_name(struct pinctrl_dev *pctldev, unsigned int selector) +{ + struct eic7700_pinctrl *pc = pinctrl_dev_get_drvdata(pctldev); + + return pc->desc.pins[selector].name; +} + +static int eic7700_get_group_pins(struct pinctrl_dev *pctldev, unsigned int selector, + const unsigned int **pins, unsigned int *npins) +{ + struct eic7700_pinctrl *pc = pinctrl_dev_get_drvdata(pctldev); + + *pins = &pc->desc.pins[selector].number; + *npins = 1; + + return 0; +} + +static const struct pinctrl_ops eic7700_pinctrl_ops = { + .get_groups_count = eic7700_get_groups_count, + .get_group_name = eic7700_get_group_name, + .get_group_pins = eic7700_get_group_pins, +#ifdef CONFIG_OF + .dt_node_to_map = pinconf_generic_dt_node_to_map_pin, + .dt_free_map = pinconf_generic_dt_free_map, +#endif +}; + +static int eic7700_pin_config_get(struct pinctrl_dev *pctldev, unsigned int pin, + unsigned long *config) +{ + struct eic7700_pinctrl *pc = pinctrl_dev_get_drvdata(pctldev); + const struct eic7700_pin *pin_data = pc->desc.pins[pin].drv_data; + u32 arg, value; + int param; + + if (pin_data->functions[0] == F_OSC || pin_data->functions[0] == F_DISABLED) + return -EOPNOTSUPP; + + value = readl_relaxed(pc->base + EIC7700_PIN_REG(pin)); + + param = pinconf_to_config_param(*config); + switch (param) { + case PIN_CONFIG_BIAS_DISABLE: + arg = (value & EIC7700_BIAS) == 0; + break; + case PIN_CONFIG_BIAS_PULL_DOWN: + arg = (value & EIC7700_BIAS) == EIC7700_PD; + break; + case PIN_CONFIG_BIAS_PULL_UP: + arg = (value & EIC7700_BIAS) == EIC7700_PU; + break; + case PIN_CONFIG_DRIVE_STRENGTH_UA: + if (pin_data->functions[0] == F_RGMII || + pin_data->functions[0] == F_LPDDR_REF_CLK) + arg = FIELD_GET(EIC7700_DS, value) * 3000 + 3000; + else + arg = FIELD_GET(EIC7700_DS, value) * 3000 + 6000; + break; + case PIN_CONFIG_INPUT_ENABLE: + arg = value & EIC7700_IE; + break; + case PIN_CONFIG_INPUT_SCHMITT_ENABLE: + arg = value & EIC7700_ST; + break; + default: + 
return -EOPNOTSUPP; + } + + *config = pinconf_to_config_packed(param, arg); + return arg ? 0 : -EINVAL; +} + +static int eic7700_pin_config_set(struct pinctrl_dev *pctldev, unsigned int pin, + unsigned long *configs, unsigned int num_configs) +{ + struct eic7700_pinctrl *pc = pinctrl_dev_get_drvdata(pctldev); + const struct eic7700_pin *pin_data = pc->desc.pins[pin].drv_data; + u32 value; + + if (pin_data->functions[0] == F_OSC || pin_data->functions[0] == F_DISABLED) + return -EOPNOTSUPP; + + value = readl_relaxed(pc->base + EIC7700_PIN_REG(pin)); + + for (unsigned int i = 0; i < num_configs; i++) { + int param = pinconf_to_config_param(configs[i]); + u32 arg = pinconf_to_config_argument(configs[i]); + + switch (param) { + case PIN_CONFIG_BIAS_DISABLE: + value &= ~EIC7700_BIAS; + break; + case PIN_CONFIG_BIAS_PULL_DOWN: + if (arg == 0) + return -EOPNOTSUPP; + value &= ~EIC7700_BIAS; + value |= EIC7700_PD; + break; + case PIN_CONFIG_BIAS_PULL_UP: + if (arg == 0) + return -EOPNOTSUPP; + value &= ~EIC7700_BIAS; + value |= EIC7700_PU; + break; + case PIN_CONFIG_DRIVE_STRENGTH_UA: + value &= ~EIC7700_DS; + if (pin_data->functions[0] == F_RGMII || + pin_data->functions[0] == F_LPDDR_REF_CLK) { + if (arg < 3000 || arg > 24000) + return -EOPNOTSUPP; + value |= FIELD_PREP(EIC7700_DS, (arg - 3000) / 3000); + } else { + if (arg < 6000 || arg > 27000) + return -EOPNOTSUPP; + value |= FIELD_PREP(EIC7700_DS, (arg - 6000) / 3000); + } + break; + case PIN_CONFIG_INPUT_ENABLE: + if (arg) + value |= EIC7700_IE; + else + value &= ~EIC7700_IE; + break; + case PIN_CONFIG_INPUT_SCHMITT_ENABLE: + if (arg) + value |= EIC7700_ST; + else + value &= ~EIC7700_ST; + break; + default: + return -EOPNOTSUPP; + } + } + + writel_relaxed(value, pc->base + EIC7700_PIN_REG(pin)); + + return 0; +} + +#ifdef CONFIG_DEBUG_FS +static void eic7700_pin_config_dbg_show(struct pinctrl_dev *pctldev, struct seq_file *s, + unsigned int pin) +{ + struct eic7700_pinctrl *pc = pinctrl_dev_get_drvdata(pctldev); + 
u32 value = readl_relaxed(pc->base + EIC7700_PIN_REG(pin)) & EIC7700_PINCONF; + + seq_printf(s, " [0x%02x]", value); +} +#else +#define eic7700_pin_config_dbg_show NULL +#endif + +static const struct pinconf_ops eic7700_pinconf_ops = { + .is_generic = true, + .pin_config_get = eic7700_pin_config_get, + .pin_config_set = eic7700_pin_config_set, + .pin_config_group_get = eic7700_pin_config_get, + .pin_config_group_set = eic7700_pin_config_set, + .pin_config_dbg_show = eic7700_pin_config_dbg_show, + .pin_config_group_dbg_show = eic7700_pin_config_dbg_show, +}; + +static int eic7700_get_functions_count(struct pinctrl_dev *pctldev) +{ + struct eic7700_pinctrl *pc = pinctrl_dev_get_drvdata(pctldev); + + return pc->functions_count; +} + +static const char *eic7700_get_function_name(struct pinctrl_dev *pctldev, unsigned int selector) +{ + struct eic7700_pinctrl *pc = pinctrl_dev_get_drvdata(pctldev); + + return pc->functions[selector].name; +} + +static int eic7700_get_function_groups(struct pinctrl_dev *pctldev, unsigned int selector, + const char *const **groups, unsigned int *num_groups) +{ + struct eic7700_pinctrl *pc = pinctrl_dev_get_drvdata(pctldev); + + *groups = pc->functions[selector].groups; + *num_groups = pc->functions[selector].ngroups; + + return 0; +} + +static int eic7700_set_mux(struct pinctrl_dev *pctldev, unsigned int func_selector, + unsigned int group_selector) +{ + struct eic7700_pinctrl *pc = pinctrl_dev_get_drvdata(pctldev); + const struct eic7700_pin *pin_data = pc->desc.pins[group_selector].drv_data; + u32 fs, value; + + if (pin_data->functions[0] == F_OSC || pin_data->functions[0] == F_DISABLED) + return -EOPNOTSUPP; + + for (fs = 0; fs < EIC7700_FUNCTIONS_PER_PIN; fs++) + if (pin_data->functions[fs] == func_selector) + break; + + if (fs == EIC7700_FUNCTIONS_PER_PIN) { + dev_err(pctldev->dev, "invalid mux %s for pin %s\n", + pc->functions[func_selector].name, + pc->desc.pins[group_selector].name); + return -EINVAL; + } + + value = 
readl_relaxed(pc->base + EIC7700_PIN_REG(group_selector)); + value &= ~EIC7700_FUNC_SEL; + value |= FIELD_PREP(EIC7700_FUNC_SEL, fs); + writel_relaxed(value, pc->base + EIC7700_PIN_REG(group_selector)); + + return 0; +} + +static int eic7700_gpio_request_enable(struct pinctrl_dev *pctldev, + struct pinctrl_gpio_range *range, unsigned int offset) +{ + return eic7700_set_mux(pctldev, F_GPIO, offset); +} + +static void eic7700_gpio_disable_free(struct pinctrl_dev *pctldev, + struct pinctrl_gpio_range *range, unsigned int offset) +{ + eic7700_set_mux(pctldev, F_DISABLED, offset); +} + +static int eic7700_gpio_set_direction(struct pinctrl_dev *pctldev, + struct pinctrl_gpio_range *range, unsigned int offset, + bool input) +{ + struct eic7700_pinctrl *pc = pinctrl_dev_get_drvdata(pctldev); + u32 value; + + value = readl_relaxed(pc->base + EIC7700_PIN_REG(offset)); + if (input) + value |= EIC7700_IE; + else + value &= ~EIC7700_IE; + writel_relaxed(value, pc->base + EIC7700_PIN_REG(offset)); + + return 0; +} + +static const struct pinmux_ops eic7700_pinmux_ops = { + .get_functions_count = eic7700_get_functions_count, + .get_function_name = eic7700_get_function_name, + .get_function_groups = eic7700_get_function_groups, + .set_mux = eic7700_set_mux, + .gpio_request_enable = eic7700_gpio_request_enable, + .gpio_disable_free = eic7700_gpio_disable_free, + .gpio_set_direction = eic7700_gpio_set_direction, + .strict = true, +}; + +static int eic7700_pinctrl_init_function_groups(struct device *dev, struct eic7700_pinctrl *pc, + const char *const *function_names) +{ + unsigned int ngroups = 0; + const char **groups; + + /* Count the number of groups for each function */ + for (unsigned int pin = 0; pin < pc->desc.npins; pin++) { + const struct eic7700_pin *pin_data = pc->desc.pins[pin].drv_data; + bool found_disabled = false; + + for (unsigned int fs = 0; fs < EIC7700_FUNCTIONS_PER_PIN; fs++) { + unsigned int selector = pin_data->functions[fs]; + struct pinfunction *function = 
&pc->functions[selector]; + + /* Only count F_DISABLED once per pin */ + if (selector == F_DISABLED) { + if (found_disabled) + continue; + found_disabled = true; + } + + function->ngroups++; + ngroups++; + } + } + + groups = devm_kcalloc(dev, ngroups, sizeof(*groups), GFP_KERNEL); + if (!groups) + return -ENOMEM; + + for (unsigned int selector = 0; selector < pc->functions_count; selector++) { + struct pinfunction *function = &pc->functions[selector]; + + function->name = function_names[selector]; + function->groups = groups; + groups += function->ngroups; + + /* Reset per-function ngroups for use as iterator below */ + function->ngroups = 0; + } + + /* Fill in the group pointers for each function */ + for (unsigned int pin = 0; pin < pc->desc.npins; pin++) { + const struct pinctrl_pin_desc *desc = &pc->desc.pins[pin]; + const struct eic7700_pin *pin_data = desc->drv_data; + bool found_disabled = false; + + for (unsigned int fs = 0; fs < EIC7700_FUNCTIONS_PER_PIN; fs++) { + unsigned int selector = pin_data->functions[fs]; + struct pinfunction *function = &pc->functions[selector]; + + /* Only count F_DISABLED once per pin */ + if (selector == F_DISABLED) { + if (found_disabled) + continue; + found_disabled = true; + } + + ((const char **)function->groups)[function->ngroups++] = desc->name; + } + } + + return 0; +} + +static int eic7700_pinctrl_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct pinctrl_dev *pctldev; + struct eic7700_pinctrl *pc; + struct regulator *regulator; + u32 voltage, rgmii0_mode, rgmii1_mode; + int ret; + + pc = devm_kzalloc(dev, struct_size(pc, functions, EIC7700_FUNCTIONS_COUNT), GFP_KERNEL); + if (!pc) + return -ENOMEM; + + pc->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(pc->base)) + return PTR_ERR(pc->base); + + regulator = devm_regulator_get(dev, "vrgmii"); + if (IS_ERR_OR_NULL(regulator)) { + return dev_err_probe(dev, PTR_ERR(regulator), + "failed to get vrgmii regulator\n"); + } + + 
voltage = regulator_get_voltage(regulator); + if (voltage < 0) { + return dev_err_probe(&pdev->dev, voltage, + "Failed to get voltage from regulator\n"); + } + + rgmii0_mode = readl_relaxed(pc->base + EIC7700_RGMII0_SEL_MODE); + rgmii1_mode = readl_relaxed(pc->base + EIC7700_RGMII1_SEL_MODE); + rgmii0_mode &= ~EIC7700_MS; + rgmii1_mode &= ~EIC7700_MS; + if (voltage == 1800000) { + rgmii0_mode |= FIELD_PREP(EIC7700_MS, EIC7700_MS_1V8); + rgmii1_mode |= FIELD_PREP(EIC7700_MS, EIC7700_MS_1V8); + } else if (voltage == 3300000) { + rgmii0_mode |= FIELD_PREP(EIC7700_MS, EIC7700_MS_3V3); + rgmii1_mode |= FIELD_PREP(EIC7700_MS, EIC7700_MS_3V3); + } else { + return dev_err_probe(&pdev->dev, -EINVAL, + "Invalid voltage configuration, should be either 1.8V or 3.3V\n"); + } + + writel_relaxed(rgmii0_mode, pc->base + EIC7700_RGMII0_SEL_MODE); + writel_relaxed(rgmii1_mode, pc->base + EIC7700_RGMII1_SEL_MODE); + + pc->desc.name = dev_name(dev); + pc->desc.pins = eic7700_pins; + pc->desc.npins = ARRAY_SIZE(eic7700_pins); + pc->desc.pctlops = &eic7700_pinctrl_ops; + pc->desc.pmxops = &eic7700_pinmux_ops; + pc->desc.confops = &eic7700_pinconf_ops; + pc->desc.owner = THIS_MODULE; + + pc->functions_count = EIC7700_FUNCTIONS_COUNT; + ret = eic7700_pinctrl_init_function_groups(dev, pc, eic7700_functions); + if (ret) + return ret; + + ret = devm_pinctrl_register_and_init(dev, &pc->desc, pc, &pctldev); + if (ret) + return dev_err_probe(dev, ret, "could not register pinctrl driver\n"); + + return pinctrl_enable(pctldev); +} + +static const struct of_device_id eic7700_pinctrl_of_match[] = { + { .compatible = "eswin,eic7700-pinctrl" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, eic7700_pinctrl_of_match); + +static struct platform_driver eic7700_pinctrl_driver = { + .probe = eic7700_pinctrl_probe, + .driver = { + .name = "pinctrl-eic7700", + .of_match_table = eic7700_pinctrl_of_match, + }, +}; +module_platform_driver(eic7700_pinctrl_driver); + +MODULE_DESCRIPTION("Pinctrl driver for 
the ESWIN EIC7700 SoC"); +MODULE_AUTHOR("Samuel Holland "); +MODULE_AUTHOR("Yulin Lu "); +MODULE_LICENSE("GPL"); From 76ba1bb25cbbf836961839fee28554b84949462f Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 14:19:53 +0200 Subject: [PATCH 0216/2411] pinctrl: cirrus: lochnagar: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Reviewed-by: Charles Keepax Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-cirrus-v1-1-2d45c1f92557@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/cirrus/pinctrl-lochnagar.c | 25 +++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/pinctrl/cirrus/pinctrl-lochnagar.c b/drivers/pinctrl/cirrus/pinctrl-lochnagar.c index 0f32866a4aef..dcc0a2f3c7dd 100644 --- a/drivers/pinctrl/cirrus/pinctrl-lochnagar.c +++ b/drivers/pinctrl/cirrus/pinctrl-lochnagar.c @@ -1058,13 +1058,12 @@ static const struct pinctrl_desc lochnagar_pin_desc = { .confops = &lochnagar_pin_conf_ops, }; -static void lochnagar_gpio_set(struct gpio_chip *chip, - unsigned int offset, int value) +static int lochnagar_gpio_set(struct gpio_chip *chip, + unsigned int offset, int value) { struct lochnagar_pin_priv *priv = gpiochip_get_data(chip); struct lochnagar *lochnagar = priv->lochnagar; const struct lochnagar_pin *pin = priv->pins[offset].drv_data; - int ret; value = !!value; @@ -1075,29 +1074,31 @@ static void lochnagar_gpio_set(struct gpio_chip *chip, case LN_PTYPE_MUX: value |= LN2_OP_GPIO; - ret = lochnagar_pin_set_mux(priv, pin, value); + return lochnagar_pin_set_mux(priv, pin, value); break; case LN_PTYPE_GPIO: if (pin->invert) value = !value; - ret = regmap_update_bits(lochnagar->regmap, pin->reg, - BIT(pin->shift), value << pin->shift); + return regmap_update_bits(lochnagar->regmap, pin->reg, + BIT(pin->shift), + 
value << pin->shift); break; default: - ret = -EINVAL; break; } - if (ret < 0) - dev_err(chip->parent, "Failed to set %s value: %d\n", - pin->name, ret); + return -EINVAL; } static int lochnagar_gpio_direction_out(struct gpio_chip *chip, unsigned int offset, int value) { - lochnagar_gpio_set(chip, offset, value); + int ret; + + ret = lochnagar_gpio_set(chip, offset, value); + if (ret) + return ret; return pinctrl_gpio_direction_output(chip, offset); } @@ -1160,7 +1161,7 @@ static int lochnagar_pin_probe(struct platform_device *pdev) priv->gpio_chip.request = gpiochip_generic_request; priv->gpio_chip.free = gpiochip_generic_free; priv->gpio_chip.direction_output = lochnagar_gpio_direction_out; - priv->gpio_chip.set = lochnagar_gpio_set; + priv->gpio_chip.set_rv = lochnagar_gpio_set; priv->gpio_chip.can_sleep = true; priv->gpio_chip.parent = dev; priv->gpio_chip.base = -1; From e52c741907fb9a3ca9433775d4d7c70e6c3a8078 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 14:19:54 +0200 Subject: [PATCH 0217/2411] pinctrl: cirrus: cs42l43: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. 
Signed-off-by: Bartosz Golaszewski Reviewed-by: Charles Keepax Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-cirrus-v1-2-2d45c1f92557@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/cirrus/pinctrl-cs42l43.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c index a90beb986f5b..4e47710eb3d5 100644 --- a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c +++ b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c @@ -483,7 +483,8 @@ static int cs42l43_gpio_get(struct gpio_chip *chip, unsigned int offset) return ret; } -static void cs42l43_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) +static int cs42l43_gpio_set(struct gpio_chip *chip, unsigned int offset, + int value) { struct cs42l43_pin *priv = gpiochip_get_data(chip); unsigned int shift = offset + CS42L43_GPIO1_LVL_SHIFT; @@ -493,23 +494,27 @@ static void cs42l43_gpio_set(struct gpio_chip *chip, unsigned int offset, int va offset + 1, str_high_low(value)); ret = pm_runtime_resume_and_get(priv->dev); - if (ret) { - dev_err(priv->dev, "Failed to resume for set: %d\n", ret); - return; - } + if (ret) + return ret; ret = regmap_update_bits(priv->regmap, CS42L43_GPIO_CTRL1, BIT(shift), value << shift); if (ret) - dev_err(priv->dev, "Failed to set gpio%d: %d\n", offset + 1, ret); + return ret; pm_runtime_put(priv->dev); + + return 0; } static int cs42l43_gpio_direction_out(struct gpio_chip *chip, unsigned int offset, int value) { - cs42l43_gpio_set(chip, offset, value); + int ret; + + ret = cs42l43_gpio_set(chip, offset, value); + if (ret) + return ret; return pinctrl_gpio_direction_output(chip, offset); } @@ -550,7 +555,7 @@ static int cs42l43_pin_probe(struct platform_device *pdev) priv->gpio_chip.direction_output = cs42l43_gpio_direction_out; priv->gpio_chip.add_pin_ranges = cs42l43_gpio_add_pin_ranges; priv->gpio_chip.get = cs42l43_gpio_get; - priv->gpio_chip.set = 
cs42l43_gpio_set; + priv->gpio_chip.set_rv = cs42l43_gpio_set; priv->gpio_chip.label = dev_name(priv->dev); priv->gpio_chip.parent = priv->dev; priv->gpio_chip.can_sleep = true; From 48773aa04b49ade6435c5f571501f7f2882b15fb Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 14:24:30 +0200 Subject: [PATCH 0218/2411] pinctrl: starfive: jh7100: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Reviewed-by: Hal Feng Reviewed-by: Emil Renner Berthing Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-starfive-v1-1-8507b46516f5@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c index aeaa0ded7c1e..b729ca4de422 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c @@ -969,8 +969,8 @@ static int starfive_gpio_get(struct gpio_chip *gc, unsigned int gpio) return !!(readl_relaxed(din) & BIT(gpio % 32)); } -static void starfive_gpio_set(struct gpio_chip *gc, unsigned int gpio, - int value) +static int starfive_gpio_set(struct gpio_chip *gc, unsigned int gpio, + int value) { struct starfive_pinctrl *sfp = container_of(gc, struct starfive_pinctrl, gc); void __iomem *dout = sfp->base + GPON_DOUT_CFG + 8 * gpio; @@ -979,6 +979,8 @@ static void starfive_gpio_set(struct gpio_chip *gc, unsigned int gpio, raw_spin_lock_irqsave(&sfp->lock, flags); writel_relaxed(value, dout); raw_spin_unlock_irqrestore(&sfp->lock, flags); + + return 0; } static int starfive_gpio_set_config(struct gpio_chip *gc, unsigned int gpio, @@ -1300,7 +1302,7 @@ static int starfive_probe(struct platform_device *pdev) 
sfp->gc.direction_input = starfive_gpio_direction_input; sfp->gc.direction_output = starfive_gpio_direction_output; sfp->gc.get = starfive_gpio_get; - sfp->gc.set = starfive_gpio_set; + sfp->gc.set_rv = starfive_gpio_set; sfp->gc.set_config = starfive_gpio_set_config; sfp->gc.add_pin_ranges = starfive_gpio_add_pin_ranges; sfp->gc.base = -1; From 17037b6f76e2019d554ebbe2591a107166d54f2b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 14:24:31 +0200 Subject: [PATCH 0219/2411] pinctrl: starfive: jh7110: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Reviewed-by: Hal Feng Reviewed-by: Emil Renner Berthing Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-starfive-v1-2-8507b46516f5@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c index 1d0d6c224c10..082bb1c6cea9 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c @@ -608,8 +608,7 @@ static int jh7110_gpio_get(struct gpio_chip *gc, unsigned int gpio) return !!(readl_relaxed(reg) & BIT(gpio % 32)); } -static void jh7110_gpio_set(struct gpio_chip *gc, - unsigned int gpio, int value) +static int jh7110_gpio_set(struct gpio_chip *gc, unsigned int gpio, int value) { struct jh7110_pinctrl *sfp = container_of(gc, struct jh7110_pinctrl, gc); @@ -625,6 +624,8 @@ static void jh7110_gpio_set(struct gpio_chip *gc, dout |= readl_relaxed(reg_dout) & ~mask; writel_relaxed(dout, reg_dout); raw_spin_unlock_irqrestore(&sfp->lock, flags); + + return 0; } static int jh7110_gpio_set_config(struct gpio_chip *gc, @@ -934,7 +935,7 @@ int 
jh7110_pinctrl_probe(struct platform_device *pdev) sfp->gc.direction_input = jh7110_gpio_direction_input; sfp->gc.direction_output = jh7110_gpio_direction_output; sfp->gc.get = jh7110_gpio_get; - sfp->gc.set = jh7110_gpio_set; + sfp->gc.set_rv = jh7110_gpio_set; sfp->gc.set_config = jh7110_gpio_set_config; sfp->gc.add_pin_ranges = jh7110_gpio_add_pin_ranges; sfp->gc.base = info->gc_base; From 1ae8c585f7051aecf1ed208a02a0298eaf999066 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 15:15:10 +0200 Subject: [PATCH 0220/2411] pinctrl: sppctl: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-remaining-v1-1-556b0a530cd4@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/sunplus/sppctl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/sunplus/sppctl.c b/drivers/pinctrl/sunplus/sppctl.c index ae156f779a16..3c3357f80889 100644 --- a/drivers/pinctrl/sunplus/sppctl.c +++ b/drivers/pinctrl/sunplus/sppctl.c @@ -461,13 +461,15 @@ static int sppctl_gpio_get(struct gpio_chip *chip, unsigned int offset) return (reg & BIT(bit_off)) ? 
1 : 0; } -static void sppctl_gpio_set(struct gpio_chip *chip, unsigned int offset, int val) +static int sppctl_gpio_set(struct gpio_chip *chip, unsigned int offset, int val) { struct sppctl_gpio_chip *spp_gchip = gpiochip_get_data(chip); u32 reg_off, reg; reg = sppctl_prep_moon_reg_and_offset(offset, &reg_off, val); sppctl_gpio_out_writel(spp_gchip, reg, reg_off); + + return 0; } static int sppctl_gpio_set_config(struct gpio_chip *chip, unsigned int offset, @@ -545,7 +547,7 @@ static int sppctl_gpio_new(struct platform_device *pdev, struct sppctl_pdata *pc gchip->direction_input = sppctl_gpio_direction_input; gchip->direction_output = sppctl_gpio_direction_output; gchip->get = sppctl_gpio_get; - gchip->set = sppctl_gpio_set; + gchip->set_rv = sppctl_gpio_set; gchip->set_config = sppctl_gpio_set_config; gchip->dbg_show = IS_ENABLED(CONFIG_DEBUG_FS) ? sppctl_gpio_dbg_show : NULL; From fd81c42a830f5a82fc08403aa69909c532af33e5 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 15:15:11 +0200 Subject: [PATCH 0221/2411] pinctrl: st: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. 
Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-remaining-v1-2-556b0a530cd4@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-st.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index fe2d52e434db..fb5d72a6826e 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -711,10 +711,12 @@ static int st_gpio_get(struct gpio_chip *chip, unsigned offset) return !!(readl(bank->base + REG_PIO_PIN) & BIT(offset)); } -static void st_gpio_set(struct gpio_chip *chip, unsigned offset, int value) +static int st_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) { struct st_gpio_bank *bank = gpiochip_get_data(chip); __st_gpio_set(bank, offset, value); + + return 0; } static int st_gpio_direction_output(struct gpio_chip *chip, @@ -1470,7 +1472,7 @@ static const struct gpio_chip st_gpio_template = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, .get = st_gpio_get, - .set = st_gpio_set, + .set_rv = st_gpio_set, .direction_input = pinctrl_gpio_direction_input, .direction_output = st_gpio_direction_output, .get_direction = st_gpio_get_direction, From 1a0a2c079b98d8b4c092ec97970bf12c9d94ba42 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 15:15:12 +0200 Subject: [PATCH 0222/2411] pinctrl: da9062: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. 
Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-remaining-v1-3-556b0a530cd4@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-da9062.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/pinctrl-da9062.c b/drivers/pinctrl/pinctrl-da9062.c index 6f44a13b90ce..3295b09dfc3d 100644 --- a/drivers/pinctrl/pinctrl-da9062.c +++ b/drivers/pinctrl/pinctrl-da9062.c @@ -102,14 +102,14 @@ static int da9062_gpio_get(struct gpio_chip *gc, unsigned int offset) return !!(val & BIT(offset)); } -static void da9062_gpio_set(struct gpio_chip *gc, unsigned int offset, - int value) +static int da9062_gpio_set(struct gpio_chip *gc, unsigned int offset, + int value) { struct da9062_pctl *pctl = gpiochip_get_data(gc); struct regmap *regmap = pctl->da9062->regmap; - regmap_update_bits(regmap, DA9062AA_GPIO_MODE0_4, BIT(offset), - value << offset); + return regmap_update_bits(regmap, DA9062AA_GPIO_MODE0_4, BIT(offset), + value << offset); } static int da9062_gpio_get_direction(struct gpio_chip *gc, unsigned int offset) @@ -172,9 +172,7 @@ static int da9062_gpio_direction_output(struct gpio_chip *gc, if (ret) return ret; - da9062_gpio_set(gc, offset, value); - - return 0; + return da9062_gpio_set(gc, offset, value); } static int da9062_gpio_set_config(struct gpio_chip *gc, unsigned int offset, @@ -235,7 +233,7 @@ static int da9062_gpio_to_irq(struct gpio_chip *gc, unsigned int offset) static const struct gpio_chip reference_gc = { .owner = THIS_MODULE, .get = da9062_gpio_get, - .set = da9062_gpio_set, + .set_rv = da9062_gpio_set, .get_direction = da9062_gpio_get_direction, .direction_input = da9062_gpio_direction_input, .direction_output = da9062_gpio_direction_output, From 8766f8e7f1ee2d5c8697ddcdc7b94e096982b433 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 15:15:13 +0200 Subject: [PATCH 0223/2411] pinctrl: mcp23s08: use new GPIO line value setter 
callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-remaining-v1-4-556b0a530cd4@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-mcp23s08.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c index c2f4b16f42d2..c8027ef03ecc 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08.c +++ b/drivers/pinctrl/pinctrl-mcp23s08.c @@ -341,24 +341,30 @@ static int __mcp23s08_set(struct mcp23s08 *mcp, unsigned mask, bool value) return mcp_update_bits(mcp, MCP_OLAT, mask, value ? mask : 0); } -static void mcp23s08_set(struct gpio_chip *chip, unsigned offset, int value) +static int mcp23s08_set(struct gpio_chip *chip, unsigned int offset, int value) { struct mcp23s08 *mcp = gpiochip_get_data(chip); unsigned mask = BIT(offset); + int ret; mutex_lock(&mcp->lock); - __mcp23s08_set(mcp, mask, !!value); + ret = __mcp23s08_set(mcp, mask, !!value); mutex_unlock(&mcp->lock); + + return ret; } -static void mcp23s08_set_multiple(struct gpio_chip *chip, - unsigned long *mask, unsigned long *bits) +static int mcp23s08_set_multiple(struct gpio_chip *chip, + unsigned long *mask, unsigned long *bits) { struct mcp23s08 *mcp = gpiochip_get_data(chip); + int ret; mutex_lock(&mcp->lock); - mcp_update_bits(mcp, MCP_OLAT, *mask, *bits); + ret = mcp_update_bits(mcp, MCP_OLAT, *mask, *bits); mutex_unlock(&mcp->lock); + + return ret; } static int @@ -626,8 +632,8 @@ int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, mcp->chip.get = mcp23s08_get; mcp->chip.get_multiple = mcp23s08_get_multiple; mcp->chip.direction_output = mcp23s08_direction_output; - mcp->chip.set = mcp23s08_set; - mcp->chip.set_multiple = mcp23s08_set_multiple; + mcp->chip.set_rv = mcp23s08_set; + 
mcp->chip.set_multiple_rv = mcp23s08_set_multiple; mcp->chip.base = base; mcp->chip.can_sleep = true; From a23b8eab75a74ffcfb79676bb61ef557e1a4fb8d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 15:15:14 +0200 Subject: [PATCH 0224/2411] pinctrl: wmt: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-remaining-v1-5-556b0a530cd4@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/vt8500/pinctrl-wmt.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/vt8500/pinctrl-wmt.c b/drivers/pinctrl/vt8500/pinctrl-wmt.c index fce92111a32e..767c6808a463 100644 --- a/drivers/pinctrl/vt8500/pinctrl-wmt.c +++ b/drivers/pinctrl/vt8500/pinctrl-wmt.c @@ -507,8 +507,8 @@ static int wmt_gpio_get_value(struct gpio_chip *chip, unsigned offset) return !!(readl_relaxed(data->base + reg_data_in) & BIT(bit)); } -static void wmt_gpio_set_value(struct gpio_chip *chip, unsigned offset, - int val) +static int wmt_gpio_set_value(struct gpio_chip *chip, unsigned int offset, + int val) { struct wmt_pinctrl_data *data = gpiochip_get_data(chip); u32 bank = WMT_BANK_FROM_PIN(offset); @@ -517,19 +517,26 @@ static void wmt_gpio_set_value(struct gpio_chip *chip, unsigned offset, if (reg_data_out == NO_REG) { dev_err(data->dev, "no data out register defined\n"); - return; + return -EINVAL; } if (val) wmt_setbits(data, reg_data_out, BIT(bit)); else wmt_clearbits(data, reg_data_out, BIT(bit)); + + return 0; } static int wmt_gpio_direction_output(struct gpio_chip *chip, unsigned offset, int value) { - wmt_gpio_set_value(chip, offset, value); + int ret; + + ret = wmt_gpio_set_value(chip, offset, value); + if (ret) + return ret; + return pinctrl_gpio_direction_output(chip, offset); } @@ -542,7 
+549,7 @@ static const struct gpio_chip wmt_gpio_chip = { .direction_input = pinctrl_gpio_direction_input, .direction_output = wmt_gpio_direction_output, .get = wmt_gpio_get_value, - .set = wmt_gpio_set_value, + .set_rv = wmt_gpio_set_value, .can_sleep = false, }; From dffe286e2428a32bf5a70648d22a678b83080414 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 15:15:15 +0200 Subject: [PATCH 0225/2411] pinctrl: aw9523: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-remaining-v1-6-556b0a530cd4@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-aw9523.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/pinctrl/pinctrl-aw9523.c b/drivers/pinctrl/pinctrl-aw9523.c index 9bf53de20be8..c84454038419 100644 --- a/drivers/pinctrl/pinctrl-aw9523.c +++ b/drivers/pinctrl/pinctrl-aw9523.c @@ -625,14 +625,14 @@ static int aw9523_gpio_get_multiple(struct gpio_chip *chip, return ret; } -static void aw9523_gpio_set_multiple(struct gpio_chip *chip, +static int aw9523_gpio_set_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { struct aw9523 *awi = gpiochip_get_data(chip); u8 mask_lo, mask_hi, bits_lo, bits_hi; unsigned int reg; - int ret; + int ret = 0; mask_lo = *mask; mask_hi = *mask >> 8; @@ -644,27 +644,33 @@ static void aw9523_gpio_set_multiple(struct gpio_chip *chip, reg = AW9523_REG_OUT_STATE(AW9523_PINS_PER_PORT); ret = regmap_write_bits(awi->regmap, reg, mask_hi, bits_hi); if (ret) - dev_warn(awi->dev, "Cannot write port1 out level\n"); + goto out; } if (mask_lo) { reg = AW9523_REG_OUT_STATE(0); ret = regmap_write_bits(awi->regmap, reg, mask_lo, bits_lo); if (ret) - dev_warn(awi->dev, "Cannot write port0 out level\n"); + goto 
out; } mutex_unlock(&awi->i2c_lock); + +out: + return ret; } -static void aw9523_gpio_set(struct gpio_chip *chip, - unsigned int offset, int value) +static int aw9523_gpio_set(struct gpio_chip *chip, unsigned int offset, + int value) { struct aw9523 *awi = gpiochip_get_data(chip); u8 regbit = offset % AW9523_PINS_PER_PORT; + int ret; mutex_lock(&awi->i2c_lock); - regmap_update_bits(awi->regmap, AW9523_REG_OUT_STATE(offset), - BIT(regbit), value ? BIT(regbit) : 0); + ret = regmap_update_bits(awi->regmap, AW9523_REG_OUT_STATE(offset), + BIT(regbit), value ? BIT(regbit) : 0); mutex_unlock(&awi->i2c_lock); + + return ret; } @@ -779,8 +785,8 @@ static int aw9523_init_gpiochip(struct aw9523 *awi, unsigned int npins) gc->direction_output = aw9523_direction_output; gc->get = aw9523_gpio_get; gc->get_multiple = aw9523_gpio_get_multiple; - gc->set = aw9523_gpio_set; - gc->set_multiple = aw9523_gpio_set_multiple; + gc->set_rv = aw9523_gpio_set; + gc->set_multiple_rv = aw9523_gpio_set_multiple; gc->set_config = gpiochip_generic_config; gc->parent = dev; gc->owner = THIS_MODULE; From e62acaef5d3b67648a7161b329ae8a5afce8c682 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 15:15:16 +0200 Subject: [PATCH 0226/2411] pinctrl: xway: statify xway_pinconf_group_set() This function is not exported and is only used locally. Make it static. 
Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-remaining-v1-7-556b0a530cd4@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-xway.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/pinctrl-xway.c b/drivers/pinctrl/pinctrl-xway.c index 48f8aabf3bfa..02e65d25e729 100644 --- a/drivers/pinctrl/pinctrl-xway.c +++ b/drivers/pinctrl/pinctrl-xway.c @@ -1228,10 +1228,10 @@ static int xway_pinconf_set(struct pinctrl_dev *pctldev, return 0; } -int xway_pinconf_group_set(struct pinctrl_dev *pctldev, - unsigned selector, - unsigned long *configs, - unsigned num_configs) +static int xway_pinconf_group_set(struct pinctrl_dev *pctldev, + unsigned int selector, + unsigned long *configs, + unsigned int num_configs) { struct ltq_pinmux_info *info = pinctrl_dev_get_drvdata(pctldev); int i, ret = 0; From 0f7ccc85d8e3559c91bd219a027b75d2d6c44305 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 15:15:17 +0200 Subject: [PATCH 0227/2411] pinctrl: xway: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. 
Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-remaining-v1-8-556b0a530cd4@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-xway.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/pinctrl-xway.c b/drivers/pinctrl/pinctrl-xway.c index 02e65d25e729..53c6c22ff24d 100644 --- a/drivers/pinctrl/pinctrl-xway.c +++ b/drivers/pinctrl/pinctrl-xway.c @@ -1293,7 +1293,7 @@ static struct ltq_pinmux_info xway_info = { }; /* --------- gpio_chip related code --------- */ -static void xway_gpio_set(struct gpio_chip *chip, unsigned int pin, int val) +static int xway_gpio_set(struct gpio_chip *chip, unsigned int pin, int val) { struct ltq_pinmux_info *info = dev_get_drvdata(chip->parent); @@ -1301,6 +1301,8 @@ static void xway_gpio_set(struct gpio_chip *chip, unsigned int pin, int val) gpio_setbit(info->membase[0], GPIO_OUT(pin), PORT_PIN(pin)); else gpio_clearbit(info->membase[0], GPIO_OUT(pin), PORT_PIN(pin)); + + return 0; } static int xway_gpio_get(struct gpio_chip *chip, unsigned int pin) @@ -1328,9 +1330,7 @@ static int xway_gpio_dir_out(struct gpio_chip *chip, unsigned int pin, int val) else gpio_setbit(info->membase[0], GPIO_OD(pin), PORT_PIN(pin)); gpio_setbit(info->membase[0], GPIO_DIR(pin), PORT_PIN(pin)); - xway_gpio_set(chip, pin, val); - - return 0; + return xway_gpio_set(chip, pin, val); } /* @@ -1354,7 +1354,7 @@ static struct gpio_chip xway_chip = { .direction_input = xway_gpio_dir_in, .direction_output = xway_gpio_dir_out, .get = xway_gpio_get, - .set = xway_gpio_set, + .set_rv = xway_gpio_set, .request = gpiochip_generic_request, .free = gpiochip_generic_free, .to_irq = xway_gpio_to_irq, From 66a07081110adc575b8d6cfc4cf618ce85d2f32e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 15:15:18 +0200 Subject: [PATCH 0228/2411] pinctrl: digicolor: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting 
line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-remaining-v1-9-556b0a530cd4@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-digicolor.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/pinctrl-digicolor.c b/drivers/pinctrl/pinctrl-digicolor.c index a0423172bdd6..1676cb3cc4c9 100644 --- a/drivers/pinctrl/pinctrl-digicolor.c +++ b/drivers/pinctrl/pinctrl-digicolor.c @@ -182,7 +182,7 @@ static int dc_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) return 0; } -static void dc_gpio_set(struct gpio_chip *chip, unsigned gpio, int value); +static int dc_gpio_set(struct gpio_chip *chip, unsigned int gpio, int value); static int dc_gpio_direction_output(struct gpio_chip *chip, unsigned gpio, int value) @@ -216,7 +216,7 @@ static int dc_gpio_get(struct gpio_chip *chip, unsigned gpio) return !!(input & BIT(bit_off)); } -static void dc_gpio_set(struct gpio_chip *chip, unsigned gpio, int value) +static int dc_gpio_set(struct gpio_chip *chip, unsigned int gpio, int value) { struct dc_pinmap *pmap = gpiochip_get_data(chip); int reg_off = GP_OUTPUT0(gpio/PINS_PER_COLLECTION); @@ -232,6 +232,8 @@ static void dc_gpio_set(struct gpio_chip *chip, unsigned gpio, int value) output &= ~BIT(bit_off); writeb_relaxed(output, pmap->regs + reg_off); spin_unlock_irqrestore(&pmap->lock, flags); + + return 0; } static int dc_gpiochip_add(struct dc_pinmap *pmap) @@ -246,7 +248,7 @@ static int dc_gpiochip_add(struct dc_pinmap *pmap) chip->direction_input = dc_gpio_direction_input; chip->direction_output = dc_gpio_direction_output; chip->get = dc_gpio_get; - chip->set = dc_gpio_set; + chip->set_rv = dc_gpio_set; chip->base = -1; chip->ngpio = PINS_COUNT; From 72c236f78edae3b500af3efa44b1504a3a9b6b60 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 
15:15:19 +0200 Subject: [PATCH 0229/2411] pinctrl: apple: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Reviewed-by: Janne Grunau Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-remaining-v1-10-556b0a530cd4@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-apple-gpio.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/pinctrl-apple-gpio.c b/drivers/pinctrl/pinctrl-apple-gpio.c index 0f551d67d482..dcf3a921b4df 100644 --- a/drivers/pinctrl/pinctrl-apple-gpio.c +++ b/drivers/pinctrl/pinctrl-apple-gpio.c @@ -217,11 +217,13 @@ static int apple_gpio_get(struct gpio_chip *chip, unsigned offset) return !!(reg & REG_GPIOx_DATA); } -static void apple_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) +static int apple_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) { struct apple_gpio_pinctrl *pctl = gpiochip_get_data(chip); apple_gpio_set_reg(pctl, offset, REG_GPIOx_DATA, value ? 
REG_GPIOx_DATA : 0); + + return 0; } static int apple_gpio_direction_input(struct gpio_chip *chip, unsigned int offset) @@ -376,7 +378,7 @@ static int apple_gpio_register(struct apple_gpio_pinctrl *pctl) pctl->gpio_chip.direction_input = apple_gpio_direction_input; pctl->gpio_chip.direction_output = apple_gpio_direction_output; pctl->gpio_chip.get = apple_gpio_get; - pctl->gpio_chip.set = apple_gpio_set; + pctl->gpio_chip.set_rv = apple_gpio_set; pctl->gpio_chip.base = -1; pctl->gpio_chip.ngpio = pctl->pinctrl_desc.npins; pctl->gpio_chip.parent = pctl->dev; From dfdbce964904daa4b6e874d11cf2e95f5f76213d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 15:15:20 +0200 Subject: [PATCH 0230/2411] pinctrl: pic32: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-remaining-v1-11-556b0a530cd4@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-pic32.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/pinctrl-pic32.c b/drivers/pinctrl/pinctrl-pic32.c index bf827ab081a1..6d64cab97e81 100644 --- a/drivers/pinctrl/pinctrl-pic32.c +++ b/drivers/pinctrl/pinctrl-pic32.c @@ -1828,8 +1828,8 @@ static int pic32_gpio_get(struct gpio_chip *chip, unsigned offset) return !!(readl(bank->reg_base + PORT_REG) & BIT(offset)); } -static void pic32_gpio_set(struct gpio_chip *chip, unsigned offset, - int value) +static int pic32_gpio_set(struct gpio_chip *chip, unsigned int offset, + int value) { struct pic32_gpio_bank *bank = gpiochip_get_data(chip); u32 mask = BIT(offset); @@ -1838,6 +1838,8 @@ static void pic32_gpio_set(struct gpio_chip *chip, unsigned offset, writel(mask, bank->reg_base + PIC32_SET(PORT_REG)); else writel(mask, bank->reg_base + PIC32_CLR(PORT_REG)); + + return 
0; } static int pic32_gpio_direction_output(struct gpio_chip *chip, @@ -2118,7 +2120,7 @@ static void pic32_gpio_irq_handler(struct irq_desc *desc) .direction_input = pic32_gpio_direction_input, \ .direction_output = pic32_gpio_direction_output, \ .get = pic32_gpio_get, \ - .set = pic32_gpio_set, \ + .set_rv = pic32_gpio_set, \ .ngpio = _npins, \ .base = GPIO_BANK_START(_bank), \ .owner = THIS_MODULE, \ From d9727b48515b656e641173fb07462a5d93d493da Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 15:15:21 +0200 Subject: [PATCH 0231/2411] pinctrl: spear: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-remaining-v1-12-556b0a530cd4@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/spear/pinctrl-plgpio.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/spear/pinctrl-plgpio.c b/drivers/pinctrl/spear/pinctrl-plgpio.c index a05570c7d833..e8234d2156da 100644 --- a/drivers/pinctrl/spear/pinctrl-plgpio.c +++ b/drivers/pinctrl/spear/pinctrl-plgpio.c @@ -181,24 +181,27 @@ static int plgpio_get_value(struct gpio_chip *chip, unsigned offset) return is_plgpio_set(plgpio->regmap, offset, plgpio->regs.rdata); } -static void plgpio_set_value(struct gpio_chip *chip, unsigned offset, int value) +static int plgpio_set_value(struct gpio_chip *chip, unsigned int offset, + int value) { struct plgpio *plgpio = gpiochip_get_data(chip); if (offset >= chip->ngpio) - return; + return -EINVAL; /* get correct offset for "offset" pin */ if (plgpio->p2o && (plgpio->p2o_regs & PTO_WDATA_REG)) { offset = plgpio->p2o(offset); if (offset == -1) - return; + return -EINVAL; } if (value) plgpio_reg_set(plgpio->regmap, offset, plgpio->regs.wdata); else plgpio_reg_reset(plgpio->regmap, offset, 
plgpio->regs.wdata); + + return 0; } static int plgpio_request(struct gpio_chip *chip, unsigned offset) @@ -579,7 +582,7 @@ static int plgpio_probe(struct platform_device *pdev) plgpio->chip.direction_input = plgpio_direction_input; plgpio->chip.direction_output = plgpio_direction_output; plgpio->chip.get = plgpio_get_value; - plgpio->chip.set = plgpio_set_value; + plgpio->chip.set_rv = plgpio_set_value; plgpio->chip.label = dev_name(&pdev->dev); plgpio->chip.parent = &pdev->dev; plgpio->chip.owner = THIS_MODULE; From 84b91ca38f891cc149e1bbadb3bede206fbd4063 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 15:15:22 +0200 Subject: [PATCH 0232/2411] pinctrl: keembay: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-remaining-v1-13-556b0a530cd4@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-keembay.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/pinctrl-keembay.c b/drivers/pinctrl/pinctrl-keembay.c index 0d7cc8280ea2..622000139317 100644 --- a/drivers/pinctrl/pinctrl-keembay.c +++ b/drivers/pinctrl/pinctrl-keembay.c @@ -1188,7 +1188,7 @@ static int keembay_gpio_get(struct gpio_chip *gc, unsigned int pin) return keembay_read_pin(kpc->base0 + offset, pin); } -static void keembay_gpio_set(struct gpio_chip *gc, unsigned int pin, int val) +static int keembay_gpio_set(struct gpio_chip *gc, unsigned int pin, int val) { struct keembay_pinctrl *kpc = gpiochip_get_data(gc); unsigned int reg_val; @@ -1200,6 +1200,8 @@ static void keembay_gpio_set(struct gpio_chip *gc, unsigned int pin, int val) else keembay_write_gpio_reg(~reg_val | BIT(pin % KEEMBAY_GPIO_MAX_PER_REG), kpc->base0 + KEEMBAY_GPIO_DATA_LOW, pin); + + return 0; } static int 
keembay_gpio_get_direction(struct gpio_chip *gc, unsigned int pin) @@ -1231,9 +1233,8 @@ static int keembay_gpio_set_direction_out(struct gpio_chip *gc, val = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_MODE, pin); val &= ~KEEMBAY_GPIO_MODE_DIR; keembay_write_reg(val, kpc->base1 + KEEMBAY_GPIO_MODE, pin); - keembay_gpio_set(gc, pin, value); - return 0; + return keembay_gpio_set(gc, pin, value); } static void keembay_gpio_irq_handler(struct irq_desc *desc) @@ -1480,7 +1481,7 @@ static int keembay_gpiochip_probe(struct keembay_pinctrl *kpc, gc->direction_input = keembay_gpio_set_direction_in; gc->direction_output = keembay_gpio_set_direction_out; gc->get = keembay_gpio_get; - gc->set = keembay_gpio_set; + gc->set_rv = keembay_gpio_set; gc->set_config = gpiochip_generic_config; gc->base = -1; gc->ngpio = kpc->npins; From 5956a3a9733703e8784daf6a1a3fb431724abf11 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 15:15:23 +0200 Subject: [PATCH 0233/2411] pinctrl: sunxi: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. 
Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-remaining-v1-14-556b0a530cd4@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-sunxi.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index a090d78a3413..a5ce84621e5a 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -955,8 +955,8 @@ static int sunxi_pinctrl_gpio_get(struct gpio_chip *chip, unsigned offset) return val; } -static void sunxi_pinctrl_gpio_set(struct gpio_chip *chip, - unsigned offset, int value) +static int sunxi_pinctrl_gpio_set(struct gpio_chip *chip, unsigned int offset, + int value) { struct sunxi_pinctrl *pctl = gpiochip_get_data(chip); u32 reg, shift, mask, val; @@ -976,6 +976,8 @@ static void sunxi_pinctrl_gpio_set(struct gpio_chip *chip, writel(val, pctl->membase + reg); raw_spin_unlock_irqrestore(&pctl->lock, flags); + + return 0; } static int sunxi_pinctrl_gpio_direction_output(struct gpio_chip *chip, @@ -1597,7 +1599,7 @@ int sunxi_pinctrl_init_with_flags(struct platform_device *pdev, pctl->chip->direction_input = sunxi_pinctrl_gpio_direction_input; pctl->chip->direction_output = sunxi_pinctrl_gpio_direction_output; pctl->chip->get = sunxi_pinctrl_gpio_get; - pctl->chip->set = sunxi_pinctrl_gpio_set; + pctl->chip->set_rv = sunxi_pinctrl_gpio_set; pctl->chip->of_xlate = sunxi_pinctrl_gpio_of_xlate; pctl->chip->to_irq = sunxi_pinctrl_gpio_to_irq; pctl->chip->of_gpio_n_cells = 3; From b8cd87c0e999b4689b1cab8906790ece5d00ab75 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 15:15:24 +0200 Subject: [PATCH 0234/2411] pinctrl: as3722: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. 
Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-remaining-v1-15-556b0a530cd4@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-as3722.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/pinctrl-as3722.c b/drivers/pinctrl/pinctrl-as3722.c index ed7b2c482ff0..30ed758bbe9d 100644 --- a/drivers/pinctrl/pinctrl-as3722.c +++ b/drivers/pinctrl/pinctrl-as3722.c @@ -473,8 +473,8 @@ static int as3722_gpio_get(struct gpio_chip *chip, unsigned offset) return (invert_enable) ? !val : val; } -static void as3722_gpio_set(struct gpio_chip *chip, unsigned offset, - int value) +static int as3722_gpio_set(struct gpio_chip *chip, unsigned int offset, + int value) { struct as3722_pctrl_info *as_pci = gpiochip_get_data(chip); struct as3722 *as3722 = as_pci->as3722; @@ -486,7 +486,7 @@ static void as3722_gpio_set(struct gpio_chip *chip, unsigned offset, if (ret < 0) { dev_err(as_pci->dev, "GPIO_CONTROL%d_REG read failed: %d\n", offset, ret); - return; + return ret; } en_invert = !!(val & AS3722_GPIO_INV); @@ -500,12 +500,19 @@ static void as3722_gpio_set(struct gpio_chip *chip, unsigned offset, if (ret < 0) dev_err(as_pci->dev, "GPIO_SIGNAL_OUT_REG update failed: %d\n", ret); + + return ret; } static int as3722_gpio_direction_output(struct gpio_chip *chip, - unsigned offset, int value) + unsigned int offset, int value) { - as3722_gpio_set(chip, offset, value); + int ret; + + ret = as3722_gpio_set(chip, offset, value); + if (ret) + return ret; + return pinctrl_gpio_direction_output(chip, offset); } @@ -522,7 +529,7 @@ static const struct gpio_chip as3722_gpio_chip = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, .get = as3722_gpio_get, - .set = as3722_gpio_set, + .set_rv = as3722_gpio_set, .direction_input = pinctrl_gpio_direction_input, .direction_output = as3722_gpio_direction_output, .to_irq = as3722_gpio_to_irq, From 
9a40347181c20313bee2fb7e10dd0865368b9e53 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 12 Jun 2025 15:15:25 +0200 Subject: [PATCH 0235/2411] pinctrl: amdisp: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250612-gpiochip-set-rv-pinctrl-remaining-v1-16-556b0a530cd4@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amdisp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amdisp.c b/drivers/pinctrl/pinctrl-amdisp.c index 9256ed67bb20..2e706bf8bcde 100644 --- a/drivers/pinctrl/pinctrl-amdisp.c +++ b/drivers/pinctrl/pinctrl-amdisp.c @@ -117,7 +117,7 @@ static int amdisp_gpio_get(struct gpio_chip *gc, unsigned int gpio) return !!(pin_reg & BIT(GPIO_CONTROL_PIN)); } -static void amdisp_gpio_set(struct gpio_chip *gc, unsigned int gpio, int value) +static int amdisp_gpio_set(struct gpio_chip *gc, unsigned int gpio, int value) { unsigned long flags; u32 pin_reg; @@ -131,6 +131,8 @@ static void amdisp_gpio_set(struct gpio_chip *gc, unsigned int gpio, int value) pin_reg &= ~BIT(GPIO_CONTROL_PIN); writel(pin_reg, pctrl->gpiobase + gpio_offset[gpio]); raw_spin_unlock_irqrestore(&pctrl->lock, flags); + + return 0; } static int amdisp_gpiochip_add(struct platform_device *pdev, @@ -149,7 +151,7 @@ static int amdisp_gpiochip_add(struct platform_device *pdev, gc->direction_input = amdisp_gpio_direction_input; gc->direction_output = amdisp_gpio_direction_output; gc->get = amdisp_gpio_get; - gc->set = amdisp_gpio_set; + gc->set_rv = amdisp_gpio_set; gc->base = -1; gc->ngpio = ARRAY_SIZE(amdisp_range_pins); From c09a8ac1cd560c8f944611045841fed99790116b Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 16 Jun 2025 12:34:05 +0900 Subject: [PATCH 0236/2411] rust: alloc: implement `Borrow` and 
`BorrowMut` for `Vec` Implement `Borrow<[T]>` and `BorrowMut<[T]>` for `Vec`. This allows `Vec` to be used in generic APIs asking for types implementing those traits. `[T; N]` and `&mut [T]` also implement those traits, allowing users to use owned, borrowed, or heap-owned values. The implementation leverages `as_slice` and `as_mut_slice`. Reviewed-by: Alice Ryhl Reviewed-by: Benno Lossin Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250616-borrow_impls-v4-1-36f9beb3fe6a@nvidia.com Signed-off-by: Danilo Krummrich --- rust/kernel/alloc/kvec.rs | 53 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs index 606616fc0e59..cb543a61a33c 100644 --- a/rust/kernel/alloc/kvec.rs +++ b/rust/kernel/alloc/kvec.rs @@ -8,6 +8,7 @@ AllocError, Allocator, Box, Flags, }; use core::{ + borrow::{Borrow, BorrowMut}, fmt, marker::PhantomData, mem::{ManuallyDrop, MaybeUninit}, @@ -890,6 +891,58 @@ fn deref_mut(&mut self) -> &mut [T] { } } +/// # Examples +/// +/// ``` +/// # use core::borrow::Borrow; +/// struct Foo<B: Borrow<[u32]>>(B); +/// +/// // Owned array. +/// let owned_array = Foo([1, 2, 3]); +/// +/// // Owned vector. +/// let owned_vec = Foo(KVec::from_elem(0, 3, GFP_KERNEL)?); +/// +/// let arr = [1, 2, 3]; +/// // Borrowed slice from `arr`. +/// let borrowed_slice = Foo(&arr[..]); +/// # Ok::<(), Error>(()) +/// ``` +impl<T, A> Borrow<[T]> for Vec<T, A> +where + A: Allocator, +{ + fn borrow(&self) -> &[T] { + self.as_slice() + } +} + +/// # Examples +/// +/// ``` +/// # use core::borrow::BorrowMut; +/// struct Foo<B: BorrowMut<[u32]>>(B); +/// +/// // Owned array. +/// let owned_array = Foo([1, 2, 3]); +/// +/// // Owned vector. +/// let owned_vec = Foo(KVec::from_elem(0, 3, GFP_KERNEL)?); +/// +/// let mut arr = [1, 2, 3]; +/// // Borrowed slice from `arr`.
+/// let borrowed_slice = Foo(&mut arr[..]); +/// # Ok::<(), Error>(()) +/// ``` +impl<T, A> BorrowMut<[T]> for Vec<T, A> +where + A: Allocator, +{ + fn borrow_mut(&mut self) -> &mut [T] { + self.as_mut_slice() + } +} + impl<T: Eq, A> Eq for Vec<T, A> where A: Allocator {} impl<T, I: SliceIndex<[T]>, A> Index<I> for Vec<T, A> From f86c0036c7de5fc379115809c653dfd57c453330 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 16 Jun 2025 12:34:07 +0900 Subject: [PATCH 0237/2411] rust: alloc: implement `Borrow` and `BorrowMut` for `KBox` Implement `Borrow` and `BorrowMut` for `KBox`. This allows `KBox` to be used in generic APIs asking for types implementing those traits. `T` and `&mut T` also implement those traits, allowing users to use owned, borrowed, or heap-owned values. Reviewed-by: Alice Ryhl Reviewed-by: Benno Lossin Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250616-borrow_impls-v4-3-36f9beb3fe6a@nvidia.com Signed-off-by: Danilo Krummrich --- rust/kernel/alloc/kbox.rs | 57 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/rust/kernel/alloc/kbox.rs b/rust/kernel/alloc/kbox.rs index c386ff771d50..ccf1df7da96c 100644 --- a/rust/kernel/alloc/kbox.rs +++ b/rust/kernel/alloc/kbox.rs @@ -6,6 +6,7 @@ use super::allocator::{KVmalloc, Kmalloc, Vmalloc}; use super::{AllocError, Allocator, Flags}; use core::alloc::Layout; +use core::borrow::{Borrow, BorrowMut}; use core::fmt; use core::marker::PhantomData; use core::mem::ManuallyDrop; @@ -499,6 +500,62 @@ fn deref_mut(&mut self) -> &mut T { } } +/// # Examples +/// +/// ``` +/// # use core::borrow::Borrow; +/// # use kernel::alloc::KBox; +/// struct Foo<B: Borrow<u32>>(B); +/// +/// // Owned instance. +/// let owned = Foo(1); +/// +/// // Owned instance using `KBox`. +/// let owned_kbox = Foo(KBox::new(1, GFP_KERNEL)?); +/// +/// let i = 1; +/// // Borrowed from `i`.
+/// let borrowed = Foo(&i); +/// # Ok::<(), Error>(()) +/// ``` +impl<T, A> Borrow<T> for Box<T, A> +where + T: ?Sized, + A: Allocator, +{ + fn borrow(&self) -> &T { + self.deref() + } +} + +/// # Examples +/// +/// ``` +/// # use core::borrow::BorrowMut; +/// # use kernel::alloc::KBox; +/// struct Foo<B: BorrowMut<u32>>(B); +/// +/// // Owned instance. +/// let owned = Foo(1); +/// +/// // Owned instance using `KBox`. +/// let owned_kbox = Foo(KBox::new(1, GFP_KERNEL)?); +/// +/// let mut i = 1; +/// // Borrowed from `i`. +/// let borrowed = Foo(&mut i); +/// # Ok::<(), Error>(()) +/// ``` +impl<T, A> BorrowMut<T> for Box<T, A> +where + T: ?Sized, + A: Allocator, +{ + fn borrow_mut(&mut self) -> &mut T { + self.deref_mut() + } +} + impl<T, A> fmt::Display for Box<T, A> where T: ?Sized + fmt::Display, From f28413fe0899591492d8ca3cdf5fd35558d9c05d Mon Sep 17 00:00:00 2001 From: Hans Zhang <18255117159@163.com> Date: Sat, 7 Jun 2025 23:49:12 +0800 Subject: [PATCH 0238/2411] PCI: cadence: Replace private message routing enums with PCI core definitions The Cadence driver previously defined its own message routing enums (e.g., MSG_ROUTING_LOCAL) and message codes, which duplicated existing PCI core macros (PCIE_MSG_TYPE_R_LOCAL, PCIE_MSG_CODE_ASSERT_INTA, etc.) in drivers/pci/pci.h. These core definitions align with the PCIe r6.0 spec. Remove the driver-specific enums and switch to the centralized PCI core macros. This eliminates redundancy, ensures consistency, and simplifies future updates. No functional changes are introduced.
Signed-off-by: Hans Zhang <18255117159@163.com> Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250607154913.805027-2-18255117159@163.com --- .../pci/controller/cadence/pcie-cadence-ep.c | 2 +- drivers/pci/controller/cadence/pcie-cadence.h | 20 ------------------- 2 files changed, 1 insertion(+), 21 deletions(-) diff --git a/drivers/pci/controller/cadence/pcie-cadence-ep.c b/drivers/pci/controller/cadence/pcie-cadence-ep.c index 8ab6cf70c18e..77c5a19b2ab1 100644 --- a/drivers/pci/controller/cadence/pcie-cadence-ep.c +++ b/drivers/pci/controller/cadence/pcie-cadence-ep.c @@ -353,7 +353,7 @@ static void cdns_pcie_ep_assert_intx(struct cdns_pcie_ep *ep, u8 fn, u8 intx, } spin_unlock_irqrestore(&ep->lock, flags); - offset = CDNS_PCIE_NORMAL_MSG_ROUTING(MSG_ROUTING_LOCAL) | + offset = CDNS_PCIE_NORMAL_MSG_ROUTING(PCIE_MSG_TYPE_R_LOCAL) | CDNS_PCIE_NORMAL_MSG_CODE(msg_code); writel(0, ep->irq_cpu_addr + offset); } diff --git a/drivers/pci/controller/cadence/pcie-cadence.h b/drivers/pci/controller/cadence/pcie-cadence.h index a149845d341a..1d81c4bf6c6d 100644 --- a/drivers/pci/controller/cadence/pcie-cadence.h +++ b/drivers/pci/controller/cadence/pcie-cadence.h @@ -250,26 +250,6 @@ struct cdns_pcie_rp_ib_bar { struct cdns_pcie; -enum cdns_pcie_msg_routing { - /* Route to Root Complex */ - MSG_ROUTING_TO_RC, - - /* Use Address Routing */ - MSG_ROUTING_BY_ADDR, - - /* Use ID Routing */ - MSG_ROUTING_BY_ID, - - /* Route as Broadcast Message from Root Complex */ - MSG_ROUTING_BCAST, - - /* Local message; terminate at receiver (INTx messages) */ - MSG_ROUTING_LOCAL, - - /* Gather & route to Root Complex (PME_TO_Ack message) */ - MSG_ROUTING_GATHER, -}; - struct cdns_pcie_ops { int (*start_link)(struct cdns_pcie *pcie); void (*stop_link)(struct cdns_pcie *pcie); From 1a69c63fdf1c9095e132096081e27ac85a4d48a5 Mon Sep 17 00:00:00 2001 From: Hans Zhang <18255117159@163.com> Date: Sat, 7 Jun 2025 23:49:13 +0800 Subject: [PATCH 0239/2411] PCI: rockchip: Remove 
redundant PCIe message routing definitions The Rockchip driver contains duplicated message routing and INTx code definitions (e.g., ROCKCHIP_PCIE_MSG_ROUTING_TO_RC, ROCKCHIP_PCIE_MSG_CODE_ASSERT_INTA). These are already provided by the PCI core in drivers/pci/pci.h as PCIE_MSG_TYPE_R_RC and PCIE_MSG_CODE_ASSERT_INTA, respectively. Remove the driver-specific definitions in favor of the core definitions instead. This aligns the driver with the PCIe specification and reduces maintenance overhead. Signed-off-by: Hans Zhang <18255117159@163.com> [mani: commit message rewording] Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250607154913.805027-3-18255117159@163.com --- drivers/pci/controller/pcie-rockchip.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index 5864a20323f2..12bc8da59d73 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -215,20 +215,6 @@ #define RC_REGION_0_TYPE_MASK GENMASK(3, 0) #define MAX_AXI_WRAPPER_REGION_NUM 33 -#define ROCKCHIP_PCIE_MSG_ROUTING_TO_RC 0x0 -#define ROCKCHIP_PCIE_MSG_ROUTING_VIA_ADDR 0x1 -#define ROCKCHIP_PCIE_MSG_ROUTING_VIA_ID 0x2 -#define ROCKCHIP_PCIE_MSG_ROUTING_BROADCAST 0x3 -#define ROCKCHIP_PCIE_MSG_ROUTING_LOCAL_INTX 0x4 -#define ROCKCHIP_PCIE_MSG_ROUTING_PME_ACK 0x5 -#define ROCKCHIP_PCIE_MSG_CODE_ASSERT_INTA 0x20 -#define ROCKCHIP_PCIE_MSG_CODE_ASSERT_INTB 0x21 -#define ROCKCHIP_PCIE_MSG_CODE_ASSERT_INTC 0x22 -#define ROCKCHIP_PCIE_MSG_CODE_ASSERT_INTD 0x23 -#define ROCKCHIP_PCIE_MSG_CODE_DEASSERT_INTA 0x24 -#define ROCKCHIP_PCIE_MSG_CODE_DEASSERT_INTB 0x25 -#define ROCKCHIP_PCIE_MSG_CODE_DEASSERT_INTC 0x26 -#define ROCKCHIP_PCIE_MSG_CODE_DEASSERT_INTD 0x27 #define ROCKCHIP_PCIE_MSG_ROUTING_MASK GENMASK(7, 5) #define ROCKCHIP_PCIE_MSG_ROUTING(route) \ (((route) << 5) & ROCKCHIP_PCIE_MSG_ROUTING_MASK) From c0b93754547dde16c8370b8fdad5f396e7786647 Mon Sep 
17 00:00:00 2001 From: Wilfred Mallawa Date: Fri, 13 Jun 2025 12:19:09 +0200 Subject: [PATCH 0240/2411] PCI: dw-rockchip: Delay link training after hot reset in EP mode RK3588 TRM, section "11.6.1.3.3 Hot Reset and Link-Down Reset" states that: If you want to delay link re-establishment (after reset) so that you can reprogram some registers through DBI, you must set app_ltssm_enable =0 immediately after core_rst_n as shown in above. This can be achieved by enable the app_dly2_en, and end-up the delay by assert app_dly2_done. I.e. setting app_dly2_en will automatically deassert app_ltssm_enable on a hot reset, and setting app_dly2_done will re-assert app_ltssm_enable, re-enabling link training. When receiving a hot reset/link-down IRQ when running in EP mode, we will call dw_pcie_ep_linkdown(), which may update registers through DBI. Unless link training is inhibited, these register updates race with the link training. To avoid the race, set PCIE_LTSSM_APP_DLY2_EN so the controller never automatically trains the link after a link-down or hot reset interrupt. That way any DBI updates done in the dw_pcie_ep_linkdown() path will happen while the link is still down. 
Then allow link training by setting PCIE_LTSSM_APP_DLY2_DONE Co-developed-by: Niklas Cassel Signed-off-by: Wilfred Mallawa Signed-off-by: Niklas Cassel Signed-off-by: Manivannan Sadhasivam [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250613101908.2182053-2-cassel@kernel.org --- drivers/pci/controller/dwc/pcie-dw-rockchip.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-dw-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-rockchip.c index 93171a392879..cd1e9352b21f 100644 --- a/drivers/pci/controller/dwc/pcie-dw-rockchip.c +++ b/drivers/pci/controller/dwc/pcie-dw-rockchip.c @@ -58,6 +58,8 @@ /* Hot Reset Control Register */ #define PCIE_CLIENT_HOT_RESET_CTRL 0x180 +#define PCIE_LTSSM_APP_DLY2_EN BIT(1) +#define PCIE_LTSSM_APP_DLY2_DONE BIT(3) #define PCIE_LTSSM_ENABLE_ENHANCE BIT(4) /* LTSSM Status Register */ @@ -474,7 +476,7 @@ static irqreturn_t rockchip_pcie_ep_sys_irq_thread(int irq, void *arg) struct rockchip_pcie *rockchip = arg; struct dw_pcie *pci = &rockchip->pci; struct device *dev = pci->dev; - u32 reg; + u32 reg, val; reg = rockchip_pcie_readl_apb(rockchip, PCIE_CLIENT_INTR_STATUS_MISC); rockchip_pcie_writel_apb(rockchip, reg, PCIE_CLIENT_INTR_STATUS_MISC); @@ -485,6 +487,10 @@ static irqreturn_t rockchip_pcie_ep_sys_irq_thread(int irq, void *arg) if (reg & PCIE_LINK_REQ_RST_NOT_INT) { dev_dbg(dev, "hot reset or link-down reset\n"); dw_pcie_ep_linkdown(&pci->ep); + /* Stop delaying link training. 
*/ + val = HIWORD_UPDATE_BIT(PCIE_LTSSM_APP_DLY2_DONE); + rockchip_pcie_writel_apb(rockchip, val, + PCIE_CLIENT_HOT_RESET_CTRL); } if (reg & PCIE_RDLH_LINK_UP_CHGED) { @@ -566,8 +572,11 @@ static int rockchip_pcie_configure_ep(struct platform_device *pdev, return ret; } - /* LTSSM enable control mode */ - val = HIWORD_UPDATE_BIT(PCIE_LTSSM_ENABLE_ENHANCE); + /* + * LTSSM enable control mode, and automatically delay link training on + * hot reset/link-down reset. + */ + val = HIWORD_UPDATE_BIT(PCIE_LTSSM_ENABLE_ENHANCE | PCIE_LTSSM_APP_DLY2_EN); rockchip_pcie_writel_apb(rockchip, val, PCIE_CLIENT_HOT_RESET_CTRL); rockchip_pcie_writel_apb(rockchip, PCIE_CLIENT_EP_MODE, From 52161035571cd62be9865039b4be65615860dce0 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 16 Jun 2025 14:27:50 +0100 Subject: [PATCH 0241/2411] pinctrl: renesas: rzg2l: Validate pins before setting mux function Ensure only valid pins are configured by validating pin mappings before setting the mux function. Rename rzg2l_validate_gpio_pin() to rzg2l_validate_pin() to reflect its broader purpose validating both GPIO pins and muxed pins. This helps avoid invalid configurations. 
Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20250616132750.216368-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- drivers/pinctrl/renesas/pinctrl-rzg2l.c | 45 ++++++++++++++----------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c index 59c32a0d87f1..2a10ae0bf5bd 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c +++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c @@ -493,6 +493,23 @@ static void rzv2h_pmc_writeb(struct rzg2l_pinctrl *pctrl, u8 val, u16 offset) writeb(pwpr & ~PWPR_REGWE_A, pctrl->base + regs->pwpr); } +static int rzg2l_validate_pin(struct rzg2l_pinctrl *pctrl, + u64 cfg, u32 port, u8 bit) +{ + u8 pinmap = FIELD_GET(PIN_CFG_PIN_MAP_MASK, cfg); + u32 off = RZG2L_PIN_CFG_TO_PORT_OFFSET(cfg); + u64 data; + + if (!(pinmap & BIT(bit)) || port >= pctrl->data->n_port_pins) + return -EINVAL; + + data = pctrl->data->port_pin_configs[port]; + if (off != RZG2L_PIN_CFG_TO_PORT_OFFSET(data)) + return -EINVAL; + + return 0; +} + static void rzg2l_pinctrl_set_pfc_mode(struct rzg2l_pinctrl *pctrl, u8 pin, u8 off, u8 func) { @@ -536,6 +553,7 @@ static int rzg2l_pinctrl_set_mux(struct pinctrl_dev *pctldev, unsigned int i, *psel_val; struct group_desc *group; const unsigned int *pins; + int ret; func = pinmux_generic_get_function(pctldev, func_selector); if (!func) @@ -552,6 +570,10 @@ static int rzg2l_pinctrl_set_mux(struct pinctrl_dev *pctldev, u32 off = RZG2L_PIN_CFG_TO_PORT_OFFSET(*pin_data); u32 pin = RZG2L_PIN_ID_TO_PIN(pins[i]); + ret = rzg2l_validate_pin(pctrl, *pin_data, RZG2L_PIN_ID_TO_PORT(pins[i]), pin); + if (ret) + return ret; + dev_dbg(pctrl->dev, "port:%u pin: %u off:%x PSEL:%u\n", RZG2L_PIN_ID_TO_PORT(pins[i]), pin, off, psel_val[i] - hwcfg->func_base); @@ -806,23 +828,6 @@ static int rzg2l_dt_node_to_map(struct pinctrl_dev *pctldev, return ret; } -static int 
rzg2l_validate_gpio_pin(struct rzg2l_pinctrl *pctrl, - u64 cfg, u32 port, u8 bit) -{ - u8 pinmap = FIELD_GET(PIN_CFG_PIN_MAP_MASK, cfg); - u32 off = RZG2L_PIN_CFG_TO_PORT_OFFSET(cfg); - u64 data; - - if (!(pinmap & BIT(bit)) || port >= pctrl->data->n_port_pins) - return -EINVAL; - - data = pctrl->data->port_pin_configs[port]; - if (off != RZG2L_PIN_CFG_TO_PORT_OFFSET(data)) - return -EINVAL; - - return 0; -} - static u32 rzg2l_read_pin_config(struct rzg2l_pinctrl *pctrl, u32 offset, u8 bit, u32 mask) { @@ -1287,7 +1292,7 @@ static int rzg2l_pinctrl_pinconf_get(struct pinctrl_dev *pctldev, } else { bit = RZG2L_PIN_ID_TO_PIN(_pin); - if (rzg2l_validate_gpio_pin(pctrl, *pin_data, RZG2L_PIN_ID_TO_PORT(_pin), bit)) + if (rzg2l_validate_pin(pctrl, *pin_data, RZG2L_PIN_ID_TO_PORT(_pin), bit)) return -EINVAL; } @@ -1447,7 +1452,7 @@ static int rzg2l_pinctrl_pinconf_set(struct pinctrl_dev *pctldev, } else { bit = RZG2L_PIN_ID_TO_PIN(_pin); - if (rzg2l_validate_gpio_pin(pctrl, *pin_data, RZG2L_PIN_ID_TO_PORT(_pin), bit)) + if (rzg2l_validate_pin(pctrl, *pin_data, RZG2L_PIN_ID_TO_PORT(_pin), bit)) return -EINVAL; } @@ -1687,7 +1692,7 @@ static int rzg2l_gpio_request(struct gpio_chip *chip, unsigned int offset) u8 reg8; int ret; - ret = rzg2l_validate_gpio_pin(pctrl, *pin_data, port, bit); + ret = rzg2l_validate_pin(pctrl, *pin_data, port, bit); if (ret) return ret; From e54dd5059d46e44606395cb6ab15f022dc5a5902 Mon Sep 17 00:00:00 2001 From: Pengyu Luo Date: Tue, 17 Jun 2025 17:00:31 +0800 Subject: [PATCH 0242/2411] dt-bindings: dma: qcom,gpi: Document the sc8280xp GPI DMA engine Document the GPI DMA engine on the sc8280xp platform. 
Signed-off-by: Pengyu Luo Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250617090032.1487382-2-mitltlatltl@gmail.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/qcom,gpi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml index 7052468b15c8..19764452d2cf 100644 --- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml @@ -30,6 +30,7 @@ properties: - qcom,sa8775p-gpi-dma - qcom,sar2130p-gpi-dma - qcom,sc7280-gpi-dma + - qcom,sc8280xp-gpi-dma - qcom,sdx75-gpi-dma - qcom,sm6115-gpi-dma - qcom,sm6375-gpi-dma From ce3d5af2a92bd6cd775ce819f5e83857e8a277fb Mon Sep 17 00:00:00 2001 From: "Kotaro, Tokai" Date: Wed, 18 Jun 2025 15:35:42 +0900 Subject: [PATCH 0243/2411] perf vendor events arm64: Update FUJITSU-MONAKA pmu event Update the PMU events for FUJITSU-MONAKA, and also update common-and-microarch.json.
FUJITSU-MONAKA PMU Events Specification v1.1 and Errata v1.0 URL: https://github.com/fujitsu/FUJITSU-MONAKA Arm Architecture Reference Version L.b URL: https://developer.arm.com/documentation/ddi0487/lb/?lang=en Signed-off-by: Kotaro, Tokai Reviewed-by: James Clark Link: https://lore.kernel.org/r/20250618063618.1244363-1-fj0635gf@aa.jp.fujitsu.com Signed-off-by: Namhyung Kim --- .../arch/arm64/common-and-microarch.json | 70 +++++++++++++ .../arm64/fujitsu/monaka/core-imp-def.json | 2 +- .../fujitsu/monaka/cycle_accounting.json | 4 +- .../arch/arm64/fujitsu/monaka/exception.json | 2 +- .../arm64/fujitsu/monaka/fp_operation.json | 98 +++++++++++++++---- .../arch/arm64/fujitsu/monaka/l1d_cache.json | 10 +- .../arch/arm64/fujitsu/monaka/l1i_cache.json | 8 +- .../arch/arm64/fujitsu/monaka/l2_cache.json | 28 +++--- .../arch/arm64/fujitsu/monaka/l3_cache.json | 63 ++++++------ .../arch/arm64/fujitsu/monaka/ll_cache.json | 2 +- .../arch/arm64/fujitsu/monaka/pipeline.json | 6 +- .../arm64/fujitsu/monaka/spec_operation.json | 12 +-- .../arch/arm64/fujitsu/monaka/stall.json | 4 +- .../arch/arm64/fujitsu/monaka/sve.json | 44 ++++----- .../arch/arm64/fujitsu/monaka/tlb.json | 56 +++++------ 15 files changed, 265 insertions(+), 144 deletions(-) diff --git a/tools/perf/pmu-events/arch/arm64/common-and-microarch.json b/tools/perf/pmu-events/arch/arm64/common-and-microarch.json index e40be37addf8..2416d9f8a83d 100644 --- a/tools/perf/pmu-events/arch/arm64/common-and-microarch.json +++ b/tools/perf/pmu-events/arch/arm64/common-and-microarch.json @@ -1833,5 +1833,75 @@ "EventCode": "0x8324", "EventName": "L1I_CACHE_REFILL_PERCYC", "BriefDescription": "Level 1 instruction or unified cache refills in progress." + }, + { + "EventCode": "0x8431", + "EventName": "ASE_FP_VREDUCE_SPEC", + "BriefDescription": "Floating-point operation_speculatively_executed, Advanced SIMD pairwise or reduction." 
+ }, + { + "EventCode": "0x8432", + "EventName": "SVE_FP_PREDUCE_SPEC", + "BriefDescription": "Floating-point operation_speculatively_executed, Advanced SIMD pairwise add step or pairwise reduce step." + }, + { + "EventCode": "0x8443", + "EventName": "ASE_FP_BF16_MIN_SPEC", + "BriefDescription": "Advanced SIMD data processing operation speculatively_executed, smallest type is BFloat16 floating-point." + }, + { + "EventCode": "0x8444", + "EventName": "ASE_FP_FP8_MIN_SPEC", + "BriefDescription": "Advanced SIMD data processing operation speculatively_executed, smallest type is 8-bit floating-point." + }, + { + "EventCode": "0x844B", + "EventName": "ASE_SVE_FP_BF16_MIN_SPEC", + "BriefDescription": "Advanced SIMD data processing or SVE data processing operation speculatively_executed, smallest type is BFloat16 floating-point." + }, + { + "EventCode": "0x844C", + "EventName": "ASE_SVE_FP_FP8_MIN_SPEC", + "BriefDescription": "Advanced SIMD data processing or SVE data processing operation speculatively_executed, smallest type is 8-bit floating-point." + }, + { + "EventCode": "0x8463", + "EventName": "SVE_FP_BF16_MIN_SPEC", + "BriefDescription": "SVE data processing operation speculatively_executed, smallest type is BFloat16 floating-point." + }, + { + "EventCode": "0x8464", + "EventName": "SVE_FP_FP8_MIN_SPEC", + "BriefDescription": "SVE data processing operation speculatively_executed, smallest type is 8-bit floating-point." + }, + { + "EventCode": "0x8473", + "EventName": "FP_BF16_MIN_SPEC", + "BriefDescription": "Floating-point operation speculatively_executed, smallest type is BFloat16 floating-point." + }, + { + "EventCode": "0x8474", + "EventName": "FP_FP8_MIN_SPEC", + "BriefDescription": "Floating-point operation speculatively_executed, smallest type is 8-bit floating-point." 
+ }, + { + "EventCode": "0x8483", + "EventName": "FP_BF16_FIXED_MIN_OPS_SPEC", + "BriefDescription": "Non-scalable element arithmetic operations speculatively executed, smallest type is BFloat16 floating-point." + }, + { + "EventCode": "0x8484", + "EventName": "FP_FP8_FIXED_MIN_OPS_SPEC", + "BriefDescription": "Non-scalable element arithmetic operations speculatively executed, smallest type is 8-bit floating-point." + }, + { + "EventCode": "0x848B", + "EventName": "FP_BF16_SCALE_MIN_OPS_SPEC", + "BriefDescription": "Scalable element arithmetic operations speculatively executed, smallest type is BFloat16 floating-point." + }, + { + "EventCode": "0x848C", + "EventName": "FP_FP8_SCALE_MIN_OPS_SPEC", + "BriefDescription": "Scalable element arithmetic operations speculatively executed, smallest type is 8-bit floating-point." } ] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/core-imp-def.json index 52f5ca1482fe..57a854ff5033 100644 --- a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/core-imp-def.json +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/core-imp-def.json @@ -1,6 +1,6 @@ [ { "ArchStdEvent": "L1I_CACHE_PRF", - "BriefDescription": "This event counts fetch counted by either Level 1 instruction hardware prefetch or Level 1 instruction software prefetch." + "BriefDescription": "This event counts L1I_CACHE caused by hardware prefetch or software prefetch." 
} ] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/cycle_accounting.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/cycle_accounting.json index 24ff5d8dbb98..84374adbb0f8 100644 --- a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/cycle_accounting.json +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/cycle_accounting.json @@ -12,12 +12,12 @@ { "EventCode": "0x0184", "EventName": "LD_COMP_WAIT", - "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L1D cache, L2 cache and memory access." + "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L1D cache, L2 cache, L3 cache and memory access." }, { "EventCode": "0x0185", "EventName": "LD_COMP_WAIT_EX", - "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L1D cache, L2 cache and memory access." + "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L1D cache, L2 cache, L3 cache and memory access." }, { "EventCode": "0x0186", diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/exception.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/exception.json index f231712fe261..fba66bbcfeb5 100644 --- a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/exception.json +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/exception.json @@ -33,7 +33,7 @@ }, { "ArchStdEvent": "EXC_SMC", - "BriefDescription": "This event counts only Secure Monitor Call exceptions. The counter does not increment on SMC instructions trapped as a Hyp Trap exception." + "BriefDescription": "This event counts only Secure Monitor Call exceptions. 
This event does not increment on SMC instructions trapped as a Hyp Trap exception." }, { "ArchStdEvent": "EXC_HVC", diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/fp_operation.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/fp_operation.json index a3c368959199..2ffdc16530dd 100644 --- a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/fp_operation.json +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/fp_operation.json @@ -2,7 +2,7 @@ { "EventCode": "0x0105", "EventName": "FP_MV_SPEC", - "BriefDescription": "This event counts architecturally executed floating-point move operations." + "BriefDescription": "This event counts architecturally executed floating-point move operation." }, { "EventCode": "0x0112", @@ -24,7 +24,7 @@ }, { "ArchStdEvent": "ASE_SVE_FP_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE floating-point operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD or SVE floating-point operation." }, { "ArchStdEvent": "FP_HP_SPEC", @@ -40,7 +40,7 @@ }, { "ArchStdEvent": "ASE_SVE_FP_HP_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE half-precision floating-point operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD or SVE half-precision floating-point operation." }, { "ArchStdEvent": "FP_SP_SPEC", @@ -56,7 +56,7 @@ }, { "ArchStdEvent": "ASE_SVE_FP_SP_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE single-precision floating-point operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD or SVE single-precision floating-point operation." }, { "ArchStdEvent": "FP_DP_SPEC", @@ -72,7 +72,7 @@ }, { "ArchStdEvent": "ASE_SVE_FP_DP_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE double-precision floating-point operations." 
+ "BriefDescription": "This event counts architecturally executed Advanced SIMD or SVE double-precision floating-point operation." }, { "ArchStdEvent": "FP_DIV_SPEC", @@ -88,7 +88,7 @@ }, { "ArchStdEvent": "ASE_SVE_FP_DIV_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE floating-point divide operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD or SVE floating-point divide operation." }, { "ArchStdEvent": "FP_SQRT_SPEC", @@ -104,7 +104,7 @@ }, { "ArchStdEvent": "ASE_SVE_FP_SQRT_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE floating-point square root operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD or SVE floating-point square root operation." }, { "ArchStdEvent": "ASE_FP_FMA_SPEC", @@ -116,11 +116,11 @@ }, { "ArchStdEvent": "ASE_SVE_FP_FMA_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE floating-point FMA operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD or SVE floating-point FMA operation." }, { "ArchStdEvent": "FP_MUL_SPEC", - "BriefDescription": "This event counts architecturally executed floating-point multiply operations." + "BriefDescription": "This event counts architecturally executed floating-point multiply operation." }, { "ArchStdEvent": "ASE_FP_MUL_SPEC", @@ -132,11 +132,11 @@ }, { "ArchStdEvent": "ASE_SVE_FP_MUL_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE floating-point multiply operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD or SVE floating-point multiply operation." }, { "ArchStdEvent": "FP_ADDSUB_SPEC", - "BriefDescription": "This event counts architecturally executed floating-point add or subtract operations." 
+ "BriefDescription": "This event counts architecturally executed floating-point add or subtract operation." }, { "ArchStdEvent": "ASE_FP_ADDSUB_SPEC", @@ -148,19 +148,19 @@ }, { "ArchStdEvent": "ASE_SVE_FP_ADDSUB_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE floating-point add or subtract operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD or SVE floating-point add or subtract operation." }, { "ArchStdEvent": "ASE_FP_RECPE_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD floating-point reciprocal estimate operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD floating-point reciprocal estimate operation." }, { "ArchStdEvent": "SVE_FP_RECPE_SPEC", - "BriefDescription": "This event counts architecturally executed SVE floating-point reciprocal estimate operations." + "BriefDescription": "This event counts architecturally executed SVE floating-point reciprocal estimate operation." }, { "ArchStdEvent": "ASE_SVE_FP_RECPE_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE floating-point reciprocal estimate operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD or SVE floating-point reciprocal estimate operation." }, { "ArchStdEvent": "ASE_FP_CVT_SPEC", @@ -172,15 +172,15 @@ }, { "ArchStdEvent": "ASE_SVE_FP_CVT_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE floating-point convert operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD or SVE floating-point convert operation." }, { "ArchStdEvent": "SVE_FP_AREDUCE_SPEC", - "BriefDescription": "This event counts architecturally executed SVE floating-point accumulating reduction operations." 
+ "BriefDescription": "This event counts architecturally executed SVE floating-point accumulating reduction operation." }, { "ArchStdEvent": "ASE_FP_PREDUCE_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD floating-point pairwise add step operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD floating-point pairwise add step operation." }, { "ArchStdEvent": "SVE_FP_VREDUCE_SPEC", @@ -188,15 +188,15 @@ }, { "ArchStdEvent": "ASE_SVE_FP_VREDUCE_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE floating-point vector reduction operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD or SVE floating-point vector reduction operation." }, { "ArchStdEvent": "FP_SCALE_OPS_SPEC", - "BriefDescription": "This event counts architecturally executed SVE arithmetic operations. See FP_SCALE_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by (128 / CSIZE) and by twice that amount for operations that would also be counted by SVE_FP_FMA_SPEC." + "BriefDescription": "This event counts architecturally executed SVE arithmetic operation. See FP_SCALE_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by (128 / CSIZE) and by twice that amount for operations that would also be counted by SVE_FP_FMA_SPEC." }, { "ArchStdEvent": "FP_FIXED_OPS_SPEC", - "BriefDescription": "This event counts architecturally executed v8SIMD&FP arithmetic operations. See FP_FIXED_OPS_SPEC of ARMv9 Reference Manual for more information. The event counter is incremented by the specified number of elements for Advanced SIMD operations or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC." + "BriefDescription": "This event counts architecturally executed v8SIMD&FP arithmetic operation. 
See FP_FIXED_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by the specified number of elements for Advanced SIMD operations or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC." }, { "ArchStdEvent": "ASE_SVE_FP_DOT_SPEC", @@ -205,5 +205,61 @@ { "ArchStdEvent": "ASE_SVE_FP_MMLA_SPEC", "BriefDescription": "This event counts architecturally executed microarchitectural Advanced SIMD or SVE floating-point matrix multiply operation." + }, + { + "ArchStdEvent": "ASE_FP_VREDUCE_SPEC", + "BriefDescription": "This event counts architecturally executed Advanced SIMD floating-point vector reduction operation." + }, + { + "ArchStdEvent": "SVE_FP_PREDUCE_SPEC", + "BriefDescription": "This event counts architecturally executed SVE floating-point pairwise add step operation." + }, + { + "ArchStdEvent": "ASE_FP_BF16_MIN_SPEC", + "BriefDescription": "This event counts architecturally executed Advanced SIMD data processing operations, smallest type is BFloat16 floating-point." + }, + { + "ArchStdEvent": "ASE_FP_FP8_MIN_SPEC", + "BriefDescription": "This event counts architecturally executed Advanced SIMD data processing operations, smallest type is 8-bit floating-point." + }, + { + "ArchStdEvent": "ASE_SVE_FP_BF16_MIN_SPEC", + "BriefDescription": "This event counts architecturally executed Advanced SIMD data processing or SVE data processing operations, smallest type is BFloat16 floating-point." + }, + { + "ArchStdEvent": "ASE_SVE_FP_FP8_MIN_SPEC", + "BriefDescription": "This event counts architecturally executed Advanced SIMD data processing or SVE data processing operations, smallest type is 8-bit floating-point." + }, + { + "ArchStdEvent": "SVE_FP_BF16_MIN_SPEC", + "BriefDescription": "This event counts architecturally executed SVE data processing operations, smallest type is BFloat16 floating-point." 
+ }, + { + "ArchStdEvent": "SVE_FP_FP8_MIN_SPEC", + "BriefDescription": "This event counts architecturally executed SVE data processing operations, smallest type is 8-bit floating-point." + }, + { + "ArchStdEvent": "FP_BF16_MIN_SPEC", + "BriefDescription": "This event counts architecturally executed data processing operations, smallest type is BFloat16 floating-point." + }, + { + "ArchStdEvent": "FP_FP8_MIN_SPEC", + "BriefDescription": "This event counts architecturally executed data processing operations, smallest type is 8-bit floating-point." + }, + { + "ArchStdEvent": "FP_BF16_FIXED_MIN_OPS_SPEC", + "BriefDescription": "This event counts architecturally executed non-scalable element arithmetic operations, smallest type is BFloat16 floating-point." + }, + { + "ArchStdEvent": "FP_FP8_FIXED_MIN_OPS_SPEC", + "BriefDescription": "This event counts architecturally executed non-scalable element arithmetic operations, smallest type is 8-bit floating-point." + }, + { + "ArchStdEvent": "FP_BF16_SCALE_MIN_OPS_SPEC", + "BriefDescription": "This event counts architecturally executed scalable element arithmetic operations, smallest type is BFloat16 floating-point." + }, + { + "ArchStdEvent": "FP_FP8_SCALE_MIN_OPS_SPEC", + "BriefDescription": "This event counts architecturally executed scalable element arithmetic operations, smallest type is 8-bit floating-point." } ] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l1d_cache.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l1d_cache.json index b0818a2fedb0..a2ff3b49ac0d 100644 --- a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l1d_cache.json +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l1d_cache.json @@ -72,11 +72,11 @@ }, { "ArchStdEvent": "L1D_CACHE_HWPRF", - "BriefDescription": "This event counts access counted by L1D_CACHE that is due to a hardware prefetch." + "BriefDescription": "This event counts L1D_CACHE caused by hardware prefetch." 
}, { "ArchStdEvent": "L1D_CACHE_REFILL_HWPRF", - "BriefDescription": "This event counts hardware prefetch counted by L1D_CACHE_HWPRF that causes a refill of the Level 1 data cache from outside of the Level 1 data cache." + "BriefDescription": "This event counts L1D_CACHE_REFILL caused by hardware prefetch." }, { "ArchStdEvent": "L1D_CACHE_HIT_RD", @@ -100,14 +100,14 @@ }, { "ArchStdEvent": "L1D_CACHE_PRF", - "BriefDescription": "This event counts fetch counted by either Level 1 data hardware prefetch or Level 1 data software prefetch." + "BriefDescription": "This event counts L1D_CACHE caused by hardware prefetch or software prefetch." }, { "ArchStdEvent": "L1D_CACHE_REFILL_PRF", - "BriefDescription": "This event counts hardware prefetch counted by L1D_CACHE_PRF that causes a refill of the Level 1 data cache from outside of the Level 1 data cache." + "BriefDescription": "This event counts L1D_CACHE_REFILL caused by hardware prefetch or software prefetch." }, { "ArchStdEvent": "L1D_CACHE_REFILL_PERCYC", - "BriefDescription": "The counter counts by the number of cache refills counted by L1D_CACHE_REFILL in progress on each Processor cycle." + "BriefDescription": "This counter counts by the number of cache refills counted by L1D_CACHE_REFILL in progress on each Processor cycle." } ] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l1i_cache.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l1i_cache.json index 8680d8ec461d..5250af8631c0 100644 --- a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l1i_cache.json +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l1i_cache.json @@ -23,11 +23,11 @@ }, { "ArchStdEvent": "L1I_CACHE_HWPRF", - "BriefDescription": "This event counts access counted by L1I_CACHE that is due to a hardware prefetch." + "BriefDescription": "This event counts L1I_CACHE caused by hardware prefetch." 
}, { "ArchStdEvent": "L1I_CACHE_REFILL_HWPRF", - "BriefDescription": "This event counts hardware prefetch counted by L1I_CACHE_HWPRF that causes a refill of the Level 1 instruction cache from outside of the Level 1 instruction cache." + "BriefDescription": "This event counts L1I_CACHE_REFILL caused by hardware prefetch." }, { "ArchStdEvent": "L1I_CACHE_HIT_RD", @@ -43,10 +43,10 @@ }, { "ArchStdEvent": "L1I_CACHE_REFILL_PRF", - "BriefDescription": "This event counts hardware prefetch counted by L1I_CACHE_PRF that causes a refill of the Level 1 instruction cache from outside of the Level 1 instruction cache." + "BriefDescription": "This event counts L1I_CACHE_REFILL caused by hardware prefetch or software prefetch." }, { "ArchStdEvent": "L1I_CACHE_REFILL_PERCYC", - "BriefDescription": "The counter counts by the number of cache refills counted by L1I_CACHE_REFILL in progress on each Processor cycle." + "BriefDescription": "This counter counts by the number of cache refills counted by L1I_CACHE_REFILL in progress on each Processor cycle." } ] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l2_cache.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l2_cache.json index 9e092752e6db..67f9151d7685 100644 --- a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l2_cache.json +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l2_cache.json @@ -21,19 +21,19 @@ }, { "ArchStdEvent": "L2D_CACHE_RD", - "BriefDescription": "This event counts L2D CACHE caused by read access." + "BriefDescription": "This event counts L2D_CACHE caused by read access." }, { "ArchStdEvent": "L2D_CACHE_WR", - "BriefDescription": "This event counts L2D CACHE caused by write access." + "BriefDescription": "This event counts L2D_CACHE caused by write access." }, { "ArchStdEvent": "L2D_CACHE_REFILL_RD", - "BriefDescription": "This event counts L2D CACHE_REFILL caused by read access." + "BriefDescription": "This event counts L2D_CACHE_REFILL caused by read access." 
}, { "ArchStdEvent": "L2D_CACHE_REFILL_WR", - "BriefDescription": "This event counts L2D CACHE_REFILL caused by write access." + "BriefDescription": "This event counts L2D_CACHE_REFILL caused by write access." }, { "ArchStdEvent": "L2D_CACHE_WB_VICTIM", @@ -57,7 +57,7 @@ { "EventCode": "0x0305", "EventName": "L2D_CACHE_HWPRF_ADJACENT", - "BriefDescription": "This event counts L2D_CACHE caused by hardware adjacent prefetch access." + "BriefDescription": "This event counts L2D_CACHE caused by hardware adjacent prefetch." }, { "EventCode": "0x0308", @@ -111,7 +111,7 @@ }, { "ArchStdEvent": "L2D_CACHE_LMISS_RD", - "BriefDescription": "This event counts operations that cause a refill of the L2D cache that incurs additional latency." + "BriefDescription": "This event counts operations that cause a refill of the L2 cache that incurs additional latency." }, { "ArchStdEvent": "L2D_CACHE_MISS", @@ -119,23 +119,23 @@ }, { "ArchStdEvent": "L2D_CACHE_HWPRF", - "BriefDescription": "This event counts access counted by L2D_CACHE that is due to a hardware prefetch." + "BriefDescription": "This event counts L2D_CACHE caused by hardware prefetch." }, { "ArchStdEvent": "L2D_CACHE_REFILL_HWPRF", - "BriefDescription": "This event counts hardware prefetch counted by L2D_CACHE_HWPRF that causes a refill of the Level 2 cache, or any Level 1 data and instruction cache of this PE, from outside of those caches." + "BriefDescription": "This event counts L2D_CACHE_REFILL caused by hardware prefetch." }, { "ArchStdEvent": "L2D_CACHE_HIT_RD", - "BriefDescription": "This event counts demand read counted by L2D_CACHE_RD that hits in the Level 2 data cache." + "BriefDescription": "This event counts demand read counted by L2D_CACHE_RD that hits in the Level 2 cache." }, { "ArchStdEvent": "L2D_CACHE_HIT_WR", - "BriefDescription": "This event counts demand write counted by L2D_CACHE_WR that hits in the Level 2 data cache." 
+ "BriefDescription": "This event counts demand write counted by L2D_CACHE_WR that hits in the Level 2 cache." }, { "ArchStdEvent": "L2D_CACHE_HIT", - "BriefDescription": "This event counts access counted by L2D_CACHE that hits in the Level 2 data cache." + "BriefDescription": "This event counts access counted by L2D_CACHE that hits in the Level 2 cache." }, { "ArchStdEvent": "L2D_LFB_HIT_RD", @@ -147,14 +147,14 @@ }, { "ArchStdEvent": "L2D_CACHE_PRF", - "BriefDescription": "This event counts fetch counted by either Level 2 data hardware prefetch or Level 2 data software prefetch." + "BriefDescription": "This event counts L2D_CACHE caused by hardware prefetch or software prefetch." }, { "ArchStdEvent": "L2D_CACHE_REFILL_PRF", - "BriefDescription": "This event counts hardware prefetch counted by L2D_CACHE_PRF that causes a refill of the Level 2 data cache from outside of the Level 1 data cache." + "BriefDescription": "This event counts L2D_CACHE_REFILL caused by hardware prefetch or software prefetch." }, { "ArchStdEvent": "L2D_CACHE_REFILL_PERCYC", - "BriefDescription": "The counter counts by the number of cache refills counted by L2D_CACHE_REFILL in progress on each Processor cycle." + "BriefDescription": "This counter counts by the number of cache refills counted by L2D_CACHE_REFILL in progress on each Processor cycle." } ] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l3_cache.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l3_cache.json index 3f3e0d22ac68..cf49c4d452b7 100644 --- a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l3_cache.json +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l3_cache.json @@ -30,17 +30,17 @@ { "EventCode": "0x0394", "EventName": "L2D_CACHE_REFILL_L3D_CACHE_PRF", - "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_CACHE caused by prefetch access." + "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_CACHE caused by hardware prefetch or software prefetch." 
}, { "EventCode": "0x0395", "EventName": "L2D_CACHE_REFILL_L3D_CACHE_HWPRF", - "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_CACHE caused by hardware prefetch access." + "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_CACHE caused by hardware prefetch." }, { "EventCode": "0x0396", "EventName": "L2D_CACHE_REFILL_L3D_MISS", - "BriefDescription": "This event counts operations that cause a miss of the L3 cache." + "BriefDescription": "This event counts operations that cause a miss of the L3 cache. Note: This event may count inaccurately." }, { "EventCode": "0x0397", @@ -60,17 +60,17 @@ { "EventCode": "0x039A", "EventName": "L2D_CACHE_REFILL_L3D_MISS_PRF", - "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS caused by prefetch access." + "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS caused by hardware prefetch or software prefetch. Note: This event may count inaccurately." }, { "EventCode": "0x039B", "EventName": "L2D_CACHE_REFILL_L3D_MISS_HWPRF", - "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS caused by hardware prefetch access." + "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS caused by hardware prefetch. Note: This event may count inaccurately." }, { "EventCode": "0x039C", "EventName": "L2D_CACHE_REFILL_L3D_HIT", - "BriefDescription": "This event counts operations that cause a hit of the L3 cache." + "BriefDescription": "This event counts operations that cause a hit of the L3 cache. Note: This event may count inaccurately." }, { "EventCode": "0x039D", @@ -90,70 +90,65 @@ { "EventCode": "0x03A0", "EventName": "L2D_CACHE_REFILL_L3D_HIT_PRF", - "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_HIT caused by prefetch access." + "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_HIT caused by hardware prefetch or software prefetch. Note: This event may count inaccurately." 
}, { "EventCode": "0x03A1", "EventName": "L2D_CACHE_REFILL_L3D_HIT_HWPRF", - "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_HIT caused by hardware prefetch access." - }, - { - "EventCode": "0x03A2", - "EventName": "L2D_CACHE_REFILL_L3D_MISS_PFTGT_HIT", - "BriefDescription": "This event counts the number of L3 cache misses where the requests hit the PFTGT buffer." + "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_HIT caused by hardware prefetch. Note: This event may count inaccurately." }, { "EventCode": "0x03A3", - "EventName": "L2D_CACHE_REFILL_L3D_MISS_PFTGT_HIT_DM", - "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS_PFTGT_HIT caused by demand access." + "EventName": "L2D_CACHE_REFILL_L3D_MISS_DM_PFTGT_HIT", + "BriefDescription": "This event counts the number of L3 cache misses caused by demand access where the requests hit the PFTGT buffer." }, { "EventCode": "0x03A4", - "EventName": "L2D_CACHE_REFILL_L3D_MISS_PFTGT_HIT_DM_RD", - "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS_PFTGT_HIT caused by demand read access." + "EventName": "L2D_CACHE_REFILL_L3D_MISS_DM_RD_PFTGT_HIT", + "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS_DM_PFTGT_HIT caused by read access." }, { "EventCode": "0x03A5", - "EventName": "L2D_CACHE_REFILL_L3D_MISS_PFTGT_HIT_DM_WR", - "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS_PFTGT_HIT caused by demand write access." + "EventName": "L2D_CACHE_REFILL_L3D_MISS_DM_WR_PFTGT_HIT", + "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS_DM_PFTGT_HIT caused by write access." }, { "EventCode": "0x03A6", - "EventName": "L2D_CACHE_REFILL_L3D_MISS_L_MEM", - "BriefDescription": "This event counts the number of L3 cache misses where the requests access the memory in the same socket as the requests." 
+ "EventName": "L2D_CACHE_REFILL_L3D_MISS_DM_L_MEM", + "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS_DM where the requests access the memory in the same socket as the requests." }, { "EventCode": "0x03A7", - "EventName": "L2D_CACHE_REFILL_L3D_MISS_FR_MEM", - "BriefDescription": "This event counts the number of L3 cache misses where the requests access the memory in the different socket from the requests." + "EventName": "L2D_CACHE_REFILL_L3D_MISS_DM_FR_MEM", + "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS_DM where the requests access the memory in the different socket from the requests." }, { "EventCode": "0x03A8", - "EventName": "L2D_CACHE_REFILL_L3D_MISS_L_L2", - "BriefDescription": "This event counts the number of L3 cache misses where the requests access the different L2 cache from the requests in the same Numa nodes as the requests." + "EventName": "L2D_CACHE_REFILL_L3D_MISS_DM_L_L2", + "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS_DM where the requests access the different L2 cache from the requests in the same Numa nodes as the requests." }, { "EventCode": "0x03A9", - "EventName": "L2D_CACHE_REFILL_L3D_MISS_NR_L2", - "BriefDescription": "This event counts the number of L3 cache misses where the requests access L2 cache in the different Numa nodes from the requests in the same socket as the requests." + "EventName": "L2D_CACHE_REFILL_L3D_MISS_DM_NR_L2", + "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS_DM where the requests access L2 cache in the different Numa nodes from the requests in the same socket as the requests." }, { "EventCode": "0x03AA", - "EventName": "L2D_CACHE_REFILL_L3D_MISS_NR_L3", - "BriefDescription": "This event counts the number of L3 cache misses where the requests access L3 cache in the different Numa nodes from the requests in the same socket as the requests." 
+ "EventName": "L2D_CACHE_REFILL_L3D_MISS_DM_NR_L3", + "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS_DM where the requests access L3 cache in the different Numa nodes from the requests in the same socket as the requests." }, { "EventCode": "0x03AB", - "EventName": "L2D_CACHE_REFILL_L3D_MISS_FR_L2", - "BriefDescription": "This event counts the number of L3 cache misses where the requests access L2 cache in the different socket from the requests." + "EventName": "L2D_CACHE_REFILL_L3D_MISS_DM_FR_L2", + "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS_DM where the requests access L2 cache in the different socket from the requests." }, { "EventCode": "0x03AC", - "EventName": "L2D_CACHE_REFILL_L3D_MISS_FR_L3", - "BriefDescription": "This event counts the number of L3 cache misses where the requests access L3 cache in the different socket from the requests." + "EventName": "L2D_CACHE_REFILL_L3D_MISS_DM_FR_L3", + "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS_DM where the requests access L3 cache in the different socket from the requests." }, { "ArchStdEvent": "L3D_CACHE_LMISS_RD", - "BriefDescription": "This event counts access counted by L3D_CACHE that is not completed by the L3D cache, and a Memory-read operation, as defined by the L2D_CACHE_REFILL_L3D_MISS events." + "BriefDescription": "This event counts access counted by L3D_CACHE that is not completed by the L3 cache, and a Memory-read operation, as defined by the L2D_CACHE_REFILL_L3D_MISS events. Note: This event may count inaccurately." 
} ] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/ll_cache.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/ll_cache.json index a441b84729ab..d49d9f6df72c 100644 --- a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/ll_cache.json +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/ll_cache.json @@ -5,6 +5,6 @@ }, { "ArchStdEvent": "LL_CACHE_MISS_RD", - "BriefDescription": "This event counts access counted by L3D_CACHE that is not completed by the L3D cache, and a Memory-read operation, as defined by the L2D_CACHE_REFILL_L3D_MISS events." + "BriefDescription": "This event counts access counted by L3D_CACHE that is not completed by the L3 cache, and a Memory-read operation, as defined by the L2D_CACHE_REFILL_L3D_MISS events. Note: This event may count inaccurately." } ] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/pipeline.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/pipeline.json index 3cc3105f4a5e..15cf54730b85 100644 --- a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/pipeline.json +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/pipeline.json @@ -147,17 +147,17 @@ { "EventCode": "0x02B0", "EventName": "L1_PIPE_COMP_GATHER_2FLOW", - "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 2 flows because 2 elements could not be combined." + "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 2-flows because 2 elements could not be combined." }, { "EventCode": "0x02B1", "EventName": "L1_PIPE_COMP_GATHER_1FLOW", - "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 1 flow because 2 elements could be combined." + "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 1-flow because 2 elements could be combined." 
}, { "EventCode": "0x02B2", "EventName": "L1_PIPE_COMP_GATHER_0FLOW", - "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 0 flow because both predicate values are 0." + "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 0-flow because both predicate values are 0." }, { "EventCode": "0x02B3", diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/spec_operation.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/spec_operation.json index 4841b43e2871..1caf3baeae4e 100644 --- a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/spec_operation.json +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/spec_operation.json @@ -81,7 +81,7 @@ }, { "ArchStdEvent": "CSDB_SPEC", - "BriefDescription": "This event counts speculatively executed control speculation barrier instructions." + "BriefDescription": "This event counts architecturally executed control speculation barrier instructions." }, { "EventCode": "0x0108", @@ -91,17 +91,17 @@ { "EventCode": "0x0109", "EventName": "IEL_SPEC", - "BriefDescription": "This event counts architecturally executed inter-element manipulation operations." + "BriefDescription": "This event counts architecturally executed inter-element manipulation operation." }, { "EventCode": "0x010A", "EventName": "IREG_SPEC", - "BriefDescription": "This event counts architecturally executed inter-register manipulation operations." + "BriefDescription": "This event counts architecturally executed inter-register manipulation operation." }, { "EventCode": "0x011A", "EventName": "BC_LD_SPEC", - "BriefDescription": "This event counts architecturally executed SIMD broadcast floating-point load operations." + "BriefDescription": "This event counts architecturally executed SIMD broadcast floating-point load operation." 
}, { "EventCode": "0x011B", @@ -130,7 +130,7 @@ }, { "ArchStdEvent": "ASE_INST_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD operation." }, { "ArchStdEvent": "INT_SPEC", @@ -158,7 +158,7 @@ }, { "ArchStdEvent": "NONFP_SPEC", - "BriefDescription": "This event counts architecturally executed non-floating-point operations." + "BriefDescription": "This event counts architecturally executed non-floating-point operation." }, { "ArchStdEvent": "INT_SCALE_OPS_SPEC", diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/stall.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/stall.json index 5fb81e2a0a07..e1e16d513828 100644 --- a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/stall.json +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/stall.json @@ -5,7 +5,7 @@ }, { "ArchStdEvent": "STALL_BACKEND", - "BriefDescription": "This event counts every cycle counted by the CPU_CYCLES event on that no operation was issued because the backend is unable to accept any operations." + "BriefDescription": "This event counts every cycle counted by the CPU_CYCLES event on that no operation was issued because the backend is unable to accept any operation." }, { "ArchStdEvent": "STALL", @@ -69,7 +69,7 @@ }, { "ArchStdEvent": "STALL_BACKEND_L2D", - "BriefDescription": "This event counts every cycle counted by STALL_BACKEND_MEMBOUND when there is a demand data miss in L2D cache." + "BriefDescription": "This event counts every cycle counted by STALL_BACKEND_MEMBOUND when there is a demand data miss in L2 cache." 
}, { "ArchStdEvent": "STALL_BACKEND_CPUBOUND", diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/sve.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/sve.json index e66b5af00f90..88cab0caf49e 100644 --- a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/sve.json +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/sve.json @@ -13,11 +13,11 @@ }, { "ArchStdEvent": "ASE_SVE_INST_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD or SVE operation." }, { "ArchStdEvent": "UOP_SPEC", - "BriefDescription": "This event counts all architecturally executed micro-operations." + "BriefDescription": "This event counts all architecturally executed micro-operation." }, { "ArchStdEvent": "SVE_MATH_SPEC", @@ -29,7 +29,7 @@ }, { "ArchStdEvent": "FP_FMA_SPEC", - "BriefDescription": "This event counts architecturally executed floating-point fused multiply-add and multiply-subtract operations." + "BriefDescription": "This event counts architecturally executed floating-point fused multiply-add and multiply-subtract operation." }, { "ArchStdEvent": "FP_RECPE_SPEC", @@ -41,15 +41,15 @@ }, { "ArchStdEvent": "ASE_INT_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD integer operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD integer operation." }, { "ArchStdEvent": "SVE_INT_SPEC", - "BriefDescription": "This event counts architecturally executed SVE integer operations." + "BriefDescription": "This event counts architecturally executed SVE integer operation." }, { "ArchStdEvent": "ASE_SVE_INT_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE integer operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD or SVE integer operation." 
}, { "ArchStdEvent": "SVE_INT_DIV_SPEC", @@ -69,7 +69,7 @@ }, { "ArchStdEvent": "ASE_SVE_INT_MUL_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE integer multiply operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD or SVE integer multiply operation." }, { "ArchStdEvent": "SVE_INT_MUL64_SPEC", @@ -77,19 +77,19 @@ }, { "ArchStdEvent": "SVE_INT_MULH64_SPEC", - "BriefDescription": "This event counts architecturally executed SVE integer 64-bit x 64-bit multiply returning high part operations." + "BriefDescription": "This event counts architecturally executed SVE integer 64-bit x 64-bit multiply returning high part operation." }, { "ArchStdEvent": "ASE_NONFP_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD non-floating-point operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD non-floating-point operation." }, { "ArchStdEvent": "SVE_NONFP_SPEC", - "BriefDescription": "This event counts architecturally executed SVE non-floating-point operations." + "BriefDescription": "This event counts architecturally executed SVE non-floating-point operation." }, { "ArchStdEvent": "ASE_SVE_NONFP_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE non-floating-point operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD or SVE non-floating-point operation." }, { "ArchStdEvent": "ASE_INT_VREDUCE_SPEC", @@ -101,7 +101,7 @@ }, { "ArchStdEvent": "ASE_SVE_INT_VREDUCE_SPEC", - "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE integer reduction operations." + "BriefDescription": "This event counts architecturally executed Advanced SIMD or SVE integer reduction operation." 
}, { "ArchStdEvent": "SVE_PERM_SPEC", @@ -149,11 +149,11 @@ }, { "ArchStdEvent": "ASE_SVE_LD_SPEC", - "BriefDescription": "This event counts architecturally executed operations that read from memory due to SVE and Advanced SIMD load instructions." + "BriefDescription": "This event counts architecturally executed operations that read from memory due to Advanced SIMD or SVE load instructions." }, { "ArchStdEvent": "ASE_SVE_ST_SPEC", - "BriefDescription": "This event counts architecturally executed operations that write to memory due to SVE and Advanced SIMD store instructions." + "BriefDescription": "This event counts architecturally executed operations that write to memory due to Advanced SIMD or SVE store instructions." }, { "ArchStdEvent": "PRF_SPEC", @@ -197,11 +197,11 @@ }, { "ArchStdEvent": "ASE_SVE_LD_MULTI_SPEC", - "BriefDescription": "This event counts architecturally executed operations that read from memory due to SVE and Advanced SIMD multiple vector contiguous structure load instructions." + "BriefDescription": "This event counts architecturally executed operations that read from memory due to Advanced SIMD or SVE multiple vector contiguous structure load instructions." }, { "ArchStdEvent": "ASE_SVE_ST_MULTI_SPEC", - "BriefDescription": "This event counts architecturally executed operations that write to memory due to SVE and Advanced SIMD multiple vector contiguous structure store instructions." + "BriefDescription": "This event counts architecturally executed operations that write to memory due to Advanced SIMD or SVE multiple vector contiguous structure store instructions." }, { "ArchStdEvent": "SVE_LD_GATHER_SPEC", @@ -221,27 +221,27 @@ }, { "ArchStdEvent": "FP_HP_SCALE_OPS_SPEC", - "BriefDescription": "This event counts architecturally executed SVE half-precision arithmetic operations. See FP_HP_SCALE_OPS_SPEC of ARMv9 Reference Manual for more information. 
This event counter is incremented by 8, or by 16 for operations that would also be counted by SVE_FP_FMA_SPEC." + "BriefDescription": "This event counts architecturally executed SVE half-precision arithmetic operation. See FP_HP_SCALE_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by 8, or by 16 for operations that would also be counted by SVE_FP_FMA_SPEC." }, { "ArchStdEvent": "FP_HP_FIXED_OPS_SPEC", - "BriefDescription": "This event counts architecturally executed v8SIMD&FP half-precision arithmetic operations. See FP_HP_FIXED_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by the number of 16-bit elements for Advanced SIMD operations, or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC." + "BriefDescription": "This event counts architecturally executed v8SIMD&FP half-precision arithmetic operation. See FP_HP_FIXED_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by the number of 16-bit elements for Advanced SIMD operations, or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC." }, { "ArchStdEvent": "FP_SP_SCALE_OPS_SPEC", - "BriefDescription": "This event counts architecturally executed SVE single-precision arithmetic operations. See FP_SP_SCALE_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by 4, or by 8 for operations that would also be counted by SVE_FP_FMA_SPEC." + "BriefDescription": "This event counts architecturally executed SVE single-precision arithmetic operation. See FP_SP_SCALE_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by 4, or by 8 for operations that would also be counted by SVE_FP_FMA_SPEC." 
}, { "ArchStdEvent": "FP_SP_FIXED_OPS_SPEC", - "BriefDescription": "This event counts architecturally executed v8SIMD&FP single-precision arithmetic operations. See FP_SP_FIXED_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by the number of 32-bit elements for Advanced SIMD operations, or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC." + "BriefDescription": "This event counts architecturally executed v8SIMD&FP single-precision arithmetic operation. See FP_SP_FIXED_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by the number of 32-bit elements for Advanced SIMD operations, or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC." }, { "ArchStdEvent": "FP_DP_SCALE_OPS_SPEC", - "BriefDescription": "This event counts architecturally executed SVE double-precision arithmetic operations. See FP_DP_SCALE_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by 2, or by 4 for operations that would also be counted by SVE_FP_FMA_SPEC." + "BriefDescription": "This event counts architecturally executed SVE double-precision arithmetic operation. See FP_DP_SCALE_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by 2, or by 4 for operations that would also be counted by SVE_FP_FMA_SPEC." }, { "ArchStdEvent": "FP_DP_FIXED_OPS_SPEC", - "BriefDescription": "This event counts architecturally executed v8SIMD&FP double-precision arithmetic operations. See FP_DP_FIXED_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by 2 for Advanced SIMD operations, or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC." 
+ "BriefDescription": "This event counts architecturally executed v8SIMD&FP double-precision arithmetic operation. See FP_DP_FIXED_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by 2 for Advanced SIMD operations, or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC." }, { "ArchStdEvent": "ASE_SVE_INT_DOT_SPEC", diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/tlb.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/tlb.json index edc7cb8696c8..f54029ba369a 100644 --- a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/tlb.json +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/tlb.json @@ -104,72 +104,72 @@ { "EventCode": "0x0C10", "EventName": "L1I_TLB_REFILL_4K", - "BriefDescription": "This event counts operations that cause a TLB refill to the L1I in 4KB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L1I in 4KB page." }, { "EventCode": "0x0C11", "EventName": "L1I_TLB_REFILL_64K", - "BriefDescription": "This event counts operations that cause a TLB refill to the L1I in 64KB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L1I in 64KB page." }, { "EventCode": "0x0C12", "EventName": "L1I_TLB_REFILL_2M", - "BriefDescription": "This event counts operations that cause a TLB refill to the L1I in 2MB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L1I in 2MB page." }, { "EventCode": "0x0C13", "EventName": "L1I_TLB_REFILL_32M", - "BriefDescription": "This event counts operations that cause a TLB refill to the L1I in 32MB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L1I in 32MB page." }, { "EventCode": "0x0C14", "EventName": "L1I_TLB_REFILL_512M", - "BriefDescription": "This event counts operations that cause a TLB refill to the L1I in 512MB page." 
+ "BriefDescription": "This event counts operations that cause a TLB refill of the L1I in 512MB page." }, { "EventCode": "0x0C15", "EventName": "L1I_TLB_REFILL_1G", - "BriefDescription": "This event counts operations that cause a TLB refill to the L1I in 1GB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L1I in 1GB page." }, { "EventCode": "0x0C16", "EventName": "L1I_TLB_REFILL_16G", - "BriefDescription": "This event counts operations that cause a TLB refill to the L1I in 16GB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L1I in 16GB page." }, { "EventCode": "0x0C18", "EventName": "L1D_TLB_REFILL_4K", - "BriefDescription": "This event counts operations that cause a TLB refill to the L1D in 4KB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L1D in 4KB page." }, { "EventCode": "0x0C19", "EventName": "L1D_TLB_REFILL_64K", - "BriefDescription": "This event counts operations that cause a TLB refill to the L1D in 64KB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L1D in 64KB page." }, { "EventCode": "0x0C1A", "EventName": "L1D_TLB_REFILL_2M", - "BriefDescription": "This event counts operations that cause a TLB refill to the L1D in 2MB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L1D in 2MB page." }, { "EventCode": "0x0C1B", "EventName": "L1D_TLB_REFILL_32M", - "BriefDescription": "This event counts operations that cause a TLB refill to the L1D in 32MB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L1D in 32MB page." }, { "EventCode": "0x0C1C", "EventName": "L1D_TLB_REFILL_512M", - "BriefDescription": "This event counts operations that cause a TLB refill to the L1D in 512MB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L1D in 512MB page." 
}, { "EventCode": "0x0C1D", "EventName": "L1D_TLB_REFILL_1G", - "BriefDescription": "This event counts operations that cause a TLB refill to the L1D in 1GB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L1D in 1GB page." }, { "EventCode": "0x0C1E", "EventName": "L1D_TLB_REFILL_16G", - "BriefDescription": "This event counts operations that cause a TLB refill to the L1D in 16GB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L1D in 16GB page." }, { "EventCode": "0x0C20", @@ -244,72 +244,72 @@ { "EventCode": "0x0C30", "EventName": "L2I_TLB_REFILL_4K", - "BriefDescription": "This event counts operations that cause a TLB refill to the L2Iin 4KB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L2I in 4KB page." }, { "EventCode": "0x0C31", "EventName": "L2I_TLB_REFILL_64K", - "BriefDescription": "This event counts operations that cause a TLB refill to the L2I in 64KB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L2I in 64KB page." }, { "EventCode": "0x0C32", "EventName": "L2I_TLB_REFILL_2M", - "BriefDescription": "This event counts operations that cause a TLB refill to the L2I in 2MB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L2I in 2MB page." }, { "EventCode": "0x0C33", "EventName": "L2I_TLB_REFILL_32M", - "BriefDescription": "This event counts operations that cause a TLB refill to the L2I in 32MB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L2I in 32MB page." }, { "EventCode": "0x0C34", "EventName": "L2I_TLB_REFILL_512M", - "BriefDescription": "This event counts operations that cause a TLB refill to the L2I in 512MB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L2I in 512MB page." 
}, { "EventCode": "0x0C35", "EventName": "L2I_TLB_REFILL_1G", - "BriefDescription": "This event counts operations that cause a TLB refill to the L2I in 1GB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L2I in 1GB page." }, { "EventCode": "0x0C36", "EventName": "L2I_TLB_REFILL_16G", - "BriefDescription": "This event counts operations that cause a TLB refill to the L2I in 16GB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L2I in 16GB page." }, { "EventCode": "0x0C38", "EventName": "L2D_TLB_REFILL_4K", - "BriefDescription": "This event counts operations that cause a TLB refill to the L2D in 4KB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L2D in 4KB page." }, { "EventCode": "0x0C39", "EventName": "L2D_TLB_REFILL_64K", - "BriefDescription": "This event counts operations that cause a TLB refill to the L2D in 64KB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L2D in 64KB page." }, { "EventCode": "0x0C3A", "EventName": "L2D_TLB_REFILL_2M", - "BriefDescription": "This event counts operations that cause a TLB refill to the L2D in 2MB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L2D in 2MB page." }, { "EventCode": "0x0C3B", "EventName": "L2D_TLB_REFILL_32M", - "BriefDescription": "This event counts operations that cause a TLB refill to the L2D in 32MB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L2D in 32MB page." }, { "EventCode": "0x0C3C", "EventName": "L2D_TLB_REFILL_512M", - "BriefDescription": "This event counts operations that cause a TLB refill to the L2D in 512MB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L2D in 512MB page." 
}, { "EventCode": "0x0C3D", "EventName": "L2D_TLB_REFILL_1G", - "BriefDescription": "This event counts operations that cause a TLB refill to the L2D in 1GB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L2D in 1GB page." }, { "EventCode": "0x0C3E", "EventName": "L2D_TLB_REFILL_16G", - "BriefDescription": "This event counts operations that cause a TLB refill to the L2D in 16GB page." + "BriefDescription": "This event counts operations that cause a TLB refill of the L2D in 16GB page." }, { "ArchStdEvent": "DTLB_WALK_PERCYC", From 588d22b40480bca9efdb6e24d253baaa5165884c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 13 Jun 2025 17:45:28 -0700 Subject: [PATCH 0244/2411] perf test: Expand user space event reading (rdpmc) tests Test that disabling rdpmc support via /sys/bus/event_source/cpu*/rdpmc disables reading in the mmap (libperf read support will fallback to using a system call). Test all hybrid PMUs support rdpmc. Ensure hybrid PMUs use the correct CPU to rdpmc the correct event. Previously the test would open cycles or instructions with no extended type then rdpmc it on whatever CPU. This could fail/skip due to which CPU the test was scheduled upon. 
Signed-off-by: Ian Rogers Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20250614004528.1652860-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/tests/mmap-basic.c | 307 ++++++++++++++++++++++++---------- tools/perf/tests/tests.h | 9 + tools/perf/util/affinity.c | 18 ++ tools/perf/util/affinity.h | 2 + 4 files changed, 249 insertions(+), 87 deletions(-) diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 04b547c6bdbe..3c89d3001887 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -1,15 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include #include +#include "cpumap.h" #include "debug.h" #include "event.h" #include "evlist.h" #include "evsel.h" #include "thread_map.h" #include "tests.h" +#include "util/affinity.h" #include "util/mmap.h" #include "util/sample.h" #include @@ -172,99 +175,199 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest return err; } -static int test_stat_user_read(int event) +enum user_read_state { + USER_READ_ENABLED, + USER_READ_DISABLED, + USER_READ_UNKNOWN, +}; + +static enum user_read_state set_user_read(struct perf_pmu *pmu, enum user_read_state enabled) { - struct perf_counts_values counts = { .val = 0 }; - struct perf_thread_map *threads; - struct perf_evsel *evsel; - struct perf_event_mmap_page *pc; - struct perf_event_attr attr = { - .type = PERF_TYPE_HARDWARE, - .config = event, -#ifdef __aarch64__ - .config1 = 0x2, /* Request user access */ -#endif - }; - int err, i, ret = TEST_FAIL; - bool opened = false, mapped = false; + char buf[2] = {0, '\n'}; + ssize_t len; + int events_fd, rdpmc_fd; + enum user_read_state old_user_read = USER_READ_UNKNOWN; - threads = perf_thread_map__new_dummy(); - TEST_ASSERT_VAL("failed to create threads", threads); + if (enabled == USER_READ_UNKNOWN) + return USER_READ_UNKNOWN; + events_fd = perf_pmu__event_source_devices_fd(); + if (events_fd < 0) + return 
USER_READ_UNKNOWN; + + rdpmc_fd = perf_pmu__pathname_fd(events_fd, pmu->name, "rdpmc", O_RDWR); + if (rdpmc_fd < 0) { + close(events_fd); + return USER_READ_UNKNOWN; + } + + len = read(rdpmc_fd, buf, sizeof(buf)); + if (len != sizeof(buf)) + pr_debug("%s read failed\n", __func__); + + // Note, on Intel hybrid disabling on 1 PMU will implicitly disable on + // all the core PMUs. + old_user_read = (buf[0] == '1') ? USER_READ_ENABLED : USER_READ_DISABLED; + + if (enabled != old_user_read) { + buf[0] = (enabled == USER_READ_ENABLED) ? '1' : '0'; + len = write(rdpmc_fd, buf, sizeof(buf)); + if (len != sizeof(buf)) + pr_debug("%s write failed\n", __func__); + } + close(rdpmc_fd); + close(events_fd); + return old_user_read; +} + +static int test_stat_user_read(u64 event, enum user_read_state enabled) +{ + struct perf_pmu *pmu = NULL; + struct perf_thread_map *threads = perf_thread_map__new_dummy(); + int ret = TEST_OK; + + pr_err("User space counter reading %" PRIu64 "\n", event); + if (!threads) { + pr_err("User space counter reading [Failed to create threads]\n"); + return TEST_FAIL; + } perf_thread_map__set_pid(threads, 0, 0); - evsel = perf_evsel__new(&attr); - TEST_ASSERT_VAL("failed to create evsel", evsel); + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { + enum user_read_state saved_user_read_state = set_user_read(pmu, enabled); + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = perf_pmus__supports_extended_type() + ? 
event | ((u64)pmu->type << PERF_PMU_TYPE_SHIFT) + : event, +#ifdef __aarch64__ + .config1 = 0x2, /* Request user access */ +#endif + }; + struct perf_evsel *evsel = NULL; + int err; + struct perf_event_mmap_page *pc; + bool mapped = false, opened = false, rdpmc_supported; + struct perf_counts_values counts = { .val = 0 }; - err = perf_evsel__open(evsel, NULL, threads); - if (err) { - pr_err("failed to open evsel: %s\n", strerror(-err)); - ret = TEST_SKIP; - goto out; - } - opened = true; - err = perf_evsel__mmap(evsel, 0); - if (err) { - pr_err("failed to mmap evsel: %s\n", strerror(-err)); - goto out; - } - mapped = true; + pr_debug("User space counter reading for PMU %s\n", pmu->name); + /* + * Restrict scheduling to only use the rdpmc on the CPUs the + * event can be on. If the test doesn't run on the CPU of the + * event then the event will be disabled and the pc->index test + * will fail. + */ + if (pmu->cpus != NULL) + cpu_map__set_affinity(pmu->cpus); - pc = perf_evsel__mmap_base(evsel, 0, 0); - if (!pc) { - pr_err("failed to get mmapped address\n"); - goto out; - } - - if (!pc->cap_user_rdpmc || !pc->index) { - pr_err("userspace counter access not %s\n", - !pc->cap_user_rdpmc ? "supported" : "enabled"); - ret = TEST_SKIP; - goto out; - } - if (pc->pmc_width < 32) { - pr_err("userspace counter width not set (%d)\n", pc->pmc_width); - goto out; - } - - perf_evsel__read(evsel, 0, 0, &counts); - if (counts.val == 0) { - pr_err("failed to read value for evsel\n"); - goto out; - } - - for (i = 0; i < 5; i++) { - volatile int count = 0x10000 << i; - __u64 start, end, last = 0; - - pr_debug("\tloop = %u, ", count); - - perf_evsel__read(evsel, 0, 0, &counts); - start = counts.val; - - while (count--) ; - - perf_evsel__read(evsel, 0, 0, &counts); - end = counts.val; - - if ((end - start) < last) { - pr_err("invalid counter data: end=%llu start=%llu last= %llu\n", - end, start, last); - goto out; + /* Make the evsel. 
*/ + evsel = perf_evsel__new(&attr); + if (!evsel) { + pr_err("User space counter reading for PMU %s [Failed to allocate evsel]\n", + pmu->name); + ret = TEST_FAIL; + goto cleanup; } - last = end - start; - pr_debug("count = %llu\n", end - start); + + err = perf_evsel__open(evsel, NULL, threads); + if (err) { + pr_err("User space counter reading for PMU %s [Failed to open evsel]\n", + pmu->name); + ret = TEST_SKIP; + goto cleanup; + } + opened = true; + err = perf_evsel__mmap(evsel, 0); + if (err) { + pr_err("User space counter reading for PMU %s [Failed to mmap evsel]\n", + pmu->name); + ret = TEST_FAIL; + goto cleanup; + } + mapped = true; + + pc = perf_evsel__mmap_base(evsel, 0, 0); + if (!pc) { + pr_err("User space counter reading for PMU %s [Failed to get mmaped address]\n", + pmu->name); + ret = TEST_FAIL; + goto cleanup; + } + + if (saved_user_read_state == USER_READ_UNKNOWN) + rdpmc_supported = pc->cap_user_rdpmc && pc->index; + else + rdpmc_supported = (enabled == USER_READ_ENABLED); + + if (rdpmc_supported && (!pc->cap_user_rdpmc || !pc->index)) { + pr_err("User space counter reading for PMU %s [Failed unexpected supported counter access %d %d]\n", + pmu->name, pc->cap_user_rdpmc, pc->index); + ret = TEST_FAIL; + goto cleanup; + } + + if (!rdpmc_supported && pc->cap_user_rdpmc) { + pr_err("User space counter reading for PMU %s [Failed unexpected unsupported counter access %d]\n", + pmu->name, pc->cap_user_rdpmc); + ret = TEST_FAIL; + goto cleanup; + } + + if (rdpmc_supported && pc->pmc_width < 32) { + pr_err("User space counter reading for PMU %s [Failed width not set %d]\n", + pmu->name, pc->pmc_width); + ret = TEST_FAIL; + goto cleanup; + } + + perf_evsel__read(evsel, 0, 0, &counts); + if (counts.val == 0) { + pr_err("User space counter reading for PMU %s [Failed read]\n", pmu->name); + ret = TEST_FAIL; + goto cleanup; + } + + for (int i = 0; i < 5; i++) { + volatile int count = 0x10000 << i; + __u64 start, end, last = 0; + + pr_debug("\tloop = %u, ", 
count); + + perf_evsel__read(evsel, 0, 0, &counts); + start = counts.val; + + while (count--) ; + + perf_evsel__read(evsel, 0, 0, &counts); + end = counts.val; + + if ((end - start) < last) { + pr_err("User space counter reading for PMU %s [Failed invalid counter data: end=%llu start=%llu last= %llu]\n", + pmu->name, end, start, last); + ret = TEST_FAIL; + goto cleanup; + } + last = end - start; + pr_debug("count = %llu\n", last); + } + pr_debug("User space counter reading for PMU %s [Success]\n", pmu->name); +cleanup: + if (mapped) + perf_evsel__munmap(evsel); + if (opened) + perf_evsel__close(evsel); + perf_evsel__delete(evsel); + + /* If the affinity was changed, then put it back to all CPUs. */ + if (pmu->cpus != NULL) { + struct perf_cpu_map *cpus = cpu_map__online(); + + cpu_map__set_affinity(cpus); + perf_cpu_map__put(cpus); + } + set_user_read(pmu, saved_user_read_state); } - ret = TEST_OK; - -out: - if (mapped) - perf_evsel__munmap(evsel); - if (opened) - perf_evsel__close(evsel); - perf_evsel__delete(evsel); - perf_thread_map__put(threads); return ret; } @@ -272,20 +375,32 @@ static int test_stat_user_read(int event) static int test__mmap_user_read_instr(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { - return test_stat_user_read(PERF_COUNT_HW_INSTRUCTIONS); + return test_stat_user_read(PERF_COUNT_HW_INSTRUCTIONS, USER_READ_ENABLED); } static int test__mmap_user_read_cycles(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { - return test_stat_user_read(PERF_COUNT_HW_CPU_CYCLES); + return test_stat_user_read(PERF_COUNT_HW_CPU_CYCLES, USER_READ_ENABLED); +} + +static int test__mmap_user_read_instr_disabled(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + return test_stat_user_read(PERF_COUNT_HW_INSTRUCTIONS, USER_READ_DISABLED); +} + +static int test__mmap_user_read_cycles_disabled(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + return 
test_stat_user_read(PERF_COUNT_HW_CPU_CYCLES, USER_READ_DISABLED); } static struct test_case tests__basic_mmap[] = { TEST_CASE_REASON("Read samples using the mmap interface", basic_mmap, "permissions"), - TEST_CASE_REASON("User space counter reading of instructions", + TEST_CASE_REASON_EXCLUSIVE("User space counter reading of instructions", mmap_user_read_instr, #if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || \ (defined(__riscv) && __riscv_xlen == 64) @@ -294,13 +409,31 @@ static struct test_case tests__basic_mmap[] = { "unsupported" #endif ), - TEST_CASE_REASON("User space counter reading of cycles", + TEST_CASE_REASON_EXCLUSIVE("User space counter reading of cycles", mmap_user_read_cycles, #if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || \ (defined(__riscv) && __riscv_xlen == 64) "permissions" #else "unsupported" +#endif + ), + TEST_CASE_REASON_EXCLUSIVE("User space counter disabling instructions", + mmap_user_read_instr_disabled, +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || \ + (defined(__riscv) && __riscv_xlen == 64) + "permissions" +#else + "unsupported" +#endif + ), + TEST_CASE_REASON_EXCLUSIVE("User space counter disabling cycles", + mmap_user_read_cycles_disabled, +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || \ + (defined(__riscv) && __riscv_xlen == 64) + "permissions" +#else + "unsupported" #endif ), { .name = NULL, } diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index bb7951c61971..4c128a959441 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -71,6 +71,15 @@ struct test_suite { .exclusive = true, \ } +#define TEST_CASE_REASON_EXCLUSIVE(description, _name, _reason) \ + { \ + .name = #_name, \ + .desc = description, \ + .run_case = test__##_name, \ + .skip_reason = _reason, \ + .exclusive = true, \ + } + #define DEFINE_SUITE(description, _name) \ struct test_case tests__##_name[] = { \ TEST_CASE(description, _name), 
\ diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c index 38dc4524b7e8..4fe851334296 100644 --- a/tools/perf/util/affinity.c +++ b/tools/perf/util/affinity.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "perf.h" #include "cpumap.h" #include "affinity.h" @@ -83,3 +84,20 @@ void affinity__cleanup(struct affinity *a) if (a != NULL) __affinity__cleanup(a); } + +void cpu_map__set_affinity(const struct perf_cpu_map *cpumap) +{ + int cpu_set_size = get_cpu_set_size(); + unsigned long *cpuset = bitmap_zalloc(cpu_set_size * 8); + struct perf_cpu cpu; + int idx; + + if (!cpuset) + return; + + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpumap) + __set_bit(cpu.cpu, cpuset); + + sched_setaffinity(0, cpu_set_size, (cpu_set_t *)cpuset); + zfree(&cpuset); +} diff --git a/tools/perf/util/affinity.h b/tools/perf/util/affinity.h index 0ad6a18ef20c..7341194b2298 100644 --- a/tools/perf/util/affinity.h +++ b/tools/perf/util/affinity.h @@ -4,6 +4,7 @@ #include +struct perf_cpu_map; struct affinity { unsigned long *orig_cpus; unsigned long *sched_cpus; @@ -13,5 +14,6 @@ struct affinity { void affinity__cleanup(struct affinity *a); void affinity__set(struct affinity *a, int cpu); int affinity__setup(struct affinity *a); +void cpu_map__set_affinity(const struct perf_cpu_map *cpumap); #endif // PERF_AFFINITY_H From dcbe6e51a0bb80a40f9a8c87750c291c2364573d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 6 Jun 2025 15:54:31 -0700 Subject: [PATCH 0245/2411] perf parse-events: Set default GH modifier properly Commit 7b100989b4f6bce7 ("perf evlist: Remove __evlist__add_default") changed to use "cycles:P" as a default event. But the problem is it cannot set other default modifiers correctly. perf kvm needs to set attr.exclude_host by default but it didn't work because of the logic in the parse_events__modifier_list(). Also the exclude_GH_default was applied only if ":u" modifier was specified - which is strange. 
Move it out after handling the ":GH" and check perf_host and perf_guest properly. Before: $ ./perf kvm record -vv true |& grep exclude (nothing) But specifying an event (without a modifier) works: $ ./perf kvm record -vv -e cycles true |& grep exclude exclude_host 1 After: It now works for both cases: $ ./perf kvm record -vv true |& grep exclude exclude_host 1 $ ./perf kvm record -vv -e cycles true |& grep exclude exclude_host 1 Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250606225431.2109754-1-namhyung@kernel.org Fixes: 35c8d21371e9b342 ("perf tools: Don't set attr.exclude_guest by default") Signed-off-by: Namhyung Kim --- tools/perf/util/parse-events.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 7f34e602fc08..d1965a7b97ed 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1830,13 +1830,11 @@ static int parse_events__modifier_list(struct parse_events_state *parse_state, int eH = group ? evsel->core.attr.exclude_host : 0; int eG = group ? evsel->core.attr.exclude_guest : 0; int exclude = eu | ek | eh; - int exclude_GH = group ? 
evsel->exclude_GH : 0; + int exclude_GH = eG | eH; if (mod.user) { if (!exclude) exclude = eu = ek = eh = 1; - if (!exclude_GH && !perf_guest && exclude_GH_default) - eG = 1; eu = 0; } if (mod.kernel) { @@ -1859,6 +1857,13 @@ static int parse_events__modifier_list(struct parse_events_state *parse_state, exclude_GH = eG = eH = 1; eH = 0; } + if (!exclude_GH && exclude_GH_default) { + if (perf_host) + eG = 1; + else if (perf_guest) + eH = 1; + } + evsel->core.attr.exclude_user = eu; evsel->core.attr.exclude_kernel = ek; evsel->core.attr.exclude_hv = eh; From 2d584688643fac90428ab12513e05d6deff7c606 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 18 Jun 2025 17:25:55 -0700 Subject: [PATCH 0246/2411] perf test: Add header shell test Add a shell test that sanity checks perf data and pipe mode produce expected header fields. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250619002555.100896-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/tests/shell/header.sh | 74 ++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100755 tools/perf/tests/shell/header.sh diff --git a/tools/perf/tests/shell/header.sh b/tools/perf/tests/shell/header.sh new file mode 100755 index 000000000000..813831cff0bd --- /dev/null +++ b/tools/perf/tests/shell/header.sh @@ -0,0 +1,74 @@ +#!/bin/bash +# perf header tests +# SPDX-License-Identifier: GPL-2.0 + +set -e + +err=0 +perfdata=$(mktemp /tmp/__perf_test_header.perf.data.XXXXX) +script_output=$(mktemp /tmp/__perf_test_header.perf.data.XXXXX.script) + +cleanup() { + rm -f "${perfdata}" + rm -f "${perfdata}".old + rm -f "${script_output}" + + trap - EXIT TERM INT +} + +trap_cleanup() { + echo "Unexpected signal in ${FUNCNAME[1]}" + cleanup + exit 1 +} +trap trap_cleanup EXIT TERM INT + +check_header_output() { + declare -a fields=( + "captured" + "hostname" + "os release" + "arch" + "cpuid" + "nrcpus" + "event" + "cmdline" + "perf version" + "sibling (cores|dies|threads)" + "sibling 
threads" + "total memory" + ) + for i in "${fields[@]}" + do + if ! grep -q -E "$i" "${script_output}" + then + echo "Failed to find expect $i in output" + err=1 + fi + done +} + +test_file() { + echo "Test perf header file" + + perf record -o "${perfdata}" -g -- perf test -w noploop + perf report --header-only -I -i "${perfdata}" > "${script_output}" + check_header_output + + echo "Test perf header file [Done]" +} + +test_pipe() { + echo "Test perf header pipe" + + perf record -o - -g -- perf test -w noploop | perf report --header-only -I -i - > "${script_output}" + check_header_output + + echo "Test perf header pipe [Done]" +} + +test_file +test_pipe + +cleanup +exit $err From 13b38e6b8059de096ebddb5d770c2419943949b7 Mon Sep 17 00:00:00 2001 From: Anubhav Shelat Date: Wed, 18 Jun 2025 10:29:22 -0400 Subject: [PATCH 0247/2411] perf header: remove unnecessary core id test It is possible for systems to have a greater socket id number than the number of cpus present on a machine, so this test is obsolete and should be removed. Signed-off-by: Anubhav Shelat Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250618142921.4053400-2-ashelat@redhat.com Signed-off-by: Namhyung Kim --- tools/perf/util/header.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index e3cdc3b7b4ab..d7f6ff6974aa 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2559,7 +2559,6 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) int cpu_nr = ff->ph->env.nr_cpus_avail; u64 size = 0; struct perf_header *ph = ff->ph; - bool do_core_id_test = true; ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu)); if (!ph->env.cpu) @@ -2614,15 +2613,6 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) return 0; } - /* On s390 the socket_id number is not related to the numbers of cpus. - * The socket_id number might be higher than the numbers of cpus. 
- * This depends on the configuration. - * AArch64 is the same. - */ - if (ph->env.arch && (!strncmp(ph->env.arch, "s390", 4) - || !strncmp(ph->env.arch, "aarch64", 7))) - do_core_id_test = false; - for (i = 0; i < (u32)cpu_nr; i++) { if (do_read_u32(ff, &nr)) goto free_cpu; @@ -2633,12 +2623,6 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) if (do_read_u32(ff, &nr)) goto free_cpu; - if (do_core_id_test && nr != (u32)-1 && nr > (u32)cpu_nr) { - pr_debug("socket_id number is too big." - "You may need to upgrade the perf tool.\n"); - goto free_cpu; - } - ph->env.cpu[i].socket_id = nr; size += sizeof(u32); } From 1d0654b7fdc5431b85035f6e76b4bc57679575d8 Mon Sep 17 00:00:00 2001 From: Blake Jones Date: Thu, 12 Jun 2025 12:49:35 -0700 Subject: [PATCH 0248/2411] perf build: detect support for libbpf's emit_strings option This creates a config option that detects libbpf's ability to display character arrays as strings, which was just added to the BPF tree (https://git.kernel.org/bpf/bpf-next/c/87c9c79a02b4). To test this change, I built perf (from later in this patch set) with: - static libbpf (default, using source from kernel tree) - dynamic libbpf (LIBBPF_DYNAMIC=1 LIBBPF_INCLUDE=/usr/local/include) For both the static and dynamic versions, I used headers with and without the ".emit_strings" option. I verified that of the four resulting binaries, the two with ".emit_strings" would successfully record BPF_METADATA events, and the two without wouldn't. All four binaries would successfully display BPF_METADATA events, because the relevant bit of libbpf code is only used during "perf record". 
Signed-off-by: Blake Jones Link: https://lore.kernel.org/r/20250612194939.162730-2-blakejones@google.com Signed-off-by: Namhyung Kim --- tools/build/Makefile.feature | 1 + tools/build/feature/Makefile | 4 ++++ tools/build/feature/test-libbpf-strings.c | 10 ++++++++++ tools/perf/Documentation/perf-check.txt | 1 + tools/perf/Makefile.config | 8 ++++++++ tools/perf/builtin-check.c | 1 + 6 files changed, 25 insertions(+) create mode 100644 tools/build/feature/test-libbpf-strings.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 3a1fddd38db0..2e5f4c8b6547 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -126,6 +126,7 @@ FEATURE_TESTS_EXTRA := \ llvm \ clang \ libbpf \ + libbpf-strings \ libpfm4 \ libdebuginfod \ clang-bpf-co-re \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 4aa166d3eab6..0c4e541ed56e 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -59,6 +59,7 @@ FILES= \ test-lzma.bin \ test-bpf.bin \ test-libbpf.bin \ + test-libbpf-strings.bin \ test-get_cpuid.bin \ test-sdt.bin \ test-cxx.bin \ @@ -339,6 +340,9 @@ $(OUTPUT)test-bpf.bin: $(OUTPUT)test-libbpf.bin: $(BUILD) -lbpf +$(OUTPUT)test-libbpf-strings.bin: + $(BUILD) + $(OUTPUT)test-sdt.bin: $(BUILD) diff --git a/tools/build/feature/test-libbpf-strings.c b/tools/build/feature/test-libbpf-strings.c new file mode 100644 index 000000000000..83e6c45f5c85 --- /dev/null +++ b/tools/build/feature/test-libbpf-strings.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +int main(void) +{ + struct btf_dump_type_data_opts opts; + + opts.emit_strings = 0; + return opts.emit_strings; +} diff --git a/tools/perf/Documentation/perf-check.txt b/tools/perf/Documentation/perf-check.txt index a764a4629220..799982d8d868 100644 --- a/tools/perf/Documentation/perf-check.txt +++ b/tools/perf/Documentation/perf-check.txt @@ -52,6 +52,7 @@ feature:: dwarf-unwind / HAVE_DWARF_UNWIND_SUPPORT 
auxtrace / HAVE_AUXTRACE_SUPPORT libbfd / HAVE_LIBBFD_SUPPORT + libbpf-strings / HAVE_LIBBPF_STRINGS_SUPPORT libcapstone / HAVE_LIBCAPSTONE_SUPPORT libcrypto / HAVE_LIBCRYPTO_SUPPORT libdw-dwarf-unwind / HAVE_LIBDW_SUPPORT diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index d1ea7bf44964..affe5e173920 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -595,8 +595,16 @@ ifndef NO_LIBELF LIBBPF_STATIC := 1 $(call detected,CONFIG_LIBBPF) CFLAGS += -DHAVE_LIBBPF_SUPPORT + LIBBPF_INCLUDE = $(LIBBPF_DIR)/.. endif endif + + FEATURE_CHECK_CFLAGS-libbpf-strings="-I$(LIBBPF_INCLUDE)" + $(call feature_check,libbpf-strings) + ifeq ($(feature-libbpf-strings), 1) + $(call detected,CONFIG_LIBBPF_STRINGS) + CFLAGS += -DHAVE_LIBBPF_STRINGS_SUPPORT + endif endif endif # NO_LIBBPF endif # NO_LIBELF diff --git a/tools/perf/builtin-check.c b/tools/perf/builtin-check.c index 9a509cb3bb9a..f4827f0ddb47 100644 --- a/tools/perf/builtin-check.c +++ b/tools/perf/builtin-check.c @@ -43,6 +43,7 @@ struct feature_status supported_features[] = { FEATURE_STATUS("dwarf-unwind", HAVE_DWARF_UNWIND_SUPPORT), FEATURE_STATUS("auxtrace", HAVE_AUXTRACE_SUPPORT), FEATURE_STATUS_TIP("libbfd", HAVE_LIBBFD_SUPPORT, "Deprecated, license incompatibility, use BUILD_NONDISTRO=1 and install binutils-dev[el]"), + FEATURE_STATUS("libbpf-strings", HAVE_LIBBPF_STRINGS_SUPPORT), FEATURE_STATUS("libcapstone", HAVE_LIBCAPSTONE_SUPPORT), FEATURE_STATUS("libcrypto", HAVE_LIBCRYPTO_SUPPORT), FEATURE_STATUS("libdw-dwarf-unwind", HAVE_LIBDW_SUPPORT), From ab38e84ba9a80581e055408e0f8c0158998fa4b9 Mon Sep 17 00:00:00 2001 From: Blake Jones Date: Thu, 12 Jun 2025 12:49:36 -0700 Subject: [PATCH 0249/2411] perf record: collect BPF metadata from existing BPF programs Look for .rodata maps, find ones with 'bpf_metadata_' variables, extract their values as strings, and create a new PERF_RECORD_BPF_METADATA synthetic event using that data. 
The code gets invoked from the existing routine perf_event__synthesize_one_bpf_prog(). For example, a BPF program with the following variables: const char bpf_metadata_version[] SEC(".rodata") = "3.14159"; int bpf_metadata_value[] SEC(".rodata") = 42; would generate a PERF_RECORD_BPF_METADATA record with: .prog_name = .nr_entries = 2 .entries[0].key = "version" .entries[0].value = "3.14159" .entries[1].key = "value" .entries[1].value = "42" Each of the BPF programs and subprograms that share those variables would get a distinct PERF_RECORD_BPF_METADATA record, with the ".prog_name" showing the name of each program or subprogram. The prog_name is deliberately the same as the ".name" field in the corresponding PERF_RECORD_KSYMBOL record. This code only gets invoked if support for displaying BTF char arrays as strings is detected. Signed-off-by: Blake Jones Link: https://lore.kernel.org/r/20250612194939.162730-3-blakejones@google.com Signed-off-by: Namhyung Kim --- tools/lib/perf/include/perf/event.h | 18 ++ tools/perf/util/bpf-event.c | 332 ++++++++++++++++++++++++++++ tools/perf/util/bpf-event.h | 12 + 3 files changed, 362 insertions(+) diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 09b7c643ddac..6608f1e3701b 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -467,6 +467,22 @@ struct perf_record_compressed2 { char data[]; }; +#define BPF_METADATA_KEY_LEN 64 +#define BPF_METADATA_VALUE_LEN 256 +#define BPF_PROG_NAME_LEN KSYM_NAME_LEN + +struct perf_record_bpf_metadata_entry { + char key[BPF_METADATA_KEY_LEN]; + char value[BPF_METADATA_VALUE_LEN]; +}; + +struct perf_record_bpf_metadata { + struct perf_event_header header; + char prog_name[BPF_PROG_NAME_LEN]; + __u64 nr_entries; + struct perf_record_bpf_metadata_entry entries[]; +}; + enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_USER_TYPE_START = 64, PERF_RECORD_HEADER_ATTR = 64, @@ -489,6 +505,7 @@ 
enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_COMPRESSED = 81, PERF_RECORD_FINISHED_INIT = 82, PERF_RECORD_COMPRESSED2 = 83, + PERF_RECORD_BPF_METADATA = 84, PERF_RECORD_HEADER_MAX }; @@ -530,6 +547,7 @@ union perf_event { struct perf_record_header_feature feat; struct perf_record_compressed pack; struct perf_record_compressed2 pack2; + struct perf_record_bpf_metadata bpf_metadata; }; #endif /* __LIBPERF_EVENT_H */ diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index c81444059ad0..1f6e76ee6024 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -1,13 +1,21 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include +#include +#include #include +#include #include #include #include +#include #include #include +#include #include +#include #include +#include #include #include "bpf-event.h" #include "bpf-utils.h" @@ -151,6 +159,319 @@ static int synthesize_bpf_prog_name(char *buf, int size, return name_len; } +#ifdef HAVE_LIBBPF_STRINGS_SUPPORT + +#define BPF_METADATA_PREFIX "bpf_metadata_" +#define BPF_METADATA_PREFIX_LEN (sizeof(BPF_METADATA_PREFIX) - 1) + +static bool name_has_bpf_metadata_prefix(const char **s) +{ + if (strncmp(*s, BPF_METADATA_PREFIX, BPF_METADATA_PREFIX_LEN) != 0) + return false; + *s += BPF_METADATA_PREFIX_LEN; + return true; +} + +struct bpf_metadata_map { + struct btf *btf; + const struct btf_type *datasec; + void *rodata; + size_t rodata_size; + unsigned int num_vars; +}; + +static int bpf_metadata_read_map_data(__u32 map_id, struct bpf_metadata_map *map) +{ + int map_fd; + struct bpf_map_info map_info; + __u32 map_info_len; + int key; + struct btf *btf; + const struct btf_type *datasec; + struct btf_var_secinfo *vsi; + unsigned int vlen, vars; + void *rodata; + + map_fd = bpf_map_get_fd_by_id(map_id); + if (map_fd < 0) + return -1; + + memset(&map_info, 0, sizeof(map_info)); + map_info_len = sizeof(map_info); + if (bpf_obj_get_info_by_fd(map_fd, &map_info, 
&map_info_len) < 0) + goto out_close; + + /* If it's not an .rodata map, don't bother. */ + if (map_info.type != BPF_MAP_TYPE_ARRAY || + map_info.key_size != sizeof(int) || + map_info.max_entries != 1 || + !map_info.btf_value_type_id || + !strstr(map_info.name, ".rodata")) { + goto out_close; + } + + btf = btf__load_from_kernel_by_id(map_info.btf_id); + if (!btf) + goto out_close; + datasec = btf__type_by_id(btf, map_info.btf_value_type_id); + if (!btf_is_datasec(datasec)) + goto out_free_btf; + + /* + * If there aren't any variables with the "bpf_metadata_" prefix, + * don't bother. + */ + vlen = btf_vlen(datasec); + vsi = btf_var_secinfos(datasec); + vars = 0; + for (unsigned int i = 0; i < vlen; i++, vsi++) { + const struct btf_type *t_var = btf__type_by_id(btf, vsi->type); + const char *name = btf__name_by_offset(btf, t_var->name_off); + + if (name_has_bpf_metadata_prefix(&name)) + vars++; + } + if (vars == 0) + goto out_free_btf; + + rodata = zalloc(map_info.value_size); + if (!rodata) + goto out_free_btf; + key = 0; + if (bpf_map_lookup_elem(map_fd, &key, rodata)) { + free(rodata); + goto out_free_btf; + } + close(map_fd); + + map->btf = btf; + map->datasec = datasec; + map->rodata = rodata; + map->rodata_size = map_info.value_size; + map->num_vars = vars; + return 0; + +out_free_btf: + btf__free(btf); +out_close: + close(map_fd); + return -1; +} + +struct format_btf_ctx { + char *buf; + size_t buf_size; + size_t buf_idx; +}; + +static void format_btf_cb(void *arg, const char *fmt, va_list ap) +{ + int n; + struct format_btf_ctx *ctx = (struct format_btf_ctx *)arg; + + n = vsnprintf(ctx->buf + ctx->buf_idx, ctx->buf_size - ctx->buf_idx, + fmt, ap); + ctx->buf_idx += n; + if (ctx->buf_idx >= ctx->buf_size) + ctx->buf_idx = ctx->buf_size; +} + +static void format_btf_variable(struct btf *btf, char *buf, size_t buf_size, + const struct btf_type *t, const void *btf_data) +{ + struct format_btf_ctx ctx = { + .buf = buf, + .buf_idx = 0, + .buf_size = buf_size, + }; 
+ const struct btf_dump_type_data_opts opts = { + .sz = sizeof(struct btf_dump_type_data_opts), + .skip_names = 1, + .compact = 1, + .emit_strings = 1, + }; + struct btf_dump *d; + size_t btf_size; + + d = btf_dump__new(btf, format_btf_cb, &ctx, NULL); + btf_size = btf__resolve_size(btf, t->type); + btf_dump__dump_type_data(d, t->type, btf_data, btf_size, &opts); + btf_dump__free(d); +} + +static void bpf_metadata_fill_event(struct bpf_metadata_map *map, + struct perf_record_bpf_metadata *bpf_metadata_event) +{ + struct btf_var_secinfo *vsi; + unsigned int i, vlen; + + memset(bpf_metadata_event->prog_name, 0, BPF_PROG_NAME_LEN); + vlen = btf_vlen(map->datasec); + vsi = btf_var_secinfos(map->datasec); + + for (i = 0; i < vlen; i++, vsi++) { + const struct btf_type *t_var = btf__type_by_id(map->btf, + vsi->type); + const char *name = btf__name_by_offset(map->btf, + t_var->name_off); + const __u64 nr_entries = bpf_metadata_event->nr_entries; + struct perf_record_bpf_metadata_entry *entry; + + if (!name_has_bpf_metadata_prefix(&name)) + continue; + + if (nr_entries >= (__u64)map->num_vars) + break; + + entry = &bpf_metadata_event->entries[nr_entries]; + memset(entry, 0, sizeof(*entry)); + snprintf(entry->key, BPF_METADATA_KEY_LEN, "%s", name); + format_btf_variable(map->btf, entry->value, + BPF_METADATA_VALUE_LEN, t_var, + map->rodata + vsi->offset); + bpf_metadata_event->nr_entries++; + } +} + +static void bpf_metadata_free_map_data(struct bpf_metadata_map *map) +{ + btf__free(map->btf); + free(map->rodata); +} + +static struct bpf_metadata *bpf_metadata_alloc(__u32 nr_prog_tags, + __u32 nr_variables) +{ + struct bpf_metadata *metadata; + size_t event_size; + + metadata = zalloc(sizeof(struct bpf_metadata)); + if (!metadata) + return NULL; + + metadata->prog_names = zalloc(nr_prog_tags * sizeof(char *)); + if (!metadata->prog_names) { + bpf_metadata_free(metadata); + return NULL; + } + for (__u32 prog_index = 0; prog_index < nr_prog_tags; prog_index++) { + 
metadata->prog_names[prog_index] = zalloc(BPF_PROG_NAME_LEN); + if (!metadata->prog_names[prog_index]) { + bpf_metadata_free(metadata); + return NULL; + } + metadata->nr_prog_names++; + } + + event_size = sizeof(metadata->event->bpf_metadata) + + nr_variables * sizeof(metadata->event->bpf_metadata.entries[0]); + metadata->event = zalloc(event_size); + if (!metadata->event) { + bpf_metadata_free(metadata); + return NULL; + } + metadata->event->bpf_metadata = (struct perf_record_bpf_metadata) { + .header = { + .type = PERF_RECORD_BPF_METADATA, + .size = event_size, + }, + .nr_entries = 0, + }; + + return metadata; +} + +static struct bpf_metadata *bpf_metadata_create(struct bpf_prog_info *info) +{ + struct bpf_metadata *metadata; + const __u32 *map_ids = (__u32 *)(uintptr_t)info->map_ids; + + for (__u32 map_index = 0; map_index < info->nr_map_ids; map_index++) { + struct bpf_metadata_map map; + + if (bpf_metadata_read_map_data(map_ids[map_index], &map) != 0) + continue; + + metadata = bpf_metadata_alloc(info->nr_prog_tags, map.num_vars); + if (!metadata) + continue; + + bpf_metadata_fill_event(&map, &metadata->event->bpf_metadata); + + for (__u32 index = 0; index < info->nr_prog_tags; index++) { + synthesize_bpf_prog_name(metadata->prog_names[index], + BPF_PROG_NAME_LEN, info, + map.btf, index); + } + + bpf_metadata_free_map_data(&map); + + return metadata; + } + + return NULL; +} + +static int synthesize_perf_record_bpf_metadata(const struct bpf_metadata *metadata, + const struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + const size_t event_size = metadata->event->header.size; + union perf_event *event; + int err = 0; + + event = zalloc(event_size + machine->id_hdr_size); + if (!event) + return -1; + memcpy(event, metadata->event, event_size); + memset((void *)event + event->header.size, 0, machine->id_hdr_size); + event->header.size += machine->id_hdr_size; + for (__u32 index = 0; index < metadata->nr_prog_names; index++) { + 
memcpy(event->bpf_metadata.prog_name, + metadata->prog_names[index], BPF_PROG_NAME_LEN); + err = perf_tool__process_synth_event(tool, event, machine, + process); + if (err != 0) + break; + } + + free(event); + return err; +} + +void bpf_metadata_free(struct bpf_metadata *metadata) +{ + if (metadata == NULL) + return; + for (__u32 index = 0; index < metadata->nr_prog_names; index++) + free(metadata->prog_names[index]); + free(metadata->prog_names); + free(metadata->event); + free(metadata); +} + +#else /* HAVE_LIBBPF_STRINGS_SUPPORT */ + +static struct bpf_metadata *bpf_metadata_create(struct bpf_prog_info *info __maybe_unused) +{ + return NULL; +} + +static int synthesize_perf_record_bpf_metadata(const struct bpf_metadata *metadata __maybe_unused, + const struct perf_tool *tool __maybe_unused, + perf_event__handler_t process __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + +void bpf_metadata_free(struct bpf_metadata *metadata __maybe_unused) +{ +} + +#endif /* HAVE_LIBBPF_STRINGS_SUPPORT */ + /* * Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf * program. One PERF_RECORD_BPF_EVENT is generated for the program. 
And @@ -173,6 +494,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, const struct perf_tool *tool = session->tool; struct bpf_prog_info_node *info_node; struct perf_bpil *info_linear; + struct bpf_metadata *metadata; struct bpf_prog_info *info; struct btf *btf = NULL; struct perf_env *env; @@ -193,6 +515,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, arrays |= 1UL << PERF_BPIL_JITED_INSNS; arrays |= 1UL << PERF_BPIL_LINE_INFO; arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO; + arrays |= 1UL << PERF_BPIL_MAP_IDS; info_linear = get_bpf_prog_info_linear(fd, arrays); if (IS_ERR_OR_NULL(info_linear)) { @@ -301,6 +624,15 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, */ err = perf_tool__process_synth_event(tool, event, machine, process); + + /* Synthesize PERF_RECORD_BPF_METADATA */ + metadata = bpf_metadata_create(info); + if (metadata != NULL) { + err = synthesize_perf_record_bpf_metadata(metadata, + tool, process, + machine); + bpf_metadata_free(metadata); + } } out: diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index e2f0420905f5..ef2dd3f1619e 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -17,6 +17,12 @@ struct record_opts; struct evlist; struct target; +struct bpf_metadata { + union perf_event *event; + char **prog_names; + __u64 nr_prog_names; +}; + struct bpf_prog_info_node { struct perf_bpil *info_linear; struct rb_node rb_node; @@ -36,6 +42,7 @@ int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env); void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, struct perf_env *env, FILE *fp); +void bpf_metadata_free(struct bpf_metadata *metadata); #else static inline int machine__process_bpf(struct machine *machine __maybe_unused, union perf_event *event __maybe_unused, @@ -55,6 +62,11 @@ static inline void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info _ FILE *fp 
__maybe_unused) { +} + +static inline void bpf_metadata_free(struct bpf_metadata *metadata __maybe_unused) +{ + } #endif // HAVE_LIBBPF_SUPPORT #endif From fdc3441f2d317b40ace0936ee040a6c895d60014 Mon Sep 17 00:00:00 2001 From: Blake Jones Date: Thu, 12 Jun 2025 12:49:37 -0700 Subject: [PATCH 0250/2411] perf record: collect BPF metadata from new programs This collects metadata for any BPF programs that were loaded during a "perf record" run, and emits it at the end of the run. Signed-off-by: Blake Jones Link: https://lore.kernel.org/r/20250612194939.162730-4-blakejones@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-record.c | 10 +++++++ tools/perf/util/bpf-event.c | 46 ++++++++++++++++++++++++++++++ tools/perf/util/bpf-event.h | 1 + tools/perf/util/env.c | 19 +++++++++++- tools/perf/util/env.h | 6 ++++ tools/perf/util/header.c | 1 + tools/perf/util/synthetic-events.h | 2 ++ 7 files changed, 84 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0b566f300569..53971b9de3ba 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2162,6 +2162,14 @@ static int record__synthesize(struct record *rec, bool tail) return err; } +static void record__synthesize_final_bpf_metadata(struct record *rec __maybe_unused) +{ +#ifdef HAVE_LIBBPF_SUPPORT + perf_event__synthesize_final_bpf_metadata(rec->session, + process_synthesized_event); +#endif +} + static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) { struct record *rec = data; @@ -2807,6 +2815,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) trigger_off(&auxtrace_snapshot_trigger); trigger_off(&switch_output_trigger); + record__synthesize_final_bpf_metadata(rec); + if (opts->auxtrace_snapshot_on_exit) record__auxtrace_snapshot_exit(rec); diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 1f6e76ee6024..dc09a4730c50 100644 --- 
a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -472,6 +472,49 @@ void bpf_metadata_free(struct bpf_metadata *metadata __maybe_unused) #endif /* HAVE_LIBBPF_STRINGS_SUPPORT */ +struct bpf_metadata_final_ctx { + const struct perf_tool *tool; + perf_event__handler_t process; + struct machine *machine; +}; + +static void synthesize_final_bpf_metadata_cb(struct bpf_prog_info_node *node, + void *data) +{ + struct bpf_metadata_final_ctx *ctx = (struct bpf_metadata_final_ctx *)data; + struct bpf_metadata *metadata = node->metadata; + int err; + + if (metadata == NULL) + return; + err = synthesize_perf_record_bpf_metadata(metadata, ctx->tool, + ctx->process, ctx->machine); + if (err != 0) { + const char *prog_name = metadata->prog_names[0]; + + if (prog_name != NULL) + pr_warning("Couldn't synthesize final BPF metadata for %s.\n", prog_name); + else + pr_warning("Couldn't synthesize final BPF metadata.\n"); + } + bpf_metadata_free(metadata); + node->metadata = NULL; +} + +void perf_event__synthesize_final_bpf_metadata(struct perf_session *session, + perf_event__handler_t process) +{ + struct perf_env *env = &session->header.env; + struct bpf_metadata_final_ctx ctx = { + .tool = session->tool, + .process = process, + .machine = &session->machines.host, + }; + + perf_env__iterate_bpf_prog_info(env, synthesize_final_bpf_metadata_cb, + &ctx); +} + /* * Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf * program. One PERF_RECORD_BPF_EVENT is generated for the program. 
And @@ -612,6 +655,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, } info_node->info_linear = info_linear; + info_node->metadata = NULL; if (!perf_env__insert_bpf_prog_info(env, info_node)) { free(info_linear); free(info_node); @@ -803,6 +847,7 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) arrays |= 1UL << PERF_BPIL_JITED_INSNS; arrays |= 1UL << PERF_BPIL_LINE_INFO; arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO; + arrays |= 1UL << PERF_BPIL_MAP_IDS; info_linear = get_bpf_prog_info_linear(fd, arrays); if (IS_ERR_OR_NULL(info_linear)) { @@ -815,6 +860,7 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) info_node = malloc(sizeof(struct bpf_prog_info_node)); if (info_node) { info_node->info_linear = info_linear; + info_node->metadata = bpf_metadata_create(&info_linear->info); if (!perf_env__insert_bpf_prog_info(env, info_node)) { free(info_linear); free(info_node); diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index ef2dd3f1619e..60d2c6637af5 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -25,6 +25,7 @@ struct bpf_metadata { struct bpf_prog_info_node { struct perf_bpil *info_linear; + struct bpf_metadata *metadata; struct rb_node rb_node; }; diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 36411749e007..05a4f2657d72 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -3,8 +3,10 @@ #include "debug.h" #include "env.h" #include "util/header.h" -#include "linux/compiler.h" +#include "util/rwsem.h" +#include #include +#include #include #include #include "cgroup.h" @@ -89,6 +91,20 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, return node; } +void perf_env__iterate_bpf_prog_info(struct perf_env *env, + void (*cb)(struct bpf_prog_info_node *node, + void *data), + void *data) +{ + struct rb_node *first; + + down_read(&env->bpf_progs.lock); + first = rb_first(&env->bpf_progs.infos); + for 
(struct rb_node *node = first; node != NULL; node = rb_next(node)) + (*cb)(rb_entry(node, struct bpf_prog_info_node, rb_node), data); + up_read(&env->bpf_progs.lock); +} + bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) { bool ret; @@ -174,6 +190,7 @@ static void perf_env__purge_bpf(struct perf_env *env) next = rb_next(&node->rb_node); rb_erase(&node->rb_node, root); zfree(&node->info_linear); + bpf_metadata_free(node->metadata); free(node); } diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index d90e343cf1fa..c90c1d717e73 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -174,16 +174,22 @@ const char *perf_env__raw_arch(struct perf_env *env); int perf_env__nr_cpus_avail(struct perf_env *env); void perf_env__init(struct perf_env *env); +#ifdef HAVE_LIBBPF_SUPPORT bool __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node); bool perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node); struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, __u32 prog_id); +void perf_env__iterate_bpf_prog_info(struct perf_env *env, + void (*cb)(struct bpf_prog_info_node *node, + void *data), + void *data); bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id); +#endif // HAVE_LIBBPF_SUPPORT int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu); char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name, diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index d7f6ff6974aa..2dea35237e81 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3145,6 +3145,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) /* 
after reading from file, translate offset to address */ bpil_offs_to_addr(info_linear); info_node->info_linear = info_linear; + info_node->metadata = NULL; if (!__perf_env__insert_bpf_prog_info(env, info_node)) { free(info_linear); free(info_node); diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h index b9c936b5cfeb..ee29615d68e5 100644 --- a/tools/perf/util/synthetic-events.h +++ b/tools/perf/util/synthetic-events.h @@ -92,6 +92,8 @@ int perf_event__synthesize_threads(const struct perf_tool *tool, perf_event__han int perf_event__synthesize_tracing_data(const struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process); int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); pid_t perf_event__synthesize_comm(const struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine); +void perf_event__synthesize_final_bpf_metadata(struct perf_session *session, + perf_event__handler_t process); int perf_tool__process_synth_event(const struct perf_tool *tool, union perf_event *event, struct machine *machine, perf_event__handler_t process); From f19860ea9477f5ac33775cc0a602c7d54188c00a Mon Sep 17 00:00:00 2001 From: Blake Jones Date: Thu, 12 Jun 2025 12:49:38 -0700 Subject: [PATCH 0251/2411] perf tools: display the new PERF_RECORD_BPF_METADATA event Here's some example "perf script -D" output for the new event type. The ": unhandled!" message is from tool.c, analogous to other behavior there. I've elided some rows with all NUL characters for brevity, and I wrapped one of the >75-column lines to fit in the commit guidelines. 0x50fc8@perf.data [0x260]: event: 84 . . ... raw event: size 608 bytes . 0000: 54 00 00 00 00 00 60 02 62 70 66 5f 70 72 6f 67 T.....`.bpf_prog . 0010: 5f 31 65 30 61 32 65 33 36 36 65 35 36 66 31 61 _1e0a2e366e56f1a . 
0020: 32 5f 70 65 72 66 5f 73 61 6d 70 6c 65 5f 66 69 2_perf_sample_fi . 0030: 6c 74 65 72 00 00 00 00 00 00 00 00 00 00 00 00 lter............ . 0040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [...] . 0110: 74 65 73 74 5f 76 61 6c 75 65 00 00 00 00 00 00 test_value...... . 0120: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [...] . 0150: 34 32 00 00 00 00 00 00 00 00 00 00 00 00 00 00 42.............. . 0160: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [...] 0 0x50fc8 [0x260]: PERF_RECORD_BPF_METADATA \ prog bpf_prog_1e0a2e366e56f1a2_perf_sample_filter entry 0: test_value = 42 : unhandled! Signed-off-by: Blake Jones Link: https://lore.kernel.org/r/20250612194939.162730-5-blakejones@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-inject.c | 1 + tools/perf/builtin-script.c | 15 +++++++++++++-- tools/perf/util/event.c | 21 +++++++++++++++++++++ tools/perf/util/event.h | 1 + tools/perf/util/session.c | 4 ++++ tools/perf/util/tool.c | 14 ++++++++++++++ tools/perf/util/tool.h | 3 ++- 7 files changed, 56 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 11e49cafa3af..b15eac0716f7 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -2530,6 +2530,7 @@ int cmd_inject(int argc, const char **argv) inject.tool.finished_init = perf_event__repipe_op2_synth; inject.tool.compressed = perf_event__repipe_op4_synth; inject.tool.auxtrace = perf_event__repipe_auxtrace; + inject.tool.bpf_metadata = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.session = __perf_session__new(&data, &inject.tool, /*trace_event_repipe=*/inject.output.is_pipe); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 6c3bf74dd78c..4001e621b6cb 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -38,6 +38,7 @@ #include "print_insn.h" #include "archinsn.h" #include 
+#include #include #include #include @@ -50,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -2755,6 +2757,14 @@ process_bpf_events(const struct perf_tool *tool __maybe_unused, sample->tid); } +static int +process_bpf_metadata_event(struct perf_session *session __maybe_unused, + union perf_event *event) +{ + perf_event__fprintf(event, NULL, stdout); + return 0; +} + static int process_text_poke_events(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -2877,8 +2887,9 @@ static int __cmd_script(struct perf_script *script) script->tool.finished_round = process_finished_round_event; } if (script->show_bpf_events) { - script->tool.ksymbol = process_bpf_events; - script->tool.bpf = process_bpf_events; + script->tool.ksymbol = process_bpf_events; + script->tool.bpf = process_bpf_events; + script->tool.bpf_metadata = process_bpf_metadata_event; } if (script->show_text_poke_events) { script->tool.ksymbol = process_bpf_events; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 7544a3104e21..14b0d3689137 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,9 +1,12 @@ #include #include #include +#include #include #include #include +#include +#include #include #include #include @@ -78,6 +81,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_COMPRESSED] = "COMPRESSED", [PERF_RECORD_FINISHED_INIT] = "FINISHED_INIT", [PERF_RECORD_COMPRESSED2] = "COMPRESSED2", + [PERF_RECORD_BPF_METADATA] = "BPF_METADATA", }; const char *perf_event__name(unsigned int id) @@ -505,6 +509,20 @@ size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp) event->bpf.type, event->bpf.flags, event->bpf.id); } +size_t perf_event__fprintf_bpf_metadata(union perf_event *event, FILE *fp) +{ + struct perf_record_bpf_metadata *metadata = &event->bpf_metadata; + size_t ret; + + ret = fprintf(fp, " prog %s\n", metadata->prog_name); + for (__u32 i = 0; i < metadata->nr_entries; i++) { + ret += 
fprintf(fp, " entry %d: %20s = %s\n", i, + metadata->entries[i].key, + metadata->entries[i].value); + } + return ret; +} + static int text_poke_printer(enum binary_printer_ops op, unsigned int val, void *extra, FILE *fp) { @@ -602,6 +620,9 @@ size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FIL case PERF_RECORD_AUX_OUTPUT_HW_ID: ret += perf_event__fprintf_aux_output_hw_id(event, fp); break; + case PERF_RECORD_BPF_METADATA: + ret += perf_event__fprintf_bpf_metadata(event, fp); + break; default: ret += fprintf(fp, "\n"); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 664bf39567ce..67ad4a2014bc 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -370,6 +370,7 @@ size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp); size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp); size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp); size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_bpf_metadata(union perf_event *event, FILE *fp); size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine,FILE *fp); size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index a320672c264e..38075059086c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "map_symbol.h" #include "branch.h" @@ -1491,6 +1492,9 @@ static s64 perf_session__process_user_event(struct perf_session *session, case PERF_RECORD_FINISHED_INIT: err = tool->finished_init(session, event); break; + case PERF_RECORD_BPF_METADATA: + err = tool->bpf_metadata(session, event); + break; default: err = -EINVAL; break; diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c index 37bd8ac63b01..204ec03071bc 100644 --- a/tools/perf/util/tool.c +++ 
b/tools/perf/util/tool.c @@ -1,12 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 #include "data.h" #include "debug.h" +#include "event.h" #include "header.h" #include "session.h" #include "stat.h" #include "tool.h" #include "tsc.h" +#include #include +#include #include #ifdef HAVE_ZSTD_SUPPORT @@ -237,6 +240,16 @@ static int perf_session__process_compressed_event_stub(struct perf_session *sess return 0; } +static int perf_event__process_bpf_metadata_stub(struct perf_session *perf_session __maybe_unused, + union perf_event *event) +{ + if (dump_trace) + perf_event__fprintf_bpf_metadata(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + void perf_tool__init(struct perf_tool *tool, bool ordered_events) { tool->ordered_events = ordered_events; @@ -293,6 +306,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events) tool->compressed = perf_session__process_compressed_event_stub; #endif tool->finished_init = process_event_op2_stub; + tool->bpf_metadata = perf_event__process_bpf_metadata_stub; } bool perf_tool__compressed_is_stub(const struct perf_tool *tool) diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index db1c7642b0d1..18b76ff0f26a 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -77,7 +77,8 @@ struct perf_tool { stat, stat_round, feature, - finished_init; + finished_init, + bpf_metadata; event_op4 compressed; event_op3 auxtrace; bool ordered_events; From edf2cadf01e8f2620af25b337d15ebc584911b46 Mon Sep 17 00:00:00 2001 From: Blake Jones Date: Thu, 12 Jun 2025 12:49:39 -0700 Subject: [PATCH 0252/2411] perf test: add test for BPF metadata collection This is an end-to-end test for the PERF_RECORD_BPF_METADATA support. It adds a new "bpf_metadata_perf_version" variable to perf's BPF programs, so that when they are loaded, there will be at least one BPF program with some metadata to parse. 
The test invokes "perf record" in a way that loads one of those BPF programs, and then sifts through the output to find its BPF metadata. Signed-off-by: Blake Jones Link: https://lore.kernel.org/r/20250612194939.162730-6-blakejones@google.com Signed-off-by: Namhyung Kim --- tools/perf/Makefile.perf | 3 +- tools/perf/tests/shell/test_bpf_metadata.sh | 76 +++++++++++++++++++++ tools/perf/util/bpf_skel/perf_version.h | 17 +++++ 3 files changed, 95 insertions(+), 1 deletion(-) create mode 100755 tools/perf/tests/shell/test_bpf_metadata.sh create mode 100644 tools/perf/util/bpf_skel/perf_version.h diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index d4c7031b01a7..4f292edeca5a 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1250,8 +1250,9 @@ else $(Q)cp "$(VMLINUX_H)" $@ endif -$(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) $(SKEL_OUT)/vmlinux.h | $(SKEL_TMP_OUT) +$(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(OUTPUT)PERF-VERSION-FILE util/bpf_skel/perf_version.h $(LIBBPF) $(SKEL_OUT)/vmlinux.h | $(SKEL_TMP_OUT) $(QUIET_CLANG)$(CLANG) -g -O2 --target=bpf $(CLANG_OPTIONS) $(BPF_INCLUDE) $(TOOLS_UAPI_INCLUDE) \ + -include $(OUTPUT)PERF-VERSION-FILE -include util/bpf_skel/perf_version.h \ -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ $(SKEL_OUT)/%.skel.h: $(SKEL_TMP_OUT)/%.bpf.o | $(BPFTOOL) diff --git a/tools/perf/tests/shell/test_bpf_metadata.sh b/tools/perf/tests/shell/test_bpf_metadata.sh new file mode 100755 index 000000000000..11df592fb661 --- /dev/null +++ b/tools/perf/tests/shell/test_bpf_metadata.sh @@ -0,0 +1,76 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# BPF metadata collection test. + +set -e + +err=0 +perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) + +cleanup() { + rm -f "${perfdata}" + rm -f "${perfdata}".old + trap - EXIT TERM INT +} + +trap_cleanup() { + cleanup + exit 1 +} +trap trap_cleanup EXIT TERM INT + +test_bpf_metadata() { + echo "Checking BPF metadata collection" + + if ! 
perf check -q feature libbpf-strings ; then + echo "Basic BPF metadata test [skipping - not supported]" + err=0 + return + fi + + # This is a basic invocation of perf record + # that invokes the perf_sample_filter BPF program. + if ! perf record -e task-clock --filter 'ip > 0' \ + -o "${perfdata}" sleep 1 2> /dev/null + then + echo "Basic BPF metadata test [Failed record]" + err=1 + return + fi + + # The BPF programs that ship with "perf" all have the following + # variable defined at compile time: + # + # const char bpf_metadata_perf_version[] SEC(".rodata") = <...>; + # + # This invocation looks for a PERF_RECORD_BPF_METADATA event, + # and checks that its content contains the string given by + # "perf version". + VERS=$(perf version | awk '{print $NF}') + if ! perf script --show-bpf-events -i "${perfdata}" | awk ' + /PERF_RECORD_BPF_METADATA.*perf_sample_filter/ { + header = 1; + } + /^ *entry/ { + if (header) { header = 0; entry = 1; } + } + $0 !~ /^ *entry/ { + entry = 0; + } + /perf_version/ { + if (entry) print $NF; + } + ' | egrep "$VERS" > /dev/null + then + echo "Basic BPF metadata test [Failed invalid output]" + err=1 + return + fi + echo "Basic BPF metadata test [Success]" +} + +test_bpf_metadata + +cleanup +exit $err diff --git a/tools/perf/util/bpf_skel/perf_version.h b/tools/perf/util/bpf_skel/perf_version.h new file mode 100644 index 000000000000..1ed5b2e59bf5 --- /dev/null +++ b/tools/perf/util/bpf_skel/perf_version.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef __PERF_VERSION_H__ +#define __PERF_VERSION_H__ + +#include "vmlinux.h" +#include + +/* + * This is used by tests/shell/test_bpf_metadata.sh + * to verify that BPF metadata generation works. + * + * PERF_VERSION is defined by a build rule at compile time. 
+ */ +const char bpf_metadata_perf_version[] SEC(".rodata") = PERF_VERSION; + +#endif /* __PERF_VERSION_H__ */ From ceed13630489fb9afbaa1326d2adc793d91fa48b Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Sun, 8 Jun 2025 22:40:01 +0200 Subject: [PATCH 0253/2411] regulator: act8865-regulator: switch psy_cfg from of_node to fwnode In order to remove .of_node from the power_supply_config struct, use .fwnode instead. Link: https://lore.kernel.org/r/20250430-psy-core-convert-to-fwnode-v2-1-f9643b958677@collabora.com Reviewed-by: Hans de Goede Signed-off-by: Sebastian Reichel --- drivers/regulator/act8865-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/act8865-regulator.c b/drivers/regulator/act8865-regulator.c index 0457af23c55a..b2a6ddc6f56d 100644 --- a/drivers/regulator/act8865-regulator.c +++ b/drivers/regulator/act8865-regulator.c @@ -643,7 +643,7 @@ static int act8600_charger_probe(struct device *dev, struct regmap *regmap) struct power_supply *charger; struct power_supply_config cfg = { .drv_data = regmap, - .of_node = dev->of_node, + .fwnode = dev_fwnode(dev), }; charger = devm_power_supply_register(dev, &act8600_charger_desc, &cfg); From 0dc41c6b18b8fdd959c56f2d5b61a2d0960e3d91 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 4 Jun 2025 10:18:21 +0200 Subject: [PATCH 0254/2411] dt-bindings: power: supply: bq2515x: Add missing power-supply ref Reference the common power supply schema to bring the definition of monitored-battery property. Drop also the obvious description because it is duplicating what is in power-supply.yaml. 
Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250604-dt-bindings-psy-monitored-battery-v1-1-7f755ff75218@linaro.org Signed-off-by: Sebastian Reichel --- .../devicetree/bindings/power/supply/bq2515x.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/power/supply/bq2515x.yaml b/Documentation/devicetree/bindings/power/supply/bq2515x.yaml index 845822c87f2a..0e99a218e662 100644 --- a/Documentation/devicetree/bindings/power/supply/bq2515x.yaml +++ b/Documentation/devicetree/bindings/power/supply/bq2515x.yaml @@ -53,15 +53,16 @@ properties: minimum: 50000 maximum: 500000 - monitored-battery: - $ref: /schemas/types.yaml#/definitions/phandle - description: phandle to the battery node being monitored + monitored-battery: true required: - compatible - reg - monitored-battery +allOf: + - $ref: power-supply.yaml# + additionalProperties: false examples: From 0835608458bc4bd1afb15d108c6d8a3b8b3d5767 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 4 Jun 2025 10:18:22 +0200 Subject: [PATCH 0255/2411] dt-bindings: power: supply: bq256xx: Add missing power-supply ref Reference the common power supply schema to bring the definition of monitored-battery property. Drop also the obvious description because it is duplicating what is in power-supply.yaml. 
Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250604-dt-bindings-psy-monitored-battery-v1-2-7f755ff75218@linaro.org Signed-off-by: Sebastian Reichel --- Documentation/devicetree/bindings/power/supply/bq256xx.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/power/supply/bq256xx.yaml b/Documentation/devicetree/bindings/power/supply/bq256xx.yaml index a76afe3ca299..8cee37b9879e 100644 --- a/Documentation/devicetree/bindings/power/supply/bq256xx.yaml +++ b/Documentation/devicetree/bindings/power/supply/bq256xx.yaml @@ -58,9 +58,7 @@ properties: minimum: 100000 maximum: 3200000 - monitored-battery: - $ref: /schemas/types.yaml#/definitions/phandle - description: phandle to the battery node being monitored + monitored-battery: true interrupts: maxItems: 1 @@ -78,6 +76,7 @@ required: - monitored-battery allOf: + - $ref: power-supply.yaml# - if: properties: compatible: From 860cb8df5a622f2044a65f98c4158a7ff9c5b07c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 4 Jun 2025 10:18:23 +0200 Subject: [PATCH 0256/2411] dt-bindings: power: supply: qcom,pmi8998: Add missing power-supply ref Reference the common power supply schema to bring the definition of monitored-battery property. Drop also the obvious description because it is duplicating what is in power-supply.yaml. 
Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250604-dt-bindings-psy-monitored-battery-v1-3-7f755ff75218@linaro.org Signed-off-by: Sebastian Reichel --- .../bindings/power/supply/qcom,pmi8998-charger.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/power/supply/qcom,pmi8998-charger.yaml b/Documentation/devicetree/bindings/power/supply/qcom,pmi8998-charger.yaml index 90c7dc7632c5..70f5cd6eaeab 100644 --- a/Documentation/devicetree/bindings/power/supply/qcom,pmi8998-charger.yaml +++ b/Documentation/devicetree/bindings/power/supply/qcom,pmi8998-charger.yaml @@ -38,9 +38,7 @@ properties: - const: usbin_i - const: usbin_v - monitored-battery: - description: phandle to the simple-battery node - $ref: /schemas/types.yaml#/definitions/phandle + monitored-battery: true required: - compatible @@ -51,6 +49,9 @@ required: - io-channel-names - monitored-battery +allOf: + - $ref: power-supply.yaml# + additionalProperties: false examples: From 221e08ebf6271eb80c6cb77df9ad3586229920e9 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 4 Jun 2025 10:18:24 +0200 Subject: [PATCH 0257/2411] dt-bindings: power: supply: richtek,rt5033: Add missing power-supply ref Reference the common power supply schema to bring the definition of monitored-battery property. 
Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250604-dt-bindings-psy-monitored-battery-v1-4-7f755ff75218@linaro.org Signed-off-by: Sebastian Reichel --- .../bindings/power/supply/richtek,rt5033-charger.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/power/supply/richtek,rt5033-charger.yaml b/Documentation/devicetree/bindings/power/supply/richtek,rt5033-charger.yaml index 5b3edd79a523..d91eced9f5fb 100644 --- a/Documentation/devicetree/bindings/power/supply/richtek,rt5033-charger.yaml +++ b/Documentation/devicetree/bindings/power/supply/richtek,rt5033-charger.yaml @@ -18,7 +18,6 @@ properties: const: richtek,rt5033-charger monitored-battery: - $ref: /schemas/types.yaml#/definitions/phandle description: | Phandle to the monitored battery according to battery.yaml. The battery node needs to contain five parameters. @@ -54,6 +53,9 @@ properties: required: - monitored-battery +allOf: + - $ref: power-supply.yaml# + additionalProperties: false examples: From 128c0704821e7101257951c1d32459e2fc3e591b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 4 Jun 2025 10:18:25 +0200 Subject: [PATCH 0258/2411] dt-bindings: power: supply: summit,smb347: Add missing power-supply ref Reference the common power supply schema to bring the definition of monitored-battery property. Drop also the obvious description because it is duplicating what is in power-supply.yaml. 
Signed-off-by: Krzysztof Kozlowski Reviewed-by: David Heidelberg Link: https://lore.kernel.org/r/20250604-dt-bindings-psy-monitored-battery-v1-5-7f755ff75218@linaro.org Signed-off-by: Sebastian Reichel --- .../bindings/power/supply/summit,smb347-charger.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/power/supply/summit,smb347-charger.yaml b/Documentation/devicetree/bindings/power/supply/summit,smb347-charger.yaml index 2d552becbfe6..65ed92bb05f3 100644 --- a/Documentation/devicetree/bindings/power/supply/summit,smb347-charger.yaml +++ b/Documentation/devicetree/bindings/power/supply/summit,smb347-charger.yaml @@ -23,9 +23,7 @@ properties: interrupts: maxItems: 1 - monitored-battery: - description: phandle to the battery node - $ref: /schemas/types.yaml#/definitions/phandle + monitored-battery: true summit,enable-usb-charging: type: boolean @@ -94,6 +92,7 @@ properties: unevaluatedProperties: false allOf: + - $ref: power-supply.yaml# - if: properties: compatible: From da32b6d7bcdd7a7cfd5f77418a1c026bc3374113 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 4 Jun 2025 10:18:26 +0200 Subject: [PATCH 0259/2411] dt-bindings: power: supply: Drop redundant monitored-battery ref Bindings reference the common power supply schema, which already defines the type (ref) for "monitored-battery" property. Drop the redundant ref from individual device schemas along with obvious description also duplicating what is in power-supply.yaml. 
Signed-off-by: Krzysztof Kozlowski Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250604-dt-bindings-psy-monitored-battery-v1-6-7f755ff75218@linaro.org Signed-off-by: Sebastian Reichel --- Documentation/devicetree/bindings/power/supply/bq24190.yaml | 1 - Documentation/devicetree/bindings/power/supply/bq25980.yaml | 4 +--- .../devicetree/bindings/power/supply/cw2015_battery.yaml | 5 +---- .../bindings/power/supply/stericsson,ab8500-btemp.yaml | 4 +--- .../bindings/power/supply/stericsson,ab8500-chargalg.yaml | 4 +--- .../bindings/power/supply/stericsson,ab8500-charger.yaml | 4 +--- .../bindings/power/supply/stericsson,ab8500-fg.yaml | 4 +--- .../power/supply/x-powers,axp20x-battery-power-supply.yaml | 6 +----- 8 files changed, 7 insertions(+), 25 deletions(-) diff --git a/Documentation/devicetree/bindings/power/supply/bq24190.yaml b/Documentation/devicetree/bindings/power/supply/bq24190.yaml index 307c99c07721..ac9a76fc5876 100644 --- a/Documentation/devicetree/bindings/power/supply/bq24190.yaml +++ b/Documentation/devicetree/bindings/power/supply/bq24190.yaml @@ -48,7 +48,6 @@ properties: battery device. monitored-battery: - $ref: /schemas/types.yaml#/definitions/phandle description: | phandle to a "simple-battery" compatible node. diff --git a/Documentation/devicetree/bindings/power/supply/bq25980.yaml b/Documentation/devicetree/bindings/power/supply/bq25980.yaml index 256adbef55eb..0b5d005dc780 100644 --- a/Documentation/devicetree/bindings/power/supply/bq25980.yaml +++ b/Documentation/devicetree/bindings/power/supply/bq25980.yaml @@ -73,9 +73,7 @@ properties: description: | Indicates that the device state has changed. 
- monitored-battery: - $ref: /schemas/types.yaml#/definitions/phandle - description: phandle to the battery node being monitored + monitored-battery: true required: - compatible diff --git a/Documentation/devicetree/bindings/power/supply/cw2015_battery.yaml b/Documentation/devicetree/bindings/power/supply/cw2015_battery.yaml index dc697b6147b2..f7bde324153d 100644 --- a/Documentation/devicetree/bindings/power/supply/cw2015_battery.yaml +++ b/Documentation/devicetree/bindings/power/supply/cw2015_battery.yaml @@ -43,10 +43,7 @@ properties: minItems: 1 maxItems: 8 # Should be enough - monitored-battery: - description: - Specifies the phandle of a simple-battery connected to this gauge - $ref: /schemas/types.yaml#/definitions/phandle + monitored-battery: true required: - compatible diff --git a/Documentation/devicetree/bindings/power/supply/stericsson,ab8500-btemp.yaml b/Documentation/devicetree/bindings/power/supply/stericsson,ab8500-btemp.yaml index 525abdfb3e2d..c464aa82255a 100644 --- a/Documentation/devicetree/bindings/power/supply/stericsson,ab8500-btemp.yaml +++ b/Documentation/devicetree/bindings/power/supply/stericsson,ab8500-btemp.yaml @@ -17,9 +17,7 @@ properties: compatible: const: stericsson,ab8500-btemp - monitored-battery: - $ref: /schemas/types.yaml#/definitions/phandle - description: phandle to battery node + monitored-battery: true battery: $ref: /schemas/types.yaml#/definitions/phandle diff --git a/Documentation/devicetree/bindings/power/supply/stericsson,ab8500-chargalg.yaml b/Documentation/devicetree/bindings/power/supply/stericsson,ab8500-chargalg.yaml index 10bbdcfc87b6..39914b9e0cf5 100644 --- a/Documentation/devicetree/bindings/power/supply/stericsson,ab8500-chargalg.yaml +++ b/Documentation/devicetree/bindings/power/supply/stericsson,ab8500-chargalg.yaml @@ -17,9 +17,7 @@ properties: compatible: const: stericsson,ab8500-chargalg - monitored-battery: - $ref: /schemas/types.yaml#/definitions/phandle - description: phandle to battery node + 
monitored-battery: true battery: $ref: /schemas/types.yaml#/definitions/phandle diff --git a/Documentation/devicetree/bindings/power/supply/stericsson,ab8500-charger.yaml b/Documentation/devicetree/bindings/power/supply/stericsson,ab8500-charger.yaml index e33329b3af61..994fac12c8da 100644 --- a/Documentation/devicetree/bindings/power/supply/stericsson,ab8500-charger.yaml +++ b/Documentation/devicetree/bindings/power/supply/stericsson,ab8500-charger.yaml @@ -17,9 +17,7 @@ properties: compatible: const: stericsson,ab8500-charger - monitored-battery: - $ref: /schemas/types.yaml#/definitions/phandle - description: phandle to battery node + monitored-battery: true battery: $ref: /schemas/types.yaml#/definitions/phandle diff --git a/Documentation/devicetree/bindings/power/supply/stericsson,ab8500-fg.yaml b/Documentation/devicetree/bindings/power/supply/stericsson,ab8500-fg.yaml index 6a724ca90e99..92e4eb08fd61 100644 --- a/Documentation/devicetree/bindings/power/supply/stericsson,ab8500-fg.yaml +++ b/Documentation/devicetree/bindings/power/supply/stericsson,ab8500-fg.yaml @@ -17,9 +17,7 @@ properties: compatible: const: stericsson,ab8500-fg - monitored-battery: - $ref: /schemas/types.yaml#/definitions/phandle - description: phandle to battery node + monitored-battery: true battery: $ref: /schemas/types.yaml#/definitions/phandle diff --git a/Documentation/devicetree/bindings/power/supply/x-powers,axp20x-battery-power-supply.yaml b/Documentation/devicetree/bindings/power/supply/x-powers,axp20x-battery-power-supply.yaml index 3504c76a01d8..a90d558e7f86 100644 --- a/Documentation/devicetree/bindings/power/supply/x-powers,axp20x-battery-power-supply.yaml +++ b/Documentation/devicetree/bindings/power/supply/x-powers,axp20x-battery-power-supply.yaml @@ -26,11 +26,7 @@ properties: - const: x-powers,axp813-battery-power-supply - const: x-powers,axp813-battery-power-supply - monitored-battery: - description: - Specifies the phandle of an optional simple-battery connected to - 
this gauge. - $ref: /schemas/types.yaml#/definitions/phandle + monitored-battery: true x-powers,no-thermistor: type: boolean From 520c790c83e9e4c915a8e3fc9f2152ece39b6511 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Sun, 8 Jun 2025 22:40:02 +0200 Subject: [PATCH 0260/2411] power: supply: core: remove of_node from power_supply_config All drivers have been migrated from .of_node to .fwnode, so let's kill the former. Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20250430-psy-core-convert-to-fwnode-v2-2-f9643b958677@collabora.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 3 +-- include/linux/power_supply.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 33a5bfce4604..89947f1fe610 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -1529,10 +1529,9 @@ __power_supply_register(struct device *parent, dev_set_drvdata(dev, psy); psy->desc = desc; if (cfg) { + device_set_node(dev, cfg->fwnode); dev->groups = cfg->attr_grp; psy->drv_data = cfg->drv_data; - dev->of_node = - cfg->fwnode ? 
to_of_node(cfg->fwnode) : cfg->of_node; psy->supplied_to = cfg->supplied_to; psy->num_supplicants = cfg->num_supplicants; } diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 7803edaa8ff8..72012141656e 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -232,7 +232,6 @@ struct power_supply; /* Run-time specific power supply configuration */ struct power_supply_config { - struct device_node *of_node; struct fwnode_handle *fwnode; /* Driver private data */ From 570ba047a6548df24f5c9aaaf9a81173577ca789 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Sun, 8 Jun 2025 22:40:03 +0200 Subject: [PATCH 0261/2411] power: supply: core: battery-info: fully switch to fwnode Also use fwnode based parsing for "ocv-capacity-celsius" and "resistance-temp-table", so that any DT specific bits are removed from the power-supply core. Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20250430-psy-core-convert-to-fwnode-v2-3-f9643b958677@collabora.com Co-developed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 101 ++++++++++++----------- 1 file changed, 55 insertions(+), 46 deletions(-) diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 89947f1fe610..f2c79f15838d 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -585,32 +585,19 @@ int power_supply_get_battery_info(struct power_supply *psy, { struct power_supply_resistance_temp_table *resist_table; struct power_supply_battery_info *info; - struct device_node *battery_np = NULL; - struct fwnode_reference_args args; - struct fwnode_handle *fwnode = NULL; + struct fwnode_handle *srcnode, *fwnode; const char *value; - int err, len, index; - const __be32 *list; + int err, len, index, proplen; + u32 *propdata __free(kfree) = NULL; u32 min_max[2]; 
- if (psy->dev.of_node) { - battery_np = of_parse_phandle(psy->dev.of_node, "monitored-battery", 0); - if (!battery_np) - return -ENODEV; + srcnode = dev_fwnode(&psy->dev); + if (!srcnode && psy->dev.parent) + srcnode = dev_fwnode(psy->dev.parent); - fwnode = fwnode_handle_get(of_fwnode_handle(battery_np)); - } else if (psy->dev.parent) { - err = fwnode_property_get_reference_args( - dev_fwnode(psy->dev.parent), - "monitored-battery", NULL, 0, 0, &args); - if (err) - return err; - - fwnode = args.fwnode; - } - - if (!fwnode) - return -ENOENT; + fwnode = fwnode_find_reference(srcnode, "monitored-battery", 0); + if (IS_ERR(fwnode)) + return PTR_ERR(fwnode); err = fwnode_property_read_string(fwnode, "compatible", &value); if (err) @@ -740,15 +727,7 @@ int power_supply_get_battery_info(struct power_supply *psy, info->temp_max = min_max[1]; } - /* - * The below code uses raw of-data parsing to parse - * /schemas/types.yaml#/definitions/uint32-matrix - * data, so for now this is only support with of. 
- */ - if (!battery_np) - goto out_ret_pointer; - - len = of_property_count_u32_elems(battery_np, "ocv-capacity-celsius"); + len = fwnode_property_count_u32(fwnode, "ocv-capacity-celsius"); if (len < 0 && len != -EINVAL) { err = len; goto out_put_node; @@ -757,13 +736,13 @@ int power_supply_get_battery_info(struct power_supply *psy, err = -EINVAL; goto out_put_node; } else if (len > 0) { - of_property_read_u32_array(battery_np, "ocv-capacity-celsius", + fwnode_property_read_u32_array(fwnode, "ocv-capacity-celsius", info->ocv_temp, len); } for (index = 0; index < len; index++) { struct power_supply_battery_ocv_table *table; - int i, tab_len, size; + int i, tab_len; char *propname __free(kfree) = kasprintf(GFP_KERNEL, "ocv-capacity-table-%d", index); @@ -772,15 +751,28 @@ int power_supply_get_battery_info(struct power_supply *psy, err = -ENOMEM; goto out_put_node; } - list = of_get_property(battery_np, propname, &size); - if (!list || !size) { + proplen = fwnode_property_count_u32(fwnode, propname); + if (proplen < 0 || proplen % 2 != 0) { dev_err(&psy->dev, "failed to get %s\n", propname); power_supply_put_battery_info(psy, info); err = -EINVAL; goto out_put_node; } - tab_len = size / (2 * sizeof(__be32)); + u32 *propdata __free(kfree) = kcalloc(proplen, sizeof(*propdata), GFP_KERNEL); + if (!propdata) { + power_supply_put_battery_info(psy, info); + err = -EINVAL; + goto out_put_node; + } + err = fwnode_property_read_u32_array(fwnode, propname, propdata, proplen); + if (err < 0) { + dev_err(&psy->dev, "failed to get %s\n", propname); + power_supply_put_battery_info(psy, info); + goto out_put_node; + } + + tab_len = proplen / 2; info->ocv_table_size[index] = tab_len; info->ocv_table[index] = table = @@ -792,18 +784,36 @@ int power_supply_get_battery_info(struct power_supply *psy, } for (i = 0; i < tab_len; i++) { - table[i].ocv = be32_to_cpu(*list); - list++; - table[i].capacity = be32_to_cpu(*list); - list++; + table[i].ocv = propdata[i*2]; + table[i].capacity = 
propdata[i*2+1]; } } - list = of_get_property(battery_np, "resistance-temp-table", &len); - if (!list || !len) + proplen = fwnode_property_count_u32(fwnode, "resistance-temp-table"); + if (proplen == 0 || proplen == -EINVAL) { + err = 0; goto out_ret_pointer; + } else if (proplen < 0 || proplen % 2 != 0) { + power_supply_put_battery_info(psy, info); + err = (proplen < 0) ? proplen : -EINVAL; + goto out_put_node; + } - info->resist_table_size = len / (2 * sizeof(__be32)); + propdata = kcalloc(proplen, sizeof(*propdata), GFP_KERNEL); + if (!propdata) { + power_supply_put_battery_info(psy, info); + err = -ENOMEM; + goto out_put_node; + } + + err = fwnode_property_read_u32_array(fwnode, "resistance-temp-table", + propdata, proplen); + if (err < 0) { + power_supply_put_battery_info(psy, info); + goto out_put_node; + } + + info->resist_table_size = proplen / 2; info->resist_table = resist_table = devm_kcalloc(&psy->dev, info->resist_table_size, sizeof(*resist_table), @@ -815,8 +825,8 @@ int power_supply_get_battery_info(struct power_supply *psy, } for (index = 0; index < info->resist_table_size; index++) { - resist_table[index].temp = be32_to_cpu(*list++); - resist_table[index].resistance = be32_to_cpu(*list++); + resist_table[index].temp = propdata[index*2]; + resist_table[index].resistance = propdata[index*2+1]; } out_ret_pointer: @@ -825,7 +835,6 @@ int power_supply_get_battery_info(struct power_supply *psy, out_put_node: fwnode_handle_put(fwnode); - of_node_put(battery_np); return err; } EXPORT_SYMBOL_GPL(power_supply_get_battery_info); From f368f87b22dab8e97c5f447b00a0cae79fefbdcb Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Sun, 8 Jun 2025 22:40:06 +0200 Subject: [PATCH 0262/2411] power: supply: core: convert to fwnode Replace any DT specific code with fwnode in the power-supply core.
Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250430-psy-core-convert-to-fwnode-v2-4-f9643b958677@collabora.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq2415x_charger.c | 2 +- drivers/power/supply/power_supply_core.c | 65 ++++++++++++------------ include/linux/power_supply.h | 2 +- 3 files changed, 34 insertions(+), 35 deletions(-) diff --git a/drivers/power/supply/bq2415x_charger.c b/drivers/power/supply/bq2415x_charger.c index 9e3b9181ee76..1ecbca510bba 100644 --- a/drivers/power/supply/bq2415x_charger.c +++ b/drivers/power/supply/bq2415x_charger.c @@ -1674,7 +1674,7 @@ static int bq2415x_probe(struct i2c_client *client) /* Query for initial reported_mode and set it */ if (bq->nb.notifier_call) { if (np) { - notify_psy = power_supply_get_by_phandle(np, + notify_psy = power_supply_get_by_phandle(of_fwnode_handle(np), "ti,usb-charger-detection"); if (IS_ERR(notify_psy)) notify_psy = NULL; diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index f2c79f15838d..2d83bb125a48 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -196,24 +195,24 @@ static int __power_supply_populate_supplied_from(struct power_supply *epsy, void *data) { struct power_supply *psy = data; - struct device_node *np; + struct fwnode_handle *np; int i = 0; do { - np = of_parse_phandle(psy->dev.of_node, "power-supplies", i++); - if (!np) + np = fwnode_find_reference(psy->dev.fwnode, "power-supplies", i++); + if (IS_ERR(np)) break; - if (np == epsy->dev.of_node) { + if (np == epsy->dev.fwnode) { dev_dbg(&psy->dev, "%s: Found supply : %s\n", psy->desc->name, epsy->desc->name); psy->supplied_from[i-1] = (char *)epsy->desc->name; psy->num_supplies++; - of_node_put(np); + fwnode_handle_put(np); break; } - of_node_put(np); - } while (np); + 
fwnode_handle_put(np); + } while (!IS_ERR(np)); return 0; } @@ -232,16 +231,16 @@ static int power_supply_populate_supplied_from(struct power_supply *psy) static int __power_supply_find_supply_from_node(struct power_supply *epsy, void *data) { - struct device_node *np = data; + struct fwnode_handle *fwnode = data; /* returning non-zero breaks out of power_supply_for_each_psy loop */ - if (epsy->dev.of_node == np) + if (epsy->dev.fwnode == fwnode) return 1; return 0; } -static int power_supply_find_supply_from_node(struct device_node *supply_node) +static int power_supply_find_supply_from_fwnode(struct fwnode_handle *supply_node) { int error; @@ -249,7 +248,7 @@ static int power_supply_find_supply_from_node(struct device_node *supply_node) * power_supply_for_each_psy() either returns its own errors or values * returned by __power_supply_find_supply_from_node(). * - * __power_supply_find_supply_from_node() will return 0 (no match) + * __power_supply_find_supply_from_fwnode() will return 0 (no match) * or 1 (match). 
* * We return 0 if power_supply_for_each_psy() returned 1, -EPROBE_DEFER if @@ -262,7 +261,7 @@ static int power_supply_find_supply_from_node(struct device_node *supply_node) static int power_supply_check_supplies(struct power_supply *psy) { - struct device_node *np; + struct fwnode_handle *np; int cnt = 0; /* If there is already a list honor it */ @@ -270,24 +269,24 @@ static int power_supply_check_supplies(struct power_supply *psy) return 0; /* No device node found, nothing to do */ - if (!psy->dev.of_node) + if (!psy->dev.fwnode) return 0; do { int ret; - np = of_parse_phandle(psy->dev.of_node, "power-supplies", cnt++); - if (!np) + np = fwnode_find_reference(psy->dev.fwnode, "power-supplies", cnt++); + if (IS_ERR(np)) break; - ret = power_supply_find_supply_from_node(np); - of_node_put(np); + ret = power_supply_find_supply_from_fwnode(np); + fwnode_handle_put(np); if (ret) { dev_dbg(&psy->dev, "Failed to find supply!\n"); return ret; } - } while (np); + } while (!IS_ERR(np)); /* Missing valid "power-supplies" entries */ if (cnt == 1) @@ -498,14 +497,14 @@ void power_supply_put(struct power_supply *psy) EXPORT_SYMBOL_GPL(power_supply_put); #ifdef CONFIG_OF -static int power_supply_match_device_node(struct device *dev, const void *data) +static int power_supply_match_device_fwnode(struct device *dev, const void *data) { - return dev->parent && dev->parent->of_node == data; + return dev->parent && dev_fwnode(dev->parent) == data; } /** * power_supply_get_by_phandle() - Search for a power supply and returns its ref - * @np: Pointer to device node holding phandle property + * @fwnode: Pointer to fwnode holding phandle property * @property: Name of property holding a power supply name * * If power supply was found, it increases reference count for the @@ -515,21 +514,21 @@ static int power_supply_match_device_node(struct device *dev, const void *data) * Return: On success returns a reference to a power supply with * matching name equals to value under @property, NULL 
or ERR_PTR otherwise. */ -struct power_supply *power_supply_get_by_phandle(struct device_node *np, - const char *property) +struct power_supply *power_supply_get_by_phandle(struct fwnode_handle *fwnode, + const char *property) { - struct device_node *power_supply_np; + struct fwnode_handle *power_supply_fwnode; struct power_supply *psy = NULL; struct device *dev; - power_supply_np = of_parse_phandle(np, property, 0); - if (!power_supply_np) - return ERR_PTR(-ENODEV); + power_supply_fwnode = fwnode_find_reference(fwnode, property, 0); + if (IS_ERR(power_supply_fwnode)) + return ERR_CAST(power_supply_fwnode); - dev = class_find_device(&power_supply_class, NULL, power_supply_np, - power_supply_match_device_node); + dev = class_find_device(&power_supply_class, NULL, power_supply_fwnode, + power_supply_match_device_fwnode); - of_node_put(power_supply_np); + fwnode_handle_put(power_supply_fwnode); if (dev) { psy = dev_to_psy(dev); @@ -561,14 +560,14 @@ struct power_supply *devm_power_supply_get_by_phandle(struct device *dev, { struct power_supply **ptr, *psy; - if (!dev->of_node) + if (!dev_fwnode(dev)) return ERR_PTR(-ENODEV); ptr = devres_alloc(devm_power_supply_put, sizeof(*ptr), GFP_KERNEL); if (!ptr) return ERR_PTR(-ENOMEM); - psy = power_supply_get_by_phandle(dev->of_node, property); + psy = power_supply_get_by_phandle(dev_fwnode(dev), property); if (IS_ERR_OR_NULL(psy)) { devres_free(ptr); } else { diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 72012141656e..d90ac7b73755 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -808,7 +808,7 @@ static inline struct power_supply *power_supply_get_by_name(const char *name) { return NULL; } #endif #ifdef CONFIG_OF -extern struct power_supply *power_supply_get_by_phandle(struct device_node *np, +extern struct power_supply *power_supply_get_by_phandle(struct fwnode_handle *fwnode, const char *property); extern struct power_supply *devm_power_supply_get_by_phandle( 
struct device *dev, const char *property); From 370643f45aad93476b6489238ccb45a77b94da3f Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Sun, 8 Jun 2025 22:40:07 +0200 Subject: [PATCH 0263/2411] power: supply: core: rename power_supply_get_by_phandle to power_supply_get_by_reference (devm_)power_supply_get_by_phandle now internally uses fwnode and are no longer DT specific. Thus drop the ifdef check for CONFIG_OF and rename to (devm_)power_supply_get_by_reference to avoid the DT terminology. Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250430-psy-core-convert-to-fwnode-v2-5-f9643b958677@collabora.com Signed-off-by: Sebastian Reichel --- drivers/phy/allwinner/phy-sun4i-usb.c | 2 +- drivers/power/supply/bq2415x_charger.c | 2 +- drivers/power/supply/power_supply_core.c | 22 ++++++++++------------ include/linux/power_supply.h | 15 +++------------ 4 files changed, 15 insertions(+), 26 deletions(-) diff --git a/drivers/phy/allwinner/phy-sun4i-usb.c b/drivers/phy/allwinner/phy-sun4i-usb.c index 29b8fd4b9351..8873aed3a52a 100644 --- a/drivers/phy/allwinner/phy-sun4i-usb.c +++ b/drivers/phy/allwinner/phy-sun4i-usb.c @@ -754,7 +754,7 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev) } if (of_property_present(np, "usb0_vbus_power-supply")) { - data->vbus_power_supply = devm_power_supply_get_by_phandle(dev, + data->vbus_power_supply = devm_power_supply_get_by_reference(dev, "usb0_vbus_power-supply"); if (IS_ERR(data->vbus_power_supply)) { dev_err(dev, "Couldn't get the VBUS power supply\n"); diff --git a/drivers/power/supply/bq2415x_charger.c b/drivers/power/supply/bq2415x_charger.c index 1ecbca510bba..917c26ee56bc 100644 --- a/drivers/power/supply/bq2415x_charger.c +++ b/drivers/power/supply/bq2415x_charger.c @@ -1674,7 +1674,7 @@ static int bq2415x_probe(struct i2c_client *client) /* Query for initial reported_mode and set it */ if (bq->nb.notifier_call) { if (np) { - notify_psy = power_supply_get_by_phandle(of_fwnode_handle(np), + 
notify_psy = power_supply_get_by_reference(of_fwnode_handle(np), "ti,usb-charger-detection"); if (IS_ERR(notify_psy)) notify_psy = NULL; diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 2d83bb125a48..aedb20c1d276 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -496,14 +496,13 @@ void power_supply_put(struct power_supply *psy) } EXPORT_SYMBOL_GPL(power_supply_put); -#ifdef CONFIG_OF static int power_supply_match_device_fwnode(struct device *dev, const void *data) { return dev->parent && dev_fwnode(dev->parent) == data; } /** - * power_supply_get_by_phandle() - Search for a power supply and returns its ref + * power_supply_get_by_reference() - Search for a power supply and returns its ref * @fwnode: Pointer to fwnode holding phandle property * @property: Name of property holding a power supply name * @@ -514,8 +513,8 @@ static int power_supply_match_device_fwnode(struct device *dev, const void *data * Return: On success returns a reference to a power supply with * matching name equals to value under @property, NULL or ERR_PTR otherwise. 
*/ -struct power_supply *power_supply_get_by_phandle(struct fwnode_handle *fwnode, - const char *property) +struct power_supply *power_supply_get_by_reference(struct fwnode_handle *fwnode, + const char *property) { struct fwnode_handle *power_supply_fwnode; struct power_supply *psy = NULL; @@ -537,7 +536,7 @@ struct power_supply *power_supply_get_by_phandle(struct fwnode_handle *fwnode, return psy; } -EXPORT_SYMBOL_GPL(power_supply_get_by_phandle); +EXPORT_SYMBOL_GPL(power_supply_get_by_reference); static void devm_power_supply_put(struct device *dev, void *res) { @@ -547,16 +546,16 @@ static void devm_power_supply_put(struct device *dev, void *res) } /** - * devm_power_supply_get_by_phandle() - Resource managed version of - * power_supply_get_by_phandle() + * devm_power_supply_get_by_reference() - Resource managed version of + * power_supply_get_by_reference() * @dev: Pointer to device holding phandle property * @property: Name of property holding a power supply phandle * * Return: On success returns a reference to a power supply with * matching name equals to value under @property, NULL or ERR_PTR otherwise. 
*/ -struct power_supply *devm_power_supply_get_by_phandle(struct device *dev, - const char *property) +struct power_supply *devm_power_supply_get_by_reference(struct device *dev, + const char *property) { struct power_supply **ptr, *psy; @@ -567,7 +566,7 @@ struct power_supply *devm_power_supply_get_by_phandle(struct device *dev, if (!ptr) return ERR_PTR(-ENOMEM); - psy = power_supply_get_by_phandle(dev_fwnode(dev), property); + psy = power_supply_get_by_reference(dev_fwnode(dev), property); if (IS_ERR_OR_NULL(psy)) { devres_free(ptr); } else { @@ -576,8 +575,7 @@ struct power_supply *devm_power_supply_get_by_phandle(struct device *dev, } return psy; } -EXPORT_SYMBOL_GPL(devm_power_supply_get_by_phandle); -#endif /* CONFIG_OF */ +EXPORT_SYMBOL_GPL(devm_power_supply_get_by_reference); int power_supply_get_battery_info(struct power_supply *psy, struct power_supply_battery_info **info_out) diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index d90ac7b73755..45468959dd98 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -807,19 +807,10 @@ static inline void power_supply_put(struct power_supply *psy) {} static inline struct power_supply *power_supply_get_by_name(const char *name) { return NULL; } #endif -#ifdef CONFIG_OF -extern struct power_supply *power_supply_get_by_phandle(struct fwnode_handle *fwnode, - const char *property); -extern struct power_supply *devm_power_supply_get_by_phandle( +extern struct power_supply *power_supply_get_by_reference(struct fwnode_handle *fwnode, + const char *property); +extern struct power_supply *devm_power_supply_get_by_reference( struct device *dev, const char *property); -#else /* !CONFIG_OF */ -static inline struct power_supply * -power_supply_get_by_phandle(struct device_node *np, const char *property) -{ return NULL; } -static inline struct power_supply * -devm_power_supply_get_by_phandle(struct device *dev, const char *property) -{ return NULL; } -#endif /* CONFIG_OF */ 
extern const enum power_supply_property power_supply_battery_info_properties[]; extern const size_t power_supply_battery_info_properties_size; From 8842bd00a74bf758fb1abf572ec1c7d70c09dedb Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 8 Jun 2025 22:40:08 +0200 Subject: [PATCH 0264/2411] power: supply: ug3105_battery: Use psy->battery_info For POWER_SUPPLY_TYPE_BATTERY power-supplies the core already calls power_supply_get_battery_info() and stores the result in psy->battery_info. Use psy->battery_info instead of having the driver call power_supply_get_battery_info() itself. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20250608204010.37482-9-hansg@kernel.org Signed-off-by: Sebastian Reichel --- drivers/power/supply/ug3105_battery.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/power/supply/ug3105_battery.c b/drivers/power/supply/ug3105_battery.c index 38e23bdd4603..806f6892e189 100644 --- a/drivers/power/supply/ug3105_battery.c +++ b/drivers/power/supply/ug3105_battery.c @@ -69,7 +69,6 @@ struct ug3105_chip { struct i2c_client *client; struct power_supply *psy; - struct power_supply_battery_info *info; struct delayed_work work; struct mutex lock; int ocv[UG3105_MOV_AVG_WINDOW]; /* micro-volt */ @@ -103,7 +102,8 @@ static int ug3105_read_word(struct i2c_client *client, u8 reg) static int ug3105_get_status(struct ug3105_chip *chip) { - int full = chip->info->constant_charge_voltage_max_uv - UG3105_FULL_BAT_HYST_UV; + int full = chip->psy->battery_info->constant_charge_voltage_max_uv - + UG3105_FULL_BAT_HYST_UV; if (chip->curr > UG3105_CURR_HYST_UA) return POWER_SUPPLY_STATUS_CHARGING; @@ -164,7 +164,7 @@ static int ug3105_get_capacity(struct ug3105_chip *chip) ocv_diff = ocv_capacity_tbl[i] - chip->ocv_avg; ocv_step = ocv_capacity_tbl[i] - ocv_capacity_tbl[i - 1]; /* scale 0-110% down to 0-100% for LiPo HV */ - if (chip->info->constant_charge_voltage_max_uv >= 4300000) + if 
(chip->psy->battery_info->constant_charge_voltage_max_uv >= 4300000) return (i * 500 - ocv_diff * 500 / ocv_step) / 110; else return i * 5 - ocv_diff * 5 / ocv_step; @@ -401,12 +401,9 @@ static int ug3105_probe(struct i2c_client *client) if (IS_ERR(psy)) return PTR_ERR(psy); - ret = power_supply_get_battery_info(psy, &chip->info); - if (ret) - return ret; - - if (chip->info->factory_internal_resistance_uohm == -EINVAL || - chip->info->constant_charge_voltage_max_uv == -EINVAL) { + if (!psy->battery_info || + psy->battery_info->factory_internal_resistance_uohm == -EINVAL || + psy->battery_info->constant_charge_voltage_max_uv == -EINVAL) { dev_err(dev, "error required properties are missing\n"); return -ENODEV; } @@ -422,7 +419,7 @@ static int ug3105_probe(struct i2c_client *client) chip->ua_per_unit = 8100000 / curr_sense_res_uohm; /* Use provided internal resistance as start point (in milli-ohm) */ - chip->intern_res_avg = chip->info->factory_internal_resistance_uohm / 1000; + chip->intern_res_avg = psy->battery_info->factory_internal_resistance_uohm / 1000; /* Also add it to the internal resistance moving average window */ chip->intern_res[0] = chip->intern_res_avg; chip->intern_res_avg_index = 1; From 2986e5b213cd84ac290ae68e73b7629ec8f184a6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 8 Jun 2025 22:40:09 +0200 Subject: [PATCH 0265/2411] power: supply: ug3105_battery: Switch to power_supply_batinfo_ocv2cap() Replace the hardcoded ocv -> capacity table and the ug3105_get_capacity() helper with using the generic power_supply_batinfo_ocv2cap() function. Note this relies on the battery fwnode providing at least 1 "ocv-capacity-table", if that is missing probe() will now fail with EINVAL. 
Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20250608204010.37482-10-hansg@kernel.org Signed-off-by: Sebastian Reichel --- drivers/power/supply/ug3105_battery.c | 68 ++++----------------------- 1 file changed, 10 insertions(+), 58 deletions(-) diff --git a/drivers/power/supply/ug3105_battery.c b/drivers/power/supply/ug3105_battery.c index 806f6892e189..e8a1de7cade0 100644 --- a/drivers/power/supply/ug3105_battery.c +++ b/drivers/power/supply/ug3105_battery.c @@ -66,6 +66,8 @@ #define UG3105_LOW_BAT_UV 3700000 #define UG3105_FULL_BAT_HYST_UV 38000 +#define AMBIENT_TEMP_CELCIUS 25 + struct ug3105_chip { struct i2c_client *client; struct power_supply *psy; @@ -117,62 +119,6 @@ static int ug3105_get_status(struct ug3105_chip *chip) return POWER_SUPPLY_STATUS_NOT_CHARGING; } -static int ug3105_get_capacity(struct ug3105_chip *chip) -{ - /* - * OCV voltages in uV for 0-110% in 5% increments, the 100-110% is - * for LiPo HV (High-Voltage) bateries which can go up to 4.35V - * instead of the usual 4.2V. 
- */ - static const int ocv_capacity_tbl[23] = { - 3350000, - 3610000, - 3690000, - 3710000, - 3730000, - 3750000, - 3770000, - 3786667, - 3803333, - 3820000, - 3836667, - 3853333, - 3870000, - 3907500, - 3945000, - 3982500, - 4020000, - 4075000, - 4110000, - 4150000, - 4200000, - 4250000, - 4300000, - }; - int i, ocv_diff, ocv_step; - - if (chip->ocv_avg < ocv_capacity_tbl[0]) - return 0; - - if (chip->status == POWER_SUPPLY_STATUS_FULL) - return 100; - - for (i = 1; i < ARRAY_SIZE(ocv_capacity_tbl); i++) { - if (chip->ocv_avg > ocv_capacity_tbl[i]) - continue; - - ocv_diff = ocv_capacity_tbl[i] - chip->ocv_avg; - ocv_step = ocv_capacity_tbl[i] - ocv_capacity_tbl[i - 1]; - /* scale 0-110% down to 0-100% for LiPo HV */ - if (chip->psy->battery_info->constant_charge_voltage_max_uv >= 4300000) - return (i * 500 - ocv_diff * 500 / ocv_step) / 110; - else - return i * 5 - ocv_diff * 5 / ocv_step; - } - - return 100; -} - static void ug3105_work(struct work_struct *work) { struct ug3105_chip *chip = container_of(work, struct ug3105_chip, @@ -231,7 +177,12 @@ static void ug3105_work(struct work_struct *work) chip->supplied = power_supply_am_i_supplied(psy); chip->status = ug3105_get_status(chip); - chip->capacity = ug3105_get_capacity(chip); + if (chip->status == POWER_SUPPLY_STATUS_FULL) + chip->capacity = 100; + else + chip->capacity = power_supply_batinfo_ocv2cap(chip->psy->battery_info, + chip->ocv_avg, + AMBIENT_TEMP_CELCIUS); /* * Skip internal resistance calc on charger [un]plug and @@ -403,7 +354,8 @@ static int ug3105_probe(struct i2c_client *client) if (!psy->battery_info || psy->battery_info->factory_internal_resistance_uohm == -EINVAL || - psy->battery_info->constant_charge_voltage_max_uv == -EINVAL) { + psy->battery_info->constant_charge_voltage_max_uv == -EINVAL || + !psy->battery_info->ocv_table[0]) { dev_err(dev, "error required properties are missing\n"); return -ENODEV; } From 6aa1c3a72b99abeb7ddc649047073d701ede2c91 Mon Sep 17 00:00:00 2001 From: Hans 
de Goede Date: Sun, 8 Jun 2025 22:40:10 +0200 Subject: [PATCH 0266/2411] power: supply: bq24190: Free battery_info Call power_supply_put_battery_info() when bq24190_get_config() is done with it. The "struct power_supply_battery_info *info" pointer runs out of scope at the end of bq24190_get_config() so there is no need to keep it around after this. Note technically this is not a memleak fix, since all battery_info data is devm_alloc()-ed so it would still be freed when the driver is unbound. This just frees it as soon as the driver is done with it. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20250608204010.37482-11-hansg@kernel.org Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq24190_charger.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c index 1867beadd7af..2c3804e1207e 100644 --- a/drivers/power/supply/bq24190_charger.c +++ b/drivers/power/supply/bq24190_charger.c @@ -1983,6 +1983,8 @@ static int bq24190_get_config(struct bq24190_dev_info *bdi) v = info->constant_charge_voltage_max_uv; if (v >= bq24190_cvc_vreg_values[0] && v <= bdi->vreg_max) bdi->vreg = bdi->vreg_max = v; + + power_supply_put_battery_info(bdi->charger, info); } return 0; From 3f87baacea4d185071655f9b0baf07abb6237fcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kornel=20Dul=C4=99ba?= Date: Wed, 28 May 2025 11:23:29 +0000 Subject: [PATCH 0267/2411] power: supply: qcom_battmgr: Report battery capacity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Battery charge can be reported in several different ways. One of them is charge percentage referred to as POWER_SUPPLY_PROP_CAPACITY in the power supply API. Currently the driver reports the capacity in this way on SM8350, but not on the newer variants referred to as SC8280XP in the driver. Although this is not a bug in itself, not reporting the percentage can confuse some userspace consumers.
Mimic what is done in the ACPI driver (drivers/acpi/battery.c) and calculate the percentage capacity by dividing the current charge value by the full charge. Signed-off-by: Kornel Dulęba Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250528112328.1640743-2-korneld@google.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/qcom_battmgr.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/power/supply/qcom_battmgr.c b/drivers/power/supply/qcom_battmgr.c index fe27676fbc7c..33a70f3d72bc 100644 --- a/drivers/power/supply/qcom_battmgr.c +++ b/drivers/power/supply/qcom_battmgr.c @@ -577,6 +577,8 @@ static int qcom_battmgr_bat_get_property(struct power_supply *psy, val->intval = battmgr->status.capacity; break; case POWER_SUPPLY_PROP_CAPACITY: + if (battmgr->status.percent == (unsigned int)-1) + return -ENODATA; val->intval = battmgr->status.percent; break; case POWER_SUPPLY_PROP_TEMP: @@ -617,6 +619,7 @@ static const enum power_supply_property sc8280xp_bat_props[] = { POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_TECHNOLOGY, + POWER_SUPPLY_PROP_CAPACITY, POWER_SUPPLY_PROP_CYCLE_COUNT, POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, POWER_SUPPLY_PROP_VOLTAGE_NOW, @@ -1063,6 +1066,26 @@ static void qcom_battmgr_sc8280xp_callback(struct qcom_battmgr *battmgr, battmgr->ac.online = source == BATTMGR_CHARGING_SOURCE_AC; battmgr->usb.online = source == BATTMGR_CHARGING_SOURCE_USB; battmgr->wireless.online = source == BATTMGR_CHARGING_SOURCE_WIRELESS; + if (battmgr->info.last_full_capacity != 0) { + /* + * 100 * battmgr->status.capacity can overflow a 32bit + * unsigned integer. FW readings are in m{W/A}h, which + * are multiplied by 1000 converting them to u{W/A}h, + * the format the power_supply API expects. + * To avoid overflow use the original value for dividend + * and convert the divider back to m{W/A}h, which can be + * done without any loss of precision. 
+ */ + battmgr->status.percent = + (100 * le32_to_cpu(resp->status.capacity)) / + (battmgr->info.last_full_capacity / 1000); + } else { + /* + * Let the sysfs handler know no data is available at + * this time. + */ + battmgr->status.percent = (unsigned int)-1; + } break; case BATTMGR_BAT_DISCHARGE_TIME: battmgr->status.discharge_time = le32_to_cpu(resp->time); From 202ac22b8e2e015e6c196fd8113f3d2a62dd1afc Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Fri, 23 May 2025 13:14:22 +0300 Subject: [PATCH 0268/2411] power: supply: qcom_battmgr: Add lithium-polymer entry On some Dell XPS 13 (9345) variants, the battery used is lithium-polymer based. Currently, this is reported as unknown technology due to the entry missing. [ 4083.135325] Unknown battery technology 'LIP' Add another check for lithium-polymer in the technology parsing callback and return that instead of unknown. Signed-off-by: Abel Vesa Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250523-psy-qcom-battmgr-add-lipo-entry-v1-1-938c20a43a25@linaro.org Signed-off-by: Sebastian Reichel --- drivers/power/supply/qcom_battmgr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/power/supply/qcom_battmgr.c b/drivers/power/supply/qcom_battmgr.c index 33a70f3d72bc..99808ea9851f 100644 --- a/drivers/power/supply/qcom_battmgr.c +++ b/drivers/power/supply/qcom_battmgr.c @@ -984,6 +984,8 @@ static unsigned int qcom_battmgr_sc8280xp_parse_technology(const char *chemistry { if (!strncmp(chemistry, "LIO", BATTMGR_CHEMISTRY_LEN)) return POWER_SUPPLY_TECHNOLOGY_LION; + if (!strncmp(chemistry, "LIP", BATTMGR_CHEMISTRY_LEN)) + return POWER_SUPPLY_TECHNOLOGY_LIPO; pr_err("Unknown battery technology '%s'\n", chemistry); return POWER_SUPPLY_TECHNOLOGY_UNKNOWN; From e4ab1bfc3fe92ef5f8cebcc17963a08955963995 Mon Sep 17 00:00:00 2001 From: Taeyoung Kwon Date: Wed, 21 May 2025 13:09:52 +0000 Subject: [PATCH 0269/2411] power: reset: qcom-pon: Rename variables to use generic naming The qcom-pon driver was 
originally implemented for the PM8916 PMIC, and as a result, several internal variable names still refer to 'pm8916'. However, the driver has since been extended to support other PMICs as well. This patch renames those variables to use more generic and consistent names, improving clarity and reducing confusion for non-PM8916 devices. Signed-off-by: Taeyoung Kwon Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250521131116.2664-1-Taeyoung.Kwon@telit.com Signed-off-by: Sebastian Reichel --- drivers/power/reset/qcom-pon.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/power/reset/qcom-pon.c b/drivers/power/reset/qcom-pon.c index 1344b361a475..7e108982a582 100644 --- a/drivers/power/reset/qcom-pon.c +++ b/drivers/power/reset/qcom-pon.c @@ -19,7 +19,7 @@ #define NO_REASON_SHIFT 0 -struct pm8916_pon { +struct qcom_pon { struct device *dev; struct regmap *regmap; u32 baseaddr; @@ -27,11 +27,11 @@ struct pm8916_pon { long reason_shift; }; -static int pm8916_reboot_mode_write(struct reboot_mode_driver *reboot, +static int qcom_pon_reboot_mode_write(struct reboot_mode_driver *reboot, unsigned int magic) { - struct pm8916_pon *pon = container_of - (reboot, struct pm8916_pon, reboot_mode); + struct qcom_pon *pon = container_of + (reboot, struct qcom_pon, reboot_mode); int ret; ret = regmap_update_bits(pon->regmap, @@ -44,9 +44,9 @@ static int pm8916_reboot_mode_write(struct reboot_mode_driver *reboot, return ret; } -static int pm8916_pon_probe(struct platform_device *pdev) +static int qcom_pon_probe(struct platform_device *pdev) { - struct pm8916_pon *pon; + struct qcom_pon *pon; long reason_shift; int error; @@ -72,7 +72,7 @@ static int pm8916_pon_probe(struct platform_device *pdev) if (reason_shift != NO_REASON_SHIFT) { pon->reboot_mode.dev = &pdev->dev; pon->reason_shift = reason_shift; - pon->reboot_mode.write = pm8916_reboot_mode_write; + pon->reboot_mode.write = qcom_pon_reboot_mode_write; error 
= devm_reboot_mode_register(&pdev->dev, &pon->reboot_mode); if (error) { dev_err(&pdev->dev, "can't register reboot mode\n"); @@ -85,7 +85,7 @@ static int pm8916_pon_probe(struct platform_device *pdev) return devm_of_platform_populate(&pdev->dev); } -static const struct of_device_id pm8916_pon_id_table[] = { +static const struct of_device_id qcom_pon_id_table[] = { { .compatible = "qcom,pm8916-pon", .data = (void *)GEN1_REASON_SHIFT }, { .compatible = "qcom,pm8941-pon", .data = (void *)NO_REASON_SHIFT }, { .compatible = "qcom,pms405-pon", .data = (void *)GEN1_REASON_SHIFT }, @@ -93,16 +93,16 @@ static const struct of_device_id pm8916_pon_id_table[] = { { .compatible = "qcom,pmk8350-pon", .data = (void *)GEN2_REASON_SHIFT }, { } }; -MODULE_DEVICE_TABLE(of, pm8916_pon_id_table); +MODULE_DEVICE_TABLE(of, qcom_pon_id_table); -static struct platform_driver pm8916_pon_driver = { - .probe = pm8916_pon_probe, +static struct platform_driver qcom_pon_driver = { + .probe = qcom_pon_probe, .driver = { - .name = "pm8916-pon", - .of_match_table = pm8916_pon_id_table, + .name = "qcom-pon", + .of_match_table = qcom_pon_id_table, }, }; -module_platform_driver(pm8916_pon_driver); +module_platform_driver(qcom_pon_driver); -MODULE_DESCRIPTION("pm8916 Power On driver"); +MODULE_DESCRIPTION("Qualcomm Power On driver"); MODULE_LICENSE("GPL v2"); From 6af8ffab2db3199f22298641880dd111f3a630e2 Mon Sep 17 00:00:00 2001 From: Mihai Sain Date: Tue, 10 Jun 2025 15:45:47 +0300 Subject: [PATCH 0270/2411] power: reset: at91-sama5d2_shdwc: Refactor wake-up source logging to use dev_info Use dev_info() instead of pr_info() for more consistent logging in the driver. 
[root@sam9x75eb ~]$ dmesg | grep power [ 1.678542] at91-shdwc fffffe10.poweroff: Wake-Up source: WKUP pin Signed-off-by: Mihai Sain Link: https://lore.kernel.org/r/20250610124545.175492-3-mihai.sain@microchip.com Signed-off-by: Sebastian Reichel --- drivers/power/reset/at91-sama5d2_shdwc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/reset/at91-sama5d2_shdwc.c b/drivers/power/reset/at91-sama5d2_shdwc.c index e9fe08ee3812..ecf15694f925 100644 --- a/drivers/power/reset/at91-sama5d2_shdwc.c +++ b/drivers/power/reset/at91-sama5d2_shdwc.c @@ -129,7 +129,7 @@ static void at91_wakeup_status(struct platform_device *pdev) else if (SHDW_RTTWK(reg, &rcfg->shdwc)) reason = "RTT"; - pr_info("AT91: Wake-Up source: %s\n", reason); + dev_info(&pdev->dev, "Wake-Up source: %s\n", reason); } static void at91_poweroff(void) From 188014b4256fd7b625c79a45d61209da5ca4c92c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 28 May 2025 21:44:40 +0200 Subject: [PATCH 0271/2411] power: supply: bq256xx_charger: Constify reg_default array Static 'struct reg_default' array is not modified so can be changed to const for more safety. 
Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250528194439.567263-3-krzysztof.kozlowski@linaro.org Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq256xx_charger.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/bq256xx_charger.c b/drivers/power/supply/bq256xx_charger.c index 9f9b6019f8e1..ae14162f017a 100644 --- a/drivers/power/supply/bq256xx_charger.c +++ b/drivers/power/supply/bq256xx_charger.c @@ -387,7 +387,7 @@ static void bq256xx_usb_work(struct work_struct *data) } } -static struct reg_default bq2560x_reg_defs[] = { +static const struct reg_default bq2560x_reg_defs[] = { {BQ256XX_INPUT_CURRENT_LIMIT, 0x17}, {BQ256XX_CHARGER_CONTROL_0, 0x1a}, {BQ256XX_CHARGE_CURRENT_LIMIT, 0xa2}, @@ -398,7 +398,7 @@ static struct reg_default bq2560x_reg_defs[] = { {BQ256XX_CHARGER_CONTROL_3, 0x4c}, }; -static struct reg_default bq25611d_reg_defs[] = { +static const struct reg_default bq25611d_reg_defs[] = { {BQ256XX_INPUT_CURRENT_LIMIT, 0x17}, {BQ256XX_CHARGER_CONTROL_0, 0x1a}, {BQ256XX_CHARGE_CURRENT_LIMIT, 0x91}, @@ -411,7 +411,7 @@ static struct reg_default bq25611d_reg_defs[] = { {BQ256XX_CHARGER_CONTROL_4, 0x75}, }; -static struct reg_default bq25618_619_reg_defs[] = { +static const struct reg_default bq25618_619_reg_defs[] = { {BQ256XX_INPUT_CURRENT_LIMIT, 0x17}, {BQ256XX_CHARGER_CONTROL_0, 0x1a}, {BQ256XX_CHARGE_CURRENT_LIMIT, 0x91}, From 7cf88213b95e9491572c4af39c7ba2829f9b2637 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 28 May 2025 21:44:41 +0200 Subject: [PATCH 0272/2411] power: supply: bq25980_charger: Constify reg_default array Static 'struct reg_default' array is not modified so can be changed to const for more safety. 
Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250528194439.567263-4-krzysztof.kozlowski@linaro.org Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq25980_charger.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/bq25980_charger.c b/drivers/power/supply/bq25980_charger.c index 4ff76e3dddf6..723858d62d14 100644 --- a/drivers/power/supply/bq25980_charger.c +++ b/drivers/power/supply/bq25980_charger.c @@ -104,7 +104,7 @@ struct bq25980_device { int watchdog_timer; }; -static struct reg_default bq25980_reg_defs[] = { +static const struct reg_default bq25980_reg_defs[] = { {BQ25980_BATOVP, 0x5A}, {BQ25980_BATOVP_ALM, 0x46}, {BQ25980_BATOCP, 0x51}, @@ -159,7 +159,7 @@ static struct reg_default bq25980_reg_defs[] = { {BQ25980_CHRGR_CTRL_6, 0x0}, }; -static struct reg_default bq25975_reg_defs[] = { +static const struct reg_default bq25975_reg_defs[] = { {BQ25980_BATOVP, 0x5A}, {BQ25980_BATOVP_ALM, 0x46}, {BQ25980_BATOCP, 0x51}, @@ -214,7 +214,7 @@ static struct reg_default bq25975_reg_defs[] = { {BQ25980_CHRGR_CTRL_6, 0x0}, }; -static struct reg_default bq25960_reg_defs[] = { +static const struct reg_default bq25960_reg_defs[] = { {BQ25980_BATOVP, 0x5A}, {BQ25980_BATOVP_ALM, 0x46}, {BQ25980_BATOCP, 0x51}, From d9fa3aae08f99493e67fb79413c0e95d30fca5e9 Mon Sep 17 00:00:00 2001 From: Charles Han Date: Mon, 19 May 2025 10:47:41 +0800 Subject: [PATCH 0273/2411] power: supply: cpcap-charger: Fix null check for power_supply_get_by_name In the cpcap_usb_detect() function, the power_supply_get_by_name() function may return `NULL` instead of an error pointer. To prevent potential null pointer dereferences, add a null check.
Fixes: eab4e6d953c1 ("power: supply: cpcap-charger: get the battery inserted infomation from cpcap-battery") Signed-off-by: Charles Han Link: https://lore.kernel.org/r/20250519024741.5846-1-hanchunchao@inspur.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/cpcap-charger.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/cpcap-charger.c b/drivers/power/supply/cpcap-charger.c index 13300dc60baf..d0c3008db534 100644 --- a/drivers/power/supply/cpcap-charger.c +++ b/drivers/power/supply/cpcap-charger.c @@ -689,9 +689,8 @@ static void cpcap_usb_detect(struct work_struct *work) struct power_supply *battery; battery = power_supply_get_by_name("battery"); - if (IS_ERR_OR_NULL(battery)) { - dev_err(ddata->dev, "battery power_supply not available %li\n", - PTR_ERR(battery)); + if (!battery) { + dev_err(ddata->dev, "battery power_supply not available\n"); return; } From 22e4d29f081df8a10f1c062d3d952bb876eb9bdc Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 6 May 2025 15:01:27 +0200 Subject: [PATCH 0274/2411] power: reset: POWER_RESET_TORADEX_EC should depend on ARCH_MXC The Toradex Embedded Controller is currently only present on Toradex SMARC iMX8MP and iMX95 SoMs. Hence add a dependency on ARCH_MXC, to prevent asking the user about this driver when configuring a kernel without NXP i.MX SoC family support. 
Fixes: 18672fe12367ed44 ("power: reset: add Toradex Embedded Controller") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/1ef0beb1e09bf914650f9f9885a33af06772540d.1746536287.git.geert+renesas@glider.be Signed-off-by: Sebastian Reichel --- drivers/power/reset/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig index e71f0af4e378..95f140ee7077 100644 --- a/drivers/power/reset/Kconfig +++ b/drivers/power/reset/Kconfig @@ -218,6 +218,7 @@ config POWER_RESET_ST config POWER_RESET_TORADEX_EC tristate "Toradex Embedded Controller power-off and reset driver" + depends on ARCH_MXC || COMPILE_TEST depends on I2C select REGMAP_I2C help From a9aece5d7e8fa8bc74f1827d2cf1b189ffe7e8c8 Mon Sep 17 00:00:00 2001 From: Yuanjun Gong Date: Tue, 13 May 2025 20:37:32 +0800 Subject: [PATCH 0275/2411] power: return the correct error code In POWER_SUPPLY_PROP_MODEL_NAME branch of max1720x_battery_get_property(), program would return -ENODEV out of FIELD_GET error, but it's better also considering the error code returned by regmap_read() in case it fails. 
Signed-off-by: Yuanjun Gong Link: https://lore.kernel.org/r/20250513123732.3041577-1-ruc_gongyuanjun@163.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/max1720x_battery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/power/supply/max1720x_battery.c b/drivers/power/supply/max1720x_battery.c index ea3912fd1de8..12ecb1f40fe1 100644 --- a/drivers/power/supply/max1720x_battery.c +++ b/drivers/power/supply/max1720x_battery.c @@ -426,6 +426,8 @@ static int max1720x_battery_get_property(struct power_supply *psy, break; case POWER_SUPPLY_PROP_MODEL_NAME: ret = regmap_read(info->regmap, MAX172XX_DEV_NAME, &reg_val); + if (ret) + return ret; reg_val = FIELD_GET(MAX172XX_DEV_NAME_TYPE_MASK, reg_val); if (reg_val == MAX172XX_DEV_NAME_TYPE_MAX17201) val->strval = max17201_model; From 2937f5d2e24eefef8cb126244caec7fe3307f724 Mon Sep 17 00:00:00 2001 From: Charles Han Date: Mon, 19 May 2025 14:16:01 +0800 Subject: [PATCH 0276/2411] power: supply: max14577: Handle NULL pdata when CONFIG_OF is not set When the kernel is not configured with CONFIG_OF, the max14577_charger_dt_init function returns NULL. Fix the max14577_charger_probe function by returning -ENODATA instead of potentially passing a NULL pointer to PTR_ERR.
This fixes the below smatch warning: max14577_charger_probe() warn: passing zero to 'PTR_ERR' Fixes: e30110e9c96f ("charger: max14577: Configure battery-dependent settings from DTS and sysfs") Signed-off-by: Charles Han Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250519061601.8755-1-hanchunchao@inspur.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/max14577_charger.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/power/supply/max14577_charger.c b/drivers/power/supply/max14577_charger.c index 1cef2f860b5f..63077d38ea30 100644 --- a/drivers/power/supply/max14577_charger.c +++ b/drivers/power/supply/max14577_charger.c @@ -501,7 +501,7 @@ static struct max14577_charger_platform_data *max14577_charger_dt_init( static struct max14577_charger_platform_data *max14577_charger_dt_init( struct platform_device *pdev) { - return NULL; + return ERR_PTR(-ENODATA); } #endif /* CONFIG_OF */ @@ -572,7 +572,7 @@ static int max14577_charger_probe(struct platform_device *pdev) chg->max14577 = max14577; chg->pdata = max14577_charger_dt_init(pdev); - if (IS_ERR_OR_NULL(chg->pdata)) + if (IS_ERR(chg->pdata)) return PTR_ERR(chg->pdata); ret = max14577_charger_reg_init(chg); From 6c5393771c50fac30f08dfb6d2f65f4f2cfeb8c7 Mon Sep 17 00:00:00 2001 From: Casey Connolly Date: Thu, 19 Jun 2025 16:55:11 +0200 Subject: [PATCH 0277/2411] power: supply: qcom_pmi8998_charger: fix wakeirq Unloading and reloading the driver (e.g. when built as a module) currently leads to errors trying to enable wake IRQ since it's already enabled. Use devm to manage this for us so it correctly gets disabled when removing the driver. Additionally, call device_init_wakeup() so that charger attach/remove will trigger a wakeup by default. 
Fixes: 8648aeb5d7b7 ("power: supply: add Qualcomm PMI8998 SMB2 Charger driver") Signed-off-by: Casey Connolly Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250619-smb2-smb5-support-v1-3-ac5dec51b6e1@linaro.org Signed-off-by: Sebastian Reichel --- drivers/power/supply/qcom_pmi8998_charger.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/power/supply/qcom_pmi8998_charger.c b/drivers/power/supply/qcom_pmi8998_charger.c index c2f8f2e24398..cd3cb473c70d 100644 --- a/drivers/power/supply/qcom_pmi8998_charger.c +++ b/drivers/power/supply/qcom_pmi8998_charger.c @@ -1016,7 +1016,9 @@ static int smb2_probe(struct platform_device *pdev) if (rc < 0) return rc; - rc = dev_pm_set_wake_irq(chip->dev, chip->cable_irq); + devm_device_init_wakeup(chip->dev); + + rc = devm_pm_set_wake_irq(chip->dev, chip->cable_irq); if (rc < 0) return dev_err_probe(chip->dev, rc, "Couldn't set wake irq\n"); From 5ec53bcc7fce6801977a0c125fb726d7b0e9102c Mon Sep 17 00:00:00 2001 From: Casey Connolly Date: Thu, 19 Jun 2025 16:55:12 +0200 Subject: [PATCH 0278/2411] power: supply: pmi8998_charger: rename to qcom_smbx Prepare to add smb5 support by making variables and the file name more generic. Also take the opportunity to remove the "_charger" suffix since smb2 always refers to a charger. 
Signed-off-by: Casey Connolly Link: https://lore.kernel.org/r/20250619-smb2-smb5-support-v1-4-ac5dec51b6e1@linaro.org Signed-off-by: Sebastian Reichel --- drivers/power/supply/Makefile | 2 +- .../{qcom_pmi8998_charger.c => qcom_smbx.c} | 148 +++++++++--------- 2 files changed, 75 insertions(+), 75 deletions(-) rename drivers/power/supply/{qcom_pmi8998_charger.c => qcom_smbx.c} (88%) diff --git a/drivers/power/supply/Makefile b/drivers/power/supply/Makefile index 4f5f8e3507f8..f943c9150b32 100644 --- a/drivers/power/supply/Makefile +++ b/drivers/power/supply/Makefile @@ -120,5 +120,5 @@ obj-$(CONFIG_BATTERY_ACER_A500) += acer_a500_battery.o obj-$(CONFIG_BATTERY_SURFACE) += surface_battery.o obj-$(CONFIG_CHARGER_SURFACE) += surface_charger.o obj-$(CONFIG_BATTERY_UG3105) += ug3105_battery.o -obj-$(CONFIG_CHARGER_QCOM_SMB2) += qcom_pmi8998_charger.o +obj-$(CONFIG_CHARGER_QCOM_SMB2) += qcom_smbx.o obj-$(CONFIG_FUEL_GAUGE_MM8013) += mm8013.o diff --git a/drivers/power/supply/qcom_pmi8998_charger.c b/drivers/power/supply/qcom_smbx.c similarity index 88% rename from drivers/power/supply/qcom_pmi8998_charger.c rename to drivers/power/supply/qcom_smbx.c index cd3cb473c70d..b1cb925581ec 100644 --- a/drivers/power/supply/qcom_pmi8998_charger.c +++ b/drivers/power/supply/qcom_smbx.c @@ -362,17 +362,17 @@ enum charger_status { DISABLE_CHARGE, }; -struct smb2_register { +struct smb_init_register { u16 addr; u8 mask; u8 val; }; /** - * struct smb2_chip - smb2 chip structure + * struct smb_chip - smb chip structure * @dev: Device reference for power_supply * @name: The platform device name - * @base: Base address for smb2 registers + * @base: Base address for smb registers * @regmap: Register map * @batt_info: Battery data from DT * @status_change_work: Worker to handle plug/unplug events @@ -382,7 +382,7 @@ struct smb2_register { * @usb_in_v_chan: USB_IN voltage measurement channel * @chg_psy: Charger power supply instance */ -struct smb2_chip { +struct smb_chip { struct device 
*dev; const char *name; unsigned int base; @@ -399,7 +399,7 @@ struct smb2_chip { struct power_supply *chg_psy; }; -static enum power_supply_property smb2_properties[] = { +static enum power_supply_property smb_properties[] = { POWER_SUPPLY_PROP_MANUFACTURER, POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_CURRENT_MAX, @@ -411,7 +411,7 @@ static enum power_supply_property smb2_properties[] = { POWER_SUPPLY_PROP_USB_TYPE, }; -static int smb2_get_prop_usb_online(struct smb2_chip *chip, int *val) +static int smb_get_prop_usb_online(struct smb_chip *chip, int *val) { unsigned int stat; int rc; @@ -431,13 +431,13 @@ static int smb2_get_prop_usb_online(struct smb2_chip *chip, int *val) * Qualcomm "automatic power source detection" aka APSD * tells us what type of charger we're connected to. */ -static int smb2_apsd_get_charger_type(struct smb2_chip *chip, int *val) +static int smb_apsd_get_charger_type(struct smb_chip *chip, int *val) { unsigned int apsd_stat, stat; int usb_online = 0; int rc; - rc = smb2_get_prop_usb_online(chip, &usb_online); + rc = smb_get_prop_usb_online(chip, &usb_online); if (!usb_online) { *val = POWER_SUPPLY_USB_TYPE_UNKNOWN; return rc; @@ -471,13 +471,13 @@ static int smb2_apsd_get_charger_type(struct smb2_chip *chip, int *val) return 0; } -static int smb2_get_prop_status(struct smb2_chip *chip, int *val) +static int smb_get_prop_status(struct smb_chip *chip, int *val) { unsigned char stat[2]; int usb_online = 0; int rc; - rc = smb2_get_prop_usb_online(chip, &usb_online); + rc = smb_get_prop_usb_online(chip, &usb_online); if (!usb_online) { *val = POWER_SUPPLY_STATUS_DISCHARGING; return rc; @@ -519,7 +519,7 @@ static int smb2_get_prop_status(struct smb2_chip *chip, int *val) } } -static inline int smb2_get_current_limit(struct smb2_chip *chip, +static inline int smb_get_current_limit(struct smb_chip *chip, unsigned int *val) { int rc = regmap_read(chip->regmap, chip->base + ICL_STATUS, val); @@ -529,7 +529,7 @@ static inline int 
smb2_get_current_limit(struct smb2_chip *chip, return rc; } -static int smb2_set_current_limit(struct smb2_chip *chip, unsigned int val) +static int smb_set_current_limit(struct smb_chip *chip, unsigned int val) { unsigned char val_raw; @@ -544,22 +544,22 @@ static int smb2_set_current_limit(struct smb2_chip *chip, unsigned int val) val_raw); } -static void smb2_status_change_work(struct work_struct *work) +static void smb_status_change_work(struct work_struct *work) { unsigned int charger_type, current_ua; int usb_online = 0; int count, rc; - struct smb2_chip *chip; + struct smb_chip *chip; - chip = container_of(work, struct smb2_chip, status_change_work.work); + chip = container_of(work, struct smb_chip, status_change_work.work); - smb2_get_prop_usb_online(chip, &usb_online); + smb_get_prop_usb_online(chip, &usb_online); if (!usb_online) return; for (count = 0; count < 3; count++) { dev_dbg(chip->dev, "get charger type retry %d\n", count); - rc = smb2_apsd_get_charger_type(chip, &charger_type); + rc = smb_apsd_get_charger_type(chip, &charger_type); if (rc != -EAGAIN) break; msleep(100); @@ -592,11 +592,11 @@ static void smb2_status_change_work(struct work_struct *work) break; } - smb2_set_current_limit(chip, current_ua); + smb_set_current_limit(chip, current_ua); power_supply_changed(chip->chg_psy); } -static int smb2_get_iio_chan(struct smb2_chip *chip, struct iio_channel *chan, +static int smb_get_iio_chan(struct smb_chip *chip, struct iio_channel *chan, int *val) { int rc; @@ -617,7 +617,7 @@ static int smb2_get_iio_chan(struct smb2_chip *chip, struct iio_channel *chan, return iio_read_channel_processed(chan, val); } -static int smb2_get_prop_health(struct smb2_chip *chip, int *val) +static int smb_get_prop_health(struct smb_chip *chip, int *val) { int rc; unsigned int stat; @@ -651,11 +651,11 @@ static int smb2_get_prop_health(struct smb2_chip *chip, int *val) } } -static int smb2_get_property(struct power_supply *psy, +static int smb_get_property(struct 
power_supply *psy, enum power_supply_property psp, union power_supply_propval *val) { - struct smb2_chip *chip = power_supply_get_drvdata(psy); + struct smb_chip *chip = power_supply_get_drvdata(psy); switch (psp) { case POWER_SUPPLY_PROP_MANUFACTURER: @@ -665,43 +665,43 @@ static int smb2_get_property(struct power_supply *psy, val->strval = chip->name; return 0; case POWER_SUPPLY_PROP_CURRENT_MAX: - return smb2_get_current_limit(chip, &val->intval); + return smb_get_current_limit(chip, &val->intval); case POWER_SUPPLY_PROP_CURRENT_NOW: - return smb2_get_iio_chan(chip, chip->usb_in_i_chan, + return smb_get_iio_chan(chip, chip->usb_in_i_chan, &val->intval); case POWER_SUPPLY_PROP_VOLTAGE_NOW: - return smb2_get_iio_chan(chip, chip->usb_in_v_chan, + return smb_get_iio_chan(chip, chip->usb_in_v_chan, &val->intval); case POWER_SUPPLY_PROP_ONLINE: - return smb2_get_prop_usb_online(chip, &val->intval); + return smb_get_prop_usb_online(chip, &val->intval); case POWER_SUPPLY_PROP_STATUS: - return smb2_get_prop_status(chip, &val->intval); + return smb_get_prop_status(chip, &val->intval); case POWER_SUPPLY_PROP_HEALTH: - return smb2_get_prop_health(chip, &val->intval); + return smb_get_prop_health(chip, &val->intval); case POWER_SUPPLY_PROP_USB_TYPE: - return smb2_apsd_get_charger_type(chip, &val->intval); + return smb_apsd_get_charger_type(chip, &val->intval); default: dev_err(chip->dev, "invalid property: %d\n", psp); return -EINVAL; } } -static int smb2_set_property(struct power_supply *psy, +static int smb_set_property(struct power_supply *psy, enum power_supply_property psp, const union power_supply_propval *val) { - struct smb2_chip *chip = power_supply_get_drvdata(psy); + struct smb_chip *chip = power_supply_get_drvdata(psy); switch (psp) { case POWER_SUPPLY_PROP_CURRENT_MAX: - return smb2_set_current_limit(chip, val->intval); + return smb_set_current_limit(chip, val->intval); default: dev_err(chip->dev, "No setter for property: %d\n", psp); return -EINVAL; } } -static 
int smb2_property_is_writable(struct power_supply *psy, +static int smb_property_is_writable(struct power_supply *psy, enum power_supply_property psp) { switch (psp) { @@ -712,9 +712,9 @@ static int smb2_property_is_writable(struct power_supply *psy, } } -static irqreturn_t smb2_handle_batt_overvoltage(int irq, void *data) +static irqreturn_t smb_handle_batt_overvoltage(int irq, void *data) { - struct smb2_chip *chip = data; + struct smb_chip *chip = data; unsigned int status; regmap_read(chip->regmap, chip->base + BATTERY_CHARGER_STATUS_2, @@ -729,9 +729,9 @@ static irqreturn_t smb2_handle_batt_overvoltage(int irq, void *data) return IRQ_HANDLED; } -static irqreturn_t smb2_handle_usb_plugin(int irq, void *data) +static irqreturn_t smb_handle_usb_plugin(int irq, void *data) { - struct smb2_chip *chip = data; + struct smb_chip *chip = data; power_supply_changed(chip->chg_psy); @@ -741,18 +741,18 @@ static irqreturn_t smb2_handle_usb_plugin(int irq, void *data) return IRQ_HANDLED; } -static irqreturn_t smb2_handle_usb_icl_change(int irq, void *data) +static irqreturn_t smb_handle_usb_icl_change(int irq, void *data) { - struct smb2_chip *chip = data; + struct smb_chip *chip = data; power_supply_changed(chip->chg_psy); return IRQ_HANDLED; } -static irqreturn_t smb2_handle_wdog_bark(int irq, void *data) +static irqreturn_t smb_handle_wdog_bark(int irq, void *data) { - struct smb2_chip *chip = data; + struct smb_chip *chip = data; int rc; power_supply_changed(chip->chg_psy); @@ -765,22 +765,22 @@ static irqreturn_t smb2_handle_wdog_bark(int irq, void *data) return IRQ_HANDLED; } -static const struct power_supply_desc smb2_psy_desc = { +static const struct power_supply_desc smb_psy_desc = { .name = "pmi8998_charger", .type = POWER_SUPPLY_TYPE_USB, .usb_types = BIT(POWER_SUPPLY_USB_TYPE_SDP) | BIT(POWER_SUPPLY_USB_TYPE_CDP) | BIT(POWER_SUPPLY_USB_TYPE_DCP) | BIT(POWER_SUPPLY_USB_TYPE_UNKNOWN), - .properties = smb2_properties, - .num_properties = 
ARRAY_SIZE(smb2_properties), - .get_property = smb2_get_property, - .set_property = smb2_set_property, - .property_is_writeable = smb2_property_is_writable, + .properties = smb_properties, + .num_properties = ARRAY_SIZE(smb_properties), + .get_property = smb_get_property, + .set_property = smb_set_property, + .property_is_writeable = smb_property_is_writable, }; /* Init sequence derived from vendor downstream driver */ -static const struct smb2_register smb2_init_seq[] = { +static const struct smb_init_register smb_init_seq[] = { { .addr = AICL_RERUN_TIME_CFG, .mask = AICL_RERUN_TIME_MASK, .val = 0 }, /* * By default configure us as an upstream facing port @@ -882,17 +882,17 @@ static const struct smb2_register smb2_init_seq[] = { .val = 1000000 / CURRENT_SCALE_FACTOR }, }; -static int smb2_init_hw(struct smb2_chip *chip) +static int smb_init_hw(struct smb_chip *chip) { int rc, i; - for (i = 0; i < ARRAY_SIZE(smb2_init_seq); i++) { + for (i = 0; i < ARRAY_SIZE(smb_init_seq); i++) { dev_dbg(chip->dev, "%d: Writing 0x%02x to 0x%02x\n", i, - smb2_init_seq[i].val, smb2_init_seq[i].addr); + smb_init_seq[i].val, smb_init_seq[i].addr); rc = regmap_update_bits(chip->regmap, - chip->base + smb2_init_seq[i].addr, - smb2_init_seq[i].mask, - smb2_init_seq[i].val); + chip->base + smb_init_seq[i].addr, + smb_init_seq[i].mask, + smb_init_seq[i].val); if (rc < 0) return dev_err_probe(chip->dev, rc, "%s: init command %d failed\n", @@ -902,7 +902,7 @@ static int smb2_init_hw(struct smb2_chip *chip) return 0; } -static int smb2_init_irq(struct smb2_chip *chip, int *irq, const char *name, +static int smb_init_irq(struct smb_chip *chip, int *irq, const char *name, irqreturn_t (*handler)(int irq, void *data)) { int irqnum; @@ -924,11 +924,11 @@ static int smb2_init_irq(struct smb2_chip *chip, int *irq, const char *name, return 0; } -static int smb2_probe(struct platform_device *pdev) +static int smb_probe(struct platform_device *pdev) { struct power_supply_config supply_config = {}; 
struct power_supply_desc *desc; - struct smb2_chip *chip; + struct smb_chip *chip; int rc, irq; chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL); @@ -959,17 +959,17 @@ static int smb2_probe(struct platform_device *pdev) "Couldn't get usbin_i IIO channel\n"); } - rc = smb2_init_hw(chip); + rc = smb_init_hw(chip); if (rc < 0) return rc; supply_config.drv_data = chip; supply_config.fwnode = dev_fwnode(&pdev->dev); - desc = devm_kzalloc(chip->dev, sizeof(smb2_psy_desc), GFP_KERNEL); + desc = devm_kzalloc(chip->dev, sizeof(smb_psy_desc), GFP_KERNEL); if (!desc) return -ENOMEM; - memcpy(desc, &smb2_psy_desc, sizeof(smb2_psy_desc)); + memcpy(desc, &smb_psy_desc, sizeof(smb_psy_desc)); desc->name = devm_kasprintf(chip->dev, GFP_KERNEL, "%s-charger", (const char *)device_get_match_data(chip->dev)); @@ -988,7 +988,7 @@ static int smb2_probe(struct platform_device *pdev) "Failed to get battery info\n"); rc = devm_delayed_work_autocancel(chip->dev, &chip->status_change_work, - smb2_status_change_work); + smb_status_change_work); if (rc) return dev_err_probe(chip->dev, rc, "Failed to init status change work\n"); @@ -999,20 +999,20 @@ static int smb2_probe(struct platform_device *pdev) if (rc < 0) return dev_err_probe(chip->dev, rc, "Couldn't set vbat max\n"); - rc = smb2_init_irq(chip, &irq, "bat-ov", smb2_handle_batt_overvoltage); + rc = smb_init_irq(chip, &irq, "bat-ov", smb_handle_batt_overvoltage); if (rc < 0) return rc; - rc = smb2_init_irq(chip, &chip->cable_irq, "usb-plugin", - smb2_handle_usb_plugin); + rc = smb_init_irq(chip, &chip->cable_irq, "usb-plugin", + smb_handle_usb_plugin); if (rc < 0) return rc; - rc = smb2_init_irq(chip, &irq, "usbin-icl-change", - smb2_handle_usb_icl_change); + rc = smb_init_irq(chip, &irq, "usbin-icl-change", + smb_handle_usb_icl_change); if (rc < 0) return rc; - rc = smb2_init_irq(chip, &irq, "wdog-bark", smb2_handle_wdog_bark); + rc = smb_init_irq(chip, &irq, "wdog-bark", smb_handle_wdog_bark); if (rc < 0) return rc; @@ 
-1030,22 +1030,22 @@ static int smb2_probe(struct platform_device *pdev) return 0; } -static const struct of_device_id smb2_match_id_table[] = { +static const struct of_device_id smb_match_id_table[] = { { .compatible = "qcom,pmi8998-charger", .data = "pmi8998" }, { .compatible = "qcom,pm660-charger", .data = "pm660" }, { /* sentinal */ } }; -MODULE_DEVICE_TABLE(of, smb2_match_id_table); +MODULE_DEVICE_TABLE(of, smb_match_id_table); -static struct platform_driver qcom_spmi_smb2 = { - .probe = smb2_probe, +static struct platform_driver qcom_spmi_smb = { + .probe = smb_probe, .driver = { - .name = "qcom-pmi8998/pm660-charger", - .of_match_table = smb2_match_id_table, + .name = "qcom-smbx-charger", + .of_match_table = smb_match_id_table, }, }; -module_platform_driver(qcom_spmi_smb2); +module_platform_driver(qcom_spmi_smb); MODULE_AUTHOR("Casey Connolly "); MODULE_DESCRIPTION("Qualcomm SMB2 Charger Driver"); From 4deeea4b07414e7dd766005d8e28b1ef878cd417 Mon Sep 17 00:00:00 2001 From: Casey Connolly Date: Thu, 19 Jun 2025 16:55:18 +0200 Subject: [PATCH 0279/2411] MAINTAINERS: add myself as smbx charger driver maintainer Missed when this originally went upstream, add myself to the MAINTAINERS file for this driver. 
Signed-off-by: Casey Connolly Link: https://lore.kernel.org/r/20250619-smb2-smb5-support-v1-10-ac5dec51b6e1@linaro.org Signed-off-by: Sebastian Reichel --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..0a61f8e63245 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20523,6 +20523,13 @@ S: Maintained F: Documentation/devicetree/bindings/mtd/qcom,nandc.yaml F: drivers/mtd/nand/raw/qcom_nandc.c +QUALCOMM SMB CHARGER DRIVER +M: Casey Connolly +L: linux-arm-msm@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/power/supply/qcom,pmi8998-charger.yaml +F: drivers/power/supply/qcom_smbx_charger.c + QUALCOMM QSEECOM DRIVER M: Maximilian Luz L: linux-arm-msm@vger.kernel.org From fcad9bbf9e1a7de6c53908954ba1b1a1ab11ef1e Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Sun, 15 Jun 2025 16:55:05 -0400 Subject: [PATCH 0280/2411] rust: enable `clippy::ptr_as_ptr` lint In Rust 1.51.0, Clippy introduced the `ptr_as_ptr` lint [1]: > Though `as` casts between raw pointers are not terrible, > `pointer::cast` is safer because it cannot accidentally change the > pointer's mutability, nor cast the pointer to other types like `usize`. There are a few classes of changes required: - Modules generated by bindgen are marked `#[allow(clippy::ptr_as_ptr)]`. - Inferred casts (` as _`) are replaced with `.cast()`. - Ascribed casts (` as *... T`) are replaced with `.cast::<T>()`. - Multistep casts from references (` as *const _ as *const T`) are replaced with `core::ptr::from_ref(&x).cast()` with or without `::<T>` according to the previous rules. The `core::ptr::from_ref` call is required because `(x as *const _).cast::<T>()` results in inference failure. - Native literal C strings are replaced with `c_str!().as_char_ptr()`. - `*mut *mut T as _` is replaced with `let *mut *const T = (*mut *mut T)`.cast();` since pointer to pointer can be confusing.
Apply these changes and enable the lint -- no functional change intended. Link: https://rust-lang.github.io/rust-clippy/master/index.html#ptr_as_ptr [1] Reviewed-by: Benno Lossin Reviewed-by: Boqun Feng Signed-off-by: Tamir Duberstein Acked-by: Viresh Kumar Acked-by: Greg Kroah-Hartman Acked-by: Tejun Heo Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250615-ptr-as-ptr-v12-1-f43b024581e8@gmail.com [ Added `.cast()` for `opp`. - Miguel ] Signed-off-by: Miguel Ojeda --- Makefile | 1 + rust/bindings/lib.rs | 1 + rust/kernel/alloc/allocator_test.rs | 2 +- rust/kernel/alloc/kvec.rs | 4 ++-- rust/kernel/configfs.rs | 2 +- rust/kernel/cpufreq.rs | 2 +- rust/kernel/device.rs | 4 ++-- rust/kernel/devres.rs | 2 +- rust/kernel/dma.rs | 4 ++-- rust/kernel/error.rs | 2 +- rust/kernel/firmware.rs | 3 ++- rust/kernel/fs/file.rs | 2 +- rust/kernel/kunit.rs | 11 +++++++---- rust/kernel/list/impl_list_item_mod.rs | 2 +- rust/kernel/opp.rs | 2 +- rust/kernel/pci.rs | 2 +- rust/kernel/platform.rs | 4 +++- rust/kernel/print.rs | 6 +++--- rust/kernel/seq_file.rs | 2 +- rust/kernel/str.rs | 2 +- rust/kernel/sync/poll.rs | 2 +- rust/kernel/time/hrtimer/pin.rs | 2 +- rust/kernel/time/hrtimer/pin_mut.rs | 2 +- rust/kernel/workqueue.rs | 6 +++--- rust/uapi/lib.rs | 1 + 25 files changed, 41 insertions(+), 32 deletions(-) diff --git a/Makefile b/Makefile index f884dfe10246..25c19877b903 100644 --- a/Makefile +++ b/Makefile @@ -484,6 +484,7 @@ export rust_common_flags := --edition=2021 \ -Wclippy::needless_bitwise_bool \ -Aclippy::needless_lifetimes \ -Wclippy::no_mangle_with_rust_abi \ + -Wclippy::ptr_as_ptr \ -Wclippy::undocumented_unsafe_blocks \ -Wclippy::unnecessary_safety_comment \ -Wclippy::unnecessary_safety_doc \ diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs index a08eb5518cac..81b6c7aa4916 100644 --- a/rust/bindings/lib.rs +++ b/rust/bindings/lib.rs @@ -25,6 +25,7 @@ )] #[allow(dead_code)] +#[allow(clippy::ptr_as_ptr)] 
#[allow(clippy::undocumented_unsafe_blocks)] #[cfg_attr(CONFIG_RUSTC_HAS_UNNECESSARY_TRANSMUTES, allow(unnecessary_transmutes))] mod bindings_raw { diff --git a/rust/kernel/alloc/allocator_test.rs b/rust/kernel/alloc/allocator_test.rs index d19c06ef0498..a3074480bd8d 100644 --- a/rust/kernel/alloc/allocator_test.rs +++ b/rust/kernel/alloc/allocator_test.rs @@ -82,7 +82,7 @@ unsafe fn realloc( // SAFETY: Returns either NULL or a pointer to a memory allocation that satisfies or // exceeds the given size and alignment requirements. - let dst = unsafe { libc_aligned_alloc(layout.align(), layout.size()) } as *mut u8; + let dst = unsafe { libc_aligned_alloc(layout.align(), layout.size()) }.cast::<u8>(); let dst = NonNull::new(dst).ok_or(AllocError)?; if flags.contains(__GFP_ZERO) { diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs index 1a0dd852a468..0477041cbc03 100644 --- a/rust/kernel/alloc/kvec.rs +++ b/rust/kernel/alloc/kvec.rs @@ -288,7 +288,7 @@ pub fn spare_capacity_mut(&mut self) -> &mut [MaybeUninit<T>] { // - `self.len` is smaller than `self.capacity` by the type invariant and hence, the // resulting pointer is guaranteed to be part of the same allocated object. // - `self.len` can not overflow `isize`. - let ptr = unsafe { self.as_mut_ptr().add(self.len) } as *mut MaybeUninit<T>; + let ptr = unsafe { self.as_mut_ptr().add(self.len) }.cast::<MaybeUninit<T>>(); // SAFETY: The memory between `self.len` and `self.capacity` is guaranteed to be allocated // and valid, but uninitialized. @@ -847,7 +847,7 @@ fn drop(&mut self) { // - `ptr` points to memory with at least a size of `size_of::<T>() * len`, // - all elements within `b` are initialized values of `T`, // - `len` does not exceed `isize::MAX`.
- unsafe { Vec::from_raw_parts(ptr as _, len, len) } + unsafe { Vec::from_raw_parts(ptr.cast(), len, len) } } } diff --git a/rust/kernel/configfs.rs b/rust/kernel/configfs.rs index 34d0bea4f9a5..bc8e15dcec18 100644 --- a/rust/kernel/configfs.rs +++ b/rust/kernel/configfs.rs @@ -561,7 +561,7 @@ impl Attribute let data: &Data = unsafe { get_group_data(c_group) }; // SAFETY: By function safety requirements, `page` is writable for `PAGE_SIZE`. - let ret = O::show(data, unsafe { &mut *(page as *mut [u8; PAGE_SIZE]) }); + let ret = O::show(data, unsafe { &mut *(page.cast::<[u8; PAGE_SIZE]>()) }); match ret { Ok(size) => size as isize, diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs index 11b03e9d7e89..14aafb0c0314 100644 --- a/rust/kernel/cpufreq.rs +++ b/rust/kernel/cpufreq.rs @@ -649,7 +649,7 @@ pub fn data(&mut self) -> Option<::Borrowed<'_>> { fn set_data(&mut self, data: T) -> Result { if self.as_ref().driver_data.is_null() { // Transfer the ownership of the data to the foreign interface. 
- self.as_mut_ref().driver_data = ::into_foreign(data) as _; + self.as_mut_ref().driver_data = ::into_foreign(data).cast(); Ok(()) } else { Err(EBUSY) diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index dea06b79ecb5..5c946af3a4d5 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -195,10 +195,10 @@ unsafe fn printk(&self, klevel: &[u8], msg: fmt::Arguments<'_>) { #[cfg(CONFIG_PRINTK)] unsafe { bindings::_dev_printk( - klevel as *const _ as *const crate::ffi::c_char, + klevel.as_ptr().cast::(), self.as_raw(), c_str!("%pA").as_char_ptr(), - &msg as *const _ as *const crate::ffi::c_void, + core::ptr::from_ref(&msg).cast::(), ) }; } diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index 57502534d985..b8ba5417337b 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -159,7 +159,7 @@ fn remove_action(this: &Arc) -> bool { #[allow(clippy::missing_safety_doc)] unsafe extern "C" fn devres_callback(ptr: *mut kernel::ffi::c_void) { - let ptr = ptr as *mut DevresInner; + let ptr = ptr.cast::>(); // Devres owned this memory; now that we received the callback, drop the `Arc` and hence the // reference. // SAFETY: Safe, since we leaked an `Arc` reference to devm_add_action() in diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index a33261c62e0c..666bf2d64f9a 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs @@ -186,7 +186,7 @@ pub fn alloc_attrs( dev: dev.into(), dma_handle, count, - cpu_addr: ret as *mut T, + cpu_addr: ret.cast::(), dma_attrs, }) } @@ -293,7 +293,7 @@ fn drop(&mut self) { bindings::dma_free_attrs( self.dev.as_raw(), size, - self.cpu_addr as _, + self.cpu_addr.cast(), self.dma_handle, self.dma_attrs.as_raw(), ) diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index 3dee3139fcd4..afcb00cb6a75 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -153,7 +153,7 @@ pub(crate) fn to_blk_status(self) -> bindings::blk_status_t { /// Returns the error encoded as a pointer. 
pub fn to_ptr(self) -> *mut T { // SAFETY: `self.0` is a valid error due to its invariant. - unsafe { bindings::ERR_PTR(self.0.get() as _) as *mut _ } + unsafe { bindings::ERR_PTR(self.0.get() as _).cast() } } /// Returns a string representing the error, if one exists. diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs index 2494c96e105f..94fa1ea17ef0 100644 --- a/rust/kernel/firmware.rs +++ b/rust/kernel/firmware.rs @@ -62,10 +62,11 @@ impl Firmware { fn request_internal(name: &CStr, dev: &Device, func: FwFunc) -> Result { let mut fw: *mut bindings::firmware = core::ptr::null_mut(); let pfw: *mut *mut bindings::firmware = &mut fw; + let pfw: *mut *const bindings::firmware = pfw.cast(); // SAFETY: `pfw` is a valid pointer to a NULL initialized `bindings::firmware` pointer. // `name` and `dev` are valid as by their type invariants. - let ret = unsafe { func.0(pfw as _, name.as_char_ptr(), dev.as_raw()) }; + let ret = unsafe { func.0(pfw, name.as_char_ptr(), dev.as_raw()) }; if ret != 0 { return Err(Error::from_errno(ret)); } diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs index 72d84fb0e266..e9bfbad00755 100644 --- a/rust/kernel/fs/file.rs +++ b/rust/kernel/fs/file.rs @@ -366,7 +366,7 @@ fn deref(&self) -> &LocalFile { // // By the type invariants, there are no `fdget_pos` calls that did not take the // `f_pos_lock` mutex. - unsafe { LocalFile::from_raw_file(self as *const File as *const bindings::file) } + unsafe { LocalFile::from_raw_file((self as *const Self).cast()) } } } diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs index 4b8cdcb21e77..6930e86d98a9 100644 --- a/rust/kernel/kunit.rs +++ b/rust/kernel/kunit.rs @@ -9,6 +9,9 @@ use crate::prelude::*; use core::{ffi::c_void, fmt}; +#[cfg(CONFIG_PRINTK)] +use crate::c_str; + /// Prints a KUnit error-level message. /// /// Public but hidden since it should only be used from KUnit generated code. 
@@ -19,8 +22,8 @@ pub fn err(args: fmt::Arguments<'_>) { #[cfg(CONFIG_PRINTK)] unsafe { bindings::_printk( - c"\x013%pA".as_ptr() as _, - &args as *const _ as *const c_void, + c_str!("\x013%pA").as_char_ptr(), + core::ptr::from_ref(&args).cast::(), ); } } @@ -35,8 +38,8 @@ pub fn info(args: fmt::Arguments<'_>) { #[cfg(CONFIG_PRINTK)] unsafe { bindings::_printk( - c"\x016%pA".as_ptr() as _, - &args as *const _ as *const c_void, + c_str!("\x016%pA").as_char_ptr(), + core::ptr::from_ref(&args).cast::(), ); } } diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index a0438537cee1..1f9498c1458f 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -34,7 +34,7 @@ pub unsafe trait HasListLinks { unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut ListLinks { // SAFETY: The caller promises that the pointer is valid. The implementer promises that the // `OFFSET` constant is correct. - unsafe { (ptr as *mut u8).add(Self::OFFSET) as *mut ListLinks } + unsafe { ptr.cast::().add(Self::OFFSET).cast() } } } diff --git a/rust/kernel/opp.rs b/rust/kernel/opp.rs index a566fc3e7dcb..bc82a85ca883 100644 --- a/rust/kernel/opp.rs +++ b/rust/kernel/opp.rs @@ -92,7 +92,7 @@ fn to_c_str_array(names: &[CString]) -> Result> { let mut list = KVec::with_capacity(names.len() + 1, GFP_KERNEL)?; for name in names.iter() { - list.push(name.as_ptr() as _, GFP_KERNEL)?; + list.push(name.as_ptr().cast(), GFP_KERNEL)?; } list.push(ptr::null(), GFP_KERNEL)?; diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index 8435f8132e38..33ae0bdc433d 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -78,7 +78,7 @@ extern "C" fn probe_callback( // Let the `struct pci_dev` own a reference of the driver's private data. // SAFETY: By the type invariant `pdev.as_raw` returns a valid pointer to a // `struct pci_dev`. 
- unsafe { bindings::pci_set_drvdata(pdev.as_raw(), data.into_foreign() as _) }; + unsafe { bindings::pci_set_drvdata(pdev.as_raw(), data.into_foreign().cast()) }; } Err(err) => return Error::to_errno(err), } diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index 5b21fa517e55..4b06f9fbc172 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -69,7 +69,9 @@ extern "C" fn probe_callback(pdev: *mut bindings::platform_device) -> kernel::ff // Let the `struct platform_device` own a reference of the driver's private data. // SAFETY: By the type invariant `pdev.as_raw` returns a valid pointer to a // `struct platform_device`. - unsafe { bindings::platform_set_drvdata(pdev.as_raw(), data.into_foreign() as _) }; + unsafe { + bindings::platform_set_drvdata(pdev.as_raw(), data.into_foreign().cast()) + }; } Err(err) => return Error::to_errno(err), } diff --git a/rust/kernel/print.rs b/rust/kernel/print.rs index 9783d960a97a..ecdcee43e5a5 100644 --- a/rust/kernel/print.rs +++ b/rust/kernel/print.rs @@ -25,7 +25,7 @@ // SAFETY: The C contract guarantees that `buf` is valid if it's less than `end`. let mut w = unsafe { RawFormatter::from_ptrs(buf.cast(), end.cast()) }; // SAFETY: TODO. 
- let _ = w.write_fmt(unsafe { *(ptr as *const fmt::Arguments<'_>) }); + let _ = w.write_fmt(unsafe { *ptr.cast::>() }); w.pos().cast() } @@ -109,7 +109,7 @@ pub unsafe fn call_printk( bindings::_printk( format_string.as_ptr(), module_name.as_ptr(), - &args as *const _ as *const c_void, + core::ptr::from_ref(&args).cast::(), ); } } @@ -129,7 +129,7 @@ pub fn call_printk_cont(args: fmt::Arguments<'_>) { unsafe { bindings::_printk( format_strings::CONT.as_ptr(), - &args as *const _ as *const c_void, + core::ptr::from_ref(&args).cast::(), ); } } diff --git a/rust/kernel/seq_file.rs b/rust/kernel/seq_file.rs index 7a9403eb6e5b..8f199b1a3bb1 100644 --- a/rust/kernel/seq_file.rs +++ b/rust/kernel/seq_file.rs @@ -37,7 +37,7 @@ pub fn call_printf(&self, args: core::fmt::Arguments<'_>) { bindings::seq_printf( self.inner.get(), c_str!("%pA").as_char_ptr(), - &args as *const _ as *const crate::ffi::c_void, + core::ptr::from_ref(&args).cast::(), ); } } diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index a927db8e079c..6a3cb607b332 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -237,7 +237,7 @@ pub unsafe fn from_char_ptr<'a>(ptr: *const crate::ffi::c_char) -> &'a Self { // to a `NUL`-terminated C string. let len = unsafe { bindings::strlen(ptr) } + 1; // SAFETY: Lifetime guaranteed by the safety precondition. - let bytes = unsafe { core::slice::from_raw_parts(ptr as _, len) }; + let bytes = unsafe { core::slice::from_raw_parts(ptr.cast(), len) }; // SAFETY: As `len` is returned by `strlen`, `bytes` does not contain interior `NUL`. // As we have added 1 to `len`, the last byte is known to be `NUL`. unsafe { Self::from_bytes_with_nul_unchecked(bytes) } diff --git a/rust/kernel/sync/poll.rs b/rust/kernel/sync/poll.rs index d7e6e59e124b..339ab6097be7 100644 --- a/rust/kernel/sync/poll.rs +++ b/rust/kernel/sync/poll.rs @@ -73,7 +73,7 @@ pub fn register_wait(&mut self, file: &File, cv: &PollCondVar) { // be destroyed, the destructor must run. 
That destructor first removes all waiters, // and then waits for an rcu grace period. Therefore, `cv.wait_queue_head` is valid for // long enough. - unsafe { qproc(file.as_ptr() as _, cv.wait_queue_head.get(), self.0.get()) }; + unsafe { qproc(file.as_ptr().cast(), cv.wait_queue_head.get(), self.0.get()) }; } } } diff --git a/rust/kernel/time/hrtimer/pin.rs b/rust/kernel/time/hrtimer/pin.rs index 293ca9cf058c..2f29fd75d63a 100644 --- a/rust/kernel/time/hrtimer/pin.rs +++ b/rust/kernel/time/hrtimer/pin.rs @@ -79,7 +79,7 @@ impl<'a, T> RawHrTimerCallback for Pin<&'a T> unsafe extern "C" fn run(ptr: *mut bindings::hrtimer) -> bindings::hrtimer_restart { // `HrTimer` is `repr(C)` - let timer_ptr = ptr as *mut HrTimer; + let timer_ptr = ptr.cast::>(); // SAFETY: By the safety requirement of this function, `timer_ptr` // points to a `HrTimer` contained in an `T`. diff --git a/rust/kernel/time/hrtimer/pin_mut.rs b/rust/kernel/time/hrtimer/pin_mut.rs index 6033572d35ad..d05d68be55e9 100644 --- a/rust/kernel/time/hrtimer/pin_mut.rs +++ b/rust/kernel/time/hrtimer/pin_mut.rs @@ -83,7 +83,7 @@ impl<'a, T> RawHrTimerCallback for Pin<&'a mut T> unsafe extern "C" fn run(ptr: *mut bindings::hrtimer) -> bindings::hrtimer_restart { // `HrTimer` is `repr(C)` - let timer_ptr = ptr as *mut HrTimer; + let timer_ptr = ptr.cast::>(); // SAFETY: By the safety requirement of this function, `timer_ptr` // points to a `HrTimer` contained in an `T`. diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index d092112d843f..de61374e36bd 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -170,7 +170,7 @@ impl Queue { pub unsafe fn from_raw<'a>(ptr: *const bindings::workqueue_struct) -> &'a Queue { // SAFETY: The `Queue` type is `#[repr(transparent)]`, so the pointer cast is valid. The // caller promises that the pointer is not dangling. - unsafe { &*(ptr as *const Queue) } + unsafe { &*ptr.cast::() } } /// Enqueues a work item. 
@@ -522,7 +522,7 @@ unsafe impl WorkItemPointer for Arc { unsafe extern "C" fn run(ptr: *mut bindings::work_struct) { // The `__enqueue` method always uses a `work_struct` stored in a `Work`. - let ptr = ptr as *mut Work; + let ptr = ptr.cast::>(); // SAFETY: This computes the pointer that `__enqueue` got from `Arc::into_raw`. let ptr = unsafe { T::work_container_of(ptr) }; // SAFETY: This pointer comes from `Arc::into_raw` and we've been given back ownership. @@ -575,7 +575,7 @@ unsafe impl WorkItemPointer for Pin> { unsafe extern "C" fn run(ptr: *mut bindings::work_struct) { // The `__enqueue` method always uses a `work_struct` stored in a `Work`. - let ptr = ptr as *mut Work; + let ptr = ptr.cast::>(); // SAFETY: This computes the pointer that `__enqueue` got from `Arc::into_raw`. let ptr = unsafe { T::work_container_of(ptr) }; // SAFETY: This pointer comes from `Arc::into_raw` and we've been given back ownership. diff --git a/rust/uapi/lib.rs b/rust/uapi/lib.rs index c98d7a8cde77..e79a1f49f055 100644 --- a/rust/uapi/lib.rs +++ b/rust/uapi/lib.rs @@ -14,6 +14,7 @@ #![cfg_attr(test, allow(unsafe_op_in_unsafe_fn))] #![allow( clippy::all, + clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, dead_code, missing_docs, From d8c9e735f1f3e729268222a550de7a7f594c4210 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Sun, 15 Jun 2025 16:55:06 -0400 Subject: [PATCH 0281/2411] rust: enable `clippy::ptr_cast_constness` lint In Rust 1.72.0, Clippy introduced the `ptr_cast_constness` lint [1]: > Though `as` casts between raw pointers are not terrible, > `pointer::cast_mut` and `pointer::cast_const` are safer because they > cannot accidentally cast the pointer to another type. There are only 3 affected sites: - `*mut T as *const U as *mut U` becomes `(*mut T).cast()`. - `&self as *const Self as *mut Self` becomes `core::ptr::from_ref(self).cast_mut()`. - `*const T as *mut _` becomes `(*const T).cast_mut()`.
Apply these changes and enable the lint -- no functional change intended. Link: https://rust-lang.github.io/rust-clippy/master/index.html#ptr_cast_constness [1] Reviewed-by: Benno Lossin Reviewed-by: Boqun Feng Signed-off-by: Tamir Duberstein Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250615-ptr-as-ptr-v12-2-f43b024581e8@gmail.com Signed-off-by: Miguel Ojeda --- Makefile | 1 + rust/kernel/block/mq/request.rs | 4 ++-- rust/kernel/drm/device.rs | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 25c19877b903..63b705d99631 100644 --- a/Makefile +++ b/Makefile @@ -485,6 +485,7 @@ export rust_common_flags := --edition=2021 \ -Aclippy::needless_lifetimes \ -Wclippy::no_mangle_with_rust_abi \ -Wclippy::ptr_as_ptr \ + -Wclippy::ptr_cast_constness \ -Wclippy::undocumented_unsafe_blocks \ -Wclippy::unnecessary_safety_comment \ -Wclippy::unnecessary_safety_doc \ diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs index 4a5b7ec914ef..af5c9ac94f36 100644 --- a/rust/kernel/block/mq/request.rs +++ b/rust/kernel/block/mq/request.rs @@ -69,7 +69,7 @@ pub(crate) unsafe fn aref_from_raw(ptr: *mut bindings::request) -> ARef { // INVARIANT: By the safety requirements of this function, invariants are upheld. // SAFETY: By the safety requirement of this function, we own a // reference count that we can pass to `ARef`. - unsafe { ARef::from_raw(NonNull::new_unchecked(ptr as *const Self as *mut Self)) } + unsafe { ARef::from_raw(NonNull::new_unchecked(ptr.cast())) } } /// Notify the block layer that a request is going to be processed now. @@ -155,7 +155,7 @@ pub(crate) fn wrapper_ref(&self) -> &RequestDataWrapper { // the private data associated with this request is initialized and // valid. The existence of `&self` guarantees that the private data is // valid as a shared reference. 
- unsafe { Self::wrapper_ptr(self as *const Self as *mut Self).as_ref() } + unsafe { Self::wrapper_ptr(core::ptr::from_ref(self).cast_mut()).as_ref() } } } diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index 624d7a4c83ea..ef66deb7ce23 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -83,8 +83,8 @@ impl Device { major: T::INFO.major, minor: T::INFO.minor, patchlevel: T::INFO.patchlevel, - name: T::INFO.name.as_char_ptr() as *mut _, - desc: T::INFO.desc.as_char_ptr() as *mut _, + name: T::INFO.name.as_char_ptr().cast_mut(), + desc: T::INFO.desc.as_char_ptr().cast_mut(), driver_features: drm::driver::FEAT_GEM, ioctls: T::IOCTLS.as_ptr(), From 23773bd8da719b83013e66795e990036c4bfe014 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Sun, 15 Jun 2025 16:55:07 -0400 Subject: [PATCH 0282/2411] rust: enable `clippy::as_ptr_cast_mut` lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Rust 1.66.0, Clippy introduced the `as_ptr_cast_mut` lint [1]: > Since `as_ptr` takes a `&self`, the pointer won’t have write > permissions unless interior mutability is used, making it unlikely > that having it as a mutable pointer is correct. There is only one affected callsite, and the change amounts to replacing `as _` with `.cast_mut().cast()`. This doesn't change the semantics, but is more descriptive of what's going on. Apply this change and enable the lint -- no functional change intended. 
Link: https://rust-lang.github.io/rust-clippy/master/index.html#as_ptr_cast_mut [1] Reviewed-by: Benno Lossin Reviewed-by: Boqun Feng Signed-off-by: Tamir Duberstein Acked-by: Greg Kroah-Hartman Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250615-ptr-as-ptr-v12-3-f43b024581e8@gmail.com Signed-off-by: Miguel Ojeda --- Makefile | 1 + rust/kernel/devres.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 63b705d99631..54160d6bb168 100644 --- a/Makefile +++ b/Makefile @@ -479,6 +479,7 @@ export rust_common_flags := --edition=2021 \ -Wrust_2018_idioms \ -Wunreachable_pub \ -Wclippy::all \ + -Wclippy::as_ptr_cast_mut \ -Wclippy::ignored_unit_patterns \ -Wclippy::mut_mut \ -Wclippy::needless_bitwise_bool \ diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index b8ba5417337b..b418cfc6f90d 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -143,7 +143,7 @@ fn remove_action(this: &Arc) -> bool { bindings::devm_remove_action_nowarn( this.dev.as_raw(), Some(this.callback), - this.as_ptr() as _, + this.as_ptr().cast_mut().cast(), ) } == 0; From 5e30550558b1eace5fa4af4e2257216fa8a7c90f Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Sun, 15 Jun 2025 16:55:08 -0400 Subject: [PATCH 0283/2411] rust: enable `clippy::as_underscore` lint In Rust 1.63.0, Clippy introduced the `as_underscore` lint [1]: > The conversion might include lossy conversion or a dangerous cast that > might go undetected due to the type being inferred. > > The lint is allowed by default as using `_` is less wordy than always > specifying the type. Always specifying the type is especially helpful in function call contexts where the inferred type may change at a distance. Specifying the type also allows Clippy to spot more cases of `useless_conversion`. The primary downside is the need to specify the type in trivial getters. 
There are 4 such functions: 3 have become slightly less ergonomic, 1 was revealed to be a `useless_conversion`. While this doesn't eliminate unchecked `as` conversions, it makes such conversions easier to scrutinize. It also has the slight benefit of removing a degree of freedom on which to bikeshed. Thus apply the changes and enable the lint -- no functional change intended. Link: https://rust-lang.github.io/rust-clippy/master/index.html#as_underscore [1] Reviewed-by: Benno Lossin Reviewed-by: Boqun Feng Signed-off-by: Tamir Duberstein Acked-by: Greg Kroah-Hartman Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250615-ptr-as-ptr-v12-4-f43b024581e8@gmail.com [ Changed `isize` to `c_long`. - Miguel ] Signed-off-by: Miguel Ojeda --- Makefile | 1 + drivers/gpu/nova-core/driver.rs | 2 +- rust/kernel/block/mq/operations.rs | 2 +- rust/kernel/block/mq/request.rs | 7 +++- rust/kernel/device_id.rs | 2 +- rust/kernel/devres.rs | 13 ++++---- rust/kernel/dma.rs | 2 +- rust/kernel/drm/device.rs | 2 +- rust/kernel/error.rs | 2 +- rust/kernel/io.rs | 18 +++++------ rust/kernel/miscdevice.rs | 2 +- rust/kernel/mm/virt.rs | 52 +++++++++++++++--------------- rust/kernel/of.rs | 6 ++-- rust/kernel/pci.rs | 9 ++++-- rust/kernel/str.rs | 8 ++--- rust/kernel/workqueue.rs | 2 +- 16 files changed, 70 insertions(+), 60 deletions(-) diff --git a/Makefile b/Makefile index 54160d6bb168..c66dd543b44e 100644 --- a/Makefile +++ b/Makefile @@ -480,6 +480,7 @@ export rust_common_flags := --edition=2021 \ -Wunreachable_pub \ -Wclippy::all \ -Wclippy::as_ptr_cast_mut \ + -Wclippy::as_underscore \ -Wclippy::ignored_unit_patterns \ -Wclippy::mut_mut \ -Wclippy::needless_bitwise_bool \ diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs index 8c86101c26cb..a0e435dc4656 100644 --- a/drivers/gpu/nova-core/driver.rs +++ b/drivers/gpu/nova-core/driver.rs @@ -19,7 +19,7 @@ pub(crate) struct NovaCore { MODULE_PCI_TABLE, ::IdInfo, [( - 
pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_NVIDIA, bindings::PCI_ANY_ID as _), + pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_NVIDIA, bindings::PCI_ANY_ID as u32), () )] ); diff --git a/rust/kernel/block/mq/operations.rs b/rust/kernel/block/mq/operations.rs index 864ff379dc91..c2b98f507bcb 100644 --- a/rust/kernel/block/mq/operations.rs +++ b/rust/kernel/block/mq/operations.rs @@ -101,7 +101,7 @@ impl OperationsVTable { if let Err(e) = ret { e.to_blk_status() } else { - bindings::BLK_STS_OK as _ + bindings::BLK_STS_OK as bindings::blk_status_t } } diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs index af5c9ac94f36..fefd394f064a 100644 --- a/rust/kernel/block/mq/request.rs +++ b/rust/kernel/block/mq/request.rs @@ -125,7 +125,12 @@ pub fn end_ok(this: ARef) -> Result<(), ARef> { // success of the call to `try_set_end` guarantees that there are no // `ARef`s pointing to this request. Therefore it is safe to hand it // back to the block layer. - unsafe { bindings::blk_mq_end_request(request_ptr, bindings::BLK_STS_OK as _) }; + unsafe { + bindings::blk_mq_end_request( + request_ptr, + bindings::BLK_STS_OK as bindings::blk_status_t, + ) + }; Ok(()) } diff --git a/rust/kernel/device_id.rs b/rust/kernel/device_id.rs index 0a4eb56d98f2..f9d55ac7b9e6 100644 --- a/rust/kernel/device_id.rs +++ b/rust/kernel/device_id.rs @@ -82,7 +82,7 @@ impl IdArray { unsafe { raw_ids[i] .as_mut_ptr() - .byte_offset(T::DRIVER_DATA_OFFSET as _) + .byte_add(T::DRIVER_DATA_OFFSET) .cast::() .write(i); } diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index b418cfc6f90d..8dfbc5b21dc1 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -61,19 +61,19 @@ struct DevresInner { /// unsafe fn new(paddr: usize) -> Result{ /// // SAFETY: By the safety requirements of this function [`paddr`, `paddr` + `SIZE`) is /// // valid for `ioremap`. 
-/// let addr = unsafe { bindings::ioremap(paddr as _, SIZE as _) }; +/// let addr = unsafe { bindings::ioremap(paddr as bindings::phys_addr_t, SIZE) }; /// if addr.is_null() { /// return Err(ENOMEM); /// } /// -/// Ok(IoMem(IoRaw::new(addr as _, SIZE)?)) +/// Ok(IoMem(IoRaw::new(addr as usize, SIZE)?)) /// } /// } /// /// impl Drop for IoMem { /// fn drop(&mut self) { /// // SAFETY: `self.0.addr()` is guaranteed to be properly mapped by `Self::new`. -/// unsafe { bindings::iounmap(self.0.addr() as _); }; +/// unsafe { bindings::iounmap(self.0.addr() as *mut c_void); }; /// } /// } /// @@ -115,8 +115,9 @@ fn new(dev: &Device, data: T, flags: Flags) -> Result> // SAFETY: `devm_add_action` guarantees to call `Self::devres_callback` once `dev` is // detached. - let ret = - unsafe { bindings::devm_add_action(dev.as_raw(), Some(inner.callback), data as _) }; + let ret = unsafe { + bindings::devm_add_action(dev.as_raw(), Some(inner.callback), data.cast_mut().cast()) + }; if ret != 0 { // SAFETY: We just created another reference to `inner` in order to pass it to @@ -130,7 +131,7 @@ fn new(dev: &Device, data: T, flags: Flags) -> Result> } fn as_ptr(&self) -> *const Self { - self as _ + self } fn remove_action(this: &Arc) -> bool { diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index 666bf2d64f9a..8e317005decd 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs @@ -38,7 +38,7 @@ impl Attrs { /// Get the raw representation of this attribute. pub(crate) fn as_raw(self) -> crate::ffi::c_ulong { - self.0 as _ + self.0 as crate::ffi::c_ulong } /// Check whether `flags` is contained in `self`. 
diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index ef66deb7ce23..b7ee3c464a12 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -89,7 +89,7 @@ impl Device { driver_features: drm::driver::FEAT_GEM, ioctls: T::IOCTLS.as_ptr(), num_ioctls: T::IOCTLS.len() as i32, - fops: &Self::GEM_FOPS as _, + fops: &Self::GEM_FOPS, }; const GEM_FOPS: bindings::file_operations = drm::gem::create_fops(); diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index afcb00cb6a75..6277af1c1baa 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -153,7 +153,7 @@ pub(crate) fn to_blk_status(self) -> bindings::blk_status_t { /// Returns the error encoded as a pointer. pub fn to_ptr(self) -> *mut T { // SAFETY: `self.0` is a valid error due to its invariant. - unsafe { bindings::ERR_PTR(self.0.get() as _).cast() } + unsafe { bindings::ERR_PTR(self.0.get() as crate::ffi::c_long).cast() } } /// Returns a string representing the error, if one exists. diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs index 72d80a6f131e..c08de4121637 100644 --- a/rust/kernel/io.rs +++ b/rust/kernel/io.rs @@ -5,7 +5,7 @@ //! C header: [`include/asm-generic/io.h`](srctree/include/asm-generic/io.h) use crate::error::{code::EINVAL, Result}; -use crate::{bindings, build_assert}; +use crate::{bindings, build_assert, ffi::c_void}; /// Raw representation of an MMIO region. /// @@ -56,7 +56,7 @@ pub fn maxsize(&self) -> usize { /// # Examples /// /// ```no_run -/// # use kernel::{bindings, io::{Io, IoRaw}}; +/// # use kernel::{bindings, ffi::c_void, io::{Io, IoRaw}}; /// # use core::ops::Deref; /// /// // See also [`pci::Bar`] for a real example. @@ -70,19 +70,19 @@ pub fn maxsize(&self) -> usize { /// unsafe fn new(paddr: usize) -> Result{ /// // SAFETY: By the safety requirements of this function [`paddr`, `paddr` + `SIZE`) is /// // valid for `ioremap`. 
-/// let addr = unsafe { bindings::ioremap(paddr as _, SIZE as _) }; +/// let addr = unsafe { bindings::ioremap(paddr as bindings::phys_addr_t, SIZE) }; /// if addr.is_null() { /// return Err(ENOMEM); /// } /// -/// Ok(IoMem(IoRaw::new(addr as _, SIZE)?)) +/// Ok(IoMem(IoRaw::new(addr as usize, SIZE)?)) /// } /// } /// /// impl Drop for IoMem { /// fn drop(&mut self) { /// // SAFETY: `self.0.addr()` is guaranteed to be properly mapped by `Self::new`. -/// unsafe { bindings::iounmap(self.0.addr() as _); }; +/// unsafe { bindings::iounmap(self.0.addr() as *mut c_void); }; /// } /// } /// @@ -119,7 +119,7 @@ pub fn $name(&self, offset: usize) -> $type_name { let addr = self.io_addr_assert::<$type_name>(offset); // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - unsafe { bindings::$c_fn(addr as _) } + unsafe { bindings::$c_fn(addr as *const c_void) } } /// Read IO data from a given offset. @@ -131,7 +131,7 @@ pub fn $try_name(&self, offset: usize) -> Result<$type_name> { let addr = self.io_addr::<$type_name>(offset)?; // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - Ok(unsafe { bindings::$c_fn(addr as _) }) + Ok(unsafe { bindings::$c_fn(addr as *const c_void) }) } }; } @@ -148,7 +148,7 @@ pub fn $name(&self, value: $type_name, offset: usize) { let addr = self.io_addr_assert::<$type_name>(offset); // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - unsafe { bindings::$c_fn(value, addr as _, ) } + unsafe { bindings::$c_fn(value, addr as *mut c_void) } } /// Write IO data from a given offset. @@ -160,7 +160,7 @@ pub fn $try_name(&self, value: $type_name, offset: usize) -> Result { let addr = self.io_addr::<$type_name>(offset)?; // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. 
- unsafe { bindings::$c_fn(value, addr as _) } + unsafe { bindings::$c_fn(value, addr as *mut c_void) } Ok(()) } }; diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs index 939278bc7b03..288f40e79906 100644 --- a/rust/kernel/miscdevice.rs +++ b/rust/kernel/miscdevice.rs @@ -34,7 +34,7 @@ impl MiscDeviceOptions { pub const fn into_raw(self) -> bindings::miscdevice { // SAFETY: All zeros is valid for this C type. let mut result: bindings::miscdevice = unsafe { MaybeUninit::zeroed().assume_init() }; - result.minor = bindings::MISC_DYNAMIC_MINOR as _; + result.minor = bindings::MISC_DYNAMIC_MINOR as ffi::c_int; result.name = self.name.as_char_ptr(); result.fops = MiscdeviceVTable::::build(); result diff --git a/rust/kernel/mm/virt.rs b/rust/kernel/mm/virt.rs index 31803674aecc..6086ca981b06 100644 --- a/rust/kernel/mm/virt.rs +++ b/rust/kernel/mm/virt.rs @@ -392,80 +392,80 @@ pub mod flags { use crate::bindings; /// No flags are set. - pub const NONE: vm_flags_t = bindings::VM_NONE as _; + pub const NONE: vm_flags_t = bindings::VM_NONE as vm_flags_t; /// Mapping allows reads. - pub const READ: vm_flags_t = bindings::VM_READ as _; + pub const READ: vm_flags_t = bindings::VM_READ as vm_flags_t; /// Mapping allows writes. - pub const WRITE: vm_flags_t = bindings::VM_WRITE as _; + pub const WRITE: vm_flags_t = bindings::VM_WRITE as vm_flags_t; /// Mapping allows execution. - pub const EXEC: vm_flags_t = bindings::VM_EXEC as _; + pub const EXEC: vm_flags_t = bindings::VM_EXEC as vm_flags_t; /// Mapping is shared. - pub const SHARED: vm_flags_t = bindings::VM_SHARED as _; + pub const SHARED: vm_flags_t = bindings::VM_SHARED as vm_flags_t; /// Mapping may be updated to allow reads. - pub const MAYREAD: vm_flags_t = bindings::VM_MAYREAD as _; + pub const MAYREAD: vm_flags_t = bindings::VM_MAYREAD as vm_flags_t; /// Mapping may be updated to allow writes. 
- pub const MAYWRITE: vm_flags_t = bindings::VM_MAYWRITE as _; + pub const MAYWRITE: vm_flags_t = bindings::VM_MAYWRITE as vm_flags_t; /// Mapping may be updated to allow execution. - pub const MAYEXEC: vm_flags_t = bindings::VM_MAYEXEC as _; + pub const MAYEXEC: vm_flags_t = bindings::VM_MAYEXEC as vm_flags_t; /// Mapping may be updated to be shared. - pub const MAYSHARE: vm_flags_t = bindings::VM_MAYSHARE as _; + pub const MAYSHARE: vm_flags_t = bindings::VM_MAYSHARE as vm_flags_t; /// Page-ranges managed without `struct page`, just pure PFN. - pub const PFNMAP: vm_flags_t = bindings::VM_PFNMAP as _; + pub const PFNMAP: vm_flags_t = bindings::VM_PFNMAP as vm_flags_t; /// Memory mapped I/O or similar. - pub const IO: vm_flags_t = bindings::VM_IO as _; + pub const IO: vm_flags_t = bindings::VM_IO as vm_flags_t; /// Do not copy this vma on fork. - pub const DONTCOPY: vm_flags_t = bindings::VM_DONTCOPY as _; + pub const DONTCOPY: vm_flags_t = bindings::VM_DONTCOPY as vm_flags_t; /// Cannot expand with mremap(). - pub const DONTEXPAND: vm_flags_t = bindings::VM_DONTEXPAND as _; + pub const DONTEXPAND: vm_flags_t = bindings::VM_DONTEXPAND as vm_flags_t; /// Lock the pages covered when they are faulted in. - pub const LOCKONFAULT: vm_flags_t = bindings::VM_LOCKONFAULT as _; + pub const LOCKONFAULT: vm_flags_t = bindings::VM_LOCKONFAULT as vm_flags_t; /// Is a VM accounted object. - pub const ACCOUNT: vm_flags_t = bindings::VM_ACCOUNT as _; + pub const ACCOUNT: vm_flags_t = bindings::VM_ACCOUNT as vm_flags_t; /// Should the VM suppress accounting. - pub const NORESERVE: vm_flags_t = bindings::VM_NORESERVE as _; + pub const NORESERVE: vm_flags_t = bindings::VM_NORESERVE as vm_flags_t; /// Huge TLB Page VM. - pub const HUGETLB: vm_flags_t = bindings::VM_HUGETLB as _; + pub const HUGETLB: vm_flags_t = bindings::VM_HUGETLB as vm_flags_t; /// Synchronous page faults. 
(DAX-specific) - pub const SYNC: vm_flags_t = bindings::VM_SYNC as _; + pub const SYNC: vm_flags_t = bindings::VM_SYNC as vm_flags_t; /// Architecture-specific flag. - pub const ARCH_1: vm_flags_t = bindings::VM_ARCH_1 as _; + pub const ARCH_1: vm_flags_t = bindings::VM_ARCH_1 as vm_flags_t; /// Wipe VMA contents in child on fork. - pub const WIPEONFORK: vm_flags_t = bindings::VM_WIPEONFORK as _; + pub const WIPEONFORK: vm_flags_t = bindings::VM_WIPEONFORK as vm_flags_t; /// Do not include in the core dump. - pub const DONTDUMP: vm_flags_t = bindings::VM_DONTDUMP as _; + pub const DONTDUMP: vm_flags_t = bindings::VM_DONTDUMP as vm_flags_t; /// Not soft dirty clean area. - pub const SOFTDIRTY: vm_flags_t = bindings::VM_SOFTDIRTY as _; + pub const SOFTDIRTY: vm_flags_t = bindings::VM_SOFTDIRTY as vm_flags_t; /// Can contain `struct page` and pure PFN pages. - pub const MIXEDMAP: vm_flags_t = bindings::VM_MIXEDMAP as _; + pub const MIXEDMAP: vm_flags_t = bindings::VM_MIXEDMAP as vm_flags_t; /// MADV_HUGEPAGE marked this vma. - pub const HUGEPAGE: vm_flags_t = bindings::VM_HUGEPAGE as _; + pub const HUGEPAGE: vm_flags_t = bindings::VM_HUGEPAGE as vm_flags_t; /// MADV_NOHUGEPAGE marked this vma. - pub const NOHUGEPAGE: vm_flags_t = bindings::VM_NOHUGEPAGE as _; + pub const NOHUGEPAGE: vm_flags_t = bindings::VM_NOHUGEPAGE as vm_flags_t; /// KSM may merge identical pages. 
- pub const MERGEABLE: vm_flags_t = bindings::VM_MERGEABLE as _; + pub const MERGEABLE: vm_flags_t = bindings::VM_MERGEABLE as vm_flags_t; } diff --git a/rust/kernel/of.rs b/rust/kernel/of.rs index 04f2d8ef29cb..40d1bd13682c 100644 --- a/rust/kernel/of.rs +++ b/rust/kernel/of.rs @@ -22,7 +22,7 @@ unsafe impl RawDeviceId for DeviceId { const DRIVER_DATA_OFFSET: usize = core::mem::offset_of!(bindings::of_device_id, data); fn index(&self) -> usize { - self.0.data as _ + self.0.data as usize } } @@ -34,10 +34,10 @@ pub const fn new(compatible: &'static CStr) -> Self { // SAFETY: FFI type is valid to be zero-initialized. let mut of: bindings::of_device_id = unsafe { core::mem::zeroed() }; - // TODO: Use `clone_from_slice` once the corresponding types do match. + // TODO: Use `copy_from_slice` once stabilized for `const`. let mut i = 0; while i < src.len() { - of.compatible[i] = src[i] as _; + of.compatible[i] = src[i]; i += 1; } diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index 33ae0bdc433d..f6b19764ad17 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -171,7 +171,7 @@ unsafe impl RawDeviceId for DeviceId { const DRIVER_DATA_OFFSET: usize = core::mem::offset_of!(bindings::pci_device_id, driver_data); fn index(&self) -> usize { - self.0.driver_data as _ + self.0.driver_data } } @@ -206,7 +206,10 @@ macro_rules! pci_device_table { /// MODULE_PCI_TABLE, /// ::IdInfo, /// [ -/// (pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_REDHAT, bindings::PCI_ANY_ID as _), ()) +/// ( +/// pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_REDHAT, bindings::PCI_ANY_ID as u32), +/// (), +/// ) /// ] /// ); /// @@ -330,7 +333,7 @@ unsafe fn do_release(pdev: &Device, ioptr: usize, num: i32) { // `ioptr` is valid by the safety requirements. // `num` is valid by the safety requirements. 
unsafe { - bindings::pci_iounmap(pdev.as_raw(), ioptr as _); + bindings::pci_iounmap(pdev.as_raw(), ioptr as *mut kernel::ffi::c_void); bindings::pci_release_region(pdev.as_raw(), num); } } diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index 6a3cb607b332..43597eb7c5c1 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -728,9 +728,9 @@ fn new() -> Self { pub(crate) unsafe fn from_ptrs(pos: *mut u8, end: *mut u8) -> Self { // INVARIANT: The safety requirements guarantee the type invariants. Self { - beg: pos as _, - pos: pos as _, - end: end as _, + beg: pos as usize, + pos: pos as usize, + end: end as usize, } } @@ -755,7 +755,7 @@ pub(crate) unsafe fn from_buffer(buf: *mut u8, len: usize) -> Self { /// /// N.B. It may point to invalid memory. pub(crate) fn pos(&self) -> *mut u8 { - self.pos as _ + self.pos as *mut u8 } /// Returns the number of bytes written to the formatter. diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index de61374e36bd..89e5c2560eec 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -198,7 +198,7 @@ pub fn enqueue(&self, w: W) -> W::EnqueueOutput unsafe { w.__enqueue(move |work_ptr| { bindings::queue_work_on( - bindings::wq_misc_consts_WORK_CPU_UNBOUND as _, + bindings::wq_misc_consts_WORK_CPU_UNBOUND as ffi::c_int, queue_ptr, work_ptr, ) From b7c8d7a8d251ab63fba3cc964f1928a216c28081 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Sun, 15 Jun 2025 16:55:09 -0400 Subject: [PATCH 0284/2411] rust: enable `clippy::cast_lossless` lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before Rust 1.29.0, Clippy introduced the `cast_lossless` lint [1]: > Rust’s `as` keyword will perform many kinds of conversions, including > silently lossy conversions. Conversion functions such as `i32::from` > will only perform lossless conversions. 
Using the conversion functions > prevents conversions from becoming silently lossy if the input types > ever change, and makes it clear for people reading the code that the > conversion is lossless. While this doesn't eliminate unchecked `as` conversions, it makes such conversions easier to scrutinize. It also has the slight benefit of removing a degree of freedom on which to bikeshed. Thus apply the changes and enable the lint -- no functional change intended. Link: https://rust-lang.github.io/rust-clippy/master/index.html#cast_lossless [1] Suggested-by: Benno Lossin Link: https://lore.kernel.org/all/D8ORTXSUTKGL.1KOJAGBM8F8TN@proton.me/ Reviewed-by: Benno Lossin Reviewed-by: Boqun Feng Signed-off-by: Tamir Duberstein Acked-by: FUJITA Tomonori Acked-by: Jocelyn Falempe Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250615-ptr-as-ptr-v12-5-f43b024581e8@gmail.com Signed-off-by: Miguel Ojeda --- Makefile | 1 + drivers/gpu/drm/drm_panic_qr.rs | 4 ++-- drivers/gpu/nova-core/regs.rs | 2 +- drivers/gpu/nova-core/regs/macros.rs | 2 +- rust/bindings/lib.rs | 1 + rust/kernel/net/phy.rs | 4 ++-- rust/uapi/lib.rs | 1 + 7 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index c66dd543b44e..a95000adf261 100644 --- a/Makefile +++ b/Makefile @@ -481,6 +481,7 @@ export rust_common_flags := --edition=2021 \ -Wclippy::all \ -Wclippy::as_ptr_cast_mut \ -Wclippy::as_underscore \ + -Wclippy::cast_lossless \ -Wclippy::ignored_unit_patterns \ -Wclippy::mut_mut \ -Wclippy::needless_bitwise_bool \ diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index dd55b1cb764d..6b59d19ab631 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -404,7 +404,7 @@ fn pop3(&mut self) -> Option<(u16, usize)> { let mut out = 0; let mut exp = 1; for i in 0..poplen { - out += self.decimals[self.len + i] as u16 * exp; + out += u16::from(self.decimals[self.len + i]) * exp; exp *= 10; } Some((out, 
NUM_CHARS_BITS[poplen])) @@ -425,7 +425,7 @@ fn next(&mut self) -> Option { match self.segment { Segment::Binary(data) => { if self.offset < data.len() { - let byte = data[self.offset] as u16; + let byte = u16::from(data[self.offset]); self.offset += 1; Some((byte, 8)) } else { diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 5a1273230306..c1cb6d4c49ee 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -32,7 +32,7 @@ pub(crate) fn architecture(self) -> Result { pub(crate) fn chipset(self) -> Result { self.architecture() .map(|arch| { - ((arch as u32) << Self::IMPLEMENTATION.len()) | self.implementation() as u32 + ((arch as u32) << Self::IMPLEMENTATION.len()) | u32::from(self.implementation()) }) .and_then(Chipset::try_from) } diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 7ecc70efb3cd..6851af8b5885 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -264,7 +264,7 @@ pub(crate) fn $field(self) -> $res_type { pub(crate) fn [](mut self, value: $to_type) -> Self { const MASK: u32 = $name::[<$field:upper _MASK>]; const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; - let value = ((value as u32) << SHIFT) & MASK; + let value = (u32::from(value) << SHIFT) & MASK; self.0 = (self.0 & !MASK) | value; self diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs index 81b6c7aa4916..7631c9f6708d 100644 --- a/rust/bindings/lib.rs +++ b/rust/bindings/lib.rs @@ -25,6 +25,7 @@ )] #[allow(dead_code)] +#[allow(clippy::cast_lossless)] #[allow(clippy::ptr_as_ptr)] #[allow(clippy::undocumented_unsafe_blocks)] #[cfg_attr(CONFIG_RUSTC_HAS_UNNECESSARY_TRANSMUTES, allow(unnecessary_transmutes))] diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs index 32ea43ece646..65ac4d59ad77 100644 --- a/rust/kernel/net/phy.rs +++ b/rust/kernel/net/phy.rs @@ -142,7 +142,7 @@ pub fn is_autoneg_enabled(&self) -> bool { // SAFETY: The struct 
invariant ensures that we may access // this field without additional synchronization. let bit_field = unsafe { &(*self.0.get())._bitfield_1 }; - bit_field.get(13, 1) == bindings::AUTONEG_ENABLE as u64 + bit_field.get(13, 1) == u64::from(bindings::AUTONEG_ENABLE) } /// Gets the current auto-negotiation state. @@ -427,7 +427,7 @@ impl Adapter { // where we hold `phy_device->lock`, so the accessors on // `Device` are okay to call. let dev = unsafe { Device::from_raw(phydev) }; - T::match_phy_device(dev) as i32 + T::match_phy_device(dev).into() } /// # Safety diff --git a/rust/uapi/lib.rs b/rust/uapi/lib.rs index e79a1f49f055..08e68ebef606 100644 --- a/rust/uapi/lib.rs +++ b/rust/uapi/lib.rs @@ -14,6 +14,7 @@ #![cfg_attr(test, allow(unsafe_op_in_unsafe_fn))] #![allow( clippy::all, + clippy::cast_lossless, clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, dead_code, From dc35ddcf97e99b18559d0855071030e664aae44d Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Sun, 15 Jun 2025 16:55:10 -0400 Subject: [PATCH 0285/2411] rust: enable `clippy::ref_as_ptr` lint In Rust 1.78.0, Clippy introduced the `ref_as_ptr` lint [1]: > Using `as` casts may result in silently changing mutability or type. While this doesn't eliminate unchecked `as` conversions, it makes such conversions easier to scrutinize. It also has the slight benefit of removing a degree of freedom on which to bikeshed. Thus apply the changes and enable the lint -- no functional change intended. 
Link: https://rust-lang.github.io/rust-clippy/master/index.html#ref_as_ptr [1] Suggested-by: Benno Lossin Link: https://lore.kernel.org/all/D8PGG7NTWB6U.3SS3A5LN4XWMN@proton.me/ Reviewed-by: Benno Lossin Reviewed-by: Boqun Feng Signed-off-by: Tamir Duberstein Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250615-ptr-as-ptr-v12-6-f43b024581e8@gmail.com Signed-off-by: Miguel Ojeda --- Makefile | 1 + rust/bindings/lib.rs | 1 + rust/kernel/configfs.rs | 20 ++++++-------------- rust/kernel/device_id.rs | 2 +- rust/kernel/fs/file.rs | 2 +- rust/kernel/str.rs | 4 ++-- rust/kernel/uaccess.rs | 4 ++-- rust/uapi/lib.rs | 1 + 8 files changed, 15 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index a95000adf261..31268acb0d22 100644 --- a/Makefile +++ b/Makefile @@ -489,6 +489,7 @@ export rust_common_flags := --edition=2021 \ -Wclippy::no_mangle_with_rust_abi \ -Wclippy::ptr_as_ptr \ -Wclippy::ptr_cast_constness \ + -Wclippy::ref_as_ptr \ -Wclippy::undocumented_unsafe_blocks \ -Wclippy::unnecessary_safety_comment \ -Wclippy::unnecessary_safety_doc \ diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs index 7631c9f6708d..474cc98c48a3 100644 --- a/rust/bindings/lib.rs +++ b/rust/bindings/lib.rs @@ -27,6 +27,7 @@ #[allow(dead_code)] #[allow(clippy::cast_lossless)] #[allow(clippy::ptr_as_ptr)] +#[allow(clippy::ref_as_ptr)] #[allow(clippy::undocumented_unsafe_blocks)] #[cfg_attr(CONFIG_RUSTC_HAS_UNNECESSARY_TRANSMUTES, allow(unnecessary_transmutes))] mod bindings_raw { diff --git a/rust/kernel/configfs.rs b/rust/kernel/configfs.rs index bc8e15dcec18..1ddac786bd0d 100644 --- a/rust/kernel/configfs.rs +++ b/rust/kernel/configfs.rs @@ -426,7 +426,7 @@ impl GroupOperationsVTable }; const fn vtable_ptr() -> *const bindings::configfs_group_operations { - &Self::VTABLE as *const bindings::configfs_group_operations + &Self::VTABLE } } @@ -464,7 +464,7 @@ impl ItemOperationsVTable, Data> }; const fn vtable_ptr() -> *const bindings::configfs_item_operations 
{ - &Self::VTABLE as *const bindings::configfs_item_operations + &Self::VTABLE } } @@ -476,7 +476,7 @@ impl ItemOperationsVTable, Data> { }; const fn vtable_ptr() -> *const bindings::configfs_item_operations { - &Self::VTABLE as *const bindings::configfs_item_operations + &Self::VTABLE } } @@ -717,11 +717,7 @@ impl AttributeList { // SAFETY: By function safety requirements, we have exclusive access to // `self` and the reference created below will be exclusive. - unsafe { - (&mut *self.0.get())[I] = (attribute as *const Attribute) - .cast_mut() - .cast() - }; + unsafe { (&mut *self.0.get())[I] = core::ptr::from_ref(attribute).cast_mut().cast() }; } } @@ -761,9 +757,7 @@ pub const fn new_with_child_ctor( ct_owner: owner.as_ptr(), ct_group_ops: GroupOperationsVTable::::vtable_ptr().cast_mut(), ct_item_ops: ItemOperationsVTable::<$tpe, Data>::vtable_ptr().cast_mut(), - ct_attrs: (attributes as *const AttributeList) - .cast_mut() - .cast(), + ct_attrs: core::ptr::from_ref(attributes).cast_mut().cast(), ct_bin_attrs: core::ptr::null_mut(), }), _p: PhantomData, @@ -780,9 +774,7 @@ pub const fn new( ct_owner: owner.as_ptr(), ct_group_ops: core::ptr::null_mut(), ct_item_ops: ItemOperationsVTable::<$tpe, Data>::vtable_ptr().cast_mut(), - ct_attrs: (attributes as *const AttributeList) - .cast_mut() - .cast(), + ct_attrs: core::ptr::from_ref(attributes).cast_mut().cast(), ct_bin_attrs: core::ptr::null_mut(), }), _p: PhantomData, diff --git a/rust/kernel/device_id.rs b/rust/kernel/device_id.rs index f9d55ac7b9e6..3dc72ca8cfc2 100644 --- a/rust/kernel/device_id.rs +++ b/rust/kernel/device_id.rs @@ -136,7 +136,7 @@ impl IdTable for IdArray { fn as_ptr(&self) -> *const T::RawType { // This cannot be `self.ids.as_ptr()`, as the return pointer must have correct provenance // to access the sentinel. 
- (self as *const Self).cast() + core::ptr::from_ref(self).cast() } fn id(&self, index: usize) -> &T::RawType { diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs index e9bfbad00755..35fd5db35c46 100644 --- a/rust/kernel/fs/file.rs +++ b/rust/kernel/fs/file.rs @@ -366,7 +366,7 @@ fn deref(&self) -> &LocalFile { // // By the type invariants, there are no `fdget_pos` calls that did not take the // `f_pos_lock` mutex. - unsafe { LocalFile::from_raw_file((self as *const Self).cast()) } + unsafe { LocalFile::from_raw_file(core::ptr::from_ref(self).cast()) } } } diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index 43597eb7c5c1..cbc8b459ed41 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -29,7 +29,7 @@ pub const fn is_empty(&self) -> bool { #[inline] pub const fn from_bytes(bytes: &[u8]) -> &Self { // SAFETY: `BStr` is transparent to `[u8]`. - unsafe { &*(bytes as *const [u8] as *const BStr) } + unsafe { &*(core::ptr::from_ref(bytes) as *const BStr) } } /// Strip a prefix from `self`. Delegates to [`slice::strip_prefix`]. @@ -290,7 +290,7 @@ pub const fn from_bytes_with_nul(bytes: &[u8]) -> Result<&Self, CStrConvertError #[inline] pub unsafe fn from_bytes_with_nul_unchecked_mut(bytes: &mut [u8]) -> &mut CStr { // SAFETY: Properties of `bytes` guaranteed by the safety precondition. - unsafe { &mut *(bytes as *mut [u8] as *mut CStr) } + unsafe { &mut *(core::ptr::from_mut(bytes) as *mut CStr) } } /// Returns a C pointer to the string. diff --git a/rust/kernel/uaccess.rs b/rust/kernel/uaccess.rs index 6d70edd8086a..4ef13cf13a78 100644 --- a/rust/kernel/uaccess.rs +++ b/rust/kernel/uaccess.rs @@ -240,7 +240,7 @@ pub fn read_raw(&mut self, out: &mut [MaybeUninit]) -> Result { pub fn read_slice(&mut self, out: &mut [u8]) -> Result { // SAFETY: The types are compatible and `read_raw` doesn't write uninitialized bytes to // `out`. 
- let out = unsafe { &mut *(out as *mut [u8] as *mut [MaybeUninit]) }; + let out = unsafe { &mut *(core::ptr::from_mut(out) as *mut [MaybeUninit]) }; self.read_raw(out) } @@ -355,7 +355,7 @@ pub fn write(&mut self, value: &T) -> Result { let res = unsafe { bindings::_copy_to_user( self.ptr as *mut c_void, - (value as *const T).cast::(), + core::ptr::from_ref(value).cast::(), len, ) }; diff --git a/rust/uapi/lib.rs b/rust/uapi/lib.rs index 08e68ebef606..31c2f713313f 100644 --- a/rust/uapi/lib.rs +++ b/rust/uapi/lib.rs @@ -16,6 +16,7 @@ clippy::all, clippy::cast_lossless, clippy::ptr_as_ptr, + clippy::ref_as_ptr, clippy::undocumented_unsafe_blocks, dead_code, missing_docs, From 58ae036172b5f051a19a32eba94a3e5eb37bf47e Mon Sep 17 00:00:00 2001 From: Thomas Antoine Date: Fri, 23 May 2025 14:51:44 +0200 Subject: [PATCH 0286/2411] power: supply: max1720x correct capacity computation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From the datasheet of the MAX17201/17205, the LSB should be "5.0μVh/RSENSE". The current computation sets it at 0.5mAh=5.0μVh/10mOhm, which does not take into account the value of rsense (which is in 10µV steps) which can be different from 10mOhm. Change the computation to fit the specs. 
Fixes: 479b6d04964b ("power: supply: add support for MAX1720x standalone fuel gauge") Signed-off-by: Thomas Antoine Link: https://lore.kernel.org/r/20250523-b4-gs101_max77759_fg-v4-1-b49904e35a34@uclouvain.be Signed-off-by: Sebastian Reichel --- drivers/power/supply/max1720x_battery.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/power/supply/max1720x_battery.c b/drivers/power/supply/max1720x_battery.c index 12ecb1f40fe1..e2bd54ee3970 100644 --- a/drivers/power/supply/max1720x_battery.c +++ b/drivers/power/supply/max1720x_battery.c @@ -288,9 +288,10 @@ static int max172xx_voltage_to_ps(unsigned int reg) return reg * 1250; /* in uV */ } -static int max172xx_capacity_to_ps(unsigned int reg) +static int max172xx_capacity_to_ps(unsigned int reg, + struct max1720x_device_info *info) { - return reg * 500; /* in uAh */ + return reg * (500000 / info->rsense); /* in uAh */ } /* @@ -394,11 +395,11 @@ static int max1720x_battery_get_property(struct power_supply *psy, break; case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN: ret = regmap_read(info->regmap, MAX172XX_DESIGN_CAP, &reg_val); - val->intval = max172xx_capacity_to_ps(reg_val); + val->intval = max172xx_capacity_to_ps(reg_val, info); break; case POWER_SUPPLY_PROP_CHARGE_AVG: ret = regmap_read(info->regmap, MAX172XX_REPCAP, &reg_val); - val->intval = max172xx_capacity_to_ps(reg_val); + val->intval = max172xx_capacity_to_ps(reg_val, info); break; case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG: ret = regmap_read(info->regmap, MAX172XX_TTE, &reg_val); @@ -422,7 +423,7 @@ static int max1720x_battery_get_property(struct power_supply *psy, break; case POWER_SUPPLY_PROP_CHARGE_FULL: ret = regmap_read(info->regmap, MAX172XX_FULL_CAP, &reg_val); - val->intval = max172xx_capacity_to_ps(reg_val); + val->intval = max172xx_capacity_to_ps(reg_val, info); break; case POWER_SUPPLY_PROP_MODEL_NAME: ret = regmap_read(info->regmap, MAX172XX_DEV_NAME, &reg_val); From 8ca719b81987be690f197e82fdb030580c0a07f3 Mon Sep 17 00:00:00
2001 From: Benjamin Marzinski Date: Fri, 13 Jun 2025 19:08:52 -0400 Subject: [PATCH 0287/2411] dm-table: fix checking for rq stackable devices Due to the semantics of iterate_devices(), the current code allows a request-based dm table as long as it includes one request-stackable device. It is supposed to only allow tables where there are no non-request-stackable devices. Signed-off-by: Benjamin Marzinski Reviewed-by: Mike Snitzer Signed-off-by: Mikulas Patocka --- drivers/md/dm-table.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 24a857ff6d0b..79ba4bacd0f9 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -899,17 +899,17 @@ static bool dm_table_supports_dax(struct dm_table *t, return true; } -static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) +static int device_is_not_rq_stackable(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) { struct block_device *bdev = dev->bdev; struct request_queue *q = bdev_get_queue(bdev); /* request-based cannot stack on partitions! 
*/ if (bdev_is_partition(bdev)) - return false; + return true; - return queue_is_mq(q); + return !queue_is_mq(q); } static int dm_table_determine_type(struct dm_table *t) @@ -1005,7 +1005,7 @@ static int dm_table_determine_type(struct dm_table *t) /* Non-request-stackable devices can't be used for request-based dm */ if (!ti->type->iterate_devices || - !ti->type->iterate_devices(ti, device_is_rq_stackable, NULL)) { + ti->type->iterate_devices(ti, device_is_not_rq_stackable, NULL)) { DMERR("table load rejected: including non-request-stackable devices"); return -EINVAL; } From 75227ed6812cb869380c8fb6d41a845ae571781e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 11 Jun 2025 15:12:20 -0400 Subject: [PATCH 0288/2411] dm-flakey: Fix corrupt_bio_byte setup checks Fix the error_reads mode - it's incompatible with corrupt_bio_byte, but that's only enabled if corrupt_bio_byte is nonzero. Cc: Benjamin Marzinski Cc: Mikulas Patocka Cc: Mike Snitzer Cc: dm-devel@lists.linux.dev Signed-off-by: Kent Overstreet Reviewed-by: Benjamin Marzinski Fixes: 19da6b2c9e8e ("dm-flakey: Clean up parsing messages") Signed-off-by: Mikulas Patocka --- drivers/md/dm-flakey.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index c711db6f8f5c..cf17fd46e255 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -215,16 +215,19 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, } if (test_bit(DROP_WRITES, &fc->flags) && - (fc->corrupt_bio_rw == WRITE || fc->random_write_corrupt)) { + ((fc->corrupt_bio_byte && fc->corrupt_bio_rw == WRITE) || + fc->random_write_corrupt)) { ti->error = "drop_writes is incompatible with random_write_corrupt or corrupt_bio_byte with the WRITE flag set"; return -EINVAL; } else if (test_bit(ERROR_WRITES, &fc->flags) && - (fc->corrupt_bio_rw == WRITE || fc->random_write_corrupt)) { + ((fc->corrupt_bio_byte && fc->corrupt_bio_rw == WRITE) || + 
fc->random_write_corrupt)) { ti->error = "error_writes is incompatible with random_write_corrupt or corrupt_bio_byte with the WRITE flag set"; return -EINVAL; } else if (test_bit(ERROR_READS, &fc->flags) && - (fc->corrupt_bio_rw == READ || fc->random_read_corrupt)) { + ((fc->corrupt_bio_byte && fc->corrupt_bio_rw == READ) || + fc->random_read_corrupt)) { ti->error = "error_reads is incompatible with random_read_corrupt or corrupt_bio_byte with the READ flag set"; return -EINVAL; } From 9de4a3967caf1865a95aebdd63fccf213d174ede Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Mon, 16 Jun 2025 16:50:05 +0200 Subject: [PATCH 0289/2411] dm raid: add support for resync w/o metadata devices Target does not honour the "sync" argument when activated w/o metadata devices, e.g. with table line: "0 $(blockdev --getsz $data1) raid raid1 2 0 sync 2 - $data1 - $data2". Fix this to support temporary, transient raid devices useful for data duplication. Signed-off-by: Heinz Mauelshagen Signed-off-by: Mikulas Patocka --- drivers/md/dm-raid.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index d296770478b2..c4fa8e0e76d2 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2532,6 +2532,10 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) struct md_rdev *rdev, *freshest; struct mddev *mddev = &rs->md; + /* Respect resynchronization requested with "sync" argument. */ + if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags)) + set_bit(MD_ARRAY_FIRST_USE, &mddev->flags); + freshest = NULL; rdev_for_each(rdev, mddev) { if (test_bit(Journal, &rdev->flags)) From 26daa18e35ebc4e192ff55d021f1cd7e69d55487 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 21 May 2025 15:38:10 +0200 Subject: [PATCH 0290/2411] dt-bindings: PCI: qcom,pcie-sc8180x: Drop unrelated clocks from PCIe hosts The TBU clock belongs to the Translation Buffer Unit, part of the SMMU. 
The ref clock is already being driven upstream through some of the branches. Signed-off-by: Konrad Dybcio Signed-off-by: Manivannan Sadhasivam Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20250521-topic-8150_pcie_drop_clocks-v1-1-3d42e84f6453@oss.qualcomm.com --- .../devicetree/bindings/pci/qcom,pcie-sc8180x.yaml | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie-sc8180x.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie-sc8180x.yaml index 331fc25d7a17..34a4d7b2c845 100644 --- a/Documentation/devicetree/bindings/pci/qcom,pcie-sc8180x.yaml +++ b/Documentation/devicetree/bindings/pci/qcom,pcie-sc8180x.yaml @@ -33,8 +33,8 @@ properties: - const: mhi # MHI registers clocks: - minItems: 8 - maxItems: 8 + minItems: 6 + maxItems: 6 clock-names: items: @@ -44,8 +44,6 @@ properties: - const: bus_master # Master AXI clock - const: bus_slave # Slave AXI clock - const: slave_q2a # Slave Q2A clock - - const: ref # REFERENCE clock - - const: tbu # PCIe TBU clock interrupts: minItems: 8 @@ -117,17 +115,13 @@ examples: <&gcc GCC_PCIE_0_CFG_AHB_CLK>, <&gcc GCC_PCIE_0_MSTR_AXI_CLK>, <&gcc GCC_PCIE_0_SLV_AXI_CLK>, - <&gcc GCC_PCIE_0_SLV_Q2A_AXI_CLK>, - <&gcc GCC_PCIE_0_CLKREF_CLK>, - <&gcc GCC_AGGRE_NOC_PCIE_TBU_CLK>; + <&gcc GCC_PCIE_0_SLV_Q2A_AXI_CLK>; clock-names = "pipe", "aux", "cfg", "bus_master", "bus_slave", - "slave_q2a", - "ref", - "tbu"; + "slave_q2a"; dma-coherent; From e1cb67ab82aab44cda410616498d4749399da217 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 21 May 2025 15:38:11 +0200 Subject: [PATCH 0291/2411] dt-bindings: PCI: qcom,pcie-sm8150: Drop unrelated clocks from PCIe hosts The TBU clock belongs to the Translation Buffer Unit, part of the SMMU. The ref clock is already being driven upstream through some of the branches. 
Signed-off-by: Konrad Dybcio Signed-off-by: Manivannan Sadhasivam Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20250521-topic-8150_pcie_drop_clocks-v1-2-3d42e84f6453@oss.qualcomm.com --- .../devicetree/bindings/pci/qcom,pcie-sm8150.yaml | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie-sm8150.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie-sm8150.yaml index 2aa012b04d93..26b247a41785 100644 --- a/Documentation/devicetree/bindings/pci/qcom,pcie-sm8150.yaml +++ b/Documentation/devicetree/bindings/pci/qcom,pcie-sm8150.yaml @@ -38,8 +38,8 @@ properties: - const: mhi # MHI registers clocks: - minItems: 8 - maxItems: 8 + minItems: 6 + maxItems: 6 clock-names: items: @@ -49,8 +49,6 @@ properties: - const: bus_master # Master AXI clock - const: bus_slave # Slave AXI clock - const: slave_q2a # Slave Q2A clock - - const: tbu # PCIe TBU clock - - const: ref # REFERENCE clock interrupts: minItems: 8 @@ -116,17 +114,13 @@ examples: <&gcc GCC_PCIE_0_CFG_AHB_CLK>, <&gcc GCC_PCIE_0_MSTR_AXI_CLK>, <&gcc GCC_PCIE_0_SLV_AXI_CLK>, - <&gcc GCC_PCIE_0_SLV_Q2A_AXI_CLK>, - <&gcc GCC_AGGRE_NOC_PCIE_TBU_CLK>, - <&rpmhcc RPMH_CXO_CLK>; + <&gcc GCC_PCIE_0_SLV_Q2A_AXI_CLK>; clock-names = "pipe", "aux", "cfg", "bus_master", "bus_slave", - "slave_q2a", - "tbu", - "ref"; + "slave_q2a"; interrupts = , , From 255c891533d89f5d7339076468a98afc947c4a73 Mon Sep 17 00:00:00 2001 From: George D Sworo Date: Fri, 6 Jun 2025 14:02:30 -0700 Subject: [PATCH 0292/2411] PCI: vmd: Add VMD Device ID Support for Panther Lake (PTL)-H/P/U Add VMD Device ID Support for PTL-H/P/U processors. 
Signed-off-by: George D Sworo [mani: expanded PTL] Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250606210230.340803-2-george.d.sworo@intel.com --- drivers/pci/controller/vmd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index 8df064b62a2f..375ce9d6d9f6 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -1129,6 +1129,8 @@ static const struct pci_device_id vmd_ids[] = { .driver_data = VMD_FEATS_CLIENT,}, {PCI_VDEVICE(INTEL, 0xb06f), .driver_data = VMD_FEATS_CLIENT,}, + {PCI_VDEVICE(INTEL, 0xb07f), + .driver_data = VMD_FEATS_CLIENT,}, {0,} }; MODULE_DEVICE_TABLE(pci, vmd_ids); From dbb1258daf75f2b98e465ba5a0e26073eda6e539 Mon Sep 17 00:00:00 2001 From: Jim Quinlan Date: Fri, 30 May 2025 18:40:32 -0400 Subject: [PATCH 0293/2411] dt-bindings: PCI: brcm,stb-pcie: Add num-lanes property Add optional num-lanes property for Broadcom STB PCIe host controllers. Signed-off-by: Jim Quinlan Signed-off-by: Manivannan Sadhasivam Reviewed-by: Rob Herring (Arm) Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250530224035.41886-2-james.quinlan@broadcom.com --- Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml b/Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml index c4f9674e8695..812ef5957cfc 100644 --- a/Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml @@ -107,6 +107,10 @@ properties: - const: bridge - const: swinit + num-lanes: + default: 1 + maximum: 4 + required: - compatible - reg From a364d10ffe361fb34c3838d33604da493045de1e Mon Sep 17 00:00:00 2001 From: Jim Quinlan Date: Fri, 30 May 2025 18:40:33 -0400 Subject: [PATCH 0294/2411] PCI: brcmstb: Set MLW based on "num-lanes" DT property if present By default, the driver relies on the default hardware
defined value for the Max Link Width (MLW) capability. But if the "num-lanes" DT property is present, assume that the chip's default capability information is incorrect or undesired, and use the specified value instead. Signed-off-by: Jim Quinlan [mani: reworded the description and comments] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250530224035.41886-3-james.quinlan@broadcom.com --- drivers/pci/controller/pcie-brcmstb.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c index 92887b394eb4..744df5bd39ae 100644 --- a/drivers/pci/controller/pcie-brcmstb.c +++ b/drivers/pci/controller/pcie-brcmstb.c @@ -46,6 +46,7 @@ #define PCIE_RC_CFG_PRIV1_ID_VAL3_CLASS_CODE_MASK 0xffffff #define PCIE_RC_CFG_PRIV1_LINK_CAPABILITY 0x04dc +#define PCIE_RC_CFG_PRIV1_LINK_CAPABILITY_MAX_LINK_WIDTH_MASK 0x1f0 #define PCIE_RC_CFG_PRIV1_LINK_CAPABILITY_ASPM_SUPPORT_MASK 0xc00 #define PCIE_RC_CFG_PRIV1_ROOT_CAP 0x4f8 @@ -55,6 +56,9 @@ #define PCIE_RC_DL_MDIO_WR_DATA 0x1104 #define PCIE_RC_DL_MDIO_RD_DATA 0x1108 +#define PCIE_RC_PL_REG_PHY_CTL_1 0x1804 +#define PCIE_RC_PL_REG_PHY_CTL_1_REG_P2_POWERDOWN_ENA_NOSYNC_MASK 0x8 + #define PCIE_RC_PL_PHY_CTL_15 0x184c #define PCIE_RC_PL_PHY_CTL_15_DIS_PLL_PD_MASK 0x400000 #define PCIE_RC_PL_PHY_CTL_15_PM_CLK_PERIOD_MASK 0xff @@ -1072,7 +1076,7 @@ static int brcm_pcie_setup(struct brcm_pcie *pcie) void __iomem *base = pcie->base; struct pci_host_bridge *bridge; struct resource_entry *entry; - u32 tmp, burst, aspm_support; + u32 tmp, burst, aspm_support, num_lanes, num_lanes_cap; u8 num_out_wins = 0; int num_inbound_wins = 0; int memc, ret; @@ -1180,6 +1184,27 @@ static int brcm_pcie_setup(struct brcm_pcie *pcie) PCIE_RC_CFG_PRIV1_LINK_CAPABILITY_ASPM_SUPPORT_MASK); writel(tmp, base + PCIE_RC_CFG_PRIV1_LINK_CAPABILITY); + /* 'tmp' still holds the contents of 
PRIV1_LINK_CAPABILITY */ + num_lanes_cap = u32_get_bits(tmp, PCIE_RC_CFG_PRIV1_LINK_CAPABILITY_MAX_LINK_WIDTH_MASK); + num_lanes = 0; + + /* + * Use hardware negotiated Max Link Width value by default. If the + * "num-lanes" DT property is present, assume that the chip's default + * link width capability information is incorrect/undesired and use the + * specified value instead. + */ + if (!of_property_read_u32(pcie->np, "num-lanes", &num_lanes) && + num_lanes && num_lanes <= 4 && num_lanes_cap != num_lanes) { + u32p_replace_bits(&tmp, num_lanes, + PCIE_RC_CFG_PRIV1_LINK_CAPABILITY_MAX_LINK_WIDTH_MASK); + writel(tmp, base + PCIE_RC_CFG_PRIV1_LINK_CAPABILITY); + tmp = readl(base + PCIE_RC_PL_REG_PHY_CTL_1); + u32p_replace_bits(&tmp, 1, + PCIE_RC_PL_REG_PHY_CTL_1_REG_P2_POWERDOWN_ENA_NOSYNC_MASK); + writel(tmp, base + PCIE_RC_PL_REG_PHY_CTL_1); + } + /* * For config space accesses on the RC, show the right class for * a PCIe-PCIe bridge (the default setting is to be EP mode). From 7ea488cce73263231662e426639dd3e836537068 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 3 Jun 2025 19:03:38 +0200 Subject: [PATCH 0295/2411] PCI: endpoint: pci-epf-vntb: Return -ENOENT if pci_epc_get_next_free_bar() fails According to the function documentation of epf_ntb_init_epc_bar(), the function should return an error code on error. However, it returns -1 when no BAR is available i.e., when pci_epc_get_next_free_bar() fails. Return -ENOENT instead.
Fixes: e35f56bb0330 ("PCI: endpoint: Support NTB transfer between RC and EP") Signed-off-by: Jerome Brunet [mani: changed err code to -ENOENT] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Frank Li Link: https://patch.msgid.link/20250603-pci-vntb-bar-mapping-v2-1-fc685a22ad28@baylibre.com --- drivers/pci/endpoint/functions/pci-epf-vntb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c index e4da3fdb0007..30c6c563335a 100644 --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c @@ -680,7 +680,7 @@ static int epf_ntb_init_epc_bar(struct epf_ntb *ntb) barno = pci_epc_get_next_free_bar(epc_features, barno); if (barno < 0) { dev_err(dev, "Fail to get NTB function BAR\n"); - return barno; + return -ENOENT; } ntb->epf_ntb_bar[bar] = barno; } From a079d83c4afd4896f7f29bd9e807cb382043b360 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 3 Jun 2025 19:03:39 +0200 Subject: [PATCH 0296/2411] PCI: endpoint: pci-epf-vntb: Align MW naming with config names The config file related to the memory windows start the numbering of the MW from 1. The other NTB function does the same, yet the enumeration defining the BARs of the vNTB function starts numbering the MW from 0. Both numbering should be fine, but mixing the two is a bit confusing. The configfs file being the interface with userspace, keep that stable and consistently start the numbering of the MW from 1. 
Signed-off-by: Jerome Brunet [mani: commit message rewording] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Frank Li Link: https://patch.msgid.link/20250603-pci-vntb-bar-mapping-v2-2-fc685a22ad28@baylibre.com --- drivers/pci/endpoint/functions/pci-epf-vntb.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c index 30c6c563335a..1db87d16da8d 100644 --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c @@ -70,9 +70,9 @@ static struct workqueue_struct *kpcintb_workqueue; enum epf_ntb_bar { BAR_CONFIG, BAR_DB, - BAR_MW0, BAR_MW1, BAR_MW2, + BAR_MW3, }; /* @@ -576,7 +576,7 @@ static int epf_ntb_mw_bar_init(struct epf_ntb *ntb) for (i = 0; i < ntb->num_mws; i++) { size = ntb->mws_size[i]; - barno = ntb->epf_ntb_bar[BAR_MW0 + i]; + barno = ntb->epf_ntb_bar[BAR_MW1 + i]; ntb->epf->bar[barno].barno = barno; ntb->epf->bar[barno].size = size; @@ -629,7 +629,7 @@ static void epf_ntb_mw_bar_clear(struct epf_ntb *ntb, int num_mws) int i; for (i = 0; i < num_mws; i++) { - barno = ntb->epf_ntb_bar[BAR_MW0 + i]; + barno = ntb->epf_ntb_bar[BAR_MW1 + i]; pci_epc_clear_bar(ntb->epf->epc, ntb->epf->func_no, ntb->epf->vfunc_no, @@ -676,7 +676,7 @@ static int epf_ntb_init_epc_bar(struct epf_ntb *ntb) epc_features = pci_epc_get_features(ntb->epf->epc, ntb->epf->func_no, ntb->epf->vfunc_no); /* These are required BARs which are mandatory for NTB functionality */ - for (bar = BAR_CONFIG; bar <= BAR_MW0; bar++, barno++) { + for (bar = BAR_CONFIG; bar <= BAR_MW1; bar++, barno++) { barno = pci_epc_get_next_free_bar(epc_features, barno); if (barno < 0) { dev_err(dev, "Fail to get NTB function BAR\n"); @@ -1048,7 +1048,7 @@ static int vntb_epf_mw_set_trans(struct ntb_dev *ndev, int pidx, int idx, struct device *dev; dev = &ntb->ntb.dev; - barno = ntb->epf_ntb_bar[BAR_MW0 + idx]; + barno = ntb->epf_ntb_bar[BAR_MW1 + idx]; 
epf_bar = &ntb->epf->bar[barno]; epf_bar->phys_addr = addr; epf_bar->barno = barno; From 82a4277fa5e027028b955982ea876e24f660f808 Mon Sep 17 00:00:00 2001 From: Tanmay Shah Date: Wed, 18 Jun 2025 11:19:33 -0700 Subject: [PATCH 0297/2411] remoteproc: xlnx: Allow single core use in split mode When operating in split mode, it is a valid usecase to have only one core enabled in the cluster. Remove exact core count expectation from the driver. Signed-off-by: Tanmay Shah Link: https://lore.kernel.org/r/20250618181933.1253033-1-tanmay.shah@amd.com Signed-off-by: Mathieu Poirier --- drivers/remoteproc/xlnx_r5_remoteproc.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/remoteproc/xlnx_r5_remoteproc.c b/drivers/remoteproc/xlnx_r5_remoteproc.c index 1af89782e116..5aa3fd1b0530 100644 --- a/drivers/remoteproc/xlnx_r5_remoteproc.c +++ b/drivers/remoteproc/xlnx_r5_remoteproc.c @@ -1329,19 +1329,23 @@ static int zynqmp_r5_cluster_init(struct zynqmp_r5_cluster *cluster) /* * Number of cores is decided by number of child nodes of - * r5f subsystem node in dts. If Split mode is used in dts - * 2 child nodes are expected. + * r5f subsystem node in dts. + * In split mode maximum two child nodes are expected. + * However, only single core can be enabled too. + * Driver can handle following configuration in split mode: + * 1) core0 enabled, core1 disabled + * 2) core0 disabled, core1 enabled + * 3) core0 and core1 both are enabled. + * For now, no more than two cores are expected per cluster + * in split mode. * In lockstep mode if two child nodes are available, * only use first child node and consider it as core0 * and ignore core1 dt node.
*/ core_count = of_get_available_child_count(dev_node); - if (core_count == 0) { + if (core_count == 0 || core_count > 2) { dev_err(dev, "Invalid number of r5 cores %d", core_count); return -EINVAL; - } else if (cluster_mode == SPLIT_MODE && core_count != 2) { - dev_err(dev, "Invalid number of r5 cores for split mode\n"); - return -EINVAL; } else if (cluster_mode == LOCKSTEP_MODE && core_count == 2) { dev_warn(dev, "Only r5 core0 will be used\n"); core_count = 1; From 3aa54d162490f14d1f1fdf3b3d1170b2ea50276b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 18 Jun 2025 11:11:29 +0200 Subject: [PATCH 0298/2411] PCI/pwrctrl: Fix the kerneldoc tag for private fields The correct tag for marking private fields in kerneldoc is "private:", not capitalized "Private:". Fix the pwrctl struct to silence the following warnings: Warning: include/linux/pci-pwrctrl.h:45 struct member 'nb' not described in 'pci_pwrctrl' Warning: include/linux/pci-pwrctrl.h:45 struct member 'link' not described in 'pci_pwrctrl' Warning: include/linux/pci-pwrctrl.h:45 struct member 'work' not described in 'pci_pwrctrl' Fixes: 4565d2652a37 ("PCI/pwrctl: Add PCI power control core code") Reported-by: Bjorn Helgaas Closes: https://lore.kernel.org/all/20250617233539.GA1177120@bhelgaas/ Signed-off-by: Bartosz Golaszewski Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250618091129.44810-1-brgl@bgdev.pl --- include/linux/pci-pwrctrl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pci-pwrctrl.h b/include/linux/pci-pwrctrl.h index 7d439b0675e9..4aefc7901cd1 100644 --- a/include/linux/pci-pwrctrl.h +++ b/include/linux/pci-pwrctrl.h @@ -39,7 +39,7 @@ struct device_link; struct pci_pwrctrl { struct device *dev; - /* Private: don't use. 
*/ + /* private: internal use only */ struct notifier_block nb; struct device_link *link; struct work_struct work; From d375b70a0f47a032813be33493c97133cc080f74 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 23 Jun 2025 10:12:40 +0200 Subject: [PATCH 0299/2411] MAINTAINERS: rectify file entry in QUALCOMM SMB CHARGER DRIVER Commit 4deeea4b0741 ("MAINTAINERS: add myself as smbx charger driver maintainer") adds the section QUALCOMM SMB CHARGER DRIVER in MAINTAINERS, including a file entry pointing to qcom_smbx_charger.c. Within the same patch series, the commit 5ec53bcc7fce ("power: supply: pmi8998_charger: rename to qcom_smbx") renames qcom_pmi8998_charger.c to qcom_smbx.c and not to qcom_smbx_charger.c, though. Note that the commit message clearly indicates the intentional removal of the "_charger" suffix. Refer to the intended file. Signed-off-by: Lukas Bulwahn Acked-by: Casey Connolly Link: https://lore.kernel.org/r/20250623081240.149446-1-lukas.bulwahn@redhat.com Signed-off-by: Sebastian Reichel --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0a61f8e63245..2e052be4f14e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20528,7 +20528,7 @@ M: Casey Connolly L: linux-arm-msm@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/power/supply/qcom,pmi8998-charger.yaml -F: drivers/power/supply/qcom_smbx_charger.c +F: drivers/power/supply/qcom_smbx.c QUALCOMM QSEECOM DRIVER M: Maximilian Luz From 24bf3ee37fb8ed736094247133d00cb2c3bab3ce Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Thu, 29 May 2025 15:25:32 -0700 Subject: [PATCH 0300/2411] f2fs: make sure zoned device GC to use FG_GC in shortage of free section We already use FG_GC when we have free sections under gc_boost_zoned_gc_percent. So, let's make it consistent. 
Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 2 +- fs/f2fs/segment.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 3cb5242f4ddf..439f0153c24e 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -144,7 +144,7 @@ static int gc_thread_func(void *data) gc_control.one_time; /* foreground GC was been triggered via f2fs_balance_fs() */ - if (foreground) + if (foreground && !f2fs_sb_has_blkzoned(sbi)) sync_mode = false; gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ae1223ef648f..dad5a92b7e70 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -455,7 +455,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) } else { struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, - .init_gc_type = BG_GC, + .init_gc_type = f2fs_sb_has_blkzoned(sbi) ? + FG_GC : BG_GC, .no_bg_gc = true, .should_migrate_blocks = false, .err_gc_skipped = false, From 8142daf8a53806689186ee255cc02f89af7f8890 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Fri, 6 Jun 2025 11:49:04 -0700 Subject: [PATCH 0301/2411] f2fs: turn off one_time when forcibly set to foreground GC one_time mode is only for background GC. So, we need to set it back to false when foreground GC is enforced. Fixes: 9748c2ddea4a ("f2fs: do FG_GC when GC boosting is required for zoned devices") Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 439f0153c24e..30b95ebb4499 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1891,6 +1891,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control) /* Let's run FG_GC, if we don't have enough space. 
*/ if (has_not_enough_free_secs(sbi, 0, 0)) { gc_type = FG_GC; + gc_control->one_time = false; /* * For example, if there are many prefree_segments below given From 1773f63d108b1b9b9d053d8c95f8300c556f93b8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 9 Jun 2025 15:27:12 +0800 Subject: [PATCH 0302/2411] f2fs: handle nat.blkaddr corruption in f2fs_get_node_info() F2FS-fs (dm-55): access invalid blkaddr:972878540 Call trace: dump_backtrace+0xec/0x128 show_stack+0x18/0x28 dump_stack_lvl+0x40/0x88 dump_stack+0x18/0x24 __f2fs_is_valid_blkaddr+0x360/0x3b4 f2fs_is_valid_blkaddr+0x10/0x20 f2fs_get_node_info+0x21c/0x60c __write_node_page+0x15c/0x734 f2fs_sync_node_pages+0x4f8/0x700 f2fs_write_checkpoint+0x4a8/0x99c __checkpoint_and_complete_reqs+0x7c/0x20c issue_checkpoint_thread+0x4c/0xd8 kthread+0x11c/0x1b0 ret_from_fork+0x10/0x20 If nat.blkaddr is corrupted, during checkpoint, f2fs_sync_node_pages() will loop to flush node page w/ corrupted nat.blkaddr. Although, it tags SBI_NEED_FSCK, checkpoint can not persist it due to deadloop. Let's call f2fs_handle_error(, ERROR_INCONSISTENT_NAT) to record such error into superblock, it expects fsck can detect the error and repair inconsistent nat.blkaddr after device reboot. Note that, let's add sanity check in f2fs_get_node_info() to detect in-memory nat.blkaddr inconsistency, but only if CONFIG_F2FS_CHECK_FS is enabled. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index bfe104db284e..2fd287f2bca4 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -555,8 +555,8 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct f2fs_nat_entry ne; struct nat_entry *e; pgoff_t index; - block_t blkaddr; int i; + bool need_cache = true; ni->flag = 0; ni->nid = nid; @@ -569,6 +569,10 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); f2fs_up_read(&nm_i->nat_tree_lock); + if (IS_ENABLED(CONFIG_F2FS_CHECK_FS)) { + need_cache = false; + goto sanity_check; + } return 0; } @@ -594,7 +598,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, up_read(&curseg->journal_rwsem); if (i >= 0) { f2fs_up_read(&nm_i->nat_tree_lock); - goto cache; + goto sanity_check; } /* Fill node_info from nat page */ @@ -609,14 +613,23 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, ne = nat_blk->entries[nid - start_nid]; node_info_from_raw_nat(ni, &ne); f2fs_folio_put(folio, true); -cache: - blkaddr = le32_to_cpu(ne.block_addr); - if (__is_valid_data_blkaddr(blkaddr) && - !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) - return -EFAULT; +sanity_check: + if (__is_valid_data_blkaddr(ni->blk_addr) && + !f2fs_is_valid_blkaddr(sbi, ni->blk_addr, + DATA_GENERIC_ENHANCE)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_err_ratelimited(sbi, + "f2fs_get_node_info of %pS: inconsistent nat entry, " + "ino:%u, nid:%u, blkaddr:%u, ver:%u, flag:%u", + __builtin_return_address(0), + ni->ino, ni->nid, ni->blk_addr, ni->version, ni->flag); + f2fs_handle_error(sbi, ERROR_INCONSISTENT_NAT); + return -EFSCORRUPTED; + } /* cache nat entry */ - cache_nat_entry(sbi, nid, &ne); + if (need_cache) + cache_nat_entry(sbi, nid, &ne); return 0; } From 
70b6e8500431ca8bd8d1471ae721d61fc2acc844 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Jun 2025 11:13:15 +0800 Subject: [PATCH 0303/2411] f2fs: do sanity check on fio.new_blkaddr in do_write_page() F2FS-fs (dm-55): access invalid blkaddr:972878540 Call trace: dump_backtrace+0xec/0x128 show_stack+0x18/0x28 dump_stack_lvl+0x40/0x88 dump_stack+0x18/0x24 __f2fs_is_valid_blkaddr+0x360/0x3b4 f2fs_is_valid_blkaddr+0x10/0x20 f2fs_get_node_info+0x21c/0x60c __write_node_page+0x15c/0x734 f2fs_sync_node_pages+0x4f8/0x700 f2fs_write_checkpoint+0x4a8/0x99c __checkpoint_and_complete_reqs+0x7c/0x20c issue_checkpoint_thread+0x4c/0xd8 kthread+0x11c/0x1b0 ret_from_fork+0x10/0x20 If f2fs_allocate_data_block() fails, we may update nat.blkaddr w/ uninitialized fio.new_blkaddr. - __write_node_folio - f2fs_do_write_node_page - do_write_page - f2fs_allocate_data_block : once it fails, it may not allocate new blkaddr - set_node_addr : update w/ uninitialized fio.new_blkaddr variable I've checked all error paths in f2fs_allocate_data_block(), it should be tagged w/ CP_ERROR_FLAG. In addition, f2fs_allocate_data_block() succeeds, fio.new_blkaddr should be valid. Let's add f2fs_bug_on() to check above two conditions to detect any potential bugs. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index dad5a92b7e70..5653716460ea 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3948,8 +3948,14 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) folio_end_writeback(folio); if (f2fs_in_warm_node_list(fio->sbi, folio)) f2fs_del_fsync_node_entry(fio->sbi, folio); + f2fs_bug_on(fio->sbi, !is_set_ckpt_flags(fio->sbi, + CP_ERROR_FLAG)); goto out; } + + f2fs_bug_on(fio->sbi, !f2fs_is_valid_blkaddr(fio->sbi, + fio->new_blkaddr, DATA_GENERIC_ENHANCE)); + if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr, 1); From 554d9b7242a73d701ce121ac81bb578a3fca538e Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Sat, 7 Jun 2025 14:41:16 +0800 Subject: [PATCH 0304/2411] f2fs: fix bio memleak when committing super block When committing new super block, bio is allocated but not freed, and kmemleak complains: unreferenced object 0xffff88801d185600 (size 192): comm "kworker/3:2", pid 128, jiffies 4298624992 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 80 67 c3 00 81 88 ff ff .........g...... 01 08 06 00 00 00 00 00 00 00 00 00 01 00 00 00 ................ backtrace (crc 650ecdb1): kmem_cache_alloc_noprof+0x3a9/0x460 mempool_alloc_noprof+0x12f/0x310 bio_alloc_bioset+0x1e2/0x7e0 __f2fs_commit_super+0xe0/0x370 f2fs_commit_super+0x4ed/0x8c0 f2fs_record_error_work+0xc7/0x190 process_one_work+0x7db/0x1970 worker_thread+0x518/0xea0 kthread+0x359/0x690 ret_from_fork+0x34/0x70 ret_from_fork_asm+0x1a/0x30 The issue can be reproduced by: mount /dev/vda /mnt i=0 while :; do echo '[h]abc' > /sys/fs/f2fs/vda/extension_list echo '[h]!abc' > /sys/fs/f2fs/vda/extension_list echo scan > /sys/kernel/debug/kmemleak dmesg | grep "new suspected memory leaks" [ $? 
-eq 0 ] && break i=$((i + 1)) echo "$i" done umount /mnt Fixes: 5bcde4557862 ("f2fs: get rid of buffer_head use") Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bbf1dad6843f..4cbf3a133474 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3451,6 +3451,7 @@ static int __f2fs_commit_super(struct f2fs_sb_info *sbi, struct folio *folio, f2fs_bug_on(sbi, 1); ret = submit_bio_wait(bio); + bio_put(bio); folio_end_writeback(folio); return ret; From 90d5c9ba3ed91950f1546bf123a7a57cd958b452 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 11 Jun 2025 16:42:18 +0800 Subject: [PATCH 0305/2411] f2fs: fix to avoid invalid wait context issue ============================= [ BUG: Invalid wait context ] 6.13.0-rc1 #84 Tainted: G O ----------------------------- cat/56160 is trying to lock: ffff888105c86648 (&cprc->stat_lock){+.+.}-{3:3}, at: update_general_status+0x32a/0x8c0 [f2fs] other info that might help us debug this: context-{5:5} 2 locks held by cat/56160: #0: ffff88810a002a98 (&p->lock){+.+.}-{4:4}, at: seq_read_iter+0x56/0x4c0 #1: ffffffffa0462638 (f2fs_stat_lock){....}-{2:2}, at: stat_show+0x29/0x1020 [f2fs] stack backtrace: CPU: 0 UID: 0 PID: 56160 Comm: cat Tainted: G O 6.13.0-rc1 #84 Tainted: [O]=OOT_MODULE Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 Call Trace: dump_stack_lvl+0x88/0xd0 dump_stack+0x14/0x20 __lock_acquire+0x8d4/0xbb0 lock_acquire+0xd6/0x300 _raw_spin_lock+0x38/0x50 update_general_status+0x32a/0x8c0 [f2fs] stat_show+0x50/0x1020 [f2fs] seq_read_iter+0x116/0x4c0 seq_read+0xfa/0x130 full_proxy_read+0x66/0x90 vfs_read+0xc4/0x350 ksys_read+0x74/0xf0 __x64_sys_read+0x1d/0x20 x64_sys_call+0x17d9/0x1b80 do_syscall_64+0x68/0x130 entry_SYSCALL_64_after_hwframe+0x67/0x6f RIP: 0033:0x7f2ca53147e2 - seq_read - stat_show - raw_spin_lock_irqsave(&f2fs_stat_lock, flags) : f2fs_stat_lock is 
raw_spinlock_t type variable - update_general_status - spin_lock(&sbi->cprc_info.stat_lock); : stat_lock is spinlock_t type variable The root cause is that the lock order is incorrect [1]: we should not acquire spinlock_t lock after raw_spinlock_t lock, as if CONFIG_PREEMPT_LOCK is on, spinlock_t is implemented based on rtmutex, which can sleep after holding the lock. To fix this issue, let's change the f2fs_stat_lock lock type from raw_spinlock_t to spinlock_t, it's safe due to: - we don't need to use raw version of spinlock as the path is not performance sensitive. - we don't need to use irqsave version of spinlock as it won't be used in irq context. Quoted from [1]: "Extend lockdep to validate lock wait-type context. The current wait-types are: LD_WAIT_FREE, /* wait free, rcu etc.. */ LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */ LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */ LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */ Where lockdep validates that the current lock (the one being acquired) fits in the current wait-context (as generated by the held stack). This ensures that there is no attempt to acquire mutexes while holding spinlocks, to acquire spinlocks while holding raw_spinlocks and so on. In other words, its a more fancy might_sleep()."
[1] https://lore.kernel.org/all/20200321113242.427089655@linutronix.de Fixes: 98237fcda4a2 ("f2fs: use spin_lock to avoid hang") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 16c2dfb4f595..3417e7e550b2 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -21,7 +21,7 @@ #include "gc.h" static LIST_HEAD(f2fs_stat_list); -static DEFINE_RAW_SPINLOCK(f2fs_stat_lock); +static DEFINE_SPINLOCK(f2fs_stat_lock); #ifdef CONFIG_DEBUG_FS static struct dentry *f2fs_debugfs_root; #endif @@ -439,9 +439,8 @@ static int stat_show(struct seq_file *s, void *v) { struct f2fs_stat_info *si; int i = 0, j = 0; - unsigned long flags; - raw_spin_lock_irqsave(&f2fs_stat_lock, flags); + spin_lock(&f2fs_stat_lock); list_for_each_entry(si, &f2fs_stat_list, stat_list) { struct f2fs_sb_info *sbi = si->sbi; @@ -753,7 +752,7 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - paged : %llu KB\n", si->page_mem >> 10); } - raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); + spin_unlock(&f2fs_stat_lock); return 0; } @@ -765,7 +764,6 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_stat_info *si; struct f2fs_dev_stats *dev_stats; - unsigned long flags; int i; si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL); @@ -817,9 +815,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->max_aw_cnt, 0); - raw_spin_lock_irqsave(&f2fs_stat_lock, flags); + spin_lock(&f2fs_stat_lock); list_add_tail(&si->stat_list, &f2fs_stat_list); - raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); + spin_unlock(&f2fs_stat_lock); return 0; } @@ -827,11 +825,10 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); - unsigned long flags; - 
raw_spin_lock_irqsave(&f2fs_stat_lock, flags); + spin_lock(&f2fs_stat_lock); list_del(&si->stat_list); - raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); + spin_unlock(&f2fs_stat_lock); kfree(si->dev_stats); kfree(si); From 59c1c89e9ba8cefff05aa982dd9e6719f25e8ec5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 13 Jun 2025 13:51:09 +0800 Subject: [PATCH 0306/2411] f2fs: introduce reserved_pin_section sysfs entry This patch introduces /sys/fs/f2fs//reserved_pin_section for tuning @needed parameter of has_not_enough_free_secs(), if we configure it w/ zero, it can avoid f2fs_gc() as much as possible while fallocating on pinned file. Signed-off-by: Chao Yu Reviewed-by: wangzijie Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 9 +++++++++ fs/f2fs/f2fs.h | 3 +++ fs/f2fs/file.c | 5 ++--- fs/f2fs/super.c | 4 ++++ fs/f2fs/sysfs.c | 9 +++++++++ 5 files changed, 27 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index bf03263b9f46..c2a233f2a085 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -861,3 +861,12 @@ Description: This is a read-only entry to show the value of sb.s_encoding_flags, SB_ENC_STRICT_MODE_FL 0x00000001 SB_ENC_NO_COMPAT_FALLBACK_FL 0x00000002 ============================ ========== + +What: /sys/fs/f2fs//reserved_pin_section +Date: June 2025 +Contact: "Chao Yu" +Description: This threshold is used to control triggering garbage collection while + fallocating on pinned file, so, it can guarantee there is enough free + reserved section before preallocating on pinned file. + By default, the value is ovp_sections, especially, for zoned ufs, the + value is 1. 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9333a22b9a01..fa27498202a3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1724,6 +1724,9 @@ struct f2fs_sb_info { /* for skip statistic */ unsigned long long skipped_gc_rwsem; /* FG_GC only */ + /* free sections reserved for pinned file */ + unsigned int reserved_pin_section; + /* threshold for gc trials on pinned files */ unsigned short gc_pin_file_threshold; struct f2fs_rwsem pin_sem; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 696131e655ed..a909f79db178 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1887,9 +1887,8 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, } } - if (has_not_enough_free_secs(sbi, 0, f2fs_sb_has_blkzoned(sbi) ? - ZONED_PIN_SEC_REQUIRED_COUNT : - GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { + if (has_not_enough_free_secs(sbi, 0, + sbi->reserved_pin_section)) { f2fs_down_write(&sbi->gc_lock); stat_inc_gc_call_count(sbi, FOREGROUND); err = f2fs_gc(sbi, &gc_control); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4cbf3a133474..9b58cf891a66 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4771,6 +4771,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* get segno of first zoned block device */ sbi->first_seq_zone_segno = get_first_seq_zone_segno(sbi); + sbi->reserved_pin_section = f2fs_sb_has_blkzoned(sbi) ? 
+ ZONED_PIN_SEC_REQUIRED_COUNT : + GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)); + /* Read accumulated write IO statistics if exists */ seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); if (__exist_node_summaries(sbi)) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 75134d69a0bd..51be7ffb38c5 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -824,6 +824,13 @@ static ssize_t __sbi_store(struct f2fs_attr *a, return count; } + if (!strcmp(a->attr.name, "reserved_pin_section")) { + if (t > GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi))) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + *ui = (unsigned int)t; return count; @@ -1130,6 +1137,7 @@ F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec); F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy); #endif F2FS_SBI_GENERAL_RW_ATTR(carve_out); +F2FS_SBI_GENERAL_RW_ATTR(reserved_pin_section); /* STAT_INFO ATTR */ #ifdef CONFIG_F2FS_STAT_FS @@ -1323,6 +1331,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(last_age_weight), ATTR_LIST(max_read_extent_count), ATTR_LIST(carve_out), + ATTR_LIST(reserved_pin_section), NULL, }; ATTRIBUTE_GROUPS(f2fs); From 8e2a9b656474d67c55010f2c003ea2cf889a19ff Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Fri, 13 Jun 2025 09:50:44 +0800 Subject: [PATCH 0307/2411] f2fs: compress: change the first parameter of page_array_{alloc,free} to sbi No logic changes, just cleanup and prepare for fixing the UAF issue in f2fs_free_dic. 
Signed-off-by: Zhiguo Niu Signed-off-by: Baocong Liu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index b3c1df93a163..832a484963b7 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -23,20 +23,18 @@ static struct kmem_cache *cic_entry_slab; static struct kmem_cache *dic_entry_slab; -static void *page_array_alloc(struct inode *inode, int nr) +static void *page_array_alloc(struct f2fs_sb_info *sbi, int nr) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int size = sizeof(struct page *) * nr; if (likely(size <= sbi->page_array_slab_size)) return f2fs_kmem_cache_alloc(sbi->page_array_slab, - GFP_F2FS_ZERO, false, F2FS_I_SB(inode)); + GFP_F2FS_ZERO, false, sbi); return f2fs_kzalloc(sbi, size, GFP_NOFS); } -static void page_array_free(struct inode *inode, void *pages, int nr) +static void page_array_free(struct f2fs_sb_info *sbi, void *pages, int nr) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int size = sizeof(struct page *) * nr; if (!pages) @@ -149,13 +147,13 @@ int f2fs_init_compress_ctx(struct compress_ctx *cc) if (cc->rpages) return 0; - cc->rpages = page_array_alloc(cc->inode, cc->cluster_size); + cc->rpages = page_array_alloc(F2FS_I_SB(cc->inode), cc->cluster_size); return cc->rpages ? 
0 : -ENOMEM; } void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse) { - page_array_free(cc->inode, cc->rpages, cc->cluster_size); + page_array_free(F2FS_I_SB(cc->inode), cc->rpages, cc->cluster_size); cc->rpages = NULL; cc->nr_rpages = 0; cc->nr_cpages = 0; @@ -622,6 +620,7 @@ static void *f2fs_vmap(struct page **pages, unsigned int count) static int f2fs_compress_pages(struct compress_ctx *cc) { + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); struct f2fs_inode_info *fi = F2FS_I(cc->inode); const struct f2fs_compress_ops *cops = f2fs_cops[fi->i_compress_algorithm]; @@ -642,7 +641,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc) cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE); cc->valid_nr_cpages = cc->nr_cpages; - cc->cpages = page_array_alloc(cc->inode, cc->nr_cpages); + cc->cpages = page_array_alloc(sbi, cc->nr_cpages); if (!cc->cpages) { ret = -ENOMEM; goto destroy_compress_ctx; @@ -716,7 +715,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc) if (cc->cpages[i]) f2fs_compress_free_page(cc->cpages[i]); } - page_array_free(cc->inode, cc->cpages, cc->nr_cpages); + page_array_free(sbi, cc->cpages, cc->nr_cpages); cc->cpages = NULL; destroy_compress_ctx: if (cops->destroy_compress_ctx) @@ -1340,7 +1339,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, cic->magic = F2FS_COMPRESSED_PAGE_MAGIC; cic->inode = inode; atomic_set(&cic->pending_pages, cc->valid_nr_cpages); - cic->rpages = page_array_alloc(cc->inode, cc->cluster_size); + cic->rpages = page_array_alloc(sbi, cc->cluster_size); if (!cic->rpages) goto out_put_cic; @@ -1442,13 +1441,13 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, spin_unlock(&fi->i_size_lock); f2fs_put_rpages(cc); - page_array_free(cc->inode, cc->cpages, cc->nr_cpages); + page_array_free(sbi, cc->cpages, cc->nr_cpages); cc->cpages = NULL; f2fs_destroy_compress_ctx(cc, false); return 0; out_destroy_crypt: - page_array_free(cc->inode, cic->rpages, cc->cluster_size); + 
page_array_free(sbi, cic->rpages, cc->cluster_size); for (--i; i >= 0; i--) { if (!cc->cpages[i]) @@ -1469,7 +1468,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, f2fs_compress_free_page(cc->cpages[i]); cc->cpages[i] = NULL; } - page_array_free(cc->inode, cc->cpages, cc->nr_cpages); + page_array_free(sbi, cc->cpages, cc->nr_cpages); cc->cpages = NULL; return -EAGAIN; } @@ -1499,7 +1498,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) end_page_writeback(cic->rpages[i]); } - page_array_free(cic->inode, cic->rpages, cic->nr_rpages); + page_array_free(sbi, cic->rpages, cic->nr_rpages); kmem_cache_free(cic_entry_slab, cic); } @@ -1640,7 +1639,7 @@ static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) return 0; - dic->tpages = page_array_alloc(dic->inode, dic->cluster_size); + dic->tpages = page_array_alloc(F2FS_I_SB(dic->inode), dic->cluster_size); if (!dic->tpages) return -ENOMEM; @@ -1700,7 +1699,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) if (!dic) return ERR_PTR(-ENOMEM); - dic->rpages = page_array_alloc(cc->inode, cc->cluster_size); + dic->rpages = page_array_alloc(sbi, cc->cluster_size); if (!dic->rpages) { kmem_cache_free(dic_entry_slab, dic); return ERR_PTR(-ENOMEM); @@ -1721,7 +1720,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) dic->rpages[i] = cc->rpages[i]; dic->nr_rpages = cc->cluster_size; - dic->cpages = page_array_alloc(dic->inode, dic->nr_cpages); + dic->cpages = page_array_alloc(sbi, dic->nr_cpages); if (!dic->cpages) { ret = -ENOMEM; goto out_free; @@ -1751,6 +1750,7 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic, bool bypass_destroy_callback) { int i; + struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); f2fs_release_decomp_mem(dic, bypass_destroy_callback, true); @@ -1762,7 +1762,7 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic, continue; 
f2fs_compress_free_page(dic->tpages[i]); } - page_array_free(dic->inode, dic->tpages, dic->cluster_size); + page_array_free(sbi, dic->tpages, dic->cluster_size); } if (dic->cpages) { @@ -1771,10 +1771,10 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic, continue; f2fs_compress_free_page(dic->cpages[i]); } - page_array_free(dic->inode, dic->cpages, dic->nr_cpages); + page_array_free(sbi, dic->cpages, dic->nr_cpages); } - page_array_free(dic->inode, dic->rpages, dic->nr_rpages); + page_array_free(sbi, dic->rpages, dic->nr_rpages); kmem_cache_free(dic_entry_slab, dic); } From 39868685c2a94a70762bc6d77dc81d781d05bff5 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Fri, 13 Jun 2025 09:50:45 +0800 Subject: [PATCH 0308/2411] f2fs: compress: fix UAF of f2fs_inode_info in f2fs_free_dic The decompress_io_ctx may be released asynchronously after I/O completion. If this file is deleted immediately after read, and the kworker processing post_read_wq has not been executed yet due to high workload, it is possible that the inode(f2fs_inode_info) is evicted and freed before it is used in f2fs_free_dic. The UAF case is as below: Thread A Thread B - f2fs_decompress_end_io - f2fs_put_dic - queue_work add free_dic work to post_read_wq - do_unlink - iput - evict - call_rcu This file is deleted after read. Thread C kworker to process post_read_wq - rcu_do_batch - f2fs_free_inode - kmem_cache_free inode is freed by rcu - process_scheduled_works - f2fs_late_free_dic - f2fs_free_dic - f2fs_release_decomp_mem read (dic->inode)->i_compress_algorithm This patch stores compress_algorithm and sbi in dic to avoid inode UAF. In addition, the previous solution, which is deprecated in [1], may cause a system hang.
[1] https://lore.kernel.org/all/c36ab955-c8db-4a8b-a9d0-f07b5f426c3f@kernel.org Cc: Daeho Jeong Fixes: bff139b49d9f ("f2fs: handle decompress only post processing in softirq") Signed-off-by: Zhiguo Niu Signed-off-by: Baocong Liu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 40 ++++++++++++++++++++-------------------- fs/f2fs/f2fs.h | 2 ++ 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 832a484963b7..8cbb8038bc72 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -214,13 +214,13 @@ static int lzo_decompress_pages(struct decompress_io_ctx *dic) ret = lzo1x_decompress_safe(dic->cbuf->cdata, dic->clen, dic->rbuf, &dic->rlen); if (ret != LZO_E_OK) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "lzo decompress failed, ret:%d", ret); return -EIO; } if (dic->rlen != PAGE_SIZE << dic->log_cluster_size) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "lzo invalid rlen:%zu, expected:%lu", dic->rlen, PAGE_SIZE << dic->log_cluster_size); return -EIO; @@ -294,13 +294,13 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic) ret = LZ4_decompress_safe(dic->cbuf->cdata, dic->rbuf, dic->clen, dic->rlen); if (ret < 0) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "lz4 decompress failed, ret:%d", ret); return -EIO; } if (ret != PAGE_SIZE << dic->log_cluster_size) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "lz4 invalid ret:%d, expected:%lu", ret, PAGE_SIZE << dic->log_cluster_size); return -EIO; @@ -422,13 +422,13 @@ static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic) workspace_size = zstd_dstream_workspace_bound(max_window_size); - workspace = f2fs_vmalloc(F2FS_I_SB(dic->inode), workspace_size); + workspace = f2fs_vmalloc(dic->sbi, workspace_size); if (!workspace) return -ENOMEM; stream = zstd_init_dstream(max_window_size, 
workspace, workspace_size); if (!stream) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "%s zstd_init_dstream failed", __func__); vfree(workspace); return -EIO; @@ -464,14 +464,14 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic) ret = zstd_decompress_stream(stream, &outbuf, &inbuf); if (zstd_is_error(ret)) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "%s zstd_decompress_stream failed, ret: %d", __func__, zstd_get_error_code(ret)); return -EIO; } if (dic->rlen != outbuf.pos) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "%s ZSTD invalid rlen:%zu, expected:%lu", __func__, dic->rlen, PAGE_SIZE << dic->log_cluster_size); @@ -733,7 +733,7 @@ static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic, void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) { - struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); + struct f2fs_sb_info *sbi = dic->sbi; struct f2fs_inode_info *fi = F2FS_I(dic->inode); const struct f2fs_compress_ops *cops = f2fs_cops[fi->i_compress_algorithm]; @@ -806,7 +806,7 @@ void f2fs_end_read_compressed_page(struct page *page, bool failed, { struct decompress_io_ctx *dic = (struct decompress_io_ctx *)page_private(page); - struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); + struct f2fs_sb_info *sbi = dic->sbi; dec_page_count(sbi, F2FS_RD_DATA); @@ -1632,14 +1632,13 @@ static inline bool allow_memalloc_for_decomp(struct f2fs_sb_info *sbi, static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, bool pre_alloc) { - const struct f2fs_compress_ops *cops = - f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm]; + const struct f2fs_compress_ops *cops = f2fs_cops[dic->compress_algorithm]; int i; - if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) + if (!allow_memalloc_for_decomp(dic->sbi, pre_alloc)) return 0; - dic->tpages = page_array_alloc(F2FS_I_SB(dic->inode), dic->cluster_size); + 
dic->tpages = page_array_alloc(dic->sbi, dic->cluster_size); if (!dic->tpages) return -ENOMEM; @@ -1669,10 +1668,9 @@ static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic, bool bypass_destroy_callback, bool pre_alloc) { - const struct f2fs_compress_ops *cops = - f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm]; + const struct f2fs_compress_ops *cops = f2fs_cops[dic->compress_algorithm]; - if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) + if (!allow_memalloc_for_decomp(dic->sbi, pre_alloc)) return; if (!bypass_destroy_callback && cops->destroy_decompress_ctx) @@ -1707,6 +1705,8 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) dic->magic = F2FS_COMPRESSED_PAGE_MAGIC; dic->inode = cc->inode; + dic->sbi = sbi; + dic->compress_algorithm = F2FS_I(cc->inode)->i_compress_algorithm; atomic_set(&dic->remaining_pages, cc->nr_cpages); dic->cluster_idx = cc->cluster_idx; dic->cluster_size = cc->cluster_size; @@ -1750,7 +1750,8 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic, bool bypass_destroy_callback) { int i; - struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); + /* use sbi in dic to avoid UAF of dic->inode */ + struct f2fs_sb_info *sbi = dic->sbi; f2fs_release_decomp_mem(dic, bypass_destroy_callback, true); @@ -1793,8 +1794,7 @@ static void f2fs_put_dic(struct decompress_io_ctx *dic, bool in_task) f2fs_free_dic(dic, false); } else { INIT_WORK(&dic->free_work, f2fs_late_free_dic); - queue_work(F2FS_I_SB(dic->inode)->post_read_wq, - &dic->free_work); + queue_work(dic->sbi->post_read_wq, &dic->free_work); } } } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fa27498202a3..aa535dcf2297 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1536,6 +1536,7 @@ struct compress_io_ctx { struct decompress_io_ctx { u32 magic; /* magic number to indicate page is compressed */ struct inode *inode; /* inode the context belong to */ + struct f2fs_sb_info *sbi;
/* f2fs_sb_info pointer */ pgoff_t cluster_idx; /* cluster index number */ unsigned int cluster_size; /* page count in cluster */ unsigned int log_cluster_size; /* log of cluster size */ @@ -1576,6 +1577,7 @@ struct decompress_io_ctx { bool failed; /* IO error occurred before decompression? */ bool need_verity; /* need fs-verity verification after decompression? */ + unsigned char compress_algorithm; /* backup algorithm type */ void *private; /* payload buffer for specified decompression algorithm */ void *private2; /* extra payload buffer */ struct work_struct verity_work; /* work to verify the decompressed pages */ From 5d4ffc531a642177362571ef946d950d37ff1259 Mon Sep 17 00:00:00 2001 From: Jesung Yang Date: Wed, 28 May 2025 17:49:55 +0000 Subject: [PATCH 0309/2411] rust: kunit: use crate-level mapping for `c_void` Remove `use core::ffi::c_void`, which shadows `kernel::ffi::c_void` brought in via `use crate::prelude::*`, to maintain consistency and centralize the abstraction. Since `kernel::ffi::c_void` is a straightforward re-export of `core::ffi::c_void`, both are functionally equivalent. However, using `kernel::ffi::c_void` improves consistency across the kernel's Rust code and provides a unified reference point in case the definition ever needs to change, even if such a change is unlikely. Reviewed-by: Benno Lossin Link: https://rust-for-linux.zulipchat.com/#narrow/channel/288089/topic/x/near/520452733 Signed-off-by: Jesung Yang Link: https://lore.kernel.org/r/20250528174953.2948570-1-y.j3ms.n@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/kunit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs index 6930e86d98a9..099a61bbb8f4 100644 --- a/rust/kernel/kunit.rs +++ b/rust/kernel/kunit.rs @@ -7,7 +7,7 @@ //! 
Reference: use crate::prelude::*; -use core::{ffi::c_void, fmt}; +use core::fmt; #[cfg(CONFIG_PRINTK)] use crate::c_str; From b61b0092eaf22ef34936516d2e7181bb9cee25ac Mon Sep 17 00:00:00 2001 From: Albin Babu Varghese Date: Tue, 27 May 2025 16:49:28 -0400 Subject: [PATCH 0310/2411] rust: list: replace unwrap() with ? in doctest examples Using `unwrap()` in kernel doctests can cause panics on error and may give newcomers the mistaken impression that panicking is acceptable in kernel code. Replace all `.unwrap()` calls in `kernel::list` examples with `.ok_or(EINVAL)?` so that errors are properly propagated. Suggested-by: Miguel Ojeda Link: https://github.com/Rust-for-Linux/linux/issues/1164 Reviewed-by: Benno Lossin Signed-off-by: Albin Babu Varghese Reviewed-by: Alice Ryhl Reviewed-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250527204928.5117-1-albinbabuvarghese20@gmail.com [ Reworded slightly. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/list.rs | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/rust/kernel/list.rs b/rust/kernel/list.rs index c391c30b80f8..fe58a3920e70 100644 --- a/rust/kernel/list.rs +++ b/rust/kernel/list.rs @@ -82,9 +82,9 @@ /// // [15, 10, 30] /// { /// let mut iter = list.iter(); -/// assert_eq!(iter.next().unwrap().value, 15); -/// assert_eq!(iter.next().unwrap().value, 10); -/// assert_eq!(iter.next().unwrap().value, 30); +/// assert_eq!(iter.next().ok_or(EINVAL)?.value, 15); +/// assert_eq!(iter.next().ok_or(EINVAL)?.value, 10); +/// assert_eq!(iter.next().ok_or(EINVAL)?.value, 30); /// assert!(iter.next().is_none()); /// /// // Verify the length of the list. @@ -93,9 +93,9 @@ /// /// // Pop the items from the list using `pop_back()` and verify the content. 
/// { -/// assert_eq!(list.pop_back().unwrap().value, 30); -/// assert_eq!(list.pop_back().unwrap().value, 10); -/// assert_eq!(list.pop_back().unwrap().value, 15); +/// assert_eq!(list.pop_back().ok_or(EINVAL)?.value, 30); +/// assert_eq!(list.pop_back().ok_or(EINVAL)?.value, 10); +/// assert_eq!(list.pop_back().ok_or(EINVAL)?.value, 15); /// } /// /// // Insert 3 elements using `push_front()`. @@ -107,9 +107,9 @@ /// // [30, 10, 15] /// { /// let mut iter = list.iter(); -/// assert_eq!(iter.next().unwrap().value, 30); -/// assert_eq!(iter.next().unwrap().value, 10); -/// assert_eq!(iter.next().unwrap().value, 15); +/// assert_eq!(iter.next().ok_or(EINVAL)?.value, 30); +/// assert_eq!(iter.next().ok_or(EINVAL)?.value, 10); +/// assert_eq!(iter.next().ok_or(EINVAL)?.value, 15); /// assert!(iter.next().is_none()); /// /// // Verify the length of the list. @@ -118,8 +118,8 @@ /// /// // Pop the items from the list using `pop_front()` and verify the content. /// { -/// assert_eq!(list.pop_front().unwrap().value, 30); -/// assert_eq!(list.pop_front().unwrap().value, 10); +/// assert_eq!(list.pop_front().ok_or(EINVAL)?.value, 30); +/// assert_eq!(list.pop_front().ok_or(EINVAL)?.value, 10); /// } /// /// // Push `list2` to `list` through `push_all_back()`. 
@@ -135,9 +135,9 @@ /// // list: [15, 25, 35] /// // list2: [] /// let mut iter = list.iter(); -/// assert_eq!(iter.next().unwrap().value, 15); -/// assert_eq!(iter.next().unwrap().value, 25); -/// assert_eq!(iter.next().unwrap().value, 35); +/// assert_eq!(iter.next().ok_or(EINVAL)?.value, 15); +/// assert_eq!(iter.next().ok_or(EINVAL)?.value, 25); +/// assert_eq!(iter.next().ok_or(EINVAL)?.value, 35); /// assert!(iter.next().is_none()); /// assert!(list2.is_empty()); /// } @@ -809,11 +809,11 @@ fn next(&mut self) -> Option> { /// merge_sorted(&mut list, list2); /// /// let mut items = list.into_iter(); -/// assert_eq!(items.next().unwrap().value, 10); -/// assert_eq!(items.next().unwrap().value, 11); -/// assert_eq!(items.next().unwrap().value, 12); -/// assert_eq!(items.next().unwrap().value, 13); -/// assert_eq!(items.next().unwrap().value, 14); +/// assert_eq!(items.next().ok_or(EINVAL)?.value, 10); +/// assert_eq!(items.next().ok_or(EINVAL)?.value, 11); +/// assert_eq!(items.next().ok_or(EINVAL)?.value, 12); +/// assert_eq!(items.next().ok_or(EINVAL)?.value, 13); +/// assert_eq!(items.next().ok_or(EINVAL)?.value, 14); /// assert!(items.next().is_none()); /// # Result::<(), Error>::Ok(()) /// ``` From bfb9e46b5bff33ebaac49cceb27256caceddeee5 Mon Sep 17 00:00:00 2001 From: Guilherme Giacomo Simoes Date: Mon, 9 Jun 2025 09:22:00 -0300 Subject: [PATCH 0311/2411] rust: macros: remove `module!`'s deprecated `author` key Commit 38559da6afb2 ("rust: module: introduce `authors` key") introduced a new `authors` key to support multiple module authors, while keeping the old `author` key for backward compatibility. Now that most in-tree modules have migrated to `authors`, remove: 1. The deprecated `author` key support from the module macro 2. 
Legacy `author` entries from remaining modules Signed-off-by: Guilherme Giacomo Simoes Acked-by: Andreas Hindborg Reviewed-by: Benno Lossin Acked-by: Danilo Krummrich Acked-by: Viresh Kumar Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250609122200.179307-1-trintaeoitogc@gmail.com [ Reworded slightly. - Miguel ] Signed-off-by: Miguel Ojeda --- drivers/cpufreq/rcpufreq_dt.rs | 2 +- drivers/gpu/drm/nova/nova.rs | 2 +- drivers/gpu/nova-core/nova_core.rs | 2 +- rust/kernel/firmware.rs | 2 +- rust/macros/module.rs | 6 ------ samples/rust/rust_configfs.rs | 2 +- samples/rust/rust_driver_auxiliary.rs | 2 +- 7 files changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/cpufreq/rcpufreq_dt.rs b/drivers/cpufreq/rcpufreq_dt.rs index 43c87d0259b6..30a170570c0e 100644 --- a/drivers/cpufreq/rcpufreq_dt.rs +++ b/drivers/cpufreq/rcpufreq_dt.rs @@ -220,7 +220,7 @@ fn probe( module_platform_driver! { type: CPUFreqDTDriver, name: "cpufreq-dt", - author: "Viresh Kumar ", + authors: ["Viresh Kumar "], description: "Generic CPUFreq DT driver", license: "GPL v2", } diff --git a/drivers/gpu/drm/nova/nova.rs b/drivers/gpu/drm/nova/nova.rs index 902876aa14d1..64fd670e99e1 100644 --- a/drivers/gpu/drm/nova/nova.rs +++ b/drivers/gpu/drm/nova/nova.rs @@ -12,7 +12,7 @@ kernel::module_auxiliary_driver! { type: NovaDriver, name: "Nova", - author: "Danilo Krummrich", + authors: ["Danilo Krummrich"], description: "Nova GPU driver", license: "GPL v2", } diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index 618632f0abcc..f405d7a99c28 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -13,7 +13,7 @@ kernel::module_pci_driver! 
{ type: driver::NovaCore, name: "NovaCore", - author: "Danilo Krummrich", + authors: ["Danilo Krummrich"], description: "Nova Core GPU driver", license: "GPL v2", firmware: [], diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs index 94fa1ea17ef0..7cff0edeab74 100644 --- a/rust/kernel/firmware.rs +++ b/rust/kernel/firmware.rs @@ -182,7 +182,7 @@ unsafe impl Sync for Firmware {} /// module! { /// type: MyModule, /// name: "module_firmware_test", -/// author: "Rust for Linux", +/// authors: ["Rust for Linux"], /// description: "module_firmware! test module", /// license: "GPL", /// } diff --git a/rust/macros/module.rs b/rust/macros/module.rs index 2ddd2eeb2852..5dd276a2e5cb 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -94,7 +94,6 @@ struct ModuleInfo { type_: String, license: String, name: String, - author: Option, authors: Option>, description: Option, alias: Option>, @@ -108,7 +107,6 @@ fn parse(it: &mut token_stream::IntoIter) -> Self { const EXPECTED_KEYS: &[&str] = &[ "type", "name", - "author", "authors", "description", "license", @@ -134,7 +132,6 @@ fn parse(it: &mut token_stream::IntoIter) -> Self { match key.as_str() { "type" => info.type_ = expect_ident(it), "name" => info.name = expect_string_ascii(it), - "author" => info.author = Some(expect_string(it)), "authors" => info.authors = Some(expect_string_array(it)), "description" => info.description = Some(expect_string(it)), "license" => info.license = expect_string_ascii(it), @@ -179,9 +176,6 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { // Rust does not allow hyphens in identifiers, use underscore instead. 
let ident = info.name.replace('-', "_"); let mut modinfo = ModInfoBuilder::new(ident.as_ref()); - if let Some(author) = info.author { - modinfo.emit("author", &author); - } if let Some(authors) = info.authors { for author in authors { modinfo.emit("author", &author); diff --git a/samples/rust/rust_configfs.rs b/samples/rust/rust_configfs.rs index 60ddbe62cda3..af04bfa35cb2 100644 --- a/samples/rust/rust_configfs.rs +++ b/samples/rust/rust_configfs.rs @@ -14,7 +14,7 @@ module! { type: RustConfigfs, name: "rust_configfs", - author: "Rust for Linux Contributors", + authors: ["Rust for Linux Contributors"], description: "Rust configfs sample", license: "GPL", } diff --git a/samples/rust/rust_driver_auxiliary.rs b/samples/rust/rust_driver_auxiliary.rs index 3e15e6d002bb..abf3d55ed249 100644 --- a/samples/rust/rust_driver_auxiliary.rs +++ b/samples/rust/rust_driver_auxiliary.rs @@ -114,7 +114,7 @@ fn init(module: &'static kernel::ThisModule) -> impl PinInit { module! { type: SampleModule, name: "rust_driver_auxiliary", - author: "Danilo Krummrich", + authors: ["Danilo Krummrich"], description: "Rust auxiliary driver", license: "GPL v2", } From b6985083be1deb1f5fa14d160265f57d9ccb42a1 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 10 Jun 2025 14:33:00 +0530 Subject: [PATCH 0312/2411] rust: Use consistent "# Examples" heading style in rustdoc Use a consistent `# Examples` heading in rustdoc across the codebase. Some modules previously used `## Examples` (even when they should be available as top-level headers), while others used `# Example`, which deviates from the preferred `# Examples` style. 
Suggested-by: Miguel Ojeda Signed-off-by: Viresh Kumar Acked-by: Benno Lossin Link: https://lore.kernel.org/r/ddd5ce0ac20c99a72a4f1e4322d3de3911056922.1749545815.git.viresh.kumar@linaro.org Signed-off-by: Miguel Ojeda --- rust/kernel/block/mq.rs | 2 +- rust/kernel/clk.rs | 6 +++--- rust/kernel/configfs.rs | 2 +- rust/kernel/cpufreq.rs | 8 ++++---- rust/kernel/cpumask.rs | 4 ++-- rust/kernel/devres.rs | 4 ++-- rust/kernel/firmware.rs | 4 ++-- rust/kernel/opp.rs | 16 ++++++++-------- rust/kernel/pci.rs | 4 ++-- rust/kernel/platform.rs | 2 +- rust/kernel/sync.rs | 2 +- rust/kernel/workqueue.rs | 2 +- rust/pin-init/src/lib.rs | 2 +- 13 files changed, 29 insertions(+), 29 deletions(-) diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs index fb0f393c1cea..831445d37181 100644 --- a/rust/kernel/block/mq.rs +++ b/rust/kernel/block/mq.rs @@ -53,7 +53,7 @@ //! [`GenDiskBuilder`]: gen_disk::GenDiskBuilder //! [`GenDiskBuilder::build`]: gen_disk::GenDiskBuilder::build //! -//! # Example +//! # Examples //! //! ```rust //! use kernel::{ diff --git a/rust/kernel/clk.rs b/rust/kernel/clk.rs index 6041c6d07527..34a19bc99990 100644 --- a/rust/kernel/clk.rs +++ b/rust/kernel/clk.rs @@ -12,7 +12,7 @@ /// /// Represents a frequency in hertz, wrapping a [`c_ulong`] value. /// -/// ## Examples +/// # Examples /// /// ``` /// use kernel::clk::Hertz; @@ -95,7 +95,7 @@ mod common_clk { /// Instances of this type are reference-counted. Calling [`Clk::get`] ensures that the /// allocation remains valid for the lifetime of the [`Clk`]. /// - /// ## Examples + /// # Examples /// /// The following example demonstrates how to obtain and configure a clock for a device. /// @@ -266,7 +266,7 @@ fn drop(&mut self) { /// Instances of this type are reference-counted. Calling [`OptionalClk::get`] ensures that the /// allocation remains valid for the lifetime of the [`OptionalClk`]. 
/// - /// ## Examples + /// # Examples /// /// The following example demonstrates how to obtain and configure an optional clock for a /// device. The code functions correctly whether or not the clock is available. diff --git a/rust/kernel/configfs.rs b/rust/kernel/configfs.rs index 1ddac786bd0d..aafef70b7177 100644 --- a/rust/kernel/configfs.rs +++ b/rust/kernel/configfs.rs @@ -17,7 +17,7 @@ //! //! C header: [`include/linux/configfs.h`](srctree/include/linux/configfs.h) //! -//! # Example +//! # Examples //! //! ```ignore //! use kernel::alloc::flags; diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs index 14aafb0c0314..e8d231971276 100644 --- a/rust/kernel/cpufreq.rs +++ b/rust/kernel/cpufreq.rs @@ -202,7 +202,7 @@ fn from(index: TableIndex) -> Self { /// The callers must ensure that the `struct cpufreq_frequency_table` is valid for access and /// remains valid for the lifetime of the returned reference. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to read a frequency value from [`Table`]. /// @@ -318,7 +318,7 @@ fn deref(&self) -> &Self::Target { /// /// This is used by the CPU frequency drivers to build a frequency table dynamically. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to create a CPU frequency table. /// @@ -395,7 +395,7 @@ pub fn to_table(mut self) -> Result { /// The callers must ensure that the `struct cpufreq_policy` is valid for access and remains valid /// for the lifetime of the returned reference. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to create a CPU frequency table. /// @@ -834,7 +834,7 @@ fn register_em(_policy: &mut Policy) { /// CPU frequency driver Registration. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to register a cpufreq driver. 
/// diff --git a/rust/kernel/cpumask.rs b/rust/kernel/cpumask.rs index 19c607709b5f..4bce230a73b6 100644 --- a/rust/kernel/cpumask.rs +++ b/rust/kernel/cpumask.rs @@ -30,7 +30,7 @@ /// The callers must ensure that the `struct cpumask` is valid for access and /// remains valid for the lifetime of the returned reference. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to update a [`Cpumask`]. /// @@ -175,7 +175,7 @@ pub fn copy(&self, dstp: &mut Self) { /// The callers must ensure that the `struct cpumask_var_t` is valid for access and remains valid /// for the lifetime of [`CpumaskVar`]. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to create and update a [`CpumaskVar`]. /// diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index 8dfbc5b21dc1..d0e6c6e162c2 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -44,7 +44,7 @@ struct DevresInner { /// [`Devres`] users should make sure to simply free the corresponding backing resource in `T`'s /// [`Drop`] implementation. /// -/// # Example +/// # Examples /// /// ```no_run /// # use kernel::{bindings, c_str, device::{Bound, Device}, devres::Devres, io::{Io, IoRaw}}; @@ -203,7 +203,7 @@ pub fn new_foreign_owned(dev: &Device, data: T, flags: Flags) -> Result { /// An error is returned if `dev` does not match the same [`Device`] this [`Devres`] instance /// has been created with. /// - /// # Example + /// # Examples /// /// ```no_run /// # #![cfg(CONFIG_PCI)] diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs index 7cff0edeab74..be684e860ed2 100644 --- a/rust/kernel/firmware.rs +++ b/rust/kernel/firmware.rs @@ -140,7 +140,7 @@ unsafe impl Sync for Firmware {} /// Typically, such contracts would be enforced by a trait, however traits do not (yet) support /// const functions. 
/// -/// # Example +/// # Examples /// /// ``` /// # mod module_firmware_test { @@ -262,7 +262,7 @@ const fn push_internal(mut self, bytes: &[u8]) -> Self { /// Append path components to the [`ModInfoBuilder`] instance. Paths need to be separated /// with [`ModInfoBuilder::new_entry`]. /// - /// # Example + /// # Examples /// /// ``` /// use kernel::firmware::ModInfoBuilder; diff --git a/rust/kernel/opp.rs b/rust/kernel/opp.rs index bc82a85ca883..0e94cb2703ec 100644 --- a/rust/kernel/opp.rs +++ b/rust/kernel/opp.rs @@ -103,7 +103,7 @@ fn to_c_str_array(names: &[CString]) -> Result> { /// /// Represents voltage in microvolts, wrapping a [`c_ulong`] value. /// -/// ## Examples +/// # Examples /// /// ``` /// use kernel::opp::MicroVolt; @@ -128,7 +128,7 @@ fn from(volt: MicroVolt) -> Self { /// /// Represents power in microwatts, wrapping a [`c_ulong`] value. /// -/// ## Examples +/// # Examples /// /// ``` /// use kernel::opp::MicroWatt; @@ -153,7 +153,7 @@ fn from(power: MicroWatt) -> Self { /// /// The associated [`OPP`] is automatically removed when the [`Token`] is dropped. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to create an [`OPP`] dynamically. /// @@ -202,7 +202,7 @@ fn drop(&mut self) { /// Rust abstraction for the C `struct dev_pm_opp_data`, used to define operating performance /// points (OPPs) dynamically. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to create an [`OPP`] with [`Data`]. /// @@ -254,7 +254,7 @@ fn freq(&self) -> Hertz { /// [`OPP`] search options. /// -/// ## Examples +/// # Examples /// /// Defines how to search for an [`OPP`] in a [`Table`] relative to a frequency. /// @@ -326,7 +326,7 @@ fn drop(&mut self) { /// /// Rust abstraction for the C `struct dev_pm_opp_config`. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to set OPP property-name configuration for a [`Device`]. 
/// @@ -569,7 +569,7 @@ extern "C" fn config_regulators( /// /// Instances of this type are reference-counted. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to get OPP [`Table`] for a [`Cpumask`] and set its /// frequency. @@ -1011,7 +1011,7 @@ fn drop(&mut self) { /// /// A reference to the [`OPP`], &[`OPP`], isn't refcounted by the Rust code. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to get [`OPP`] corresponding to a frequency value and /// configure the device with it. diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index f6b19764ad17..6b94fd7a3ce9 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -100,7 +100,7 @@ extern "C" fn remove_callback(pdev: *mut bindings::pci_dev) { /// Declares a kernel module that exposes a single PCI driver. /// -/// # Example +/// # Examples /// ///```ignore /// kernel::module_pci_driver! { @@ -194,7 +194,7 @@ macro_rules! pci_device_table { /// The PCI driver trait. /// -/// # Example +/// # Examples /// ///``` /// # use kernel::{bindings, device::Core, pci}; diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index 4b06f9fbc172..0a6a6be732b2 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -122,7 +122,7 @@ macro_rules! module_platform_driver { /// /// Drivers must implement this trait in order to get a platform driver registered. /// -/// # Example +/// # Examples /// ///``` /// # use kernel::{bindings, c_str, device::Core, of, platform}; diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs index c23a12639924..63c99e015ad6 100644 --- a/rust/kernel/sync.rs +++ b/rust/kernel/sync.rs @@ -41,7 +41,7 @@ impl LockClassKey { /// Initializes a dynamically allocated lock class key. In the common case of using a /// statically allocated lock class key, the static_lock_class! macro should be used instead. 
/// - /// # Example + /// # Examples /// ``` /// # use kernel::c_str; /// # use kernel::alloc::KBox; diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index 89e5c2560eec..cce23684af24 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -26,7 +26,7 @@ //! * The [`WorkItemPointer`] trait is implemented for the pointer type that points at a something //! that implements [`WorkItem`]. //! -//! ## Example +//! ## Examples //! //! This example defines a struct that holds an integer and can be scheduled on the workqueue. When //! the struct is executed, it will print the integer. Since there is only one `work_struct` field, diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs index 9ab34036e6bc..c5f395b44ec8 100644 --- a/rust/pin-init/src/lib.rs +++ b/rust/pin-init/src/lib.rs @@ -953,7 +953,7 @@ macro_rules! try_init { /// Asserts that a field on a struct using `#[pin_data]` is marked with `#[pin]` ie. that it is /// structurally pinned. /// -/// # Example +/// # Examples /// /// This will succeed: /// ``` From 0303584766b7bdb6564c7e8f13e0b59b6ef44984 Mon Sep 17 00:00:00 2001 From: Sai Vishnu M Date: Mon, 2 Jun 2025 22:19:24 +0530 Subject: [PATCH 0313/2411] rust: io: avoid mentioning private fields in `IoMem` Removed reference to internal variables in the comment of `IoMem` This avoids using private variable names in public documentation. Suggested-by: Miguel Ojeda Link: https://github.com/Rust-for-Linux/linux/issues/1167 Signed-off-by: Sai Vishnu M Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250602164923.48893-2-saivishnu725@gmail.com [ Reworded title and adjusted tags. 
- Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/io.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs index c08de4121637..bd4d720be165 100644 --- a/rust/kernel/io.rs +++ b/rust/kernel/io.rs @@ -43,7 +43,7 @@ pub fn maxsize(&self) -> usize { } } -/// IO-mapped memory, starting at the base address @addr and spanning @maxlen bytes. +/// IO-mapped memory region. /// /// The creator (usually a subsystem / bus such as PCI) is responsible for creating the /// mapping, performing an additional region request etc. From 694174f94ebeeb5ec5cc0e9de9b40c82057e1d95 Mon Sep 17 00:00:00 2001 From: "Yann E. MORIN" Date: Thu, 14 Nov 2013 00:53:32 +0100 Subject: [PATCH 0314/2411] kconfig: lxdialog: fix 'space' to (de)select options In case a menu has comments without letters/numbers (eg. characters matching the regexp '^[^[:alpha:][:digit:]]+$', for example - or *), hitting space will cycle through those comments, rather than selecting/deselecting the currently-highlighted option. This is the behaviour of hitting any letter/digit: jump to the next option whose prompt starts with that letter. The only letters that do not behave as such are 'y' 'm' and 'n'. Prompts that start with one of those three letters are instead matched on the first letter that is not 'y', 'm' or 'n'. Fix that by treating 'space' as we treat y/m/n, ie. as an action key, not as a shortcut to jump to a prompt. Signed-off-by: Yann E.
MORIN Signed-off-by: Peter Korsgaard Signed-off-by: Cherniaev Andrei [masahiro: took from Buildroot, adjusted the commit subject] Signed-off-by: Masahiro Yamada --- scripts/kconfig/lxdialog/menubox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/lxdialog/menubox.c b/scripts/kconfig/lxdialog/menubox.c index 6e6244df0c56..d4c19b7beebb 100644 --- a/scripts/kconfig/lxdialog/menubox.c +++ b/scripts/kconfig/lxdialog/menubox.c @@ -264,7 +264,7 @@ int dialog_menu(const char *title, const char *prompt, if (key < 256 && isalpha(key)) key = tolower(key); - if (strchr("ynmh", key)) + if (strchr("ynmh ", key)) i = max_choice; else { for (i = choice + 1; i < max_choice; i++) { From 626c54af35764b0b8a4ed5c446458ba6ddfe9cc8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 9 Jun 2025 01:59:55 +0900 Subject: [PATCH 0315/2411] kheaders: rebuild kheaders_data.tar.xz when a file is modified within a minute When a header file is changed, kernel/gen_kheaders.sh may fail to update kernel/kheaders_data.tar.xz. [steps to reproduce] [1] Build kernel/kheaders_data.tar.xz $ make -j$(nproc) kernel/kheaders.o DESCEND objtool INSTALL libsubcmd_headers CALL scripts/checksyscalls.sh CHK kernel/kheaders_data.tar.xz GEN kernel/kheaders_data.tar.xz CC kernel/kheaders.o [2] Modify a header without changing the file size $ sed -i s/0xdeadbeef/0xfeedbeef/ include/linux/elfnote.h [3] Rebuild kernel/kheaders_data.tar.xz $ make -j$(nproc) kernel/kheaders.o DESCEND objtool INSTALL libsubcmd_headers CALL scripts/checksyscalls.sh CHK kernel/kheaders_data.tar.xz kernel/kheaders_data.tar.xz is not updated if steps [1] - [3] are run within the same minute. The headers_md5 variable stores the MD5 hash of the 'ls -l' output for all header files. This hash value is used to determine whether kheaders_data.tar.xz needs to be rebuilt. However, 'ls -l' prints the modification times with minute-level granularity. 
If a file is modified within the same minute and its size remains the same, the MD5 hash does not change. To reliably detect file modifications, this commit rewrites kernel/gen_kheaders.sh to output header dependencies to kernel/.kheaders_data.tar.xz.cmd. Then, Make compares the timestamps and reruns kernel/gen_kheaders.sh when necessary. This is the standard mechanism used by Make and Kbuild. Signed-off-by: Masahiro Yamada --- kernel/.gitignore | 2 + kernel/Makefile | 47 +++++++++++++++++++--- kernel/gen_kheaders.sh | 88 +++++++++--------------------------------- 3 files changed, 63 insertions(+), 74 deletions(-) diff --git a/kernel/.gitignore b/kernel/.gitignore index c6b299a6b786..a501bfc80694 100644 --- a/kernel/.gitignore +++ b/kernel/.gitignore @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only /config_data /kheaders.md5 +/kheaders-objlist +/kheaders-srclist diff --git a/kernel/Makefile b/kernel/Makefile index 32e80dd626af..9a9ff405ea89 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -158,11 +158,48 @@ filechk_cat = cat $< $(obj)/config_data: $(KCONFIG_CONFIG) FORCE $(call filechk,cat) +# kheaders_data.tar.xz $(obj)/kheaders.o: $(obj)/kheaders_data.tar.xz -quiet_cmd_genikh = CHK $(obj)/kheaders_data.tar.xz - cmd_genikh = $(CONFIG_SHELL) $(srctree)/kernel/gen_kheaders.sh $@ -$(obj)/kheaders_data.tar.xz: FORCE - $(call cmd,genikh) +quiet_cmd_kheaders_data = GEN $@ + cmd_kheaders_data = "$<" "$@" "$(obj)/kheaders-srclist" "$(obj)/kheaders-objlist" + cmd_kheaders_data_dep = cat $(depfile) >> $(dot-target).cmd; rm -f $(depfile) -clean-files := kheaders_data.tar.xz kheaders.md5 +define rule_kheaders_data + $(call cmd_and_savecmd,kheaders_data) + $(call cmd,kheaders_data_dep) +endef + +targets += kheaders_data.tar.xz +$(obj)/kheaders_data.tar.xz: $(src)/gen_kheaders.sh $(obj)/kheaders-srclist $(obj)/kheaders-objlist $(obj)/kheaders.md5 FORCE + $(call if_changed_rule,kheaders_data) + +# generated headers in objtree +# +# include/generated/utsversion.h is 
ignored because it is generated +# after gen_kheaders.sh is executed. (utsversion.h is unneeded for kheaders) +filechk_kheaders_objlist = \ + for d in include "arch/$(SRCARCH)/include"; do \ + find "$${d}/generated" ! -path "include/generated/utsversion.h" -a -name "*.h" -print; \ + done + +$(obj)/kheaders-objlist: FORCE + $(call filechk,kheaders_objlist) + +# non-generated headers in srctree +filechk_kheaders_srclist = \ + for d in include "arch/$(SRCARCH)/include"; do \ + find "$(srctree)/$${d}" -path "$(srctree)/$${d}/generated" -prune -o -name "*.h" -print; \ + done + +$(obj)/kheaders-srclist: FORCE + $(call filechk,kheaders_srclist) + +# Some files are symlinks. If symlinks are changed, kheaders_data.tar.xz should +# be rebuilt. +filechk_kheaders_md5sum = xargs -r -a $< stat -c %N | md5sum + +$(obj)/kheaders.md5: $(obj)/kheaders-srclist FORCE + $(call filechk,kheaders_md5sum) + +clean-files := kheaders.md5 kheaders-srclist kheaders-objlist diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index c9e5dc068e85..0ff7beabb21a 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -4,79 +4,33 @@ # This script generates an archive consisting of kernel headers # for CONFIG_IKHEADERS. set -e -sfile="$(readlink -f "$0")" -outdir="$(pwd)" tarfile=$1 -tmpdir=$outdir/${tarfile%/*}/.tmp_dir +srclist=$2 +objlist=$3 -dir_list=" -include/ -arch/$SRCARCH/include/ -" +dir=$(dirname "${tarfile}") +tmpdir=${dir}/.tmp_dir +depfile=${dir}/.$(basename "${tarfile}").d -# Support incremental builds by skipping archive generation -# if timestamps of files being archived are not changed. +# generate dependency list. +{ + echo + echo "deps_${tarfile} := \\" + sed 's:\(.*\): \1 \\:' "${srclist}" + sed -n '/^include\/generated\/autoconf\.h$/!s:\(.*\): \1 \\:p' "${objlist}" + echo + echo "${tarfile}: \$(deps_${tarfile})" + echo + echo "\$(deps_${tarfile}):" -# This block is useful for debugging the incremental builds. -# Uncomment it for debugging. -# if [ ! 
-f /tmp/iter ]; then iter=1; echo 1 > /tmp/iter; -# else iter=$(($(cat /tmp/iter) + 1)); echo $iter > /tmp/iter; fi -# find $all_dirs -name "*.h" | xargs ls -l > /tmp/ls-$iter - -all_dirs= -if [ "$building_out_of_srctree" ]; then - for d in $dir_list; do - all_dirs="$all_dirs $srctree/$d" - done -fi -all_dirs="$all_dirs $dir_list" - -# include/generated/utsversion.h is ignored because it is generated after this -# script is executed. (utsversion.h is unneeded for kheaders) -# -# When Kconfig regenerates include/generated/autoconf.h, its timestamp is -# updated, but the contents might be still the same. When any CONFIG option is -# changed, Kconfig touches the corresponding timestamp file include/config/*. -# Hence, the md5sum detects the configuration change anyway. We do not need to -# check include/generated/autoconf.h explicitly. -# -# Ignore them for md5 calculation to avoid pointless regeneration. -headers_md5="$(find $all_dirs -name "*.h" -a \ - ! -path include/generated/utsversion.h -a \ - ! -path include/generated/autoconf.h | - xargs ls -l | md5sum | cut -d ' ' -f1)" - -# Any changes to this script will also cause a rebuild of the archive. 
-this_file_md5="$(ls -l $sfile | md5sum | cut -d ' ' -f1)" -if [ -f $tarfile ]; then tarfile_md5="$(md5sum $tarfile | cut -d ' ' -f1)"; fi -if [ -f kernel/kheaders.md5 ] && - [ "$(head -n 1 kernel/kheaders.md5)" = "$headers_md5" ] && - [ "$(head -n 2 kernel/kheaders.md5 | tail -n 1)" = "$this_file_md5" ] && - [ "$(tail -n 1 kernel/kheaders.md5)" = "$tarfile_md5" ]; then - exit -fi - -echo " GEN $tarfile" +} > "${depfile}" rm -rf "${tmpdir}" mkdir "${tmpdir}" -if [ "$building_out_of_srctree" ]; then - ( - cd $srctree - for f in $dir_list - do find "$f" -name "*.h"; - done | tar -c -f - -T - | tar -xf - -C "${tmpdir}" - ) -fi - -for f in $dir_list; - do find "$f" -name "*.h"; -done | tar -c -f - -T - | tar -xf - -C "${tmpdir}" - -# Always exclude include/generated/utsversion.h -# Otherwise, the contents of the tarball may vary depending on the build steps. -rm -f "${tmpdir}/include/generated/utsversion.h" +# shellcheck disable=SC2154 # srctree is passed as an env variable +sed "s:^${srctree}/::" "${srclist}" | tar -c -f - -C "${srctree}" -T - | tar -xf - -C "${tmpdir}" +tar -c -f - -T "${objlist}" | tar -xf - -C "${tmpdir}" # Remove comments except SDPX lines # Use a temporary file to store directory contents to prevent find/xargs from @@ -92,8 +46,4 @@ tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \ --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \ -I $XZ -cf $tarfile -C "${tmpdir}/" . 
> /dev/null -echo $headers_md5 > kernel/kheaders.md5 -echo "$this_file_md5" >> kernel/kheaders.md5 -echo "$(md5sum $tarfile | cut -d ' ' -f1)" >> kernel/kheaders.md5 - rm -rf "${tmpdir}" From 1a0faff2833b59a74c8389bcdc390af99dc9d2cf Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 9 Jun 2025 01:59:56 +0900 Subject: [PATCH 0316/2411] kheaders: rebuild kheaders_data.tar.xz when KBUILD_BUILD_TIMESTAMP is changed This problem is similar to commit 7f8256ae0efb ("initramfs: Encode dependency on KBUILD_BUILD_TIMESTAMP"): kernel/gen_kheaders.sh has an internal dependency on KBUILD_BUILD_TIMESTAMP that is not exposed to make, so changing KBUILD_BUILD_TIMESTAMP will not trigger a rebuild of the archive. Move $(KBUILD_BUILD_TIMESTAMP) to the Makefile so that it is recorded in the *.cmd file. Signed-off-by: Masahiro Yamada --- kernel/Makefile | 2 +- kernel/gen_kheaders.sh | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/Makefile b/kernel/Makefile index 9a9ff405ea89..c486f17e669a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -162,7 +162,7 @@ $(obj)/config_data: $(KCONFIG_CONFIG) FORCE $(obj)/kheaders.o: $(obj)/kheaders_data.tar.xz quiet_cmd_kheaders_data = GEN $@ - cmd_kheaders_data = "$<" "$@" "$(obj)/kheaders-srclist" "$(obj)/kheaders-objlist" + cmd_kheaders_data = "$<" "$@" "$(obj)/kheaders-srclist" "$(obj)/kheaders-objlist" "$(KBUILD_BUILD_TIMESTAMP)" cmd_kheaders_data_dep = cat $(depfile) >> $(dot-target).cmd; rm -f $(depfile) define rule_kheaders_data diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index 0ff7beabb21a..919bdcf989f4 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -7,6 +7,7 @@ set -e tarfile=$1 srclist=$2 objlist=$3 +timestamp=$4 dir=$(dirname "${tarfile}") tmpdir=${dir}/.tmp_dir @@ -42,7 +43,7 @@ xargs -0 -P8 -n1 \ rm -f "${tmpdir}.contents.txt" # Create archive and try to normalize metadata for reproducibility.
-tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \ +tar "${timestamp:+--mtime=$timestamp}" \ --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \ -I $XZ -cf $tarfile -C "${tmpdir}/" . > /dev/null From f4363dfc900a7ffda96587d38982a1f3ea3d10bd Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 9 Jun 2025 01:59:57 +0900 Subject: [PATCH 0317/2411] kheaders: double-quote variables to satisfy shellcheck Fix the following: In kernel/gen_kheaders.sh line 48: -I $XZ -cf $tarfile -C "${tmpdir}/" . > /dev/null ^-^ SC2086 (info): Double quote to prevent globbing and word splitting. ^------^ SC2086 (info): Double quote to prevent globbing and word splitting. Signed-off-by: Masahiro Yamada --- kernel/gen_kheaders.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index 919bdcf989f4..c64e5a00a3d9 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -45,6 +45,6 @@ rm -f "${tmpdir}.contents.txt" # Create archive and try to normalize metadata for reproducibility. tar "${timestamp:+--mtime=$timestamp}" \ --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \ - -I $XZ -cf $tarfile -C "${tmpdir}/" . > /dev/null + -I "${XZ}" -cf "${tarfile}" -C "${tmpdir}/" . > /dev/null rm -rf "${tmpdir}" From 7934a8dd8692b56714ce9b36421e316445d94a77 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 6 Jun 2025 13:10:23 +0900 Subject: [PATCH 0318/2411] module: remove meaningless 'name' parameter from __MODULE_INFO() The symbol names in the .modinfo section are never used and already randomized by the __UNIQUE_ID() macro. Therefore, the second parameter of __MODULE_INFO() is meaningless and can be removed to simplify the code. With this change, the symbol names in the .modinfo section will be prefixed with __UNIQUE_ID_modinfo, making it clearer that they originate from MODULE_INFO(). 
[Before] $ objcopy -j .modinfo vmlinux.o modinfo.o $ nm -n modinfo.o | head -n10 0000000000000000 r __UNIQUE_ID_license560 0000000000000011 r __UNIQUE_ID_file559 0000000000000030 r __UNIQUE_ID_description558 0000000000000074 r __UNIQUE_ID_license580 000000000000008e r __UNIQUE_ID_file579 00000000000000bd r __UNIQUE_ID_description578 00000000000000e6 r __UNIQUE_ID_license581 00000000000000ff r __UNIQUE_ID_file580 0000000000000134 r __UNIQUE_ID_description579 0000000000000179 r __UNIQUE_ID_uncore_no_discover578 [After] $ objcopy -j .modinfo vmlinux.o modinfo.o $ nm -n modinfo.o | head -n10 0000000000000000 r __UNIQUE_ID_modinfo560 0000000000000011 r __UNIQUE_ID_modinfo559 0000000000000030 r __UNIQUE_ID_modinfo558 0000000000000074 r __UNIQUE_ID_modinfo580 000000000000008e r __UNIQUE_ID_modinfo579 00000000000000bd r __UNIQUE_ID_modinfo578 00000000000000e6 r __UNIQUE_ID_modinfo581 00000000000000ff r __UNIQUE_ID_modinfo580 0000000000000134 r __UNIQUE_ID_modinfo579 0000000000000179 r __UNIQUE_ID_modinfo578 Signed-off-by: Masahiro Yamada Reviewed-by: Petr Pavlu --- include/crypto/algapi.h | 4 ++-- include/linux/module.h | 3 --- include/linux/moduleparam.h | 9 +++++---- include/net/tcp.h | 4 ++-- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 188eface0a11..fc4574940636 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -43,8 +43,8 @@ * alias. 
*/ #define MODULE_ALIAS_CRYPTO(name) \ - __MODULE_INFO(alias, alias_userspace, name); \ - __MODULE_INFO(alias, alias_crypto, "crypto-" name) + MODULE_INFO(alias, name); \ + MODULE_INFO(alias, "crypto-" name) struct crypto_aead; struct crypto_instance; diff --git a/include/linux/module.h b/include/linux/module.h index 92e1420fccdf..81b41cc6a19e 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -164,9 +164,6 @@ extern void cleanup_module(void); struct module_kobject *lookup_or_create_module_kobject(const char *name); -/* Generic info of form tag = "info" */ -#define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) - /* For userspace: you can also call me... */ #define MODULE_ALIAS(_alias) MODULE_INFO(alias, _alias) diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index bfb85fd13e1f..00166f747e27 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -20,18 +20,19 @@ /* Chosen so that structs with an unsigned long line up. */ #define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long)) -#define __MODULE_INFO(tag, name, info) \ - static const char __UNIQUE_ID(name)[] \ +/* Generic info of form tag = "info" */ +#define MODULE_INFO(tag, info) \ + static const char __UNIQUE_ID(modinfo)[] \ __used __section(".modinfo") __aligned(1) \ = __MODULE_INFO_PREFIX __stringify(tag) "=" info #define __MODULE_PARM_TYPE(name, _type) \ - __MODULE_INFO(parmtype, name##type, #name ":" _type) + MODULE_INFO(parmtype, #name ":" _type) /* One for each parameter, describing how to use it. Some files do multiple of these per line, so can't just use MODULE_INFO. 
*/ #define MODULE_PARM_DESC(_parm, desc) \ - __MODULE_INFO(parm, _parm, #_parm ":" desc) + MODULE_INFO(parm, #_parm ":" desc) struct kernel_param; diff --git a/include/net/tcp.h b/include/net/tcp.h index 5078ad868fee..9b39ef630c92 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2662,8 +2662,8 @@ void tcp_update_ulp(struct sock *sk, struct proto *p, void (*write_space)(struct sock *sk)); #define MODULE_ALIAS_TCP_ULP(name) \ - __MODULE_INFO(alias, alias_userspace, name); \ - __MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name) + MODULE_INFO(alias, name); \ + MODULE_INFO(alias, "tcp-ulp-" name) #ifdef CONFIG_NET_SOCK_MSG struct sk_msg; From ced9ccd21fbc8ca941e6a0c2820c2df89239ccb9 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Jun 2025 22:28:20 +0900 Subject: [PATCH 0319/2411] rust: time: Replace HrTimerMode enum with trait-based mode types Replace the `HrTimerMode` enum with a trait-based approach that uses zero-sized types to represent each mode of operation. Each mode now implements the `HrTimerMode` trait. This refactoring is a preparation for replacing raw `Ktime` in HrTimer with the `Instant` and `Delta` types, and for making `HrTimer` generic over a `ClockSource`. Reviewed-by: Andreas Hindborg Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250610132823.3457263-3-fujita.tomonori@gmail.com Signed-off-by: Andreas Hindborg --- rust/kernel/time/hrtimer.rs | 164 ++++++++++++++++++++---------------- 1 file changed, 90 insertions(+), 74 deletions(-) diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs index 20b87a4d65ae..b6322f4b860f 100644 --- a/rust/kernel/time/hrtimer.rs +++ b/rust/kernel/time/hrtimer.rs @@ -98,7 +98,7 @@ pub fn to_ns(self) -> i64 { pub struct HrTimer { #[pin] timer: Opaque, - mode: HrTimerMode, + mode: bindings::hrtimer_mode, _t: PhantomData, } @@ -112,7 +112,7 @@ unsafe impl Sync for HrTimer {} impl HrTimer { /// Return an initializer for a new timer instance. 
- pub fn new(mode: HrTimerMode) -> impl PinInit + pub fn new() -> impl PinInit where T: HrTimerCallback, { @@ -127,11 +127,11 @@ pub fn new(mode: HrTimerMode) -> impl PinInit place, Some(T::Pointer::run), U::ID, - mode.into_c(), + M::C_MODE, ); } }), - mode: mode, + mode: M::C_MODE, _t: PhantomData, }) } @@ -389,7 +389,7 @@ unsafe fn start(this: *const Self, expires: Ktime) { Self::c_timer_ptr(this).cast_mut(), expires.to_ns(), 0, - (*Self::raw_get_timer(this)).mode.into_c(), + (*Self::raw_get_timer(this)).mode, ); } } @@ -412,77 +412,93 @@ fn into_c(self) -> bindings::hrtimer_restart { } /// Operational mode of [`HrTimer`]. -// NOTE: Some of these have the same encoding on the C side, so we keep -// `repr(Rust)` and convert elsewhere. -#[derive(Clone, Copy, PartialEq, Eq, Debug)] -pub enum HrTimerMode { - /// Timer expires at the given expiration time. - Absolute, - /// Timer expires after the given expiration time interpreted as a duration from now. - Relative, - /// Timer does not move between CPU cores. - Pinned, - /// Timer handler is executed in soft irq context. - Soft, - /// Timer handler is executed in hard irq context. - Hard, - /// Timer expires at the given expiration time. - /// Timer does not move between CPU cores. - AbsolutePinned, - /// Timer expires after the given expiration time interpreted as a duration from now. - /// Timer does not move between CPU cores. - RelativePinned, - /// Timer expires at the given expiration time. - /// Timer handler is executed in soft irq context. - AbsoluteSoft, - /// Timer expires after the given expiration time interpreted as a duration from now. - /// Timer handler is executed in soft irq context. - RelativeSoft, - /// Timer expires at the given expiration time. - /// Timer does not move between CPU cores. - /// Timer handler is executed in soft irq context. - AbsolutePinnedSoft, - /// Timer expires after the given expiration time interpreted as a duration from now. - /// Timer does not move between CPU cores. 
- /// Timer handler is executed in soft irq context. - RelativePinnedSoft, - /// Timer expires at the given expiration time. - /// Timer handler is executed in hard irq context. - AbsoluteHard, - /// Timer expires after the given expiration time interpreted as a duration from now. - /// Timer handler is executed in hard irq context. - RelativeHard, - /// Timer expires at the given expiration time. - /// Timer does not move between CPU cores. - /// Timer handler is executed in hard irq context. - AbsolutePinnedHard, - /// Timer expires after the given expiration time interpreted as a duration from now. - /// Timer does not move between CPU cores. - /// Timer handler is executed in hard irq context. - RelativePinnedHard, +pub trait HrTimerMode { + /// The C representation of hrtimer mode. + const C_MODE: bindings::hrtimer_mode; } -impl HrTimerMode { - fn into_c(self) -> bindings::hrtimer_mode { - use bindings::*; - match self { - HrTimerMode::Absolute => hrtimer_mode_HRTIMER_MODE_ABS, - HrTimerMode::Relative => hrtimer_mode_HRTIMER_MODE_REL, - HrTimerMode::Pinned => hrtimer_mode_HRTIMER_MODE_PINNED, - HrTimerMode::Soft => hrtimer_mode_HRTIMER_MODE_SOFT, - HrTimerMode::Hard => hrtimer_mode_HRTIMER_MODE_HARD, - HrTimerMode::AbsolutePinned => hrtimer_mode_HRTIMER_MODE_ABS_PINNED, - HrTimerMode::RelativePinned => hrtimer_mode_HRTIMER_MODE_REL_PINNED, - HrTimerMode::AbsoluteSoft => hrtimer_mode_HRTIMER_MODE_ABS_SOFT, - HrTimerMode::RelativeSoft => hrtimer_mode_HRTIMER_MODE_REL_SOFT, - HrTimerMode::AbsolutePinnedSoft => hrtimer_mode_HRTIMER_MODE_ABS_PINNED_SOFT, - HrTimerMode::RelativePinnedSoft => hrtimer_mode_HRTIMER_MODE_REL_PINNED_SOFT, - HrTimerMode::AbsoluteHard => hrtimer_mode_HRTIMER_MODE_ABS_HARD, - HrTimerMode::RelativeHard => hrtimer_mode_HRTIMER_MODE_REL_HARD, - HrTimerMode::AbsolutePinnedHard => hrtimer_mode_HRTIMER_MODE_ABS_PINNED_HARD, - HrTimerMode::RelativePinnedHard => hrtimer_mode_HRTIMER_MODE_REL_PINNED_HARD, - } - } +/// Timer that expires at a fixed 
point in time. +pub struct AbsoluteMode; + +impl HrTimerMode for AbsoluteMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS; +} + +/// Timer that expires after a delay from now. +pub struct RelativeMode; + +impl HrTimerMode for RelativeMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL; +} + +/// Timer with absolute expiration time, pinned to its current CPU. +pub struct AbsolutePinnedMode; + +impl HrTimerMode for AbsolutePinnedMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_PINNED; +} + +/// Timer with relative expiration time, pinned to its current CPU. +pub struct RelativePinnedMode; + +impl HrTimerMode for RelativePinnedMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_PINNED; +} + +/// Timer with absolute expiration, handled in soft irq context. +pub struct AbsoluteSoftMode; + +impl HrTimerMode for AbsoluteSoftMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_SOFT; +} + +/// Timer with relative expiration, handled in soft irq context. +pub struct RelativeSoftMode; + +impl HrTimerMode for RelativeSoftMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_SOFT; +} + +/// Timer with absolute expiration, pinned to CPU and handled in soft irq context. +pub struct AbsolutePinnedSoftMode; + +impl HrTimerMode for AbsolutePinnedSoftMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_PINNED_SOFT; +} + +/// Timer with relative expiration, pinned to CPU and handled in soft irq context. +pub struct RelativePinnedSoftMode; + +impl HrTimerMode for RelativePinnedSoftMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_PINNED_SOFT; +} + +/// Timer with absolute expiration, handled in hard irq context. 
+pub struct AbsoluteHardMode; + +impl HrTimerMode for AbsoluteHardMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_HARD; +} + +/// Timer with relative expiration, handled in hard irq context. +pub struct RelativeHardMode; + +impl HrTimerMode for RelativeHardMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_HARD; +} + +/// Timer with absolute expiration, pinned to CPU and handled in hard irq context. +pub struct AbsolutePinnedHardMode; + +impl HrTimerMode for AbsolutePinnedHardMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_PINNED_HARD; +} + +/// Timer with relative expiration, pinned to CPU and handled in hard irq context. +pub struct RelativePinnedHardMode; + +impl HrTimerMode for RelativePinnedHardMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_PINNED_HARD; } /// Use to implement the [`HasHrTimer`] trait. From a02fd05661d73a8507dd70dd820e9b984490c545 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 24 Jun 2025 14:29:27 +0800 Subject: [PATCH 0320/2411] PCI: Extend isolated function probing to LoongArch Like s390 and the jailhouse hypervisor, LoongArch's PCI architecture allows passing isolated PCI functions to a guest OS instance. So it is possible that there is a multi-function device without function 0 for the host or guest. Allow probing such functions by adding an IS_ENABLED(CONFIG_LOONGARCH) case in the hypervisor_isolated_pci_functions() helper. This is similar to commit 189c6c33ff42 ("PCI: Extend isolated function probing to s390").
Signed-off-by: Huacai Chen Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250624062927.4037734-1-chenhuacai@loongson.cn --- include/linux/hypervisor.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/hypervisor.h b/include/linux/hypervisor.h index 9efbc54e35e5..be5417303ecf 100644 --- a/include/linux/hypervisor.h +++ b/include/linux/hypervisor.h @@ -37,6 +37,9 @@ static inline bool hypervisor_isolated_pci_functions(void) if (IS_ENABLED(CONFIG_S390)) return true; + if (IS_ENABLED(CONFIG_LOONGARCH)) + return true; + return jailhouse_paravirt(); } From 3317dc9ebda6d585a4e74a8d4a74d0d2dc6b14c6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 23 Jun 2025 09:19:29 -0700 Subject: [PATCH 0321/2411] perf srcline: Lower verbosity on addr2line debug messages Lower non-error debug messages to verbose 3 or larger. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250623161930.1421216-2-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/srcline.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index f32d0d4f4bc9..3e3449e35dd4 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -524,12 +524,12 @@ static enum a2l_style addr2line_configure(struct child_process *a2l, const char style = LLVM; cached = true; lines = 1; - pr_debug("Detected LLVM addr2line style\n"); + pr_debug3("Detected LLVM addr2line style\n"); } else if (ch == '0') { style = GNU_BINUTILS; cached = true; lines = 3; - pr_debug("Detected binutils addr2line style\n"); + pr_debug3("Detected binutils addr2line style\n"); } else { if (!symbol_conf.disable_add2line_warn) { char *output = NULL; @@ -595,7 +595,7 @@ static int read_addr2line_record(struct io *io, if (io__getline(io, &line, &line_len) < 0 || !line_len) goto error; - pr_debug("%s %s: addr2line read address for sentinel: %s", __func__, dso_name, line); + pr_debug3("%s %s: 
addr2line read address for sentinel: %s", __func__, dso_name, line); if (style == LLVM && line_len == 2 && line[0] == ',') { /* Found the llvm-addr2line sentinel character. */ zfree(&line); @@ -641,7 +641,7 @@ static int read_addr2line_record(struct io *io, if (first && (io__getline(io, &line, &line_len) < 0 || !line_len)) goto error; - pr_debug("%s %s: addr2line read line: %s", __func__, dso_name, line); + pr_debug3("%s %s: addr2line read line: %s", __func__, dso_name, line); if (function != NULL) *function = strdup(strim(line)); @@ -652,7 +652,7 @@ static int read_addr2line_record(struct io *io, if (io__getline(io, &line, &line_len) < 0 || !line_len) goto error; - pr_debug("%s %s: addr2line filename:number : %s", __func__, dso_name, line); + pr_debug3("%s %s: addr2line filename:number : %s", __func__, dso_name, line); if (filename_split(line, line_nr == NULL ? &dummy_line_nr : line_nr) == 0 && style == GNU_BINUTILS) { ret = 0; From c335a4e927537996b425025586d5d8db2763124c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 20 Jun 2025 18:24:33 -0300 Subject: [PATCH 0322/2411] perf build: Suggest java-latest-openjdk-devel instead of old 1.8.0 one Just tidying up the suggestion to pick the latest and not some specific version. Signed-off-by: Arnaldo Carvalho de Melo Reviewed-by: James Clark Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250620212435.93846-2-acme@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index affe5e173920..342402a24325 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -1152,7 +1152,7 @@ ifndef NO_JVMTI endif endif # NO_JVMTI_CMLR else - $(warning No openjdk development package found, please install JDK package, e.g. openjdk-8-jdk, java-1.8.0-openjdk-devel) + $(warning No openjdk development package found, please install JDK package, e.g. 
openjdk-8-jdk, java-latest-openjdk-devel) NO_JVMTI := 1 endif endif From 7c750d399b60e17f2a346690e0d34aae9c086eac Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 20 Jun 2025 18:24:34 -0300 Subject: [PATCH 0323/2411] perf build: Add the libpfm devel fedora package name to the hint Just to follow the pattern with other devel packages. Signed-off-by: Arnaldo Carvalho de Melo Reviewed-by: James Clark Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250620212435.93846-3-acme@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 342402a24325..2672c249eadf 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -1165,7 +1165,7 @@ ifndef NO_LIBPFM4 ASCIIDOC_EXTRA = -aHAVE_LIBPFM=1 $(call detected,CONFIG_LIBPFM4) else - $(warning libpfm4 not found, disables libpfm4 support. Please install libpfm4-dev) + $(warning libpfm4 not found, disables libpfm4 support. Please install libpfm-devel or libpfm4-dev) endif endif From 970ae86307718c347aff8fe4bf6e780bcce26c58 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 20 Jun 2025 18:24:35 -0300 Subject: [PATCH 0324/2411] perf build: The bfd features are opt-in, stop testing for them by default These are leftovers noticed while updating a build container. We don't need those so that test-all.c can build and thus speed up the feature detection. Test for those features only if the user asks for BUILD_NONDISTRO=1 to build with libbfd. 
Signed-off-by: Arnaldo Carvalho de Melo Reviewed-by: James Clark Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250620212435.93846-4-acme@kernel.org Signed-off-by: Namhyung Kim --- tools/build/feature/test-all.c | 19 ------------------- tools/perf/Makefile.config | 5 +++++ 2 files changed, 5 insertions(+), 19 deletions(-) diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 03ddaac6f4c4..1010f233d9c1 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -66,14 +66,6 @@ # include "test-libslang.c" #undef main -#define main main_test_libbfd -# include "test-libbfd.c" -#undef main - -#define main main_test_libbfd_buildid -# include "test-libbfd-buildid.c" -#undef main - #define main main_test_backtrace # include "test-backtrace.c" #undef main @@ -158,14 +150,6 @@ # include "test-reallocarray.c" #undef main -#define main main_test_disassembler_four_args -# include "test-disassembler-four-args.c" -#undef main - -#define main main_test_disassembler_init_styled -# include "test-disassembler-init-styled.c" -#undef main - #define main main_test_libzstd # include "test-libzstd.c" #undef main @@ -193,8 +177,6 @@ int main(int argc, char *argv[]) main_test_libelf_gelf_getnote(); main_test_libelf_getshdrstrndx(); main_test_libslang(); - main_test_libbfd(); - main_test_libbfd_buildid(); main_test_backtrace(); main_test_libnuma(); main_test_numa_num_possible_cpus(); @@ -213,7 +195,6 @@ int main(int argc, char *argv[]) main_test_setns(); main_test_libaio(); main_test_reallocarray(); - main_test_disassembler_four_args(); main_test_libzstd(); main_test_libtraceevent(); main_test_libtracefs(); diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 2672c249eadf..24736b0bbb30 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -926,6 +926,8 @@ ifneq ($(NO_JEVENTS),1) endif ifdef BUILD_NONDISTRO + $(call feature_check,libbfd) + ifeq ($(feature-libbfd), 1) EXTLIBS += -lbfd 
-lopcodes else @@ -954,6 +956,9 @@ ifdef BUILD_NONDISTRO CFLAGS += -DHAVE_LIBBFD_SUPPORT CXXFLAGS += -DHAVE_LIBBFD_SUPPORT + + $(call feature_check,libbfd-buildid) + ifeq ($(feature-libbfd-buildid), 1) CFLAGS += -DHAVE_LIBBFD_BUILDID_SUPPORT else From c21986d33d6beb269a35b38dcb8adaa5bd228527 Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Mon, 5 May 2025 18:44:19 +0100 Subject: [PATCH 0325/2411] perf unwind-libdw: skip non-regular files Without the change `perf` hangs up on character devices. On my system it's enough to run system-wide sampler for a few seconds to get the hangup: $ perf record -a -g --call-graph=dwarf $ perf report # hung `strace` shows that hangup happens on reading on a character device `/dev/dri/renderD128` $ strace -y -f -p 2780484 strace: Process 2780484 attached pread64(101, strace: Process 2780484 detached Its call trace descends into `elfutils`: $ gdb -p 2780484 (gdb) bt #0 0x00007f5e508f04b7 in __libc_pread64 (fd=101, buf=0x7fff9df7edb0, count=0, offset=0) at ../sysdeps/unix/sysv/linux/pread64.c:25 #1 0x00007f5e52b79515 in read_file () from /<>/elfutils-0.192/lib/libelf.so.1 #2 0x00007f5e52b25666 in libdw_open_elf () from /<>/elfutils-0.192/lib/libdw.so.1 #3 0x00007f5e52b25907 in __libdw_open_file () from /<>/elfutils-0.192/lib/libdw.so.1 #4 0x00007f5e52b120a9 in dwfl_report_elf@@ELFUTILS_0.156 () from /<>/elfutils-0.192/lib/libdw.so.1 #5 0x000000000068bf20 in __report_module (al=al@entry=0x7fff9df80010, ip=ip@entry=139803237033216, ui=ui@entry=0x5369b5e0) at util/dso.h:537 #6 0x000000000068c3d1 in report_module (ip=139803237033216, ui=0x5369b5e0) at util/unwind-libdw.c:114 #7 frame_callback (state=0x535aef10, arg=0x5369b5e0) at util/unwind-libdw.c:242 #8 0x00007f5e52b261d3 in dwfl_thread_getframes () from /<>/elfutils-0.192/lib/libdw.so.1 #9 0x00007f5e52b25bdb in get_one_thread_cb () from /<>/elfutils-0.192/lib/libdw.so.1 #10 0x00007f5e52b25faa in dwfl_getthreads () from /<>/elfutils-0.192/lib/libdw.so.1 #11 0x00007f5e52b26514 in
dwfl_getthread_frames () from /<>/elfutils-0.192/lib/libdw.so.1 #12 0x000000000068c6ce in unwind__get_entries (cb=cb@entry=0x5d4620 , arg=arg@entry=0x10cd5fa0, thread=thread@entry=0x1076a290, data=data@entry=0x7fff9df80540, max_stack=max_stack@entry=127, best_effort=best_effort@entry=false) at util/thread.h:152 #13 0x00000000005dae95 in thread__resolve_callchain_unwind (evsel=0x106006d0, thread=0x1076a290, cursor=0x10cd5fa0, sample=0x7fff9df80540, max_stack=127, symbols=true) at util/machine.c:2939 #14 thread__resolve_callchain_unwind (thread=0x1076a290, cursor=0x10cd5fa0, evsel=0x106006d0, sample=0x7fff9df80540, max_stack=127, symbols=true) at util/machine.c:2920 #15 __thread__resolve_callchain (thread=0x1076a290, cursor=0x10cd5fa0, evsel=0x106006d0, evsel@entry=0x7fff9df80440, sample=0x7fff9df80540, parent=parent@entry=0x7fff9df804a0, root_al=root_al@entry=0x7fff9df80440, max_stack=127, symbols=true) at util/machine.c:2970 #16 0x00000000005d0cb2 in thread__resolve_callchain (thread=, cursor=, evsel=0x7fff9df80440, sample=, parent=0x7fff9df804a0, root_al=0x7fff9df80440, max_stack=127) at util/machine.h:198 #17 sample__resolve_callchain (sample=, cursor=, parent=parent@entry=0x7fff9df804a0, evsel=evsel@entry=0x106006d0, al=al@entry=0x7fff9df80440, max_stack=max_stack@entry=127) at util/callchain.c:1127 #18 0x0000000000617e08 in hist_entry_iter__add (iter=iter@entry=0x7fff9df80480, al=al@entry=0x7fff9df80440, max_stack_depth=127, arg=arg@entry=0x7fff9df81ae0) at util/hist.c:1255 #19 0x000000000045d2d0 in process_sample_event (tool=0x7fff9df81ae0, event=, sample=0x7fff9df80540, evsel=0x106006d0, machine=) at builtin-report.c:334 #20 0x00000000005e3bb1 in perf_session__deliver_event (session=0x105ff2c0, event=0x7f5c7d735ca0, tool=0x7fff9df81ae0, file_offset=2914716832, file_path=0x105ffbf0 "perf.data") at util/session.c:1367 #21 0x00000000005e8d93 in do_flush (oe=0x105ffa50, show_progress=false) at util/ordered-events.c:245 #22 __ordered_events__flush (oe=0x105ffa50, 
how=OE_FLUSH__ROUND, timestamp=) at util/ordered-events.c:324 #23 0x00000000005e1f64 in perf_session__process_user_event (session=0x105ff2c0, event=0x7f5c7d752b18, file_offset=2914835224, file_path=0x105ffbf0 "perf.data") at util/session.c:1419 #24 0x00000000005e47c7 in reader__read_event (rd=rd@entry=0x7fff9df81260, session=session@entry=0x105ff2c0, --Type for more, q to quit, c to continue without paging-- quit prog=prog@entry=0x7fff9df81220) at util/session.c:2132 #25 0x00000000005e4b37 in reader__process_events (rd=0x7fff9df81260, session=0x105ff2c0, prog=0x7fff9df81220) at util/session.c:2181 #26 __perf_session__process_events (session=0x105ff2c0) at util/session.c:2226 #27 perf_session__process_events (session=session@entry=0x105ff2c0) at util/session.c:2390 #28 0x0000000000460add in __cmd_report (rep=0x7fff9df81ae0) at builtin-report.c:1076 #29 cmd_report (argc=, argv=) at builtin-report.c:1827 #30 0x00000000004c5a40 in run_builtin (p=p@entry=0xd8f7f8 , argc=argc@entry=1, argv=argv@entry=0x7fff9df844b0) at perf.c:351 #31 0x00000000004c5d63 in handle_internal_command (argc=argc@entry=1, argv=argv@entry=0x7fff9df844b0) at perf.c:404 #32 0x0000000000442de3 in run_argv (argcp=, argv=) at perf.c:448 #33 main (argc=, argv=0x7fff9df844b0) at perf.c:556 The hangup happens because nothing in `perf` or `elfutils` checks if a mapped file is easily readable. The change conservatively skips all non-regular files.
Signed-off-by: Sergei Trofimovich Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20250505174419.2814857-1-slyich@gmail.com Signed-off-by: Namhyung Kim --- tools/perf/util/unwind-libdw.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 793d11832694..ae70fb56a057 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -84,8 +84,11 @@ static int __report_module(struct addr_location *al, u64 ip, char filename[PATH_MAX]; __symbol__join_symfs(filename, sizeof(filename), dso__long_name(dso)); - mod = dwfl_report_elf(ui->dwfl, dso__short_name(dso), filename, -1, - base, false); + /* Don't hang up on device files like /dev/dri/renderD128. */ + if (is_regular_file(filename)) { + mod = dwfl_report_elf(ui->dwfl, dso__short_name(dso), filename, -1, + base, false); + } } if (!mod) { char filename[PATH_MAX]; From 317fa41b47da63730145e336c9ef47c62b78ee4f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 2 May 2025 13:40:56 -0700 Subject: [PATCH 0326/2411] perf trace: Show zero value in STRARRAY The STRARRAY macro is to print values in a pre-defined array. But sometimes it hides the value because it's 0. The value of 0 can have a meaning in this case so set 'show_zero' field. For example, it can show CREATE_MAP cmd in the bpf syscall. 
Acked-by: Howard Chu Link: https://lore.kernel.org/r/20250502204056.973977-1-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/builtin-trace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index bf9b5d0630d3..61650be8fccd 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1124,12 +1124,14 @@ static bool syscall_arg__strtoul_btf_type(char *bf __maybe_unused, size_t size _ #define STRARRAY(name, array) \ { .scnprintf = SCA_STRARRAY, \ .strtoul = STUL_STRARRAY, \ - .parm = &strarray__##array, } + .parm = &strarray__##array, \ + .show_zero = true, } #define STRARRAY_FLAGS(name, array) \ { .scnprintf = SCA_STRARRAY_FLAGS, \ .strtoul = STUL_STRARRAY_FLAGS, \ - .parm = &strarray__##array, } + .parm = &strarray__##array, \ + .show_zero = true, } #include "trace/beauty/eventfd.c" #include "trace/beauty/futex_op.c" From df9c299371054cb725eef730fd0f1d0fe2ed6bb0 Mon Sep 17 00:00:00 2001 From: Tianyou Li Date: Tue, 10 Jun 2025 12:04:22 +0800 Subject: [PATCH 0327/2411] perf script: Handle -i option for perf script flamegraph If the perf data file is specified with the -i option, the script will try to read the header information without regard to the file name specified; instead, it will try to access perf.data. This simple patch uses the file name from the -i option for the command perf report --header-only to read the header.
Signed-off-by: Tianyou Li Reviewed-by: Pan Deng Reviewed-by: Zhiguo Zhou Reviewed-by: Wangyang Guo Reviewed-by: Tim Chen Link: https://lore.kernel.org/r/20250610040536.2390060-1-tianyou.li@intel.com Signed-off-by: Namhyung Kim --- tools/perf/scripts/python/flamegraph.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/flamegraph.py b/tools/perf/scripts/python/flamegraph.py index cf7ce8229a6c..4f82dfea0a70 100755 --- a/tools/perf/scripts/python/flamegraph.py +++ b/tools/perf/scripts/python/flamegraph.py @@ -123,7 +123,13 @@ class FlameGraphCLI: return "" try: - output = subprocess.check_output(["perf", "report", "--header-only"]) + # if the file name other than perf.data is given, + # we read the header of that file + if self.args.input: + output = subprocess.check_output(["perf", "report", "--header-only", "-i", self.args.input]) + else: + output = subprocess.check_output(["perf", "report", "--header-only"]) + return output.decode("utf-8") except Exception as err: # pylint: disable=broad-except print("Error reading report header: {}".format(err), file=sys.stderr) From 9a79c50c2a95887859d5ac133180775b708b850a Mon Sep 17 00:00:00 2001 From: Tianyou Li Date: Tue, 10 Jun 2025 12:04:23 +0800 Subject: [PATCH 0328/2411] perf script: Add -e option to flamegraph script When processing the perf data file generated with multiple events, the flamegraph script will count all the events regardless of different event names. This patch tries to add a -e option to specify the event name that the flamegraph will be generated accordingly. If the -e option omitted, the behavior remains unchanged. 
Signed-off-by: Tianyou Li Reviewed-by: Pan Deng Reviewed-by: Zhiguo Zhou Reviewed-by: Wangyang Guo Reviewed-by: Tim Chen Link: https://lore.kernel.org/r/20250610040536.2390060-2-tianyou.li@intel.com Signed-off-by: Namhyung Kim --- tools/perf/scripts/python/flamegraph.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/flamegraph.py b/tools/perf/scripts/python/flamegraph.py index 4f82dfea0a70..e49ff242b779 100755 --- a/tools/perf/scripts/python/flamegraph.py +++ b/tools/perf/scripts/python/flamegraph.py @@ -94,6 +94,11 @@ class FlameGraphCLI: return child def process_event(self, event): + # ignore events where the event name does not match + # the one specified by the user + if self.args.event_name and event.get("ev_name") != self.args.event_name: + return + pid = event.get("sample", {}).get("pid", 0) # event["dso"] sometimes contains /usr/lib/debug/lib/modules/*/vmlinux # for user-space processes; let's use pid for kernel or user-space distinction @@ -130,7 +135,10 @@ class FlameGraphCLI: else: output = subprocess.check_output(["perf", "report", "--header-only"]) - return output.decode("utf-8") + result = output.decode("utf-8") + if self.args.event_name: + result += "\nFocused event: " + self.args.event_name + return result except Exception as err: # pylint: disable=broad-except print("Error reading report header: {}".format(err), file=sys.stderr) return "" @@ -241,6 +249,11 @@ if __name__ == "__main__": default=False, action="store_true", help="allow unprompted downloading of HTML template") + parser.add_argument("-e", "--event", + default="", + dest="event_name", + type=str, + help="specify the event to generate flamegraph for") cli_args = parser.parse_args() cli = FlameGraphCLI(cli_args) From eda9e47fae276d2b7a2b6a826b38259e6481d879 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 17 Jun 2025 15:33:55 -0700 Subject: [PATCH 0329/2411] perf trace: Add missed freeing of ordered events and thread Caught 
by leak sanitizer running "perf trace BTF general tests". Make the ordered_events initialization unconditional and early so that trace__exit cleanup is simple - ordered_events__init doesn't allocate and just sets up 4 values and inits 3 list heads. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250617223356.2752099-3-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-trace.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 61650be8fccd..c38225a89fc8 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -5361,6 +5361,7 @@ static int trace__config(const char *var, const char *value, void *arg) static void trace__exit(struct trace *trace) { + thread__zput(trace->current); strlist__delete(trace->ev_qualifier); zfree(&trace->ev_qualifier_ids.entries); if (trace->syscalls.table) { @@ -5371,6 +5372,7 @@ static void trace__exit(struct trace *trace) zfree(&trace->perfconfig_events); evlist__delete(trace->evlist); trace->evlist = NULL; + ordered_events__free(&trace->oe.data); #ifdef HAVE_LIBBPF_SUPPORT btf__free(trace->btf); trace->btf = NULL; @@ -5520,6 +5522,9 @@ int cmd_trace(int argc, const char **argv) sigchld_act.sa_sigaction = sighandler_chld; sigaction(SIGCHLD, &sigchld_act, NULL); + ordered_events__init(&trace.oe.data, ordered_events__deliver_event, &trace); + ordered_events__set_copy_on_queue(&trace.oe.data, true); + trace.evlist = evlist__new(); if (trace.evlist == NULL) { @@ -5678,11 +5683,6 @@ int cmd_trace(int argc, const char **argv) trace__load_vmlinux_btf(&trace); } - if (trace.sort_events) { - ordered_events__init(&trace.oe.data, ordered_events__deliver_event, &trace); - ordered_events__set_copy_on_queue(&trace.oe.data, true); - } - /* * If we are augmenting syscalls, then combine what we put in the * __augmented_syscalls__ BPF map with what is in the From be59dba332e1e8edd3e88d991ba0e4795ae2bcb2 Mon Sep 17 00:00:00 
2001 From: Ian Rogers Date: Tue, 17 Jun 2025 15:33:56 -0700 Subject: [PATCH 0330/2411] libperf evsel: Add missed puts and asserts A missed evsel__close before evsel__delete was the source of leaking perf events due to a hybrid test. Add asserts in debug builds so that this shouldn't happen in the future. Add puts missing on the cpu map and thread maps. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250617223356.2752099-4-irogers@google.com Signed-off-by: Namhyung Kim --- tools/lib/perf/evsel.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index c475319e2e41..2a85e0bfee1e 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -42,6 +42,12 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr) void perf_evsel__delete(struct perf_evsel *evsel) { + assert(evsel->fd == NULL); /* If not fds were not closed. */ + assert(evsel->mmap == NULL); /* If not munmap wasn't called. */ + assert(evsel->sample_id == NULL); /* If not free_id wasn't called. */ + perf_cpu_map__put(evsel->cpus); + perf_cpu_map__put(evsel->own_cpus); + perf_thread_map__put(evsel->threads); free(evsel); } From d9fc00dc73542eef98db74085447c57174ca290d Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Jun 2025 22:28:21 +0900 Subject: [PATCH 0331/2411] rust: time: Add HrTimerExpires trait Introduce the `HrTimerExpires` trait to represent types that can be used as expiration values for high-resolution timers. Define a required method, `into_nanos()`, which returns the expiration time as a raw nanosecond value suitable for use with C's hrtimer APIs. Also extend the `HrTimerMode` to use the `HrTimerExpires` trait. This refactoring is a preparation for enabling hrtimer code to work uniformly with both absolute and relative expiration modes. 
Reviewed-by: Andreas Hindborg Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250610132823.3457263-4-fujita.tomonori@gmail.com [ changed conversion method names to `as_*` - Andreas ] Signed-off-by: Andreas Hindborg --- rust/kernel/time.rs | 5 ++ rust/kernel/time/hrtimer.rs | 128 ++++++++++++++++++++++++++---------- 2 files changed, 97 insertions(+), 36 deletions(-) diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs index 1be5ecd814d3..5a9ca0d3b7d4 100644 --- a/rust/kernel/time.rs +++ b/rust/kernel/time.rs @@ -194,6 +194,11 @@ pub fn now() -> Self { pub fn elapsed(&self) -> Delta { Self::now() - *self } + + #[inline] + pub(crate) fn as_nanos(&self) -> i64 { + self.inner + } } impl core::ops::Sub for Instant { diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs index b6322f4b860f..cae7aad6e46d 100644 --- a/rust/kernel/time/hrtimer.rs +++ b/rust/kernel/time/hrtimer.rs @@ -67,7 +67,7 @@ //! A `restart` operation on a timer in the **stopped** state is equivalent to a //! `start` operation. -use super::ClockSource; +use super::{ClockSource, Delta, Instant}; use crate::{prelude::*, types::Opaque}; use core::marker::PhantomData; use pin_init::PinInit; @@ -411,94 +411,150 @@ fn into_c(self) -> bindings::hrtimer_restart { } } +/// Time representations that can be used as expiration values in [`HrTimer`]. +pub trait HrTimerExpires { + /// Converts the expiration time into a nanosecond representation. + /// + /// This value corresponds to a raw ktime_t value, suitable for passing to kernel + /// timer functions. The interpretation (absolute vs relative) depends on the + /// associated [HrTimerMode] in use. + fn as_nanos(&self) -> i64; +} + +impl HrTimerExpires for Instant { + #[inline] + fn as_nanos(&self) -> i64 { + Instant::::as_nanos(self) + } +} + +impl HrTimerExpires for Delta { + #[inline] + fn as_nanos(&self) -> i64 { + Delta::as_nanos(*self) + } +} + /// Operational mode of [`HrTimer`]. 
pub trait HrTimerMode { /// The C representation of hrtimer mode. const C_MODE: bindings::hrtimer_mode; + + /// Type representing the clock source. + type Clock: ClockSource; + + /// Type representing the expiration specification (absolute or relative time). + type Expires: HrTimerExpires; } /// Timer that expires at a fixed point in time. -pub struct AbsoluteMode; +pub struct AbsoluteMode(PhantomData); -impl HrTimerMode for AbsoluteMode { +impl HrTimerMode for AbsoluteMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS; + + type Clock = C; + type Expires = Instant; } /// Timer that expires after a delay from now. -pub struct RelativeMode; +pub struct RelativeMode(PhantomData); -impl HrTimerMode for RelativeMode { +impl HrTimerMode for RelativeMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL; + + type Clock = C; + type Expires = Delta; } /// Timer with absolute expiration time, pinned to its current CPU. -pub struct AbsolutePinnedMode; - -impl HrTimerMode for AbsolutePinnedMode { +pub struct AbsolutePinnedMode(PhantomData); +impl HrTimerMode for AbsolutePinnedMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_PINNED; + + type Clock = C; + type Expires = Instant; } /// Timer with relative expiration time, pinned to its current CPU. -pub struct RelativePinnedMode; - -impl HrTimerMode for RelativePinnedMode { +pub struct RelativePinnedMode(PhantomData); +impl HrTimerMode for RelativePinnedMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_PINNED; + + type Clock = C; + type Expires = Delta; } /// Timer with absolute expiration, handled in soft irq context. 
-pub struct AbsoluteSoftMode; - -impl HrTimerMode for AbsoluteSoftMode { +pub struct AbsoluteSoftMode(PhantomData); +impl HrTimerMode for AbsoluteSoftMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_SOFT; + + type Clock = C; + type Expires = Instant; } /// Timer with relative expiration, handled in soft irq context. -pub struct RelativeSoftMode; - -impl HrTimerMode for RelativeSoftMode { +pub struct RelativeSoftMode(PhantomData); +impl HrTimerMode for RelativeSoftMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_SOFT; + + type Clock = C; + type Expires = Delta; } /// Timer with absolute expiration, pinned to CPU and handled in soft irq context. -pub struct AbsolutePinnedSoftMode; - -impl HrTimerMode for AbsolutePinnedSoftMode { +pub struct AbsolutePinnedSoftMode(PhantomData); +impl HrTimerMode for AbsolutePinnedSoftMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_PINNED_SOFT; + + type Clock = C; + type Expires = Instant; } -/// Timer with relative expiration, pinned to CPU and handled in soft irq context. -pub struct RelativePinnedSoftMode; - -impl HrTimerMode for RelativePinnedSoftMode { +/// Timer with relative expiration, pinned to CPU and handled in soft irq context. +pub struct RelativePinnedSoftMode(PhantomData); +impl HrTimerMode for RelativePinnedSoftMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_PINNED_SOFT; + + type Clock = C; + type Expires = Delta; } /// Timer with absolute expiration, handled in hard irq context. -pub struct AbsoluteHardMode; - -impl HrTimerMode for AbsoluteHardMode { +pub struct AbsoluteHardMode(PhantomData); +impl HrTimerMode for AbsoluteHardMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_HARD; + + type Clock = C; + type Expires = Instant; } /// Timer with relative expiration, handled in hard irq context.
-pub struct RelativeHardMode; - -impl HrTimerMode for RelativeHardMode { +pub struct RelativeHardMode(PhantomData); +impl HrTimerMode for RelativeHardMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_HARD; + + type Clock = C; + type Expires = Delta; } /// Timer with absolute expiration, pinned to CPU and handled in hard irq context. -pub struct AbsolutePinnedHardMode; - -impl HrTimerMode for AbsolutePinnedHardMode { +pub struct AbsolutePinnedHardMode(PhantomData); +impl HrTimerMode for AbsolutePinnedHardMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_PINNED_HARD; + + type Clock = C; + type Expires = Instant; } /// Timer with relative expiration, pinned to CPU and handled in hard irq context. -pub struct RelativePinnedHardMode; - -impl HrTimerMode for RelativePinnedHardMode { +pub struct RelativePinnedHardMode(PhantomData); +impl HrTimerMode for RelativePinnedHardMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_PINNED_HARD; + + type Clock = C; + type Expires = Delta; } /// Use to implement the [`HasHrTimer`] trait. From e0c0ab04f6785abaa71b9b8dc252cb1a2072c225 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Jun 2025 22:28:22 +0900 Subject: [PATCH 0332/2411] rust: time: Make HasHrTimer generic over HrTimerMode Add a `TimerMode` associated type to the `HasHrTimer` trait to represent the operational mode of the timer, such as absolute or relative expiration. This new type must implement the `HrTimerMode` trait, which defines how expiration values are interpreted. Update the `start()` method to accept an `expires` parameter of type `::Expires` instead of the fixed `Ktime`. This enables different timer modes to provide strongly typed expiration values, such as `Instant` or `Delta`. The `impl_has_hr_timer` macro is also extended to allow specifying the `HrTimerMode`. 
In the following example, it guarantees that the `start()` method for `Foo` only accepts `Instant`. Using a `Delta` or an `Instant` with a different clock source will result in a compile-time error: struct Foo { #[pin] timer: HrTimer, } impl_has_hr_timer! { impl HasHrTimer for Foo { mode : AbsoluteMode, field : self.timer } } This design eliminates runtime mismatches between expires types and clock sources, and enables stronger type-level guarantees throughout hrtimer. Signed-off-by: FUJITA Tomonori Reviewed-by: Andreas Hindborg Link: https://lore.kernel.org/r/20250610132823.3457263-5-fujita.tomonori@gmail.com [ changed conversion method names to `as_*` - Andreas ] Signed-off-by: Andreas Hindborg --- rust/kernel/time/hrtimer.rs | 55 ++++++++++++++++++++++------- rust/kernel/time/hrtimer/arc.rs | 8 +++-- rust/kernel/time/hrtimer/pin.rs | 8 +++-- rust/kernel/time/hrtimer/pin_mut.rs | 8 +++-- rust/kernel/time/hrtimer/tbox.rs | 8 +++-- 5 files changed, 66 insertions(+), 21 deletions(-) diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs index cae7aad6e46d..8b15eb374db0 100644 --- a/rust/kernel/time/hrtimer.rs +++ b/rust/kernel/time/hrtimer.rs @@ -98,7 +98,6 @@ pub fn to_ns(self) -> i64 { pub struct HrTimer { #[pin] timer: Opaque, - mode: bindings::hrtimer_mode, _t: PhantomData, } @@ -112,9 +111,10 @@ unsafe impl Sync for HrTimer {} impl HrTimer { /// Return an initializer for a new timer instance. - pub fn new() -> impl PinInit + pub fn new() -> impl PinInit where T: HrTimerCallback, + T: HasHrTimer, { pin_init!(Self { // INVARIANT: We initialize `timer` with `hrtimer_setup` below. @@ -126,12 +126,11 @@ pub fn new() -> impl PinInit bindings::hrtimer_setup( place, Some(T::Pointer::run), - U::ID, - M::C_MODE, + <>::TimerMode as HrTimerMode>::Clock::ID, + >::TimerMode::C_MODE, ); } }), - mode: M::C_MODE, _t: PhantomData, }) } @@ -193,6 +192,11 @@ pub(crate) unsafe fn raw_cancel(this: *const Self) -> bool { /// exist. 
A timer can be manipulated through any of the handles, and a handle /// may represent a cancelled timer. pub trait HrTimerPointer: Sync + Sized { + /// The operational mode associated with this timer. + /// + /// This defines how the expiration value is interpreted. + type TimerMode: HrTimerMode; + /// A handle representing a started or restarted timer. /// /// If the timer is running or if the timer callback is executing when the @@ -205,7 +209,7 @@ pub trait HrTimerPointer: Sync + Sized { /// Start the timer with expiry after `expires` time units. If the timer was /// already running, it is restarted with the new expiry time. - fn start(self, expires: Ktime) -> Self::TimerHandle; + fn start(self, expires: ::Expires) -> Self::TimerHandle; } /// Unsafe version of [`HrTimerPointer`] for situations where leaking the @@ -220,6 +224,11 @@ pub trait HrTimerPointer: Sync + Sized { /// [`UnsafeHrTimerPointer`] outlives any associated [`HrTimerPointer::TimerHandle`] /// instances. pub unsafe trait UnsafeHrTimerPointer: Sync + Sized { + /// The operational mode associated with this timer. + /// + /// This defines how the expiration value is interpreted. + type TimerMode: HrTimerMode; + /// A handle representing a running timer. /// /// # Safety @@ -236,7 +245,7 @@ pub unsafe trait UnsafeHrTimerPointer: Sync + Sized { /// /// Caller promises keep the timer structure alive until the timer is dead. /// Caller can ensure this by not leaking the returned [`Self::TimerHandle`]. - unsafe fn start(self, expires: Ktime) -> Self::TimerHandle; + unsafe fn start(self, expires: ::Expires) -> Self::TimerHandle; } /// A trait for stack allocated timers. @@ -246,9 +255,14 @@ pub unsafe trait UnsafeHrTimerPointer: Sync + Sized { /// Implementers must ensure that `start_scoped` does not return until the /// timer is dead and the timer handler is not running. pub unsafe trait ScopedHrTimerPointer { + /// The operational mode associated with this timer. 
+ /// + /// This defines how the expiration value is interpreted. + type TimerMode: HrTimerMode; + /// Start the timer to run after `expires` time units and immediately /// after call `f`. When `f` returns, the timer is cancelled. - fn start_scoped(self, expires: Ktime, f: F) -> T + fn start_scoped(self, expires: ::Expires, f: F) -> T where F: FnOnce() -> T; } @@ -260,7 +274,13 @@ unsafe impl ScopedHrTimerPointer for T where T: UnsafeHrTimerPointer, { - fn start_scoped(self, expires: Ktime, f: F) -> U + type TimerMode = T::TimerMode; + + fn start_scoped( + self, + expires: <::TimerMode as HrTimerMode>::Expires, + f: F, + ) -> U where F: FnOnce() -> U, { @@ -335,6 +355,11 @@ pub unsafe trait HrTimerHandle { /// their documentation. All the methods of this trait must operate on the same /// field. pub unsafe trait HasHrTimer { + /// The operational mode associated with this timer. + /// + /// This defines how the expiration value is interpreted. + type TimerMode: HrTimerMode; + /// Return a pointer to the [`HrTimer`] within `Self`. /// /// This function is useful to get access to the value without creating @@ -382,14 +407,14 @@ unsafe fn c_timer_ptr(this: *const Self) -> *const bindings::hrtimer { /// - `this` must point to a valid `Self`. /// - Caller must ensure that the pointee of `this` lives until the timer /// fires or is canceled. - unsafe fn start(this: *const Self, expires: Ktime) { + unsafe fn start(this: *const Self, expires: ::Expires) { // SAFETY: By function safety requirement, `this` is a valid `Self`. unsafe { bindings::hrtimer_start_range_ns( Self::c_timer_ptr(this).cast_mut(), - expires.to_ns(), + expires.as_nanos(), 0, - (*Self::raw_get_timer(this)).mode, + ::Clock::ID as u32, ); } } @@ -568,12 +593,16 @@ macro_rules! impl_has_hr_timer { impl$({$($generics:tt)*})? HasHrTimer<$timer_type:ty> for $self:ty - { self.$field:ident } + { + mode : $mode:ty, + field : self.$field:ident $(,)? 
+ } $($rest:tt)* ) => { // SAFETY: This implementation of `raw_get_timer` only compiles if the // field has the right type. unsafe impl$(<$($generics)*>)? $crate::time::hrtimer::HasHrTimer<$timer_type> for $self { + type TimerMode = $mode; #[inline] unsafe fn raw_get_timer( diff --git a/rust/kernel/time/hrtimer/arc.rs b/rust/kernel/time/hrtimer/arc.rs index ccf1e66e5b2d..ed490a7a8950 100644 --- a/rust/kernel/time/hrtimer/arc.rs +++ b/rust/kernel/time/hrtimer/arc.rs @@ -4,8 +4,8 @@ use super::HrTimer; use super::HrTimerCallback; use super::HrTimerHandle; +use super::HrTimerMode; use super::HrTimerPointer; -use super::Ktime; use super::RawHrTimerCallback; use crate::sync::Arc; use crate::sync::ArcBorrow; @@ -54,9 +54,13 @@ impl HrTimerPointer for Arc T: HasHrTimer, T: for<'a> HrTimerCallback = Self>, { + type TimerMode = >::TimerMode; type TimerHandle = ArcHrTimerHandle; - fn start(self, expires: Ktime) -> ArcHrTimerHandle { + fn start( + self, + expires: <>::TimerMode as HrTimerMode>::Expires, + ) -> ArcHrTimerHandle { // SAFETY: // - We keep `self` alive by wrapping it in a handle below. 
// - Since we generate the pointer passed to `start` from a valid diff --git a/rust/kernel/time/hrtimer/pin.rs b/rust/kernel/time/hrtimer/pin.rs index 293ca9cf058c..550aad28d987 100644 --- a/rust/kernel/time/hrtimer/pin.rs +++ b/rust/kernel/time/hrtimer/pin.rs @@ -4,7 +4,7 @@ use super::HrTimer; use super::HrTimerCallback; use super::HrTimerHandle; -use super::Ktime; +use super::HrTimerMode; use super::RawHrTimerCallback; use super::UnsafeHrTimerPointer; use core::pin::Pin; @@ -54,9 +54,13 @@ unsafe impl<'a, T> UnsafeHrTimerPointer for Pin<&'a T> T: HasHrTimer, T: HrTimerCallback = Self>, { + type TimerMode = >::TimerMode; type TimerHandle = PinHrTimerHandle<'a, T>; - unsafe fn start(self, expires: Ktime) -> Self::TimerHandle { + unsafe fn start( + self, + expires: <>::TimerMode as HrTimerMode>::Expires, + ) -> Self::TimerHandle { // Cast to pointer let self_ptr: *const T = self.get_ref(); diff --git a/rust/kernel/time/hrtimer/pin_mut.rs b/rust/kernel/time/hrtimer/pin_mut.rs index 6033572d35ad..bacd3d5d972a 100644 --- a/rust/kernel/time/hrtimer/pin_mut.rs +++ b/rust/kernel/time/hrtimer/pin_mut.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 use super::{ - HasHrTimer, HrTimer, HrTimerCallback, HrTimerHandle, Ktime, RawHrTimerCallback, + HasHrTimer, HrTimer, HrTimerCallback, HrTimerHandle, HrTimerMode, RawHrTimerCallback, UnsafeHrTimerPointer, }; use core::{marker::PhantomData, pin::Pin, ptr::NonNull}; @@ -52,9 +52,13 @@ unsafe impl<'a, T> UnsafeHrTimerPointer for Pin<&'a mut T> T: HasHrTimer, T: HrTimerCallback = Self>, { + type TimerMode = >::TimerMode; type TimerHandle = PinMutHrTimerHandle<'a, T>; - unsafe fn start(mut self, expires: Ktime) -> Self::TimerHandle { + unsafe fn start( + mut self, + expires: <>::TimerMode as HrTimerMode>::Expires, + ) -> Self::TimerHandle { // SAFETY: // - We promise not to move out of `self`. We only pass `self` // back to the caller as a `Pin<&mut self>`. 
diff --git a/rust/kernel/time/hrtimer/tbox.rs b/rust/kernel/time/hrtimer/tbox.rs index 29526a5da203..ec08303315f2 100644 --- a/rust/kernel/time/hrtimer/tbox.rs +++ b/rust/kernel/time/hrtimer/tbox.rs @@ -4,8 +4,8 @@ use super::HrTimer; use super::HrTimerCallback; use super::HrTimerHandle; +use super::HrTimerMode; use super::HrTimerPointer; -use super::Ktime; use super::RawHrTimerCallback; use crate::prelude::*; use core::ptr::NonNull; @@ -64,9 +64,13 @@ impl HrTimerPointer for Pin> T: for<'a> HrTimerCallback = Pin>>, A: crate::alloc::Allocator, { + type TimerMode = >::TimerMode; type TimerHandle = BoxHrTimerHandle; - fn start(self, expires: Ktime) -> Self::TimerHandle { + fn start( + self, + expires: <>::TimerMode as HrTimerMode>::Expires, + ) -> Self::TimerHandle { // SAFETY: // - We will not move out of this box during timer callback (we pass an // immutable reference to the callback). From 69f66cf45814f45a161688fd087abe21e6d5afbd Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Jun 2025 22:28:23 +0900 Subject: [PATCH 0333/2411] rust: time: Remove Ktime in hrtimer Remove the use of `Ktime` from the hrtimer code, which was originally introduced as a temporary workaround. The hrtimer has now been fully converted to use the `Instant` and `Delta` types instead. Reviewed-by: Andreas Hindborg Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250610132823.3457263-6-fujita.tomonori@gmail.com Signed-off-by: Andreas Hindborg --- rust/kernel/time/hrtimer.rs | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs index 8b15eb374db0..1b81bf306d16 100644 --- a/rust/kernel/time/hrtimer.rs +++ b/rust/kernel/time/hrtimer.rs @@ -72,22 +72,6 @@ use core::marker::PhantomData; use pin_init::PinInit; -/// A Rust wrapper around a `ktime_t`. -// NOTE: Ktime is going to be removed when hrtimer is converted to Instant/Delta. 
-#[repr(transparent)] -#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord)] -pub struct Ktime { - inner: bindings::ktime_t, -} - -impl Ktime { - /// Returns the number of nanoseconds. - #[inline] - pub fn to_ns(self) -> i64 { - self.inner - } -} - /// A timer backed by a C `struct hrtimer`. /// /// # Invariants From 614f806a34e134b7a35eb4b29a139b2c8c1b7795 Mon Sep 17 00:00:00 2001 From: Chun-Tse Shao Date: Fri, 20 Jun 2025 10:40:09 -0700 Subject: [PATCH 0334/2411] perf test: Replace grep perl regexp with awk perl is not universal on all machines and should be replaced with awk, which is much more common. Before: $ perf test "probe libc's inet_pton & backtrace it with ping" -v --- start --- test child forked, pid 145431 grep: Perl matching not supported in a --disable-perl-regexp build FAIL: could not add event ---- end(-1) ---- 121: probe libc's inet_pton & backtrace it with ping : FAILED! After: $ perf test "probe libc's inet_pton & backtrace it with ping" -v 121: probe libc's inet_pton & backtrace it with ping : Ok Suggested-by: Ian Rogers Signed-off-by: Chun-Tse Shao Reviewed-by: James Clark Link: https://lore.kernel.org/r/20250620174034.819894-1-ctshao@google.com [ fold James' suggestion not to escape _ in the event pattern. ] Signed-off-by: Namhyung Kim --- tools/perf/tests/shell/record+probe_libc_inet_pton.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index c4bab5b5cc59..9bdf47aabe9d 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -18,12 +18,13 @@ libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1/g') nm -Dg $libc 2>/dev/null | grep -F -q inet_pton || exit 254 -event_pattern='probe_libc:inet_pton(\_[[:digit:]]+)?' +event_pattern='probe_libc:inet_pton(_[[:digit:]]+)?' 
add_libc_inet_pton_event() { event_name=$(perf probe -f -x $libc -a inet_pton 2>&1 | tail -n +2 | head -n -5 | \ - grep -P -o "$event_pattern(?=[[:space:]]\(on inet_pton in $libc\))") + awk -v ep="$event_pattern" -v l="$libc" '$0 ~ ep && $0 ~ \ + ("\\(on inet_pton in " l "\\)") {print $1}') if [ $? -ne 0 ] || [ -z "$event_name" ] ; then printf "FAIL: could not add event\n" From 51f4c00436b89696773e195c5f2d4a808483ff66 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Wed, 11 Jun 2025 15:29:03 +0530 Subject: [PATCH 0335/2411] perf tools: Remove excess variable declarations I thought array declaration might be done in the same line as assigning the value to it. Hence, getting rid of extra steps of reiterating the array name. Signed-off-by: Bhaskar Chowdhury Link: https://lore.kernel.org/r/20250611100256.31089-1-unixbhaskar@gmail.com Signed-off-by: Namhyung Kim --- tools/perf/check-headers.sh | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 8085e4d1d8af..be519c433ce4 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -4,8 +4,7 @@ YELLOW='\033[0;33m' NC='\033[0m' # No Color -declare -a FILES -FILES=( +declare -a FILES=( "include/uapi/linux/const.h" "include/uapi/drm/drm.h" "include/uapi/drm/i915_drm.h" @@ -73,8 +72,7 @@ FILES=( "scripts/syscall.tbl" ) -declare -a SYNC_CHECK_FILES -SYNC_CHECK_FILES=( +declare -a SYNC_CHECK_FILES=( "arch/x86/include/asm/inat.h" "arch/x86/include/asm/insn.h" "arch/x86/lib/inat.c" @@ -86,8 +84,7 @@ SYNC_CHECK_FILES=( # tables that then gets included in .c files for things like id->string syscall # tables (and the reverse lookup as well: string -> id) -declare -a BEAUTY_FILES -BEAUTY_FILES=( +declare -a BEAUTY_FILES=( "arch/x86/include/asm/irq_vectors.h" "arch/x86/include/uapi/asm/prctl.h" "include/linux/socket.h" From 4b443bbcd113cad6ec041a4f9f09179e2342ad60 Mon Sep 17 00:00:00 2001 From: Shiji Yang Date: Wed, 18 Jun 
2025 22:53:28 +0800 Subject: [PATCH 0336/2411] pinctrl: falcon: mark pinctrl_falcon_init() as static Fix the following missing-prototypes build warning: drivers/pinctrl/pinctrl-falcon.c:508:12: error: no previous prototype for 'pinctrl_falcon_init' [-Werror=missing-prototypes] 508 | int __init pinctrl_falcon_init(void) | ^~~~~~~~~~~~~~~~~~~ Signed-off-by: Shiji Yang Link: https://lore.kernel.org/OSBPR01MB167014AF54EF9818CB98C83BBC72A@OSBPR01MB1670.jpnprd01.prod.outlook.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-falcon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-falcon.c b/drivers/pinctrl/pinctrl-falcon.c index 0bf9ffbcc79f..100eed175c0d 100644 --- a/drivers/pinctrl/pinctrl-falcon.c +++ b/drivers/pinctrl/pinctrl-falcon.c @@ -505,7 +505,7 @@ static struct platform_driver pinctrl_falcon_driver = { }, }; -int __init pinctrl_falcon_init(void) +static int __init pinctrl_falcon_init(void) { return platform_driver_register(&pinctrl_falcon_driver); } From b4102e35243338d966f73ade1fec66d88e8f55ac Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 19 Jun 2025 19:35:37 +0200 Subject: [PATCH 0337/2411] pinctrl: aw9523: fix mutex unlock in error path We must unlock the mutex *after* the `out` label or we'd trigger a deadlock in error path. 
Fixes: dffe286e2428 ("pinctrl: aw9523: use new GPIO line value setter callbacks") Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202506191952.A03cvn22-lkp@intel.com/ Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250619173537.64298-1-brgl@bgdev.pl Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-aw9523.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-aw9523.c b/drivers/pinctrl/pinctrl-aw9523.c index c84454038419..2935b2cceb03 100644 --- a/drivers/pinctrl/pinctrl-aw9523.c +++ b/drivers/pinctrl/pinctrl-aw9523.c @@ -652,9 +652,9 @@ static int aw9523_gpio_set_multiple(struct gpio_chip *chip, if (ret) goto out; } - mutex_unlock(&awi->i2c_lock); out: + mutex_unlock(&awi->i2c_lock); return ret; } From e3507c56cbb208d4f160942748c527ef6a528ba1 Mon Sep 17 00:00:00 2001 From: Yuan Chen Date: Fri, 20 Jun 2025 09:27:08 +0800 Subject: [PATCH 0338/2411] pinctrl: sunxi: Fix memory leak on krealloc failure In sunxi_pctrl_dt_node_to_map(), when krealloc() fails to resize the pinctrl_map array, the function returns -ENOMEM directly without freeing the previously allocated *map buffer. This results in a memory leak of the original kmalloc_array allocation. 
Fixes: e11dee2e98f8 ("pinctrl: sunxi: Deal with configless pins") Signed-off-by: Yuan Chen Link: https://lore.kernel.org/20250620012708.16709-1-chenyuan_fl@163.com Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-sunxi.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index a5ce84621e5a..0db8429a013f 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -408,6 +408,7 @@ static int sunxi_pctrl_dt_node_to_map(struct pinctrl_dev *pctldev, const char *function, *pin_prop; const char *group; int ret, npins, nmaps, configlen = 0, i = 0; + struct pinctrl_map *new_map; *map = NULL; *num_maps = 0; @@ -482,9 +483,13 @@ static int sunxi_pctrl_dt_node_to_map(struct pinctrl_dev *pctldev, * We know have the number of maps we need, we can resize our * map array */ - *map = krealloc(*map, i * sizeof(struct pinctrl_map), GFP_KERNEL); - if (!*map) - return -ENOMEM; + new_map = krealloc(*map, i * sizeof(struct pinctrl_map), GFP_KERNEL); + if (!new_map) { + ret = -ENOMEM; + goto err_free_map; + } + + *map = new_map; return 0; From 8f6f303551100291bf2c1e1ccc66b758fffb1168 Mon Sep 17 00:00:00 2001 From: Yuan Chen Date: Fri, 20 Jun 2025 09:53:43 +0800 Subject: [PATCH 0339/2411] pinctrl: berlin: fix memory leak in berlin_pinctrl_build_state() In the original implementation, krealloc() failure handling incorrectly assigned the original memory pointer to NULL after kfree(), causing a memory leak when reallocation failed. 
Fixes: de845036f997 ("pinctrl: berlin: fix error return code of berlin_pinctrl_build_state()") Signed-off-by: Yuan Chen Link: https://lore.kernel.org/20250620015343.21494-1-chenyuan_fl@163.com Signed-off-by: Linus Walleij --- drivers/pinctrl/berlin/berlin.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/berlin/berlin.c b/drivers/pinctrl/berlin/berlin.c index e5a35b803ce6..8afcfa4e5694 100644 --- a/drivers/pinctrl/berlin/berlin.c +++ b/drivers/pinctrl/berlin/berlin.c @@ -204,6 +204,7 @@ static int berlin_pinctrl_build_state(struct platform_device *pdev) const struct berlin_desc_group *desc_group; const struct berlin_desc_function *desc_function; int i, max_functions = 0; + struct pinfunction *new_functions; pctrl->nfunctions = 0; @@ -229,12 +230,15 @@ static int berlin_pinctrl_build_state(struct platform_device *pdev) } } - pctrl->functions = krealloc(pctrl->functions, + new_functions = krealloc(pctrl->functions, pctrl->nfunctions * sizeof(*pctrl->functions), GFP_KERNEL); - if (!pctrl->functions) + if (!new_functions) { + kfree(pctrl->functions); return -ENOMEM; + } + pctrl->functions = new_functions; /* map functions to theirs groups */ for (i = 0; i < pctrl->desc->ngroups; i++) { desc_group = pctrl->desc->groups + i; From c0d03cdfaccf3bc41c9531af7c4cabb0b0ce4040 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 20 Jun 2025 15:07:55 +0200 Subject: [PATCH 0340/2411] pinctrl: zynq: add CONFIG_OF dependency The zynq driver can be enabled for compile-testing on builds without CONFIG_OF, leading to a link error: ld.lld-21: error: undefined symbol: pinconf_generic_dt_node_to_map referenced by pinconf-generic.h:231 (/home/arnd/arm-soc/include/linux/pinctrl/pinconf-generic.h:231) drivers/pinctrl/pinctrl-zynq.o:(pinconf_generic_dt_node_to_map_all) in archive vmlinux.a Prevent this with the proper compile time dependency. 
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202506212021.deAhuaWr-lkp@intel.com/ Fixes: 1982621decaf ("pinctrl: Allow compile testing for K210, TB10X and ZYNQ") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/20250620130814.2580678-1-arnd@kernel.org Signed-off-by: Linus Walleij --- drivers/pinctrl/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig index 346003b624ec..8060a9c6828f 100644 --- a/drivers/pinctrl/Kconfig +++ b/drivers/pinctrl/Kconfig @@ -603,6 +603,7 @@ config PINCTRL_TH1520 config PINCTRL_ZYNQ bool "Pinctrl driver for Xilinx Zynq" depends on ARCH_ZYNQ || COMPILE_TEST + depends on OF select PINMUX select GENERIC_PINCONF help From b58ea88d301cd4c0403f298468442dacac4f8c4e Mon Sep 17 00:00:00 2001 From: Da Xue Date: Wed, 18 Jun 2025 22:23:37 -0400 Subject: [PATCH 0341/2411] pinctrl: meson-g12a: add g12b pwm groups G12B and SM1 have additional PWM pinmuxes for b, c, and d. Signed-off-by: Da Xue Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/20250619022337.43900-1-da@libre.computer Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-meson-g12a.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson-g12a.c b/drivers/pinctrl/meson/pinctrl-meson-g12a.c index e2788bfc5874..8b9130c6e170 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-g12a.c +++ b/drivers/pinctrl/meson/pinctrl-meson-g12a.c @@ -270,15 +270,21 @@ static const unsigned int pwm_a_pins[] = { GPIOX_6 }; /* pwm_b */ static const unsigned int pwm_b_x7_pins[] = { GPIOX_7 }; static const unsigned int pwm_b_x19_pins[] = { GPIOX_19 }; +static const unsigned int pwm_b_z0_pins[] = { GPIOZ_0 }; +static const unsigned int pwm_b_z13_pins[] = { GPIOZ_13 }; +static const unsigned int pwm_b_h_pins[] = { GPIOH_7 }; /* pwm_c */ static const unsigned int pwm_c_c_pins[] = { GPIOC_4 }; static const unsigned int pwm_c_x5_pins[] = 
{ GPIOX_5 }; static const unsigned int pwm_c_x8_pins[] = { GPIOX_8 }; +static const unsigned int pwm_c_z_pins[] = { GPIOZ_1 }; /* pwm_d */ static const unsigned int pwm_d_x3_pins[] = { GPIOX_3 }; static const unsigned int pwm_d_x6_pins[] = { GPIOX_6 }; +static const unsigned int pwm_d_z_pins[] = { GPIOZ_2 }; +static const unsigned int pwm_d_a_pins[] = { GPIOA_4 }; /* pwm_e */ static const unsigned int pwm_e_pins[] = { GPIOX_16 }; @@ -649,12 +655,22 @@ static const struct meson_pmx_group meson_g12a_periphs_groups[] = { GROUP(pwm_a, 1), GROUP(pwm_b_x7, 4), GROUP(pwm_b_x19, 1), + GROUP(pwm_b_z0, 5), + GROUP(pwm_b_z13, 5), + GROUP(pwm_b_h, 5), GROUP(pwm_c_x5, 4), GROUP(pwm_c_x8, 5), + GROUP(pwm_c_c, 5), + GROUP(pwm_c_z, 5), + GROUP(pwm_d_z, 4), + GROUP(pwm_d_a, 3), GROUP(pwm_d_x3, 4), GROUP(pwm_d_x6, 4), GROUP(pwm_e, 1), + GROUP(pwm_f_a, 3), + GROUP(pwm_f_h, 4), GROUP(pwm_f_x, 1), + GROUP(pwm_f_z, 5), GROUP(tsin_a_valid, 3), GROUP(tsin_a_sop, 3), GROUP(tsin_a_din0, 3), @@ -1058,15 +1074,15 @@ static const char * const pwm_a_groups[] = { }; static const char * const pwm_b_groups[] = { - "pwm_b_x7", "pwm_b_x19", + "pwm_b_h", "pwm_b_x7", "pwm_b_x19", "pwm_b_z0", "pwm_b_z13" }; static const char * const pwm_c_groups[] = { - "pwm_c_c", "pwm_c_x5", "pwm_c_x8", + "pwm_c_c", "pwm_c_x5", "pwm_c_x8", "pwm_c_z", }; static const char * const pwm_d_groups[] = { - "pwm_d_x3", "pwm_d_x6", + "pwm_d_a", "pwm_d_x3", "pwm_d_x6", "pwm_d_z", }; static const char * const pwm_e_groups[] = { From 65bd0be486390fc12a84eafaad78758c5e5a55e6 Mon Sep 17 00:00:00 2001 From: Ze Huang Date: Tue, 24 Jun 2025 00:11:13 +0800 Subject: [PATCH 0342/2411] pinctrl: canaan: k230: add NULL check in DT parse Add a NULL check for the return value of of_get_property() when retrieving the "pinmux" property in the group parser. This avoids a potential NULL pointer dereference if the property is missing from the device tree node. 
Also fix a typo ("sintenel") in the device ID match table comment, correcting it to "sentinel". Fixes: 545887eab6f6 ("pinctrl: canaan: Add support for k230 SoC") Reported-by: Yao Zi Signed-off-by: Ze Huang Link: https://lore.kernel.org/20250624-k230-return-check-v1-1-6b4fc5ba0c41@whut.edu.cn Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-k230.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-k230.c b/drivers/pinctrl/pinctrl-k230.c index a9b4627b46b0..4976308e6237 100644 --- a/drivers/pinctrl/pinctrl-k230.c +++ b/drivers/pinctrl/pinctrl-k230.c @@ -477,6 +477,10 @@ static int k230_pinctrl_parse_groups(struct device_node *np, grp->name = np->name; list = of_get_property(np, "pinmux", &size); + if (!list) { + dev_err(dev, "failed to get pinmux property\n"); + return -EINVAL; + } size /= sizeof(*list); grp->num_pins = size; @@ -623,7 +627,7 @@ static int k230_pinctrl_probe(struct platform_device *pdev) static const struct of_device_id k230_dt_ids[] = { { .compatible = "canaan,k230-pinctrl", }, - { /* sintenel */ } + { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, k230_dt_ids); From d94a32ac688f953dc9a9f12b5b4139ecad841bbb Mon Sep 17 00:00:00 2001 From: Ze Huang Date: Tue, 24 Jun 2025 00:11:14 +0800 Subject: [PATCH 0343/2411] pinctrl: canaan: k230: Fix order of DT parse and pinctrl register Move DT parse before pinctrl register. This ensures that device tree parsing is done before calling devm_pinctrl_register() to prevent using uninitialized pin resources. 
Fixes: 545887eab6f6 ("pinctrl: canaan: Add support for k230 SoC") Reported-by: Yao Zi Signed-off-by: Ze Huang Link: https://lore.kernel.org/20250624-k230-return-check-v1-2-6b4fc5ba0c41@whut.edu.cn Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-k230.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/pinctrl-k230.c b/drivers/pinctrl/pinctrl-k230.c index 4976308e6237..d716f23d837f 100644 --- a/drivers/pinctrl/pinctrl-k230.c +++ b/drivers/pinctrl/pinctrl-k230.c @@ -590,6 +590,7 @@ static int k230_pinctrl_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct k230_pinctrl *info; struct pinctrl_desc *pctl; + int ret; info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL); if (!info) @@ -615,13 +616,15 @@ static int k230_pinctrl_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(info->regmap_base), "failed to init regmap\n"); + ret = k230_pinctrl_parse_dt(pdev, info); + if (ret) + return ret; + info->pctl_dev = devm_pinctrl_register(dev, pctl, info); if (IS_ERR(info->pctl_dev)) return dev_err_probe(dev, PTR_ERR(info->pctl_dev), "devm_pinctrl_register failed\n"); - k230_pinctrl_parse_dt(pdev, info); - return 0; } From 6cb0e9da949aeb022b28a9b698f767c8828e283d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 23 Jun 2025 23:20:04 +0100 Subject: [PATCH 0344/2411] pinctrl: eswin: Fix unsigned comparison to less than zero issue The u32 variable voltage is being compared to less than zero and this can never be true. Fix this by making voltage an int type which is the same type as the return from the call to regulator_get_voltage. 
Fixes: 5b797bcc00ef ("pinctrl: eswin: Add EIC7700 pinctrl driver") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/20250623222004.280928-1-colin.i.king@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-eic7700.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/pinctrl-eic7700.c b/drivers/pinctrl/pinctrl-eic7700.c index 719cd11e276a..4874b5532343 100644 --- a/drivers/pinctrl/pinctrl-eic7700.c +++ b/drivers/pinctrl/pinctrl-eic7700.c @@ -622,8 +622,8 @@ static int eic7700_pinctrl_probe(struct platform_device *pdev) struct pinctrl_dev *pctldev; struct eic7700_pinctrl *pc; struct regulator *regulator; - u32 voltage, rgmii0_mode, rgmii1_mode; - int ret; + u32 rgmii0_mode, rgmii1_mode; + int ret, voltage; pc = devm_kzalloc(dev, struct_size(pc, functions, EIC7700_FUNCTIONS_COUNT), GFP_KERNEL); if (!pc) From 4ab401099d4764d1479914fd4c8b9876d5b3aca1 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 24 Jun 2025 20:47:57 +0200 Subject: [PATCH 0345/2411] pinctrl: amlogic: Staticize some local structs Sparse complains: sparse warnings: (new ones prefixed by >>) >> drivers/pinctrl/meson/pinctrl-amlogic-a4.c:126:24: sparse: sparse: symbol 'multi_mux_s7' was not declared. Should it be static? >> drivers/pinctrl/meson/pinctrl-amlogic-a4.c:135:28: sparse: sparse: symbol 's7_priv_data' was not declared. Should it be static? >> drivers/pinctrl/meson/pinctrl-amlogic-a4.c:140:24: sparse: sparse: symbol 'multi_mux_s6' was not declared. Should it be static? >> drivers/pinctrl/meson/pinctrl-amlogic-a4.c:154:28: sparse: sparse: symbol 's6_priv_data' was not declared. Should it be static? 
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202506122145.wWAtKBoy-lkp@intel.com/ Cc: Xianwei Zhao Fixes: 1f8e5dfddaa7 ("pinctrl: meson: support amlogic S6/S7/S7D SoC") Signed-off-by: Linus Walleij Link: https://lore.kernel.org/20250624-amlogic-a4-fix-v1-1-03f0856d10cb@linaro.org --- drivers/pinctrl/meson/pinctrl-amlogic-a4.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c index 2541c864086d..c8958222df8c 100644 --- a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c +++ b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c @@ -123,7 +123,7 @@ static const char *aml_bank_name[31] = { "GPIOCC", "TEST_N", "ANALOG" }; -const struct multi_mux multi_mux_s7[] = { +static const struct multi_mux multi_mux_s7[] = { { .m_bank_id = AMLOGIC_GPIO_CC, .m_bit_offs = 24, @@ -132,12 +132,12 @@ const struct multi_mux multi_mux_s7[] = { }, }; -const struct aml_pctl_data s7_priv_data = { +static const struct aml_pctl_data s7_priv_data = { .number = ARRAY_SIZE(multi_mux_s7), .p_mux = multi_mux_s7, }; -const struct multi_mux multi_mux_s6[] = { +static const struct multi_mux multi_mux_s6[] = { { .m_bank_id = AMLOGIC_GPIO_CC, .m_bit_offs = 24, @@ -151,7 +151,7 @@ const struct multi_mux multi_mux_s6[] = { }, }; -const struct aml_pctl_data s6_priv_data = { +static const struct aml_pctl_data s6_priv_data = { .number = ARRAY_SIZE(multi_mux_s6), .p_mux = multi_mux_s6, }; From 9989e0ca7462c62f93dbc62f684448aa2efb9226 Mon Sep 17 00:00:00 2001 From: Jiwei Sun Date: Thu, 23 Jan 2025 13:51:54 +0800 Subject: [PATCH 0346/2411] PCI: Fix link speed calculation on retrain failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When pcie_failed_link_retrain() fails to retrain, it tries to revert to the previous link speed. However it calculates that speed from the Link Control 2 register without masking out non-speed bits first. 
PCIE_LNKCTL2_TLS2SPEED() converts such incorrect values to PCI_SPEED_UNKNOWN (0xff), which in turn causes a WARN splat in pcie_set_target_speed(): pci 0000:00:01.1: [1022:14ed] type 01 class 0x060400 PCIe Root Port pci 0000:00:01.1: broken device, retraining non-functional downstream link at 2.5GT/s pci 0000:00:01.1: retraining failed WARNING: CPU: 1 PID: 1 at drivers/pci/pcie/bwctrl.c:168 pcie_set_target_speed RDX: 0000000000000001 RSI: 00000000000000ff RDI: ffff9acd82efa000 pcie_failed_link_retrain pci_device_add pci_scan_single_device Mask out the non-speed bits in PCIE_LNKCTL2_TLS2SPEED() and PCIE_LNKCAP_SLS2SPEED() so they don't incorrectly return PCI_SPEED_UNKNOWN. Fixes: de9a6c8d5dbf ("PCI/bwctrl: Add pcie_set_target_speed() to set PCIe Link Speed") Reported-by: Andrew Closes: https://lore.kernel.org/r/7iNzXbCGpf8yUMJZBQjLdbjPcXrEJqBxy5-bHfppz0ek-h4_-G93b1KUrm106r2VNF2FV_sSq0nENv4RsRIUGnlYZMlQr2ZD2NyB5sdj5aU=@protonmail.com/ Suggested-by: Maciej W. Rozycki Suggested-by: Ilpo Järvinen Signed-off-by: Jiwei Sun [bhelgaas: commit log, add details from https://lore.kernel.org/r/1c92ef6bcb314ee6977839b46b393282e4f52e74.1750684771.git.lukas@wunner.de] Signed-off-by: Bjorn Helgaas Reviewed-by: Ilpo Järvinen Cc: stable@vger.kernel.org # v6.13+ Link: https://patch.msgid.link/20250123055155.22648-2-sjiwei@163.com --- drivers/pci/pci.h | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 12215ee72afb..a9d56acca52c 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -391,12 +391,14 @@ void pci_bus_put(struct pci_bus *bus); #define PCIE_LNKCAP_SLS2SPEED(lnkcap) \ ({ \ - ((lnkcap) == PCI_EXP_LNKCAP_SLS_64_0GB ? PCIE_SPEED_64_0GT : \ - (lnkcap) == PCI_EXP_LNKCAP_SLS_32_0GB ? PCIE_SPEED_32_0GT : \ - (lnkcap) == PCI_EXP_LNKCAP_SLS_16_0GB ? PCIE_SPEED_16_0GT : \ - (lnkcap) == PCI_EXP_LNKCAP_SLS_8_0GB ? PCIE_SPEED_8_0GT : \ - (lnkcap) == PCI_EXP_LNKCAP_SLS_5_0GB ? 
PCIE_SPEED_5_0GT : \ - (lnkcap) == PCI_EXP_LNKCAP_SLS_2_5GB ? PCIE_SPEED_2_5GT : \ + u32 lnkcap_sls = (lnkcap) & PCI_EXP_LNKCAP_SLS; \ + \ + (lnkcap_sls == PCI_EXP_LNKCAP_SLS_64_0GB ? PCIE_SPEED_64_0GT : \ + lnkcap_sls == PCI_EXP_LNKCAP_SLS_32_0GB ? PCIE_SPEED_32_0GT : \ + lnkcap_sls == PCI_EXP_LNKCAP_SLS_16_0GB ? PCIE_SPEED_16_0GT : \ + lnkcap_sls == PCI_EXP_LNKCAP_SLS_8_0GB ? PCIE_SPEED_8_0GT : \ + lnkcap_sls == PCI_EXP_LNKCAP_SLS_5_0GB ? PCIE_SPEED_5_0GT : \ + lnkcap_sls == PCI_EXP_LNKCAP_SLS_2_5GB ? PCIE_SPEED_2_5GT : \ PCI_SPEED_UNKNOWN); \ }) @@ -411,13 +413,17 @@ void pci_bus_put(struct pci_bus *bus); PCI_SPEED_UNKNOWN) #define PCIE_LNKCTL2_TLS2SPEED(lnkctl2) \ - ((lnkctl2) == PCI_EXP_LNKCTL2_TLS_64_0GT ? PCIE_SPEED_64_0GT : \ - (lnkctl2) == PCI_EXP_LNKCTL2_TLS_32_0GT ? PCIE_SPEED_32_0GT : \ - (lnkctl2) == PCI_EXP_LNKCTL2_TLS_16_0GT ? PCIE_SPEED_16_0GT : \ - (lnkctl2) == PCI_EXP_LNKCTL2_TLS_8_0GT ? PCIE_SPEED_8_0GT : \ - (lnkctl2) == PCI_EXP_LNKCTL2_TLS_5_0GT ? PCIE_SPEED_5_0GT : \ - (lnkctl2) == PCI_EXP_LNKCTL2_TLS_2_5GT ? PCIE_SPEED_2_5GT : \ - PCI_SPEED_UNKNOWN) +({ \ + u16 lnkctl2_tls = (lnkctl2) & PCI_EXP_LNKCTL2_TLS; \ + \ + (lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_64_0GT ? PCIE_SPEED_64_0GT : \ + lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_32_0GT ? PCIE_SPEED_32_0GT : \ + lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_16_0GT ? PCIE_SPEED_16_0GT : \ + lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_8_0GT ? PCIE_SPEED_8_0GT : \ + lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_5_0GT ? PCIE_SPEED_5_0GT : \ + lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_2_5GT ? 
PCIE_SPEED_2_5GT : \ + PCI_SPEED_UNKNOWN); \ +}) /* PCIe speed to Mb/s reduced by encoding overhead */ #define PCIE_SPEED2MBS_ENC(speed) \ From b85af48de3ece4e5bbdb2248a5360a409991cf67 Mon Sep 17 00:00:00 2001 From: Jiwei Sun Date: Thu, 23 Jan 2025 13:51:55 +0800 Subject: [PATCH 0347/2411] PCI: Adjust the position of reading the Link Control 2 register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a89c82249c37 ("PCI: Work around PCIe link training failures"), if the speed limit is set to 2.5 GT/s and the retraining is successful, an attempt will be made to lift the speed limit. One condition for lifting the speed limit is to check whether the link speed field of the Link Control 2 register is PCI_EXP_LNKCTL2_TLS_2_5GT. However, since de9a6c8d5dbf ("PCI/bwctrl: Add pcie_set_target_speed() to set PCIe Link Speed"), the `lnkctl2` local variable does not undergo any changes during the speed limit setting and retraining process. As a result, the code intended to lift the speed limit is not executed. To address this issue, adjust the position of the Link Control 2 register read operation in the code and place it before its use. Fixes: de9a6c8d5dbf ("PCI/bwctrl: Add pcie_set_target_speed() to set PCIe Link Speed") Suggested-by: Maciej W. 
Rozycki Suggested-by: Ilpo Järvinen Signed-off-by: Jiwei Sun Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250123055155.22648-3-sjiwei@163.com --- drivers/pci/quirks.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index d7f4ee634263..db6e142b082d 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -105,13 +105,13 @@ int pcie_failed_link_retrain(struct pci_dev *dev) !pcie_cap_has_lnkctl2(dev) || !dev->link_active_reporting) return ret; - pcie_capability_read_word(dev, PCI_EXP_LNKCTL2, &lnkctl2); pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta); if (!(lnksta & PCI_EXP_LNKSTA_DLLLA) && pcie_lbms_seen(dev, lnksta)) { - u16 oldlnkctl2 = lnkctl2; + u16 oldlnkctl2; pci_info(dev, "broken device, retraining non-functional downstream link at 2.5GT/s\n"); + pcie_capability_read_word(dev, PCI_EXP_LNKCTL2, &oldlnkctl2); ret = pcie_set_target_speed(dev, PCIE_SPEED_2_5GT, false); if (ret) { pci_info(dev, "retraining failed\n"); @@ -123,6 +123,8 @@ int pcie_failed_link_retrain(struct pci_dev *dev) pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta); } + pcie_capability_read_word(dev, PCI_EXP_LNKCTL2, &lnkctl2); + if ((lnksta & PCI_EXP_LNKSTA_DLLLA) && (lnkctl2 & PCI_EXP_LNKCTL2_TLS) == PCI_EXP_LNKCTL2_TLS_2_5GT && pci_match_id(ids, dev)) { From 1f136890263c3d34072b8eeea905c1f47e30369a Mon Sep 17 00:00:00 2001 From: Swarna Prabhu Date: Tue, 17 Jun 2025 17:40:47 +0000 Subject: [PATCH 0348/2411] f2fs: Fix the typos in comments This patch fixes minor typos in comments in f2fs. 
Signed-off-by: Swarna Prabhu Reviewed-by: Luis Chamberlain Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 +++--- fs/f2fs/node.h | 2 +- fs/f2fs/super.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index aa535dcf2297..493f1c5fb2d5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -386,7 +386,7 @@ struct discard_cmd { struct rb_node rb_node; /* rb node located in rb-tree */ struct discard_info di; /* discard info */ struct list_head list; /* command list */ - struct completion wait; /* compleation */ + struct completion wait; /* completion */ struct block_device *bdev; /* bdev */ unsigned short ref; /* reference count */ unsigned char state; /* state */ @@ -1427,7 +1427,7 @@ enum { enum { MEMORY_MODE_NORMAL, /* memory mode for normal devices */ - MEMORY_MODE_LOW, /* memory mode for low memry devices */ + MEMORY_MODE_LOW, /* memory mode for low memory devices */ }; enum errors_option { @@ -1491,7 +1491,7 @@ enum compress_flag { #define COMPRESS_DATA_RESERVED_SIZE 4 struct compress_data { __le32 clen; /* compressed data size */ - __le32 chksum; /* compressed data chksum */ + __le32 chksum; /* compressed data checksum */ __le32 reserved[COMPRESS_DATA_RESERVED_SIZE]; /* reserved */ u8 cdata[]; /* compressed data */ }; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 1446c433b3ec..b5218d642545 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -31,7 +31,7 @@ /* control total # of nats */ #define DEF_NAT_CACHE_THRESHOLD 100000 -/* control total # of node writes used for roll-fowrad recovery */ +/* control total # of node writes used for roll-forward recovery */ #define DEF_RF_NODE_BLOCKS 0 /* vector size for gang look-up from nat cache that consists of radix tree */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9b58cf891a66..5a1b2b6e78f3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2569,7 +2569,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) 
!test_opt(sbi, MERGE_CHECKPOINT)) { f2fs_stop_ckpt_thread(sbi); } else { - /* Flush if the prevous checkpoint, if exists. */ + /* Flush if the previous checkpoint, if exists. */ f2fs_flush_ckpt_thread(sbi); err = f2fs_start_ckpt_thread(sbi); From 90c5ce37adf074ed85b26d1cd43074f29c0743ba Mon Sep 17 00:00:00 2001 From: wangzijie Date: Tue, 24 Jun 2025 11:59:37 +0800 Subject: [PATCH 0349/2411] f2fs: convert F2FS_I_SB to sbi in f2fs_setattr() Introduce sbi in f2fs_setattr() and convert F2FS_I_SB to it. No logic change, just cleanup and prepare to get CAP_BLKS_PER_SEC(sbi). Signed-off-by: wangzijie Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a909f79db178..63e9fb5a1c59 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1042,9 +1042,10 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, { struct inode *inode = d_inode(dentry); struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; - if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + if (unlikely(f2fs_cp_error(sbi))) return -EIO; if (unlikely(IS_IMMUTABLE(inode))) @@ -1084,12 +1085,11 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, } if (i_uid_needs_update(idmap, attr, inode) || i_gid_needs_update(idmap, attr, inode)) { - f2fs_lock_op(F2FS_I_SB(inode)); + f2fs_lock_op(sbi); err = dquot_transfer(idmap, inode, attr); if (err) { - set_sbi_flag(F2FS_I_SB(inode), - SBI_QUOTA_NEED_REPAIR); - f2fs_unlock_op(F2FS_I_SB(inode)); + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + f2fs_unlock_op(sbi); return err; } /* @@ -1099,7 +1099,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, i_uid_update(idmap, attr, inode); i_gid_update(idmap, attr, inode); f2fs_mark_inode_dirty_sync(inode, true); - f2fs_unlock_op(F2FS_I_SB(inode)); + f2fs_unlock_op(sbi); } if (attr->ia_valid & ATTR_SIZE) { @@ -1162,7 
+1162,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, f2fs_mark_inode_dirty_sync(inode, true); /* inode change will produce dirty node pages flushed by checkpoint */ - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); return err; } From 817f989700fddefa56e5e443e7d138018ca6709d Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 25 Jun 2025 12:23:47 +0200 Subject: [PATCH 0350/2411] PCI: Rename PCIE_RESET_CONFIG_DEVICE_WAIT_MS to PCIE_RESET_CONFIG_WAIT_MS Rename PCIE_RESET_CONFIG_DEVICE_WAIT_MS to PCIE_RESET_CONFIG_WAIT_MS. Suggested-by: Bjorn Helgaas Signed-off-by: Niklas Cassel Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250625102347.1205584-10-cassel@kernel.org --- drivers/pci/controller/plda/pcie-starfive.c | 2 +- drivers/pci/pci.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/plda/pcie-starfive.c b/drivers/pci/controller/plda/pcie-starfive.c index e73c1b7bc8ef..3caf53c6c082 100644 --- a/drivers/pci/controller/plda/pcie-starfive.c +++ b/drivers/pci/controller/plda/pcie-starfive.c @@ -368,7 +368,7 @@ static int starfive_pcie_host_init(struct plda_pcie_rp *plda) * of 100ms following exit from a conventional reset before * sending a configuration request to the device. */ - msleep(PCIE_RESET_CONFIG_DEVICE_WAIT_MS); + msleep(PCIE_RESET_CONFIG_WAIT_MS); if (starfive_pcie_host_wait_for_link(pcie)) dev_info(dev, "port link down\n"); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 12215ee72afb..98d6fccb383e 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -61,7 +61,7 @@ struct pcie_tlp_log; * completes before sending a Configuration Request to the device * immediately below that Port." 
*/ -#define PCIE_RESET_CONFIG_DEVICE_WAIT_MS 100 +#define PCIE_RESET_CONFIG_WAIT_MS 100 /* Message Routing (r[2:0]); PCIe r6.0, sec 2.2.8 */ #define PCIE_MSG_TYPE_R_RC 0 From bbc6a829ad3f054181d24a56944f944002e68898 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 25 Jun 2025 12:23:48 +0200 Subject: [PATCH 0351/2411] PCI: rockchip-host: Use macro PCIE_RESET_CONFIG_WAIT_MS Macro PCIE_RESET_CONFIG_DEVICE_WAIT_MS was added to pci.h in commit d5ceb9496c56 ("PCI: Add PCIE_RESET_CONFIG_DEVICE_WAIT_MS waiting time value"). Later, in commit 70a7bfb1e515 ("PCI: rockchip-host: Wait 100ms after reset before starting configuration"), PCIE_T_RRS_READY_MS was added to pci.h. These macros are duplicates, and represent the exact same delay in the PCIe specification. Since the comment above PCIE_RESET_CONFIG_WAIT_MS is strictly more correct than the comment above PCIE_T_RRS_READY_MS, change rockchip-host to use PCIE_RESET_CONFIG_WAIT_MS, and remove PCIE_T_RRS_READY_MS, as rockchip-host is the only user of this macro. 
Signed-off-by: Niklas Cassel Signed-off-by: Manivannan Sadhasivam Reviewed-by: Wilfred Mallawa Link: https://patch.msgid.link/20250625102347.1205584-11-cassel@kernel.org --- drivers/pci/controller/pcie-rockchip-host.c | 2 +- drivers/pci/pci.h | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c index b9e7a8710cf0..c11ed45c25f6 100644 --- a/drivers/pci/controller/pcie-rockchip-host.c +++ b/drivers/pci/controller/pcie-rockchip-host.c @@ -325,7 +325,7 @@ static int rockchip_pcie_host_init_port(struct rockchip_pcie *rockchip) msleep(PCIE_T_PVPERL_MS); gpiod_set_value_cansleep(rockchip->perst_gpio, 1); - msleep(PCIE_T_RRS_READY_MS); + msleep(PCIE_RESET_CONFIG_WAIT_MS); /* 500ms timeout value should be enough for Gen1/2 training */ err = readl_poll_timeout(rockchip->apb_base + PCIE_CLIENT_BASIC_STATUS1, diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 98d6fccb383e..819833e57590 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -35,13 +35,6 @@ struct pcie_tlp_log; */ #define PCIE_T_PERST_CLK_US 100 -/* - * End of conventional reset (PERST# de-asserted) to first configuration - * request (device able to respond with a "Request Retry Status" completion), - * from PCIe r6.0, sec 6.6.1. - */ -#define PCIE_T_RRS_READY_MS 100 - /* * PCIe r6.0, sec 5.3.3.2.1 * Recommends 1ms to 10ms timeout to check L2 ready. From c7eb9c5e1498882951b7583c56add0b77bfc162e Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 25 Jun 2025 12:23:49 +0200 Subject: [PATCH 0352/2411] PCI: dw-rockchip: Wait PCIE_RESET_CONFIG_WAIT_MS after link-up IRQ Per PCIe r6.0, sec 6.6.1, software must generally wait a minimum of 100ms (PCIE_RESET_CONFIG_WAIT_MS) after Link training completes before sending a Configuration Request. 
Prior to ec9fd499b9c6 ("PCI: dw-rockchip: Don't wait for link since we can detect Link Up"), dw-rockchip used dw_pcie_wait_for_link(), which waited between 0 and 90ms after the link came up before we enumerate the bus, and this was apparently enough for most devices. After ec9fd499b9c6, rockchip_pcie_rc_sys_irq_thread() started enumeration immediately when handling the link-up IRQ, and devices (e.g., Laszlo Fiat's PLEXTOR PX-256M8PeGN NVMe SSD) may not be ready to handle config requests yet. Delay PCIE_RESET_CONFIG_WAIT_MS after the link-up IRQ before starting enumeration. Fixes: 0e898eb8df4e ("PCI: rockchip-dwc: Add Rockchip RK356X host controller driver") Signed-off-by: Niklas Cassel Signed-off-by: Manivannan Sadhasivam Reviewed-by: Damien Le Moal Reviewed-by: Wilfred Mallawa Cc: Laszlo Fiat Link: https://patch.msgid.link/20250625102347.1205584-12-cassel@kernel.org --- drivers/pci/controller/dwc/pcie-dw-rockchip.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/controller/dwc/pcie-dw-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-rockchip.c index 93171a392879..108d30637920 100644 --- a/drivers/pci/controller/dwc/pcie-dw-rockchip.c +++ b/drivers/pci/controller/dwc/pcie-dw-rockchip.c @@ -458,6 +458,7 @@ static irqreturn_t rockchip_pcie_rc_sys_irq_thread(int irq, void *arg) if (reg & PCIE_RDLH_LINK_UP_CHGED) { if (rockchip_pcie_link_up(pci)) { + msleep(PCIE_RESET_CONFIG_WAIT_MS); dev_dbg(dev, "Received Link up event. Starting enumeration!\n"); /* Rescan the bus to enumerate endpoint devices */ pci_lock_rescan_remove(); From 15b6b243cc2b1017cf89e2477aa0b4e1a306a82a Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 25 Jun 2025 12:23:50 +0200 Subject: [PATCH 0353/2411] PCI: qcom: Wait PCIE_RESET_CONFIG_WAIT_MS after link-up IRQ Per PCIe r6.0, sec 6.6.1, software must generally wait a minimum of 100ms (PCIE_RESET_CONFIG_WAIT_MS) after Link training completes before sending a Configuration Request. 
Prior to 36971d6c5a9a ("PCI: qcom: Don't wait for link if we can detect Link Up"), qcom used dw_pcie_wait_for_link(), which waited between 0 and 90ms after the link came up before we enumerate the bus, and this was apparently enough for most devices. After 36971d6c5a9a, qcom_pcie_global_irq_thread() started enumeration immediately when handling the link-up IRQ, and devices (e.g., Laszlo Fiat's PLEXTOR PX-256M8PeGN NVMe SSD) may not be ready to handle config requests yet. Delay PCIE_RESET_CONFIG_WAIT_MS after the link-up IRQ before starting enumeration. Fixes: 82a823833f4e ("PCI: qcom: Add Qualcomm PCIe controller driver") Signed-off-by: Niklas Cassel Signed-off-by: Manivannan Sadhasivam Reviewed-by: Damien Le Moal Reviewed-by: Wilfred Mallawa Link: https://patch.msgid.link/20250625102347.1205584-13-cassel@kernel.org --- drivers/pci/controller/dwc/pcie-qcom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index c789e3f85655..9b12f2f02042 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -1564,6 +1564,7 @@ static irqreturn_t qcom_pcie_global_irq_thread(int irq, void *data) writel_relaxed(status, pcie->parf + PARF_INT_ALL_CLEAR); if (FIELD_GET(PARF_INT_ALL_LINK_UP, status)) { + msleep(PCIE_RESET_CONFIG_WAIT_MS); dev_dbg(dev, "Received Link up event. Starting enumeration!\n"); /* Rescan the bus to enumerate endpoint devices */ pci_lock_rescan_remove(); From 80dc18a0cba8dea42614f021b20a04354b213d86 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 25 Jun 2025 12:23:51 +0200 Subject: [PATCH 0354/2411] PCI: dwc: Ensure that dw_pcie_wait_for_link() waits 100 ms after link up As per PCIe r6.0, sec 6.6.1, with a Downstream Port that supports Link speeds greater than 5.0 GT/s, software must wait a minimum of 100 ms after Link training completes before sending a Configuration Request.
Add this delay in dw_pcie_wait_for_link(), after the link is reported as up. The delay will only be performed in the success case where the link came up. DWC glue drivers that have a link up IRQ (drivers that set use_linkup_irq = true) do not call dw_pcie_wait_for_link(), instead they perform this delay in their threaded link up IRQ handler. Signed-off-by: Niklas Cassel Signed-off-by: Manivannan Sadhasivam Reviewed-by: Damien Le Moal Reviewed-by: Wilfred Mallawa Link: https://patch.msgid.link/20250625102347.1205584-14-cassel@kernel.org --- drivers/pci/controller/dwc/pcie-designware.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c index 4d794964fa0f..053e9c540439 100644 --- a/drivers/pci/controller/dwc/pcie-designware.c +++ b/drivers/pci/controller/dwc/pcie-designware.c @@ -714,6 +714,14 @@ int dw_pcie_wait_for_link(struct dw_pcie *pci) return -ETIMEDOUT; } + /* + * As per PCIe r6.0, sec 6.6.1, a Downstream Port that supports Link + * speeds greater than 5.0 GT/s, software must wait a minimum of 100 ms + * after Link training completes before sending a Configuration Request. + */ + if (pci->max_link_speed > 2) + msleep(PCIE_RESET_CONFIG_WAIT_MS); + offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); val = dw_pcie_readw_dbi(pci, offset + PCI_EXP_LNKSTA); From d7467bc72ce4e3f64062017d6c9ae3816e8a7b0e Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 25 Jun 2025 12:23:52 +0200 Subject: [PATCH 0355/2411] PCI: Move link up wait time and max retries macros to pci.h Move the LINK_WAIT_SLEEP_MS and LINK_WAIT_MAX_RETRIES macros to pci.h. Prefix the macros with PCIE_ in order to avoid redefining these for drivers that already have macros named like this. No functional changes. 
Suggested-by: Manivannan Sadhasivam Signed-off-by: Niklas Cassel Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250625102347.1205584-15-cassel@kernel.org --- drivers/pci/controller/dwc/pcie-designware.c | 6 +++--- drivers/pci/controller/dwc/pcie-designware.h | 4 ---- drivers/pci/pci.h | 4 ++++ 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c index 053e9c540439..89aad5a08928 100644 --- a/drivers/pci/controller/dwc/pcie-designware.c +++ b/drivers/pci/controller/dwc/pcie-designware.c @@ -702,14 +702,14 @@ int dw_pcie_wait_for_link(struct dw_pcie *pci) int retries; /* Check if the link is up or not */ - for (retries = 0; retries < LINK_WAIT_MAX_RETRIES; retries++) { + for (retries = 0; retries < PCIE_LINK_WAIT_MAX_RETRIES; retries++) { if (dw_pcie_link_up(pci)) break; - msleep(LINK_WAIT_SLEEP_MS); + msleep(PCIE_LINK_WAIT_SLEEP_MS); } - if (retries >= LINK_WAIT_MAX_RETRIES) { + if (retries >= PCIE_LINK_WAIT_MAX_RETRIES) { dev_info(pci->dev, "Phy link never came up\n"); return -ETIMEDOUT; } diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h index ce9e18554e42..1bf1e08ab4c3 100644 --- a/drivers/pci/controller/dwc/pcie-designware.h +++ b/drivers/pci/controller/dwc/pcie-designware.h @@ -62,10 +62,6 @@ #define dw_pcie_cap_set(_pci, _cap) \ set_bit(DW_PCIE_CAP_ ## _cap, &(_pci)->caps) -/* Parameters for the waiting for link up routine */ -#define LINK_WAIT_MAX_RETRIES 10 -#define LINK_WAIT_SLEEP_MS 90 - /* Parameters for the waiting for iATU enabled routine */ #define LINK_WAIT_MAX_IATU_RETRIES 5 #define LINK_WAIT_IATU 9 diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 819833e57590..43cb77c27ac0 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -56,6 +56,10 @@ struct pcie_tlp_log; */ #define PCIE_RESET_CONFIG_WAIT_MS 100 +/* Parameters for the waiting for link up routine */ 
+#define PCIE_LINK_WAIT_MAX_RETRIES 10 +#define PCIE_LINK_WAIT_SLEEP_MS 90 + /* Message Routing (r[2:0]); PCIe r6.0, sec 2.2.8 */ #define PCIE_MSG_TYPE_R_RC 0 #define PCIE_MSG_TYPE_R_ADDR 1 From c8edb80494407f65a253ea63ffbae3fb831f397a Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Thu, 19 Jun 2025 12:18:15 +0100 Subject: [PATCH 0356/2411] pinctrl: samsung: rename exynosautov920_retention_data to no_retention_data To avoid having an exact copy of this struct for gs101 rename it and use it for both SoCs for eint banks. The purpose of this for exynosautov920 and gs101 is to obtain the PMU syscon for writing the calculated WAKEUP_MASK register(s). Signed-off-by: Peter Griffin Link: https://lore.kernel.org/r/20250619-gs101-eint-mask-v1-1-89438cfd7499@linaro.org Signed-off-by: Krzysztof Kozlowski --- drivers/pinctrl/samsung/pinctrl-exynos-arm64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c index 9fd894729a7b..5fe7c4b9f7bd 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c @@ -1405,7 +1405,7 @@ static const struct samsung_pin_bank_data exynosautov920_pin_banks7[] = { EXYNOSV920_PIN_BANK_EINTG(8, 0x8000, "gpg1", 0x18, 0x24, 0x28), }; -static const struct samsung_retention_data exynosautov920_retention_data __initconst = { +static const struct samsung_retention_data no_retention_data __initconst = { .regs = NULL, .nr_regs = 0, .value = 0, @@ -1421,7 +1421,7 @@ static const struct samsung_pin_ctrl exynosautov920_pin_ctrl[] = { .eint_wkup_init = exynos_eint_wkup_init, .suspend = exynosautov920_pinctrl_suspend, .resume = exynosautov920_pinctrl_resume, - .retention_data = &exynosautov920_retention_data, + .retention_data = &no_retention_data, }, { /* pin-controller instance 1 AUD data */ .pin_banks = exynosautov920_pin_banks1, @@ -1764,6 +1764,7 @@ static const struct samsung_pin_ctrl 
gs101_pin_ctrl[] __initconst = { .eint_wkup_init = exynos_eint_wkup_init, .suspend = gs101_pinctrl_suspend, .resume = gs101_pinctrl_resume, + .retention_data = &no_retention_data, }, { /* pin banks of gs101 pin-controller (FAR_ALIVE) */ .pin_banks = gs101_pin_far_alive, @@ -1771,6 +1772,7 @@ static const struct samsung_pin_ctrl gs101_pin_ctrl[] __initconst = { .eint_wkup_init = exynos_eint_wkup_init, .suspend = gs101_pinctrl_suspend, .resume = gs101_pinctrl_resume, + .retention_data = &no_retention_data, }, { /* pin banks of gs101 pin-controller (GSACORE) */ .pin_banks = gs101_pin_gsacore, From 2642f55d44ce563f227dd9c620eda0dec8d882be Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Thu, 19 Jun 2025 12:18:16 +0100 Subject: [PATCH 0357/2411] pinctrl: samsung: add support for gs101 wakeup mask programming gs101 differs to other currently supported SoCs in that it has 3 wakeup mask registers for the 67 external wakeup interrupt pins in alive and far_alive. EINT_WAKEUP_MASK 0x3A80 EINT[31:0] EINT_WAKEUP_MASK2 0x3A84 EINT[63:32] EINT_WAKEUP_MASK3 0x3A88 EINT[66:64] Add gs101 specific callbacks and a dedicated gs101_wkup_irq_chip struct to handle these differences. 
The current wakeup mask with upstream is programmed as WAKEUP_MASK0[0x3A80] value[0xFFFFFFFF] WAKEUP_MASK1[0x3A84] value[0xF2FFEFFF] WAKEUP_MASK2[0x3A88] value[0xFFFFFFFF] Which corresponds to the following wakeup sources: gpa7-3 vol down gpa8-1 vol up gpa10-1 power gpa8-2 typec-int Signed-off-by: Peter Griffin Link: https://lore.kernel.org/r/20250619-gs101-eint-mask-v1-2-89438cfd7499@linaro.org Signed-off-by: Krzysztof Kozlowski --- drivers/pinctrl/samsung/pinctrl-exynos.c | 100 +++++++++++++++++--- drivers/pinctrl/samsung/pinctrl-samsung.h | 4 + include/linux/soc/samsung/exynos-regs-pmu.h | 1 + 3 files changed, 91 insertions(+), 14 deletions(-) diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c index f3e1c11abe55..5554768d465f 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos.c @@ -32,18 +32,24 @@ #include "pinctrl-samsung.h" #include "pinctrl-exynos.h" +#define MAX_WAKEUP_REG 3 + struct exynos_irq_chip { struct irq_chip chip; u32 eint_con; u32 eint_mask; u32 eint_pend; - u32 *eint_wake_mask_value; + u32 eint_num_wakeup_reg; u32 eint_wake_mask_reg; void (*set_eint_wakeup_mask)(struct samsung_pinctrl_drv_data *drvdata, struct exynos_irq_chip *irq_chip); }; +static u32 eint_wake_mask_values[MAX_WAKEUP_REG] = { EXYNOS_EINT_WAKEUP_MASK_DISABLED, + EXYNOS_EINT_WAKEUP_MASK_DISABLED, + EXYNOS_EINT_WAKEUP_MASK_DISABLED}; + static inline struct exynos_irq_chip *to_exynos_irq_chip(struct irq_chip *chip) { return container_of(chip, struct exynos_irq_chip, chip); @@ -307,7 +313,7 @@ static const struct exynos_irq_chip exynos_gpio_irq_chip __initconst = { .eint_con = EXYNOS_GPIO_ECON_OFFSET, .eint_mask = EXYNOS_GPIO_EMASK_OFFSET, .eint_pend = EXYNOS_GPIO_EPEND_OFFSET, - /* eint_wake_mask_value not used */ + /* eint_wake_mask_values not used */ }; static int exynos_eint_irq_map(struct irq_domain *h, unsigned int virq, @@ -467,10 +473,55 @@ __init int exynos_eint_gpio_init(struct 
samsung_pinctrl_drv_data *d) return ret; } +#define BITS_PER_U32 32 +static int gs101_wkup_irq_set_wake(struct irq_data *irqd, unsigned int on) +{ + struct samsung_pin_bank *bank = irq_data_get_irq_chip_data(irqd); + struct samsung_pinctrl_drv_data *d = bank->drvdata; + u32 bit, wakeup_reg, shift; + + bit = bank->eint_num + irqd->hwirq; + wakeup_reg = bit / BITS_PER_U32; + shift = bit - (wakeup_reg * BITS_PER_U32); + + if (!on) + eint_wake_mask_values[wakeup_reg] |= BIT_U32(shift); + else + eint_wake_mask_values[wakeup_reg] &= ~BIT_U32(shift); + + dev_info(d->dev, "wake %s for irq %d\n", str_enabled_disabled(on), + irqd->irq); + + return 0; +} + +static void +gs101_pinctrl_set_eint_wakeup_mask(struct samsung_pinctrl_drv_data *drvdata, + struct exynos_irq_chip *irq_chip) +{ + struct regmap *pmu_regs; + + if (!drvdata->retention_ctrl || !drvdata->retention_ctrl->priv) { + dev_warn(drvdata->dev, + "No PMU syscon available. Wake-up mask will not be set.\n"); + return; + } + + pmu_regs = drvdata->retention_ctrl->priv; + + dev_dbg(drvdata->dev, "Setting external wakeup interrupt mask:\n"); + + for (int i = 0; i < irq_chip->eint_num_wakeup_reg; i++) { + dev_dbg(drvdata->dev, "\tWAKEUP_MASK%d[0x%X] value[0x%X]\n", + i, irq_chip->eint_wake_mask_reg + i * 4, + eint_wake_mask_values[i]); + regmap_write(pmu_regs, irq_chip->eint_wake_mask_reg + i * 4, + eint_wake_mask_values[i]); + } +} + static int exynos_wkup_irq_set_wake(struct irq_data *irqd, unsigned int on) { - struct irq_chip *chip = irq_data_get_irq_chip(irqd); - struct exynos_irq_chip *our_chip = to_exynos_irq_chip(chip); struct samsung_pin_bank *bank = irq_data_get_irq_chip_data(irqd); unsigned long bit = 1UL << (2 * bank->eint_offset + irqd->hwirq); @@ -478,9 +529,9 @@ static int exynos_wkup_irq_set_wake(struct irq_data *irqd, unsigned int on) irqd->irq, bank->name, irqd->hwirq); if (!on) - *our_chip->eint_wake_mask_value |= bit; + eint_wake_mask_values[0] |= bit; else - *our_chip->eint_wake_mask_value &= ~bit; + 
eint_wake_mask_values[0] &= ~bit; return 0; } @@ -500,10 +551,10 @@ exynos_pinctrl_set_eint_wakeup_mask(struct samsung_pinctrl_drv_data *drvdata, pmu_regs = drvdata->retention_ctrl->priv; dev_info(drvdata->dev, "Setting external wakeup interrupt mask: 0x%x\n", - *irq_chip->eint_wake_mask_value); + eint_wake_mask_values[0]); regmap_write(pmu_regs, irq_chip->eint_wake_mask_reg, - *irq_chip->eint_wake_mask_value); + eint_wake_mask_values[0]); } static void @@ -522,11 +573,10 @@ s5pv210_pinctrl_set_eint_wakeup_mask(struct samsung_pinctrl_drv_data *drvdata, clk_base = (void __iomem *) drvdata->retention_ctrl->priv; - __raw_writel(*irq_chip->eint_wake_mask_value, + __raw_writel(eint_wake_mask_values[0], clk_base + irq_chip->eint_wake_mask_reg); } -static u32 eint_wake_mask_value = EXYNOS_EINT_WAKEUP_MASK_DISABLED; /* * irq_chip for wakeup interrupts */ @@ -544,7 +594,7 @@ static const struct exynos_irq_chip s5pv210_wkup_irq_chip __initconst = { .eint_con = EXYNOS_WKUP_ECON_OFFSET, .eint_mask = EXYNOS_WKUP_EMASK_OFFSET, .eint_pend = EXYNOS_WKUP_EPEND_OFFSET, - .eint_wake_mask_value = &eint_wake_mask_value, + .eint_num_wakeup_reg = 1, /* Only differences with exynos4210_wkup_irq_chip: */ .eint_wake_mask_reg = S5PV210_EINT_WAKEUP_MASK, .set_eint_wakeup_mask = s5pv210_pinctrl_set_eint_wakeup_mask, @@ -564,7 +614,7 @@ static const struct exynos_irq_chip exynos4210_wkup_irq_chip __initconst = { .eint_con = EXYNOS_WKUP_ECON_OFFSET, .eint_mask = EXYNOS_WKUP_EMASK_OFFSET, .eint_pend = EXYNOS_WKUP_EPEND_OFFSET, - .eint_wake_mask_value = &eint_wake_mask_value, + .eint_num_wakeup_reg = 1, .eint_wake_mask_reg = EXYNOS_EINT_WAKEUP_MASK, .set_eint_wakeup_mask = exynos_pinctrl_set_eint_wakeup_mask, }; @@ -583,7 +633,7 @@ static const struct exynos_irq_chip exynos7_wkup_irq_chip __initconst = { .eint_con = EXYNOS7_WKUP_ECON_OFFSET, .eint_mask = EXYNOS7_WKUP_EMASK_OFFSET, .eint_pend = EXYNOS7_WKUP_EPEND_OFFSET, - .eint_wake_mask_value = &eint_wake_mask_value, + .eint_num_wakeup_reg = 1, 
.eint_wake_mask_reg = EXYNOS5433_EINT_WAKEUP_MASK, .set_eint_wakeup_mask = exynos_pinctrl_set_eint_wakeup_mask, }; @@ -599,13 +649,31 @@ static const struct exynos_irq_chip exynosautov920_wkup_irq_chip __initconst = { .irq_request_resources = exynos_irq_request_resources, .irq_release_resources = exynos_irq_release_resources, }, - .eint_wake_mask_value = &eint_wake_mask_value, + .eint_num_wakeup_reg = 1, .eint_wake_mask_reg = EXYNOS5433_EINT_WAKEUP_MASK, .set_eint_wakeup_mask = exynos_pinctrl_set_eint_wakeup_mask, }; +static const struct exynos_irq_chip gs101_wkup_irq_chip __initconst = { + .chip = { + .name = "gs101_wkup_irq_chip", + .irq_unmask = exynos_irq_unmask, + .irq_mask = exynos_irq_mask, + .irq_ack = exynos_irq_ack, + .irq_set_type = exynos_irq_set_type, + .irq_set_wake = gs101_wkup_irq_set_wake, + .irq_request_resources = exynos_irq_request_resources, + .irq_release_resources = exynos_irq_release_resources, + }, + .eint_num_wakeup_reg = 3, + .eint_wake_mask_reg = GS101_EINT_WAKEUP_MASK, + .set_eint_wakeup_mask = gs101_pinctrl_set_eint_wakeup_mask, +}; + /* list of external wakeup controllers supported */ static const struct of_device_id exynos_wkup_irq_ids[] = { + { .compatible = "google,gs101-wakeup-eint", + .data = &gs101_wkup_irq_chip }, { .compatible = "samsung,s5pv210-wakeup-eint", .data = &s5pv210_wkup_irq_chip }, { .compatible = "samsung,exynos4210-wakeup-eint", @@ -688,6 +756,7 @@ static void exynos_irq_demux_eint16_31(struct irq_desc *desc) chained_irq_exit(chip, desc); } +static int eint_num; /* * exynos_eint_wkup_init() - setup handling of external wakeup interrupts. * @d: driver data of samsung pinctrl driver. 
@@ -736,6 +805,9 @@ __init int exynos_eint_wkup_init(struct samsung_pinctrl_drv_data *d) return -ENXIO; } + bank->eint_num = eint_num; + eint_num = eint_num + bank->nr_pins; + if (!fwnode_property_present(bank->fwnode, "interrupts")) { bank->eint_type = EINT_TYPE_WKUP_MUX; ++muxed_banks; diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.h b/drivers/pinctrl/samsung/pinctrl-samsung.h index fcc57c244d16..1cabcbe1401a 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.h +++ b/drivers/pinctrl/samsung/pinctrl-samsung.h @@ -141,6 +141,7 @@ struct samsung_pin_bank_type { * @eint_type: type of the external interrupt supported by the bank. * @eint_mask: bit mask of pins which support EINT function. * @eint_offset: SoC-specific EINT register or interrupt offset of bank. + * @eint_num: total number of eint pins. * @eint_con_offset: ExynosAuto SoC-specific EINT control register offset of bank. * @eint_mask_offset: ExynosAuto SoC-specific EINT mask register offset of bank. * @eint_pend_offset: ExynosAuto SoC-specific EINT pend register offset of bank. @@ -156,6 +157,7 @@ struct samsung_pin_bank_data { enum eint_type eint_type; u32 eint_mask; u32 eint_offset; + u32 eint_num; u32 eint_con_offset; u32 eint_mask_offset; u32 eint_pend_offset; @@ -174,6 +176,7 @@ struct samsung_pin_bank_data { * @eint_type: type of the external interrupt supported by the bank. * @eint_mask: bit mask of pins which support EINT function. * @eint_offset: SoC-specific EINT register or interrupt offset of bank. + * @eint_num: total number of eint pins. * @eint_con_offset: ExynosAuto SoC-specific EINT register or interrupt offset of bank. * @eint_mask_offset: ExynosAuto SoC-specific EINT mask register offset of bank. * @eint_pend_offset: ExynosAuto SoC-specific EINT pend register offset of bank. 
@@ -201,6 +204,7 @@ struct samsung_pin_bank { enum eint_type eint_type; u32 eint_mask; u32 eint_offset; + u32 eint_num; u32 eint_con_offset; u32 eint_mask_offset; u32 eint_pend_offset; diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index 1a2c0e0838f9..938c6db235fb 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -669,6 +669,7 @@ #define GS101_CPU_INFORM(cpu) \ (GS101_CPU0_INFORM + (cpu*4)) #define GS101_SYSTEM_CONFIGURATION (0x3A00) +#define GS101_EINT_WAKEUP_MASK (0x3A80) #define GS101_PHY_CTRL_USB20 (0x3EB0) #define GS101_PHY_CTRL_USBDP (0x3EB4) From 9c9f4a27eb1096beb650f312a1ce996a9960b56c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 24 Jun 2025 14:05:00 -0700 Subject: [PATCH 0358/2411] perf debug: Add function symbols to dump_stack Symbolize stack traces by creating a live machine. Add this functionality to dump_stack and switch dump_stack users to use it. Switch TUI to use it. Add stack traces to the child test function which can be useful to diagnose blocked code. Example output: ``` $ perf test -vv PERF_RECORD_ ... 7: PERF_RECORD_* events & perf_sample fields: 7: PERF_RECORD_* events & perf_sample fields : Running (1 active) ^C Signal (2) while running tests. Terminating tests with the same signal Internal test harness failure. 
Completing any started tests: : 7: PERF_RECORD_* events & perf_sample fields: ---- unexpected signal (2) ---- #0 0x55788c6210a3 in child_test_sig_handler builtin-test.c:0 #1 0x7fc12fe49df0 in __restore_rt libc_sigaction.c:0 #2 0x7fc12fe99687 in __internal_syscall_cancel cancellation.c:64 #3 0x7fc12fee5f7a in clock_nanosleep@GLIBC_2.2.5 clock_nanosleep.c:72 #4 0x7fc12fef1393 in __nanosleep nanosleep.c:26 #5 0x7fc12ff02d68 in __sleep sleep.c:55 #6 0x55788c63196b in test__PERF_RECORD perf-record.c:0 #7 0x55788c620fb0 in run_test_child builtin-test.c:0 #8 0x55788c5bd18d in start_command run-command.c:127 #9 0x55788c621ef3 in __cmd_test builtin-test.c:0 #10 0x55788c6225bf in cmd_test ??:0 #11 0x55788c5afbd0 in run_builtin perf.c:0 #12 0x55788c5afeeb in handle_internal_command perf.c:0 #13 0x55788c52b383 in main ??:0 #14 0x7fc12fe33ca8 in __libc_start_call_main libc_start_call_main.h:74 #15 0x7fc12fe33d65 in __libc_start_main@@GLIBC_2.34 libc-start.c:128 #16 0x55788c52b9d1 in _start ??:0 ---- unexpected signal (2) ---- #0 0x55788c6210a3 in child_test_sig_handler builtin-test.c:0 #1 0x7fc12fe49df0 in __restore_rt libc_sigaction.c:0 #2 0x7fc12fea3a14 in pthread_sigmask@GLIBC_2.2.5 pthread_sigmask.c:45 #3 0x7fc12fe49fd9 in __GI___sigprocmask sigprocmask.c:26 #4 0x7fc12ff2601b in __longjmp_chk longjmp.c:36 #5 0x55788c6210c0 in print_test_result.isra.0 builtin-test.c:0 #6 0x7fc12fe49df0 in __restore_rt libc_sigaction.c:0 #7 0x7fc12fe99687 in __internal_syscall_cancel cancellation.c:64 #8 0x7fc12fee5f7a in clock_nanosleep@GLIBC_2.2.5 clock_nanosleep.c:72 #9 0x7fc12fef1393 in __nanosleep nanosleep.c:26 #10 0x7fc12ff02d68 in __sleep sleep.c:55 #11 0x55788c63196b in test__PERF_RECORD perf-record.c:0 #12 0x55788c620fb0 in run_test_child builtin-test.c:0 #13 0x55788c5bd18d in start_command run-command.c:127 #14 0x55788c621ef3 in __cmd_test builtin-test.c:0 #15 0x55788c6225bf in cmd_test ??:0 #16 0x55788c5afbd0 in run_builtin perf.c:0 #17 0x55788c5afeeb in handle_internal_command 
perf.c:0 #18 0x55788c52b383 in main ??:0 #19 0x7fc12fe33ca8 in __libc_start_call_main libc_start_call_main.h:74 #20 0x7fc12fe33d65 in __libc_start_main@@GLIBC_2.34 libc-start.c:128 #21 0x55788c52b9d1 in _start ??:0 7: PERF_RECORD_* events & perf_sample fields : Skip (permissions) ``` Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250624210500.2121303-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/tests/builtin-test.c | 15 +++++++- tools/perf/ui/tui/setup.c | 2 +- tools/perf/util/debug.c | 68 +++++++++++++++++++++++++++------ tools/perf/util/debug.h | 1 + 4 files changed, 73 insertions(+), 13 deletions(-) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 45d3d8b3317a..80375ca39a37 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -6,6 +6,9 @@ */ #include #include +#ifdef HAVE_BACKTRACE_SUPPORT +#include +#endif #include #include #include @@ -231,6 +234,16 @@ static jmp_buf run_test_jmp_buf; static void child_test_sig_handler(int sig) { +#ifdef HAVE_BACKTRACE_SUPPORT + void *stackdump[32]; + size_t stackdump_size; +#endif + + fprintf(stderr, "\n---- unexpected signal (%d) ----\n", sig); +#ifdef HAVE_BACKTRACE_SUPPORT + stackdump_size = backtrace(stackdump, ARRAY_SIZE(stackdump)); + __dump_stack(stderr, stackdump, stackdump_size); +#endif siglongjmp(run_test_jmp_buf, sig); } @@ -244,7 +257,7 @@ static int run_test_child(struct child_process *process) err = sigsetjmp(run_test_jmp_buf, 1); if (err) { - fprintf(stderr, "\n---- unexpected signal (%d) ----\n", err); + /* Received signal. */ err = err > 0 ? 
-err : -1; goto err_out; } diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index 16c6eff4d241..022534eed68c 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -108,7 +108,7 @@ static void ui__signal_backtrace(int sig) printf("-------- backtrace --------\n"); size = backtrace(stackdump, ARRAY_SIZE(stackdump)); - backtrace_symbols_fd(stackdump, size, STDOUT_FILENO); + __dump_stack(stdout, stackdump, size); exit(0); } diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index f9ef7d045c92..2878a7363ac8 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -14,11 +14,18 @@ #ifdef HAVE_BACKTRACE_SUPPORT #include #endif +#include "addr_location.h" #include "color.h" -#include "event.h" #include "debug.h" +#include "event.h" +#include "machine.h" +#include "map.h" #include "print_binary.h" +#include "srcline.h" +#include "symbol.h" +#include "synthetic-events.h" #include "target.h" +#include "thread.h" #include "trace-event.h" #include "ui/helpline.h" #include "ui/ui.h" @@ -298,21 +305,60 @@ void perf_debug_setup(void) libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper); } +void __dump_stack(FILE *file, void **stackdump, size_t stackdump_size) +{ + /* TODO: async safety. printf, malloc, etc. aren't safe inside a signal handler. */ + pid_t pid = getpid(); + struct machine *machine = machine__new_live(/*kernel_maps=*/false, pid); + struct thread *thread = NULL; + + if (machine) + thread = machine__find_thread(machine, pid, pid); + +#ifdef HAVE_BACKTRACE_SUPPORT + if (!machine || !thread) { + /* + * Backtrace functions are async signal safe. Fall back on them + * if machine/thread creation fails. 
+ */ + backtrace_symbols_fd(stackdump, stackdump_size, fileno(file)); + machine__delete(machine); + return; + } +#endif + + for (size_t i = 0; i < stackdump_size; i++) { + struct addr_location al; + u64 addr = (u64)(uintptr_t)stackdump[i]; + bool printed = false; + + addr_location__init(&al); + if (thread && thread__find_map(thread, PERF_RECORD_MISC_USER, addr, &al)) { + al.sym = map__find_symbol(al.map, al.addr); + if (al.sym) { + fprintf(file, " #%zd %p in %s ", i, stackdump[i], al.sym->name); + printed = true; + } + } + if (!printed) + fprintf(file, " #%zd %p ", i, stackdump[i]); + + map__fprintf_srcline(al.map, al.addr, "", file); + fprintf(file, "\n"); + addr_location__exit(&al); + } + thread__put(thread); + machine__delete(machine); +} + /* Obtain a backtrace and print it to stdout. */ #ifdef HAVE_BACKTRACE_SUPPORT void dump_stack(void) { - void *array[16]; - size_t size = backtrace(array, ARRAY_SIZE(array)); - char **strings = backtrace_symbols(array, size); - size_t i; + void *stackdump[32]; + size_t size = backtrace(stackdump, ARRAY_SIZE(stackdump)); - printf("Obtained %zd stack frames.\n", size); - - for (i = 0; i < size; i++) - printf("%s\n", strings[i]); - - free(strings); + __dump_stack(stdout, stackdump, size); } #else void dump_stack(void) {} diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index a4026d1fd6a3..6b737e195ce1 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -85,6 +85,7 @@ void debug_set_display_time(bool set); void perf_debug_setup(void); int perf_quiet_option(void); +void __dump_stack(FILE *file, void **stackdump, size_t stackdump_size); void dump_stack(void); void sighandler_dump_stack(int sig); From e1ec69ed5ded5351efb04218dcab9d79ab018ac5 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 24 Jun 2025 16:18:35 -0700 Subject: [PATCH 0359/2411] perf parse-events: Avoid scanning PMUs that can't contain events Add perf_pmus__scan_for_event that only reads sysfs for pmus that could contain a given 
event. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250624231837.179536-2-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/parse-events.c | 33 ++++++++------- tools/perf/util/pmus.c | 77 ++++++++++++++++++++++++++++++++++ tools/perf/util/pmus.h | 2 + 3 files changed, 97 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d1965a7b97ed..4cd64ffa4fcd 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -490,7 +490,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name, int ret = 0; struct evsel *first_wildcard_match = NULL; - while ((pmu = perf_pmus__scan(pmu)) != NULL) { + while ((pmu = perf_pmus__scan_for_event(pmu, name)) != NULL) { LIST_HEAD(config_terms); struct perf_event_attr attr; @@ -1681,7 +1681,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, INIT_LIST_HEAD(list); - while ((pmu = perf_pmus__scan(pmu)) != NULL) { + while ((pmu = perf_pmus__scan_for_event(pmu, event_name)) != NULL) { + if (parse_events__filter_pmu(parse_state, pmu)) continue; @@ -1760,19 +1761,21 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state pmu = NULL; /* Failed to add, try wildcard expansion of event_or_pmu as a PMU name. 
*/ - while ((pmu = perf_pmus__scan(pmu)) != NULL) { - if (!parse_events__filter_pmu(parse_state, pmu) && - perf_pmu__wildcard_match(pmu, event_or_pmu)) { - if (!parse_events_add_pmu(parse_state, *listp, pmu, - const_parsed_terms, - first_wildcard_match, - /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) { - ok++; - parse_state->wild_card_pmus = true; - } - if (first_wildcard_match == NULL) - first_wildcard_match = - container_of((*listp)->prev, struct evsel, core.node); + while ((pmu = perf_pmus__scan_matching_wildcard(pmu, event_or_pmu)) != NULL) { + + if (parse_events__filter_pmu(parse_state, pmu)) + continue; + + if (!parse_events_add_pmu(parse_state, *listp, pmu, + const_parsed_terms, + first_wildcard_match, + /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) { + ok++; + parse_state->wild_card_pmus = true; + } + if (first_wildcard_match == NULL) { + first_wildcard_match = + container_of((*listp)->prev, struct evsel, core.node); } } if (ok) diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c index 3bbd26fec78a..e0094f56b8e7 100644 --- a/tools/perf/util/pmus.c +++ b/tools/perf/util/pmus.c @@ -19,6 +19,7 @@ #include "tool_pmu.h" #include "print-events.h" #include "strbuf.h" +#include "string2.h" /* * core_pmus: A PMU belongs to core_pmus if it's name is "cpu" or it's sysfs @@ -350,6 +351,82 @@ struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu) return NULL; } +struct perf_pmu *perf_pmus__scan_for_event(struct perf_pmu *pmu, const char *event) +{ + bool use_core_pmus = !pmu || pmu->is_core; + + if (!pmu) { + /* Hwmon filename values that aren't used. */ + enum hwmon_type type; + int number; + /* + * Core PMUs, other sysfs PMUs and tool PMU can take all event + * types or aren't worth optimizing for. + */ + unsigned int to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK | + PERF_TOOL_PMU_TYPE_PE_OTHER_MASK | + PERF_TOOL_PMU_TYPE_TOOL_MASK; + + /* Could the event be a hwmon event? 
*/ + if (parse_hwmon_filename(event, &type, &number, /*item=*/NULL, /*alarm=*/NULL)) + to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK; + + pmu_read_sysfs(to_read_pmus); + pmu = list_prepare_entry(pmu, &core_pmus, list); + } + if (use_core_pmus) { + list_for_each_entry_continue(pmu, &core_pmus, list) + return pmu; + + pmu = NULL; + pmu = list_prepare_entry(pmu, &other_pmus, list); + } + list_for_each_entry_continue(pmu, &other_pmus, list) + return pmu; + return NULL; +} + +struct perf_pmu *perf_pmus__scan_matching_wildcard(struct perf_pmu *pmu, const char *wildcard) +{ + bool use_core_pmus = !pmu || pmu->is_core; + + if (!pmu) { + /* + * Core PMUs, other sysfs PMUs and tool PMU can have any name or + * aren't worth optimizing for. + */ + unsigned int to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK | + PERF_TOOL_PMU_TYPE_PE_OTHER_MASK | + PERF_TOOL_PMU_TYPE_TOOL_MASK; + + /* + * Hwmon PMUs have an alias from a sysfs name like hwmon0, + * hwmon1, etc. or have a name of hwmon_. They therefore + * can only have a wildcard match if the wildcard begins with + * "hwmon". 
+ */ + if (strisglob(wildcard) || + (strlen(wildcard) >= 5 && strncmp("hwmon", wildcard, 5) == 0)) + to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK; + + pmu_read_sysfs(to_read_pmus); + pmu = list_prepare_entry(pmu, &core_pmus, list); + } + if (use_core_pmus) { + list_for_each_entry_continue(pmu, &core_pmus, list) { + if (perf_pmu__wildcard_match(pmu, wildcard)) + return pmu; + } + pmu = NULL; + pmu = list_prepare_entry(pmu, &other_pmus, list); + } + list_for_each_entry_continue(pmu, &other_pmus, list) { + if (perf_pmu__wildcard_match(pmu, wildcard)) + return pmu; + } + return NULL; +} + static struct perf_pmu *perf_pmus__scan_skip_duplicates(struct perf_pmu *pmu) { bool use_core_pmus = !pmu || pmu->is_core; diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h index 8def20e615ad..2794d8c3a466 100644 --- a/tools/perf/util/pmus.h +++ b/tools/perf/util/pmus.h @@ -19,6 +19,8 @@ struct perf_pmu *perf_pmus__find_by_type(unsigned int type); struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu); struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu); +struct perf_pmu *perf_pmus__scan_for_event(struct perf_pmu *pmu, const char *event); +struct perf_pmu *perf_pmus__scan_matching_wildcard(struct perf_pmu *pmu, const char *wildcard); const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str); From 28917cb17f9df9c2fc83449feefa375609b38fa4 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 24 Jun 2025 16:18:36 -0700 Subject: [PATCH 0360/2411] perf drm_pmu: Add a tool like PMU to expose DRM information DRM clients expose information through usage stats as documented in Documentation/gpu/drm-usage-stats.rst (available online at https://docs.kernel.org/gpu/drm-usage-stats.html). Add a tool like PMU, similar to the hwmon PMU, that exposes DRM information. For example on a tigerlake laptop: ``` $ perf list drm List of pre-defined events (to be used in -e or -M): drm: drm-active-stolen-system0 [Total memory active in one or more engines. 
Unit: drm_i915] drm-active-system0 [Total memory active in one or more engines. Unit: drm_i915] drm-engine-capacity-video [Engine capacity. Unit: drm_i915] drm-engine-copy [Utilization in ns. Unit: drm_i915] drm-engine-render [Utilization in ns. Unit: drm_i915] drm-engine-video [Utilization in ns. Unit: drm_i915] drm-engine-video-enhance [Utilization in ns. Unit: drm_i915] drm-purgeable-stolen-system0 [Size of resident and purgeable memory bufers. Unit: drm_i915] drm-purgeable-system0 [Size of resident and purgeable memory bufers. Unit: drm_i915] drm-resident-stolen-system0 [Size of resident memory bufers. Unit: drm_i915] drm-resident-system0 [Size of resident memory bufers. Unit: drm_i915] drm-shared-stolen-system0 [Size of shared memory bufers. Unit: drm_i915] drm-shared-system0 [Size of shared memory bufers. Unit: drm_i915] drm-total-stolen-system0 [Size of shared and private memory. Unit: drm_i915] drm-total-system0 [Size of shared and private memory. Unit: drm_i915] ``` System wide data can be gathered: ``` $ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0 1.000904910,0,bytes,drm-active-stolen-system0,1,100.00,, 1.000904910,0,bytes,drm-active-system0,1,100.00,, 1.000904910,36,capacity,drm-engine-capacity-video,1,100.00,, 1.000904910,0,ns,drm-engine-copy,1,100.00,, 1.000904910,1472970566175,ns,drm-engine-render,1,100.00,, 1.000904910,0,ns,drm-engine-video,1,100.00,, 1.000904910,0,ns,drm-engine-video-enhance,1,100.00,, 1.000904910,0,bytes,drm-purgeable-stolen-system0,1,100.00,, 1.000904910,38199296,bytes,drm-purgeable-system0,1,100.00,, 1.000904910,0,bytes,drm-resident-stolen-system0,1,100.00,, 1.000904910,4643196928,bytes,drm-resident-system0,1,100.00,, 
1.000904910,0,bytes,drm-shared-stolen-system0,1,100.00,, 1.000904910,1886871552,bytes,drm-shared-system0,1,100.00,, 1.000904910,0,bytes,drm-total-stolen-system0,1,100.00,, 1.000904910,4643196928,bytes,drm-total-system0,1,100.00,, 2.264426839,0,bytes,drm-active-stolen-system0,1,100.00,, ``` Or for a particular process: ``` $ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0 -p 200027 1.001040274,0,bytes,drm-active-stolen-system0,6,100.00,, 1.001040274,0,bytes,drm-active-system0,6,100.00,, 1.001040274,12,capacity,drm-engine-capacity-video,6,100.00,, 1.001040274,0,ns,drm-engine-copy,6,100.00,, 1.001040274,1542300,ns,drm-engine-render,6,100.00,, 1.001040274,0,ns,drm-engine-video,6,100.00,, 1.001040274,0,ns,drm-engine-video-enhance,6,100.00,, 1.001040274,0,bytes,drm-purgeable-stolen-system0,6,100.00,, 1.001040274,13516800,bytes,drm-purgeable-system0,6,100.00,, 1.001040274,0,bytes,drm-resident-stolen-system0,6,100.00,, 1.001040274,27746304,bytes,drm-resident-system0,6,100.00,, 1.001040274,0,bytes,drm-shared-stolen-system0,6,100.00,, 1.001040274,0,bytes,drm-shared-system0,6,100.00,, 1.001040274,0,bytes,drm-total-stolen-system0,6,100.00,, 1.001040274,27746304,bytes,drm-total-system0,6,100.00,, 2.016629075,0,bytes,drm-active-stolen-system0,6,100.00,, ``` As with the hwmon PMU, high numbered PMU types are used to encode multiple possible "DRM" PMUs. The appropriate fdinfo is found by scanning /proc and filtering which fdinfos to read with stat. To avoid some unneeded scanning, events not starting with "drm-" are ignored. 
The patch builds on commit 57e13264dcea ("perf pmus: Restructure pmu_read_sysfs to scan fewer PMUs") and later so that only if full wild carding is being done, the PMU starts with "drm_" or the event starts with "drm-" will /proc be scanned. That is there should be little to no cost in this PMU unless DRM events are requested. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250624231837.179536-3-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/Build | 1 + tools/perf/util/drm_pmu.c | 686 ++++++++++++++++++++++++++++++++++++++ tools/perf/util/drm_pmu.h | 39 +++ tools/perf/util/evsel.c | 9 + tools/perf/util/pmu.c | 15 + tools/perf/util/pmu.h | 4 +- tools/perf/util/pmus.c | 30 +- 7 files changed, 779 insertions(+), 5 deletions(-) create mode 100644 tools/perf/util/drm_pmu.c create mode 100644 tools/perf/util/drm_pmu.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 7910d908c814..8a23eb767fb2 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -84,6 +84,7 @@ perf-util-y += pmu.o perf-util-y += pmus.o perf-util-y += pmu-flex.o perf-util-y += pmu-bison.o +perf-util-y += drm_pmu.o perf-util-y += hwmon_pmu.o perf-util-y += tool_pmu.o perf-util-y += svghelper.o diff --git a/tools/perf/util/drm_pmu.c b/tools/perf/util/drm_pmu.c new file mode 100644 index 000000000000..17385a10005b --- /dev/null +++ b/tools/perf/util/drm_pmu.c @@ -0,0 +1,686 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#include "drm_pmu.h" +#include "counts.h" +#include "cpumap.h" +#include "debug.h" +#include "evsel.h" +#include "pmu.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +enum drm_pmu_unit { + DRM_PMU_UNIT_BYTES, + DRM_PMU_UNIT_CAPACITY, + DRM_PMU_UNIT_CYCLES, + DRM_PMU_UNIT_HZ, + DRM_PMU_UNIT_NS, + + DRM_PMU_UNIT_MAX, +}; + +struct drm_pmu_event { + const char *name; + const char *desc; + enum drm_pmu_unit unit; +}; + +struct 
drm_pmu { + struct perf_pmu pmu; + struct drm_pmu_event *events; + int num_events; +}; + +static const char * const drm_pmu_unit_strs[DRM_PMU_UNIT_MAX] = { + "bytes", + "capacity", + "cycles", + "hz", + "ns", +}; + +static const char * const drm_pmu_scale_unit_strs[DRM_PMU_UNIT_MAX] = { + "1bytes", + "1capacity", + "1cycles", + "1hz", + "1ns", +}; + +bool perf_pmu__is_drm(const struct perf_pmu *pmu) +{ + return pmu && pmu->type >= PERF_PMU_TYPE_DRM_START && + pmu->type <= PERF_PMU_TYPE_DRM_END; +} + +bool evsel__is_drm(const struct evsel *evsel) +{ + return perf_pmu__is_drm(evsel->pmu); +} + +static struct drm_pmu *add_drm_pmu(struct list_head *pmus, char *line, size_t line_len) +{ + struct drm_pmu *drm; + struct perf_pmu *pmu; + const char *name; + __u32 max_drm_pmu_type = 0, type; + int i = 12; + + if (line[line_len - 1] == '\n') + line[line_len - 1] = '\0'; + while (isspace(line[i])) + i++; + + line[--i] = '_'; + line[--i] = 'm'; + line[--i] = 'r'; + line[--i] = 'd'; + name = &line[i]; + + list_for_each_entry(pmu, pmus, list) { + if (!perf_pmu__is_drm(pmu)) + continue; + if (pmu->type > max_drm_pmu_type) + max_drm_pmu_type = pmu->type; + if (!strcmp(pmu->name, name)) { + /* PMU already exists. 
*/ + return NULL; + } + } + + if (max_drm_pmu_type != 0) + type = max_drm_pmu_type + 1; + else + type = PERF_PMU_TYPE_DRM_START; + + if (type > PERF_PMU_TYPE_DRM_END) { + zfree(&drm); + pr_err("Unable to encode DRM PMU type for %s\n", name); + return NULL; + } + + drm = zalloc(sizeof(*drm)); + if (!drm) + return NULL; + + if (perf_pmu__init(&drm->pmu, type, name) != 0) { + perf_pmu__delete(&drm->pmu); + return NULL; + } + + drm->pmu.cpus = perf_cpu_map__new("0"); + if (!drm->pmu.cpus) { + perf_pmu__delete(&drm->pmu); + return NULL; + } + return drm; +} + + +static bool starts_with(const char *str, const char *prefix) +{ + return !strncmp(prefix, str, strlen(prefix)); +} + +static int add_event(struct drm_pmu_event **events, int *num_events, + const char *line, enum drm_pmu_unit unit, const char *desc) +{ + const char *colon = strchr(line, ':'); + struct drm_pmu_event *tmp; + + if (!colon) + return -EINVAL; + + tmp = reallocarray(*events, *num_events + 1, sizeof(struct drm_pmu_event)); + if (!tmp) + return -ENOMEM; + tmp[*num_events].unit = unit; + tmp[*num_events].desc = desc; + tmp[*num_events].name = strndup(line, colon - line); + if (!tmp[*num_events].name) + return -ENOMEM; + (*num_events)++; + *events = tmp; + return 0; +} + +static int read_drm_pmus_cb(void *args, int fdinfo_dir_fd, const char *fd_name) +{ + struct list_head *pmus = args; + char buf[640]; + struct io io; + char *line = NULL; + size_t line_len; + struct drm_pmu *drm = NULL; + struct drm_pmu_event *events = NULL; + int num_events = 0; + + io__init(&io, openat(fdinfo_dir_fd, fd_name, O_RDONLY), buf, sizeof(buf)); + if (io.fd == -1) { + /* Failed to open file, ignore. */ + return 0; + } + + while (io__getline(&io, &line, &line_len) > 0) { + if (starts_with(line, "drm-driver:")) { + drm = add_drm_pmu(pmus, line, line_len); + if (!drm) + break; + continue; + } + /* + * Note the string matching below is alphabetical, with more + * specific matches appearing before less specific. 
+ */ + if (starts_with(line, "drm-active-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES, + "Total memory active in one or more engines"); + continue; + } + if (starts_with(line, "drm-cycles-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_CYCLES, + "Busy cycles"); + continue; + } + if (starts_with(line, "drm-engine-capacity-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_CAPACITY, + "Engine capacity"); + continue; + } + if (starts_with(line, "drm-engine-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_NS, + "Utilization in ns"); + continue; + } + if (starts_with(line, "drm-maxfreq-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_HZ, + "Maximum frequency"); + continue; + } + if (starts_with(line, "drm-purgeable-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES, + "Size of resident and purgeable memory bufers"); + continue; + } + if (starts_with(line, "drm-resident-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES, + "Size of resident memory bufers"); + continue; + } + if (starts_with(line, "drm-shared-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES, + "Size of shared memory bufers"); + continue; + } + if (starts_with(line, "drm-total-cycles-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES, + "Total busy cycles"); + continue; + } + if (starts_with(line, "drm-total-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES, + "Size of shared and private memory"); + continue; + } + if (verbose > 1 && starts_with(line, "drm-") && + !starts_with(line, "drm-client-id:") && + !starts_with(line, "drm-pdev:")) + pr_debug("Unhandled DRM PMU fdinfo line match '%s'\n", line); + } + if (drm) { + drm->events = events; + drm->num_events = num_events; + list_add_tail(&drm->pmu.list, pmus); + } + free(line); + if (io.fd != -1) + close(io.fd); + return 0; +} + +void drm_pmu__exit(struct perf_pmu *pmu) +{ + struct drm_pmu *drm = container_of(pmu, struct 
drm_pmu, pmu); + + free(drm->events); +} + +bool drm_pmu__have_event(const struct perf_pmu *pmu, const char *name) +{ + struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu); + + if (!starts_with(name, "drm-")) + return false; + + for (int i = 0; i < drm->num_events; i++) { + if (!strcasecmp(drm->events[i].name, name)) + return true; + } + return false; +} + +int drm_pmu__for_each_event(const struct perf_pmu *pmu, void *state, pmu_event_callback cb) +{ + struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu); + + for (int i = 0; i < drm->num_events; i++) { + char encoding_buf[128]; + struct pmu_event_info info = { + .pmu = pmu, + .name = drm->events[i].name, + .alias = NULL, + .scale_unit = drm_pmu_scale_unit_strs[drm->events[i].unit], + .desc = drm->events[i].desc, + .long_desc = NULL, + .encoding_desc = encoding_buf, + .topic = "drm", + .pmu_name = pmu->name, + .event_type_desc = "DRM event", + }; + int ret; + + snprintf(encoding_buf, sizeof(encoding_buf), "%s/config=0x%x/", pmu->name, i); + + ret = cb(state, &info); + if (ret) + return ret; + } + return 0; +} + +size_t drm_pmu__num_events(const struct perf_pmu *pmu) +{ + const struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu); + + return drm->num_events; +} + +static int drm_pmu__index_for_event(const struct drm_pmu *drm, const char *name) +{ + for (int i = 0; i < drm->num_events; i++) { + if (!strcmp(drm->events[i].name, name)) + return i; + } + return -1; +} + +static int drm_pmu__config_term(const struct drm_pmu *drm, + struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err) +{ + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) { + int i = drm_pmu__index_for_event(drm, term->config); + + if (i >= 0) { + attr->config = i; + return 0; + } + } + if (err) { + char *err_str; + + parse_events_error__handle(err, term->err_val, + asprintf(&err_str, + "unexpected drm event term (%s) %s", + parse_events__term_type_str(term->type_term), + 
term->config) < 0 + ? strdup("unexpected drm event term") + : err_str, + NULL); + } + return -EINVAL; +} + +int drm_pmu__config_terms(const struct perf_pmu *pmu, + struct perf_event_attr *attr, + struct parse_events_terms *terms, + struct parse_events_error *err) +{ + struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu); + struct parse_events_term *term; + + list_for_each_entry(term, &terms->terms, list) { + if (drm_pmu__config_term(drm, attr, term, err)) + return -EINVAL; + } + + return 0; +} + +int drm_pmu__check_alias(const struct perf_pmu *pmu, struct parse_events_terms *terms, + struct perf_pmu_info *info, struct parse_events_error *err) +{ + struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu); + struct parse_events_term *term = + list_first_entry(&terms->terms, struct parse_events_term, list); + + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) { + int i = drm_pmu__index_for_event(drm, term->config); + + if (i >= 0) { + info->unit = drm_pmu_unit_strs[drm->events[i].unit]; + info->scale = 1; + return 0; + } + } + if (err) { + char *err_str; + + parse_events_error__handle(err, term->err_val, + asprintf(&err_str, + "unexpected drm event term (%s) %s", + parse_events__term_type_str(term->type_term), + term->config) < 0 + ? strdup("unexpected drm event term") + : err_str, + NULL); + } + return -EINVAL; +} + +struct minor_info { + unsigned int *minors; + int minors_num, minors_len; +}; + +static int for_each_drm_fdinfo_in_dir(int (*cb)(void *args, int fdinfo_dir_fd, const char *fd_name), + void *args, int proc_dir, const char *pid_name, + struct minor_info *minors) +{ + char buf[256]; + DIR *fd_dir; + struct dirent *fd_entry; + int fd_dir_fd, fdinfo_dir_fd = -1; + + + scnprintf(buf, sizeof(buf), "%s/fd", pid_name); + fd_dir_fd = openat(proc_dir, buf, O_DIRECTORY); + if (fd_dir_fd == -1) + return 0; /* Presumably lost race to open. 
*/ + fd_dir = fdopendir(fd_dir_fd); + if (!fd_dir) { + close(fd_dir_fd); + return -ENOMEM; + } + while ((fd_entry = readdir(fd_dir)) != NULL) { + struct stat stat; + unsigned int minor; + bool is_dup = false; + int ret; + + if (fd_entry->d_type != DT_LNK) + continue; + + if (fstatat(fd_dir_fd, fd_entry->d_name, &stat, 0) != 0) + continue; + + if ((stat.st_mode & S_IFMT) != S_IFCHR || major(stat.st_rdev) != 226) + continue; + + minor = minor(stat.st_rdev); + for (int i = 0; i < minors->minors_num; i++) { + if (minor(stat.st_rdev) == minors->minors[i]) { + is_dup = true; + break; + } + } + if (is_dup) + continue; + + if (minors->minors_num == minors->minors_len) { + unsigned int *tmp = reallocarray(minors->minors, minors->minors_len + 4, + sizeof(unsigned int)); + + if (tmp) { + minors->minors = tmp; + minors->minors_len += 4; + } + } + minors->minors[minors->minors_num++] = minor; + if (fdinfo_dir_fd == -1) { + /* Open fdinfo dir if we have a DRM fd. */ + scnprintf(buf, sizeof(buf), "%s/fdinfo", pid_name); + fdinfo_dir_fd = openat(proc_dir, buf, O_DIRECTORY); + if (fdinfo_dir_fd == -1) + continue; + } + ret = cb(args, fdinfo_dir_fd, fd_entry->d_name); + if (ret) + return ret; + } + if (fdinfo_dir_fd != -1) + close(fdinfo_dir_fd); + closedir(fd_dir); + return 0; +} + +static int for_each_drm_fdinfo(bool skip_all_duplicates, + int (*cb)(void *args, int fdinfo_dir_fd, const char *fd_name), + void *args) +{ + DIR *proc_dir; + struct dirent *proc_entry; + int ret; + /* + * minors maintains an array of DRM minor device numbers seen for a pid, + * or for all pids if skip_all_duplicates is true, so that duplicates + * are ignored. + */ + struct minor_info minors = { + .minors = NULL, + .minors_num = 0, + .minors_len = 0, + }; + + proc_dir = opendir(procfs__mountpoint()); + if (!proc_dir) + return 0; + + /* Walk through the /proc directory. 
*/ + while ((proc_entry = readdir(proc_dir)) != NULL) { + if (proc_entry->d_type != DT_DIR || + !isdigit(proc_entry->d_name[0])) + continue; + if (!skip_all_duplicates) { + /* Reset the seen minor numbers for each pid. */ + minors.minors_num = 0; + } + ret = for_each_drm_fdinfo_in_dir(cb, args, + dirfd(proc_dir), proc_entry->d_name, + &minors); + if (ret) + break; + } + free(minors.minors); + closedir(proc_dir); + return ret; +} + +int perf_pmus__read_drm_pmus(struct list_head *pmus) +{ + return for_each_drm_fdinfo(/*skip_all_duplicates=*/true, read_drm_pmus_cb, pmus); +} + +int evsel__drm_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx) +{ + (void)evsel; + (void)threads; + (void)start_cpu_map_idx; + (void)end_cpu_map_idx; + return 0; +} + +static uint64_t read_count_and_apply_unit(const char *count_and_unit, enum drm_pmu_unit unit) +{ + char *unit_ptr = NULL; + uint64_t count = strtoul(count_and_unit, &unit_ptr, 10); + + if (!unit_ptr) + return 0; + + while (isblank(*unit_ptr)) + unit_ptr++; + + switch (unit) { + case DRM_PMU_UNIT_BYTES: + if (*unit_ptr == '\0') + assert(count == 0); /* Generally undocumented, happens for 0. */ + else if (!strcmp(unit_ptr, "KiB")) + count *= 1024; + else if (!strcmp(unit_ptr, "MiB")) + count *= 1024 * 1024; + else + pr_err("Unexpected bytes unit '%s'\n", unit_ptr); + break; + case DRM_PMU_UNIT_CAPACITY: + /* No units expected. */ + break; + case DRM_PMU_UNIT_CYCLES: + /* No units expected. */ + break; + case DRM_PMU_UNIT_HZ: + if (!strcmp(unit_ptr, "Hz")) + count *= 1; + else if (!strcmp(unit_ptr, "KHz")) + count *= 1000; + else if (!strcmp(unit_ptr, "MHz")) + count *= 1000000; + else + pr_err("Unexpected hz unit '%s'\n", unit_ptr); + break; + case DRM_PMU_UNIT_NS: + /* Only unit ns expected. 
*/ + break; + case DRM_PMU_UNIT_MAX: + default: + break; + } + return count; +} + +static uint64_t read_drm_event(int fdinfo_dir_fd, const char *fd_name, + const char *match, enum drm_pmu_unit unit) +{ + char buf[640]; + struct io io; + char *line = NULL; + size_t line_len; + uint64_t count = 0; + + io__init(&io, openat(fdinfo_dir_fd, fd_name, O_RDONLY), buf, sizeof(buf)); + if (io.fd == -1) { + /* Failed to open file, ignore. */ + return 0; + } + while (io__getline(&io, &line, &line_len) > 0) { + size_t i = strlen(match); + + if (strncmp(line, match, i)) + continue; + if (line[i] != ':') + continue; + while (isblank(line[++i])) + ; + if (line[line_len - 1] == '\n') + line[line_len - 1] = '\0'; + count = read_count_and_apply_unit(&line[i], unit); + break; + } + free(line); + close(io.fd); + return count; +} + +struct read_drm_event_cb_args { + const char *match; + uint64_t count; + enum drm_pmu_unit unit; +}; + +static int read_drm_event_cb(void *vargs, int fdinfo_dir_fd, const char *fd_name) +{ + struct read_drm_event_cb_args *args = vargs; + + args->count += read_drm_event(fdinfo_dir_fd, fd_name, args->match, args->unit); + return 0; +} + +static uint64_t drm_pmu__read_system_wide(struct drm_pmu *drm, struct evsel *evsel) +{ + struct read_drm_event_cb_args args = { + .count = 0, + .match = drm->events[evsel->core.attr.config].name, + .unit = drm->events[evsel->core.attr.config].unit, + }; + + for_each_drm_fdinfo(/*skip_all_duplicates=*/false, read_drm_event_cb, &args); + return args.count; +} + +static uint64_t drm_pmu__read_for_pid(struct drm_pmu *drm, struct evsel *evsel, int pid) +{ + struct read_drm_event_cb_args args = { + .count = 0, + .match = drm->events[evsel->core.attr.config].name, + .unit = drm->events[evsel->core.attr.config].unit, + }; + struct minor_info minors = { + .minors = NULL, + .minors_num = 0, + .minors_len = 0, + }; + int proc_dir = open(procfs__mountpoint(), O_DIRECTORY); + char pid_name[12]; + int ret; + + if (proc_dir < 0) + return 0; + 
+ snprintf(pid_name, sizeof(pid_name), "%d", pid); + ret = for_each_drm_fdinfo_in_dir(read_drm_event_cb, &args, proc_dir, pid_name, &minors); + free(minors.minors); + close(proc_dir); + return ret == 0 ? args.count : 0; +} + +int evsel__drm_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) +{ + struct drm_pmu *drm = container_of(evsel->pmu, struct drm_pmu, pmu); + struct perf_counts_values *count, *old_count = NULL; + int pid = perf_thread_map__pid(evsel->core.threads, thread); + uint64_t counter; + + if (pid != -1) + counter = drm_pmu__read_for_pid(drm, evsel, pid); + else + counter = drm_pmu__read_system_wide(drm, evsel); + + if (evsel->prev_raw_counts) + old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + + count = perf_counts(evsel->counts, cpu_map_idx, thread); + if (old_count) { + count->val = old_count->val + counter; + count->run = old_count->run + 1; + count->ena = old_count->ena + 1; + } else { + count->val = counter; + count->run++; + count->ena++; + } + return 0; +} diff --git a/tools/perf/util/drm_pmu.h b/tools/perf/util/drm_pmu.h new file mode 100644 index 000000000000..e7f366fca8a4 --- /dev/null +++ b/tools/perf/util/drm_pmu.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __DRM_PMU_H +#define __DRM_PMU_H +/* + * Linux DRM clients expose information through usage stats as documented in + * Documentation/gpu/drm-usage-stats.rst (available online at + * https://docs.kernel.org/gpu/drm-usage-stats.html). This is a tool like PMU + * that exposes DRM information. 
+ */ + +#include "pmu.h" +#include + +struct list_head; +struct perf_thread_map; + +void drm_pmu__exit(struct perf_pmu *pmu); +bool drm_pmu__have_event(const struct perf_pmu *pmu, const char *name); +int drm_pmu__for_each_event(const struct perf_pmu *pmu, void *state, pmu_event_callback cb); +size_t drm_pmu__num_events(const struct perf_pmu *pmu); +int drm_pmu__config_terms(const struct perf_pmu *pmu, + struct perf_event_attr *attr, + struct parse_events_terms *terms, + struct parse_events_error *err); +int drm_pmu__check_alias(const struct perf_pmu *pmu, struct parse_events_terms *terms, + struct perf_pmu_info *info, struct parse_events_error *err); + + +bool perf_pmu__is_drm(const struct perf_pmu *pmu); +bool evsel__is_drm(const struct evsel *evsel); + +int perf_pmus__read_drm_pmus(struct list_head *pmus); + +int evsel__drm_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx); +int evsel__drm_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread); + +#endif /* __DRM_PMU_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index d55482f094bf..9c50c3960487 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -56,6 +56,7 @@ #include "off_cpu.h" #include "pmu.h" #include "pmus.h" +#include "drm_pmu.h" #include "hwmon_pmu.h" #include "tool_pmu.h" #include "rlimit.h" @@ -1889,6 +1890,9 @@ int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread) if (evsel__is_hwmon(evsel)) return evsel__hwmon_pmu_read(evsel, cpu_map_idx, thread); + if (evsel__is_drm(evsel)) + return evsel__drm_pmu_read(evsel, cpu_map_idx, thread); + if (evsel__is_retire_lat(evsel)) return evsel__tpebs_read(evsel, cpu_map_idx, thread); @@ -2610,6 +2614,11 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, start_cpu_map_idx, end_cpu_map_idx); } + if (evsel__is_drm(evsel)) { + return evsel__drm_pmu_open(evsel, threads, + start_cpu_map_idx, + end_cpu_map_idx); + } for 
(idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { cpu = perf_cpu_map__cpu(cpus, idx); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 609828513f6c..f795883c233f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -20,6 +20,7 @@ #include "debug.h" #include "evsel.h" #include "pmu.h" +#include "drm_pmu.h" #include "hwmon_pmu.h" #include "pmus.h" #include "tool_pmu.h" @@ -1627,6 +1628,8 @@ int perf_pmu__config_terms(const struct perf_pmu *pmu, if (perf_pmu__is_hwmon(pmu)) return hwmon_pmu__config_terms(pmu, attr, terms, err); + if (perf_pmu__is_drm(pmu)) + return drm_pmu__config_terms(pmu, attr, terms, err); list_for_each_entry(term, &terms->terms, list) { if (pmu_config_term(pmu, attr, term, terms, zero, apply_hardcoded, err)) @@ -1767,6 +1770,10 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ ret = hwmon_pmu__check_alias(head_terms, info, err); goto out; } + if (perf_pmu__is_drm(pmu)) { + ret = drm_pmu__check_alias(pmu, head_terms, info, err); + goto out; + } /* Fake PMU doesn't rewrite terms. 
*/ if (perf_pmu__is_fake(pmu)) @@ -1949,6 +1956,8 @@ bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name) return false; if (perf_pmu__is_hwmon(pmu)) return hwmon_pmu__have_event(pmu, name); + if (perf_pmu__is_drm(pmu)) + return drm_pmu__have_event(pmu, name); if (perf_pmu__find_alias(pmu, name, /*load=*/ true) != NULL) return true; if (pmu->cpu_aliases_added || !pmu->events_table) @@ -1962,6 +1971,8 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu) if (perf_pmu__is_hwmon(pmu)) return hwmon_pmu__num_events(pmu); + if (perf_pmu__is_drm(pmu)) + return drm_pmu__num_events(pmu); pmu_aliases_parse(pmu); nr = pmu->sysfs_aliases + pmu->sys_json_aliases; @@ -2030,6 +2041,8 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, if (perf_pmu__is_hwmon(pmu)) return hwmon_pmu__for_each_event(pmu, state, cb); + if (perf_pmu__is_drm(pmu)) + return drm_pmu__for_each_event(pmu, state, cb); strbuf_init(&sb, /*hint=*/ 0); pmu_aliases_parse(pmu); @@ -2511,6 +2524,8 @@ void perf_pmu__delete(struct perf_pmu *pmu) if (perf_pmu__is_hwmon(pmu)) hwmon_pmu__exit(pmu); + else if (perf_pmu__is_drm(pmu)) + drm_pmu__exit(pmu); perf_pmu__del_formats(&pmu->format); perf_pmu__del_aliases(pmu); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 71b8636fd07d..a4a08192154c 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -39,7 +39,9 @@ struct perf_pmu_caps { enum { PERF_PMU_TYPE_PE_START = 0, - PERF_PMU_TYPE_PE_END = 0xFFFEFFFF, + PERF_PMU_TYPE_PE_END = 0xFFFDFFFF, + PERF_PMU_TYPE_DRM_START = 0xFFFE0000, + PERF_PMU_TYPE_DRM_END = 0xFFFEFFFF, PERF_PMU_TYPE_HWMON_START = 0xFFFF0000, PERF_PMU_TYPE_HWMON_END = 0xFFFFFFFD, PERF_PMU_TYPE_TOOL = 0xFFFFFFFE, diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c index e0094f56b8e7..81c2ed689db2 100644 --- a/tools/perf/util/pmus.c +++ b/tools/perf/util/pmus.c @@ -12,6 +12,7 @@ #include #include "cpumap.h" #include "debug.h" +#include "drm_pmu.h" #include "evsel.h" #include "pmus.h" 
#include "pmu.h" @@ -43,16 +44,19 @@ enum perf_tool_pmu_type { PERF_TOOL_PMU_TYPE_PE_OTHER, PERF_TOOL_PMU_TYPE_TOOL, PERF_TOOL_PMU_TYPE_HWMON, + PERF_TOOL_PMU_TYPE_DRM, #define PERF_TOOL_PMU_TYPE_PE_CORE_MASK (1 << PERF_TOOL_PMU_TYPE_PE_CORE) #define PERF_TOOL_PMU_TYPE_PE_OTHER_MASK (1 << PERF_TOOL_PMU_TYPE_PE_OTHER) #define PERF_TOOL_PMU_TYPE_TOOL_MASK (1 << PERF_TOOL_PMU_TYPE_TOOL) #define PERF_TOOL_PMU_TYPE_HWMON_MASK (1 << PERF_TOOL_PMU_TYPE_HWMON) +#define PERF_TOOL_PMU_TYPE_DRM_MASK (1 << PERF_TOOL_PMU_TYPE_DRM) #define PERF_TOOL_PMU_TYPE_ALL_MASK (PERF_TOOL_PMU_TYPE_PE_CORE_MASK | \ PERF_TOOL_PMU_TYPE_PE_OTHER_MASK | \ PERF_TOOL_PMU_TYPE_TOOL_MASK | \ - PERF_TOOL_PMU_TYPE_HWMON_MASK) + PERF_TOOL_PMU_TYPE_HWMON_MASK | \ + PERF_TOOL_PMU_TYPE_DRM_MASK) }; static unsigned int read_pmu_types; @@ -173,6 +177,8 @@ struct perf_pmu *perf_pmus__find(const char *name) /* Looking up an individual perf event PMU failed, check if a tool PMU should be read. */ if (!strncmp(name, "hwmon_", 6)) to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK; + else if (!strncmp(name, "drm_", 4)) + to_read_pmus |= PERF_TOOL_PMU_TYPE_DRM_MASK; else if (!strcmp(name, "tool")) to_read_pmus |= PERF_TOOL_PMU_TYPE_TOOL_MASK; @@ -273,6 +279,10 @@ static void pmu_read_sysfs(unsigned int to_read_types) (read_pmu_types & PERF_TOOL_PMU_TYPE_HWMON_MASK) == 0) perf_pmus__read_hwmon_pmus(&other_pmus); + if ((to_read_types & PERF_TOOL_PMU_TYPE_DRM_MASK) != 0 && + (read_pmu_types & PERF_TOOL_PMU_TYPE_DRM_MASK) == 0) + perf_pmus__read_drm_pmus(&other_pmus); + list_sort(NULL, &other_pmus, pmus_cmp); read_pmu_types |= to_read_types; @@ -305,6 +315,8 @@ struct perf_pmu *perf_pmus__find_by_type(unsigned int type) if (type >= PERF_PMU_TYPE_PE_START && type <= PERF_PMU_TYPE_PE_END) { to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK | PERF_TOOL_PMU_TYPE_PE_OTHER_MASK; + } else if (type >= PERF_PMU_TYPE_DRM_START && type <= PERF_PMU_TYPE_DRM_END) { + to_read_pmus = PERF_TOOL_PMU_TYPE_DRM_MASK; } else if (type >= 
PERF_PMU_TYPE_HWMON_START && type <= PERF_PMU_TYPE_HWMON_END) { to_read_pmus = PERF_TOOL_PMU_TYPE_HWMON_MASK; } else { @@ -371,6 +383,10 @@ struct perf_pmu *perf_pmus__scan_for_event(struct perf_pmu *pmu, const char *eve if (parse_hwmon_filename(event, &type, &number, /*item=*/NULL, /*alarm=*/NULL)) to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK; + /* Could the event be a DRM event? */ + if (strlen(event) > 4 && strncmp("drm-", event, 4) == 0) + to_read_pmus |= PERF_TOOL_PMU_TYPE_DRM_MASK; + pmu_read_sysfs(to_read_pmus); pmu = list_prepare_entry(pmu, &core_pmus, list); } @@ -403,11 +419,17 @@ struct perf_pmu *perf_pmus__scan_matching_wildcard(struct perf_pmu *pmu, const c * Hwmon PMUs have an alias from a sysfs name like hwmon0, * hwmon1, etc. or have a name of hwmon_<name>. They therefore * can only have a wildcard match if the wildcard begins with - * "hwmon". + * "hwmon". Similarly drm PMUs must start "drm_", avoid reading + * such events unless the PMU could match. */ - if (strisglob(wildcard) || - (strlen(wildcard) >= 5 && strncmp("hwmon", wildcard, 5) == 0)) + if (strisglob(wildcard)) { + to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK | + PERF_TOOL_PMU_TYPE_DRM_MASK; + } else if (strlen(wildcard) >= 4 && strncmp("drm_", wildcard, 4) == 0) { + to_read_pmus |= PERF_TOOL_PMU_TYPE_DRM_MASK; + } else if (strlen(wildcard) >= 5 && strncmp("hwmon", wildcard, 5) == 0) { to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK; + } pmu_read_sysfs(to_read_pmus); pmu = list_prepare_entry(pmu, &core_pmus, list); From 45cd84bd7afc42c4a2ca630c11f246974fd1e73c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 24 Jun 2025 16:18:37 -0700 Subject: [PATCH 0361/2411] perf tests: Add a DRM PMU test The test opens any DRM devices so that the shell has fdinfo files containing the DRM data. The test then uses perf stat to make sure the events can be read.
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250624231837.179536-4-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/tests/shell/drm_pmu.sh | 78 +++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100755 tools/perf/tests/shell/drm_pmu.sh diff --git a/tools/perf/tests/shell/drm_pmu.sh b/tools/perf/tests/shell/drm_pmu.sh new file mode 100755 index 000000000000..e629fe0e8463 --- /dev/null +++ b/tools/perf/tests/shell/drm_pmu.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# DRM PMU +# SPDX-License-Identifier: GPL-2.0 + +set -e + +output=$(mktemp /tmp/perf.drm_pmu.XXXXXX.txt) + +cleanup() { + rm -f "${output}" + + trap - EXIT TERM INT +} + +trap_cleanup() { + echo "Unexpected signal in ${FUNCNAME[1]}" + cleanup + exit 1 +} +trap trap_cleanup EXIT TERM INT + +# Array to store file descriptors and device names +declare -A device_fds + +# Open all devices and store file descriptors. Opening the device will create a +# /proc/$$/fdinfo file containing the DRM statistics. +fd_count=3 # Start with file descriptor 3 +for device in /dev/dri/* +do + if [[ ! -c "$device" ]] + then + continue + fi + major=$(stat -c "%Hr" "$device") + if [[ "$major" != 226 ]] + then + continue + fi + echo "Opening $device" + eval "exec $fd_count<\"$device\"" + echo "fdinfo for: $device (FD: $fd_count)" + cat "/proc/$$/fdinfo/$fd_count" + echo + device_fds["$device"]="$fd_count" + fd_count=$((fd_count + 1)) +done + +if [[ ${#device_fds[@]} -eq 0 ]] +then + echo "No DRM devices found [Skip]" + cleanup + exit 2 +fi + +# For each DRM event +err=0 +for p in $(perf list --raw-dump drm-) +do + echo -n "Testing perf stat of $p. " + perf stat -e "$p" --pid=$$ true > "$output" 2>&1 + if ! 
grep -q "$p" "$output" + then + echo "Missing DRM event in: [Failed]" + cat "$output" + err=1 + else + echo "[OK]" + fi +done + +# Close all file descriptors +for fd in "${device_fds[@]}"; do + eval "exec $fd<&-" +done + +# Finished +cleanup +exit $err From 61051f9a8452d7f0878eaeb30299363310f07fd7 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 6 Jun 2025 23:12:35 -0700 Subject: [PATCH 0362/2411] perf header: In pipe mode dump features without --header/-I In pipe mode the header features are contained within events. While other events dump details the header features only dump if --header or -I are passed, which doesn't make sense as in pipe mode there is no perf file header. Make the printing of the information conditional on dump_trace as with other events. Before: ``` $ perf record -o - -a sleep 1 | perf script -D -i - ... 0x2c8@pipe [0x54]: event: 80 . . ... raw event: size 84 bytes . 0000: 50 00 00 00 00 00 54 00 05 00 00 00 00 00 00 00 P.....T......... . 0010: 40 00 00 00 36 2e 31 35 2e 72 63 37 2e 67 61 64 @...6.15.rc7.gad . 0020: 32 61 36 39 31 63 39 39 66 62 00 00 00 00 00 00 2a691c99fb...... . 0030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ . 0040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ . 0050: 00 00 00 00 .... 0 0 0x2c8 [0x54]: PERF_RECORD_FEATURE ``` After: ``` $ perf record -o - -a sleep 1 | perf script -D -i - ... 0x2c8@pipe [0x54]: event: 80 . . ... raw event: size 84 bytes . 0000: 50 00 00 00 00 00 54 00 05 00 00 00 00 00 00 00 P.....T......... . 0010: 40 00 00 00 36 2e 31 35 2e 72 63 37 2e 67 61 64 @...6.15.rc7.gad . 0020: 32 61 36 39 31 63 39 39 66 62 00 00 00 00 00 00 2a691c99fb...... . 0030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ . 0040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ . 0050: 00 00 00 00 .... 
0 0 0x2c8 [0x54]: PERF_RECORD_FEATURE, # perf version : 6.15.rc7.gad2a691c99fb ``` Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250617223356.2752099-4-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/header.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 2dea35237e81..58f45a2a2ab6 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -4326,7 +4326,6 @@ int perf_session__read_header(struct perf_session *session) int perf_event__process_feature(struct perf_session *session, union perf_event *event) { - const struct perf_tool *tool = session->tool; struct feat_fd ff = { .fd = 0 }; struct perf_record_header_feature *fe = (struct perf_record_header_feature *)event; int type = fe->header.type; @@ -4342,28 +4341,23 @@ int perf_event__process_feature(struct perf_session *session, return -1; } - if (!feat_ops[feat].process) - return 0; - ff.buf = (void *)fe->data; ff.size = event->header.size - sizeof(*fe); ff.ph = &session->header; - if (feat_ops[feat].process(&ff, NULL)) { + if (feat_ops[feat].process && feat_ops[feat].process(&ff, NULL)) { ret = -1; goto out; } - if (!feat_ops[feat].print || !tool->show_feat_hdr) - goto out; - - if (!feat_ops[feat].full_only || - tool->show_feat_hdr >= SHOW_FEAT_HEADER_FULL_INFO) { - feat_ops[feat].print(&ff, stdout); - } else { - fprintf(stdout, "# %s info available, use -I to display\n", - feat_ops[feat].name); + if (dump_trace) { + printf(", "); + if (feat_ops[feat].print) + feat_ops[feat].print(&ff, stdout); + else + printf("# %s", feat_ops[feat].name); } + out: free_event_desc(ff.events); return ret; From 57cbd56e2efe26483be2d4e7f62a7d9d816b54f1 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 6 Jun 2025 23:12:36 -0700 Subject: [PATCH 0363/2411] perf header: Allow tracing of attr events In pipe mode attr events capture the perf_event_attr. 
Allow their dumping as they normally start the file. Before: ``` $ perf record -o - -a sleep 1 | perf script -D -i - . ... raw event: size 272 bytes . 0000: 40 00 00 00 00 00 10 01 00 00 00 00 88 00 00 00 @............... . 0010: 00 00 00 00 00 00 00 00 a0 0f 00 00 00 00 00 00 ................ . 0020: 87 01 01 00 00 00 00 00 14 00 00 00 00 00 00 00 ................ . 0030: 01 84 05 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ . 0040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ . 0050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ . 0060: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ . 0070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ . 0080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ . 0090: 91 08 00 00 00 00 00 00 92 08 00 00 00 00 00 00 ................ . 00a0: 93 08 00 00 00 00 00 00 94 08 00 00 00 00 00 00 ................ . 00b0: 95 08 00 00 00 00 00 00 96 08 00 00 00 00 00 00 ................ . 00c0: 97 08 00 00 00 00 00 00 98 08 00 00 00 00 00 00 ................ . 00d0: 99 08 00 00 00 00 00 00 9a 08 00 00 00 00 00 00 ................ . 00e0: 9b 08 00 00 00 00 00 00 9c 08 00 00 00 00 00 00 ................ . 00f0: 9d 08 00 00 00 00 00 00 9e 08 00 00 00 00 00 00 ................ . 0100: 9f 08 00 00 00 00 00 00 a0 08 00 00 00 00 00 00 ................ -1 -1 0 [0x110]: PERF_RECORD_ATTR 0x110@pipe [0x110]: event: 64 ... ``` After: ``` $ perf record -o - -a sleep 1 | perf script -D -i - 0@pipe [0x110]: event: 64 . . ... raw event: size 272 bytes . 0000: 40 00 00 00 00 00 10 01 00 00 00 00 88 00 00 00 @............... . 0010: 00 00 00 00 00 00 00 00 a0 0f 00 00 00 00 00 00 ................ . 0020: 87 01 01 00 00 00 00 00 14 00 00 00 00 00 00 00 ................ . 0030: 01 84 05 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ . 0040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ . 
0050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ . 0060: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ . 0070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ . 0080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ . 0090: 5c 08 00 00 00 00 00 00 5d 08 00 00 00 00 00 00 \.......]....... . 00a0: 5e 08 00 00 00 00 00 00 5f 08 00 00 00 00 00 00 ^......._....... . 00b0: 60 08 00 00 00 00 00 00 61 08 00 00 00 00 00 00 `.......a....... . 00c0: 62 08 00 00 00 00 00 00 63 08 00 00 00 00 00 00 b.......c....... . 00d0: 64 08 00 00 00 00 00 00 65 08 00 00 00 00 00 00 d.......e....... . 00e0: 66 08 00 00 00 00 00 00 67 08 00 00 00 00 00 00 f.......g....... . 00f0: 68 08 00 00 00 00 00 00 69 08 00 00 00 00 00 00 h.......i....... . 0100: 6a 08 00 00 00 00 00 00 6b 08 00 00 00 00 00 00 j.......k....... -1 -1 0 [0x110]: PERF_RECORD_ATTR, type = 0 (PERF_TYPE_HARDWARE), size = 136, config = 0 (PERF_COUNT_HW_CPU_CYCLES), { sample_period, sample_freq } = 4000, sample_type = IP|TID|TIME|CPU|PERIOD|IDENTIFIER, read_format = ID|LOST, disabled = 1, freq = 1, precise_ip = 3, sample_id_all = 1 0x110@pipe [0x110]: event: 64 ... 
``` Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250617223356.2752099-4-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/header.c | 8 ++++++++ tools/perf/util/header.h | 1 + 2 files changed, 9 insertions(+) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 58f45a2a2ab6..3f1b78810059 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -4399,6 +4399,11 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) return ret; } +size_t perf_event__fprintf_attr(union perf_event *event, FILE *fp) +{ + return perf_event_attr__fprintf(fp, &event->attr.attr, __desc_attr__fprintf, NULL); +} + int perf_event__process_attr(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct evlist **pevlist) @@ -4408,6 +4413,9 @@ int perf_event__process_attr(const struct perf_tool *tool __maybe_unused, struct evsel *evsel; struct evlist *evlist = *pevlist; + if (dump_trace) + perf_event__fprintf_attr(event, stdout); + if (evlist == NULL) { *pevlist = evlist = evlist__new(); if (evlist == NULL) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 5201af6305f4..d16dfceccd74 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -175,6 +175,7 @@ int perf_event__process_attr(const struct perf_tool *tool, union perf_event *eve int perf_event__process_event_update(const struct perf_tool *tool, union perf_event *event, struct evlist **pevlist); +size_t perf_event__fprintf_attr(union perf_event *event, FILE *fp); size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp); #ifdef HAVE_LIBTRACEEVENT int perf_event__process_tracing_data(struct perf_session *session, From 4d2eefd7fb91482f1327f28f14112201e0b45dff Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 6 Jun 2025 23:12:37 -0700 Subject: [PATCH 0364/2411] perf header: Display message if BPF/BTF info is empty The perf.data file may contain a bpf_prog_info or bpf_btf feature. 
If the contents of these are empty then nothing is displayed. Rather than display nothing and not account for the file space, display an empty message. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250617223356.2752099-4-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/header.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 3f1b78810059..a9538bb1004d 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1814,6 +1814,9 @@ static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp) root = &env->bpf_progs.infos; next = rb_first(root); + if (!next) + printf("# bpf_prog_info empty\n"); + while (next) { struct bpf_prog_info_node *node; @@ -1838,6 +1841,9 @@ static void print_bpf_btf(struct feat_fd *ff, FILE *fp) root = &env->bpf_progs.btfs; next = rb_first(root); + if (!next) + printf("# btf info empty\n"); + while (next) { struct btf_node *node; From f0d0f978f3f5830ab06d71d1f37b3b30d47d6219 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 6 Jun 2025 23:12:38 -0700 Subject: [PATCH 0365/2411] perf header: Don't write empty BPF/BTF info If there are no values in bpf_prog_info or bpf_btf feature don't write the data into the header. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250617223356.2752099-4-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/header.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index a9538bb1004d..487f663ed2de 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1016,10 +1016,13 @@ static int write_bpf_prog_info(struct feat_fd *ff, struct perf_env *env = &ff->ph->env; struct rb_root *root; struct rb_node *next; - int ret; + int ret = 0; down_read(&env->bpf_progs.lock); + if (env->bpf_progs.infos_cnt == 0) + goto out; + ret = do_write(ff, &env->bpf_progs.infos_cnt, sizeof(env->bpf_progs.infos_cnt)); if (ret < 0) @@ -1058,10 +1061,13 @@ static int write_bpf_btf(struct feat_fd *ff, struct perf_env *env = &ff->ph->env; struct rb_root *root; struct rb_node *next; - int ret; + int ret = 0; down_read(&env->bpf_progs.lock); + if (env->bpf_progs.btfs_cnt == 0) + goto out; + ret = do_write(ff, &env->bpf_progs.btfs_cnt, sizeof(env->bpf_progs.btfs_cnt)); From 032f05be51ab4a1d67d08a8083ec16dd934d255e Mon Sep 17 00:00:00 2001 From: Hans Zhang <18255117159@163.com> Date: Fri, 13 Jun 2025 00:12:26 +0800 Subject: [PATCH 0366/2411] PCI: dwc: Simplify the return value of PTM debugfs functions returning bool Replace redundant ternary conditional expressions with direct boolean returns in PTM debugfs functions. Specifically change this pattern: return (condition) ? 
true : false; to the simpler: return condition; Signed-off-by: Hans Zhang <18255117159@163.com> [mani: subject rewording] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Frank Li Reviewed-by: Niklas Cassel Link: https://patch.msgid.link/20250612161226.950937-1-18255117159@163.com --- .../pci/controller/dwc/pcie-designware-debugfs.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-debugfs.c b/drivers/pci/controller/dwc/pcie-designware-debugfs.c index c67601096c48..6f438a36f840 100644 --- a/drivers/pci/controller/dwc/pcie-designware-debugfs.c +++ b/drivers/pci/controller/dwc/pcie-designware-debugfs.c @@ -814,14 +814,14 @@ static bool dw_pcie_ptm_context_update_visible(void *drvdata) { struct dw_pcie *pci = drvdata; - return (pci->mode == DW_PCIE_EP_TYPE) ? true : false; + return pci->mode == DW_PCIE_EP_TYPE; } static bool dw_pcie_ptm_context_valid_visible(void *drvdata) { struct dw_pcie *pci = drvdata; - return (pci->mode == DW_PCIE_RC_TYPE) ? true : false; + return pci->mode == DW_PCIE_RC_TYPE; } static bool dw_pcie_ptm_local_clock_visible(void *drvdata) @@ -834,35 +834,35 @@ static bool dw_pcie_ptm_master_clock_visible(void *drvdata) { struct dw_pcie *pci = drvdata; - return (pci->mode == DW_PCIE_EP_TYPE) ? true : false; + return pci->mode == DW_PCIE_EP_TYPE; } static bool dw_pcie_ptm_t1_visible(void *drvdata) { struct dw_pcie *pci = drvdata; - return (pci->mode == DW_PCIE_EP_TYPE) ? true : false; + return pci->mode == DW_PCIE_EP_TYPE; } static bool dw_pcie_ptm_t2_visible(void *drvdata) { struct dw_pcie *pci = drvdata; - return (pci->mode == DW_PCIE_RC_TYPE) ? true : false; + return pci->mode == DW_PCIE_RC_TYPE; } static bool dw_pcie_ptm_t3_visible(void *drvdata) { struct dw_pcie *pci = drvdata; - return (pci->mode == DW_PCIE_RC_TYPE) ? 
true : false; + return pci->mode == DW_PCIE_RC_TYPE; } static bool dw_pcie_ptm_t4_visible(void *drvdata) { struct dw_pcie *pci = drvdata; - return (pci->mode == DW_PCIE_EP_TYPE) ? true : false; + return pci->mode == DW_PCIE_EP_TYPE; } const struct pcie_ptm_ops dw_pcie_ptm_ops = { From d79123d79a8154b4318529b7b2ff7e15806f480b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 24 Jun 2025 20:45:43 +0900 Subject: [PATCH 0367/2411] PCI: endpoint: Fix configfs group list head handling Doing a list_del() on the epf_group field of struct pci_epf_driver in pci_epf_remove_cfs() is not correct as this field is a list head, not a list entry. This list_del() call triggers a KASAN warning when an endpoint function driver which has a configfs attribute group is torn down: ================================================================== BUG: KASAN: slab-use-after-free in pci_epf_remove_cfs+0x17c/0x198 Write of size 8 at addr ffff00010f4a0d80 by task rmmod/319 CPU: 3 UID: 0 PID: 319 Comm: rmmod Not tainted 6.16.0-rc2 #1 NONE Hardware name: Radxa ROCK 5B (DT) Call trace: show_stack+0x2c/0x84 (C) dump_stack_lvl+0x70/0x98 print_report+0x17c/0x538 kasan_report+0xb8/0x190 __asan_report_store8_noabort+0x20/0x2c pci_epf_remove_cfs+0x17c/0x198 pci_epf_unregister_driver+0x18/0x30 nvmet_pci_epf_cleanup_module+0x24/0x30 [nvmet_pci_epf] __arm64_sys_delete_module+0x264/0x424 invoke_syscall+0x70/0x260 el0_svc_common.constprop.0+0xac/0x230 do_el0_svc+0x40/0x58 el0_svc+0x48/0xdc el0t_64_sync_handler+0x10c/0x138 el0t_64_sync+0x198/0x19c ... Remove this incorrect list_del() call from pci_epf_remove_cfs(). 
Fixes: ef1433f717a2 ("PCI: endpoint: Create configfs entry for each pci_epf_device_id table entry") Signed-off-by: Damien Le Moal Signed-off-by: Manivannan Sadhasivam Reviewed-by: Niklas Cassel Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250624114544.342159-2-dlemoal@kernel.org --- drivers/pci/endpoint/pci-epf-core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pci/endpoint/pci-epf-core.c b/drivers/pci/endpoint/pci-epf-core.c index 577a9e490115..defc6aecfdef 100644 --- a/drivers/pci/endpoint/pci-epf-core.c +++ b/drivers/pci/endpoint/pci-epf-core.c @@ -338,7 +338,6 @@ static void pci_epf_remove_cfs(struct pci_epf_driver *driver) mutex_lock(&pci_epf_mutex); list_for_each_entry_safe(group, tmp, &driver->epf_group, group_entry) pci_ep_cfs_remove_epf_group(group); - list_del(&driver->epf_group); mutex_unlock(&pci_epf_mutex); } From 910bdb8197f9322790c738bb32feaa11dba26909 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 24 Jun 2025 20:45:44 +0900 Subject: [PATCH 0368/2411] PCI: endpoint: Fix configfs group removal on driver teardown An endpoint driver configfs attributes group is added to the epf_group list of struct pci_epf_driver by pci_epf_add_cfs() but an added group is not removed from this list when the attribute group is unregistered with pci_ep_cfs_remove_epf_group(). Add the missing list_del() call in pci_ep_cfs_remove_epf_group() to correctly remove the attribute group from the driver list. With this change, once the loop over all attribute groups in pci_epf_remove_cfs() completes, the driver epf_group list should be empty. Add a WARN_ON() to make sure of that. 
Fixes: ef1433f717a2 ("PCI: endpoint: Create configfs entry for each pci_epf_device_id table entry") Signed-off-by: Damien Le Moal Signed-off-by: Manivannan Sadhasivam Reviewed-by: Niklas Cassel Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250624114544.342159-3-dlemoal@kernel.org --- drivers/pci/endpoint/pci-ep-cfs.c | 1 + drivers/pci/endpoint/pci-epf-core.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/pci/endpoint/pci-ep-cfs.c b/drivers/pci/endpoint/pci-ep-cfs.c index d712c7a866d2..ef50c82e647f 100644 --- a/drivers/pci/endpoint/pci-ep-cfs.c +++ b/drivers/pci/endpoint/pci-ep-cfs.c @@ -691,6 +691,7 @@ void pci_ep_cfs_remove_epf_group(struct config_group *group) if (IS_ERR_OR_NULL(group)) return; + list_del(&group->group_entry); configfs_unregister_default_group(group); } EXPORT_SYMBOL(pci_ep_cfs_remove_epf_group); diff --git a/drivers/pci/endpoint/pci-epf-core.c b/drivers/pci/endpoint/pci-epf-core.c index defc6aecfdef..167dc6ee63f7 100644 --- a/drivers/pci/endpoint/pci-epf-core.c +++ b/drivers/pci/endpoint/pci-epf-core.c @@ -338,6 +338,7 @@ static void pci_epf_remove_cfs(struct pci_epf_driver *driver) mutex_lock(&pci_epf_mutex); list_for_each_entry_safe(group, tmp, &driver->epf_group, group_entry) pci_ep_cfs_remove_epf_group(group); + WARN_ON(!list_empty(&driver->epf_group)); mutex_unlock(&pci_epf_mutex); } From 2f5d370dec3f800b44bbf7b68875d521e0af43cd Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 23 Jun 2025 10:00:12 +0100 Subject: [PATCH 0369/2411] perf test: Change all remaining #!/bin/sh to #!/bin/bash There are 43 instances of posix shell tests and 35 instances of bash. To give us a single consistent language for testing in, replace all #!/bin/sh to #!/bin/bash. Common sources that are included in both different shells will now work as expected. And we no longer have to fix up bashisms that appear to work when someone's system has sh symlinked to bash, but don't work on other systems that have both shells installed. 
Although we could have chosen sh, it's not backwards compatible so it wouldn't be possible to bulk convert without re-writing the existing bash tests. Choosing bash also gives us some nicer features including 'local' variable definitions and regexes in if statements that are already widely used in the tests. It's not expected that there are any users with only sh available due to the large number of bash tests that exist. Discussed in relation to running shellcheck here: https://lore.kernel.org/linux-perf-users/e3751a74be34bbf3781c4644f518702a7270220b.1749785642.git.collin.funk1@gmail.com/ Signed-off-by: James Clark Reviewed-by: Collin Funk Acked-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20250623-james-perf-bash-tests-v1-1-f572f54d4559@linaro.org Signed-off-by: Namhyung Kim --- tools/perf/tests/perf-targz-src-pkg | 2 +- tools/perf/tests/shell/amd-ibs-swfilt.sh | 2 +- tools/perf/tests/shell/buildid.sh | 2 +- tools/perf/tests/shell/coresight/asm_pure_loop.sh | 2 +- tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh | 2 +- tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh | 2 +- tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh | 2 +- tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh | 2 +- tools/perf/tests/shell/diff.sh | 2 +- tools/perf/tests/shell/ftrace.sh | 2 +- tools/perf/tests/shell/lib/perf_has_symbol.sh | 2 +- tools/perf/tests/shell/lib/probe_vfs_getname.sh | 2 +- tools/perf/tests/shell/lib/setup_python.sh | 2 +- tools/perf/tests/shell/lib/waiting.sh | 2 +- tools/perf/tests/shell/list.sh | 2 +- tools/perf/tests/shell/lock_contention.sh | 2 +- tools/perf/tests/shell/perf-report-hierarchy.sh | 2 +- tools/perf/tests/shell/probe_vfs_getname.sh | 2 +- tools/perf/tests/shell/record+probe_libc_inet_pton.sh | 2 +- tools/perf/tests/shell/record+script_probe_vfs_getname.sh | 2 +- tools/perf/tests/shell/record+zstd_comp_decomp.sh | 2 +- tools/perf/tests/shell/record_bpf_filter.sh | 2 +- 
tools/perf/tests/shell/record_offcpu.sh | 2 +- tools/perf/tests/shell/record_sideband.sh | 2 +- tools/perf/tests/shell/script.sh | 2 +- tools/perf/tests/shell/stat+csv_summary.sh | 2 +- tools/perf/tests/shell/stat+shadow_stat.sh | 2 +- tools/perf/tests/shell/stat_all_pfm.sh | 2 +- tools/perf/tests/shell/stat_bpf_counters.sh | 2 +- tools/perf/tests/shell/stat_bpf_counters_cgrp.sh | 2 +- tools/perf/tests/shell/test_arm_callgraph_fp.sh | 2 +- tools/perf/tests/shell/test_arm_coresight.sh | 2 +- tools/perf/tests/shell/test_arm_coresight_disasm.sh | 2 +- tools/perf/tests/shell/test_arm_spe.sh | 2 +- tools/perf/tests/shell/test_arm_spe_fork.sh | 2 +- tools/perf/tests/shell/test_bpf_metadata.sh | 2 +- tools/perf/tests/shell/test_intel_pt.sh | 2 +- tools/perf/tests/shell/trace+probe_vfs_getname.sh | 2 +- tools/perf/tests/shell/trace_btf_enum.sh | 2 +- tools/perf/tests/shell/trace_exit_race.sh | 2 +- tools/perf/tests/shell/trace_record_replay.sh | 2 +- tools/perf/tests/shell/trace_summary.sh | 2 +- tools/perf/tests/tests-scripts.c | 2 +- 43 files changed, 43 insertions(+), 43 deletions(-) diff --git a/tools/perf/tests/perf-targz-src-pkg b/tools/perf/tests/perf-targz-src-pkg index b3075c168cb2..52a90e6bd8af 100755 --- a/tools/perf/tests/perf-targz-src-pkg +++ b/tools/perf/tests/perf-targz-src-pkg @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Test one of the main kernel Makefile targets to generate a perf sources tarball # suitable for build outside the full kernel sources. 
diff --git a/tools/perf/tests/shell/amd-ibs-swfilt.sh b/tools/perf/tests/shell/amd-ibs-swfilt.sh index 83937aa687cc..7045ec72ba4c 100755 --- a/tools/perf/tests/shell/amd-ibs-swfilt.sh +++ b/tools/perf/tests/shell/amd-ibs-swfilt.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # AMD IBS software filtering echo "check availability of IBS swfilt" diff --git a/tools/perf/tests/shell/buildid.sh b/tools/perf/tests/shell/buildid.sh index 3383ca3399d4..d2eb213da01d 100755 --- a/tools/perf/tests/shell/buildid.sh +++ b/tools/perf/tests/shell/buildid.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # build id cache operations # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/coresight/asm_pure_loop.sh b/tools/perf/tests/shell/coresight/asm_pure_loop.sh index c63bc8c73e26..0301904b9637 100755 --- a/tools/perf/tests/shell/coresight/asm_pure_loop.sh +++ b/tools/perf/tests/shell/coresight/asm_pure_loop.sh @@ -1,4 +1,4 @@ -#!/bin/sh -e +#!/bin/bash -e # CoreSight / ASM Pure Loop (exclusive) # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh b/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh index 8e29630957c8..1f765d69acc3 100755 --- a/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh +++ b/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh @@ -1,4 +1,4 @@ -#!/bin/sh -e +#!/bin/bash -e # CoreSight / Memcpy 16k 10 Threads (exclusive) # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh b/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh index 0c4c82a1c8e1..7f43a93a2ac2 100755 --- a/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh +++ b/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh @@ -1,4 +1,4 @@ -#!/bin/sh -e +#!/bin/bash -e # CoreSight / Thread Loop 10 Threads - Check TID (exclusive) # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh 
b/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh index d3aea9fc6ced..a94d2079ed06 100755 --- a/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh +++ b/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh @@ -1,4 +1,4 @@ -#!/bin/sh -e +#!/bin/bash -e # CoreSight / Thread Loop 2 Threads - Check TID (exclusive) # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh b/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh index 7429d3a2ae43..cb3e97a0a89f 100755 --- a/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh +++ b/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh @@ -1,4 +1,4 @@ -#!/bin/sh -e +#!/bin/bash -e # CoreSight / Unroll Loop Thread 10 (exclusive) # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/diff.sh b/tools/perf/tests/shell/diff.sh index e05a5dc49479..fe05fdebcab5 100755 --- a/tools/perf/tests/shell/diff.sh +++ b/tools/perf/tests/shell/diff.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # perf diff tests # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/ftrace.sh b/tools/perf/tests/shell/ftrace.sh index c243731d2fbf..7f8aafcbb761 100755 --- a/tools/perf/tests/shell/ftrace.sh +++ b/tools/perf/tests/shell/ftrace.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # perf ftrace tests # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/lib/perf_has_symbol.sh b/tools/perf/tests/shell/lib/perf_has_symbol.sh index 561c93b75d77..0b35cce0b13d 100644 --- a/tools/perf/tests/shell/lib/perf_has_symbol.sh +++ b/tools/perf/tests/shell/lib/perf_has_symbol.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 perf_has_symbol() diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh index 58debce9ab42..88cd0e26d5f6 100644 --- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh @@ -1,4 +1,4 @@ 
-#!/bin/sh +#!/bin/bash # Arnaldo Carvalho de Melo , 2017 perf probe -l 2>&1 | grep -q probe:vfs_getname diff --git a/tools/perf/tests/shell/lib/setup_python.sh b/tools/perf/tests/shell/lib/setup_python.sh index c2fce1793538..a58e5536f2ed 100644 --- a/tools/perf/tests/shell/lib/setup_python.sh +++ b/tools/perf/tests/shell/lib/setup_python.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 if [ "x$PYTHON" = "x" ] diff --git a/tools/perf/tests/shell/lib/waiting.sh b/tools/perf/tests/shell/lib/waiting.sh index bdd5a7c71591..3a152892e077 100644 --- a/tools/perf/tests/shell/lib/waiting.sh +++ b/tools/perf/tests/shell/lib/waiting.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 tenths=date\ +%s%1N diff --git a/tools/perf/tests/shell/list.sh b/tools/perf/tests/shell/list.sh index 76a9846cff22..0c04b3159cef 100755 --- a/tools/perf/tests/shell/list.sh +++ b/tools/perf/tests/shell/list.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # perf list tests # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh index 30d195d4c62f..dde5bc737eb2 100755 --- a/tools/perf/tests/shell/lock_contention.sh +++ b/tools/perf/tests/shell/lock_contention.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # kernel lock contention analysis test # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/perf-report-hierarchy.sh b/tools/perf/tests/shell/perf-report-hierarchy.sh index 02e3b6aee4ed..e3c6f9a24f33 100755 --- a/tools/perf/tests/shell/perf-report-hierarchy.sh +++ b/tools/perf/tests/shell/perf-report-hierarchy.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # perf report --hierarchy # SPDX-License-Identifier: GPL-2.0 # Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/shell/probe_vfs_getname.sh b/tools/perf/tests/shell/probe_vfs_getname.sh index 0f52654c914a..5fe5682c28ce 100755 --- a/tools/perf/tests/shell/probe_vfs_getname.sh +++ 
b/tools/perf/tests/shell/probe_vfs_getname.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Add vfs_getname probe to get syscall args filenames (exclusive) # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index 9bdf47aabe9d..ab99bef556bf 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # probe libc's inet_pton & backtrace it with ping (exclusive) # Installs a probe on libc's inet_pton function, that will use uprobes, diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh index 1ad252f0d36e..002f7037f182 100755 --- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh +++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Use vfs_getname probe to get syscall args filenames (exclusive) # Uses the 'perf test shell' library to add probe:vfs_getname to the system diff --git a/tools/perf/tests/shell/record+zstd_comp_decomp.sh b/tools/perf/tests/shell/record+zstd_comp_decomp.sh index 8929046e9057..f6b82223834e 100755 --- a/tools/perf/tests/shell/record+zstd_comp_decomp.sh +++ b/tools/perf/tests/shell/record+zstd_comp_decomp.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Zstd perf.data compression/decompression # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/record_bpf_filter.sh b/tools/perf/tests/shell/record_bpf_filter.sh index 4d6c3c1b7fb9..383574cb3bd3 100755 --- a/tools/perf/tests/shell/record_bpf_filter.sh +++ b/tools/perf/tests/shell/record_bpf_filter.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # perf record sample filtering (by BPF) tests # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/record_offcpu.sh b/tools/perf/tests/shell/record_offcpu.sh index 
21a22efe08f5..860a2d6f4b75 100755 --- a/tools/perf/tests/shell/record_offcpu.sh +++ b/tools/perf/tests/shell/record_offcpu.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # perf record offcpu profiling tests (exclusive) # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/record_sideband.sh b/tools/perf/tests/shell/record_sideband.sh index ac70ac27d590..2182551873be 100755 --- a/tools/perf/tests/shell/record_sideband.sh +++ b/tools/perf/tests/shell/record_sideband.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # perf record sideband tests # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/script.sh b/tools/perf/tests/shell/script.sh index d3e2958d2242..7007f1cdf761 100755 --- a/tools/perf/tests/shell/script.sh +++ b/tools/perf/tests/shell/script.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # perf script tests # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/stat+csv_summary.sh b/tools/perf/tests/shell/stat+csv_summary.sh index 323123ff4d19..9a4353db3825 100755 --- a/tools/perf/tests/shell/stat+csv_summary.sh +++ b/tools/perf/tests/shell/stat+csv_summary.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # perf stat csv summary test # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/stat+shadow_stat.sh b/tools/perf/tests/shell/stat+shadow_stat.sh index 0c7d79a230ea..8824f445d343 100755 --- a/tools/perf/tests/shell/stat+shadow_stat.sh +++ b/tools/perf/tests/shell/stat+shadow_stat.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # perf stat metrics (shadow stat) test # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/stat_all_pfm.sh b/tools/perf/tests/shell/stat_all_pfm.sh index 4d004f777a6e..c08c186af2c4 100755 --- a/tools/perf/tests/shell/stat_all_pfm.sh +++ b/tools/perf/tests/shell/stat_all_pfm.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # perf all libpfm4 events test # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh 
b/tools/perf/tests/shell/stat_bpf_counters.sh index 95d2ad5d17c6..f43e28a136d3 100755 --- a/tools/perf/tests/shell/stat_bpf_counters.sh +++ b/tools/perf/tests/shell/stat_bpf_counters.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # perf stat --bpf-counters test (exclusive) # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh b/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh index 2ec69060c42f..ff2e06c408bc 100755 --- a/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh +++ b/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # perf stat --bpf-counters --for-each-cgroup test # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/test_arm_callgraph_fp.sh b/tools/perf/tests/shell/test_arm_callgraph_fp.sh index 9caa36130175..9172dd68a81d 100755 --- a/tools/perf/tests/shell/test_arm_callgraph_fp.sh +++ b/tools/perf/tests/shell/test_arm_callgraph_fp.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Check Arm64 callgraphs are complete in fp mode # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index 573af9235b72..1c750b67d141 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Check Arm CoreSight trace data recording and synthesized samples (exclusive) # Uses the 'perf record' to record trace data with Arm CoreSight sinks; diff --git a/tools/perf/tests/shell/test_arm_coresight_disasm.sh b/tools/perf/tests/shell/test_arm_coresight_disasm.sh index be2d26303f94..0dfb4fadf531 100755 --- a/tools/perf/tests/shell/test_arm_coresight_disasm.sh +++ b/tools/perf/tests/shell/test_arm_coresight_disasm.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Check Arm CoreSight disassembly script completes without errors (exclusive) # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/test_arm_spe.sh 
b/tools/perf/tests/shell/test_arm_spe.sh index a69aab70dd8a..bb76ea88aa14 100755 --- a/tools/perf/tests/shell/test_arm_spe.sh +++ b/tools/perf/tests/shell/test_arm_spe.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Check Arm SPE trace data recording and synthesized samples (exclusive) # Uses the 'perf record' to record trace data of Arm SPE events; diff --git a/tools/perf/tests/shell/test_arm_spe_fork.sh b/tools/perf/tests/shell/test_arm_spe_fork.sh index 8efeef9fb956..5bcca51c03ac 100755 --- a/tools/perf/tests/shell/test_arm_spe_fork.sh +++ b/tools/perf/tests/shell/test_arm_spe_fork.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Check Arm SPE doesn't hang when there are forks # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/test_bpf_metadata.sh b/tools/perf/tests/shell/test_bpf_metadata.sh index 11df592fb661..bc9aef161664 100755 --- a/tools/perf/tests/shell/test_bpf_metadata.sh +++ b/tools/perf/tests/shell/test_bpf_metadata.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # BPF metadata collection test. 
diff --git a/tools/perf/tests/shell/test_intel_pt.sh b/tools/perf/tests/shell/test_intel_pt.sh index 32a9b8dcb200..8ee761f03c38 100755 --- a/tools/perf/tests/shell/test_intel_pt.sh +++ b/tools/perf/tests/shell/test_intel_pt.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Miscellaneous Intel PT testing (exclusive) # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 5d5019988d61..7a0b1145d0cd 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Check open filename arg using perf trace + vfs_getname (exclusive) # Uses the 'perf test shell' library to add probe:vfs_getname to the system diff --git a/tools/perf/tests/shell/trace_btf_enum.sh b/tools/perf/tests/shell/trace_btf_enum.sh index c37017bfeb5e..572001d75d78 100755 --- a/tools/perf/tests/shell/trace_btf_enum.sh +++ b/tools/perf/tests/shell/trace_btf_enum.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # perf trace enum augmentation tests # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/trace_exit_race.sh b/tools/perf/tests/shell/trace_exit_race.sh index 1e247693e756..db300cde94fb 100755 --- a/tools/perf/tests/shell/trace_exit_race.sh +++ b/tools/perf/tests/shell/trace_exit_race.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # perf trace exit race # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/trace_record_replay.sh b/tools/perf/tests/shell/trace_record_replay.sh index 6b4ed863c1ef..88d30a03dcec 100755 --- a/tools/perf/tests/shell/trace_record_replay.sh +++ b/tools/perf/tests/shell/trace_record_replay.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # perf trace record and replay # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/shell/trace_summary.sh b/tools/perf/tests/shell/trace_summary.sh index f9bb7f9388be..22e2651d5919 100755 --- 
a/tools/perf/tests/shell/trace_summary.sh +++ b/tools/perf/tests/shell/trace_summary.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # perf trace summary (exclusive) # SPDX-License-Identifier: GPL-2.0 diff --git a/tools/perf/tests/tests-scripts.c b/tools/perf/tests/tests-scripts.c index 3a2a8438f9af..f18c4cd337c8 100644 --- a/tools/perf/tests/tests-scripts.c +++ b/tools/perf/tests/tests-scripts.c @@ -85,7 +85,7 @@ static char *shell_test__description(int dir_fd, const char *name) if (io.fd < 0) return NULL; - /* Skip first line - should be #!/bin/sh Shebang */ + /* Skip first line - should be #!/bin/bash Shebang */ if (io__get_char(&io) != '#') goto err_out; if (io__get_char(&io) != '!') From f6109fb6f5d7fb9403cecfc75302bbf47ed83b8d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 23 Jun 2025 15:57:21 -0700 Subject: [PATCH 0370/2411] perf trace: Split BPF skel code to util/bpf_trace_augment.c And make builtin-trace.c less conditional. Dummy functions will be called when BUILD_BPF_SKEL=0 is used. This makes the builtin-trace.c slightly smaller and simpler by removing the skeleton and its helpers. The conditional guard of trace__init_syscalls_bpf_prog_array_maps() is changed from the HAVE_BPF_SKEL to HAVE_LIBBPF_SUPPORT as it doesn't have a skeleton in the code directly. And a dummy function is added so that it can be called unconditionally. The function will succeed only if both conditions are true. Do not include trace_augment.h from the BPF code and move the definition of TRACE_AUG_MAX_BUF to the BPF directly. 
Reviewed-by: Howard Chu Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20250623225721.21553-1-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/builtin-trace.c | 187 +++++------------- tools/perf/util/Build | 1 + .../bpf_skel/augmented_raw_syscalls.bpf.c | 3 +- tools/perf/util/bpf_trace_augment.c | 143 ++++++++++++++ tools/perf/util/trace_augment.h | 62 +++++- 5 files changed, 255 insertions(+), 141 deletions(-) create mode 100644 tools/perf/util/bpf_trace_augment.c diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index c38225a89fc8..bb2dbc1d2ffa 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -20,9 +20,6 @@ #include #include #include -#ifdef HAVE_BPF_SKEL -#include "bpf_skel/augmented_raw_syscalls.skel.h" -#endif #endif #include "util/bpf_map.h" #include "util/rlimit.h" @@ -155,9 +152,6 @@ struct trace { *bpf_output; } events; } syscalls; -#ifdef HAVE_BPF_SKEL - struct augmented_raw_syscalls_bpf *skel; -#endif #ifdef HAVE_LIBBPF_SUPPORT struct btf *btf; #endif @@ -3703,7 +3697,10 @@ static int trace__set_ev_qualifier_tp_filter(struct trace *trace) goto out; } -#ifdef HAVE_BPF_SKEL +#ifdef HAVE_LIBBPF_SUPPORT + +static struct bpf_program *unaugmented_prog; + static int syscall_arg_fmt__cache_btf_struct(struct syscall_arg_fmt *arg_fmt, struct btf *btf, char *type) { int id; @@ -3721,26 +3718,8 @@ static int syscall_arg_fmt__cache_btf_struct(struct syscall_arg_fmt *arg_fmt, st return 0; } -static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name) -{ - struct bpf_program *pos, *prog = NULL; - const char *sec_name; - - if (trace->skel->obj == NULL) - return NULL; - - bpf_object__for_each_program(pos, trace->skel->obj) { - sec_name = bpf_program__section_name(pos); - if (sec_name && !strcmp(sec_name, name)) { - prog = pos; - break; - } - } - - return prog; -} - -static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct 
syscall *sc, +static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace __maybe_unused, + struct syscall *sc, const char *prog_name, const char *type) { struct bpf_program *prog; @@ -3748,19 +3727,19 @@ static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, str if (prog_name == NULL) { char default_prog_name[256]; scnprintf(default_prog_name, sizeof(default_prog_name), "tp/syscalls/sys_%s_%s", type, sc->name); - prog = trace__find_bpf_program_by_title(trace, default_prog_name); + prog = augmented_syscalls__find_by_title(default_prog_name); if (prog != NULL) goto out_found; if (sc->fmt && sc->fmt->alias) { scnprintf(default_prog_name, sizeof(default_prog_name), "tp/syscalls/sys_%s_%s", type, sc->fmt->alias); - prog = trace__find_bpf_program_by_title(trace, default_prog_name); + prog = augmented_syscalls__find_by_title(default_prog_name); if (prog != NULL) goto out_found; } goto out_unaugmented; } - prog = trace__find_bpf_program_by_title(trace, prog_name); + prog = augmented_syscalls__find_by_title(prog_name); if (prog != NULL) { out_found: @@ -3770,7 +3749,7 @@ static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, str pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n", prog_name, type, sc->name); out_unaugmented: - return trace->skel->progs.syscall_unaugmented; + return unaugmented_prog; } static void trace__init_syscall_bpf_progs(struct trace *trace, int e_machine, int id) @@ -3787,13 +3766,13 @@ static void trace__init_syscall_bpf_progs(struct trace *trace, int e_machine, in static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int e_machine, int id) { struct syscall *sc = trace__syscall_info(trace, NULL, e_machine, id); - return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->skel->progs.syscall_unaugmented); + return sc ? 
bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(unaugmented_prog); } static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int e_machine, int id) { struct syscall *sc = trace__syscall_info(trace, NULL, e_machine, id); - return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->skel->progs.syscall_unaugmented); + return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(unaugmented_prog); } static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int e_machine, int key, unsigned int *beauty_array) @@ -3903,7 +3882,7 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace bool is_candidate = false; if (pair == NULL || pair->id == sc->id || - pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented) + pair->bpf_prog.sys_enter == unaugmented_prog) continue; for (field = sc->args, candidate_field = pair->args; @@ -3969,7 +3948,7 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace */ if (pair_prog == NULL) { pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? 
pair->fmt->bpf_prog_name.sys_enter : NULL, "enter"); - if (pair_prog == trace->skel->progs.syscall_unaugmented) + if (pair_prog == unaugmented_prog) goto next_candidate; } @@ -3985,12 +3964,17 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_machine) { - int map_enter_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_enter); - int map_exit_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_exit); - int beauty_map_fd = bpf_map__fd(trace->skel->maps.beauty_map_enter); + int map_enter_fd; + int map_exit_fd; + int beauty_map_fd; int err = 0; unsigned int beauty_array[6]; + if (augmented_syscalls__get_map_fds(&map_enter_fd, &map_exit_fd, &beauty_map_fd) < 0) + return -1; + + unaugmented_prog = augmented_syscalls__unaugmented(); + for (int i = 0, num_idx = syscalltbl__num_idx(e_machine); i < num_idx; ++i) { int prog_fd, key = syscalltbl__id_at_idx(e_machine, i); @@ -4060,7 +4044,7 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_m * For now we're just reusing the sys_enter prog, and if it * already has an augmenter, we don't need to find one. 
*/ - if (sc->bpf_prog.sys_enter != trace->skel->progs.syscall_unaugmented) + if (sc->bpf_prog.sys_enter != unaugmented_prog) continue; /* @@ -4085,7 +4069,13 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_m return err; } -#endif // HAVE_BPF_SKEL +#else // !HAVE_LIBBPF_SUPPORT +static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused, + int e_machine __maybe_unused) +{ + return -1; +} +#endif // HAVE_LIBBPF_SUPPORT static int trace__set_ev_qualifier_filter(struct trace *trace) { @@ -4094,24 +4084,6 @@ static int trace__set_ev_qualifier_filter(struct trace *trace) return 0; } -static int bpf_map__set_filter_pids(struct bpf_map *map __maybe_unused, - size_t npids __maybe_unused, pid_t *pids __maybe_unused) -{ - int err = 0; -#ifdef HAVE_LIBBPF_SUPPORT - bool value = true; - int map_fd = bpf_map__fd(map); - size_t i; - - for (i = 0; i < npids; ++i) { - err = bpf_map_update_elem(map_fd, &pids[i], &value, BPF_ANY); - if (err) - break; - } -#endif - return err; -} - static int trace__set_filter_loop_pids(struct trace *trace) { unsigned int nr = 1, err; @@ -4140,8 +4112,8 @@ static int trace__set_filter_loop_pids(struct trace *trace) thread__put(thread); err = evlist__append_tp_filter_pids(trace->evlist, nr, pids); - if (!err && trace->filter_pids.map) - err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids); + if (!err) + err = augmented_syscalls__set_filter_pids(nr, pids); return err; } @@ -4158,8 +4130,8 @@ static int trace__set_filter_pids(struct trace *trace) if (trace->filter_pids.nr > 0) { err = evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr, trace->filter_pids.entries); - if (!err && trace->filter_pids.map) { - err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr, + if (!err) { + err = augmented_syscalls__set_filter_pids(trace->filter_pids.nr, trace->filter_pids.entries); } } else if (perf_thread_map__pid(trace->evlist->core.threads, 0) == -1) { @@ 
-4482,41 +4454,18 @@ static int trace__run(struct trace *trace, int argc, const char **argv) err = evlist__open(evlist); if (err < 0) goto out_error_open; -#ifdef HAVE_BPF_SKEL - if (trace->syscalls.events.bpf_output) { - struct perf_cpu cpu; - /* - * Set up the __augmented_syscalls__ BPF map to hold for each - * CPU the bpf-output event's file descriptor. - */ - perf_cpu_map__for_each_cpu(cpu, i, trace->syscalls.events.bpf_output->core.cpus) { - int mycpu = cpu.cpu; + augmented_syscalls__setup_bpf_output(); - bpf_map__update_elem(trace->skel->maps.__augmented_syscalls__, - &mycpu, sizeof(mycpu), - xyarray__entry(trace->syscalls.events.bpf_output->core.fd, - mycpu, 0), - sizeof(__u32), BPF_ANY); - } - } - - if (trace->skel) - trace->filter_pids.map = trace->skel->maps.pids_filtered; -#endif err = trace__set_filter_pids(trace); if (err < 0) goto out_error_mem; -#ifdef HAVE_BPF_SKEL - if (trace->skel && trace->skel->progs.sys_enter) { - /* - * TODO: Initialize for all host binary machine types, not just - * those matching the perf binary. - */ - trace__init_syscalls_bpf_prog_array_maps(trace, EM_HOST); - } -#endif + /* + * TODO: Initialize for all host binary machine types, not just + * those matching the perf binary. 
+ */ + trace__init_syscalls_bpf_prog_array_maps(trace, EM_HOST); if (trace->ev_qualifier_ids.nr > 0) { err = trace__set_ev_qualifier_filter(trace); @@ -5379,18 +5328,6 @@ static void trace__exit(struct trace *trace) #endif } -#ifdef HAVE_BPF_SKEL -static int bpf__setup_bpf_output(struct evlist *evlist) -{ - int err = parse_event(evlist, "bpf-output/no-inherit=1,name=__augmented_syscalls__/"); - - if (err) - pr_debug("ERROR: failed to create the \"__augmented_syscalls__\" bpf-output event\n"); - - return err; -} -#endif - int cmd_trace(int argc, const char **argv) { const char *trace_usage[] = { @@ -5587,7 +5524,6 @@ int cmd_trace(int argc, const char **argv) "cgroup monitoring only available in system-wide mode"); } -#ifdef HAVE_BPF_SKEL if (!trace.trace_syscalls) goto skip_augmentation; @@ -5606,42 +5542,17 @@ int cmd_trace(int argc, const char **argv) goto skip_augmentation; } - trace.skel = augmented_raw_syscalls_bpf__open(); - if (!trace.skel) { - pr_debug("Failed to open augmented syscalls BPF skeleton"); - } else { - /* - * Disable attaching the BPF programs except for sys_enter and - * sys_exit that tail call into this as necessary. 
- */ - struct bpf_program *prog; + err = augmented_syscalls__prepare(); + if (err < 0) + goto skip_augmentation; - bpf_object__for_each_program(prog, trace.skel->obj) { - if (prog != trace.skel->progs.sys_enter && prog != trace.skel->progs.sys_exit) - bpf_program__set_autoattach(prog, /*autoattach=*/false); - } + trace__add_syscall_newtp(&trace); - err = augmented_raw_syscalls_bpf__load(trace.skel); + err = augmented_syscalls__create_bpf_output(trace.evlist); + if (err == 0) + trace.syscalls.events.bpf_output = evlist__last(trace.evlist); - if (err < 0) { - libbpf_strerror(err, bf, sizeof(bf)); - pr_debug("Failed to load augmented syscalls BPF skeleton: %s\n", bf); - } else { - augmented_raw_syscalls_bpf__attach(trace.skel); - trace__add_syscall_newtp(&trace); - } - } - - err = bpf__setup_bpf_output(trace.evlist); - if (err) { - libbpf_strerror(err, bf, sizeof(bf)); - pr_err("ERROR: Setup BPF output event failed: %s\n", bf); - goto out; - } - trace.syscalls.events.bpf_output = evlist__last(trace.evlist); - assert(evsel__name_is(trace.syscalls.events.bpf_output, "__augmented_syscalls__")); skip_augmentation: -#endif err = -1; if (trace.trace_pgfaults) { @@ -5833,8 +5744,6 @@ int cmd_trace(int argc, const char **argv) fclose(trace.output); out: trace__exit(&trace); -#ifdef HAVE_BPF_SKEL - augmented_raw_syscalls_bpf__destroy(trace.skel); -#endif + augmented_syscalls__cleanup(); return err; } diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 8a23eb767fb2..07dc1e704f90 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -176,6 +176,7 @@ perf-util-$(CONFIG_PERF_BPF_SKEL) += btf.o ifeq ($(CONFIG_TRACE),y) perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-trace-summary.o + perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_trace_augment.o endif ifeq ($(CONFIG_LIBTRACEEVENT),y) diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c index e4352881e3fa..cb86e261b4de 100644 --- 
a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c @@ -7,7 +7,6 @@ */ #include "vmlinux.h" -#include "../trace_augment.h" #include #include @@ -27,6 +26,8 @@ #define MAX_CPUS 4096 +#define TRACE_AUG_MAX_BUF 32 /* for buffer augmentation in perf trace */ + /* bpf-output associated map */ struct __augmented_syscalls__ { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); diff --git a/tools/perf/util/bpf_trace_augment.c b/tools/perf/util/bpf_trace_augment.c new file mode 100644 index 000000000000..56ed17534caa --- /dev/null +++ b/tools/perf/util/bpf_trace_augment.c @@ -0,0 +1,143 @@ +#include +#include + +#include "util/debug.h" +#include "util/evlist.h" +#include "util/trace_augment.h" + +#include "bpf_skel/augmented_raw_syscalls.skel.h" + +static struct augmented_raw_syscalls_bpf *skel; +static struct evsel *bpf_output; + +int augmented_syscalls__prepare(void) +{ + struct bpf_program *prog; + char buf[128]; + int err; + + skel = augmented_raw_syscalls_bpf__open(); + if (!skel) { + pr_debug("Failed to open augmented syscalls BPF skeleton\n"); + return -errno; + } + + /* + * Disable attaching the BPF programs except for sys_enter and + * sys_exit that tail call into this as necessary. 
+ */ + bpf_object__for_each_program(prog, skel->obj) { + if (prog != skel->progs.sys_enter && prog != skel->progs.sys_exit) + bpf_program__set_autoattach(prog, /*autoattach=*/false); + } + + err = augmented_raw_syscalls_bpf__load(skel); + if (err < 0) { + libbpf_strerror(err, buf, sizeof(buf)); + pr_debug("Failed to load augmented syscalls BPF skeleton: %s\n", buf); + return err; + } + + augmented_raw_syscalls_bpf__attach(skel); + return 0; +} + +int augmented_syscalls__create_bpf_output(struct evlist *evlist) +{ + int err = parse_event(evlist, "bpf-output/no-inherit=1,name=__augmented_syscalls__/"); + + if (err) { + pr_err("ERROR: Setup BPF output event failed: %d\n", err); + return err; + } + + bpf_output = evlist__last(evlist); + assert(evsel__name_is(bpf_output, "__augmented_syscalls__")); + + return 0; +} + +void augmented_syscalls__setup_bpf_output(void) +{ + struct perf_cpu cpu; + int i; + + if (bpf_output == NULL) + return; + + /* + * Set up the __augmented_syscalls__ BPF map to hold for each + * CPU the bpf-output event's file descriptor. 
+ */ + perf_cpu_map__for_each_cpu(cpu, i, bpf_output->core.cpus) { + int mycpu = cpu.cpu; + + bpf_map__update_elem(skel->maps.__augmented_syscalls__, + &mycpu, sizeof(mycpu), + xyarray__entry(bpf_output->core.fd, + mycpu, 0), + sizeof(__u32), BPF_ANY); + } +} + +int augmented_syscalls__set_filter_pids(unsigned int nr, pid_t *pids) +{ + bool value = true; + int err = 0; + + if (skel == NULL) + return 0; + + for (size_t i = 0; i < nr; ++i) { + err = bpf_map__update_elem(skel->maps.pids_filtered, &pids[i], + sizeof(*pids), &value, sizeof(value), + BPF_ANY); + if (err) + break; + } + return err; +} + +int augmented_syscalls__get_map_fds(int *enter_fd, int *exit_fd, int *beauty_fd) +{ + if (skel == NULL) + return -1; + + *enter_fd = bpf_map__fd(skel->maps.syscalls_sys_enter); + *exit_fd = bpf_map__fd(skel->maps.syscalls_sys_exit); + *beauty_fd = bpf_map__fd(skel->maps.beauty_map_enter); + + if (*enter_fd < 0 || *exit_fd < 0 || *beauty_fd < 0) { + pr_err("Error: failed to get syscall or beauty map fd\n"); + return -1; + } + + return 0; +} + +struct bpf_program *augmented_syscalls__unaugmented(void) +{ + return skel->progs.syscall_unaugmented; +} + +struct bpf_program *augmented_syscalls__find_by_title(const char *name) +{ + struct bpf_program *pos; + const char *sec_name; + + if (skel->obj == NULL) + return NULL; + + bpf_object__for_each_program(pos, skel->obj) { + sec_name = bpf_program__section_name(pos); + if (sec_name && !strcmp(sec_name, name)) + return pos; + } + + return NULL; +} + +void augmented_syscalls__cleanup(void) +{ + augmented_raw_syscalls_bpf__destroy(skel); +} diff --git a/tools/perf/util/trace_augment.h b/tools/perf/util/trace_augment.h index 57a3e5045937..4f729bc67753 100644 --- a/tools/perf/util/trace_augment.h +++ b/tools/perf/util/trace_augment.h @@ -1,6 +1,66 @@ #ifndef TRACE_AUGMENT_H #define TRACE_AUGMENT_H -#define TRACE_AUG_MAX_BUF 32 /* for buffer augmentation in perf trace */ +#include + +struct bpf_program; +struct evlist; + +#ifdef 
HAVE_BPF_SKEL + +int augmented_syscalls__prepare(void); +int augmented_syscalls__create_bpf_output(struct evlist *evlist); +void augmented_syscalls__setup_bpf_output(void); +int augmented_syscalls__set_filter_pids(unsigned int nr, pid_t *pids); +int augmented_syscalls__get_map_fds(int *enter_fd, int *exit_fd, int *beauty_fd); +struct bpf_program *augmented_syscalls__find_by_title(const char *name); +struct bpf_program *augmented_syscalls__unaugmented(void); +void augmented_syscalls__cleanup(void); + +#else /* !HAVE_BPF_SKEL */ + +static inline int augmented_syscalls__prepare(void) +{ + return -1; +} + +static inline int augmented_syscalls__create_bpf_output(struct evlist *evlist __maybe_unused) +{ + return -1; +} + +static inline void augmented_syscalls__setup_bpf_output(void) +{ +} + +static inline int augmented_syscalls__set_filter_pids(unsigned int nr __maybe_unused, + pid_t *pids __maybe_unused) +{ + return 0; +} + +static inline int augmented_syscalls__get_map_fds(int *enter_fd __maybe_unused, + int *exit_fd __maybe_unused, + int *beauty_fd __maybe_unused) +{ + return -1; +} + +static inline struct bpf_program * +augmented_syscalls__find_by_title(const char *name __maybe_unused) +{ + return NULL; +} + +static inline struct bpf_program *augmented_syscalls__unaugmented(void) +{ + return NULL; +} + +static inline void augmented_syscalls__cleanup(void) +{ +} + +#endif /* HAVE_BPF_SKEL */ #endif From ac871873bac736edd3945ade222d2902c0b10ac2 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Thu, 12 Jun 2025 11:36:58 -0500 Subject: [PATCH 0371/2411] perf tools: move perf_pmus__find_core_pmu() prototype to pmus.h perf_pmus__find_core_pmu() is implemented in util/pmus.c but its prototype is in util/pmu.h. Move it to util/pmus.h. 
Suggested-by: Ian Rogers Signed-off-by: Thomas Falcon Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250612163659.1357950-1-thomas.falcon@intel.com Signed-off-by: Namhyung Kim --- tools/perf/util/pmu.h | 1 - tools/perf/util/pmus.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index a4a08192154c..1ebcf0242af8 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -302,7 +302,6 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char bool eager_load); struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus); void perf_pmu__delete(struct perf_pmu *pmu); -struct perf_pmu *perf_pmus__find_core_pmu(void); const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config); bool perf_pmu__is_fake(const struct perf_pmu *pmu); diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h index 2794d8c3a466..33ecf765a92f 100644 --- a/tools/perf/util/pmus.h +++ b/tools/perf/util/pmus.h @@ -35,5 +35,6 @@ struct perf_pmu *perf_pmus__add_test_hwmon_pmu(int hwmon_dir, const char *sysfs_name, const char *name); struct perf_pmu *perf_pmus__fake_pmu(void); +struct perf_pmu *perf_pmus__find_core_pmu(void); #endif /* __PMUS_H */ From c72bf82f96019216bb0a291d39c244977603661f Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Thu, 12 Jun 2025 11:36:59 -0500 Subject: [PATCH 0372/2411] perf top: populate PMU capabilities data in perf_env Calling perf top with branch filters enabled on Intel CPU's with branch counters logging (A.K.A LBR event logging [1]) support results in a segfault. $ perf top -e '{cpu_core/cpu-cycles/,cpu_core/event=0xc6,umask=0x3,frontend=0x11,name=frontend_retired_dsb_miss/}' -j any,counter ... Thread 27 "perf" received signal SIGSEGV, Segmentation fault. 
[Switching to Thread 0x7fffafff76c0 (LWP 949003)] perf_env__find_br_cntr_info (env=0xf66dc0 , nr=0x0, width=0x7fffafff62c0) at util/env.c:653 653 *width = env->cpu_pmu_caps ? env->br_cntr_width : (gdb) bt #0 perf_env__find_br_cntr_info (env=0xf66dc0 , nr=0x0, width=0x7fffafff62c0) at util/env.c:653 #1 0x00000000005b1599 in symbol__account_br_cntr (branch=0x7fffcc3db580, evsel=0xfea2d0, offset=12, br_cntr=8) at util/annotate.c:345 #2 0x00000000005b17fb in symbol__account_cycles (addr=5658172, start=5658160, sym=0x7fffcc0ee420, cycles=539, evsel=0xfea2d0, br_cntr=8) at util/annotate.c:389 #3 0x00000000005b1976 in addr_map_symbol__account_cycles (ams=0x7fffcd7b01d0, start=0x7fffcd7b02b0, cycles=539, evsel=0xfea2d0, br_cntr=8) at util/annotate.c:422 #4 0x000000000068d57f in hist__account_cycles (bs=0x110d288, al=0x7fffafff6540, sample=0x7fffafff6760, nonany_branch_mode=false, total_cycles=0x0, evsel=0xfea2d0) at util/hist.c:2850 #5 0x0000000000446216 in hist_iter__top_callback (iter=0x7fffafff6590, al=0x7fffafff6540, single=true, arg=0x7fffffff9e00) at builtin-top.c:737 #6 0x0000000000689787 in hist_entry_iter__add (iter=0x7fffafff6590, al=0x7fffafff6540, max_stack_depth=127, arg=0x7fffffff9e00) at util/hist.c:1359 #7 0x0000000000446710 in perf_event__process_sample (tool=0x7fffffff9e00, event=0x110d250, evsel=0xfea2d0, sample=0x7fffafff6760, machine=0x108c968) at builtin-top.c:845 #8 0x0000000000447735 in deliver_event (qe=0x7fffffffa120, qevent=0x10fc200) at builtin-top.c:1211 #9 0x000000000064ccae in do_flush (oe=0x7fffffffa120, show_progress=false) at util/ordered-events.c:245 #10 0x000000000064d005 in __ordered_events__flush (oe=0x7fffffffa120, how=OE_FLUSH__TOP, timestamp=0) at util/ordered-events.c:324 #11 0x000000000064d0ef in ordered_events__flush (oe=0x7fffffffa120, how=OE_FLUSH__TOP) at util/ordered-events.c:342 #12 0x00000000004472a9 in process_thread (arg=0x7fffffff9e00) at builtin-top.c:1120 #13 0x00007ffff6e7dba8 in start_thread (arg=) at 
pthread_create.c:448 #14 0x00007ffff6f01b8c in __GI___clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:78 The cause is that perf_env__find_br_cntr_info tries to access a null pointer pmu_caps in the perf_env struct. A similar issue exists for homogeneous core systems which use the cpu_pmu_caps structure. Fix this by populating cpu_pmu_caps and pmu_caps structures with values from sysfs when calling perf top with branch stack sampling enabled. [1], LBR event logging introduced here: https://lore.kernel.org/all/20231025201626.3000228-5-kan.liang@linux.intel.com/ Reviewed-by: Ian Rogers Signed-off-by: Thomas Falcon Link: https://lore.kernel.org/r/20250612163659.1357950-2-thomas.falcon@intel.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-top.c | 8 +++ tools/perf/util/env.c | 110 +++++++++++++++++++++++++++++++++++++++ tools/perf/util/env.h | 1 + 3 files changed, 119 insertions(+) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 051ded5ba9ba..c77e195ea786 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1734,6 +1734,14 @@ int cmd_top(int argc, const char **argv) if (opts->branch_stack && callchain_param.enabled) symbol_conf.show_branchflag_count = true; + if (opts->branch_stack) { + status = perf_env__read_core_pmu_caps(&perf_env); + if (status) { + pr_err("PMU capability data is not available\n"); + goto out_delete_evlist; + } + } + sort__mode = SORT_MODE__TOP; /* display thread wants entries to be collapsed in a different tree */ perf_hpp_list.need_collapse = 1; diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 05a4f2657d72..ee51378fb0d9 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -433,6 +433,116 @@ static int perf_env__read_nr_cpus_avail(struct perf_env *env) return env->nr_cpus_avail ? 
0 : -ENOENT; } +static int __perf_env__read_core_pmu_caps(const struct perf_pmu *pmu, + int *nr_caps, char ***caps, + unsigned int *max_branches, + unsigned int *br_cntr_nr, + unsigned int *br_cntr_width) +{ + struct perf_pmu_caps *pcaps = NULL; + char *ptr, **tmp; + int ret = 0; + + *nr_caps = 0; + *caps = NULL; + + if (!pmu->nr_caps) + return 0; + + *caps = calloc(pmu->nr_caps, sizeof(char *)); + if (!*caps) + return -ENOMEM; + + tmp = *caps; + list_for_each_entry(pcaps, &pmu->caps, list) { + if (asprintf(&ptr, "%s=%s", pcaps->name, pcaps->value) < 0) { + ret = -ENOMEM; + goto error; + } + + *tmp++ = ptr; + + if (!strcmp(pcaps->name, "branches")) + *max_branches = atoi(pcaps->value); + else if (!strcmp(pcaps->name, "branch_counter_nr")) + *br_cntr_nr = atoi(pcaps->value); + else if (!strcmp(pcaps->name, "branch_counter_width")) + *br_cntr_width = atoi(pcaps->value); + } + *nr_caps = pmu->nr_caps; + return 0; +error: + while (tmp-- != *caps) + zfree(tmp); + zfree(caps); + *nr_caps = 0; + return ret; +} + +int perf_env__read_core_pmu_caps(struct perf_env *env) +{ + struct pmu_caps *pmu_caps; + struct perf_pmu *pmu = NULL; + int nr_pmu, i = 0, j; + int ret; + + nr_pmu = perf_pmus__num_core_pmus(); + + if (!nr_pmu) + return -ENODEV; + + if (nr_pmu == 1) { + pmu = perf_pmus__find_core_pmu(); + if (!pmu) + return -ENODEV; + ret = perf_pmu__caps_parse(pmu); + if (ret < 0) + return ret; + return __perf_env__read_core_pmu_caps(pmu, &env->nr_cpu_pmu_caps, + &env->cpu_pmu_caps, + &env->max_branches, + &env->br_cntr_nr, + &env->br_cntr_width); + } + + pmu_caps = calloc(nr_pmu, sizeof(*pmu_caps)); + if (!pmu_caps) + return -ENOMEM; + + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { + if (perf_pmu__caps_parse(pmu) <= 0) + continue; + ret = __perf_env__read_core_pmu_caps(pmu, &pmu_caps[i].nr_caps, + &pmu_caps[i].caps, + &pmu_caps[i].max_branches, + &pmu_caps[i].br_cntr_nr, + &pmu_caps[i].br_cntr_width); + if (ret) + goto error; + + pmu_caps[i].pmu_name = strdup(pmu->name); 
+ if (!pmu_caps[i].pmu_name) { + ret = -ENOMEM; + goto error; + } + i++; + } + + env->nr_pmus_with_caps = nr_pmu; + env->pmu_caps = pmu_caps; + + return 0; +error: + for (i = 0; i < nr_pmu; i++) { + for (j = 0; j < pmu_caps[i].nr_caps; j++) + zfree(&pmu_caps[i].caps[j]); + zfree(&pmu_caps[i].caps); + zfree(&pmu_caps[i].pmu_name); + } + zfree(&pmu_caps); + return ret; +} + const char *perf_env__raw_arch(struct perf_env *env) { return env && !perf_env__read_arch(env) ? env->arch : "unknown"; diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index c90c1d717e73..d8df59072529 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -152,6 +152,7 @@ struct btf_node; extern struct perf_env perf_env; +int perf_env__read_core_pmu_caps(struct perf_env *env); void perf_env__exit(struct perf_env *env); int perf_env__kernel_is_64_bit(struct perf_env *env); From 55a18d2f3ff79c9082225f44e0abbaea6286bf99 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 25 Jun 2025 13:23:08 -0700 Subject: [PATCH 0373/2411] perf build: enable -fno-strict-aliasing perf pulls in code from kernel headers that assumes it is being built with -fno-strict-aliasing, namely put_unaligned_*() from which write the data using packed structs that lack the may_alias attribute. Enable -fno-strict-aliasing to prevent miscompilations in sha1.c which would otherwise occur due to this issue. 
Signed-off-by: Eric Biggers Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250625202311.23244-2-ebiggers@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/Makefile.config | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 24736b0bbb30..70a3e771c7c0 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -19,6 +19,10 @@ detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected) CFLAGS := $(EXTRA_CFLAGS) $(filter-out -Wnested-externs,$(EXTRA_WARNINGS)) HOSTCFLAGS := $(filter-out -Wnested-externs,$(EXTRA_WARNINGS)) +# This is required because the kernel is built with this and some of the code +# borrowed from kernel headers depends on it, e.g. put_unaligned_*(). +CFLAGS += -fno-strict-aliasing + # Enabled Wthread-safety analysis for clang builds. ifeq ($(CC_NO_CLANG), 0) CFLAGS += -Wthread-safety From 43830468b6436811ff732b062f8d6306c6eddb77 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 25 Jun 2025 13:23:09 -0700 Subject: [PATCH 0374/2411] perf util: add a basic SHA-1 implementation SHA-1 can be written in fewer than 100 lines of code. Just add a basic SHA-1 implementation so that there's no need to use an external library or try to pull in the kernel's SHA-1 implementation. The kernel's SHA-1 implementation is not really intended to be pulled into userspace programs in the way that it was proposed to do so for perf (https://lore.kernel.org/r/20250521225307.743726-3-yuzhuo@google.com/), and it's also likely to undergo some refactoring in the future. There's no need to tie userspace tools to it. Include a test for sha1() in the util test suite. 
Signed-off-by: Eric Biggers Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250625202311.23244-3-ebiggers@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/tests/util.c | 45 ++++++++++++++++++- tools/perf/util/Build | 1 + tools/perf/util/sha1.c | 97 +++++++++++++++++++++++++++++++++++++++++ tools/perf/util/sha1.h | 6 +++ 4 files changed, 148 insertions(+), 1 deletion(-) create mode 100644 tools/perf/util/sha1.c create mode 100644 tools/perf/util/sha1.h diff --git a/tools/perf/tests/util.c b/tools/perf/tests/util.c index 6366db5cbf8c..b273d287e164 100644 --- a/tools/perf/tests/util.c +++ b/tools/perf/tests/util.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "tests.h" #include "util/debug.h" +#include "util/sha1.h" #include #include @@ -16,6 +17,48 @@ static int test_strreplace(char needle, const char *haystack, return ret == 0; } +#define MAX_LEN 512 + +/* Test sha1() for all lengths from 0 to MAX_LEN inclusively. */ +static int test_sha1(void) +{ + u8 data[MAX_LEN]; + size_t digests_size = (MAX_LEN + 1) * SHA1_DIGEST_SIZE; + u8 *digests; + u8 digest_of_digests[SHA1_DIGEST_SIZE]; + /* + * The correctness of this value was verified by running this test with + * sha1() replaced by OpenSSL's SHA1(). + */ + static const u8 expected_digest_of_digests[SHA1_DIGEST_SIZE] = { + 0x74, 0xcd, 0x4c, 0xb9, 0xd8, 0xa6, 0xd5, 0x95, 0x22, 0x8b, + 0x7e, 0xd6, 0x8b, 0x7e, 0x46, 0x95, 0x31, 0x9b, 0xa2, 0x43, + }; + size_t i; + + digests = malloc(digests_size); + TEST_ASSERT_VAL("failed to allocate digests", digests != NULL); + + /* Generate MAX_LEN bytes of data. */ + for (i = 0; i < MAX_LEN; i++) + data[i] = i; + + /* Calculate a SHA-1 for each length 0 through MAX_LEN inclusively. */ + for (i = 0; i <= MAX_LEN; i++) + sha1(data, i, &digests[i * SHA1_DIGEST_SIZE]); + + /* Calculate digest of all digests calculated above. */ + sha1(digests, digests_size, digest_of_digests); + + free(digests); + + /* Check for the expected result. 
*/ + TEST_ASSERT_VAL("wrong output from sha1()", + memcmp(digest_of_digests, expected_digest_of_digests, + SHA1_DIGEST_SIZE) == 0); + return 0; +} + static int test__util(struct test_suite *t __maybe_unused, int subtest __maybe_unused) { TEST_ASSERT_VAL("empty string", test_strreplace(' ', "", "123", "")); @@ -25,7 +68,7 @@ static int test__util(struct test_suite *t __maybe_unused, int subtest __maybe_u TEST_ASSERT_VAL("replace long", test_strreplace('a', "abcabc", "longlong", "longlongbclonglongbc")); - return 0; + return test_sha1(); } DEFINE_SUITE("util", util); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 07dc1e704f90..45515b8f615a 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -41,6 +41,7 @@ perf-util-y += rbtree.o perf-util-y += libstring.o perf-util-y += bitmap.o perf-util-y += hweight.o +perf-util-y += sha1.o perf-util-y += smt.o perf-util-y += strbuf.o perf-util-y += string.o diff --git a/tools/perf/util/sha1.c b/tools/perf/util/sha1.c new file mode 100644 index 000000000000..7032fa4ff3fd --- /dev/null +++ b/tools/perf/util/sha1.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * SHA-1 message digest algorithm + * + * Copyright 2025 Google LLC + */ +#include +#include +#include +#include + +#include "sha1.h" + +#define SHA1_BLOCK_SIZE 64 + +static const u32 sha1_K[4] = { 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6 }; + +#define SHA1_ROUND(i, a, b, c, d, e) \ + do { \ + if ((i) >= 16) \ + w[i] = rol32(w[(i) - 16] ^ w[(i) - 14] ^ w[(i) - 8] ^ \ + w[(i) - 3], \ + 1); \ + e += w[i] + rol32(a, 5) + sha1_K[(i) / 20]; \ + if ((i) < 20) \ + e += (b & (c ^ d)) ^ d; \ + else if ((i) < 40 || (i) >= 60) \ + e += b ^ c ^ d; \ + else \ + e += (c & d) ^ (b & (c ^ d)); \ + b = rol32(b, 30); \ + /* The new (a, b, c, d, e) is the old (e, a, b, c, d). 
*/ \ + } while (0) + +#define SHA1_5ROUNDS(i) \ + do { \ + SHA1_ROUND((i) + 0, a, b, c, d, e); \ + SHA1_ROUND((i) + 1, e, a, b, c, d); \ + SHA1_ROUND((i) + 2, d, e, a, b, c); \ + SHA1_ROUND((i) + 3, c, d, e, a, b); \ + SHA1_ROUND((i) + 4, b, c, d, e, a); \ + } while (0) + +#define SHA1_20ROUNDS(i) \ + do { \ + SHA1_5ROUNDS((i) + 0); \ + SHA1_5ROUNDS((i) + 5); \ + SHA1_5ROUNDS((i) + 10); \ + SHA1_5ROUNDS((i) + 15); \ + } while (0) + +static void sha1_blocks(u32 h[5], const u8 *data, size_t nblocks) +{ + while (nblocks--) { + u32 a = h[0]; + u32 b = h[1]; + u32 c = h[2]; + u32 d = h[3]; + u32 e = h[4]; + u32 w[80]; + + for (int i = 0; i < 16; i++) + w[i] = get_unaligned_be32(&data[i * 4]); + SHA1_20ROUNDS(0); + SHA1_20ROUNDS(20); + SHA1_20ROUNDS(40); + SHA1_20ROUNDS(60); + + h[0] += a; + h[1] += b; + h[2] += c; + h[3] += d; + h[4] += e; + data += SHA1_BLOCK_SIZE; + } +} + +/* Calculate the SHA-1 message digest of the given data. */ +void sha1(const void *data, size_t len, u8 out[SHA1_DIGEST_SIZE]) +{ + u32 h[5] = { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, + 0xC3D2E1F0 }; + u8 final_data[2 * SHA1_BLOCK_SIZE] = { 0 }; + size_t final_len = len % SHA1_BLOCK_SIZE; + + sha1_blocks(h, data, len / SHA1_BLOCK_SIZE); + + memcpy(final_data, data + len - final_len, final_len); + final_data[final_len] = 0x80; + final_len = round_up(final_len + 9, SHA1_BLOCK_SIZE); + put_unaligned_be64((u64)len * 8, &final_data[final_len - 8]); + + sha1_blocks(h, final_data, final_len / SHA1_BLOCK_SIZE); + + for (int i = 0; i < 5; i++) + put_unaligned_be32(h[i], &out[i * 4]); +} diff --git a/tools/perf/util/sha1.h b/tools/perf/util/sha1.h new file mode 100644 index 000000000000..e92c9966e1d5 --- /dev/null +++ b/tools/perf/util/sha1.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include + +#define SHA1_DIGEST_SIZE 20 + +void sha1(const void *data, size_t len, u8 out[SHA1_DIGEST_SIZE]); From e3f612c1d8f3945bb0cc8aad173fc12a3b20dc2a Mon Sep 17 00:00:00 2001 From: Yuzhuo Jing 
Date: Wed, 25 Jun 2025 13:23:10 -0700 Subject: [PATCH 0375/2411] perf genelf: Remove libcrypto dependency and use built-in sha1() genelf is the only file in perf that depends on libcrypto (or openssl) which only calculates a Build ID (SHA1, MD5, or URANDOM). SHA1 was expected to be the default option, but MD5 was used by default due to previous issues when linking against Java. This commit switches genelf to use the in-house sha1(), and also removes MD5 and URANDOM options since we have a reliable SHA1 implementation to rely on. It passes the tools/perf/tests/shell/test_java_symbol.sh test. Signed-off-by: Yuzhuo Jing Co-developed-by: Eric Biggers Signed-off-by: Eric Biggers Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250625202311.23244-4-ebiggers@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/util/genelf.c | 85 ++-------------------------------------- 1 file changed, 3 insertions(+), 82 deletions(-) diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index cdce7f173d00..fcf86a27f69e 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -12,15 +12,14 @@ #include #include #include -#include #include -#include #include #ifdef HAVE_LIBDW_SUPPORT #include #endif #include "genelf.h" +#include "sha1.h" #include "../util/jitdump.h" #include @@ -28,25 +27,6 @@ #define NT_GNU_BUILD_ID 3 #endif -#define BUILD_ID_URANDOM /* different uuid for each run */ - -#ifdef HAVE_LIBCRYPTO_SUPPORT - -#define BUILD_ID_MD5 -#undef BUILD_ID_SHA /* does not seem to work well when linked with Java */ -#undef BUILD_ID_URANDOM /* different uuid for each run */ - -#ifdef BUILD_ID_SHA -#include -#endif - -#ifdef BUILD_ID_MD5 -#include -#include -#endif -#endif - - typedef struct { unsigned int namesz; /* Size of entry's owner string */ unsigned int descsz; /* Size of the note descriptor */ @@ -71,7 +51,7 @@ static char shd_string_table[] = { static struct buildid_note { Elf_Note desc; /* descsz: size of build-id, must be multiple of 4 */ char 
name[4]; /* GNU\0 */ - char build_id[20]; + u8 build_id[SHA1_DIGEST_SIZE]; } bnote; static Elf_Sym symtab[]={ @@ -92,65 +72,6 @@ static Elf_Sym symtab[]={ } }; -#ifdef BUILD_ID_URANDOM -static void -gen_build_id(struct buildid_note *note, - unsigned long load_addr __maybe_unused, - const void *code __maybe_unused, - size_t csize __maybe_unused) -{ - int fd; - size_t sz = sizeof(note->build_id); - ssize_t sret; - - fd = open("/dev/urandom", O_RDONLY); - if (fd == -1) - err(1, "cannot access /dev/urandom for buildid"); - - sret = read(fd, note->build_id, sz); - - close(fd); - - if (sret != (ssize_t)sz) - memset(note->build_id, 0, sz); -} -#endif - -#ifdef BUILD_ID_SHA -static void -gen_build_id(struct buildid_note *note, - unsigned long load_addr __maybe_unused, - const void *code, - size_t csize) -{ - if (sizeof(note->build_id) < SHA_DIGEST_LENGTH) - errx(1, "build_id too small for SHA1"); - - SHA1(code, csize, (unsigned char *)note->build_id); -} -#endif - -#ifdef BUILD_ID_MD5 -static void -gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *code, size_t csize) -{ - EVP_MD_CTX *mdctx; - - if (sizeof(note->build_id) < 16) - errx(1, "build_id too small for MD5"); - - mdctx = EVP_MD_CTX_new(); - if (!mdctx) - errx(2, "failed to create EVP_MD_CTX"); - - EVP_DigestInit_ex(mdctx, EVP_md5(), NULL); - EVP_DigestUpdate(mdctx, &load_addr, sizeof(load_addr)); - EVP_DigestUpdate(mdctx, code, csize); - EVP_DigestFinal_ex(mdctx, (unsigned char *)note->build_id, NULL); - EVP_MD_CTX_free(mdctx); -} -#endif - static int jit_add_eh_frame_info(Elf *e, void* unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size, uint64_t base_offset) @@ -473,7 +394,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, /* * build-id generation */ - gen_build_id(&bnote, load_addr, code, csize); + sha1(code, csize, bnote.build_id); bnote.desc.namesz = sizeof(bnote.name); /* must include 0 termination */ bnote.desc.descsz = sizeof(bnote.build_id); 
bnote.desc.type = NT_GNU_BUILD_ID; From 8e63fd1e00f59eab01ab43eb094abc380f8d0c28 Mon Sep 17 00:00:00 2001 From: Yuzhuo Jing Date: Wed, 25 Jun 2025 13:23:11 -0700 Subject: [PATCH 0376/2411] tools: Remove libcrypto dependency Remove all occurrence of libcrypto in the build system. Signed-off-by: Yuzhuo Jing Signed-off-by: Eric Biggers Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250625202311.23244-5-ebiggers@kernel.org Signed-off-by: Namhyung Kim --- tools/build/Makefile.feature | 2 -- tools/build/feature/Makefile | 4 ---- tools/build/feature/test-all.c | 5 ----- tools/build/feature/test-libcrypto.c | 25 ------------------------- tools/perf/Documentation/perf-check.txt | 1 - tools/perf/Makefile.config | 13 ------------- tools/perf/Makefile.perf | 3 --- tools/perf/builtin-check.c | 1 - tools/perf/tests/make | 4 +--- 9 files changed, 1 insertion(+), 57 deletions(-) delete mode 100644 tools/build/feature/test-libcrypto.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 2e5f4c8b6547..649c5ab8e8f2 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -86,7 +86,6 @@ FEATURE_TESTS_BASIC := \ libtraceevent \ libtracefs \ libcpupower \ - libcrypto \ pthread-attr-setaffinity-np \ pthread-barrier \ reallocarray \ @@ -147,7 +146,6 @@ FEATURE_DISPLAY ?= \ numa_num_possible_cpus \ libperl \ libpython \ - libcrypto \ libcapstone \ llvm-perf \ zlib \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 0c4e541ed56e..b41a42818d8a 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -38,7 +38,6 @@ FILES= \ test-libtraceevent.bin \ test-libcpupower.bin \ test-libtracefs.bin \ - test-libcrypto.bin \ test-libunwind.bin \ test-libunwind-debug-frame.bin \ test-libunwind-x86.bin \ @@ -247,9 +246,6 @@ $(OUTPUT)test-libcpupower.bin: $(OUTPUT)test-libtracefs.bin: $(BUILD) $(shell $(PKG_CONFIG) --cflags libtracefs 2>/dev/null) -ltracefs -$(OUTPUT)test-libcrypto.bin: - 
$(BUILD) -lcrypto - $(OUTPUT)test-gtk2.bin: $(BUILD) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) -Wno-deprecated-declarations diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 1010f233d9c1..4419fb4710bd 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -130,10 +130,6 @@ # include "test-bpf.c" #undef main -#define main main_test_libcrypto -# include "test-libcrypto.c" -#undef main - #define main main_test_sdt # include "test-sdt.c" #undef main @@ -188,7 +184,6 @@ int main(int argc, char *argv[]) main_test_lzma(); main_test_get_cpuid(); main_test_bpf(); - main_test_libcrypto(); main_test_scandirat(); main_test_sched_getcpu(); main_test_sdt(); diff --git a/tools/build/feature/test-libcrypto.c b/tools/build/feature/test-libcrypto.c deleted file mode 100644 index bc34a5bbb504..000000000000 --- a/tools/build/feature/test-libcrypto.c +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include - -int main(void) -{ - EVP_MD_CTX *mdctx; - unsigned char md[MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH]; - unsigned char dat[] = "12345"; - unsigned int digest_len; - - mdctx = EVP_MD_CTX_new(); - if (!mdctx) - return 0; - - EVP_DigestInit_ex(mdctx, EVP_md5(), NULL); - EVP_DigestUpdate(mdctx, &dat[0], sizeof(dat)); - EVP_DigestFinal_ex(mdctx, &md[0], &digest_len); - EVP_MD_CTX_free(mdctx); - - SHA1(&dat[0], sizeof(dat), &md[0]); - - return 0; -} diff --git a/tools/perf/Documentation/perf-check.txt b/tools/perf/Documentation/perf-check.txt index 799982d8d868..ee92042082f7 100644 --- a/tools/perf/Documentation/perf-check.txt +++ b/tools/perf/Documentation/perf-check.txt @@ -54,7 +54,6 @@ feature:: libbfd / HAVE_LIBBFD_SUPPORT libbpf-strings / HAVE_LIBBPF_STRINGS_SUPPORT libcapstone / HAVE_LIBCAPSTONE_SUPPORT - libcrypto / HAVE_LIBCRYPTO_SUPPORT libdw-dwarf-unwind / HAVE_LIBDW_SUPPORT libelf / HAVE_LIBELF_SUPPORT libnuma / HAVE_LIBNUMA_SUPPORT diff --git 
a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 70a3e771c7c0..5a5832ee7b53 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -134,8 +134,6 @@ ifndef NO_LIBUNWIND FEATURE_CHECK_LDFLAGS-libunwind-x86_64 += -lunwind -llzma -lunwind-x86_64 endif -FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto - ifdef CSINCLUDES LIBOPENCSD_CFLAGS := -I$(CSINCLUDES) endif @@ -784,17 +782,6 @@ ifneq ($(NO_LIBTRACEEVENT),1) $(call detected,CONFIG_TRACE) endif -ifndef NO_LIBCRYPTO - ifneq ($(feature-libcrypto), 1) - $(warning No libcrypto.h found, disables jitted code injection, please install openssl-devel or libssl-dev) - NO_LIBCRYPTO := 1 - else - CFLAGS += -DHAVE_LIBCRYPTO_SUPPORT - EXTLIBS += -lcrypto - $(call detected,CONFIG_CRYPTO) - endif -endif - ifndef NO_SLANG ifneq ($(feature-libslang), 1) ifneq ($(feature-libslang-include-subdir), 1) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 4f292edeca5a..62697d62f706 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -61,9 +61,6 @@ include ../scripts/utilities.mak # # Define NO_LIBBIONIC if you do not want bionic support # -# Define NO_LIBCRYPTO if you do not want libcrypto (openssl) support -# used for generating build-ids for ELFs generated by jitdump. -# # Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support # for dwarf backtrace post unwind. 
# diff --git a/tools/perf/builtin-check.c b/tools/perf/builtin-check.c index f4827f0ddb47..b1e205871ab1 100644 --- a/tools/perf/builtin-check.c +++ b/tools/perf/builtin-check.c @@ -45,7 +45,6 @@ struct feature_status supported_features[] = { FEATURE_STATUS_TIP("libbfd", HAVE_LIBBFD_SUPPORT, "Deprecated, license incompatibility, use BUILD_NONDISTRO=1 and install binutils-dev[el]"), FEATURE_STATUS("libbpf-strings", HAVE_LIBBPF_STRINGS_SUPPORT), FEATURE_STATUS("libcapstone", HAVE_LIBCAPSTONE_SUPPORT), - FEATURE_STATUS("libcrypto", HAVE_LIBCRYPTO_SUPPORT), FEATURE_STATUS("libdw-dwarf-unwind", HAVE_LIBDW_SUPPORT), FEATURE_STATUS("libelf", HAVE_LIBELF_SUPPORT), FEATURE_STATUS("libnuma", HAVE_LIBNUMA_SUPPORT), diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 0ee94caf9ec1..e3651e5b195a 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -91,7 +91,6 @@ make_no_auxtrace := NO_AUXTRACE=1 make_no_libbpf := NO_LIBBPF=1 make_libbpf_dynamic := LIBBPF_DYNAMIC=1 make_no_libbpf_DEBUG := NO_LIBBPF=1 DEBUG=1 -make_no_libcrypto := NO_LIBCRYPTO=1 make_no_libllvm := NO_LIBLLVM=1 make_with_babeltrace:= LIBBABELTRACE=1 make_with_coresight := CORESIGHT=1 @@ -122,7 +121,7 @@ make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_GTK2=1 make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_BACKTRACE=1 make_minimal += NO_LIBNUMA=1 NO_LIBBIONIC=1 make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 -make_minimal += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1 NO_LIBZSTD=1 +make_minimal += NO_SDT=1 NO_JVMTI=1 NO_LIBZSTD=1 make_minimal += NO_LIBCAP=1 NO_CAPSTONE=1 # $(run) contains all available tests @@ -160,7 +159,6 @@ run += make_no_libbionic run += make_no_auxtrace run += make_no_libbpf run += make_no_libbpf_DEBUG -run += make_no_libcrypto run += make_no_libllvm run += make_no_sdt run += make_no_syscall_tbl From a6f494becf09c9ebba72ed67d3728f6811daa634 Mon Sep 17 00:00:00 2001 From: Akshay Jindal Date: Fri, 20 Jun 2025 00:20:30 +0530 Subject: [PATCH 0377/2411] PCI/AER: Add 
message when AER_MAX_MULTI_ERR_DEVICES limit is hit When a PCIe device detects an error, it logs the error locally and issues an error Message routed to the Root Complex (PCIe r6.0, sec 6.2.5). If the Root Port or RCEC supports AER and Linux has enabled the AER interrupt, aer_isr() traverses the relevant devices and adds those with AER errors logged to the aer_err_info.dev[] array for error logging and recovery. If aer_isr() finds more than AER_MAX_MULTI_ERR_DEVICES devices with AER errors logged, it silently ignores them, and those extra devices are not included in the recovery flow. Emit an error message if we find more than AER_MAX_MULTI_ERR_DEVICES devices with AER errors logged. Testing details at link below. Signed-off-by: Akshay Jindal [bhelgaas: commit log, join error message] Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250619185041.73240-1-akshayaj.lkd@gmail.com --- drivers/pci/pcie/aer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 361957502831..e286c197d716 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1039,7 +1039,8 @@ static int find_device_iter(struct pci_dev *dev, void *data) /* List this device */ if (add_error_device(e_info, dev)) { /* We cannot handle more... Stop iteration */ - /* TODO: Should print error message here? */ + pci_err(dev, "Exceeded max supported (%d) devices with errors logged\n", + AER_MAX_MULTI_ERR_DEVICES); return 1; } From ae6a0f5b8a5b0ca2e4bf1c0380267ad83aca8401 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 24 Jun 2025 13:55:07 +0100 Subject: [PATCH 0378/2411] soundwire: Correct some property names The DisCo properties should be mipi-sdw-paging-supported and mipi-sdw-bank-delay-supported, with an 'ed' on the end. Correct the property names used in sdw_slave_read_prop(). The internal flag bank_delay_support is currently unimplemented, so that being read wrong does not currently affect anything. 
The two existing users for this helper and the paging_support flag rt1320-sdw.c and rt721-sdca-sdw.c both manually set the flag in their slave properties, thus are not affected by this bug either. Fixes: 56d4fe31af77 ("soundwire: Add MIPI DisCo property helpers") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20250624125507.2866346-1-ckeepax@opensource.cirrus.com Signed-off-by: Vinod Koul --- drivers/soundwire/mipi_disco.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soundwire/mipi_disco.c b/drivers/soundwire/mipi_disco.c index 65afb28ef8fa..c69b78cd0b62 100644 --- a/drivers/soundwire/mipi_disco.c +++ b/drivers/soundwire/mipi_disco.c @@ -451,10 +451,10 @@ int sdw_slave_read_prop(struct sdw_slave *slave) "mipi-sdw-highPHY-capable"); prop->paging_support = mipi_device_property_read_bool(dev, - "mipi-sdw-paging-support"); + "mipi-sdw-paging-supported"); prop->bank_delay_support = mipi_device_property_read_bool(dev, - "mipi-sdw-bank-delay-support"); + "mipi-sdw-bank-delay-supported"); device_property_read_u32(dev, "mipi-sdw-port15-read-behavior", &prop->p15_behave); From 8168dba757c08aed00d0a1a25426c807adbf4491 Mon Sep 17 00:00:00 2001 From: Naveen Manohar Date: Thu, 26 Jun 2025 14:09:37 +0800 Subject: [PATCH 0379/2411] soundwire: intel_auxdevice: add rt721 codec to wake_capable_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rt721 has wake capability. Add it to the wake_capable_list. 
Signed-off-by: Naveen Manohar Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20250626060937.405978-1-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel_auxdevice.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soundwire/intel_auxdevice.c b/drivers/soundwire/intel_auxdevice.c index 10a602d4843a..6df2601fff90 100644 --- a/drivers/soundwire/intel_auxdevice.c +++ b/drivers/soundwire/intel_auxdevice.c @@ -65,6 +65,7 @@ static struct wake_capable_part wake_capable_list[] = { {0x025d, 0x715}, {0x025d, 0x716}, {0x025d, 0x717}, + {0x025d, 0x721}, {0x025d, 0x722}, }; From 72bbf6e866a7911aaa0b4d0e9bb03109c7c046f2 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 20 Jun 2025 15:55:20 +0530 Subject: [PATCH 0380/2411] soundwire: amd: add check for status update registers Add check to proceed handling SoundWire interrupts when valid status is reported in any one of the status registers. Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20250620102617.73437-2-Vijendar.Mukunda@amd.com Signed-off-by: Vinod Koul --- drivers/soundwire/amd_manager.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index 9a767704b603..d4e62c383b12 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -931,6 +931,9 @@ static void amd_sdw_irq_thread(struct work_struct *work) status_change_8to11 = readl(amd_manager->mmio + ACP_SW_STATE_CHANGE_STATUS_8TO11); status_change_0to7 = readl(amd_manager->mmio + ACP_SW_STATE_CHANGE_STATUS_0TO7); + if (!status_change_0to7 && !status_change_8to11) + return; + dev_dbg(amd_manager->dev, "[SDW%d] SDW INT: 0to7=0x%x, 8to11=0x%x\n", amd_manager->instance, status_change_0to7, status_change_8to11); if (status_change_8to11 & AMD_SDW_WAKE_STAT_MASK) From 814f047fc96d6631bb2c76557aad8e4aee8f532b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bence=20Cs=C3=B3k=C3=A1s?= Date: 
Wed, 25 Jun 2025 10:54:50 +0200 Subject: [PATCH 0381/2411] dmaengine: sun4i: Simplify error handling in probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clean up error handling by using devm functions and dev_err_probe(). This should make it easier to add new code, as we can eliminate the "goto ladder" in sun4i_dma_probe(). Suggested-by: Chen-Yu Tsai Reviewed-by: Jernej Skrabec Acked-by: Chen-Yu Tsai Reviewed-by: Julian Calaby Signed-off-by: Bence Csókás Link: https://lore.kernel.org/r/20250625085450.154280-2-csokas.bence@prolan.hu Signed-off-by: Vinod Koul --- drivers/dma/sun4i-dma.c | 46 ++++++++++++----------------------------- 1 file changed, 13 insertions(+), 33 deletions(-) diff --git a/drivers/dma/sun4i-dma.c b/drivers/dma/sun4i-dma.c index 24796aaaddfa..00d2fd38d17f 100644 --- a/drivers/dma/sun4i-dma.c +++ b/drivers/dma/sun4i-dma.c @@ -1249,11 +1249,10 @@ static int sun4i_dma_probe(struct platform_device *pdev) if (priv->irq < 0) return priv->irq; - priv->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(priv->clk)) { - dev_err(&pdev->dev, "No clock specified\n"); - return PTR_ERR(priv->clk); - } + priv->clk = devm_clk_get_enabled(&pdev->dev, NULL); + if (IS_ERR(priv->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(priv->clk), + "Couldn't start the clock\n"); if (priv->cfg->has_reset) { priv->rst = devm_reset_control_get_exclusive_deasserted(&pdev->dev, NULL); @@ -1328,12 +1327,6 @@ static int sun4i_dma_probe(struct platform_device *pdev) vchan_init(&vchan->vc, &priv->slave); } - ret = clk_prepare_enable(priv->clk); - if (ret) { - dev_err(&pdev->dev, "Couldn't enable the clock\n"); - return ret; - } - /* * Make sure the IRQs are all disabled and accounted for. 
The bootloader * likes to leave these dirty @@ -1343,33 +1336,23 @@ static int sun4i_dma_probe(struct platform_device *pdev) ret = devm_request_irq(&pdev->dev, priv->irq, sun4i_dma_interrupt, 0, dev_name(&pdev->dev), priv); - if (ret) { - dev_err(&pdev->dev, "Cannot request IRQ\n"); - goto err_clk_disable; - } + if (ret) + return dev_err_probe(&pdev->dev, ret, "Cannot request IRQ\n"); - ret = dma_async_device_register(&priv->slave); - if (ret) { - dev_warn(&pdev->dev, "Failed to register DMA engine device\n"); - goto err_clk_disable; - } + ret = dmaenginem_async_device_register(&priv->slave); + if (ret) + return dev_err_probe(&pdev->dev, ret, + "Failed to register DMA engine device\n"); ret = of_dma_controller_register(pdev->dev.of_node, sun4i_dma_of_xlate, priv); - if (ret) { - dev_err(&pdev->dev, "of_dma_controller_register failed\n"); - goto err_dma_unregister; - } + if (ret) + return dev_err_probe(&pdev->dev, ret, + "Failed to register translation function\n"); dev_dbg(&pdev->dev, "Successfully probed SUN4I_DMA\n"); return 0; - -err_dma_unregister: - dma_async_device_unregister(&priv->slave); -err_clk_disable: - clk_disable_unprepare(priv->clk); - return ret; } static void sun4i_dma_remove(struct platform_device *pdev) @@ -1380,9 +1363,6 @@ static void sun4i_dma_remove(struct platform_device *pdev) disable_irq(priv->irq); of_dma_controller_free(pdev->dev.of_node); - dma_async_device_unregister(&priv->slave); - - clk_disable_unprepare(priv->clk); } static struct sun4i_dma_config sun4i_a10_dma_cfg = { From e201757f7a0a901e313d638c545ed6cd0dc6870e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 25 Jun 2025 16:03:39 -0700 Subject: [PATCH 0382/2411] perf annotate: Fix source code annotate with objdump Recently it uses llvm and capstone to speed up annotation or disassembly of instructions. But they don't support source code view yet. Until it fixed, we can force to use objdump for source code annotation. 
To prevent performance loss, it's disabled by default and turned it on when user requests it in TUI by pressing 's' key. Acked-by: Ian Rogers Link: https://lore.kernel.org/r/20250625230339.702610-1-namhyung@kernel.org Reported-by: Ingo Molnar Signed-off-by: Namhyung Kim --- tools/perf/ui/browsers/annotate.c | 86 +++++++++++++++++++++++++++++-- tools/perf/util/annotate.c | 2 + tools/perf/util/annotate.h | 1 + tools/perf/util/disasm.c | 7 +++ 4 files changed, 93 insertions(+), 3 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index ab776b1ed2d5..183902dac042 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -345,6 +345,23 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, browser->curr_hot = rb_last(&browser->entries); } +static struct annotation_line *annotate_browser__find_new_asm_line( + struct annotate_browser *browser, + int idx_asm) +{ + struct annotation_line *al; + struct list_head *head = browser->b.entries; + + /* find an annotation line in the new list with the same idx_asm */ + list_for_each_entry(al, head, node) { + if (al->idx_asm == idx_asm) + return al; + } + + /* There are no asm lines */ + return NULL; +} + static struct annotation_line *annotate_browser__find_next_asm_line( struct annotate_browser *browser, struct annotation_line *al) @@ -368,7 +385,31 @@ static struct annotation_line *annotate_browser__find_next_asm_line( return NULL; } -static bool annotate_browser__toggle_source(struct annotate_browser *browser) +static bool annotation__has_source(struct annotation *notes) +{ + struct annotation_line *al; + bool found_asm = false; + + /* Let's skip the first non-asm lines which present regardless of source. */ + list_for_each_entry(al, &notes->src->source, node) { + if (al->offset >= 0) { + found_asm = true; + break; + } + } + + if (found_asm) { + /* After assembly lines, any line without offset means source.
*/ + list_for_each_entry_continue(al, &notes->src->source, node) { + if (al->offset == -1) + return true; + } + } + return false; +} + +static bool annotate_browser__toggle_source(struct annotate_browser *browser, + struct evsel *evsel) { struct annotation *notes = browser__annotation(&browser->b); struct annotation_line *al; @@ -377,6 +418,39 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) browser->b.seek(&browser->b, offset, SEEK_CUR); al = list_entry(browser->b.top, struct annotation_line, node); + if (!annotate_opts.annotate_src) + annotate_opts.annotate_src = true; + + /* + * It's about to get source code annotation for the first time. + * Drop the existing annotation_lines and get the new one with source. + * And then move to the original line at the same asm index. + */ + if (annotate_opts.hide_src_code && !notes->src->tried_source) { + struct map_symbol *ms = browser->b.priv; + int orig_idx_asm = al->idx_asm; + + /* annotate again with source code info */ + annotate_opts.hide_src_code = false; + annotated_source__purge(notes->src); + symbol__annotate2(ms, evsel, &browser->arch); + annotate_opts.hide_src_code = true; + + /* should be after annotated_source__purge() */ + notes->src->tried_source = true; + + if (!annotation__has_source(notes)) + ui__warning("Annotation has no source code."); + + browser->b.entries = &notes->src->source; + al = annotate_browser__find_new_asm_line(browser, orig_idx_asm); + if (unlikely(al == NULL)) { + al = list_first_entry(&notes->src->source, + struct annotation_line, node); + } + browser->b.seek(&browser->b, al->idx_asm, SEEK_SET); + } + if (annotate_opts.hide_src_code) { if (al->idx_asm < offset) offset = al->idx; @@ -833,7 +907,7 @@ static int annotate_browser__run(struct annotate_browser *browser, nd = browser->curr_hot; break; case 's': - if (annotate_browser__toggle_source(browser)) + if (annotate_browser__toggle_source(browser, evsel)) ui_helpline__puts(help); annotate__scnprintf_title(hists,
title, sizeof(title)); annotate_browser__show(&browser->b, title, help); @@ -1011,6 +1085,12 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, ui__error("Couldn't annotate %s:\n%s", sym->name, msg); return -1; } + + if (!annotate_opts.hide_src_code) { + notes->src->tried_source = true; + if (!annotation__has_source(notes)) + ui__warning("Annotation has no source code."); + } } ui_helpline__push("Press ESC to exit"); @@ -1025,7 +1105,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, ret = annotate_browser__run(&browser, evsel, hbt); - if(not_annotated) + if (not_annotated && !notes->src->tried_source) annotated_source__purge(notes->src); return ret; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 264a212b47df..0dd475a744b6 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1451,6 +1451,7 @@ void annotated_source__purge(struct annotated_source *as) list_del_init(&al->node); disasm_line__free(disasm_line(al)); } + as->tried_source = false; } static size_t disasm_line__fprintf(struct disasm_line *dl, FILE *fp) @@ -2280,6 +2281,7 @@ void annotation_options__init(void) opt->annotate_src = true; opt->offset_level = ANNOTATION__OFFSET_JUMP_TARGETS; opt->percent_type = PERCENT_PERIOD_LOCAL; + opt->hide_src_code = true; opt->hide_src_code_on_title = true; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index bbb89b32f398..8b5131d257b0 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -294,6 +294,7 @@ struct annotated_source { int nr_entries; int nr_asm_entries; int max_jump_sources; + bool tried_source; u64 start; struct { u8 addr; diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c index 8f0eb56c6fc6..ff475a239f4b 100644 --- a/tools/perf/util/disasm.c +++ b/tools/perf/util/disasm.c @@ -2284,6 +2284,13 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) } } + /* FIXME: LLVM and CAPSTONE should 
support source code */ + if (options->annotate_src && !options->hide_src_code) { + err = symbol__disassemble_objdump(symfs_filename, sym, args); + if (err == 0) + goto out_remove_tmp; + } + err = -1; for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers) && err != 0; i++) { enum perf_disassembler dis = options->disassemblers[i]; From e19bdbaa31082b43dab1d936e20efcebc30aa73d Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Tue, 24 Jun 2025 09:31:37 +0200 Subject: [PATCH 0383/2411] dmaengine: stm32-dma: configure next sg only if there are more than 2 sgs DMA operates in Double Buffer Mode (DBM) when the transfer is cyclic and there are at least two periods. When DBM is enabled, the DMA toggles between two memory targets (SxM0AR and SxM1AR), indicated by the SxSCR.CT bit (Current Target). There is no need to update the next memory address if two periods are configured, as SxM0AR and SxM1AR are already properly set up before the transfer begins in the stm32_dma_start_transfer() function. This avoids unnecessary updates to SxM0AR/SxM1AR, thereby preventing potential Transfer Errors. Specifically, when the channel is enabled, SxM0AR and SxM1AR can only be written if SxSCR.CT=1 and SxSCR.CT=0, respectively. Otherwise, a Transfer Error interrupt is triggered, and the stream is automatically disabled. 
Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20250624-stm32_dma_dbm_fix-v1-1-337c40d6c93e@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32/stm32-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/stm32/stm32-dma.c b/drivers/dma/stm32/stm32-dma.c index 917f8e922373..0e39f99bce8b 100644 --- a/drivers/dma/stm32/stm32-dma.c +++ b/drivers/dma/stm32/stm32-dma.c @@ -744,7 +744,7 @@ static void stm32_dma_handle_chan_done(struct stm32_dma_chan *chan, u32 scr) /* cyclic while CIRC/DBM disable => post resume reconfiguration needed */ if (!(scr & (STM32_DMA_SCR_CIRC | STM32_DMA_SCR_DBM))) stm32_dma_post_resume_reconfigure(chan); - else if (scr & STM32_DMA_SCR_DBM) + else if (scr & STM32_DMA_SCR_DBM && chan->desc->num_sgs > 2) stm32_dma_configure_next_sg(chan); } else { chan->busy = false; From 6e6d3c6f0ef235a95c25385b2dad98e8ad6223eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 18 Jun 2025 09:43:34 +0200 Subject: [PATCH 0384/2411] dmaengine: stm32: Don't use %pK through printk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the past %pK was preferable to %p as it would not leak raw pointer values into the kernel log. Since commit ad67b74d2469 ("printk: hash addresses printed with %p") the regular %p has been improved to avoid this issue. Furthermore, restricted pointers ("%pK") were never meant to be used through printk(). They can still unintentionally leak raw pointers or acquire sleeping locks in atomic contexts. Switch to the regular pointer formatting which is safer and easier to reason about. 
Signed-off-by: Thomas Weißschuh Reviewed-by: Amelie Delaunay Link: https://lore.kernel.org/r/20250618-restricted-pointers-dma-v2-1-bc39dafc201d@linutronix.de Signed-off-by: Vinod Koul --- drivers/dma/stm32/stm32-dma.c | 10 +++++----- drivers/dma/stm32/stm32-dma3.c | 10 +++++----- drivers/dma/stm32/stm32-mdma.c | 8 ++++---- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/dma/stm32/stm32-dma.c b/drivers/dma/stm32/stm32-dma.c index 0e39f99bce8b..04389936c8a6 100644 --- a/drivers/dma/stm32/stm32-dma.c +++ b/drivers/dma/stm32/stm32-dma.c @@ -613,7 +613,7 @@ static void stm32_dma_start_transfer(struct stm32_dma_chan *chan) reg->dma_scr |= STM32_DMA_SCR_EN; stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), reg->dma_scr); - dev_dbg(chan2dev(chan), "vchan %pK: started\n", &chan->vchan); + dev_dbg(chan2dev(chan), "vchan %p: started\n", &chan->vchan); } static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan) @@ -676,7 +676,7 @@ static void stm32_dma_handle_chan_paused(struct stm32_dma_chan *chan) chan->status = DMA_PAUSED; - dev_dbg(chan2dev(chan), "vchan %pK: paused\n", &chan->vchan); + dev_dbg(chan2dev(chan), "vchan %p: paused\n", &chan->vchan); } static void stm32_dma_post_resume_reconfigure(struct stm32_dma_chan *chan) @@ -728,7 +728,7 @@ static void stm32_dma_post_resume_reconfigure(struct stm32_dma_chan *chan) dma_scr |= STM32_DMA_SCR_EN; stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), dma_scr); - dev_dbg(chan2dev(chan), "vchan %pK: reconfigured after pause/resume\n", &chan->vchan); + dev_dbg(chan2dev(chan), "vchan %p: reconfigured after pause/resume\n", &chan->vchan); } static void stm32_dma_handle_chan_done(struct stm32_dma_chan *chan, u32 scr) @@ -820,7 +820,7 @@ static void stm32_dma_issue_pending(struct dma_chan *c) spin_lock_irqsave(&chan->vchan.lock, flags); if (vchan_issue_pending(&chan->vchan) && !chan->desc && !chan->busy) { - dev_dbg(chan2dev(chan), "vchan %pK: issued\n", &chan->vchan); + dev_dbg(chan2dev(chan), "vchan %p: 
issued\n", &chan->vchan); stm32_dma_start_transfer(chan); } @@ -922,7 +922,7 @@ static int stm32_dma_resume(struct dma_chan *c) spin_unlock_irqrestore(&chan->vchan.lock, flags); - dev_dbg(chan2dev(chan), "vchan %pK: resumed\n", &chan->vchan); + dev_dbg(chan2dev(chan), "vchan %p: resumed\n", &chan->vchan); return 0; } diff --git a/drivers/dma/stm32/stm32-dma3.c b/drivers/dma/stm32/stm32-dma3.c index 0c6c4258b195..50e7106c5cb7 100644 --- a/drivers/dma/stm32/stm32-dma3.c +++ b/drivers/dma/stm32/stm32-dma3.c @@ -801,7 +801,7 @@ static void stm32_dma3_chan_start(struct stm32_dma3_chan *chan) chan->dma_status = DMA_IN_PROGRESS; - dev_dbg(chan2dev(chan), "vchan %pK: started\n", &chan->vchan); + dev_dbg(chan2dev(chan), "vchan %p: started\n", &chan->vchan); } static int stm32_dma3_chan_suspend(struct stm32_dma3_chan *chan, bool susp) @@ -1452,7 +1452,7 @@ static int stm32_dma3_pause(struct dma_chan *c) chan->dma_status = DMA_PAUSED; - dev_dbg(chan2dev(chan), "vchan %pK: paused\n", &chan->vchan); + dev_dbg(chan2dev(chan), "vchan %p: paused\n", &chan->vchan); return 0; } @@ -1465,7 +1465,7 @@ static int stm32_dma3_resume(struct dma_chan *c) chan->dma_status = DMA_IN_PROGRESS; - dev_dbg(chan2dev(chan), "vchan %pK: resumed\n", &chan->vchan); + dev_dbg(chan2dev(chan), "vchan %p: resumed\n", &chan->vchan); return 0; } @@ -1490,7 +1490,7 @@ static int stm32_dma3_terminate_all(struct dma_chan *c) spin_unlock_irqrestore(&chan->vchan.lock, flags); vchan_dma_desc_free_list(&chan->vchan, &head); - dev_dbg(chan2dev(chan), "vchan %pK: terminated\n", &chan->vchan); + dev_dbg(chan2dev(chan), "vchan %p: terminated\n", &chan->vchan); return 0; } @@ -1543,7 +1543,7 @@ static void stm32_dma3_issue_pending(struct dma_chan *c) spin_lock_irqsave(&chan->vchan.lock, flags); if (vchan_issue_pending(&chan->vchan) && !chan->swdesc) { - dev_dbg(chan2dev(chan), "vchan %pK: issued\n", &chan->vchan); + dev_dbg(chan2dev(chan), "vchan %p: issued\n", &chan->vchan); stm32_dma3_chan_start(chan); } diff --git 
a/drivers/dma/stm32/stm32-mdma.c b/drivers/dma/stm32/stm32-mdma.c index e6d525901de7..080c1c725216 100644 --- a/drivers/dma/stm32/stm32-mdma.c +++ b/drivers/dma/stm32/stm32-mdma.c @@ -1187,7 +1187,7 @@ static void stm32_mdma_start_transfer(struct stm32_mdma_chan *chan) chan->busy = true; - dev_dbg(chan2dev(chan), "vchan %pK: started\n", &chan->vchan); + dev_dbg(chan2dev(chan), "vchan %p: started\n", &chan->vchan); } static void stm32_mdma_issue_pending(struct dma_chan *c) @@ -1200,7 +1200,7 @@ static void stm32_mdma_issue_pending(struct dma_chan *c) if (!vchan_issue_pending(&chan->vchan)) goto end; - dev_dbg(chan2dev(chan), "vchan %pK: issued\n", &chan->vchan); + dev_dbg(chan2dev(chan), "vchan %p: issued\n", &chan->vchan); if (!chan->desc && !chan->busy) stm32_mdma_start_transfer(chan); @@ -1220,7 +1220,7 @@ static int stm32_mdma_pause(struct dma_chan *c) spin_unlock_irqrestore(&chan->vchan.lock, flags); if (!ret) - dev_dbg(chan2dev(chan), "vchan %pK: pause\n", &chan->vchan); + dev_dbg(chan2dev(chan), "vchan %p: pause\n", &chan->vchan); return ret; } @@ -1261,7 +1261,7 @@ static int stm32_mdma_resume(struct dma_chan *c) spin_unlock_irqrestore(&chan->vchan.lock, flags); - dev_dbg(chan2dev(chan), "vchan %pK: resume\n", &chan->vchan); + dev_dbg(chan2dev(chan), "vchan %p: resume\n", &chan->vchan); return 0; } From 06b80ad4ffa5e614e89f04dffc44b85377c7ee24 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 25 May 2025 21:26:01 +0200 Subject: [PATCH 0385/2411] dmaengine: dw-edma: Drop unused dchan2dev() and chan2dev() Static functions dchan2dev() and chan2dev() are not used, W=1 build: dw-edma-core.c:27:16: error: unused function 'dchan2dev' [-Werror,-Wunused-function] dw-edma-core.c:33:16: error: unused function 'chan2dev' [-Werror,-Wunused-function] Fixes: e63d79d1ffcd ("dmaengine: Add Synopsys eDMA IP core driver") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250525-dma-fixes-v1-1-89d06dac9bcb@linaro.org Signed-off-by: Vinod Koul --- 
drivers/dma/dw-edma/dw-edma-core.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c index c2b88cc99e5d..b43255f914f3 100644 --- a/drivers/dma/dw-edma/dw-edma-core.c +++ b/drivers/dma/dw-edma/dw-edma-core.c @@ -23,18 +23,6 @@ #include "../dmaengine.h" #include "../virt-dma.h" -static inline -struct device *dchan2dev(struct dma_chan *dchan) -{ - return &dchan->dev->device; -} - -static inline -struct device *chan2dev(struct dw_edma_chan *chan) -{ - return &chan->vc.chan.dev->device; -} - static inline struct dw_edma_desc *vd2dw_edma_desc(struct virt_dma_desc *vd) { From f0368c23caba175e07062a3f24e58a2b4ec5bb1c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 25 May 2025 21:26:02 +0200 Subject: [PATCH 0386/2411] dmaengine: fsl-dpaa2-qdma: Drop unused mc_enc() Static function mc_enc() is not used, W=1 build: dpdmai.c:51:19: error: unused function 'mc_enc' [-Werror,-Wunused-function] Fixes: 26a4d2aedac2 ("dmaengine: fsl-dpaa2-qdma: Remove unused function dpdmai_create()") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250525-dma-fixes-v1-2-89d06dac9bcb@linaro.org Signed-off-by: Vinod Koul --- drivers/dma/fsl-dpaa2-qdma/dpdmai.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/dma/fsl-dpaa2-qdma/dpdmai.c b/drivers/dma/fsl-dpaa2-qdma/dpdmai.c index b4323d243d6d..4be81db24a19 100644 --- a/drivers/dma/fsl-dpaa2-qdma/dpdmai.c +++ b/drivers/dma/fsl-dpaa2-qdma/dpdmai.c @@ -48,11 +48,6 @@ struct dpdmai_cmd_destroy { __le32 dpdmai_id; } __packed; -static inline u64 mc_enc(int lsoffset, int width, u64 val) -{ - return (val & MAKE_UMASK64(width)) << lsoffset; -} - /** * dpdmai_open() - Open a control session for the specified object * @mc_io: Pointer to MC portal's I/O object From 24c13df655ca1fad5fc6fa4fbacb828f4a6d4f2b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 25 May 2025 21:26:03 +0200 Subject: [PATCH 0387/2411] dmaengine: 
qcom: gpi: Drop unused gpi_write_reg_field() Static function gpi_write_reg_field() is not used, W=1 build: gpi.c:573:20: error: unused function 'gpi_write_reg_field' [-Werror,-Wunused-function] Fixes: 5d0c3533a19f ("dmaengine: qcom: Add GPI dma driver") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250525-dma-fixes-v1-3-89d06dac9bcb@linaro.org Signed-off-by: Vinod Koul --- drivers/dma/qcom/gpi.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index b1f0001cc99c..8e87738086b2 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -569,17 +569,6 @@ static inline void gpi_write_reg(struct gpii *gpii, void __iomem *addr, u32 val) writel_relaxed(val, addr); } -/* gpi_write_reg_field - write to specific bit field */ -static inline void gpi_write_reg_field(struct gpii *gpii, void __iomem *addr, - u32 mask, u32 shift, u32 val) -{ - u32 tmp = gpi_read_reg(gpii, addr); - - tmp &= ~mask; - val = tmp | ((val << shift) & mask); - gpi_write_reg(gpii, addr, val); -} - static __always_inline void gpi_update_reg(struct gpii *gpii, u32 offset, u32 mask, u32 val) { From 85a4ca2902c1d3b8ccea03837b10e178405192c5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 25 May 2025 21:26:04 +0200 Subject: [PATCH 0388/2411] dmaengine: fsl-qdma: Add missing fsl_qdma_format kerneldoc Document '__reserved2' and 'cmd' fields of 'struct fsl_qdma_format' to fix W=1 warnings: fsl-qdma.c:169 struct member '__reserved2' not described in 'fsl_qdma_format' fsl-qdma.c:169 struct member 'cmd' not described in 'fsl_qdma_format' Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250525-dma-fixes-v1-4-89d06dac9bcb@linaro.org Signed-off-by: Vinod Koul --- drivers/dma/fsl-qdma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 823f5c6bc2e1..21e13f1207cb 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -148,6 
+148,9 @@ * @__reserved1: Reserved field. * @cfg8b_w1: Compound descriptor command queue origin produced * by qDMA and dynamic debug field. + * @__reserved2: Reserved field. + * @cmd: Command for QDMA (see FSL_QDMA_CMD_RWTTYPE and + * others). * @data: Pointer to the memory 40-bit address, describes DMA * source information and DMA destination information. */ From a0b1589b62e2fcfb112996e0f4d5593bd2edf069 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 25 May 2025 21:26:05 +0200 Subject: [PATCH 0389/2411] dmaengine: mmp: Fix again Wvoid-pointer-to-enum-cast warning This was fixed and re-introduced. 'type' is an enum, thus cast of pointer on 64-bit compile test with W=1 causes: mmp_tdma.c:644:9: error: cast to smaller integer type 'enum mmp_tdma_type' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast] Fixes: a67ba97dfb30 ("dmaengine: Use device_get_match_data()") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250525-dma-fixes-v1-5-89d06dac9bcb@linaro.org Signed-off-by: Vinod Koul --- drivers/dma/mmp_tdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c index c8dc504510f1..b7fb843c67a6 100644 --- a/drivers/dma/mmp_tdma.c +++ b/drivers/dma/mmp_tdma.c @@ -641,7 +641,7 @@ static int mmp_tdma_probe(struct platform_device *pdev) int chan_num = TDMA_CHANNEL_NUM; struct gen_pool *pool = NULL; - type = (enum mmp_tdma_type)device_get_match_data(&pdev->dev); + type = (kernel_ulong_t)device_get_match_data(&pdev->dev); /* always have couple channels */ tdev = devm_kzalloc(&pdev->dev, sizeof(*tdev), GFP_KERNEL); From 8c2442663f683f4fabadb3c491821169da6c89a8 Mon Sep 17 00:00:00 2001 From: Anil S Keshavamurthy Date: Wed, 21 May 2025 19:13:31 -0400 Subject: [PATCH 0390/2411] dmaengine: idxd: Fix warning for deadcode.deadstore Deletes the second initialization as the value stored to 'dev' during its initialization (struct device *dev = &idxd->pdev->dev;) is sufficient. 
../drivers/dma/idxd/init.c:988:17: warning: Value stored to 'dev' during its initialization is never read [deadcode.DeadStores] 988 | struct device *dev = &idxd->pdev->dev; | ^~~ ~~~~~~~~~~~~~~~~ Signed-off-by: Anil S Keshavamurthy Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20250521231331.889204-1-anil.s.keshavamurthy@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 80355d03004d..35bdefd3728b 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -1036,7 +1036,6 @@ static void idxd_reset_prepare(struct pci_dev *pdev) const char *idxd_name; int rc; - dev = &idxd->pdev->dev; idxd_name = dev_name(idxd_confdev(idxd)); struct idxd_saved_states *idxd_saved __free(kfree) = From 587dd30449fb10121fc8a319bb825dc6152b8dd5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 4 Apr 2025 14:21:13 +0200 Subject: [PATCH 0391/2411] dmaengine: sh: Do not enable SH_DMAE_BASE by default during compile testing Enabling the compile test should not cause automatic enabling of all drivers. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250404122114.359087-1-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul --- drivers/dma/sh/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig index 6ea5a880b433..8184d475a49a 100644 --- a/drivers/dma/sh/Kconfig +++ b/drivers/dma/sh/Kconfig @@ -16,7 +16,7 @@ config SH_DMAE_BASE depends on SUPERH || COMPILE_TEST depends on !SUPERH || SH_DMA depends on !SH_DMA_API - default y + default SUPERH || SH_DMA select RENESAS_DMA help Enable support for the Renesas SuperH DMA controllers. 
From ddf16e16346a36ec6616e5282f675f2e3cdc826f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 4 Apr 2025 14:21:14 +0200 Subject: [PATCH 0392/2411] dmaengine: ti: Do not enable by default during compile testing Enabling the compile test should not cause automatic enabling of all drivers. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250404122114.359087-2-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul --- drivers/dma/ti/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/ti/Kconfig b/drivers/dma/ti/Kconfig index 2adc2cca10e9..dbf168146d35 100644 --- a/drivers/dma/ti/Kconfig +++ b/drivers/dma/ti/Kconfig @@ -17,7 +17,7 @@ config TI_EDMA select DMA_ENGINE select DMA_VIRTUAL_CHANNELS select TI_DMA_CROSSBAR if (ARCH_OMAP || COMPILE_TEST) - default y + default ARCH_DAVINCI || ARCH_OMAP || ARCH_KEYSTONE help Enable support for the TI EDMA (Enhanced DMA) controller. This DMA engine is found on TI DaVinci, AM33xx, AM43xx, DRA7xx and Keystone 2 @@ -29,7 +29,7 @@ config DMA_OMAP select DMA_ENGINE select DMA_VIRTUAL_CHANNELS select TI_DMA_CROSSBAR if (SOC_DRA7XX || COMPILE_TEST) - default y + default ARCH_OMAP help Enable support for the TI sDMA (System DMA or DMA4) controller. This DMA engine is found on OMAP and DRA7xx parts. From 9d8511daf1e81a93007b7bb5020d4ce5ce001deb Mon Sep 17 00:00:00 2001 From: Tianyou Li Date: Thu, 26 Jun 2025 00:14:01 +0800 Subject: [PATCH 0393/2411] tools/perf: Add --exclude-buildids option to perf archive command When make a perf archive, it may contains the binaries that user did not want to ship with, add --exclude-buildids option to specify a file which contains the buildids need to be excluded. The file can be generated from command: perf buildid-list -i perf.data --with-hits | grep -v "^ " > exclude-buildids.txt Then remove the lines from the exclude-buildids.txt for buildids should be included. 
Signed-off-by: Tianyou Li Reviewed-by: Wangyang Guo Link: https://lore.kernel.org/r/20250625161509.2599646-1-tianyou.li@intel.com Signed-off-by: Namhyung Kim --- tools/perf/perf-archive.sh | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh index 6ed7e52ab881..7977e9b0a5ea 100755 --- a/tools/perf/perf-archive.sh +++ b/tools/perf/perf-archive.sh @@ -16,6 +16,13 @@ while [ $# -gt 0 ] ; do elif [ $1 == "--unpack" ]; then UNPACK=1 shift + elif [ $1 == "--exclude-buildids" ]; then + EXCLUDE_BUILDIDS="$2" + if [ ! -e "$EXCLUDE_BUILDIDS" ]; then + echo "Provided exclude-buildids file $EXCLUDE_BUILDIDS does not exist" + exit 1 + fi + shift 2 else PERF_DATA=$1 UNPACK_TAR=$1 @@ -86,11 +93,29 @@ fi BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX) -perf buildid-list -i $PERF_DATA --with-hits | grep -v "^ " > $BUILDIDS -if [ ! -s $BUILDIDS ] ; then - echo "perf archive: no build-ids found" - rm $BUILDIDS || true - exit 1 +# +# EXCLUDE_BUILDIDS is an optional file that contains build-ids to be excluded from the +# archive. It is a list of build-ids, one per line, without any leading or trailing spaces. +# If the file is empty, all build-ids will be included in the archive. To create a exclude- +# buildids file, you can use the following command: +# perf buildid-list -i perf.data --with-hits | grep -v "^ " > exclude_buildids.txt +# You can edit the file to remove the lines that you want to keep in the archive, then: +# perf archive --exclude-buildids exclude_buildids.txt +# +if [ -s "$EXCLUDE_BUILDIDS" ]; then + perf buildid-list -i $PERF_DATA --with-hits | grep -v "^ " | grep -Fv -f $EXCLUDE_BUILDIDS > $BUILDIDS + if [ ! -s "$BUILDIDS" ] ; then + echo "perf archive: no build-ids found after applying exclude-buildids file" + rm $BUILDIDS || true + exit 1 + fi +else + perf buildid-list -i $PERF_DATA --with-hits | grep -v "^ " > $BUILDIDS + if [ ! 
-s "$BUILDIDS" ] ; then + echo "perf archive: no build-ids found" + rm $BUILDIDS || true + exit 1 + fi fi MANIFEST=$(mktemp /tmp/perf-archive-manifest.XXXXXX) From 06f77ff9d852c9f2764659ea81489364d8a69a9c Mon Sep 17 00:00:00 2001 From: Rodrigo Gobbi Date: Thu, 26 Jun 2025 18:33:14 -0300 Subject: [PATCH 0394/2411] soundwire: debugfs: move debug statement outside of error handling The start_t and finish_t variables are not properly initialized if errors happens over request_firmware actions. This was also detected by smatch: drivers/soundwire/debugfs.c:301 cmd_go() error: uninitialized symbol 'finish_t'. drivers/soundwire/debugfs.c:301 cmd_go() error: uninitialized symbol 'start_t'. Move the debug statement outside of firmware error handling. Signed-off-by: Rodrigo Gobbi Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-sound/0db6d0bf-7bac-43a7-b624-a00d3d2bf829@stanley.mountain/ Fixes: bb5cb09eedce ("soundwire: debugfs: add interface for BPT/BRA transfers") Link: https://lore.kernel.org/r/20250626213628.9575-1-rodrigo.gobbi.7@gmail.com Signed-off-by: Vinod Koul --- drivers/soundwire/debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/soundwire/debugfs.c b/drivers/soundwire/debugfs.c index 3099ea074f10..230a51489486 100644 --- a/drivers/soundwire/debugfs.c +++ b/drivers/soundwire/debugfs.c @@ -291,6 +291,9 @@ static int cmd_go(void *data, u64 value) finish_t = ktime_get(); + dev_dbg(&slave->dev, "command completed, num_byte %zu status %d, time %lld ms\n", + num_bytes, ret, div_u64(finish_t - start_t, NSEC_PER_MSEC)); + out: if (fw) release_firmware(fw); @@ -298,9 +301,6 @@ static int cmd_go(void *data, u64 value) pm_runtime_mark_last_busy(&slave->dev); pm_runtime_put(&slave->dev); - dev_dbg(&slave->dev, "command completed, num_byte %zu status %d, time %lld ms\n", - num_bytes, ret, div_u64(finish_t - start_t, NSEC_PER_MSEC)); - return ret; } DEFINE_DEBUGFS_ATTRIBUTE(cmd_go_fops, NULL, From 
720fa0cb59e411eca6b274f78073b6d2fe68eb45 Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Mon, 23 Jun 2025 19:18:09 +0530 Subject: [PATCH 0395/2411] scsi: ufs: qcom : Fix NULL pointer dereference in ufs_qcom_setup_clocks Fix a NULL pointer dereference in ufs_qcom_setup_clocks due to an uninitialized 'host' variable. The variable 'phy' is now assigned after confirming 'host' is not NULL. Call Stack: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 ufs_qcom_setup_clocks+0x28/0x148 ufs_qcom (P) ufshcd_setup_clocks (drivers/ufs/core/ufshcd-priv.h:142) ufshcd_init (drivers/ufs/core/ufshcd.c:9468) ufshcd_pltfrm_init (drivers/ufs/host/ufshcd-pltfrm.c:504) ufs_qcom_probe+0x28/0x68 ufs_qcom platform_probe (drivers/base/platform.c:1404) really_probe (drivers/base/dd.c:579 drivers/base/dd.c:657) __driver_probe_device (drivers/base/dd.c:799) driver_probe_device (drivers/base/dd.c:829) __driver_attach (drivers/base/dd.c:1216) bus_for_each_dev (drivers/base/bus.c:370) driver_attach (drivers/base/dd.c:1234) bus_add_driver (drivers/base/bus.c:678) driver_register (drivers/base/driver.c:249) __platform_driver_register (drivers/base/platform.c:868) ufs_qcom_pltform_init+0x28/0xff8 ufs_qcom do_one_initcall (init/main.c:1274) do_init_module (kernel/module/main.c:3041) load_module (kernel/module/main.c:3511) init_module_from_file (kernel/module/main.c:3704) __arm64_sys_finit_module (kernel/module/main.c:3715. 
Reviewed-by: Manivannan Sadhasivam Reviewed-by: Neil Armstrong Fixes: 77d2fa54a945 ("scsi: ufs: qcom : Refactor phy_power_on/off calls") Tested-by: Dmitry Baryshkov Tested-by: Naresh Kamboju Tested-by: Neil Armstrong Reported-by: Aishwarya Closes: https://lore.kernel.org/lkml/20250620214408.11028-1-aishwarya.tcv@arm.com/ Reported-by: Naresh Kamboju Closes: https://lore.kernel.org/linux-scsi/CA+G9fYuFQ2dBvYm1iB6rbwT=4b1c8e4NJ3yxqFPGZGUKH3GmMA@mail.gmail.com/T/#t Co-developed-by: Ram Kumar Dwivedi Signed-off-by: Ram Kumar Dwivedi Signed-off-by: Nitin Rawat Reviewed-by: Martin K. Petersen Reported-by: Linux Kernel Functional Testing Tested-by: Linux Kernel Functional Testing Tested-by: Marek Szyprowski Link: https://lore.kernel.org/r/20250623134809.20405-1-quic_nitirawa@quicinc.com Signed-off-by: Vinod Koul --- drivers/ufs/host/ufs-qcom.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index ba4b2880279c..318dca7fe3d7 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -1124,7 +1124,7 @@ static int ufs_qcom_setup_clocks(struct ufs_hba *hba, bool on, enum ufs_notify_change_status status) { struct ufs_qcom_host *host = ufshcd_get_variant(hba); - struct phy *phy = host->generic_phy; + struct phy *phy; int err; /* @@ -1135,6 +1135,8 @@ static int ufs_qcom_setup_clocks(struct ufs_hba *hba, bool on, if (!host) return 0; + phy = host->generic_phy; + switch (status) { case PRE_CHANGE: if (on) { From 05c6f31991300f1c0e5e80eb1f66a580b9b5ca5f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 24 Jun 2025 15:22:29 +0200 Subject: [PATCH 0396/2411] dt-bindings: phy: apm,xgene-phy: Remove trailing whitespace Remove trailing whitespace which hurts my eyes. 
Fixes: 65ad0d068c426c2f ("dt-bindings: phy: Convert apm,xgene-phy to DT schema") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/5b8e9b4f645bcac9d50059e513abba4db7e1aaea.1750771156.git.geert+renesas@glider.be Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/phy/apm,xgene-phy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/phy/apm,xgene-phy.yaml b/Documentation/devicetree/bindings/phy/apm,xgene-phy.yaml index d1e6b112b6de..0674391feeae 100644 --- a/Documentation/devicetree/bindings/phy/apm,xgene-phy.yaml +++ b/Documentation/devicetree/bindings/phy/apm,xgene-phy.yaml @@ -8,7 +8,7 @@ title: APM X-Gene 15Gbps Multi-purpose PHY maintainers: - Khuong Dinh - + description: PHY nodes are defined to describe on-chip 15Gbps Multi-purpose PHY. Each PHY (pair of lanes) has its own node. From 9cc82c2498b4fac77fb2438080458e42c1d0d5cb Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Mon, 23 Jun 2025 14:03:14 +0200 Subject: [PATCH 0397/2411] phy: mediatek: tphy: Clarify and add kerneldoc to mtk_phy_pdata As struct mtk_phy_pdata was almost fully documented, transfer the comments into kerneldoc on top. While at it, also rewrite the comments to both improve the writing and the actual information in the documentation, and add a description for the `version` member of the structure.
Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250623120315.109881-2-angelogioacchino.delregno@collabora.com Signed-off-by: Vinod Koul --- drivers/phy/mediatek/phy-mtk-tphy.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/phy/mediatek/phy-mtk-tphy.c b/drivers/phy/mediatek/phy-mtk-tphy.c index 644a34bd2b0b..858824b4476e 100644 --- a/drivers/phy/mediatek/phy-mtk-tphy.c +++ b/drivers/phy/mediatek/phy-mtk-tphy.c @@ -277,19 +277,19 @@ enum mtk_phy_version { MTK_PHY_V3, }; +/** + * struct mtk_phy_pdata - SoC specific platform data + * @avoid_rx_sen_degradation: Avoid RX Sensitivity level degradation (MT6795/8173 only) + * @sw_pll_48m_to_26m: Workaround for V3 IP (MT8195) - switch the 48MHz PLL from + * fractional mode to integer to output 26MHz for U2PHY + * @sw_efuse_supported: Switches off eFuse auto-load from PHY and applies values + * read from different nvmem (usually different eFuse array) + * that is pointed at in the device tree node for this PHY + * @version: PHY IP Version + */ struct mtk_phy_pdata { - /* avoid RX sensitivity level degradation only for mt8173 */ bool avoid_rx_sen_degradation; - /* - * workaround only for mt8195, HW fix it for others of V3, - * u2phy should use integer mode instead of fractional mode of - * 48M PLL, fix it by switching PLL to 26M from default 48M - */ bool sw_pll_48m_to_26m; - /* - * Some SoCs (e.g. mt8195) drop a bit when use auto load efuse, - * support sw way, also support it for v2/v3 optionally.
- */ bool sw_efuse_supported; enum mtk_phy_version version; }; From d6306fc5d77b7cbdf75a90159f58ebb84ae6f02a Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Mon, 23 Jun 2025 14:03:15 +0200 Subject: [PATCH 0398/2411] phy: mediatek: tphy: Cleanup and document slew calibration While it's true that, generally, the T-PHY V3 does not support the slew calibration process, some minor versions of it actually do, moreover, some SoCs may not support this even though the version of the PHY IP does. The reference clock and rate coefficient parameters are used only for slew calibration: move those to platform data, then document and change the checks in hs_slew_rate_calibrate() to perform the calibration only if: - EYE value was not supplied (pre-calculated calibration); and - Slew reference clock value is present (not zero); and - Slew coefficient is present (not zero). Moreover, change the probe function to always check if both the slew reference clock and coefficient properties are present and, if not, assign the value from platform data (which, as reminder, if not added means that it's zero!), instead of checking the PHY IP version. 
Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250623120315.109881-3-angelogioacchino.delregno@collabora.com Signed-off-by: Vinod Koul --- drivers/phy/mediatek/phy-mtk-tphy.c | 45 +++++++++++++++++------------ 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/phy/mediatek/phy-mtk-tphy.c b/drivers/phy/mediatek/phy-mtk-tphy.c index 858824b4476e..f6504e0ecd1a 100644 --- a/drivers/phy/mediatek/phy-mtk-tphy.c +++ b/drivers/phy/mediatek/phy-mtk-tphy.c @@ -210,8 +210,6 @@ #define P2F_USB_FM_VALID BIT(0) #define P2F_RG_FRCK_EN BIT(8) -#define U3P_REF_CLK 26 /* MHZ */ -#define U3P_SLEW_RATE_COEF 28 #define U3P_SR_COEF_DIVISOR 1000 #define U3P_FM_DET_CYCLE_CNT 1024 @@ -285,12 +283,16 @@ enum mtk_phy_version { * @sw_efuse_supported: Switches off eFuse auto-load from PHY and applies values * read from different nvmem (usually different eFuse array) * that is pointed at in the device tree node for this PHY + * @slew_ref_clock_mhz: Default reference clock (in MHz) for slew rate calibration + * @slew_rate_coefficient: Coefficient for slew rate calibration * @version: PHY IP Version */ struct mtk_phy_pdata { bool avoid_rx_sen_degradation; bool sw_pll_48m_to_26m; bool sw_efuse_supported; + u8 slew_ref_clock_mhz; + u8 slew_rate_coefficient; enum mtk_phy_version version; }; @@ -686,12 +688,14 @@ static void hs_slew_rate_calibrate(struct mtk_tphy *tphy, int fm_out; u32 tmp; - /* HW V3 doesn't support slew rate cal anymore */ - if (tphy->pdata->version == MTK_PHY_V3) - return; - - /* use force value */ - if (instance->eye_src) + /* + * If a fixed HS slew rate (EYE) value was supplied, don't run the + * calibration sequence and prefer using that value instead; also, + * if there is no reference clock for slew calibration or there is + * no slew coefficient, this means that the slew rate calibration + * sequence is not supported.
+ */ + if (instance->eye_src || !tphy->src_ref_clk || !tphy->src_coef) return; /* enable USB ring oscillator */ @@ -1516,12 +1520,16 @@ static const struct phy_ops mtk_tphy_ops = { static const struct mtk_phy_pdata tphy_v1_pdata = { .avoid_rx_sen_degradation = false, + .slew_ref_clock_mhz = 26, + .slew_rate_coefficient = 28, .version = MTK_PHY_V1, }; static const struct mtk_phy_pdata tphy_v2_pdata = { .avoid_rx_sen_degradation = false, .sw_efuse_supported = true, + .slew_ref_clock_mhz = 26, + .slew_rate_coefficient = 28, .version = MTK_PHY_V2, }; @@ -1532,6 +1540,8 @@ static const struct mtk_phy_pdata tphy_v3_pdata = { static const struct mtk_phy_pdata mt8173_pdata = { .avoid_rx_sen_degradation = true, + .slew_ref_clock_mhz = 26, + .slew_rate_coefficient = 28, .version = MTK_PHY_V1, }; @@ -1561,7 +1571,7 @@ static int mtk_tphy_probe(struct platform_device *pdev) struct resource *sif_res; struct mtk_tphy *tphy; struct resource res; - int port; + int port, ret; tphy = devm_kzalloc(dev, sizeof(*tphy), GFP_KERNEL); if (!tphy) @@ -1591,15 +1601,14 @@ static int mtk_tphy_probe(struct platform_device *pdev) } } - if (tphy->pdata->version < MTK_PHY_V3) { - tphy->src_ref_clk = U3P_REF_CLK; - tphy->src_coef = U3P_SLEW_RATE_COEF; - /* update parameters of slew rate calibrate if exist */ - device_property_read_u32(dev, "mediatek,src-ref-clk-mhz", - &tphy->src_ref_clk); - device_property_read_u32(dev, "mediatek,src-coef", - &tphy->src_coef); - } + /* Optional properties for slew calibration variation */ + ret = device_property_read_u32(dev, "mediatek,src-ref-clk-mhz", &tphy->src_ref_clk); + if (ret) + tphy->src_ref_clk = tphy->pdata->slew_ref_clock_mhz; + + ret = device_property_read_u32(dev, "mediatek,src-coef", &tphy->src_coef); + if (ret) + tphy->src_coef = tphy->pdata->slew_rate_coefficient; port = 0; for_each_child_of_node_scoped(np, child_np) { From db9f3e3ff9347a233a17eadefae9c1b29ec8f3ed Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Tue, 17 Jun 2025 10:26:35 +0200 
Subject: [PATCH 0399/2411] dt-bindings: phy: qcom,snps-eusb2-repeater: Remove default tuning values The reset default tuning value depends on the PMIC, so remove them from the doc since they're not accurate for all PMICs. Signed-off-by: Luca Weiss Link: https://lore.kernel.org/r/20250617-eusb2-repeater-tuning-v2-1-ed6c484f18ee@fairphone.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/qcom,snps-eusb2-repeater.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-repeater.yaml b/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-repeater.yaml index d16a543a7848..27f064a71c9f 100644 --- a/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-repeater.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-repeater.yaml @@ -39,21 +39,18 @@ properties: description: High-Speed disconnect threshold minimum: 0 maximum: 7 - default: 0 qcom,tune-usb2-amplitude: $ref: /schemas/types.yaml#/definitions/uint8 description: High-Speed transmit amplitude minimum: 0 maximum: 15 - default: 8 qcom,tune-usb2-preem: $ref: /schemas/types.yaml#/definitions/uint8 description: High-Speed TX pre-emphasis tuning minimum: 0 maximum: 7 - default: 5 required: - compatible From 31bc94de76026c527f82c238f414539a14f0f3e6 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Tue, 17 Jun 2025 10:26:36 +0200 Subject: [PATCH 0400/2411] phy: qualcomm: phy-qcom-eusb2-repeater: Don't zero-out registers Zeroing out registers does not happen in the downstream kernel, and will "tune" the repeater in surely unexpected ways since most registers don't have a reset value of 0x0. Stop doing that and instead just set the registers that are in the init sequence (though long term I don't think there's actually PMIC-specific init sequences, there's board specific tuning, but that's a story for another day). 
Fixes: 99a517a582fc ("phy: qualcomm: phy-qcom-eusb2-repeater: Zero out untouched tuning regs") Reviewed-by: Konrad Dybcio Reviewed-by: Neil Armstrong Signed-off-by: Luca Weiss Reviewed-by: Dmitry Baryshkov Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20250617-eusb2-repeater-tuning-v2-2-ed6c484f18ee@fairphone.com Signed-off-by: Vinod Koul --- .../phy/qualcomm/phy-qcom-eusb2-repeater.c | 83 +++++++------------ 1 file changed, 30 insertions(+), 53 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c index 260894b6932c..e0f2acc8109c 100644 --- a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c +++ b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c @@ -37,32 +37,13 @@ #define EUSB2_TUNE_EUSB_EQU 0x5A #define EUSB2_TUNE_EUSB_HS_COMP_CUR 0x5B -enum eusb2_reg_layout { - TUNE_EUSB_HS_COMP_CUR, - TUNE_EUSB_EQU, - TUNE_EUSB_SLEW, - TUNE_USB2_HS_COMP_CUR, - TUNE_USB2_PREEM, - TUNE_USB2_EQU, - TUNE_USB2_SLEW, - TUNE_SQUELCH_U, - TUNE_HSDISC, - TUNE_RES_FSDIF, - TUNE_IUSB2, - TUNE_USB2_CROSSOVER, - NUM_TUNE_FIELDS, - - FORCE_VAL_5 = NUM_TUNE_FIELDS, - FORCE_EN_5, - - EN_CTL1, - - RPTR_STATUS, - LAYOUT_SIZE, +struct eusb2_repeater_init_tbl_reg { + unsigned int reg; + unsigned int value; }; struct eusb2_repeater_cfg { - const u32 *init_tbl; + const struct eusb2_repeater_init_tbl_reg *init_tbl; int init_tbl_num; const char * const *vreg_list; int num_vregs; @@ -82,16 +63,16 @@ static const char * const pm8550b_vreg_l[] = { "vdd18", "vdd3", }; -static const u32 pm8550b_init_tbl[NUM_TUNE_FIELDS] = { - [TUNE_IUSB2] = 0x8, - [TUNE_SQUELCH_U] = 0x3, - [TUNE_USB2_PREEM] = 0x5, +static const struct eusb2_repeater_init_tbl_reg pm8550b_init_tbl[] = { + { EUSB2_TUNE_IUSB2, 0x8 }, + { EUSB2_TUNE_SQUELCH_U, 0x3 }, + { EUSB2_TUNE_USB2_PREEM, 0x5 }, }; -static const u32 smb2360_init_tbl[NUM_TUNE_FIELDS] = { - [TUNE_IUSB2] = 0x5, - [TUNE_SQUELCH_U] = 0x3, - [TUNE_USB2_PREEM] = 0x2, +static const struct 
eusb2_repeater_init_tbl_reg smb2360_init_tbl[] = { + { EUSB2_TUNE_IUSB2, 0x5 }, + { EUSB2_TUNE_SQUELCH_U, 0x3 }, + { EUSB2_TUNE_USB2_PREEM, 0x2 }, }; static const struct eusb2_repeater_cfg pm8550b_eusb2_cfg = { @@ -129,17 +110,10 @@ static int eusb2_repeater_init(struct phy *phy) struct eusb2_repeater *rptr = phy_get_drvdata(phy); struct device_node *np = rptr->dev->of_node; struct regmap *regmap = rptr->regmap; - const u32 *init_tbl = rptr->cfg->init_tbl; - u8 tune_usb2_preem = init_tbl[TUNE_USB2_PREEM]; - u8 tune_hsdisc = init_tbl[TUNE_HSDISC]; - u8 tune_iusb2 = init_tbl[TUNE_IUSB2]; u32 base = rptr->base; - u32 val; + u32 poll_val; int ret; - - of_property_read_u8(np, "qcom,tune-usb2-amplitude", &tune_iusb2); - of_property_read_u8(np, "qcom,tune-usb2-disc-thres", &tune_hsdisc); - of_property_read_u8(np, "qcom,tune-usb2-preem", &tune_usb2_preem); + u8 val; ret = regulator_bulk_enable(rptr->cfg->num_vregs, rptr->vregs); if (ret) @@ -147,21 +121,24 @@ static int eusb2_repeater_init(struct phy *phy) regmap_write(regmap, base + EUSB2_EN_CTL1, EUSB2_RPTR_EN); - regmap_write(regmap, base + EUSB2_TUNE_EUSB_HS_COMP_CUR, init_tbl[TUNE_EUSB_HS_COMP_CUR]); - regmap_write(regmap, base + EUSB2_TUNE_EUSB_EQU, init_tbl[TUNE_EUSB_EQU]); - regmap_write(regmap, base + EUSB2_TUNE_EUSB_SLEW, init_tbl[TUNE_EUSB_SLEW]); - regmap_write(regmap, base + EUSB2_TUNE_USB2_HS_COMP_CUR, init_tbl[TUNE_USB2_HS_COMP_CUR]); - regmap_write(regmap, base + EUSB2_TUNE_USB2_EQU, init_tbl[TUNE_USB2_EQU]); - regmap_write(regmap, base + EUSB2_TUNE_USB2_SLEW, init_tbl[TUNE_USB2_SLEW]); - regmap_write(regmap, base + EUSB2_TUNE_SQUELCH_U, init_tbl[TUNE_SQUELCH_U]); - regmap_write(regmap, base + EUSB2_TUNE_RES_FSDIF, init_tbl[TUNE_RES_FSDIF]); - regmap_write(regmap, base + EUSB2_TUNE_USB2_CROSSOVER, init_tbl[TUNE_USB2_CROSSOVER]); + /* Write registers from init table */ + for (int i = 0; i < rptr->cfg->init_tbl_num; i++) + regmap_write(regmap, base + rptr->cfg->init_tbl[i].reg, + 
rptr->cfg->init_tbl[i].value); - regmap_write(regmap, base + EUSB2_TUNE_USB2_PREEM, tune_usb2_preem); - regmap_write(regmap, base + EUSB2_TUNE_HSDISC, tune_hsdisc); - regmap_write(regmap, base + EUSB2_TUNE_IUSB2, tune_iusb2); + /* Override registers from devicetree values */ + if (!of_property_read_u8(np, "qcom,tune-usb2-amplitude", &val)) + regmap_write(regmap, base + EUSB2_TUNE_IUSB2, val); - ret = regmap_read_poll_timeout(regmap, base + EUSB2_RPTR_STATUS, val, val & RPTR_OK, 10, 5); + if (!of_property_read_u8(np, "qcom,tune-usb2-disc-thres", &val)) + regmap_write(regmap, base + EUSB2_TUNE_HSDISC, val); + + if (!of_property_read_u8(np, "qcom,tune-usb2-preem", &val)) + regmap_write(regmap, base + EUSB2_TUNE_USB2_PREEM, val); + + /* Wait for status OK */ + ret = regmap_read_poll_timeout(regmap, base + EUSB2_RPTR_STATUS, poll_val, + poll_val & RPTR_OK, 10, 5); if (ret) dev_err(rptr->dev, "initialization timed-out\n"); From 03aa45d6c62d6861dcbcff627d88814c0ddecc88 Mon Sep 17 00:00:00 2001 From: Mrinmay Sarkar Date: Tue, 17 Jun 2025 17:08:19 +0530 Subject: [PATCH 0401/2411] phy: qcom: qmp-pcie: Update PHY settings for QCS8300 & SA8775P Update the PHY settings to align with the latest PCIe PHY Hardware Programming Guide for both PCIe controllers on the SA8775P platform. Add the ln_shrd region for SA8775P, incorporating new register writes as specified in the updated Hardware Programming Guide. Update pcs table for QCS8300, since both QCS8300 and SA8775P are closely related and share same pcs settings.
Signed-off-by: Mrinmay Sarkar Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250617-update_phy-v5-1-2df83ed6a373@quicinc.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 89 ++++++++++--------- .../qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h | 2 + drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h | 4 + .../phy-qcom-qmp-qserdes-ln-shrd-v5.h | 11 +++ drivers/phy/qualcomm/phy-qcom-qmp.h | 1 + 5 files changed, 66 insertions(+), 41 deletions(-) create mode 100644 drivers/phy/qualcomm/phy-qcom-qmp-qserdes-ln-shrd-v5.h diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 461b9e0af610..95830dcfdec9 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2639,29 +2639,29 @@ static const struct qmp_phy_init_tbl sa8775p_qmp_gen4x2_pcie_rc_serdes_alt_tbl[] }; static const struct qmp_phy_init_tbl sa8775p_qmp_gen4x2_pcie_rx_alt_tbl[] = { - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_UCDR_PI_CONTROLS, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_UCDR_PI_CONTROLS, 0x07), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B0, 0x9a), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B0, 0x9b), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B1, 0xb0), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B2, 0x92), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B2, 0xe4), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B3, 0xf0), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B4, 0x42), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B5, 0x99), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B6, 0x29), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B0, 0x9a), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B5, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B6, 0x20), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B0, 0x9b), 
QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B1, 0xfb), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B2, 0x92), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B2, 0xe4), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B3, 0xec), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B4, 0x43), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B5, 0xdd), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B6, 0x0d), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B0, 0xf3), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B0, 0xb3), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B1, 0xf8), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B2, 0xec), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B3, 0xd6), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B4, 0x83), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B5, 0xf5), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B6, 0x5e), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B2, 0xed), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B3, 0xe5), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B4, 0x8d), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B5, 0xd6), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B6, 0x7e), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_PHPRE_CTRL, 0x20), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_AUX_DATA_THRESH_BIN_RATE_0_1, 0x3f), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_AUX_DATA_THRESH_BIN_RATE_2_3, 0x37), @@ -2680,12 +2680,12 @@ static const struct qmp_phy_init_tbl sa8775p_qmp_gen4x2_pcie_rx_alt_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_UCDR_FO_GAIN_RATE3, 0x08), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_UCDR_SO_GAIN_RATE3, 0x04), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_VGA_CAL_CNTRL1, 0x04), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_VGA_CAL_MAN_VAL, 0x08), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_EQU_ADAPTOR_CNTRL4, 0x0b), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_VGA_CAL_MAN_VAL, 0x03), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_EQU_ADAPTOR_CNTRL4, 0x08), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 
0x7c), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_IDAC_SAOFFSET, 0x10), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_DFE_DAC_ENABLE1, 0x00), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_GM_CAL, 0x05), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_GM_CAL, 0x01), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_TX_ADAPT_POST_THRESH1, 0x00), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_TX_ADAPT_POST_THRESH2, 0x1f), }; @@ -2699,6 +2699,8 @@ static const struct qmp_phy_init_tbl sa8775p_qmp_gen4_pcie_tx_tbl[] = { }; static const struct qmp_phy_init_tbl sa8775p_qmp_gen4_pcie_pcs_misc_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_PCIE_V5_20_PCS_G3_RXEQEVAL_TIME, 0x27), + QMP_PHY_INIT_CFG(QPHY_PCIE_V5_20_PCS_G4_RXEQEVAL_TIME, 0x27), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_EQ_CONFIG1, 0x16), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_G4_EQ_CONFIG5, 0x02), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_G4_PRE_GAIN, 0x2e), @@ -2711,11 +2713,19 @@ static const struct qmp_phy_init_tbl sa8775p_qmp_gen4_pcie_rc_pcs_misc_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00), }; -static const struct qmp_phy_init_tbl sa8775p_qmp_gen4x2_pcie_pcs_alt_tbl[] = { +static const struct qmp_phy_init_tbl sa8775p_qmp_gen4_pcie_pcs_alt_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_EQ_CONFIG4, 0x16), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_EQ_CONFIG5, 0x22), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_G3S2_PRE_GAIN, 0x2e), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_RX_SIGDET_LVL, 0x66), + QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_LOCK_DETECT_CONFIG1, 0xff), + QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_LOCK_DETECT_CONFIG2, 0x89), + QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_ALIGN_DETECT_CONFIG1, 0x00), + QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_ALIGN_DETECT_CONFIG2, 0x50), +}; + +static const struct qmp_phy_init_tbl sa8775p_qmp_gen4x2_pcie_ln_shrd_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_v5_LN_SHRD_UCDR_PI_CTRL2, 0x00), }; static const struct qmp_phy_init_tbl sa8775p_qmp_gen4x4_pcie_rx_alt_tbl[] = { @@ -2739,27 +2749,27 @@ static const struct qmp_phy_init_tbl sa8775p_qmp_gen4x4_pcie_rx_alt_tbl[] = { 
QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH5_RATE3, 0x1f), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH6_RATE3, 0x1f), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_Q_PI_INTRINSIC_BIAS_RATE32, 0x09), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B0, 0x99), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B0, 0x9b), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B1, 0xb0), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B2, 0x92), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B2, 0xd2), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B3, 0xf0), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B4, 0x42), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B5, 0x00), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B6, 0x20), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B0, 0x9a), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B0, 0x9b), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B1, 0xb6), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B2, 0x92), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B2, 0xd2), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B3, 0xf0), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B4, 0x43), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B5, 0xdd), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B6, 0x0d), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B0, 0xf3), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B0, 0xb3), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B1, 0xf6), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B2, 0xee), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B3, 0xd2), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B2, 0xe4), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B3, 0xe6), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B4, 0x83), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B5, 0xf9), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B6, 0x3d), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B5, 0xd6), + 
QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B6, 0x7e), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_TX_ADAPT_POST_THRESH1, 0x00), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_TX_ADAPT_POST_THRESH2, 0x1f), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_UCDR_FO_GAIN_RATE2, 0x0c), @@ -2767,14 +2777,7 @@ static const struct qmp_phy_init_tbl sa8775p_qmp_gen4x4_pcie_rx_alt_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_UCDR_SO_GAIN_RATE3, 0x04), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_UCDR_PI_CONTROLS, 0x16), QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_VGA_CAL_CNTRL1, 0x04), - QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_VGA_CAL_MAN_VAL, 0x08), -}; - -static const struct qmp_phy_init_tbl sa8775p_qmp_gen4x4_pcie_pcs_alt_tbl[] = { - QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_EQ_CONFIG4, 0x16), - QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_EQ_CONFIG5, 0x22), - QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_G3S2_PRE_GAIN, 0x2e), - QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_RX_SIGDET_LVL, 0x66), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_VGA_CAL_MAN_VAL, 0x06), }; static const struct qmp_phy_init_tbl sa8775p_qmp_gen4x4_pcie_serdes_alt_tbl[] = { @@ -3191,6 +3194,7 @@ static const struct qmp_pcie_offsets qmp_pcie_offsets_v5_20 = { .rx = 0x0200, .tx2 = 0x0800, .rx2 = 0x0a00, + .ln_shrd = 0x0e00, }; static const struct qmp_pcie_offsets qmp_pcie_offsets_v5_30 = { @@ -3398,8 +3402,8 @@ static const struct qmp_phy_cfg qcs8300_qmp_gen4x2_pciephy_cfg = { .tx_num = ARRAY_SIZE(sa8775p_qmp_gen4_pcie_tx_tbl), .rx = qcs8300_qmp_gen4x2_pcie_rx_alt_tbl, .rx_num = ARRAY_SIZE(qcs8300_qmp_gen4x2_pcie_rx_alt_tbl), - .pcs = sa8775p_qmp_gen4x2_pcie_pcs_alt_tbl, - .pcs_num = ARRAY_SIZE(sa8775p_qmp_gen4x2_pcie_pcs_alt_tbl), + .pcs = sa8775p_qmp_gen4_pcie_pcs_alt_tbl, + .pcs_num = ARRAY_SIZE(sa8775p_qmp_gen4_pcie_pcs_alt_tbl), .pcs_misc = sa8775p_qmp_gen4_pcie_pcs_misc_tbl, .pcs_misc_num = ARRAY_SIZE(sa8775p_qmp_gen4_pcie_pcs_misc_tbl), }, @@ -4067,12 +4071,15 @@ static const struct qmp_phy_cfg sa8775p_qmp_gen4x2_pciephy_cfg = { .tx_num = ARRAY_SIZE(sa8775p_qmp_gen4_pcie_tx_tbl), .rx = 
sa8775p_qmp_gen4x2_pcie_rx_alt_tbl, .rx_num = ARRAY_SIZE(sa8775p_qmp_gen4x2_pcie_rx_alt_tbl), - .pcs = sa8775p_qmp_gen4x2_pcie_pcs_alt_tbl, - .pcs_num = ARRAY_SIZE(sa8775p_qmp_gen4x2_pcie_pcs_alt_tbl), - .pcs_misc = sa8775p_qmp_gen4_pcie_pcs_misc_tbl, + .pcs = sa8775p_qmp_gen4_pcie_pcs_alt_tbl, + .pcs_num = ARRAY_SIZE(sa8775p_qmp_gen4_pcie_pcs_alt_tbl), + .pcs_misc = sa8775p_qmp_gen4_pcie_pcs_misc_tbl, .pcs_misc_num = ARRAY_SIZE(sa8775p_qmp_gen4_pcie_pcs_misc_tbl), .pcs_lane1 = sdx65_qmp_pcie_pcs_lane1_tbl, .pcs_lane1_num = ARRAY_SIZE(sdx65_qmp_pcie_pcs_lane1_tbl), + .ln_shrd = sa8775p_qmp_gen4x2_pcie_ln_shrd_tbl, + .ln_shrd_num = ARRAY_SIZE(sa8775p_qmp_gen4x2_pcie_ln_shrd_tbl), + }, .tbls_rc = &(const struct qmp_phy_cfg_tbls) { @@ -4112,8 +4119,8 @@ static const struct qmp_phy_cfg sa8775p_qmp_gen4x4_pciephy_cfg = { .tx_num = ARRAY_SIZE(sa8775p_qmp_gen4_pcie_tx_tbl), .rx = sa8775p_qmp_gen4x4_pcie_rx_alt_tbl, .rx_num = ARRAY_SIZE(sa8775p_qmp_gen4x4_pcie_rx_alt_tbl), - .pcs = sa8775p_qmp_gen4x4_pcie_pcs_alt_tbl, - .pcs_num = ARRAY_SIZE(sa8775p_qmp_gen4x4_pcie_pcs_alt_tbl), + .pcs = sa8775p_qmp_gen4_pcie_pcs_alt_tbl, + .pcs_num = ARRAY_SIZE(sa8775p_qmp_gen4_pcie_pcs_alt_tbl), .pcs_misc = sa8775p_qmp_gen4_pcie_pcs_misc_tbl, .pcs_misc_num = ARRAY_SIZE(sa8775p_qmp_gen4_pcie_pcs_misc_tbl), }, diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h index 283d63c81593..951de964dc12 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h @@ -13,6 +13,8 @@ #define QPHY_V5_20_PCS_PCIE_OSC_DTCT_ACTIONS 0x090 #define QPHY_V5_20_PCS_PCIE_EQ_CONFIG1 0x0a0 #define QPHY_V5_20_PCS_PCIE_PRESET_P10_POST 0x0e0 +#define QPHY_PCIE_V5_20_PCS_G3_RXEQEVAL_TIME 0x0f0 +#define QPHY_PCIE_V5_20_PCS_G4_RXEQEVAL_TIME 0x0f4 #define QPHY_V5_20_PCS_PCIE_G4_EQ_CONFIG2 0x0fc #define QPHY_V5_20_PCS_PCIE_G4_EQ_CONFIG5 0x108 #define QPHY_V5_20_PCS_PCIE_G4_PRE_GAIN 0x15c diff --git 
a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h index d3ad5b7f5425..bbee68df4e14 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h @@ -8,8 +8,12 @@ #define QPHY_V5_20_PCS_INSIG_SW_CTRL7 0x060 #define QPHY_V5_20_PCS_INSIG_MX_CTRL7 0x07c +#define QPHY_V5_20_PCS_LOCK_DETECT_CONFIG1 0x0c4 +#define QPHY_V5_20_PCS_LOCK_DETECT_CONFIG2 0x0c8 #define QPHY_V5_20_PCS_G3S2_PRE_GAIN 0x170 #define QPHY_V5_20_PCS_RX_SIGDET_LVL 0x188 +#define QPHY_V5_20_PCS_ALIGN_DETECT_CONFIG1 0x1b8 +#define QPHY_V5_20_PCS_ALIGN_DETECT_CONFIG2 0x1bc #define QPHY_V5_20_PCS_EQ_CONFIG2 0x1d8 #define QPHY_V5_20_PCS_EQ_CONFIG4 0x1e0 #define QPHY_V5_20_PCS_EQ_CONFIG5 0x1e4 diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-ln-shrd-v5.h b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-ln-shrd-v5.h new file mode 100644 index 000000000000..68c38fdfc1d8 --- /dev/null +++ b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-ln-shrd-v5.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2025, The Linux Foundation. All rights reserved. 
+ */ + +#ifndef QCOM_PHY_QMP_QSERDES_LN_SHRD_V5_H_ +#define QCOM_PHY_QMP_QSERDES_LN_SHRD_V5_H_ + +#define QSERDES_v5_LN_SHRD_UCDR_PI_CTRL2 0x04c + +#endif diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.h b/drivers/phy/qualcomm/phy-qcom-qmp.h index 8148853ff275..f58c82b2dd23 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp.h @@ -25,6 +25,7 @@ #include "phy-qcom-qmp-qserdes-txrx-v6.h" #include "phy-qcom-qmp-qserdes-txrx-v6_20.h" #include "phy-qcom-qmp-qserdes-txrx-v6_n4.h" +#include "phy-qcom-qmp-qserdes-ln-shrd-v5.h" #include "phy-qcom-qmp-qserdes-ln-shrd-v6.h" #include "phy-qcom-qmp-qserdes-com-v7.h" From 603bd9808f58009e1f230271f94e1b9e13d506ba Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 17 Jun 2025 10:05:03 +0200 Subject: [PATCH 0402/2411] phy: qcom: m31-eusb2: fix match data sanity check The device_get_match_data() helper returns NULL if a new entry is ever added without corresponding match data. Fixes: 9c8504861cc4 ("phy: qcom: Add M31 based eUSB2 PHY driver") Cc: Wesley Cheng Cc: Melody Olvera Signed-off-by: Johan Hovold Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250617080503.11262-1-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-m31-eusb2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c b/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c index 7b7120e4214f..520eabcc61f7 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c +++ b/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c @@ -253,7 +253,7 @@ static int m31eusb2_phy_probe(struct platform_device *pdev) return -ENOMEM; data = device_get_match_data(dev); - if (IS_ERR(data)) + if (!data) return -EINVAL; phy->data = data; From 2bff9083c1744dc8751ddc0844a65e3bee89f519 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 17 Jun 2025 10:04:01 +0200 Subject: [PATCH 0403/2411] phy: qcom: m31-eusb2: drop registration printk Drivers should generally be quiet on
successful probe so drop the registration printk from the recently added M31 EUSB2 driver. Signed-off-by: Johan Hovold Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250617080401.11147-1-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-m31-eusb2.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c b/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c index 520eabcc61f7..bf32572566c4 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c +++ b/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c @@ -289,8 +289,6 @@ static int m31eusb2_phy_probe(struct platform_device *pdev) "failed to get repeater\n"); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); - if (!IS_ERR(phy_provider)) - dev_info(dev, "Registered M31 USB phy\n"); return PTR_ERR_OR_ZERO(phy_provider); } From 304c102cff7382353a28039907a7017bde795db9 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 16 Jun 2025 08:25:42 +0200 Subject: [PATCH 0404/2411] phy: qcom: qmp-combo: Add missing PLL (VCO) configuration on SM8750 Add missing DP PHY status and VCO clock configuration registers to fix configuring the VCO rate on SM8750. Without proper VCO rate setting, it works on after-reset half of rate which is not enough for DP over USB to work as seen on logs: [drm:msm_dp_ctrl_link_train_1_2] *ERROR* max v_level reached [drm:msm_dp_ctrl_link_train_1_2] *ERROR* link training #1 on phy 0 failed. 
ret=-11 Fixes: c4364048baf4 ("phy: qcom: qmp-combo: Add new PHY sequences for SM8750") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250616062541.7167-2-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 8b9710a9654a..f07d097b129f 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -228,6 +228,9 @@ static const unsigned int qmp_v8_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_COM_CMN_STATUS] = QSERDES_V8_COM_CMN_STATUS, [QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V8_COM_BIAS_EN_CLKBUFLR_EN, + [QPHY_DP_PHY_STATUS] = QSERDES_V6_DP_PHY_STATUS, + [QPHY_DP_PHY_VCO_DIV] = QSERDES_V6_DP_PHY_VCO_DIV, + [QPHY_TX_TX_POL_INV] = QSERDES_V8_TX_TX_POL_INV, [QPHY_TX_TX_DRV_LVL] = QSERDES_V8_TX_TX_DRV_LVL, [QPHY_TX_TX_EMP_POST1_LVL] = QSERDES_V8_TX_TX_EMP_POST1_LVL, From d142643c06bcbc8be173a4d749adf42dd798a617 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 11 Jun 2025 10:14:56 +0900 Subject: [PATCH 0405/2411] dm: Remove unnecessary return in dm_zone_endio() The return statement at the end of dm_zone_endio() is not needed. Remove it. 
Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: Mikulas Patocka --- drivers/md/dm-zone.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index 3d31b82e0730..78e17dd4d01b 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -467,8 +467,6 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone) bdev_offset_from_zone_start(disk->part0, clone->bi_iter.bi_sector); } - - return; } static int dm_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx, From 548d88f74ed49c3c9dbd68550b7b335c2afa6413 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 11 Jun 2025 10:14:57 +0900 Subject: [PATCH 0406/2411] dm: Simplify dm_io_complete() The local variable first_requeue is not needed since it is always equal to dm_io_flagged(io, DM_IO_WAS_SPLIT). Call __dm_io_complete() passing this value directly and remove first_requeue. Also declare dm_io_complete() as inline to make sure it is inlined in its single call site, thus avoiding the cost of a function call. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: Mikulas Patocka --- drivers/md/dm.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1726f0f828cc..55579adbeb3f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1024,10 +1024,8 @@ static void dm_wq_requeue_work(struct work_struct *work) * * 2) io->orig_bio points to new cloned bio which matches the requeued dm_io. */ -static void dm_io_complete(struct dm_io *io) +static inline void dm_io_complete(struct dm_io *io) { - bool first_requeue; - /* * Only dm_io that has been split needs two stage requeue, otherwise * we may run into long bio clone chain during suspend and OOM could @@ -1036,12 +1034,7 @@ static void dm_io_complete(struct dm_io *io) * Also flush data dm_io won't be marked as DM_IO_WAS_SPLIT, so they * also aren't handled via the first stage requeue. 
*/ - if (dm_io_flagged(io, DM_IO_WAS_SPLIT)) - first_requeue = true; - else - first_requeue = false; - - __dm_io_complete(io, first_requeue); + __dm_io_complete(io, dm_io_flagged(io, DM_IO_WAS_SPLIT)); } /* From ebbd17695e9e1f4c3cdb36149c8b8f38b585e14d Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Thu, 26 Jun 2025 19:07:50 +0300 Subject: [PATCH 0407/2411] dm: ima: avoid extra calls to strlen() Since 'scnprintf()' returns the number of characters emitted (not including the trailing '\0'), use that return value instead of the subsequent calls to 'strlen()' where appropriate. Compile tested only. Signed-off-by: Dmitry Antipov Signed-off-by: Mikulas Patocka --- drivers/md/dm-ima.c | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c index b90f34259fbb..8b50c908c6f4 100644 --- a/drivers/md/dm-ima.c +++ b/drivers/md/dm-ima.c @@ -241,10 +241,11 @@ void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_fl /* * First retrieve the target metadata. */ - scnprintf(target_metadata_buf, DM_IMA_TARGET_METADATA_BUF_LEN, - "target_index=%d,target_begin=%llu,target_len=%llu,", - i, ti->begin, ti->len); - target_metadata_buf_len = strlen(target_metadata_buf); + target_metadata_buf_len = + scnprintf(target_metadata_buf, + DM_IMA_TARGET_METADATA_BUF_LEN, + "target_index=%d,target_begin=%llu,target_len=%llu,", + i, ti->begin, ti->len); /* * Then retrieve the actual target data. 
@@ -448,11 +449,9 @@ void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap) if (r) goto error; - scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, - "%sname=%s,uuid=%s;device_resume=no_data;", - DM_IMA_VERSION_STR, dev_name, dev_uuid); - l = strlen(device_table_data); - + l = scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, + "%sname=%s,uuid=%s;device_resume=no_data;", + DM_IMA_VERSION_STR, dev_name, dev_uuid); } capacity_len = strlen(capacity_str); @@ -561,10 +560,9 @@ void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all) if (dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio)) goto error; - scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, - "%sname=%s,uuid=%s;device_remove=no_data;", - DM_IMA_VERSION_STR, dev_name, dev_uuid); - l = strlen(device_table_data); + l = scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, + "%sname=%s,uuid=%s;device_remove=no_data;", + DM_IMA_VERSION_STR, dev_name, dev_uuid); } memcpy(device_table_data + l, remove_all_str, remove_all_len); @@ -647,10 +645,9 @@ void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map) if (dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio)) goto error2; - scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, - "%sname=%s,uuid=%s;table_clear=no_data;", - DM_IMA_VERSION_STR, dev_name, dev_uuid); - l = strlen(device_table_data); + l = scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, + "%sname=%s,uuid=%s;table_clear=no_data;", + DM_IMA_VERSION_STR, dev_name, dev_uuid); } capacity_len = strlen(capacity_str); @@ -706,7 +703,7 @@ void dm_ima_measure_on_device_rename(struct mapped_device *md) char *old_device_data = NULL, *new_device_data = NULL, *combined_device_data = NULL; char *new_dev_name = NULL, *new_dev_uuid = NULL, *capacity_str = NULL; bool noio = true; - int r; + int r, len; if (dm_ima_alloc_and_copy_device_data(md, &new_device_data, md->ima.active_table.num_targets, noio)) @@ -728,12 +725,11 @@ void 
dm_ima_measure_on_device_rename(struct mapped_device *md) md->ima.active_table.device_metadata = new_device_data; md->ima.active_table.device_metadata_len = strlen(new_device_data); - scnprintf(combined_device_data, DM_IMA_DEVICE_BUF_LEN * 2, - "%s%snew_name=%s,new_uuid=%s;%s", DM_IMA_VERSION_STR, old_device_data, - new_dev_name, new_dev_uuid, capacity_str); + len = scnprintf(combined_device_data, DM_IMA_DEVICE_BUF_LEN * 2, + "%s%snew_name=%s,new_uuid=%s;%s", DM_IMA_VERSION_STR, old_device_data, + new_dev_name, new_dev_uuid, capacity_str); - dm_ima_measure_data("dm_device_rename", combined_device_data, strlen(combined_device_data), - noio); + dm_ima_measure_data("dm_device_rename", combined_device_data, len, noio); goto exit; From b04c7e88bcf5ddcd15e2c620b802c28848f437bb Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:49 +0900 Subject: [PATCH 0408/2411] kconfig: set MENU_CHANGED to choice when the selected member is changed In gconf, choice entries display the selected symbol in the 'Value' column, but it is not updated when the selected symbol is changed. Set the MENU_CHANGED flag, so it is updated. Signed-off-by: Masahiro Yamada --- scripts/kconfig/symbol.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index d57f8cbba291..26ab10c0fd76 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -195,6 +195,10 @@ static void sym_set_changed(struct symbol *sym) list_for_each_entry(menu, &sym->menus, link) menu->flags |= MENU_CHANGED; + + menu = sym_get_choice_menu(sym); + if (menu) + menu->flags |= MENU_CHANGED; } static void sym_set_all_changed(void) From 0044c5fcae3d2e89ee7d4979a52e1014774d4f92 Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Fri, 4 Apr 2025 13:36:14 +0800 Subject: [PATCH 0409/2411] dmaengine: idxd: Remove __packed from structures The __packed attribute introduces potential unaligned memory accesses and endianness portability issues. 
Instead of relying on compiler-specific packing, it's much better to explicitly fill structure gaps using padding fields, ensuring natural alignment. Since all previously __packed structures already enforce proper alignment through manual padding, the __packed qualifiers are unnecessary and can be safely removed. Signed-off-by: Yi Sun Reviewed-by: Andy Shevchenko Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Link: https://lore.kernel.org/r/20250404053614.3096769-1-yi.sun@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/registers.h | 60 ++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 006ba206ab1b..9c1c546fe443 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -45,7 +45,7 @@ union gen_cap_reg { u64 rsvd3:32; }; u64 bits; -} __packed; +}; #define IDXD_GENCAP_OFFSET 0x10 union wq_cap_reg { @@ -65,7 +65,7 @@ union wq_cap_reg { u64 rsvd4:8; }; u64 bits; -} __packed; +}; #define IDXD_WQCAP_OFFSET 0x20 #define IDXD_WQCFG_MIN 5 @@ -79,7 +79,7 @@ union group_cap_reg { u64 rsvd:45; }; u64 bits; -} __packed; +}; #define IDXD_GRPCAP_OFFSET 0x30 union engine_cap_reg { @@ -88,7 +88,7 @@ union engine_cap_reg { u64 rsvd:56; }; u64 bits; -} __packed; +}; #define IDXD_ENGCAP_OFFSET 0x38 @@ -114,7 +114,7 @@ union offsets_reg { u64 rsvd:48; }; u64 bits[2]; -} __packed; +}; #define IDXD_TABLE_MULT 0x100 @@ -128,7 +128,7 @@ union gencfg_reg { u32 rsvd2:18; }; u32 bits; -} __packed; +}; #define IDXD_GENCTRL_OFFSET 0x88 union genctrl_reg { @@ -139,7 +139,7 @@ union genctrl_reg { u32 rsvd:29; }; u32 bits; -} __packed; +}; #define IDXD_GENSTATS_OFFSET 0x90 union gensts_reg { @@ -149,7 +149,7 @@ union gensts_reg { u32 rsvd:28; }; u32 bits; -} __packed; +}; enum idxd_device_status_state { IDXD_DEVICE_STATE_DISABLED = 0, @@ -183,7 +183,7 @@ union idxd_command_reg { u32 int_req:1; }; u32 bits; -} __packed; +}; enum idxd_cmd { 
IDXD_CMD_ENABLE_DEVICE = 1, @@ -213,7 +213,7 @@ union cmdsts_reg { u8 active:1; }; u32 bits; -} __packed; +}; #define IDXD_CMDSTS_ACTIVE 0x80000000 #define IDXD_CMDSTS_ERR_MASK 0xff #define IDXD_CMDSTS_RES_SHIFT 8 @@ -284,7 +284,7 @@ union sw_err_reg { u64 rsvd5; }; u64 bits[4]; -} __packed; +}; union iaa_cap_reg { struct { @@ -303,7 +303,7 @@ union iaa_cap_reg { u64 rsvd:52; }; u64 bits; -} __packed; +}; #define IDXD_IAACAP_OFFSET 0x180 @@ -320,7 +320,7 @@ union evlcfg_reg { u64 rsvd2:28; }; u64 bits[2]; -} __packed; +}; #define IDXD_EVL_SIZE_MIN 0x0040 #define IDXD_EVL_SIZE_MAX 0xffff @@ -334,7 +334,7 @@ union msix_perm { u32 pasid:20; }; u32 bits; -} __packed; +}; union group_flags { struct { @@ -352,13 +352,13 @@ union group_flags { u64 rsvd5:26; }; u64 bits; -} __packed; +}; struct grpcfg { u64 wqs[4]; u64 engines; union group_flags flags; -} __packed; +}; union wqcfg { struct { @@ -410,7 +410,7 @@ union wqcfg { u64 op_config[4]; }; u32 bits[16]; -} __packed; +}; #define WQCFG_PASID_IDX 2 #define WQCFG_PRIVL_IDX 2 @@ -474,7 +474,7 @@ union idxd_perfcap { u64 rsvd3:8; }; u64 bits; -} __packed; +}; #define IDXD_EVNTCAP_OFFSET 0x80 union idxd_evntcap { @@ -483,7 +483,7 @@ union idxd_evntcap { u64 rsvd:36; }; u64 bits; -} __packed; +}; struct idxd_event { union { @@ -493,7 +493,7 @@ struct idxd_event { }; u32 val; }; -} __packed; +}; #define IDXD_CNTRCAP_OFFSET 0x800 struct idxd_cntrcap { @@ -506,7 +506,7 @@ struct idxd_cntrcap { u32 val; }; struct idxd_event events[]; -} __packed; +}; #define IDXD_PERFRST_OFFSET 0x10 union idxd_perfrst { @@ -516,7 +516,7 @@ union idxd_perfrst { u32 rsvd:30; }; u32 val; -} __packed; +}; #define IDXD_OVFSTATUS_OFFSET 0x30 #define IDXD_PERFFRZ_OFFSET 0x20 @@ -533,7 +533,7 @@ union idxd_cntrcfg { u64 rsvd3:4; }; u64 val; -} __packed; +}; #define IDXD_FLTCFG_OFFSET 0x300 @@ -543,7 +543,7 @@ union idxd_cntrdata { u64 event_count_value; }; u64 val; -} __packed; +}; union event_cfg { struct { @@ -551,7 +551,7 @@ union event_cfg { u64 
event_enc:28; }; u64 val; -} __packed; +}; union filter_cfg { struct { @@ -562,7 +562,7 @@ union filter_cfg { u64 eng:8; }; u64 val; -} __packed; +}; #define IDXD_EVLSTATUS_OFFSET 0xf0 @@ -580,7 +580,7 @@ union evl_status_reg { u32 bits_upper32; }; u64 bits; -} __packed; +}; #define IDXD_MAX_BATCH_IDENT 256 @@ -620,17 +620,17 @@ struct __evl_entry { }; u64 fault_addr; u64 rsvd5; -} __packed; +}; struct dsa_evl_entry { struct __evl_entry e; struct dsa_completion_record cr; -} __packed; +}; struct iax_evl_entry { struct __evl_entry e; u64 rsvd[4]; struct iax_completion_record cr; -} __packed; +}; #endif From ef0f7c235e5c2195ff61a2c9a5b9efb2375ce433 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 27 Jun 2025 09:38:56 -0700 Subject: [PATCH 0410/2411] perf build: Fix a build error on REFCNT_CHECKING=1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recently it added -fno-strict-aliasing to sync with the kernel behavior. But it caused an error due to potential uninitialized access like below: In file included from util/symbol.c:27: In function ‘dso__set_symbol_names_len’, inlined from ‘dso__sort_by_name’ at util/symbol.c:638:4: util/dso.h:654:46: error: ‘len’ may be used uninitialized [-Werror=maybe-uninitialized] 654 | RC_CHK_ACCESS(dso)->symbol_names_len = len; | ^ util/symbol.c: In function ‘dso__sort_by_name’: util/symbol.c:634:24: note: ‘len’ was declared here 634 | size_t len; | ^~~ Let's just initialize it with 0. 
Fixes: 55a18d2f3ff79c90 ("perf build: enable -fno-strict-aliasing") Closes: https://lore.kernel.org/r/aF7JC8zkG5-_-nY_@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/symbol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 8b30c6f16a9e..73dab94fab74 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -631,7 +631,7 @@ void dso__sort_by_name(struct dso *dso) { mutex_lock(dso__lock(dso)); if (!dso__sorted_by_name(dso)) { - size_t len; + size_t len = 0; dso__set_symbol_names(dso, symbols__sort_by_name(dso__symbols(dso), &len)); if (dso__symbol_names(dso)) { From aa497357c125662d7526d6ec8ce1259e72b2c8af Mon Sep 17 00:00:00 2001 From: Chun-Tse Shao Date: Fri, 27 Jun 2025 13:16:41 -0700 Subject: [PATCH 0411/2411] perf stat: Fix uncore aggregation number Follow up: lore.kernel.org/CAP-5=fVDF4-qYL1Lm7efgiHk7X=_nw_nEFMBZFMcsnOOJgX4Kg@mail.gmail.com/ The patch adds unit aggregation during evsel merge the aggregated uncore counters. Change the name of the column to `ctrs` and `counters` for json mode. 
Tested on a 2-socket machine with SNC3, uncore_imc_[0-11] and cpumask="0,120" Before: perf stat -e clockticks -I 1000 --per-socket # time socket cpus counts unit events 1.001085024 S0 1 9615386315 clockticks 1.001085024 S1 1 9614287448 clockticks perf stat -e clockticks -I 1000 --per-node # time node cpus counts unit events 1.001029867 N0 1 3205726984 clockticks 1.001029867 N1 1 3205444421 clockticks 1.001029867 N2 1 3205234018 clockticks 1.001029867 N3 1 3205224660 clockticks 1.001029867 N4 1 3205207213 clockticks 1.001029867 N5 1 3205528246 clockticks After: perf stat -e clockticks -I 1000 --per-socket # time socket ctrs counts unit events 1.001026071 S0 12 9619677996 clockticks 1.001026071 S1 12 9618612614 clockticks perf stat -e clockticks -I 1000 --per-node # time node ctrs counts unit events 1.001027449 N0 4 3207251859 clockticks 1.001027449 N1 4 3207315930 clockticks 1.001027449 N2 4 3206981828 clockticks 1.001027449 N3 4 3206566126 clockticks 1.001027449 N4 4 3206032609 clockticks 1.001027449 N5 4 3205651355 clockticks Tested with JSON output linter: perf test "perf stat JSON output linter" 94: perf stat JSON output linter : Ok Suggested-by: Ian Rogers Reviewed-by: Ian Rogers Signed-off-by: Chun-Tse Shao Link: https://lore.kernel.org/r/20250627201818.479421-1-ctshao@google.com Signed-off-by: Namhyung Kim --- tools/perf/Documentation/perf-stat.txt | 6 ++-- .../tests/shell/lib/perf_json_output_lint.py | 4 +-- tools/perf/util/stat-display.c | 34 +++++++++---------- tools/perf/util/stat.c | 2 +- 4 files changed, 24 insertions(+), 22 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 61d091670dee..1a766d4a2233 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -640,18 +640,20 @@ JSON FORMAT With -j, perf stat is able to print out a JSON format output that can be used for parsing. 
-- timestamp : optional usec time stamp in fractions of second (with -I) +- interval : optional timestamp in fractions of second (with -I) - optional aggregate options: - core : core identifier (with --per-core) - die : die identifier (with --per-die) - socket : socket identifier (with --per-socket) - node : node identifier (with --per-node) - thread : thread identifier (with --per-thread) +- counters : number of aggregated PMU counters - counter-value : counter value - unit : unit of the counter value or empty - event : event name - variance : optional variance if multiple values are collected (with -r) -- runtime : run time of counter +- event-runtime : run time of the event +- pcnt-running : percentage of time the event was running - metric-value : optional metric value - metric-unit : optional unit of metric diff --git a/tools/perf/tests/shell/lib/perf_json_output_lint.py b/tools/perf/tests/shell/lib/perf_json_output_lint.py index 9e772a89ce38..c6750ef06c0f 100644 --- a/tools/perf/tests/shell/lib/perf_json_output_lint.py +++ b/tools/perf/tests/shell/lib/perf_json_output_lint.py @@ -45,7 +45,7 @@ def is_counter_value(num): def check_json_output(expected_items): checks = { - 'aggregate-number': lambda x: isfloat(x), + 'counters': lambda x: isfloat(x), 'core': lambda x: True, 'counter-value': lambda x: is_counter_value(x), 'cgroup': lambda x: True, @@ -75,7 +75,7 @@ def check_json_output(expected_items): if count not in expected_items and count >= 1 and count <= 7 and 'metric-value' in item: # Events that generate >1 metric may have isolated metric # values and possibly other prefixes like interval, core, - # aggregate-number, or event-runtime/pcnt-running from multiplexing. + # counters, or event-runtime/pcnt-running from multiplexing. 
pass elif count not in expected_items and count >= 1 and count <= 5 and 'metricgroup' in item: pass diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 729ad5cd52cb..9cb5245a92aa 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -50,15 +50,15 @@ static int aggr_header_lens[] = { }; static const char *aggr_header_csv[] = { - [AGGR_CORE] = "core,cpus,", - [AGGR_CACHE] = "cache,cpus,", - [AGGR_CLUSTER] = "cluster,cpus,", - [AGGR_DIE] = "die,cpus,", - [AGGR_SOCKET] = "socket,cpus,", - [AGGR_NONE] = "cpu,", - [AGGR_THREAD] = "comm-pid,", - [AGGR_NODE] = "node,", - [AGGR_GLOBAL] = "" + [AGGR_CORE] = "core,ctrs,", + [AGGR_CACHE] = "cache,ctrs,", + [AGGR_CLUSTER] = "cluster,ctrs,", + [AGGR_DIE] = "die,ctrs,", + [AGGR_SOCKET] = "socket,ctrs,", + [AGGR_NONE] = "cpu,", + [AGGR_THREAD] = "comm-pid,", + [AGGR_NODE] = "node,", + [AGGR_GLOBAL] = "" }; static const char *aggr_header_std[] = { @@ -304,7 +304,7 @@ static void print_aggr_id_std(struct perf_stat_config *config, return; } - fprintf(output, "%-*s %*d ", aggr_header_lens[idx], buf, 4, aggr_nr); + fprintf(output, "%-*s %*d ", aggr_header_lens[idx], buf, /*strlen("ctrs")*/ 4, aggr_nr); } static void print_aggr_id_csv(struct perf_stat_config *config, @@ -366,27 +366,27 @@ static void print_aggr_id_json(struct perf_stat_config *config, struct outstate { switch (config->aggr_mode) { case AGGR_CORE: - json_out(os, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d", + json_out(os, "\"core\" : \"S%d-D%d-C%d\", \"counters\" : %d", id.socket, id.die, id.core, aggr_nr); break; case AGGR_CACHE: - json_out(os, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"aggregate-number\" : %d", + json_out(os, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"counters\" : %d", id.socket, id.die, id.cache_lvl, id.cache, aggr_nr); break; case AGGR_CLUSTER: - json_out(os, "\"cluster\" : \"S%d-D%d-CLS%d\", \"aggregate-number\" : %d", + json_out(os, "\"cluster\" : \"S%d-D%d-CLS%d\", \"counters\" : %d", 
id.socket, id.die, id.cluster, aggr_nr); break; case AGGR_DIE: - json_out(os, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d", + json_out(os, "\"die\" : \"S%d-D%d\", \"counters\" : %d", id.socket, id.die, aggr_nr); break; case AGGR_SOCKET: - json_out(os, "\"socket\" : \"S%d\", \"aggregate-number\" : %d", + json_out(os, "\"socket\" : \"S%d\", \"counters\" : %d", id.socket, aggr_nr); break; case AGGR_NODE: - json_out(os, "\"node\" : \"N%d\", \"aggregate-number\" : %d", + json_out(os, "\"node\" : \"N%d\", \"counters\" : %d", id.node, aggr_nr); break; case AGGR_NONE: @@ -1317,7 +1317,7 @@ static void print_header_interval_std(struct perf_stat_config *config, case AGGR_CLUSTER: case AGGR_CACHE: case AGGR_CORE: - fprintf(output, "#%*s %-*s cpus", + fprintf(output, "#%*s %-*s ctrs", INTERVAL_LEN - 1, "time", aggr_header_lens[config->aggr_mode], aggr_header_std[config->aggr_mode]); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 355a7d5c8ab8..b0205e99a4c9 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -526,7 +526,7 @@ static int evsel__merge_aggr_counters(struct evsel *evsel, struct evsel *alias) struct perf_counts_values *aggr_counts_a = &ps_a->aggr[i].counts; struct perf_counts_values *aggr_counts_b = &ps_b->aggr[i].counts; - /* NB: don't increase aggr.nr for aliases */ + ps_a->aggr[i].nr += ps_b->aggr[i].nr; aggr_counts_a->val += aggr_counts_b->val; aggr_counts_a->ena += aggr_counts_b->ena; From fbcd4b7bf5c92f7d456eefcecac518023357cea4 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 16 Jun 2025 10:36:21 +0000 Subject: [PATCH 0412/2411] rust: rbtree: add RBTree::is_empty In Rust Binder I need to be able to determine whether a red/black tree is empty. Thus, add a method for that operation to replace rbtree.iter().next().is_none() This is terrible, so add a method for this purpose. 
We do not add a RBTree::len method because computing the number of elements requires iterating the entire tree, but checking whether it is empty can be done cheaply. Signed-off-by: Alice Ryhl Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250616-rbtree-is-empty-v1-1-61f7cfb012e3@google.com [ Adjusted title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/rbtree.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rust/kernel/rbtree.rs b/rust/kernel/rbtree.rs index 8d978c896747..9457134eb3af 100644 --- a/rust/kernel/rbtree.rs +++ b/rust/kernel/rbtree.rs @@ -191,6 +191,12 @@ pub fn new() -> Self { } } + /// Returns true if this tree is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.root.rb_node.is_null() + } + /// Returns an iterator over the tree nodes, sorted by key. pub fn iter(&self) -> Iter<'_, K, V> { Iter { From d6763e0abb43d550791eb66d2b91e82cb29807f9 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Thu, 12 Jun 2025 11:17:33 +0000 Subject: [PATCH 0413/2411] rust: revocable: document why &T is not used in RevocableGuard When a reference appears in a function argument, the reference is assumed to be valid for the entire duration of that function call; this is called a stack protector [1]. Because of that, custom pointer types whose destructor may invalidate the pointee (i.e. they are more similar to Box than &T) cannot internally use a reference, and must instead use a raw pointer. This issue is something that is often missed during unsafe review. For examples, see [2] and [3]. To ensure that people don't try to simplify RevocableGuard by changing the raw pointer to a reference, add a comment to that effect. 
Link: https://perso.crans.org/vanille/treebor/protectors.html [1] Link: https://users.rust-lang.org/t/unsafe-code-review-semi-owning-weak-rwlock-t-guard/95706 [2] Link: https://lore.kernel.org/all/aEqdur4JTFa1V20U@google.com/ [3] Signed-off-by: Alice Ryhl Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250612-revocable-ptr-comment-v1-1-db36785877f6@google.com [ Adjusted title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/revocable.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rust/kernel/revocable.rs b/rust/kernel/revocable.rs index 06a3cdfce344..1cd4511f0260 100644 --- a/rust/kernel/revocable.rs +++ b/rust/kernel/revocable.rs @@ -231,6 +231,10 @@ fn drop(self: Pin<&mut Self>) { /// /// The RCU read-side lock is held while the guard is alive. pub struct RevocableGuard<'a, T> { + // This can't use the `&'a T` type because references that appear in function arguments must + // not become dangling during the execution of the function, which can happen if the + // `RevocableGuard` is passed as a function argument and then dropped during execution of the + // function. data_ref: *const T, _rcu_guard: rcu::Guard, _p: PhantomData<&'a ()>, From cd5f1534a37e0b05733a8714195ec90474c20e82 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:50 +0900 Subject: [PATCH 0414/2411] kconfig: qconf: do not show checkbox icon for choice When you select "Show All Options" or "Show Prompt Options", choice entries display a check box icon, but this has no point because choice is always y since commit fde192511bdb ("kconfig: remove tristate choice support"). 
Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/qconf.cc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index eaa465b0ccf9..546738a5c3b1 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -92,7 +92,6 @@ void ConfigItem::updateMenu(void) { ConfigList* list; struct symbol* sym; - struct property *prop; QString prompt; int type; tristate expr; @@ -105,11 +104,10 @@ void ConfigItem::updateMenu(void) } sym = menu->sym; - prop = menu->prompt; prompt = menu_get_prompt(menu); - if (prop) switch (prop->type) { - case P_MENU: + switch (menu->type) { + case M_MENU: if (list->mode == singleMode) { /* a menuconfig entry is displayed differently * depending whether it's at the view root or a child. @@ -123,10 +121,13 @@ void ConfigItem::updateMenu(void) setIcon(promptColIdx, QIcon()); } goto set_prompt; - case P_COMMENT: + case M_COMMENT: setIcon(promptColIdx, QIcon()); prompt = "*** " + prompt + " ***"; goto set_prompt; + case M_CHOICE: + setIcon(promptColIdx, QIcon()); + goto set_prompt; default: ; } From 604f5b2127fb76e15dcc6dabbd73b541817a2fba Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:51 +0900 Subject: [PATCH 0415/2411] kconfig: qconf: show selected choice in the Value column It is useful to display the selected choice's value in the Value column. 
Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/qconf.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 546738a5c3b1..68640e507ec4 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -127,6 +127,9 @@ void ConfigItem::updateMenu(void) goto set_prompt; case M_CHOICE: setIcon(promptColIdx, QIcon()); + sym = sym_calc_choice(menu); + if (sym) + setText(dataColIdx, sym->name); goto set_prompt; default: ; @@ -189,7 +192,11 @@ void ConfigItem::testUpdateMenu(void) if (!menu) return; - sym_calc_value(menu->sym); + if (menu->type == M_CHOICE) + sym_calc_choice(menu); + else + sym_calc_value(menu->sym); + if (menu->flags & MENU_CHANGED) { /* the menu entry changed, so update all list items */ menu->flags &= ~MENU_CHANGED; From 3c292cd0047c8758a2db7a44e441314e78b4db00 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:52 +0900 Subject: [PATCH 0416/2411] kconfig: rename menu_get_parent_menu() to menu_get_menu_or_parent_menu() The current menu_get_parent_menu() does not always return the parent menu; if the given argument is itself a menu, it returns that menu. Rename this function to better reflect this behavior. 
Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/conf.c | 2 +- scripts/kconfig/lkc.h | 2 +- scripts/kconfig/menu.c | 8 +++++++- scripts/kconfig/qconf.cc | 6 +++--- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c index 8abe57041955..a7b44cd8ae14 100644 --- a/scripts/kconfig/conf.c +++ b/scripts/kconfig/conf.c @@ -594,7 +594,7 @@ static void check_conf(struct menu *menu) default: if (!conf_cnt++) printf("*\n* Restart config...\n*\n"); - rootEntry = menu_get_parent_menu(menu); + rootEntry = menu_get_menu_or_parent_menu(menu); conf(rootEntry); break; } diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h index fbc907f75eac..5cc85c3d4aaa 100644 --- a/scripts/kconfig/lkc.h +++ b/scripts/kconfig/lkc.h @@ -97,7 +97,7 @@ bool menu_is_empty(struct menu *menu); bool menu_is_visible(struct menu *menu); bool menu_has_prompt(const struct menu *menu); const char *menu_get_prompt(const struct menu *menu); -struct menu *menu_get_parent_menu(struct menu *menu); +struct menu *menu_get_menu_or_parent_menu(struct menu *menu); int get_jump_key_char(void); struct gstr get_relations_str(struct symbol **sym_arr, struct list_head *head); void menu_get_ext_help(struct menu *menu, struct gstr *help); diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index 7d48a692bd27..ccb690bbf05d 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -575,7 +575,13 @@ const char *menu_get_prompt(const struct menu *menu) return NULL; } -struct menu *menu_get_parent_menu(struct menu *menu) +/** + * menu_get_menu_or_parent_menu - return the parent menu or the menu itself + * @menu: pointer to the menu + * return: the parent menu. If the given argument is already a menu, return + * itself. 
+ */ +struct menu *menu_get_menu_or_parent_menu(struct menu *menu) { enum prop_type type; diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 68640e507ec4..dc056b0a8fde 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -577,7 +577,7 @@ void ConfigList::setParentMenu(void) oldroot = rootEntry; if (rootEntry == &rootmenu) return; - setRootMenu(menu_get_parent_menu(rootEntry->parent)); + setRootMenu(menu_get_menu_or_parent_menu(rootEntry->parent)); QTreeWidgetItemIterator it(this); while (*it) { @@ -1540,7 +1540,7 @@ void ConfigMainWindow::setMenuLink(struct menu *menu) switch (configList->mode) { case singleMode: list = configList; - parent = menu_get_parent_menu(menu); + parent = menu_get_menu_or_parent_menu(menu); if (!parent) return; list->setRootMenu(parent); @@ -1551,7 +1551,7 @@ void ConfigMainWindow::setMenuLink(struct menu *menu) configList->clearSelection(); list = configList; } else { - parent = menu_get_parent_menu(menu->parent); + parent = menu_get_menu_or_parent_menu(menu->parent); if (!parent) return; From 7d1bfaa457686b1e791de03450a3d49f28bdd022 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:53 +0900 Subject: [PATCH 0417/2411] kconfig: re-add menu_get_parent_menu() that returns parent menu This helper returns the parent menu, or NULL if there is no parent. The main difference from the previous version is that it always returns the parent menu even when the given argument is itself a menu. 
Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/lkc.h | 1 + scripts/kconfig/menu.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h index 5cc85c3d4aaa..37b606c74bff 100644 --- a/scripts/kconfig/lkc.h +++ b/scripts/kconfig/lkc.h @@ -97,6 +97,7 @@ bool menu_is_empty(struct menu *menu); bool menu_is_visible(struct menu *menu); bool menu_has_prompt(const struct menu *menu); const char *menu_get_prompt(const struct menu *menu); +struct menu *menu_get_parent_menu(struct menu *menu); struct menu *menu_get_menu_or_parent_menu(struct menu *menu); int get_jump_key_char(void); struct gstr get_relations_str(struct symbol **sym_arr, struct list_head *head); diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index ccb690bbf05d..a5e5b4fdcd93 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -575,6 +575,20 @@ const char *menu_get_prompt(const struct menu *menu) return NULL; } +/** + * menu_get_parent_menu - return the parent menu or NULL + * @menu: pointer to the menu + * return: the parent menu, or NULL if there is no parent. + */ +struct menu *menu_get_parent_menu(struct menu *menu) +{ + for (menu = menu->parent; menu; menu = menu->parent) + if (menu->type == M_MENU) + return menu; + + return NULL; +} + /** * menu_get_menu_or_parent_menu - return the parent menu or the menu itself * @menu: pointer to the menu From 2f2d60f489f0b2410f33103fa42296f7466673e0 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:54 +0900 Subject: [PATCH 0418/2411] kconfig: gconf: make columns resizable The variable "resizeable" is a typo and always set to FALSE, resulting in dead code in init_right_tree(). It is unclear why column resizing should be disabled. Enable it.
Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/gconf.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index c0f46f189060..a3978d3420d1 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -30,7 +30,6 @@ static gint view_mode = FULL_VIEW; static gboolean show_name = TRUE; static gboolean show_range = TRUE; static gboolean show_value = TRUE; -static gboolean resizeable = FALSE; static int opt_mode = OPT_NORMAL; GtkWidget *main_wnd = NULL; @@ -312,11 +311,9 @@ static void init_right_tree(void) column = gtk_tree_view_get_column(view, COL_VALUE); gtk_tree_view_column_set_visible(column, show_value); - if (resizeable) { - for (i = 0; i < COL_VALUE; i++) { - column = gtk_tree_view_get_column(view, i); - gtk_tree_view_column_set_resizable(column, TRUE); - } + for (i = 0; i < COL_VALUE; i++) { + column = gtk_tree_view_get_column(view, i); + gtk_tree_view_column_set_resizable(column, TRUE); } sel = gtk_tree_view_get_selection(view); From f72ed4c6a375e52a3f4b75615e4a89d29d8acea7 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:55 +0900 Subject: [PATCH 0419/2411] kconfig: gconf: fix potential memory leak in renderer_edited() If gtk_tree_model_get_iter() fails, gtk_tree_path_free() is not called. 
Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap --- scripts/kconfig/gconf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index a3978d3420d1..769f38307f34 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -745,7 +745,7 @@ static void renderer_edited(GtkCellRendererText * cell, struct symbol *sym; if (!gtk_tree_model_get_iter(model2, &iter, path)) - return; + goto free; gtk_tree_model_get(model2, &iter, COL_MENU, &menu, -1); sym = menu->sym; @@ -757,6 +757,7 @@ static void renderer_edited(GtkCellRendererText * cell, update_tree(&rootmenu, NULL); +free: gtk_tree_path_free(path); } From fc38b7ff879683669bd9ff5dc7e7b6aeeb07bf2a Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 18 Jun 2025 08:28:06 +0900 Subject: [PATCH 0420/2411] rust: time: Seal the HrTimerMode trait Prevent downstream crates or drivers from implementing `HrTimerMode` for arbitrary types, which could otherwise lead to unsupported behavior. Introduce a `private::Sealed` trait and implement it for all types that implement `HrTimerMode`.
Signed-off-by: FUJITA Tomonori Reviewed-by: Boqun Feng Link: https://lore.kernel.org/r/20250617232806.3950141-1-fujita.tomonori@gmail.com Signed-off-by: Andreas Hindborg --- rust/kernel/time/hrtimer.rs | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs index 1b81bf306d16..8818775afaf6 100644 --- a/rust/kernel/time/hrtimer.rs +++ b/rust/kernel/time/hrtimer.rs @@ -444,8 +444,27 @@ fn as_nanos(&self) -> i64 { } } +mod private { + use crate::time::ClockSource; + + pub trait Sealed {} + + impl Sealed for super::AbsoluteMode {} + impl Sealed for super::RelativeMode {} + impl Sealed for super::AbsolutePinnedMode {} + impl Sealed for super::RelativePinnedMode {} + impl Sealed for super::AbsoluteSoftMode {} + impl Sealed for super::RelativeSoftMode {} + impl Sealed for super::AbsolutePinnedSoftMode {} + impl Sealed for super::RelativePinnedSoftMode {} + impl Sealed for super::AbsoluteHardMode {} + impl Sealed for super::RelativeHardMode {} + impl Sealed for super::AbsolutePinnedHardMode {} + impl Sealed for super::RelativePinnedHardMode {} +} + /// Operational mode of [`HrTimer`]. -pub trait HrTimerMode { +pub trait HrTimerMode: private::Sealed { /// The C representation of hrtimer mode. const C_MODE: bindings::hrtimer_mode; From d4b29ddf82a458935f1bd4909b8a7a13df9d3bdc Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 17 Jun 2025 23:41:55 +0900 Subject: [PATCH 0421/2411] rust: time: Add wrapper for fsleep() function Add a wrapper for fsleep(), a flexible sleep function in include/linux/delay.h which typically deals with hardware delays. The kernel supports several sleep functions to handle various lengths of delay. This adds fsleep(), which automatically chooses the best sleep method based on a duration. fsleep() can only be used in a nonatomic context.
This requirement is not checked by these abstractions, but it is intended that klint [1] or a similar tool will be used to check it in the future. Link: https://rust-for-linux.com/klint [1] Reviewed-by: Gary Guo Reviewed-by: Alice Ryhl Reviewed-by: Fiona Behrens Tested-by: Daniel Almeida Reviewed-by: Andreas Hindborg Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250617144155.3903431-3-fujita.tomonori@gmail.com Signed-off-by: Andreas Hindborg --- rust/helpers/time.c | 6 +++++ rust/kernel/time.rs | 1 + rust/kernel/time/delay.rs | 49 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+) create mode 100644 rust/kernel/time/delay.rs diff --git a/rust/helpers/time.c b/rust/helpers/time.c index 08755db43fc2..a318e9fa4408 100644 --- a/rust/helpers/time.c +++ b/rust/helpers/time.c @@ -1,8 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include +void rust_helper_fsleep(unsigned long usecs) +{ + fsleep(usecs); +} + ktime_t rust_helper_ktime_get_real(void) { return ktime_get_real(); diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs index 5a9ca0d3b7d4..64c8dcf548d6 100644 --- a/rust/kernel/time.rs +++ b/rust/kernel/time.rs @@ -26,6 +26,7 @@ use core::marker::PhantomData; +pub mod delay; pub mod hrtimer; /// The number of nanoseconds per microsecond. diff --git a/rust/kernel/time/delay.rs b/rust/kernel/time/delay.rs new file mode 100644 index 000000000000..eb8838da62bc --- /dev/null +++ b/rust/kernel/time/delay.rs @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Delay and sleep primitives. +//! +//! This module contains the kernel APIs related to delay and sleep that +//! have been ported or wrapped for usage by Rust code in the kernel. +//! +//! C header: [`include/linux/delay.h`](srctree/include/linux/delay.h). + +use super::Delta; +use crate::prelude::*; + +/// Sleeps for a given duration at least. 
+/// +/// Equivalent to the C side [`fsleep()`], flexible sleep function, +/// which automatically chooses the best sleep method based on a duration. +/// +/// `delta` must be within `[0, i32::MAX]` microseconds; +/// otherwise, it is erroneous behavior. That is, it is considered a bug +/// to call this function with an out-of-range value, in which case the function +/// will sleep for at least the maximum value in the range and may warn +/// in the future. +/// +/// The behavior above differs from the C side [`fsleep()`] for which out-of-range +/// values mean "infinite timeout" instead. +/// +/// This function can only be used in a nonatomic context. +/// +/// [`fsleep()`]: https://docs.kernel.org/timers/delay_sleep_functions.html#c.fsleep +pub fn fsleep(delta: Delta) { + // The maximum value is set to `i32::MAX` microseconds to prevent integer + // overflow inside fsleep, which could lead to unintentional infinite sleep. + const MAX_DELTA: Delta = Delta::from_micros(i32::MAX as i64); + + let delta = if (Delta::ZERO..=MAX_DELTA).contains(&delta) { + delta + } else { + // TODO: Add WARN_ONCE() when it's supported. + MAX_DELTA + }; + + // SAFETY: It is always safe to call `fsleep()` with any duration. + unsafe { + // Convert the duration to microseconds and round up to preserve + // the guarantee; `fsleep()` sleeps for at least the provided duration, + // but that it may sleep for longer under some circumstances. + bindings::fsleep(delta.as_micros_ceil() as c_ulong) + } +} From f86272350f38d3fa4049944257a1b4260f3eba2e Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 30 Jun 2025 15:23:03 +0200 Subject: [PATCH 0422/2411] dm-mpath: make dm_unregister_path_selector return void dm_unregister_path_selector may only return error if there's a bug in the code - so we make it return void and print a warning if the user abuses this function to unregister a target that was not registered. 
Signed-off-by: Mikulas Patocka --- drivers/md/dm-path-selector.c | 8 +++----- drivers/md/dm-path-selector.h | 2 +- drivers/md/dm-ps-historical-service-time.c | 5 +---- drivers/md/dm-ps-io-affinity.c | 5 +---- drivers/md/dm-ps-queue-length.c | 5 +---- drivers/md/dm-ps-round-robin.c | 5 +---- drivers/md/dm-ps-service-time.c | 5 +---- 7 files changed, 9 insertions(+), 26 deletions(-) diff --git a/drivers/md/dm-path-selector.c b/drivers/md/dm-path-selector.c index 3e4cb81ce512..d0b883fabfeb 100644 --- a/drivers/md/dm-path-selector.c +++ b/drivers/md/dm-path-selector.c @@ -117,16 +117,16 @@ int dm_register_path_selector(struct path_selector_type *pst) } EXPORT_SYMBOL_GPL(dm_register_path_selector); -int dm_unregister_path_selector(struct path_selector_type *pst) +void dm_unregister_path_selector(struct path_selector_type *pst) { struct ps_internal *psi; down_write(&_ps_lock); psi = __find_path_selector_type(pst->name); - if (!psi) { + if (WARN_ON(!psi)) { up_write(&_ps_lock); - return -EINVAL; + return; } list_del(&psi->list); @@ -134,7 +134,5 @@ int dm_unregister_path_selector(struct path_selector_type *pst) up_write(&_ps_lock); kfree(psi); - - return 0; } EXPORT_SYMBOL_GPL(dm_unregister_path_selector); diff --git a/drivers/md/dm-path-selector.h b/drivers/md/dm-path-selector.h index 3861b2d8b963..7b2270532e64 100644 --- a/drivers/md/dm-path-selector.h +++ b/drivers/md/dm-path-selector.h @@ -96,7 +96,7 @@ struct path_selector_type { int dm_register_path_selector(struct path_selector_type *type); /* Unregister a path selector */ -int dm_unregister_path_selector(struct path_selector_type *type); +void dm_unregister_path_selector(struct path_selector_type *type); /* Returns a registered path selector type */ struct path_selector_type *dm_get_path_selector(const char *name); diff --git a/drivers/md/dm-ps-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c index b49e10d76d03..9c58a72c6e55 100644 --- a/drivers/md/dm-ps-historical-service-time.c +++ 
b/drivers/md/dm-ps-historical-service-time.c @@ -551,10 +551,7 @@ static int __init dm_hst_init(void) static void __exit dm_hst_exit(void) { - int r = dm_unregister_path_selector(&hst_ps); - - if (r < 0) - DMERR("unregister failed %d", r); + dm_unregister_path_selector(&hst_ps); } module_init(dm_hst_init); diff --git a/drivers/md/dm-ps-io-affinity.c b/drivers/md/dm-ps-io-affinity.c index 716807e511ee..80415a045c68 100644 --- a/drivers/md/dm-ps-io-affinity.c +++ b/drivers/md/dm-ps-io-affinity.c @@ -260,10 +260,7 @@ static int __init dm_ioa_init(void) static void __exit dm_ioa_exit(void) { - int ret = dm_unregister_path_selector(&ioa_ps); - - if (ret < 0) - DMERR("unregister failed %d", ret); + dm_unregister_path_selector(&ioa_ps); } module_init(dm_ioa_init); diff --git a/drivers/md/dm-ps-queue-length.c b/drivers/md/dm-ps-queue-length.c index e305f05ad1e5..93812c0ecc32 100644 --- a/drivers/md/dm-ps-queue-length.c +++ b/drivers/md/dm-ps-queue-length.c @@ -270,10 +270,7 @@ static int __init dm_ql_init(void) static void __exit dm_ql_exit(void) { - int r = dm_unregister_path_selector(&ql_ps); - - if (r < 0) - DMERR("unregister failed %d", r); + dm_unregister_path_selector(&ql_ps); } module_init(dm_ql_init); diff --git a/drivers/md/dm-ps-round-robin.c b/drivers/md/dm-ps-round-robin.c index d1745b123dc1..c7f2869d8978 100644 --- a/drivers/md/dm-ps-round-robin.c +++ b/drivers/md/dm-ps-round-robin.c @@ -230,10 +230,7 @@ static int __init dm_rr_init(void) static void __exit dm_rr_exit(void) { - int r = dm_unregister_path_selector(&rr_ps); - - if (r < 0) - DMERR("unregister failed %d", r); + dm_unregister_path_selector(&rr_ps); } module_init(dm_rr_init); diff --git a/drivers/md/dm-ps-service-time.c b/drivers/md/dm-ps-service-time.c index 969d31c40272..239c5850c2b1 100644 --- a/drivers/md/dm-ps-service-time.c +++ b/drivers/md/dm-ps-service-time.c @@ -351,10 +351,7 @@ static int __init dm_st_init(void) static void __exit dm_st_exit(void) { - int r = 
dm_unregister_path_selector(&st_ps); - - if (r < 0) - DMERR("unregister failed %d", r); + dm_unregister_path_selector(&st_ps); } module_init(dm_st_init); From 6e11952a6abc4641dc8ae63f01b318b31b44e8db Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 30 Jun 2025 15:24:22 +0200 Subject: [PATCH 0423/2411] dm-mpath: don't print the "loaded" message if registering fails If dm_register_path_selector fails, don't print the "version X loaded" message. Signed-off-by: Mikulas Patocka --- drivers/md/dm-ps-historical-service-time.c | 4 +++- drivers/md/dm-ps-queue-length.c | 4 +++- drivers/md/dm-ps-round-robin.c | 4 +++- drivers/md/dm-ps-service-time.c | 4 +++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-ps-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c index 9c58a72c6e55..f07e773d9cc0 100644 --- a/drivers/md/dm-ps-historical-service-time.c +++ b/drivers/md/dm-ps-historical-service-time.c @@ -541,8 +541,10 @@ static int __init dm_hst_init(void) { int r = dm_register_path_selector(&hst_ps); - if (r < 0) + if (r < 0) { DMERR("register failed %d", r); + return r; + } DMINFO("version " HST_VERSION " loaded"); diff --git a/drivers/md/dm-ps-queue-length.c b/drivers/md/dm-ps-queue-length.c index 93812c0ecc32..9c68701ed7a4 100644 --- a/drivers/md/dm-ps-queue-length.c +++ b/drivers/md/dm-ps-queue-length.c @@ -260,8 +260,10 @@ static int __init dm_ql_init(void) { int r = dm_register_path_selector(&ql_ps); - if (r < 0) + if (r < 0) { DMERR("register failed %d", r); + return r; + } DMINFO("version " QL_VERSION " loaded"); diff --git a/drivers/md/dm-ps-round-robin.c b/drivers/md/dm-ps-round-robin.c index c7f2869d8978..0c12f4073461 100644 --- a/drivers/md/dm-ps-round-robin.c +++ b/drivers/md/dm-ps-round-robin.c @@ -220,8 +220,10 @@ static int __init dm_rr_init(void) { int r = dm_register_path_selector(&rr_ps); - if (r < 0) + if (r < 0) { DMERR("register failed %d", r); + return r; + } DMINFO("version " RR_VERSION " loaded"); diff
--git a/drivers/md/dm-ps-service-time.c b/drivers/md/dm-ps-service-time.c index 239c5850c2b1..0543fe7969c4 100644 --- a/drivers/md/dm-ps-service-time.c +++ b/drivers/md/dm-ps-service-time.c @@ -341,8 +341,10 @@ static int __init dm_st_init(void) { int r = dm_register_path_selector(&st_ps); - if (r < 0) + if (r < 0) { DMERR("register failed %d", r); + return r; + } DMINFO("version " ST_VERSION " loaded"); From 844f962ca6bf5b01d0af0bc62a7f06135581fe92 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 30 Jun 2025 11:16:13 +0200 Subject: [PATCH 0424/2411] perf test: perf header test fails on s390 commit 2d584688643fa ("perf test: Add header shell test") introduced a new test case for perf header. It fails on s390 because call graph option -g is not supported on s390. Also the option --call-graph dwarf is only supported for the event cpu-clock. Remove this option and the test succeeds. Output after: # ./perf test 76 76: perf header tests : Ok Fixes: 2d584688643fa ("perf test: Add header shell test") Signed-off-by: Thomas Richter Reviewed-by: Ian Rogers Acked-by: Sumanth Korikkar Link: https://lore.kernel.org/r/20250630091613.3061664-1-tmricht@linux.ibm.com Signed-off-by: Namhyung Kim --- tools/perf/tests/shell/header.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/header.sh b/tools/perf/tests/shell/header.sh index 813831cff0bd..412263de6ed7 100755 --- a/tools/perf/tests/shell/header.sh +++ b/tools/perf/tests/shell/header.sh @@ -51,7 +51,7 @@ check_header_output() { test_file() { echo "Test perf header file" - perf record -o "${perfdata}" -g -- perf test -w noploop + perf record -o "${perfdata}" -- perf test -w noploop perf report --header-only -I -i "${perfdata}" > "${script_output}" check_header_output @@ -61,7 +61,7 @@ test_file() { test_pipe() { echo "Test perf header pipe" - perf record -o - -g -- perf test -w noploop | perf report --header-only -I -i - > "${script_output}" + perf record -o - -- perf test -w 
noploop | perf report --header-only -I -i - > "${script_output}" check_header_output echo "Test perf header pipe [Done]" From bb986e4720009da4221aeeeed7dec3f56d96502c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 30 Jun 2025 13:51:28 +0100 Subject: [PATCH 0425/2411] perf drm_pmu: Fix spelling mistake "bufers" -> "buffers" There are spelling mistakes in some literal strings. Fix these. Fixes: 28917cb17f9d ("perf drm_pmu: Add a tool like PMU to expose DRM information") Signed-off-by: Colin Ian King Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250630125128.562895-1-colin.i.king@gmail.com Signed-off-by: Namhyung Kim --- tools/perf/util/drm_pmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/drm_pmu.c b/tools/perf/util/drm_pmu.c index 17385a10005b..988890f37ba7 100644 --- a/tools/perf/util/drm_pmu.c +++ b/tools/perf/util/drm_pmu.c @@ -210,17 +210,17 @@ static int read_drm_pmus_cb(void *args, int fdinfo_dir_fd, const char *fd_name) } if (starts_with(line, "drm-purgeable-")) { add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES, - "Size of resident and purgeable memory bufers"); + "Size of resident and purgeable memory buffers"); continue; } if (starts_with(line, "drm-resident-")) { add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES, - "Size of resident memory bufers"); + "Size of resident memory buffers"); continue; } if (starts_with(line, "drm-shared-")) { add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES, - "Size of shared memory bufers"); + "Size of shared memory buffers"); continue; } if (starts_with(line, "drm-total-cycles-")) { From 5ceedc09f27f87a6adc00d522b06dcce990a1986 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 27 Jun 2025 18:55:53 -0700 Subject: [PATCH 0426/2411] perf test: Add basic callgraph test to record testing Give some basic perf record callgraph coverage. 
Signed-off-by: Ian Rogers Reviewed-by: James Clark Tested-by: Thomas Richter Link: https://lore.kernel.org/r/20250628015553.1270748-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/tests/shell/record.sh | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 2022a4f739be..b1ad24fb3b33 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -12,8 +12,10 @@ shelldir=$(dirname "$0") . "${shelldir}"/lib/perf_has_symbol.sh testsym="test_loop" +testsym2="brstack" skip_test_missing_symbol ${testsym} +skip_test_missing_symbol ${testsym2} err=0 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) @@ -359,6 +361,33 @@ test_precise_max() { fi } +test_callgraph() { + echo "Callgraph test" + + case $(uname -m) + in s390x) + cmd_flags="--call-graph dwarf -e cpu-clock";; + *) + cmd_flags="-g";; + esac + + if ! perf record -o "${perfdata}" $cmd_flags perf test -w brstack + then + echo "Callgraph test [Failed missing output]" + err=1 + return + fi + + if ! perf report -i "${perfdata}" 2>&1 | grep "${testsym2}" + then + echo "Callgraph test [Failed missing symbol]" + err=1 + return + fi + + echo "Callgraph test [Success]" +} + # raise the limit of file descriptors to minimum if [[ $default_fd_limit -lt $min_fd_limit ]]; then ulimit -Sn $min_fd_limit @@ -374,6 +403,7 @@ test_uid test_leader_sampling test_topdown_leader_sampling test_precise_max +test_callgraph # restore the default value ulimit -Sn $default_fd_limit From 146847932278fef1ce13b5a839077e51ca019395 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 27 Jun 2025 18:58:32 -0700 Subject: [PATCH 0427/2411] perf test annotate: Use --percent-limit rather than head to reduce output The annotate test was sped up by Thomas Richter in commit 658a8805cb60 ("perf test: Speed up test case 70 annotate basic tests") by reducing the annotate output using head. 
This causes flakes on hybrid machines where the first event dumped may not have the samples for the test within it. Rather than reduce the output using `head` switch to `--percent-limit 10` which will stop annotate dumping functions that have an overhead of less than 10%, the noploop program should be using more. Add the missing objdump option for the pipe mode version of the objdump with a command test. Signed-off-by: Ian Rogers Tested-by: Thomas Richter Link: https://lore.kernel.org/r/20250628015832.1271229-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/tests/shell/annotate.sh | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tools/perf/tests/shell/annotate.sh b/tools/perf/tests/shell/annotate.sh index 16a1ccd06089..689de58e9238 100755 --- a/tools/perf/tests/shell/annotate.sh +++ b/tools/perf/tests/shell/annotate.sh @@ -53,21 +53,22 @@ test_basic() { # Generate the annotated output file if [ "x${mode}" == "xBasic" ] then - perf annotate --no-demangle -i "${perfdata}" --stdio 2> /dev/null > "${perfout}" + perf annotate --no-demangle -i "${perfdata}" --stdio --percent-limit 10 2> /dev/null > "${perfout}" else - perf annotate --no-demangle -i - --stdio 2> /dev/null < "${perfdata}" > "${perfout}" + perf annotate --no-demangle -i - --stdio 2> /dev/null --percent-limit 10 < "${perfdata}" > "${perfout}" fi # check if it has the target symbol - if ! head -250 "${perfout}" | grep -q "${testsym}" + if ! grep -q "${testsym}" "${perfout}" then echo "${mode} annotate [Failed: missing target symbol]" + cat "${perfout}" err=1 return fi # check if it has the disassembly lines - if ! head -250 "${perfout}" | grep -q "${disasm_regex}" + if ! 
grep -q "${disasm_regex}" "${perfout}" then echo "${mode} annotate [Failed: missing disasm output from default disassembler]" err=1 @@ -92,11 +93,11 @@ test_basic() { # check one more with external objdump tool (forced by --objdump option) if [ "x${mode}" == "xBasic" ] then - perf annotate --no-demangle -i "${perfdata}" --objdump=objdump 2> /dev/null > "${perfout}" + perf annotate --no-demangle -i "${perfdata}" --percent-limit 10 --objdump=objdump 2> /dev/null > "${perfout}" else - perf annotate --no-demangle -i - "${testsym}" 2> /dev/null < "${perfdata}" > "${perfout}" + perf annotate --no-demangle -i - "${testsym}" --percent-limit 10 --objdump=objdump 2> /dev/null < "${perfdata}" > "${perfout}" fi - if ! head -250 "${perfout}" | grep -q -m 3 "${disasm_regex}" + if ! grep -q -m 3 "${disasm_regex}" "${perfout}" then echo "${mode} annotate [Failed: missing disasm output from non default disassembler (using --objdump)]" err=1 From 114339ee4d66a328d186264ffa23a766542a9a15 Mon Sep 17 00:00:00 2001 From: Collin Funk Date: Fri, 27 Jun 2025 20:41:25 -0700 Subject: [PATCH 0428/2411] perf build: Specify shellcheck should use bash When someone has a global shellcheckrc file, for example at ~/.config/shellcheckrc, with the directive 'shell=sh', building perf will fail with many shellcheck errors like: In tests/shell/base_probe/test_adding_kernel.sh line 294: (( TEST_RESULT += $? )) ^---------------------^ SC3006 (warning): In POSIX sh, standalone ((..)) is undefined. For more information: https://www.shellcheck.net/wiki/SC3006 -- In POSIX sh, standalone ((..)) is... make[5]: *** [tests/Build:91: tests/shell/base_probe/test_adding_kernel.sh.shellcheck_log] Error 1 Passing the '-s bash' option ensures that it runs correctly regardless of a developers global configuration. This patch adds '-s bash' and other options to the SHELLCHECK variable in Makefile.perf and makes use of the variable consistently. 
Signed-off-by: Collin Funk Link: https://lore.kernel.org/r/63491dbc8439edf2e949d80e264b9d22332fea61.1751082075.git.collin.funk1@gmail.com Signed-off-by: Namhyung Kim --- tools/perf/Build | 2 +- tools/perf/Makefile.perf | 2 ++ tools/perf/arch/x86/Build | 2 +- tools/perf/arch/x86/tests/Build | 2 +- tools/perf/tests/Build | 2 +- tools/perf/trace/beauty/Build | 2 +- tools/perf/util/Build | 2 +- 7 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tools/perf/Build b/tools/perf/Build index 06107f1e1d42..b03cc59dabf8 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -73,7 +73,7 @@ endif $(OUTPUT)%.shellcheck_log: % $(call rule_mkdir) - $(Q)$(call echo-cmd,test)shellcheck -s bash -a -S warning "$<" > $@ || (cat $@ && rm $@ && false) + $(Q)$(call echo-cmd,test)$(SHELLCHECK) "$<" > $@ || (cat $@ && rm $@ && false) perf-y += $(SHELL_TEST_LOGS) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 62697d62f706..9b51593628c1 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -259,6 +259,8 @@ ifneq ($(SHELLCHECK),) ifeq ($(shell expr $(shell $(SHELLCHECK) --version | grep version: | \ sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \< 060), 1) SHELLCHECK := + else + SHELLCHECK := $(SHELLCHECK) -s bash -a -S warning endif endif diff --git a/tools/perf/arch/x86/Build b/tools/perf/arch/x86/Build index afae7b8f6bd6..d31a1168757c 100644 --- a/tools/perf/arch/x86/Build +++ b/tools/perf/arch/x86/Build @@ -10,6 +10,6 @@ endif $(OUTPUT)%.shellcheck_log: % $(call rule_mkdir) - $(Q)$(call echo-cmd,test)shellcheck -a -S warning "$<" > $@ || (cat $@ && rm $@ && false) + $(Q)$(call echo-cmd,test)$(SHELLCHECK) "$<" > $@ || (cat $@ && rm $@ && false) perf-test-y += $(SHELL_TEST_LOGS) diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 5e00cbfd2d56..01d5527f38c7 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -22,6 +22,6 @@ endif $(OUTPUT)%.shellcheck_log: % 
$(call rule_mkdir) - $(Q)$(call echo-cmd,test)shellcheck -a -S warning "$<" > $@ || (cat $@ && rm $@ && false) + $(Q)$(call echo-cmd,test)$(SHELLCHECK) "$<" > $@ || (cat $@ && rm $@ && false) perf-test-y += $(SHELL_TEST_LOGS) diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 2181f5a92148..d6c35dd0de3b 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -89,7 +89,7 @@ endif $(OUTPUT)%.shellcheck_log: % $(call rule_mkdir) - $(Q)$(call echo-cmd,test)shellcheck -a -S warning "$<" > $@ || (cat $@ && rm $@ && false) + $(Q)$(call echo-cmd,test)$(SHELLCHECK) "$<" > $@ || (cat $@ && rm $@ && false) perf-test-y += $(SHELL_TEST_LOGS) diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index f50ebdc445b8..561590ee8cda 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -31,6 +31,6 @@ endif $(OUTPUT)%.shellcheck_log: % $(call rule_mkdir) - $(Q)$(call echo-cmd,test)shellcheck -s bash -a -S warning "$<" > $@ || (cat $@ && rm $@ && false) + $(Q)$(call echo-cmd,test)$(SHELLCHECK) "$<" > $@ || (cat $@ && rm $@ && false) perf-y += $(SHELL_TEST_LOGS) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 45515b8f615a..12bc01c843b2 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -424,7 +424,7 @@ endif $(OUTPUT)%.shellcheck_log: % $(call rule_mkdir) - $(Q)$(call echo-cmd,test)shellcheck -a -S warning "$<" > $@ || (cat $@ && rm $@ && false) + $(Q)$(call echo-cmd,test)$(SHELLCHECK) "$<" > $@ || (cat $@ && rm $@ && false) perf-util-y += $(SHELL_TEST_LOGS) From 154467f4ad033473e5c903a03e7b9bca7df9a0fa Mon Sep 17 00:00:00 2001 From: Abinash Singh Date: Wed, 25 Jun 2025 16:35:37 +0530 Subject: [PATCH 0429/2411] f2fs: fix KMSAN uninit-value in extent_info usage KMSAN reported a use of uninitialized value in `__is_extent_mergeable()` and `__is_back_mergeable()` via the read extent tree path. 
The root cause is that `get_read_extent_info()` only initializes three fields (`fofs`, `blk`, `len`) of `struct extent_info`, leaving the remaining fields uninitialized. This leads to undefined behavior when those fields are accessed later, especially during extent merging. Fix it by zero-initializing the `extent_info` struct before population. Reported-by: syzbot+b8c1d60e95df65e827d4@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=b8c1d60e95df65e827d4 Fixes: 94afd6d6e525 ("f2fs: extent cache: support unaligned extent") Reviewed-by: Chao Yu Signed-off-by: Abinash Singh Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index cfe925a3d555..4ce19a310f38 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -414,7 +414,7 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct folio *ifolio) struct f2fs_extent *i_ext = &F2FS_INODE(&ifolio->page)->i_ext; struct extent_tree *et; struct extent_node *en; - struct extent_info ei; + struct extent_info ei = {0}; if (!__may_extent_tree(inode, EX_READ)) { /* drop largest read extent */ From 10dcaa56ef93f2a45e4c3fec27d8e1594edad110 Mon Sep 17 00:00:00 2001 From: "yohan.joung" Date: Wed, 25 Jun 2025 09:14:07 +0900 Subject: [PATCH 0430/2411] f2fs: fix to check upper boundary for value of gc_boost_zoned_gc_percent to check the upper boundary when setting gc_boost_zoned_gc_percent Fixes: 9a481a1c16f4 ("f2fs: create gc_no_zoned_gc_percent and gc_boost_zoned_gc_percent") Signed-off-by: yohan.joung Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 51be7ffb38c5..2b5c35ce7b8c 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -628,6 +628,13 @@ static ssize_t __sbi_store(struct f2fs_attr *a, return count; } + if (!strcmp(a->attr.name, 
"gc_boost_zoned_gc_percent")) { + if (t > 100) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + #ifdef CONFIG_F2FS_IOSTAT if (!strcmp(a->attr.name, "iostat_enable")) { sbi->iostat_enable = !!t; From 956b81b3d41adacbf4b51289ad49a71f9813c7b8 Mon Sep 17 00:00:00 2001 From: "yohan.joung" Date: Wed, 25 Jun 2025 09:13:35 +0900 Subject: [PATCH 0431/2411] f2fs: enable tuning of boost_zoned_gc_percent via sysfs to allow users to dynamically tune the boost_zoned_gc_percent parameter Signed-off-by: yohan.joung Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 5c1eaf55e127..11fba7636af7 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -194,6 +194,7 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) static inline bool need_to_boost_gc(struct f2fs_sb_info *sbi) { if (f2fs_sb_has_blkzoned(sbi)) - return !has_enough_free_blocks(sbi, LIMIT_BOOST_ZONED_GC); + return !has_enough_free_blocks(sbi, + sbi->gc_thread->boost_zoned_gc_percent); return has_enough_invalid_blocks(sbi); } From 55fc364b430e3b234ecb9b6e1aa48b242a8663cc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Jun 2025 10:59:43 +0800 Subject: [PATCH 0432/2411] f2fs: account and print more stats during recovery F2FS-fs (vdc): f2fs_recover_fsync_data: recovery fsync data, check_only: 0 F2FS-fs (vdc): do_recover_data: start to recover dnode F2FS-fs (vdc): recover_inode: ino = 5, name = testfile.t2, inline = 21 F2FS-fs (vdc): recover_data: ino = 5, nid = 5 (i_size: recover), range (0, 864), recovered = 1, err = 0 F2FS-fs (vdc): do_recover_data: dnode: (recoverable: 256, fsynced: 256, total: 256), recovered: (inode: 256, dentry: 1, dnode: 256), err: 0 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 
51ebed4e1521..f7d2fc86aeb1 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -624,7 +624,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, { struct dnode_of_data dn; struct node_info ni; - unsigned int start, end; + unsigned int start = 0, end = 0, index; int err = 0, recovered = 0; /* step 1: recover xattr */ @@ -679,7 +679,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, goto err; } - for (; start < end; start++, dn.ofs_in_node++) { + for (index = start; index < end; index++, dn.ofs_in_node++) { block_t src, dest; src = f2fs_data_blkaddr(&dn); @@ -708,9 +708,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } if (!file_keep_isize(inode) && - (i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT))) + (i_size_read(inode) <= ((loff_t)index << PAGE_SHIFT))) f2fs_i_size_write(inode, - (loff_t)(start + 1) << PAGE_SHIFT); + (loff_t)(index + 1) << PAGE_SHIFT); /* * dest is reserved block, invalidate src block @@ -765,9 +765,11 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, err: f2fs_put_dnode(&dn); out: - f2fs_notice(sbi, "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d", - inode->i_ino, file_keep_isize(inode) ? "keep" : "recover", - recovered, err); + f2fs_notice(sbi, "recover_data: ino = %lx, nid = %x (i_size: %s), " + "range (%u, %u), recovered = %d, err = %d", + inode->i_ino, nid_of_node(&folio->page), + file_keep_isize(inode) ? 
"keep" : "recover", + start, end, recovered, err); return err; } @@ -778,6 +780,14 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, int err = 0; block_t blkaddr; unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS; + unsigned int recoverable_dnode = 0; + unsigned int fsynced_dnode = 0; + unsigned int total_dnode = 0; + unsigned int recovered_inode = 0; + unsigned int recovered_dentry = 0; + unsigned int recovered_dnode = 0; + + f2fs_notice(sbi, "do_recover_data: start to recover dnode"); /* get node pages in the current segment */ curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); @@ -800,10 +810,12 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, f2fs_folio_put(folio, true); break; } + recoverable_dnode++; entry = get_fsync_inode(inode_list, ino_of_node(&folio->page)); if (!entry) goto next; + fsynced_dnode++; /* * inode(x) | CP | inode(x) | dnode(F) * In this case, we can lose the latest inode(x). @@ -815,6 +827,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, f2fs_folio_put(folio, true); break; } + recovered_inode++; } if (entry->last_dentry == blkaddr) { err = recover_dentry(entry->inode, &folio->page, dir_list); @@ -822,12 +835,14 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, f2fs_folio_put(folio, true); break; } + recovered_dentry++; } err = do_recover_data(sbi, entry->inode, folio); if (err) { f2fs_folio_put(folio, true); break; } + recovered_dnode++; if (entry->blkaddr == blkaddr) list_move_tail(&entry->list, tmp_inode_list); @@ -840,9 +855,15 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, f2fs_folio_put(folio, true); f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks); + total_dnode++; } if (!err) err = f2fs_allocate_new_segments(sbi); + + f2fs_notice(sbi, "do_recover_data: dnode: (recoverable: %u, fsynced: %u, " + "total: %u), recovered: (inode: %u, dentry: %u, dnode: %u), err: %d", + 
recoverable_dnode, fsynced_dnode, total_dnode, recovered_inode, + recovered_dentry, recovered_dnode, err); return err; } @@ -855,6 +876,9 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) unsigned long s_flags = sbi->sb->s_flags; bool need_writecp = false; + f2fs_notice(sbi, "f2fs_recover_fsync_data: recovery fsync data, " + "check_only: %d", check_only); + if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE)) f2fs_info(sbi, "recover fsync data on readonly fs"); From 7a96d1d73ce9de5041e891a623b722f900651561 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Jun 2025 10:38:17 +0800 Subject: [PATCH 0433/2411] f2fs: fix to check upper boundary for gc_valid_thresh_ratio This patch adds missing upper boundary check while setting gc_valid_thresh_ratio via sysfs. Fixes: e791d00bd06c ("f2fs: add valid block ratio not to do excessive GC for one time GC") Cc: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 2b5c35ce7b8c..d74472d96026 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -635,6 +635,13 @@ static ssize_t __sbi_store(struct f2fs_attr *a, return count; } + if (!strcmp(a->attr.name, "gc_valid_thresh_ratio")) { + if (t > 100) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + #ifdef CONFIG_F2FS_IOSTAT if (!strcmp(a->attr.name, "iostat_enable")) { sbi->iostat_enable = !!t; From a919ae794ad2dc6d04b3eea2f9bc86332c1630cc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Jun 2025 10:38:18 +0800 Subject: [PATCH 0434/2411] f2fs: fix to check upper boundary for gc_no_zoned_gc_percent This patch adds missing upper boundary check while setting gc_no_zoned_gc_percent via sysfs. 
Fixes: 9a481a1c16f4 ("f2fs: create gc_no_zoned_gc_percent and gc_boost_zoned_gc_percent") Cc: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index d74472d96026..bdef926b3377 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -628,6 +628,13 @@ static ssize_t __sbi_store(struct f2fs_attr *a, return count; } + if (!strcmp(a->attr.name, "gc_no_zoned_gc_percent")) { + if (t > 100) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + if (!strcmp(a->attr.name, "gc_boost_zoned_gc_percent")) { if (t > 100) return -EINVAL; From d738f708564764ed591cb6ab50d55489f87c726a Mon Sep 17 00:00:00 2001 From: wangzijie Date: Tue, 24 Jun 2025 11:59:38 +0800 Subject: [PATCH 0435/2411] f2fs: don't allow unaligned truncation to smaller/equal size on pinned file To prevent scattered pin block generation, don't allow non-section aligned truncation to smaller or equal size on pinned file. But for truncation to larger size, after commit 3fdd89b452c2("f2fs: prevent writing without fallocate() for pinned files"), we only support overwrite IO to pinned file, so we don't need to consider attr->ia_size > i_size case. Signed-off-by: wangzijie Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 63e9fb5a1c59..bc0ca697e064 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1064,6 +1064,17 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, !IS_ALIGNED(attr->ia_size, F2FS_BLK_TO_BYTES(fi->i_cluster_size))) return -EINVAL; + /* + * To prevent scattered pin block generation, we don't allow + * smaller/equal size unaligned truncation for pinned file. + * We only support overwrite IO to pinned file, so don't + * care about larger size truncation. 
+ */ + if (f2fs_is_pinned_file(inode) && + attr->ia_size <= i_size_read(inode) && + !IS_ALIGNED(attr->ia_size, + F2FS_BLK_TO_BYTES(CAP_BLKS_PER_SEC(sbi)))) + return -EINVAL; } err = setattr_prepare(idmap, dentry, attr); From 7ac6612d6b7994491ac410401ed2fbac2bdefc18 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 3 Jun 2025 11:52:54 -0700 Subject: [PATCH 0436/2411] Documentation/driver-api/cxl: Introduce conventions.rst There exist shipping platforms that bend, break, or otherwise lean on ambiguities in the CXL specification. Without driver changes to accommodate these deviations, end users are left without CXL subsystem RAS features. Specifically, provisioning, error translation, and other flows require the CXL subsystem to understand the platform's CXL topology beyond undecorated memory address ranges. Those isolated compatibility problems risk growing into deeper upstream maintenance burden if different platform vendors arrive at diverging solutions. For example, there are multiple options for resolving low-memory-mmio intersecting large-interleave-ways CXL windows. Linux should only entertain one solution to that problem. Now, with the ACPI Specification Working Group, situations like this would be resolved with the "Code First ECN" process to codify Linux expectations in a specification. In the absence of such a process for the CXL specification, create a file in Linux documentation to detail the motivations, assumptions, tradeoffs, and proposals for amending specification language. The goal is to capture the issues such that platform vendors arrive at compatible solutions for these problems and serve as a repository for potential specification updates. The expectation is to update conventions.rst along with CXL subsystem code changes to accommodate the platform topology.
[ dj: Rebased against v6.16-rc1 ] Signed-off-by: Dan Williams Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Robert Richter Link: https://patch.msgid.link/20250603185254.3730099-1-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- Documentation/driver-api/cxl/conventions.rst | 47 ++++++++++++++++++++ Documentation/driver-api/cxl/index.rst | 1 + 2 files changed, 48 insertions(+) create mode 100644 Documentation/driver-api/cxl/conventions.rst diff --git a/Documentation/driver-api/cxl/conventions.rst b/Documentation/driver-api/cxl/conventions.rst new file mode 100644 index 000000000000..da347a81a237 --- /dev/null +++ b/Documentation/driver-api/cxl/conventions.rst @@ -0,0 +1,47 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: + +======================================= +Compute Express Link: Linux Conventions +======================================= + +There exists shipping platforms that bend or break CXL specification +expectations. Record the details and the rationale for those deviations. +Borrow the ACPI Code First template format to capture the assumptions +and tradeoffs such that multiple platform implementations can follow the +same convention. + +<(template) Title> +================== + +Document +-------- +CXL Revision , Version + +License +------- +SPDX-License Identifier: CC-BY-4.0 + +Creator/Contributors +-------------------- + +Summary of the Change +--------------------- + + + + +Benefits of the Change +---------------------- + + + +References +---------- + +Detailed Description of the Change +---------------------------------- + + diff --git a/Documentation/driver-api/cxl/index.rst b/Documentation/driver-api/cxl/index.rst index 9e1414ad3357..c1106a68b67c 100644 --- a/Documentation/driver-api/cxl/index.rst +++ b/Documentation/driver-api/cxl/index.rst @@ -14,6 +14,7 @@ that have impacts on each other. The docs here break up configurations steps. theory-of-operation maturity-map + conventions .. 
toctree:: :maxdepth: 2 From 38b502e0a65215ddefaf84b672ec3908af97bacf Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Thu, 29 May 2025 13:51:13 -0700 Subject: [PATCH 0437/2411] cxl/pci: Replace mutex_lock_io() w mutex_lock() for mailbox access mutex_lock_io() differs from mutex_lock() in that it may call io_schedule() when a task must sleep waiting for the lock. This distinction only makes sense in block I/O or memory reclaim paths, where giving I/O a chance to make progress is useful. At this call site, cxl_pci_mbox_send(), the mutex protects an MMIO mailbox. The task holding the lock is not blocking I/O progress, so calling io_schedule(), as mutex_lock_io() may do, has no practical effect. Although there is no functional change, using the correct locking primitive, that more accurately reflects the semantics and intended use of the lock, improves code clarity and avoids misleading readers and tools. [ dj: Dropped fixes tag, no need to backport ] Reported-by: Alok Tiwari Closes: https://lore.kernel.org/linux-cxl/0d2af1e8-7f1b-438c-a090-fd366c8c63e0@oracle.com/ Suggested-by: Dan Williams Signed-off-by: Alison Schofield Reviewed-by: Davidlohr Bueso Reviewed-by: Dan Williams Link: https://patch.msgid.link/20250529205117.1990465-1-alison.schofield@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 785aa2af5eaa..bd100ac31672 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -379,7 +379,7 @@ static int cxl_pci_mbox_send(struct cxl_mailbox *cxl_mbox, { int rc; - mutex_lock_io(&cxl_mbox->mbox_mutex); + mutex_lock(&cxl_mbox->mbox_mutex); rc = __cxl_pci_mbox_send_cmd(cxl_mbox, cmd); mutex_unlock(&cxl_mbox->mbox_mutex); From 60da1f685a94bc9bd94caf46d953cfa43468c29e Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 27 May 2025 16:34:51 +0100 Subject: [PATCH 0438/2411] cxl_test: Limit location for fake CFMWS to mappable range Some 
architectures (e.g. arm64) only support memory hotplug operations on a restricted set of physical addresses. This applies even when we are faking some CXL fixed memory windows for the purposes of cxl_test. That range can be queried with mhp_get_pluggable_range(true). Use the minimum of the top of that range and iomem_resource.end to establish the 64GiB region used by cxl_test. From thread #2 which was related to the issue in #1. [ dj: Add CONFIG_MEMORY_HOTPLUG config check, from Alison ] Link: https://lore.kernel.org/linux-cxl/20250522145622.00002633@huawei.com/ #2 Reported-by: Itaru Kitayama Closes: https://github.com/pmem/ndctl/issues/278 #1 Reviewed-by: Dan Williams Tested-by: Itaru Kitayama Tested-by: Marc Herbert Signed-off-by: Jonathan Cameron Link: https://patch.msgid.link/20250527153451.82858-1-Jonathan.Cameron@huawei.com Signed-off-by: Dave Jiang --- tools/testing/cxl/config_check.c | 1 + tools/testing/cxl/test/cxl.c | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/testing/cxl/config_check.c b/tools/testing/cxl/config_check.c index 0902c5d6e410..a80bc2c062fe 100644 --- a/tools/testing/cxl/config_check.c +++ b/tools/testing/cxl/config_check.c @@ -14,4 +14,5 @@ void check(void) BUILD_BUG_ON(!IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)); BUILD_BUG_ON(!IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST)); BUILD_BUG_ON(!IS_ENABLED(CONFIG_DEBUG_FS)); + BUILD_BUG_ON(!IS_ENABLED(CONFIG_MEMORY_HOTPLUG)); } diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 8a5815ca870d..6a25cca5636f 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -2,6 +2,7 @@ // Copyright(c) 2021 Intel Corporation. All rights reserved.
#include +#include #include #include #include @@ -1328,6 +1329,7 @@ static int cxl_mem_init(void) static __init int cxl_test_init(void) { int rc, i; + struct range mappable; cxl_acpi_test(); cxl_core_test(); @@ -1342,8 +1344,11 @@ static __init int cxl_test_init(void) rc = -ENOMEM; goto err_gen_pool_create; } + mappable = mhp_get_pluggable_range(true); - rc = gen_pool_add(cxl_mock_pool, iomem_resource.end + 1 - SZ_64G, + rc = gen_pool_add(cxl_mock_pool, + min(iomem_resource.end + 1 - SZ_64G, + mappable.end + 1 - SZ_64G), SZ_64G, NUMA_NO_NODE); if (rc) goto err_gen_pool_add; From 5af29a583a17f9699b2a6de5e8148e8349d99a46 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Mon, 9 Jun 2025 10:10:48 -0700 Subject: [PATCH 0439/2411] Documentation: cxl: fix typos and improve clarity in memory-devices.rst This patch corrects several typographical issues and improves phrasing in memory-devices.rst: - Fixes duplicate word ("1 one") and adjusts phrasing for clarity. - Adds missing hyphen in "on-device". - Corrects "a give memory device" to "a given memory device". - fix singular/plural "decoder resource" -> "decoder resources". - Clarifies "spans to Host Bridges" -> "spans two Host Bridges". - change "at a" -> "a" These changes improve readability and accuracy of the documentation. 
Signed-off-by: Alok Tiwari Reviewed-by: Randy Dunlap Reviewed-by: Gregory Price Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250609171130.2375901-1-alok.a.tiwari@oracle.com Signed-off-by: Dave Jiang --- Documentation/driver-api/cxl/theory-of-operation.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/driver-api/cxl/theory-of-operation.rst b/Documentation/driver-api/cxl/theory-of-operation.rst index 40793dad3630..257f513e320c 100644 --- a/Documentation/driver-api/cxl/theory-of-operation.rst +++ b/Documentation/driver-api/cxl/theory-of-operation.rst @@ -29,8 +29,8 @@ Platform firmware enumerates a menu of interleave options at the "CXL root port" (Linux term for the top of the CXL decode topology). From there, PCIe topology dictates which endpoints can participate in which Host Bridge decode regimes. Each PCIe Switch in the path between the root and an endpoint introduces a point -at which the interleave can be split. For example platform firmware may say at a -given range only decodes to 1 one Host Bridge, but that Host Bridge may in turn +at which the interleave can be split. For example, platform firmware may say a +given range only decodes to one Host Bridge, but that Host Bridge may in turn interleave cycles across multiple Root Ports. An intervening Switch between a port and an endpoint may interleave cycles across multiple Downstream Switch Ports, etc. @@ -187,7 +187,7 @@ decodes them to "ports", "ports" decode to "endpoints", and "endpoints" represent the decode from SPA (System Physical Address) to DPA (Device Physical Address). -Continuing the RAID analogy, disks have both topology metadata and on device +Continuing the RAID analogy, disks have both topology metadata and on-device metadata that determine RAID set assembly. CXL Port topology and CXL Port link status is metadata for CXL.mem set assembly. The CXL Port topology is enumerated by the arrival of a CXL.mem device. I.e. 
unless and until the PCIe core attaches @@ -197,7 +197,7 @@ the Linux PCI core to tear down switch-level CXL resources because the endpoint ->remove() event cleans up the port data that was established to support that Memory Expander. -The port metadata and potential decode schemes that a give memory device may +The port metadata and potential decode schemes that a given memory device may participate can be determined via a command like:: # cxl list -BDMu -d root -m mem3 @@ -249,8 +249,8 @@ participate can be determined via a command like:: ...which queries the CXL topology to ask "given CXL Memory Expander with a kernel device name of 'mem3' which platform level decode ranges may this device participate". A given expander can participate in multiple CXL.mem interleave -sets simultaneously depending on how many decoder resource it has. In this -example mem3 can participate in one or more of a PMEM interleave that spans to +sets simultaneously depending on how many decoder resources it has. In this +example mem3 can participate in one or more of a PMEM interleave that spans two Host Bridges, a PMEM interleave that targets a single Host Bridge, a Volatile memory interleave that spans 2 Host Bridges, and a Volatile memory interleave that only targets a single Host Bridge. From 7d14230db8a76c776985d510b9f27f66aedc7b14 Mon Sep 17 00:00:00 2001 From: Nai-Chen Cheng Date: Wed, 11 Jun 2025 01:31:52 +0800 Subject: [PATCH 0440/2411] Documentation: fix typo in CXL driver documentation Fix typo 'enumates' to 'enumerate' in CXL driver operation documentation to improve readability. 
Signed-off-by: Nai-Chen Cheng Reviewed-by: Jonathan Cameron Reviewed-by: Li Ming Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20250610173152.33566-1-bleach1827@gmail.com Signed-off-by: Dave Jiang --- Documentation/driver-api/cxl/linux/cxl-driver.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/driver-api/cxl/linux/cxl-driver.rst b/Documentation/driver-api/cxl/linux/cxl-driver.rst index 9759e90c3cf1..dd6dd17dc536 100644 --- a/Documentation/driver-api/cxl/linux/cxl-driver.rst +++ b/Documentation/driver-api/cxl/linux/cxl-driver.rst @@ -20,7 +20,7 @@ The CXL driver is split into a number of drivers. * cxl_port - initializes root and provides port enumeration interface. * cxl_acpi - initializes root decoders and interacts with ACPI data. * cxl_p/mem - initializes memory devices -* cxl_pci - uses cxl_port to enumates the actual fabric hierarchy. +* cxl_pci - uses cxl_port to enumerate the actual fabric hierarchy. Driver Devices ============== From 8ad85794be61e046697df8305de34a49791d2ed1 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Sun, 15 Jun 2025 23:07:32 -0700 Subject: [PATCH 0441/2411] cxl: docs/devices Fix typos and clarify wording in device-types.rst Fix several typos and improve comment clarity in the CXL device types docs: "w/" replaced with "with" "sill" -> "still" "The allows" -> "This allows" "capacity" corrected to "capable" "more devices" corrected to "more upstream devices" in MLD description These changes improve readability and enhance the documentation quality. 
[ dj: Fix up "one or more hosts" to "one or more upstream devices" from Gregory ] Signed-off-by: Alok Tiwari Reviewed-by: Gregory Price Link: https://patch.msgid.link/20250616060737.1645393-1-alok.a.tiwari@oracle.com Signed-off-by: Dave Jiang --- Documentation/driver-api/cxl/devices/device-types.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/driver-api/cxl/devices/device-types.rst b/Documentation/driver-api/cxl/devices/device-types.rst index f5e4330c1cfe..923f5d89bc04 100644 --- a/Documentation/driver-api/cxl/devices/device-types.rst +++ b/Documentation/driver-api/cxl/devices/device-types.rst @@ -63,13 +63,13 @@ A Type-2 CXL Device: * Supports cxl.io, cxl.cache, and cxl.mem protocols * Optionally implements coherent cache and Host-Managed Device Memory -* Is typically an accelerator device w/ high bandwidth memory. +* Is typically an accelerator device with high bandwidth memory. The primary difference between a type-1 and type-2 device is the presence of host-managed device memory, which allows the device to operate on a -local memory bank - while the CPU sill has coherent DMA to the same memory. +local memory bank - while the CPU still has coherent DMA to the same memory. -The allows things like GPUs to expose their memory via DAX devices or file +This allows things like GPUs to expose their memory via DAX devices or file descriptors, allows drivers and programs direct access to device memory rather than use block-transfer semantics. @@ -89,7 +89,7 @@ basic coherent DMA. Switch ------ -A CXL switch is a device capacity of routing any CXL (and by extension, PCIe) +A CXL switch is a device capable of routing any CXL (and by extension, PCIe) protocol between an upstream, downstream, or peer devices. Many devices, such as Multi-Logical Devices, imply the presence of switching in some manner. @@ -103,7 +103,7 @@ A Single-Logical Device (SLD) is a device which presents a single device to one or more heads. 
A Multi-Logical Device (MLD) is a device which may present multiple devices -to one or more devices. +to one or more upstream devices. A Single-Headed Device exposes only a single physical connection. From d7b9056c3a6c58d41074b7ba19ab7fd34ce9f63e Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Sun, 22 Jun 2025 11:39:16 -0700 Subject: [PATCH 0442/2411] cxl/edac: Use correct format specifier for u32 val The dev_dbg() message in cxl_set_ecs_threshold() used %d for an unsigned value, which could lead to incorrect logging. Update the format specifier to %u to match variable type. Signed-off-by: Alok Tiwari Reviewed-by: Shiju Jose Reviewed-by: Alison Schofield Reviewed-by: Ira Weiny Link: https://patch.msgid.link/20250622183919.4156343-1-alok.a.tiwari@oracle.com Signed-off-by: Dave Jiang --- drivers/cxl/core/edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/edac.c b/drivers/cxl/core/edac.c index 623aaa4439c4..cd3873750e78 100644 --- a/drivers/cxl/core/edac.c +++ b/drivers/cxl/core/edac.c @@ -697,7 +697,7 @@ static int cxl_set_ecs_threshold(struct device *dev, u8 *log_cap, u16 *config, ECS_THRESHOLD_IDX_4096); break; default: - dev_dbg(dev, "Invalid CXL ECS threshold count(%d) to set\n", + dev_dbg(dev, "Invalid CXL ECS threshold count(%u) to set\n", val); dev_dbg(dev, "Supported ECS threshold counts: %u, %u, %u\n", ECS_THRESHOLD_256, ECS_THRESHOLD_1024, From 02bb13bd6c55bffb53de8da1eae87533d332235d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:56 +0900 Subject: [PATCH 0443/2411] kconfig: gconf: always destroy dialog in on_window1_delete_event() When gtk_dialog_run() returns GTK_RESPONSE_YES or GTK_RESPONSE_NO, gtk_widget_destroy() is not called, resulting in a memory leak. It is better to always destroy the dialog, even if the application is about to exit. 
Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap --- scripts/kconfig/gconf.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 769f38307f34..52d439a5119b 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -378,6 +378,7 @@ gboolean on_window1_delete_event(GtkWidget * widget, GdkEvent * event, { GtkWidget *dialog, *label; gint result; + gint ret = FALSE; if (!conf_get_changed()) return FALSE; @@ -404,17 +405,19 @@ gboolean on_window1_delete_event(GtkWidget * widget, GdkEvent * event, switch (result) { case GTK_RESPONSE_YES: on_save_activate(NULL, NULL); - return FALSE; + break; case GTK_RESPONSE_NO: - return FALSE; + break; case GTK_RESPONSE_CANCEL: case GTK_RESPONSE_DELETE_EVENT: default: - gtk_widget_destroy(dialog); - return TRUE; + ret = TRUE; + break; } - return FALSE; + gtk_widget_destroy(dialog); + + return ret; } From bff576a2a90954c6b242bf02d915c49f52b1e3cb Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:57 +0900 Subject: [PATCH 0444/2411] kconfig: gconf: remove old #ifdef GTK_CHECK_VERSION Remove old code. 
Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 52d439a5119b..b2a0208b0a5f 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -849,16 +849,12 @@ on_treeview2_button_press_event(GtkWidget * widget, struct menu *menu; gint col; -#if GTK_CHECK_VERSION(2,1,4) // bug in ctree with earlier version of GTK gint tx = (gint) event->x; gint ty = (gint) event->y; gint cx, cy; gtk_tree_view_get_path_at_pos(view, tx, ty, &path, &column, &cx, &cy); -#else - gtk_tree_view_get_cursor(view, &path, &column); -#endif if (path == NULL) return FALSE; From ab2924ab5e75380b007fad1fded809b5ba650b76 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:58 +0900 Subject: [PATCH 0445/2411] kconfig: gconf: remove empty if-block This if-block is empty. Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap --- scripts/kconfig/gconf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index b2a0208b0a5f..7960c456e3b9 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -913,8 +913,6 @@ on_treeview2_key_press_event(GtkWidget * widget, gtk_tree_view_expand_row(view, path, FALSE); return TRUE; } - if (event->keyval == GDK_KP_Enter) { - } if (widget == tree1_w) return FALSE; From 5575df3d3a216860db720ed5d3d1dcef33ab4d6d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:59 +0900 Subject: [PATCH 0446/2411] kconfig: gconf: remove meaningless code in init_main_window() The 'widget' variable is set, but not used in later code. 
Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap --- scripts/kconfig/gconf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 7960c456e3b9..4b5befa4f685 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -129,7 +129,6 @@ static void init_main_window(const gchar *glade_file) conf_set_changed_callback(conf_changed); style = gtk_widget_get_style(main_wnd); - widget = glade_xml_get_widget(xml, "toolbar1"); replace_button_icon(xml, main_wnd->window, style, "button4", (gchar **) xpm_single_view); From 08726436886e05c46efcb0655018a74c534ddead Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:00 +0900 Subject: [PATCH 0447/2411] kconfig: gconf: remove unneeded gtk_tree_view_set_headers_visible() calls The headers are visible by default. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 4b5befa4f685..32e5e9054846 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -189,7 +189,6 @@ static void init_left_tree(void) GtkTreeViewColumn *column; gtk_tree_view_set_model(view, model1); - gtk_tree_view_set_headers_visible(view, TRUE); gtk_tree_view_set_rules_hint(view, TRUE); column = gtk_tree_view_column_new(); @@ -232,7 +231,6 @@ static void init_right_tree(void) gint i; gtk_tree_view_set_model(view, model2); - gtk_tree_view_set_headers_visible(view, TRUE); gtk_tree_view_set_rules_hint(view, TRUE); column = gtk_tree_view_column_new(); From ede0a43249d47660ca977c90a279b6cfc9da314a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:01 +0900 Subject: [PATCH 0448/2411] kconfig: gconf: remove gtk_tree_view_column_set_visible() calls The columns are visible by default. 
Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 32e5e9054846..a027f0f10af9 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -297,17 +297,6 @@ static void init_right_tree(void) g_signal_connect(G_OBJECT(renderer), "edited", G_CALLBACK(renderer_edited), NULL); - column = gtk_tree_view_get_column(view, COL_NAME); - gtk_tree_view_column_set_visible(column, show_name); - column = gtk_tree_view_get_column(view, COL_NO); - gtk_tree_view_column_set_visible(column, show_range); - column = gtk_tree_view_get_column(view, COL_MOD); - gtk_tree_view_column_set_visible(column, show_range); - column = gtk_tree_view_get_column(view, COL_YES); - gtk_tree_view_column_set_visible(column, show_range); - column = gtk_tree_view_get_column(view, COL_VALUE); - gtk_tree_view_column_set_visible(column, show_value); - for (i = 0; i < COL_VALUE; i++) { column = gtk_tree_view_get_column(view, i); gtk_tree_view_column_set_resizable(column, TRUE); From dc1de6c03bc67ff918d904c7f239eaebea34b99b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:02 +0900 Subject: [PATCH 0449/2411] kconfig: gconf: remove gtk_widget_realize() calls This function is primarily used in widget implementations, and isn't very useful otherwise. 
Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index a027f0f10af9..3f9b9957f089 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -215,7 +215,6 @@ static void init_left_tree(void) sel = gtk_tree_view_get_selection(view); gtk_tree_selection_set_mode(sel, GTK_SELECTION_SINGLE); - gtk_widget_realize(tree1_w); } static void renderer_edited(GtkCellRendererText * cell, @@ -967,7 +966,6 @@ on_treeview1_button_press_event(GtkWidget * widget, display_tree_part(); } - gtk_widget_realize(tree2_w); gtk_tree_view_set_cursor(view, path, NULL, FALSE); gtk_widget_grab_focus(tree2_w); From ace8bee8369c209b647600c3dc28b529e4f44966 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:03 +0900 Subject: [PATCH 0450/2411] kconfig: gconf: remove gtk_tree_view_set_rules_hint() calls The use of this function is not recommended, and it has been deprecated since GTK 3.14.
[1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/3.14.0/gtk/gtktreeview.c#L11891 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 3f9b9957f089..4bbc8f87deb6 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -189,7 +189,6 @@ static void init_left_tree(void) GtkTreeViewColumn *column; gtk_tree_view_set_model(view, model1); - gtk_tree_view_set_rules_hint(view, TRUE); column = gtk_tree_view_column_new(); gtk_tree_view_append_column(view, column); @@ -230,7 +229,6 @@ static void init_right_tree(void) gint i; gtk_tree_view_set_model(view, model2); - gtk_tree_view_set_rules_hint(view, TRUE); column = gtk_tree_view_column_new(); gtk_tree_view_append_column(view, column); From f931a5d37a17f941492392b13f0ad67b1bac8bef Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:04 +0900 Subject: [PATCH 0451/2411] kconfig: gconf: remove unnecessary gtk_set_locale() call gtk_set_locale() has been deprecated since version 2.24, and setlocale() should be used directly. [1] However, gtk_init() automatically does this, so there is typically no point in calling this function. [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/gtk/gtkmain.c#L1152 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 4bbc8f87deb6..3e632a325c10 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -1367,7 +1367,6 @@ int main(int ac, char *av[]) gchar *glade_file; /* GTK stuffs */ - gtk_set_locale(); gtk_init(&ac, &av); glade_init(); From a54b0397d36706ce6f60e0e56709ad94791eda45 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:05 +0900 Subject: [PATCH 0452/2411] kconfig: gconf: remove internal-child="image" nodes from glade These nodes do not appear to serve anything useful. 
Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.glade | 84 ------------------------------------- 1 file changed, 84 deletions(-) diff --git a/scripts/kconfig/gconf.glade b/scripts/kconfig/gconf.glade index aa483cb32755..19b80f2ec1ff 100644 --- a/scripts/kconfig/gconf.glade +++ b/scripts/kconfig/gconf.glade @@ -48,18 +48,6 @@ True - - - - True - gtk-open - 1 - 0.5 - 0.5 - 0 - 0 - - @@ -71,18 +59,6 @@ True - - - - True - gtk-save - 1 - 0.5 - 0.5 - 0 - 0 - - @@ -93,18 +69,6 @@ Save _as True - - - - True - gtk-save-as - 1 - 0.5 - 0.5 - 0 - 0 - - @@ -121,18 +85,6 @@ True - - - - True - gtk-quit - 1 - 0.5 - 0.5 - 0 - 0 - - @@ -244,18 +196,6 @@ True - - - - True - gtk-dialog-question - 1 - 0.5 - 0.5 - 0 - 0 - - @@ -266,18 +206,6 @@ True - - - - True - gtk-properties - 1 - 0.5 - 0.5 - 0 - 0 - - @@ -287,18 +215,6 @@ _License True - - - - True - gtk-justify-fill - 1 - 0.5 - 0.5 - 0 - 0 - - From b3841b501c4a58ba20f190afd25aa4b93cd664f9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:06 +0900 Subject: [PATCH 0453/2411] kconfig: gconf: remove parents[] array and indent variable The parents[] array is used to store the GtkTreeIter of parent nodes, but this can be simplified: we can pass a GtkTreeIter pointer down when _display_tree() recurses. 
Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 49 ++++++++++++----------------------------- 1 file changed, 14 insertions(+), 35 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 3e632a325c10..432a467e3250 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -47,8 +47,6 @@ GdkColor color; GtkTreeStore *tree1, *tree2, *tree; GtkTreeModel *model1, *model2; -static GtkTreeIter *parents[256]; -static gint indent; static struct menu *current; // current node for SINGLE view static struct menu *browsed; // browsed node for SPLIT view @@ -153,8 +151,6 @@ static void init_main_window(const gchar *glade_file) static void init_tree_model(void) { - gint i; - tree = tree2 = gtk_tree_store_new(COL_NUMBER, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, @@ -166,9 +162,6 @@ static void init_tree_model(void) G_TYPE_BOOLEAN); model2 = GTK_TREE_MODEL(tree2); - for (parents[0] = NULL, i = 1; i < 256; i++) - parents[i] = (GtkTreeIter *) g_malloc(sizeof(GtkTreeIter)); - tree1 = gtk_tree_store_new(COL_NUMBER, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, @@ -1131,18 +1124,6 @@ static void set_node(GtkTreeIter * node, struct menu *menu, gchar ** row) g_object_unref(pix); } - -/* Add a node to the tree */ -static void place_node(struct menu *menu, char **row) -{ - GtkTreeIter *parent = parents[indent - 1]; - GtkTreeIter *node = parents[indent]; - - gtk_tree_store_append(tree, node, parent); - set_node(node, menu, row); -} - - /* Find a node in the GTK+ tree */ static GtkTreeIter found; @@ -1193,9 +1174,6 @@ static void update_tree(struct menu *src, GtkTreeIter * dst) struct symbol *sym; struct menu *menu1, *menu2; - if (src == &rootmenu) - indent = 1; - valid = gtk_tree_model_iter_children(model2, child2, dst); for (child1 = src->list; child1; child1 = child1->next) { @@ -1253,9 +1231,7 @@ static void update_tree(struct menu *src, GtkTreeIter * dst) set_node(child2, menu1, fill_row(menu1)); } - 
indent++; update_tree(child1, child2); - indent--; valid = gtk_tree_model_iter_next(model2, child2); } @@ -1263,16 +1239,15 @@ static void update_tree(struct menu *src, GtkTreeIter * dst) /* Display the whole tree (single/split/full view) */ -static void display_tree(struct menu *menu) +static void _display_tree(struct menu *menu, GtkTreeIter *parent) { struct property *prop; struct menu *child; enum prop_type ptype; + GtkTreeIter iter; - if (menu == &rootmenu) { - indent = 1; + if (menu == &rootmenu) current = &rootmenu; - } for (child = menu->list; child; child = child->next) { prop = child->prompt; @@ -1290,8 +1265,10 @@ static void display_tree(struct menu *menu) if ((opt_mode == OPT_NORMAL && menu_is_visible(child)) || (opt_mode == OPT_PROMPT && menu_has_prompt(child)) || - (opt_mode == OPT_ALL && menu_get_prompt(child))) - place_node(child, fill_row(child)); + (opt_mode == OPT_ALL && menu_get_prompt(child))) { + gtk_tree_store_append(tree, &iter, parent); + set_node(&iter, child, fill_row(child)); + } if ((view_mode != FULL_VIEW) && (ptype == P_MENU) && (tree == tree2)) @@ -1308,14 +1285,16 @@ static void display_tree(struct menu *menu) if (((view_mode == SINGLE_VIEW) && (menu->flags & MENU_ROOT)) || (view_mode == FULL_VIEW) - || (view_mode == SPLIT_VIEW)) { - indent++; - display_tree(child); - indent--; - } + || (view_mode == SPLIT_VIEW)) + _display_tree(child, &iter); } } +static void display_tree(struct menu *menu) +{ + _display_tree(menu, NULL); +} + /* Display a part of the tree starting at current node (single/split view) */ static void display_tree_part(void) { From 9b8338fd45fc10961130bc2477aa72e4484e2732 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:07 +0900 Subject: [PATCH 0454/2411] kconfig: gconf: remove unnecessary NULL checks for tree1 and tree2 The tree1 and tree2 variables are initialized earlier in init_tree_model(), so the NULL checks are redundant. 
Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 432a467e3250..2ab000adcced 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -678,8 +678,7 @@ void on_split_clicked(GtkButton * button, gpointer user_data) gtk_widget_show(tree1_w); gtk_window_get_default_size(GTK_WINDOW(main_wnd), &w, &h); gtk_paned_set_position(GTK_PANED(hpaned), w / 2); - if (tree2) - gtk_tree_store_clear(tree2); + gtk_tree_store_clear(tree2); display_list(); /* Disable back btn, like in full mode. */ @@ -691,8 +690,7 @@ void on_full_clicked(GtkButton * button, gpointer user_data) { view_mode = FULL_VIEW; gtk_widget_hide(tree1_w); - if (tree2) - gtk_tree_store_clear(tree2); + gtk_tree_store_clear(tree2); display_tree(&rootmenu); gtk_widget_set_sensitive(back_btn, FALSE); } @@ -1298,8 +1296,7 @@ static void display_tree(struct menu *menu) /* Display a part of the tree starting at current node (single/split view) */ static void display_tree_part(void) { - if (tree2) - gtk_tree_store_clear(tree2); + gtk_tree_store_clear(tree2); if (view_mode == SINGLE_VIEW) display_tree(current); else if (view_mode == SPLIT_VIEW) @@ -1312,8 +1309,7 @@ static void display_tree_part(void) /* Display the list in the left frame (split view) */ static void display_list(void) { - if (tree1) - gtk_tree_store_clear(tree1); + gtk_tree_store_clear(tree1); tree = tree1; display_tree(&rootmenu); From 8e3136eb27211eaf0560543dd0ee4698c6eb751f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:08 +0900 Subject: [PATCH 0455/2411] kconfig: gconf: remove unneeded variable in on_split_clicked() The height of the window is not used here. Passing NULL to gtk_window_get_default_size() is allowed. 
[1] [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/gtk/gtkwindow.c#L3974 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 2ab000adcced..c78eded5c01b 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -673,10 +673,10 @@ void on_single_clicked(GtkButton * button, gpointer user_data) void on_split_clicked(GtkButton * button, gpointer user_data) { - gint w, h; + gint w; view_mode = SPLIT_VIEW; gtk_widget_show(tree1_w); - gtk_window_get_default_size(GTK_WINDOW(main_wnd), &w, &h); + gtk_window_get_default_size(GTK_WINDOW(main_wnd), &w, NULL); gtk_paned_set_position(GTK_PANED(hpaned), w / 2); gtk_tree_store_clear(tree2); display_list(); From 57b63d17f73e2d5576e57521fb10307b91439b72 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:09 +0900 Subject: [PATCH 0456/2411] kconfig: gconf: remove unneeded variables in on_treeview*_button_press_event() Not all position parameters are used here. Passing NULL to gtk_tree_view_get_cursor() or gtk_tree_view_get_path_at_pos() is allowed. 
[1] [2] [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/gtk/gtktreeview.c#L12638 [2]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/gtk/gtktreeview.c#L12795 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index c78eded5c01b..ab2e0df21037 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -822,13 +822,10 @@ on_treeview2_button_press_event(GtkWidget * widget, GtkTreeIter iter; struct menu *menu; gint col; - gint tx = (gint) event->x; gint ty = (gint) event->y; - gint cx, cy; - gtk_tree_view_get_path_at_pos(view, tx, ty, &path, &column, &cx, - &cy); + gtk_tree_view_get_path_at_pos(view, tx, ty, &path, &column, NULL, NULL); if (path == NULL) return FALSE; @@ -871,12 +868,11 @@ on_treeview2_key_press_event(GtkWidget * widget, { GtkTreeView *view = GTK_TREE_VIEW(widget); GtkTreePath *path; - GtkTreeViewColumn *column; GtkTreeIter iter; struct menu *menu; gint col; - gtk_tree_view_get_cursor(view, &path, &column); + gtk_tree_view_get_cursor(view, &path, NULL); if (path == NULL) return FALSE; @@ -930,16 +926,12 @@ on_treeview1_button_press_event(GtkWidget * widget, { GtkTreeView *view = GTK_TREE_VIEW(widget); GtkTreePath *path; - GtkTreeViewColumn *column; GtkTreeIter iter; struct menu *menu; - gint tx = (gint) event->x; gint ty = (gint) event->y; - gint cx, cy; - gtk_tree_view_get_path_at_pos(view, tx, ty, &path, &column, &cx, - &cy); + gtk_tree_view_get_path_at_pos(view, tx, ty, &path, NULL, NULL, NULL); if (path == NULL) return FALSE; From b4809e25e2bf05de6398830f0990abe5dff49ac5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:10 +0900 Subject: [PATCH 0457/2411] kconfig: gconf: remove unused 'color' variable This is not used at all. 
Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index ab2e0df21037..4fff931f34fc 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -43,7 +43,6 @@ GtkWidget *save_btn = NULL; GtkWidget *save_menu_item = NULL; GtkTextTag *tag1, *tag2; -GdkColor color; GtkTreeStore *tree1, *tree2, *tree; GtkTreeModel *model1, *model2; From 290fc035dfeb07fcec57b09d888f837531333af7 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:11 +0900 Subject: [PATCH 0458/2411] kconfig: gconf: add static qualifiers to variables I also removed unnecessary initializers. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 4fff931f34fc..0d5a02706bcb 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -32,20 +32,20 @@ static gboolean show_range = TRUE; static gboolean show_value = TRUE; static int opt_mode = OPT_NORMAL; -GtkWidget *main_wnd = NULL; -GtkWidget *tree1_w = NULL; // left frame -GtkWidget *tree2_w = NULL; // right frame -GtkWidget *text_w = NULL; -GtkWidget *hpaned = NULL; -GtkWidget *vpaned = NULL; -GtkWidget *back_btn = NULL; -GtkWidget *save_btn = NULL; -GtkWidget *save_menu_item = NULL; +static GtkWidget *main_wnd; +static GtkWidget *tree1_w; // left frame +static GtkWidget *tree2_w; // right frame +static GtkWidget *text_w; +static GtkWidget *hpaned; +static GtkWidget *vpaned; +static GtkWidget *back_btn; +static GtkWidget *save_btn; +static GtkWidget *save_menu_item; -GtkTextTag *tag1, *tag2; +static GtkTextTag *tag1, *tag2; -GtkTreeStore *tree1, *tree2, *tree; -GtkTreeModel *model1, *model2; +static GtkTreeStore *tree1, *tree2, *tree; +static GtkTreeModel *model1, *model2; static struct menu *current; // current node for SINGLE view static struct menu *browsed; 
// browsed node for SPLIT view From 7ef533938e6cd7d0e33e1c24389c34a21221979b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:12 +0900 Subject: [PATCH 0459/2411] kconfig: gconf: move init_*() functions below This allows removal of the forward declaration of renderer_edited(). Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 450 ++++++++++++++++++++-------------------- 1 file changed, 222 insertions(+), 228 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 0d5a02706bcb..9d06c050b270 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -62,240 +62,12 @@ static void display_tree(struct menu *menu); static void display_tree_part(void); static void update_tree(struct menu *src, GtkTreeIter * dst); -static void replace_button_icon(GladeXML *xml, GdkDrawable *window, - GtkStyle *style, gchar *btn_name, gchar **xpm) -{ - GdkPixmap *pixmap; - GdkBitmap *mask; - GtkToolButton *button; - GtkWidget *image; - - pixmap = gdk_pixmap_create_from_xpm_d(window, &mask, - &style->bg[GTK_STATE_NORMAL], - xpm); - - button = GTK_TOOL_BUTTON(glade_xml_get_widget(xml, btn_name)); - image = gtk_image_new_from_pixmap(pixmap, mask); - gtk_widget_show(image); - gtk_tool_button_set_icon_widget(button, image); -} - static void conf_changed(bool dirty) { gtk_widget_set_sensitive(save_btn, dirty); gtk_widget_set_sensitive(save_menu_item, dirty); } -/* Main Window Initialization */ -static void init_main_window(const gchar *glade_file) -{ - GladeXML *xml; - GtkWidget *widget; - GtkTextBuffer *txtbuf; - GtkStyle *style; - - xml = glade_xml_new(glade_file, "window1", NULL); - if (!xml) - g_error("GUI loading failed !\n"); - glade_xml_signal_autoconnect(xml); - - main_wnd = glade_xml_get_widget(xml, "window1"); - hpaned = glade_xml_get_widget(xml, "hpaned1"); - vpaned = glade_xml_get_widget(xml, "vpaned1"); - tree1_w = glade_xml_get_widget(xml, "treeview1"); - tree2_w = glade_xml_get_widget(xml, "treeview2"); - 
text_w = glade_xml_get_widget(xml, "textview3"); - - back_btn = glade_xml_get_widget(xml, "button1"); - gtk_widget_set_sensitive(back_btn, FALSE); - - widget = glade_xml_get_widget(xml, "show_name1"); - gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, - show_name); - - widget = glade_xml_get_widget(xml, "show_range1"); - gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, - show_range); - - widget = glade_xml_get_widget(xml, "show_data1"); - gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, - show_value); - - save_btn = glade_xml_get_widget(xml, "button3"); - save_menu_item = glade_xml_get_widget(xml, "save1"); - conf_set_changed_callback(conf_changed); - - style = gtk_widget_get_style(main_wnd); - - replace_button_icon(xml, main_wnd->window, style, - "button4", (gchar **) xpm_single_view); - replace_button_icon(xml, main_wnd->window, style, - "button5", (gchar **) xpm_split_view); - replace_button_icon(xml, main_wnd->window, style, - "button6", (gchar **) xpm_tree_view); - - txtbuf = gtk_text_view_get_buffer(GTK_TEXT_VIEW(text_w)); - tag1 = gtk_text_buffer_create_tag(txtbuf, "mytag1", - "foreground", "red", - "weight", PANGO_WEIGHT_BOLD, - NULL); - tag2 = gtk_text_buffer_create_tag(txtbuf, "mytag2", - /*"style", PANGO_STYLE_OBLIQUE, */ - NULL); - - gtk_window_set_title(GTK_WINDOW(main_wnd), rootmenu.prompt->text); - - gtk_widget_show(main_wnd); -} - -static void init_tree_model(void) -{ - tree = tree2 = gtk_tree_store_new(COL_NUMBER, - G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_POINTER, GDK_TYPE_COLOR, - G_TYPE_BOOLEAN, GDK_TYPE_PIXBUF, - G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, - G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, - G_TYPE_BOOLEAN); - model2 = GTK_TREE_MODEL(tree2); - - tree1 = gtk_tree_store_new(COL_NUMBER, - G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_POINTER, GDK_TYPE_COLOR, - G_TYPE_BOOLEAN, GDK_TYPE_PIXBUF, - G_TYPE_BOOLEAN, 
G_TYPE_BOOLEAN, - G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, - G_TYPE_BOOLEAN); - model1 = GTK_TREE_MODEL(tree1); -} - -static void init_left_tree(void) -{ - GtkTreeView *view = GTK_TREE_VIEW(tree1_w); - GtkCellRenderer *renderer; - GtkTreeSelection *sel; - GtkTreeViewColumn *column; - - gtk_tree_view_set_model(view, model1); - - column = gtk_tree_view_column_new(); - gtk_tree_view_append_column(view, column); - gtk_tree_view_column_set_title(column, "Options"); - - renderer = gtk_cell_renderer_toggle_new(); - gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), - renderer, FALSE); - gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), - renderer, - "active", COL_BTNACT, - "inconsistent", COL_BTNINC, - "visible", COL_BTNVIS, - "radio", COL_BTNRAD, NULL); - renderer = gtk_cell_renderer_text_new(); - gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), - renderer, FALSE); - gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), - renderer, - "text", COL_OPTION, - "foreground-gdk", - COL_COLOR, NULL); - - sel = gtk_tree_view_get_selection(view); - gtk_tree_selection_set_mode(sel, GTK_SELECTION_SINGLE); -} - -static void renderer_edited(GtkCellRendererText * cell, - const gchar * path_string, - const gchar * new_text, gpointer user_data); - -static void init_right_tree(void) -{ - GtkTreeView *view = GTK_TREE_VIEW(tree2_w); - GtkCellRenderer *renderer; - GtkTreeSelection *sel; - GtkTreeViewColumn *column; - gint i; - - gtk_tree_view_set_model(view, model2); - - column = gtk_tree_view_column_new(); - gtk_tree_view_append_column(view, column); - gtk_tree_view_column_set_title(column, "Options"); - - renderer = gtk_cell_renderer_pixbuf_new(); - gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), - renderer, FALSE); - gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), - renderer, - "pixbuf", COL_PIXBUF, - "visible", COL_PIXVIS, NULL); - renderer = gtk_cell_renderer_toggle_new(); - 
gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), - renderer, FALSE); - gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), - renderer, - "active", COL_BTNACT, - "inconsistent", COL_BTNINC, - "visible", COL_BTNVIS, - "radio", COL_BTNRAD, NULL); - renderer = gtk_cell_renderer_text_new(); - gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), - renderer, FALSE); - gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), - renderer, - "text", COL_OPTION, - "foreground-gdk", - COL_COLOR, NULL); - - renderer = gtk_cell_renderer_text_new(); - gtk_tree_view_insert_column_with_attributes(view, -1, - "Name", renderer, - "text", COL_NAME, - "foreground-gdk", - COL_COLOR, NULL); - renderer = gtk_cell_renderer_text_new(); - gtk_tree_view_insert_column_with_attributes(view, -1, - "N", renderer, - "text", COL_NO, - "foreground-gdk", - COL_COLOR, NULL); - renderer = gtk_cell_renderer_text_new(); - gtk_tree_view_insert_column_with_attributes(view, -1, - "M", renderer, - "text", COL_MOD, - "foreground-gdk", - COL_COLOR, NULL); - renderer = gtk_cell_renderer_text_new(); - gtk_tree_view_insert_column_with_attributes(view, -1, - "Y", renderer, - "text", COL_YES, - "foreground-gdk", - COL_COLOR, NULL); - renderer = gtk_cell_renderer_text_new(); - gtk_tree_view_insert_column_with_attributes(view, -1, - "Value", renderer, - "text", COL_VALUE, - "editable", - COL_EDIT, - "foreground-gdk", - COL_COLOR, NULL); - g_signal_connect(G_OBJECT(renderer), "edited", - G_CALLBACK(renderer_edited), NULL); - - for (i = 0; i < COL_VALUE; i++) { - column = gtk_tree_view_get_column(view, i); - gtk_tree_view_column_set_resizable(column, TRUE); - } - - sel = gtk_tree_view_get_selection(view); - gtk_tree_selection_set_mode(sel, GTK_SELECTION_SINGLE); -} - - /* Utility Functions */ @@ -1324,6 +1096,228 @@ static void fixup_rootmenu(struct menu *menu) } } +/* Main Window Initialization */ +static void replace_button_icon(GladeXML *xml, GdkDrawable *window, + 
GtkStyle *style, gchar *btn_name, gchar **xpm) +{ + GdkPixmap *pixmap; + GdkBitmap *mask; + GtkToolButton *button; + GtkWidget *image; + + pixmap = gdk_pixmap_create_from_xpm_d(window, &mask, + &style->bg[GTK_STATE_NORMAL], + xpm); + + button = GTK_TOOL_BUTTON(glade_xml_get_widget(xml, btn_name)); + image = gtk_image_new_from_pixmap(pixmap, mask); + gtk_widget_show(image); + gtk_tool_button_set_icon_widget(button, image); +} + +static void init_main_window(const gchar *glade_file) +{ + GladeXML *xml; + GtkWidget *widget; + GtkTextBuffer *txtbuf; + GtkStyle *style; + + xml = glade_xml_new(glade_file, "window1", NULL); + if (!xml) + g_error("GUI loading failed !\n"); + glade_xml_signal_autoconnect(xml); + + main_wnd = glade_xml_get_widget(xml, "window1"); + hpaned = glade_xml_get_widget(xml, "hpaned1"); + vpaned = glade_xml_get_widget(xml, "vpaned1"); + tree1_w = glade_xml_get_widget(xml, "treeview1"); + tree2_w = glade_xml_get_widget(xml, "treeview2"); + text_w = glade_xml_get_widget(xml, "textview3"); + + back_btn = glade_xml_get_widget(xml, "button1"); + gtk_widget_set_sensitive(back_btn, FALSE); + + widget = glade_xml_get_widget(xml, "show_name1"); + gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, + show_name); + + widget = glade_xml_get_widget(xml, "show_range1"); + gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, + show_range); + + widget = glade_xml_get_widget(xml, "show_data1"); + gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, + show_value); + + save_btn = glade_xml_get_widget(xml, "button3"); + save_menu_item = glade_xml_get_widget(xml, "save1"); + conf_set_changed_callback(conf_changed); + + style = gtk_widget_get_style(main_wnd); + + replace_button_icon(xml, main_wnd->window, style, + "button4", (gchar **) xpm_single_view); + replace_button_icon(xml, main_wnd->window, style, + "button5", (gchar **) xpm_split_view); + replace_button_icon(xml, main_wnd->window, style, + "button6", (gchar **) xpm_tree_view); + + txtbuf = 
gtk_text_view_get_buffer(GTK_TEXT_VIEW(text_w)); + tag1 = gtk_text_buffer_create_tag(txtbuf, "mytag1", + "foreground", "red", + "weight", PANGO_WEIGHT_BOLD, + NULL); + tag2 = gtk_text_buffer_create_tag(txtbuf, "mytag2", + /*"style", PANGO_STYLE_OBLIQUE, */ + NULL); + + gtk_window_set_title(GTK_WINDOW(main_wnd), rootmenu.prompt->text); + + gtk_widget_show(main_wnd); +} + +static void init_tree_model(void) +{ + tree = tree2 = gtk_tree_store_new(COL_NUMBER, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_POINTER, GDK_TYPE_COLOR, + G_TYPE_BOOLEAN, GDK_TYPE_PIXBUF, + G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, + G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, + G_TYPE_BOOLEAN); + model2 = GTK_TREE_MODEL(tree2); + + tree1 = gtk_tree_store_new(COL_NUMBER, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_POINTER, GDK_TYPE_COLOR, + G_TYPE_BOOLEAN, GDK_TYPE_PIXBUF, + G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, + G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, + G_TYPE_BOOLEAN); + model1 = GTK_TREE_MODEL(tree1); +} + +static void init_left_tree(void) +{ + GtkTreeView *view = GTK_TREE_VIEW(tree1_w); + GtkCellRenderer *renderer; + GtkTreeSelection *sel; + GtkTreeViewColumn *column; + + gtk_tree_view_set_model(view, model1); + + column = gtk_tree_view_column_new(); + gtk_tree_view_append_column(view, column); + gtk_tree_view_column_set_title(column, "Options"); + + renderer = gtk_cell_renderer_toggle_new(); + gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), + renderer, FALSE); + gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), + renderer, + "active", COL_BTNACT, + "inconsistent", COL_BTNINC, + "visible", COL_BTNVIS, + "radio", COL_BTNRAD, NULL); + renderer = gtk_cell_renderer_text_new(); + gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), + renderer, FALSE); + gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), + renderer, + "text", COL_OPTION, + "foreground-gdk", + 
COL_COLOR, NULL); + + sel = gtk_tree_view_get_selection(view); + gtk_tree_selection_set_mode(sel, GTK_SELECTION_SINGLE); +} + +static void init_right_tree(void) +{ + GtkTreeView *view = GTK_TREE_VIEW(tree2_w); + GtkCellRenderer *renderer; + GtkTreeSelection *sel; + GtkTreeViewColumn *column; + gint i; + + gtk_tree_view_set_model(view, model2); + + column = gtk_tree_view_column_new(); + gtk_tree_view_append_column(view, column); + gtk_tree_view_column_set_title(column, "Options"); + + renderer = gtk_cell_renderer_pixbuf_new(); + gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), + renderer, FALSE); + gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), + renderer, + "pixbuf", COL_PIXBUF, + "visible", COL_PIXVIS, NULL); + renderer = gtk_cell_renderer_toggle_new(); + gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), + renderer, FALSE); + gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), + renderer, + "active", COL_BTNACT, + "inconsistent", COL_BTNINC, + "visible", COL_BTNVIS, + "radio", COL_BTNRAD, NULL); + renderer = gtk_cell_renderer_text_new(); + gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), + renderer, FALSE); + gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), + renderer, + "text", COL_OPTION, + "foreground-gdk", + COL_COLOR, NULL); + + renderer = gtk_cell_renderer_text_new(); + gtk_tree_view_insert_column_with_attributes(view, -1, + "Name", renderer, + "text", COL_NAME, + "foreground-gdk", + COL_COLOR, NULL); + renderer = gtk_cell_renderer_text_new(); + gtk_tree_view_insert_column_with_attributes(view, -1, + "N", renderer, + "text", COL_NO, + "foreground-gdk", + COL_COLOR, NULL); + renderer = gtk_cell_renderer_text_new(); + gtk_tree_view_insert_column_with_attributes(view, -1, + "M", renderer, + "text", COL_MOD, + "foreground-gdk", + COL_COLOR, NULL); + renderer = gtk_cell_renderer_text_new(); + gtk_tree_view_insert_column_with_attributes(view, -1, + "Y", renderer, + "text", 
COL_YES, + "foreground-gdk", + COL_COLOR, NULL); + renderer = gtk_cell_renderer_text_new(); + gtk_tree_view_insert_column_with_attributes(view, -1, + "Value", renderer, + "text", COL_VALUE, + "editable", + COL_EDIT, + "foreground-gdk", + COL_COLOR, NULL); + g_signal_connect(G_OBJECT(renderer), "edited", + G_CALLBACK(renderer_edited), NULL); + + for (i = 0; i < COL_VALUE; i++) { + column = gtk_tree_view_get_column(view, i); + gtk_tree_view_column_set_resizable(column, TRUE); + } + + sel = gtk_tree_view_get_selection(view); + gtk_tree_selection_set_mode(sel, GTK_SELECTION_SINGLE); +} /* Main */ int main(int ac, char *av[]) From 2dedf83d54c6248317ce86d9fc677f89e37ab04c Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Sat, 28 Jun 2025 18:49:54 +0200 Subject: [PATCH 0460/2411] rust: dma: require mutable reference for as_slice_mut() and write() Given the safety requirements of as_slice_mut() and write() taking an immutable reference is technically not incorrect. However, let's leverage the compiler's capabilities and require a mutable reference to ensure exclusive access. This also fixes a clippy warning introduced with 1.88: warning: mutable borrow from immutable input(s) --> rust/kernel/dma.rs:297:78 | 297 | pub unsafe fn as_slice_mut(&self, offset: usize, count: usize) -> Result<&mut [T]> { | ^^^^^^^^ Fixes: d37a39f607c4 ("rust: dma: add as_slice/write functions for CoherentAllocation") Reviewed-by: Alice Ryhl Reviewed-by: Andreas Hindborg Reviewed-by: Abdiel Janulgue Reviewed-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250628165120.90149-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/dma.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index 25dfa0e6cc3c..2ac4c47aeed3 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs @@ -294,7 +294,7 @@ pub unsafe fn as_slice(&self, offset: usize, count: usize) -> Result<&[T]> { /// slice is live. 
/// * Callers must ensure that this call does not race with a read or write to the same region /// while the returned slice is live. - pub unsafe fn as_slice_mut(&self, offset: usize, count: usize) -> Result<&mut [T]> { + pub unsafe fn as_slice_mut(&mut self, offset: usize, count: usize) -> Result<&mut [T]> { self.validate_range(offset, count)?; // SAFETY: // - The pointer is valid due to type invariant on `CoherentAllocation`, @@ -326,7 +326,7 @@ pub unsafe fn as_slice_mut(&self, offset: usize, count: usize) -> Result<&mut [T /// unsafe { alloc.write(buf, 0)?; } /// # Ok::<(), Error>(()) } /// ``` - pub unsafe fn write(&self, src: &[T], offset: usize) -> Result { + pub unsafe fn write(&mut self, src: &[T], offset: usize) -> Result { self.validate_range(offset, src.len())?; // SAFETY: // - The pointer is valid due to type invariant on `CoherentAllocation` From a60d92f6d941bd77bf3aaec724a7c95857c0165b Mon Sep 17 00:00:00 2001 From: Mayank Rana Date: Mon, 16 Jun 2025 15:42:56 -0700 Subject: [PATCH 0461/2411] PCI: dwc: Export DWC MSI controller related APIs Export dw_pcie_msi_host_init(), dw_pcie_msi_init(), and dw_pcie_free_msi() APIs to allow them to be reused by the upcoming DWC based ECAM driver implementation. Also, move MSI IRQ related initialization code to dw_pcie_msi_init(), as this code must be executed before dw_pcie_msi_init() API can be used with ECAM driver. 
Signed-off-by: Mayank Rana [mani: commit message rewording] Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250616224259.3549811-2-mayank.rana@oss.qualcomm.com --- .../pci/controller/dwc/pcie-designware-host.c | 38 ++++++++++--------- drivers/pci/controller/dwc/pcie-designware.h | 14 +++++++ 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index 906277f9ffaf..af6c91ec7312 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -250,7 +250,7 @@ int dw_pcie_allocate_domains(struct dw_pcie_rp *pp) return 0; } -static void dw_pcie_free_msi(struct dw_pcie_rp *pp) +void dw_pcie_free_msi(struct dw_pcie_rp *pp) { u32 ctrl; @@ -263,19 +263,34 @@ static void dw_pcie_free_msi(struct dw_pcie_rp *pp) irq_domain_remove(pp->msi_domain); irq_domain_remove(pp->irq_domain); } +EXPORT_SYMBOL_GPL(dw_pcie_free_msi); -static void dw_pcie_msi_init(struct dw_pcie_rp *pp) +void dw_pcie_msi_init(struct dw_pcie_rp *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); u64 msi_target = (u64)pp->msi_data; + u32 ctrl, num_ctrls; if (!pci_msi_enabled() || !pp->has_msi_ctrl) return; + num_ctrls = pp->num_vectors / MAX_MSI_IRQS_PER_CTRL; + + /* Initialize IRQ Status array */ + for (ctrl = 0; ctrl < num_ctrls; ctrl++) { + dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK + + (ctrl * MSI_REG_CTRL_BLOCK_SIZE), + pp->irq_mask[ctrl]); + dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_ENABLE + + (ctrl * MSI_REG_CTRL_BLOCK_SIZE), + ~0); + } + /* Program the msi_data */ dw_pcie_writel_dbi(pci, PCIE_MSI_ADDR_LO, lower_32_bits(msi_target)); dw_pcie_writel_dbi(pci, PCIE_MSI_ADDR_HI, upper_32_bits(msi_target)); } +EXPORT_SYMBOL_GPL(dw_pcie_msi_init); static int dw_pcie_parse_split_msi_irq(struct dw_pcie_rp *pp) { @@ -317,7 +332,7 @@ static int dw_pcie_parse_split_msi_irq(struct dw_pcie_rp *pp) return 0; } -static int 
dw_pcie_msi_host_init(struct dw_pcie_rp *pp) +int dw_pcie_msi_host_init(struct dw_pcie_rp *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); struct device *dev = pci->dev; @@ -391,6 +406,7 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp) return 0; } +EXPORT_SYMBOL_GPL(dw_pcie_msi_host_init); static void dw_pcie_host_request_msg_tlp_res(struct dw_pcie_rp *pp) { @@ -909,7 +925,7 @@ static void dw_pcie_config_presets(struct dw_pcie_rp *pp) int dw_pcie_setup_rc(struct dw_pcie_rp *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); - u32 val, ctrl, num_ctrls; + u32 val; int ret; /* @@ -920,20 +936,6 @@ int dw_pcie_setup_rc(struct dw_pcie_rp *pp) dw_pcie_setup(pci); - if (pp->has_msi_ctrl) { - num_ctrls = pp->num_vectors / MAX_MSI_IRQS_PER_CTRL; - - /* Initialize IRQ Status array */ - for (ctrl = 0; ctrl < num_ctrls; ctrl++) { - dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK + - (ctrl * MSI_REG_CTRL_BLOCK_SIZE), - pp->irq_mask[ctrl]); - dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_ENABLE + - (ctrl * MSI_REG_CTRL_BLOCK_SIZE), - ~0); - } - } - dw_pcie_msi_init(pp); /* Setup RC BARs */ diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h index ce9e18554e42..4165c49a0a50 100644 --- a/drivers/pci/controller/dwc/pcie-designware.h +++ b/drivers/pci/controller/dwc/pcie-designware.h @@ -759,6 +759,9 @@ static inline enum dw_pcie_ltssm dw_pcie_get_ltssm(struct dw_pcie *pci) int dw_pcie_suspend_noirq(struct dw_pcie *pci); int dw_pcie_resume_noirq(struct dw_pcie *pci); irqreturn_t dw_handle_msi_irq(struct dw_pcie_rp *pp); +void dw_pcie_msi_init(struct dw_pcie_rp *pp); +int dw_pcie_msi_host_init(struct dw_pcie_rp *pp); +void dw_pcie_free_msi(struct dw_pcie_rp *pp); int dw_pcie_setup_rc(struct dw_pcie_rp *pp); int dw_pcie_host_init(struct dw_pcie_rp *pp); void dw_pcie_host_deinit(struct dw_pcie_rp *pp); @@ -781,6 +784,17 @@ static inline irqreturn_t dw_handle_msi_irq(struct dw_pcie_rp *pp) return IRQ_NONE; } +static inline void 
dw_pcie_msi_init(struct dw_pcie_rp *pp) +{ } + +static inline int dw_pcie_msi_host_init(struct dw_pcie_rp *pp) +{ + return -ENODEV; +} + +static inline void dw_pcie_free_msi(struct dw_pcie_rp *pp) +{ } + static inline int dw_pcie_setup_rc(struct dw_pcie_rp *pp) { return 0; From fefbc58271be1eaaab1b3c8815569109764a32c4 Mon Sep 17 00:00:00 2001 From: Mayank Rana Date: Mon, 16 Jun 2025 15:42:57 -0700 Subject: [PATCH 0462/2411] PCI: host-generic: Rename and export gen_pci_init() for PCIe controller drivers Rename gen_pci_init() API as pci_host_common_ecam_create() and export it to allow the PCIe controller drivers to create and configure the ECAM region. Note that this API should only be used by the drivers managing the drvdata on their own. The rest should continue using the pci_host_common_init() API. Signed-off-by: Mayank Rana [mani: commit message rewording] Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250616224259.3549811-3-mayank.rana@oss.qualcomm.com --- drivers/pci/controller/pci-host-common.c | 5 +++-- drivers/pci/controller/pci-host-common.h | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/pci-host-common.c b/drivers/pci/controller/pci-host-common.c index b0992325dd65..5b61b5a9e0f9 100644 --- a/drivers/pci/controller/pci-host-common.c +++ b/drivers/pci/controller/pci-host-common.c @@ -22,7 +22,7 @@ static void gen_pci_unmap_cfg(void *ptr) pci_ecam_free((struct pci_config_window *)ptr); } -static struct pci_config_window *gen_pci_init(struct device *dev, +struct pci_config_window *pci_host_common_ecam_create(struct device *dev, struct pci_host_bridge *bridge, const struct pci_ecam_ops *ops) { int err; @@ -50,6 +50,7 @@ static struct pci_config_window *gen_pci_init(struct device *dev, return cfg; } +EXPORT_SYMBOL_GPL(pci_host_common_ecam_create); int pci_host_common_init(struct platform_device *pdev, const struct pci_ecam_ops *ops) @@ -65,7 +66,7 @@ int pci_host_common_init(struct platform_device *pdev, 
of_pci_check_probe_only(); /* Parse and map our Configuration Space windows */ - cfg = gen_pci_init(dev, bridge, ops); + cfg = pci_host_common_ecam_create(dev, bridge, ops); if (IS_ERR(cfg)) return PTR_ERR(cfg); diff --git a/drivers/pci/controller/pci-host-common.h b/drivers/pci/controller/pci-host-common.h index 65bd9e032353..51c35ec0cf37 100644 --- a/drivers/pci/controller/pci-host-common.h +++ b/drivers/pci/controller/pci-host-common.h @@ -17,4 +17,6 @@ int pci_host_common_init(struct platform_device *pdev, const struct pci_ecam_ops *ops); void pci_host_common_remove(struct platform_device *pdev); +struct pci_config_window *pci_host_common_ecam_create(struct device *dev, + struct pci_host_bridge *bridge, const struct pci_ecam_ops *ops); #endif From ac0fe6a5731700bcea6fecfd5d0b76c0454b3a20 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 1 Jul 2025 14:07:39 +0200 Subject: [PATCH 0463/2411] cxl: make cxl_bus_type constant Now that the driver core can properly handle constant struct bus_type, move the cxl_bus_type variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. 
Cc: Davidlohr Bueso Cc: Jonathan Cameron Cc: Dave Jiang Cc: Alison Schofield Cc: Vishal Verma Cc: Ira Weiny Cc: Dan Williams Cc: linux-cxl@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2025070138-vigorous-negative-eae7@gregkh Signed-off-by: Dave Jiang --- drivers/cxl/core/port.c | 2 +- drivers/cxl/cxl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index eb46c6764d20..0696f7fcef56 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -2293,7 +2293,7 @@ static const struct attribute_group *cxl_bus_attribute_groups[] = { NULL, }; -struct bus_type cxl_bus_type = { +const struct bus_type cxl_bus_type = { .name = "cxl", .uevent = cxl_bus_uevent, .match = cxl_bus_match, diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 3f1695c96abc..e7b66ca1d423 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -815,7 +815,7 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, bool is_cxl_region(struct device *dev); -extern struct bus_type cxl_bus_type; +extern const struct bus_type cxl_bus_type; struct cxl_driver { const char *name; From e23ab8028de0d92df5921a570f5212c0370db3b5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 30 Jun 2025 16:06:09 +0000 Subject: [PATCH 0464/2411] f2fs: check the generic conditions first Let's return errors caught by the generic checks. This fixes generic/494 where it expects to see EBUSY by setattr_prepare instead of EINVAL by f2fs for active swapfile. 
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index bc0ca697e064..bd835c4f874a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1048,6 +1048,18 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = setattr_prepare(idmap, dentry, attr); + if (err) + return err; + + err = fscrypt_prepare_setattr(dentry, attr); + if (err) + return err; + + err = fsverity_prepare_setattr(dentry, attr); + if (err) + return err; + if (unlikely(IS_IMMUTABLE(inode))) return -EPERM; @@ -1077,18 +1089,6 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, return -EINVAL; } - err = setattr_prepare(idmap, dentry, attr); - if (err) - return err; - - err = fscrypt_prepare_setattr(dentry, attr); - if (err) - return err; - - err = fsverity_prepare_setattr(dentry, attr); - if (err) - return err; - if (is_quota_modification(idmap, inode, attr)) { err = f2fs_dquot_initialize(inode); if (err) From 185f203a6991f7dd7f8070d6638415215da35d7e Mon Sep 17 00:00:00 2001 From: Jianan Huang Date: Mon, 30 Jun 2025 20:57:53 +0800 Subject: [PATCH 0465/2411] f2fs: avoid splitting bio when reading multiple pages When fewer pages are read, nr_pages may be smaller than nr_cpages. Due to the nr_vecs limit, the compressed pages will be split into multiple bios and then merged at the block level. In this case, nr_cpages should be used to pre-allocate bvecs. To handle this case, align max_nr_pages to cluster_size, which should be enough for all compressed pages. 
Signed-off-by: Jianan Huang Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 31e892842625..40292e4ad341 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2303,7 +2303,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, } if (!bio) { - bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, + bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages - i, f2fs_ra_op_flags(rac), folio->index, for_write); if (IS_ERR(bio)) { @@ -2376,6 +2376,14 @@ static int f2fs_mpage_readpages(struct inode *inode, unsigned max_nr_pages = nr_pages; int ret = 0; +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + index = rac ? readahead_index(rac) : folio->index; + max_nr_pages = round_up(index + nr_pages, cc.cluster_size) - + round_down(index, cc.cluster_size); + } +#endif + map.m_pblk = 0; map.m_lblk = 0; map.m_len = 0; From 8f4688591d96be9a71c0ddfbf32032d55dd54cfa Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 1 Jul 2025 17:26:10 +0800 Subject: [PATCH 0466/2411] f2fs: fix to use f2fs_is_valid_blkaddr_raw() in do_write_page() As syzbot reported as below: F2FS-fs (loop9): inject invalid blkaddr in f2fs_is_valid_blkaddr of do_write_page+0x277/0xb10 fs/f2fs/segment.c:3956 ------------[ cut here ]------------ kernel BUG at fs/f2fs/segment.c:3957! 
Oops: invalid opcode: 0000 [#1] SMP KASAN PTI CPU: 0 UID: 0 PID: 10538 Comm: syz-executor Not tainted 6.16.0-rc3-next-20250627-syzkaller #0 PREEMPT(full) Call Trace: f2fs_outplace_write_data+0x11a/0x220 fs/f2fs/segment.c:4017 f2fs_do_write_data_page+0x12ea/0x1a40 fs/f2fs/data.c:2752 f2fs_write_single_data_page+0xa68/0x1680 fs/f2fs/data.c:2851 f2fs_write_cache_pages fs/f2fs/data.c:3133 [inline] __f2fs_write_data_pages fs/f2fs/data.c:3282 [inline] f2fs_write_data_pages+0x195b/0x3000 fs/f2fs/data.c:3309 do_writepages+0x32b/0x550 mm/page-writeback.c:2636 filemap_fdatawrite_wbc mm/filemap.c:386 [inline] __filemap_fdatawrite_range mm/filemap.c:419 [inline] __filemap_fdatawrite mm/filemap.c:425 [inline] filemap_fdatawrite+0x199/0x240 mm/filemap.c:430 f2fs_sync_dirty_inodes+0x31f/0x830 fs/f2fs/checkpoint.c:1108 block_operations fs/f2fs/checkpoint.c:1247 [inline] f2fs_write_checkpoint+0x95a/0x1df0 fs/f2fs/checkpoint.c:1638 kill_f2fs_super+0x2c3/0x6c0 fs/f2fs/super.c:5081 deactivate_locked_super+0xb9/0x130 fs/super.c:474 cleanup_mnt+0x425/0x4c0 fs/namespace.c:1417 task_work_run+0x1d4/0x260 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop+0xec/0x110 kernel/entry/common.c:114 exit_to_user_mode_prepare include/linux/entry-common.h:330 [inline] syscall_exit_to_user_mode_work include/linux/entry-common.h:414 [inline] syscall_exit_to_user_mode include/linux/entry-common.h:449 [inline] do_syscall_64+0x2bd/0x3b0 arch/x86/entry/syscall_64.c:100 entry_SYSCALL_64_after_hwframe+0x77/0x7f If we inject block address fault, it may trigger kernel panic, we need to use f2fs_is_valid_blkaddr_raw() instead of f2fs_is_valid_blkaddr() in do_write_page() to avoid such issue. 
Fixes: 70b6e8500431 ("f2fs: do sanity check on fio.new_blkaddr in do_write_page()") Reported-by: syzbot+9201a61c060513d4be38@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/68639520.a70a0220.3b7e22.17e6.GAE@google.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5653716460ea..b89bdb867508 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3953,7 +3953,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) goto out; } - f2fs_bug_on(fio->sbi, !f2fs_is_valid_blkaddr(fio->sbi, + f2fs_bug_on(fio->sbi, !f2fs_is_valid_blkaddr_raw(fio->sbi, fio->new_blkaddr, DATA_GENERIC_ENHANCE)); if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) From b6cea9b4f892e15d6d0dfabb11f3db299cdb9f01 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 27 Jun 2025 18:23:01 -0700 Subject: [PATCH 0467/2411] perf test: Name the noploop process Name the noploop process "perf-noploop" so that tests can easily check for its existence. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250628012302.1242532-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/tests/workloads/noploop.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/tests/workloads/noploop.c b/tools/perf/tests/workloads/noploop.c index 940ea5910a84..656e472e6188 100644 --- a/tools/perf/tests/workloads/noploop.c +++ b/tools/perf/tests/workloads/noploop.c @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include #include #include #include @@ -16,6 +17,7 @@ static int noploop(int argc, const char **argv) { int sec = 1; + pthread_setname_np(pthread_self(), "perf-noploop"); if (argc > 0) sec = atoi(argv[0]); From 0e22c5ca44e687981f79598e650d26faad101746 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 27 Jun 2025 18:23:02 -0700 Subject: [PATCH 0468/2411] perf test: Add sched latency and script shell tests Add shell tests covering the `perf sched latency` and `perf sched script` commands. The test creates 2 noploop processes on the same forced CPU, it then checks that the process appears in the `perf sched` output. 
Reviewed-by: James Clark Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250628012302.1242532-2-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/tests/shell/sched.sh | 93 +++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100755 tools/perf/tests/shell/sched.sh diff --git a/tools/perf/tests/shell/sched.sh b/tools/perf/tests/shell/sched.sh new file mode 100755 index 000000000000..c030126d1a0c --- /dev/null +++ b/tools/perf/tests/shell/sched.sh @@ -0,0 +1,93 @@ +#!/bin/bash +# perf sched tests +# SPDX-License-Identifier: GPL-2.0 + +set -e + +if [ "$(id -u)" != 0 ]; then + echo "[Skip] No root permission" + exit 2 +fi + +err=0 +perfdata=$(mktemp /tmp/__perf_test_sched.perf.data.XXXXX) +PID1=0 +PID2=0 + +cleanup() { + rm -f "${perfdata}" + rm -f "${perfdata}".old + + trap - EXIT TERM INT +} + +trap_cleanup() { + echo "Unexpected signal in ${FUNCNAME[1]}" + cleanup + exit 1 +} +trap trap_cleanup EXIT TERM INT + +start_noploops() { + # Start two noploop workloads on CPU0 to trigger scheduling. + perf test -w noploop 10 & + PID1=$! + taskset -pc 0 $PID1 + perf test -w noploop 10 & + PID2=$! + taskset -pc 0 $PID2 + + if ! grep -q 'Cpus_allowed_list:\s*0$' "/proc/$PID1/status" + then + echo "Sched [Error taskset did not work for the 1st noploop ($PID1)]" + grep Cpus_allowed /proc/$PID1/status + err=1 + fi + + if ! grep -q 'Cpus_allowed_list:\s*0$' "/proc/$PID2/status" + then + echo "Sched [Error taskset did not work for the 2nd noploop ($PID2)]" + grep Cpus_allowed /proc/$PID2/status + err=1 + fi +} + +cleanup_noploops() { + kill "$PID1" "$PID2" +} + +test_sched_latency() { + echo "Sched latency" + + start_noploops + + perf sched record --no-inherit -o "${perfdata}" sleep 1 + if ! 
perf sched latency -i "${perfdata}" | grep -q perf-noploop + then + echo "Sched latency [Failed missing output]" + err=1 + fi + + cleanup_noploops +} + +test_sched_script() { + echo "Sched script" + + start_noploops + + perf sched record --no-inherit -o "${perfdata}" sleep 1 + if ! perf sched script -i "${perfdata}" | grep -q perf-noploop + then + echo "Sched script [Failed missing output]" + err=1 + fi + + cleanup_noploops +} + +test_sched_latency +test_sched_script + +cleanup +exit $err From 139ee54a2b3e9a4042307dd0484f85c0b3b45539 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 1 Jul 2025 13:10:25 -0700 Subject: [PATCH 0469/2411] perf test: Check test suite description properly Currently perf test checks the given string with descriptions for both test suites and cases (subtests). But sometimes it's confusing since the subtests don't contain the important keyword. I think it's better to check the suite level and run the whole suite together if it matches description in the suite. Before: $ perf test hwmon (no output) After: $ perf test hwmon 10: Hwmon PMU : 10.1: Basic parsing test : Ok 10.2: Parsing without PMU name : Ok 10.3: Parsing with PMU name : Ok And keep the existing behavior when it matches the test case description only.
$ perf test "Equal cpu map" 39.5: Equal cpu map : Ok Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250701201027.1171561-1-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/tests/builtin-test.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 80375ca39a37..846c9b3a732c 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -539,6 +539,7 @@ static int __cmd_test(struct test_suite **suites, int argc, const char *argv[], for (struct test_suite **t = suites; *t; t++, curr_suite++) { int curr_test_case; + bool suite_matched = false; if (!perf_test__matches(test_description(*t, -1), curr_suite, argc, argv)) { /* @@ -556,6 +557,8 @@ static int __cmd_test(struct test_suite **suites, int argc, const char *argv[], } if (skip) continue; + } else { + suite_matched = true; } if (intlist__find(skiplist, curr_suite + 1)) { @@ -567,10 +570,10 @@ static int __cmd_test(struct test_suite **suites, int argc, const char *argv[], for (unsigned int run = 0; run < runs_per_test; run++) { test_suite__for_each_test_case(*t, curr_test_case) { - if (!perf_test__matches(test_description(*t, curr_test_case), + if (!suite_matched && + !perf_test__matches(test_description(*t, curr_test_case), curr_suite, argc, argv)) continue; - err = start_test(*t, curr_suite, curr_test_case, &child_tests[child_test_num++], width, pass); From 34c4ff1cbf7e7b600496c5adb72131ec5510e459 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 1 Jul 2025 13:10:26 -0700 Subject: [PATCH 0470/2411] perf test: Add libsubcmd help tests Add a set of tests for subcmd routines. Currently it fails the last one since there's a bug. It'll be fixed by the next commit. $ perf test subcmd 69: libsubcmd help tests : 69.1: Load subcmd names : Ok 69.2: Uniquify subcmd names : Ok 69.3: Exclude duplicate subcmd names : FAILED! 
Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250701201027.1171561-2-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/subcmd-help.c | 108 ++++++++++++++++++++++++++++++++ tools/perf/tests/tests.h | 2 + 4 files changed, 112 insertions(+) create mode 100644 tools/perf/tests/subcmd-help.c diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index d6c35dd0de3b..3e8394be15ae 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -69,6 +69,7 @@ perf-test-y += symbols.o perf-test-y += util.o perf-test-y += hwmon_pmu.o perf-test-y += tool_pmu.o +perf-test-y += subcmd-help.o ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc)) perf-test-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 846c9b3a732c..e242d56523ce 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -139,6 +139,7 @@ static struct test_suite *generic_tests[] = { &suite__event_groups, &suite__symbols, &suite__util, + &suite__subcmd_help, NULL, }; diff --git a/tools/perf/tests/subcmd-help.c b/tools/perf/tests/subcmd-help.c new file mode 100644 index 000000000000..2280b4c0e5e7 --- /dev/null +++ b/tools/perf/tests/subcmd-help.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "tests.h" +#include +#include + +static int test__load_cmdnames(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + struct cmdnames cmds = {}; + + add_cmdname(&cmds, "aaa", 3); + add_cmdname(&cmds, "foo", 3); + add_cmdname(&cmds, "xyz", 3); + + TEST_ASSERT_VAL("cannot find cmd", is_in_cmdlist(&cmds, "aaa") == 1); + TEST_ASSERT_VAL("wrong cmd", is_in_cmdlist(&cmds, "bar") == 0); + TEST_ASSERT_VAL("case sensitive", is_in_cmdlist(&cmds, "XYZ") == 0); + + clean_cmdnames(&cmds); + return TEST_OK; +} + +static int test__uniq_cmdnames(struct test_suite *test __maybe_unused, + 
int subtest __maybe_unused) +{ + struct cmdnames cmds = {}; + + /* uniq() assumes it's sorted */ + add_cmdname(&cmds, "aaa", 3); + add_cmdname(&cmds, "aaa", 3); + add_cmdname(&cmds, "bbb", 3); + + TEST_ASSERT_VAL("invalid original size", cmds.cnt == 3); + /* uniquify command names (to remove second 'aaa') */ + uniq(&cmds); + TEST_ASSERT_VAL("invalid final size", cmds.cnt == 2); + + TEST_ASSERT_VAL("cannot find cmd", is_in_cmdlist(&cmds, "aaa") == 1); + TEST_ASSERT_VAL("cannot find cmd", is_in_cmdlist(&cmds, "bbb") == 1); + TEST_ASSERT_VAL("wrong cmd", is_in_cmdlist(&cmds, "ccc") == 0); + + clean_cmdnames(&cmds); + return TEST_OK; +} + +static int test__exclude_cmdnames(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + struct cmdnames cmds1 = {}; + struct cmdnames cmds2 = {}; + + add_cmdname(&cmds1, "aaa", 3); + add_cmdname(&cmds1, "bbb", 3); + add_cmdname(&cmds1, "ccc", 3); + add_cmdname(&cmds1, "ddd", 3); + add_cmdname(&cmds1, "eee", 3); + add_cmdname(&cmds1, "fff", 3); + add_cmdname(&cmds1, "ggg", 3); + add_cmdname(&cmds1, "hhh", 3); + add_cmdname(&cmds1, "iii", 3); + add_cmdname(&cmds1, "jjj", 3); + + add_cmdname(&cmds2, "bbb", 3); + add_cmdname(&cmds2, "eee", 3); + add_cmdname(&cmds2, "jjj", 3); + + TEST_ASSERT_VAL("invalid original size", cmds1.cnt == 10); + TEST_ASSERT_VAL("invalid original size", cmds2.cnt == 3); + + /* remove duplicate command names in cmds1 */ + exclude_cmds(&cmds1, &cmds2); + + TEST_ASSERT_VAL("invalid excluded size", cmds1.cnt == 7); + TEST_ASSERT_VAL("invalid excluded size", cmds2.cnt == 3); + + /* excluded commands should not belong to cmds1 */ + TEST_ASSERT_VAL("cannot find cmd", is_in_cmdlist(&cmds1, "aaa") == 1); + TEST_ASSERT_VAL("wrong cmd", is_in_cmdlist(&cmds1, "bbb") == 0); + TEST_ASSERT_VAL("cannot find cmd", is_in_cmdlist(&cmds1, "ccc") == 1); + TEST_ASSERT_VAL("cannot find cmd", is_in_cmdlist(&cmds1, "ddd") == 1); + TEST_ASSERT_VAL("wrong cmd", is_in_cmdlist(&cmds1, "eee") == 0); + 
TEST_ASSERT_VAL("cannot find cmd", is_in_cmdlist(&cmds1, "fff") == 1); + TEST_ASSERT_VAL("cannot find cmd", is_in_cmdlist(&cmds1, "ggg") == 1); + TEST_ASSERT_VAL("cannot find cmd", is_in_cmdlist(&cmds1, "hhh") == 1); + TEST_ASSERT_VAL("cannot find cmd", is_in_cmdlist(&cmds1, "iii") == 1); + TEST_ASSERT_VAL("wrong cmd", is_in_cmdlist(&cmds1, "jjj") == 0); + + /* they should be only in cmds2 */ + TEST_ASSERT_VAL("cannot find cmd", is_in_cmdlist(&cmds2, "bbb") == 1); + TEST_ASSERT_VAL("cannot find cmd", is_in_cmdlist(&cmds2, "eee") == 1); + TEST_ASSERT_VAL("cannot find cmd", is_in_cmdlist(&cmds2, "jjj") == 1); + + clean_cmdnames(&cmds1); + clean_cmdnames(&cmds2); + return TEST_OK; +} + +static struct test_case tests__subcmd_help[] = { + TEST_CASE("Load subcmd names", load_cmdnames), + TEST_CASE("Uniquify subcmd names", uniq_cmdnames), + TEST_CASE("Exclude duplicate subcmd names", exclude_cmdnames), + { .name = NULL, } +}; + +struct test_suite suite__subcmd_help = { + .desc = "libsubcmd help tests", + .test_cases = tests__subcmd_help, +}; diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 4c128a959441..97e62db8764a 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -3,6 +3,7 @@ #define TESTS_H #include +#include "util/debug.h" enum { TEST_OK = 0, @@ -177,6 +178,7 @@ DECLARE_SUITE(sigtrap); DECLARE_SUITE(event_groups); DECLARE_SUITE(symbols); DECLARE_SUITE(util); +DECLARE_SUITE(subcmd_help); /* * PowerPC and S390 do not support creation of instruction breakpoints using the From 8d6b5c14ab0bdca5b8a7583ccbec4a092e3458bf Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:13 +0900 Subject: [PATCH 0471/2411] kconfig: gconf: refactor view setting code Factor out common code for setting the view into a new function, set_view_mode(). 
Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap --- scripts/kconfig/gconf.c | 74 +++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 36 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 9d06c050b270..185084fccc23 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -18,7 +18,7 @@ #include #include -enum { +enum view_mode { SINGLE_VIEW, SPLIT_VIEW, FULL_VIEW }; @@ -116,6 +116,39 @@ static void text_insert_msg(const char *title, const char *message) NULL); } +static void set_view_mode(enum view_mode mode) +{ + view_mode = mode; + + if (mode == SPLIT_VIEW) { // two panes + gint w; + + gtk_widget_show(tree1_w); + gtk_window_get_default_size(GTK_WINDOW(main_wnd), &w, NULL); + gtk_paned_set_position(GTK_PANED(hpaned), w / 2); + } else { + gtk_widget_hide(tree1_w); + gtk_paned_set_position(GTK_PANED(hpaned), 0); + } + + switch (mode) { + case SINGLE_VIEW: + current = &rootmenu; + display_tree_part(); + break; + case SPLIT_VIEW: + gtk_tree_store_clear(tree2); + display_list(); + break; + case FULL_VIEW: + gtk_tree_store_clear(tree2); + display_tree(&rootmenu); + break; + } + + if (mode != SINGLE_VIEW) + gtk_widget_set_sensitive(back_btn, FALSE); +} /* Main Windows Callbacks */ @@ -435,35 +468,19 @@ void on_load_clicked(GtkButton * button, gpointer user_data) void on_single_clicked(GtkButton * button, gpointer user_data) { - view_mode = SINGLE_VIEW; - gtk_widget_hide(tree1_w); - current = &rootmenu; - display_tree_part(); + set_view_mode(SINGLE_VIEW); } void on_split_clicked(GtkButton * button, gpointer user_data) { - gint w; - view_mode = SPLIT_VIEW; - gtk_widget_show(tree1_w); - gtk_window_get_default_size(GTK_WINDOW(main_wnd), &w, NULL); - gtk_paned_set_position(GTK_PANED(hpaned), w / 2); - gtk_tree_store_clear(tree2); - display_list(); - - /* Disable back btn, like in full mode. 
*/ - gtk_widget_set_sensitive(back_btn, FALSE); + set_view_mode(SPLIT_VIEW); } void on_full_clicked(GtkButton * button, gpointer user_data) { - view_mode = FULL_VIEW; - gtk_widget_hide(tree1_w); - gtk_tree_store_clear(tree2); - display_tree(&rootmenu); - gtk_widget_set_sensitive(back_btn, FALSE); + set_view_mode(FULL_VIEW); } @@ -1039,11 +1056,6 @@ static void _display_tree(struct menu *menu, GtkTreeIter *parent) || (view_mode == FULL_VIEW) || (view_mode == SPLIT_VIEW))*/ - /* Change paned position if the view is not in 'split mode' */ - if (view_mode == SINGLE_VIEW || view_mode == FULL_VIEW) { - gtk_paned_set_position(GTK_PANED(hpaned), 0); - } - if (((view_mode == SINGLE_VIEW) && (menu->flags & MENU_ROOT)) || (view_mode == FULL_VIEW) || (view_mode == SPLIT_VIEW)) @@ -1368,17 +1380,7 @@ int main(int ac, char *av[]) conf_read(NULL); - switch (view_mode) { - case SINGLE_VIEW: - display_tree_part(); - break; - case SPLIT_VIEW: - display_list(); - break; - case FULL_VIEW: - display_tree(&rootmenu); - break; - } + set_view_mode(view_mode); gtk_main(); From b22bbaea7f59ea1fa609462bed2eb075eea97586 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:14 +0900 Subject: [PATCH 0472/2411] kconfig: gconf: grey out button for current view This clarifies which view is currently selected. 
Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/gconf.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 185084fccc23..77e742eebf24 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -38,8 +38,7 @@ static GtkWidget *tree2_w; // right frame static GtkWidget *text_w; static GtkWidget *hpaned; static GtkWidget *vpaned; -static GtkWidget *back_btn; -static GtkWidget *save_btn; +static GtkWidget *back_btn, *save_btn, *single_btn, *split_btn, *full_btn; static GtkWidget *save_menu_item; static GtkTextTag *tag1, *tag2; @@ -131,18 +130,25 @@ static void set_view_mode(enum view_mode mode) gtk_paned_set_position(GTK_PANED(hpaned), 0); } + gtk_widget_set_sensitive(single_btn, TRUE); + gtk_widget_set_sensitive(split_btn, TRUE); + gtk_widget_set_sensitive(full_btn, TRUE); + switch (mode) { case SINGLE_VIEW: current = &rootmenu; display_tree_part(); + gtk_widget_set_sensitive(single_btn, FALSE); break; case SPLIT_VIEW: gtk_tree_store_clear(tree2); display_list(); + gtk_widget_set_sensitive(split_btn, FALSE); break; case FULL_VIEW: gtk_tree_store_clear(tree2); display_tree(&rootmenu); + gtk_widget_set_sensitive(full_btn, FALSE); break; } @@ -1167,10 +1173,15 @@ static void init_main_window(const gchar *glade_file) style = gtk_widget_get_style(main_wnd); + single_btn = glade_xml_get_widget(xml, "button4"); replace_button_icon(xml, main_wnd->window, style, "button4", (gchar **) xpm_single_view); + + split_btn = glade_xml_get_widget(xml, "button5"); replace_button_icon(xml, main_wnd->window, style, "button5", (gchar **) xpm_split_view); + + full_btn = glade_xml_get_widget(xml, "button6"); replace_button_icon(xml, main_wnd->window, style, "button6", (gchar **) xpm_tree_view); From 3e0fb3ef01584bcace87c42a4f96abacad624386 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:15 +0900 Subject: [PATCH 
0473/2411] kconfig: gconf: move the main window event handlers below This allows removal of the forward declaration of on_save_activate(). Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 163 +++++++++++++++++++--------------------- 1 file changed, 78 insertions(+), 85 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 77e742eebf24..d265d25f72cb 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -156,83 +156,6 @@ static void set_view_mode(enum view_mode mode) gtk_widget_set_sensitive(back_btn, FALSE); } -/* Main Windows Callbacks */ - -void on_save_activate(GtkMenuItem * menuitem, gpointer user_data); -gboolean on_window1_delete_event(GtkWidget * widget, GdkEvent * event, - gpointer user_data) -{ - GtkWidget *dialog, *label; - gint result; - gint ret = FALSE; - - if (!conf_get_changed()) - return FALSE; - - dialog = gtk_dialog_new_with_buttons("Warning !", - GTK_WINDOW(main_wnd), - (GtkDialogFlags) - (GTK_DIALOG_MODAL | - GTK_DIALOG_DESTROY_WITH_PARENT), - GTK_STOCK_OK, - GTK_RESPONSE_YES, - GTK_STOCK_NO, - GTK_RESPONSE_NO, - GTK_STOCK_CANCEL, - GTK_RESPONSE_CANCEL, NULL); - gtk_dialog_set_default_response(GTK_DIALOG(dialog), - GTK_RESPONSE_CANCEL); - - label = gtk_label_new("\nSave configuration ?\n"); - gtk_container_add(GTK_CONTAINER(GTK_DIALOG(dialog)->vbox), label); - gtk_widget_show(label); - - result = gtk_dialog_run(GTK_DIALOG(dialog)); - switch (result) { - case GTK_RESPONSE_YES: - on_save_activate(NULL, NULL); - break; - case GTK_RESPONSE_NO: - break; - case GTK_RESPONSE_CANCEL: - case GTK_RESPONSE_DELETE_EVENT: - default: - ret = TRUE; - break; - } - - gtk_widget_destroy(dialog); - - return ret; -} - - -void on_window1_destroy(GtkObject * object, gpointer user_data) -{ - gtk_main_quit(); -} - - -void -on_window1_size_request(GtkWidget * widget, - GtkRequisition * requisition, gpointer user_data) -{ - static gint old_h; - gint w, h; - - if (widget->window == NULL) -
gtk_window_get_default_size(GTK_WINDOW(main_wnd), &w, &h); - else - gdk_window_get_size(widget->window, &w, &h); - - if (h == old_h) - return; - old_h = h; - - gtk_paned_set_position(GTK_PANED(vpaned), 2 * h / 3); -} - - /* Menu & Toolbar Callbacks */ @@ -311,14 +234,6 @@ void on_save_as1_activate(GtkMenuItem * menuitem, gpointer user_data) gtk_widget_show(fs); } - -void on_quit1_activate(GtkMenuItem * menuitem, gpointer user_data) -{ - if (!on_window1_delete_event(NULL, NULL, NULL)) - gtk_widget_destroy(GTK_WIDGET(main_wnd)); -} - - void on_show_name1_activate(GtkMenuItem * menuitem, gpointer user_data) { GtkTreeViewColumn *col; @@ -501,6 +416,84 @@ void on_expand_clicked(GtkButton * button, gpointer user_data) gtk_tree_view_expand_all(GTK_TREE_VIEW(tree2_w)); } +/* Main Windows Callbacks */ + +void on_window1_destroy(GtkObject *object, gpointer user_data) +{ + gtk_main_quit(); +} + +void on_window1_size_request(GtkWidget *widget, + GtkRequisition *requisition, + gpointer user_data) +{ + static gint old_h; + gint w, h; + + if (widget->window == NULL) + gtk_window_get_default_size(GTK_WINDOW(main_wnd), &w, &h); + else + gdk_window_get_size(widget->window, &w, &h); + + if (h == old_h) + return; + old_h = h; + + gtk_paned_set_position(GTK_PANED(vpaned), 2 * h / 3); +} + +gboolean on_window1_delete_event(GtkWidget *widget, GdkEvent *event, + gpointer user_data) +{ + GtkWidget *dialog, *label; + gint result; + gint ret = FALSE; + + if (!conf_get_changed()) + return FALSE; + + dialog = gtk_dialog_new_with_buttons("Warning !", + GTK_WINDOW(main_wnd), + (GtkDialogFlags) + (GTK_DIALOG_MODAL | + GTK_DIALOG_DESTROY_WITH_PARENT), + GTK_STOCK_OK, + GTK_RESPONSE_YES, + GTK_STOCK_NO, + GTK_RESPONSE_NO, + GTK_STOCK_CANCEL, + GTK_RESPONSE_CANCEL, NULL); + gtk_dialog_set_default_response(GTK_DIALOG(dialog), + GTK_RESPONSE_CANCEL); + + label = gtk_label_new("\nSave configuration ?\n"); + gtk_container_add(GTK_CONTAINER(GTK_DIALOG(dialog)->vbox), label); + gtk_widget_show(label); + + 
result = gtk_dialog_run(GTK_DIALOG(dialog)); + switch (result) { + case GTK_RESPONSE_YES: + on_save_activate(NULL, NULL); + break; + case GTK_RESPONSE_NO: + break; + case GTK_RESPONSE_CANCEL: + case GTK_RESPONSE_DELETE_EVENT: + default: + ret = TRUE; + break; + } + + gtk_widget_destroy(dialog); + + return ret; +} + +void on_quit1_activate(GtkMenuItem *menuitem, gpointer user_data) +{ + if (!on_window1_delete_event(NULL, NULL, NULL)) + gtk_widget_destroy(GTK_WIDGET(main_wnd)); +} /* CTree Callbacks */ From f0049c937d2f38ae208c06aa5ef114dac226d01c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:16 +0900 Subject: [PATCH 0474/2411] kconfig: gconf: move button1 and save1 initialization code Move the relevant initialization code closer together. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index d265d25f72cb..3a7bb27b4871 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -1145,8 +1145,8 @@ static void init_main_window(const gchar *glade_file) tree2_w = glade_xml_get_widget(xml, "treeview2"); text_w = glade_xml_get_widget(xml, "textview3"); - back_btn = glade_xml_get_widget(xml, "button1"); - gtk_widget_set_sensitive(back_btn, FALSE); + /* menubar */ + save_menu_item = glade_xml_get_widget(xml, "save1"); widget = glade_xml_get_widget(xml, "show_name1"); gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, @@ -1160,9 +1160,11 @@ static void init_main_window(const gchar *glade_file) gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, show_value); + /* toolbar */ + back_btn = glade_xml_get_widget(xml, "button1"); + gtk_widget_set_sensitive(back_btn, FALSE); + save_btn = glade_xml_get_widget(xml, "button3"); - save_menu_item = glade_xml_get_widget(xml, "save1"); - conf_set_changed_callback(conf_changed); style = gtk_widget_get_style(main_wnd); @@ -1190,6 +1192,8 @@ static 
void init_main_window(const gchar *glade_file) gtk_window_set_title(GTK_WINDOW(main_wnd), rootmenu.prompt->text); gtk_widget_show(main_wnd); + + conf_set_changed_callback(conf_changed); } static void init_tree_model(void) From 77e8ff988918de554e0176c4ce0064944935efb7 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:17 +0900 Subject: [PATCH 0475/2411] kconfig: gconf: add static qualifiers to event handlers This fixes several -Wmissing-prototypes warnings. Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap --- scripts/kconfig/gconf.c | 178 +++++++++++++++++++++++++----------- scripts/kconfig/gconf.glade | 30 ------ 2 files changed, 125 insertions(+), 83 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 3a7bb27b4871..3b4bd897856c 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -173,7 +173,7 @@ load_filename(GtkFileSelection * file_selector, gpointer user_data) display_tree_part(); } -void on_load1_activate(GtkMenuItem * menuitem, gpointer user_data) +static void on_load1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkWidget *fs; @@ -192,8 +192,7 @@ void on_load1_activate(GtkMenuItem * menuitem, gpointer user_data) gtk_widget_show(fs); } - -void on_save_activate(GtkMenuItem * menuitem, gpointer user_data) +static void on_save_activate(GtkMenuItem *menuitem, gpointer user_data) { if (conf_write(NULL)) text_insert_msg("Error", "Unable to save configuration !"); @@ -215,7 +214,7 @@ store_filename(GtkFileSelection * file_selector, gpointer user_data) gtk_widget_destroy(GTK_WIDGET(user_data)); } -void on_save_as1_activate(GtkMenuItem * menuitem, gpointer user_data) +static void on_save_as1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkWidget *fs; @@ -234,7 +233,7 @@ void on_save_as1_activate(GtkMenuItem * menuitem, gpointer user_data) gtk_widget_show(fs); } -void on_show_name1_activate(GtkMenuItem * menuitem, gpointer user_data) +static void 
on_show_name1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkTreeViewColumn *col; @@ -244,8 +243,7 @@ void on_show_name1_activate(GtkMenuItem * menuitem, gpointer user_data) gtk_tree_view_column_set_visible(col, show_name); } - -void on_show_range1_activate(GtkMenuItem * menuitem, gpointer user_data) +static void on_show_range1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkTreeViewColumn *col; @@ -262,8 +260,7 @@ void on_show_range1_activate(GtkMenuItem * menuitem, gpointer user_data) } - -void on_show_data1_activate(GtkMenuItem * menuitem, gpointer user_data) +static void on_show_data1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkTreeViewColumn *col; @@ -273,35 +270,31 @@ void on_show_data1_activate(GtkMenuItem * menuitem, gpointer user_data) gtk_tree_view_column_set_visible(col, show_value); } - -void -on_set_option_mode1_activate(GtkMenuItem *menuitem, gpointer user_data) +static void on_set_option_mode1_activate(GtkMenuItem *menuitem, + gpointer user_data) { opt_mode = OPT_NORMAL; gtk_tree_store_clear(tree2); display_tree(&rootmenu); /* instead of update_tree to speed-up */ } - -void -on_set_option_mode2_activate(GtkMenuItem *menuitem, gpointer user_data) +static void on_set_option_mode2_activate(GtkMenuItem *menuitem, + gpointer user_data) { opt_mode = OPT_ALL; gtk_tree_store_clear(tree2); display_tree(&rootmenu); /* instead of update_tree to speed-up */ } - -void -on_set_option_mode3_activate(GtkMenuItem *menuitem, gpointer user_data) +static void on_set_option_mode3_activate(GtkMenuItem *menuitem, + gpointer user_data) { opt_mode = OPT_PROMPT; gtk_tree_store_clear(tree2); display_tree(&rootmenu); /* instead of update_tree to speed-up */ } - -void on_introduction1_activate(GtkMenuItem * menuitem, gpointer user_data) +static void on_introduction1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkWidget *dialog; const gchar *intro_text = @@ -328,8 +321,7 @@ void on_introduction1_activate(GtkMenuItem * menuitem, gpointer 
user_data) gtk_widget_show_all(dialog); } - -void on_about1_activate(GtkMenuItem * menuitem, gpointer user_data) +static void on_about1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkWidget *dialog; const gchar *about_text = @@ -346,8 +338,7 @@ void on_about1_activate(GtkMenuItem * menuitem, gpointer user_data) gtk_widget_show_all(dialog); } - -void on_license1_activate(GtkMenuItem * menuitem, gpointer user_data) +static void on_license1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkWidget *dialog; const gchar *license_text = @@ -365,8 +356,8 @@ void on_license1_activate(GtkMenuItem * menuitem, gpointer user_data) gtk_widget_show_all(dialog); } - -void on_back_clicked(GtkButton * button, gpointer user_data) +/* toolbar handlers */ +static void on_back_clicked(GtkButton *button, gpointer user_data) { enum prop_type ptype; @@ -380,50 +371,44 @@ void on_back_clicked(GtkButton * button, gpointer user_data) gtk_widget_set_sensitive(back_btn, FALSE); } - -void on_load_clicked(GtkButton * button, gpointer user_data) +static void on_load_clicked(GtkButton *button, gpointer user_data) { on_load1_activate(NULL, user_data); } - -void on_single_clicked(GtkButton * button, gpointer user_data) +static void on_single_clicked(GtkButton *button, gpointer user_data) { set_view_mode(SINGLE_VIEW); } - -void on_split_clicked(GtkButton * button, gpointer user_data) +static void on_split_clicked(GtkButton *button, gpointer user_data) { set_view_mode(SPLIT_VIEW); } - -void on_full_clicked(GtkButton * button, gpointer user_data) +static void on_full_clicked(GtkButton *button, gpointer user_data) { set_view_mode(FULL_VIEW); } - -void on_collapse_clicked(GtkButton * button, gpointer user_data) +static void on_collapse_clicked(GtkButton *button, gpointer user_data) { gtk_tree_view_collapse_all(GTK_TREE_VIEW(tree2_w)); } - -void on_expand_clicked(GtkButton * button, gpointer user_data) +static void on_expand_clicked(GtkButton *button, gpointer user_data) { 
gtk_tree_view_expand_all(GTK_TREE_VIEW(tree2_w)); } /* Main Windows Callbacks */ -void on_window1_destroy(GtkObject *object, gpointer user_data) +static void on_window1_destroy(GtkObject *object, gpointer user_data) { gtk_main_quit(); } -void on_window1_size_request(GtkWidget *widget, +static void on_window1_size_request(GtkWidget *widget, GtkRequisition *requisition, gpointer user_data) { @@ -442,7 +427,7 @@ void on_window1_size_request(GtkWidget *widget, gtk_paned_set_position(GTK_PANED(vpaned), 2 * h / 3); } -gboolean on_window1_delete_event(GtkWidget *widget, GdkEvent *event, +static gboolean on_window1_delete_event(GtkWidget *widget, GdkEvent *event, gpointer user_data) { GtkWidget *dialog, *label; @@ -489,7 +474,7 @@ gboolean on_window1_delete_event(GtkWidget *widget, GdkEvent *event, return ret; } -void on_quit1_activate(GtkMenuItem *menuitem, gpointer user_data) +static void on_quit1_activate(GtkMenuItem *menuitem, gpointer user_data) { if (!on_window1_delete_event(NULL, NULL, NULL)) gtk_widget_destroy(GTK_WIDGET(main_wnd)); @@ -599,9 +584,9 @@ static gint column2index(GtkTreeViewColumn * column) /* User click: update choice (full) or goes down (single) */ -gboolean -on_treeview2_button_press_event(GtkWidget * widget, - GdkEventButton * event, gpointer user_data) +static gboolean on_treeview2_button_press_event(GtkWidget *widget, + GdkEventButton *event, + gpointer user_data) { GtkTreeView *view = GTK_TREE_VIEW(widget); GtkTreePath *path; @@ -649,9 +634,9 @@ on_treeview2_button_press_event(GtkWidget * widget, } /* Key pressed: update choice */ -gboolean -on_treeview2_key_press_event(GtkWidget * widget, - GdkEventKey * event, gpointer user_data) +static gboolean on_treeview2_key_press_event(GtkWidget *widget, + GdkEventKey *event, + gpointer user_data) { GtkTreeView *view = GTK_TREE_VIEW(widget); GtkTreePath *path; @@ -691,8 +676,8 @@ on_treeview2_key_press_event(GtkWidget * widget, /* Row selection changed: update help */ -void 
-on_treeview2_cursor_changed(GtkTreeView * treeview, gpointer user_data) +static void on_treeview2_cursor_changed(GtkTreeView *treeview, + gpointer user_data) { GtkTreeSelection *selection; GtkTreeIter iter; @@ -707,9 +692,9 @@ on_treeview2_cursor_changed(GtkTreeView * treeview, gpointer user_data) /* User click: display sub-tree in the right frame. */ -gboolean -on_treeview1_button_press_event(GtkWidget * widget, - GdkEventButton * event, gpointer user_data) +static gboolean on_treeview1_button_press_event(GtkWidget *widget, + GdkEventButton *event, + gpointer user_data) { GtkTreeView *view = GTK_TREE_VIEW(widget); GtkTreePath *path; @@ -1139,47 +1124,134 @@ static void init_main_window(const gchar *glade_file) glade_xml_signal_autoconnect(xml); main_wnd = glade_xml_get_widget(xml, "window1"); + g_signal_connect(main_wnd, "destroy", + G_CALLBACK(on_window1_destroy), NULL); + g_signal_connect(main_wnd, "size_request", + G_CALLBACK(on_window1_size_request), NULL); + g_signal_connect(main_wnd, "delete_event", + G_CALLBACK(on_window1_delete_event), NULL); + hpaned = glade_xml_get_widget(xml, "hpaned1"); vpaned = glade_xml_get_widget(xml, "vpaned1"); tree1_w = glade_xml_get_widget(xml, "treeview1"); + g_signal_connect(tree1_w, "cursor_changed", + G_CALLBACK(on_treeview2_cursor_changed), NULL); + g_signal_connect(tree1_w, "button_press_event", + G_CALLBACK(on_treeview1_button_press_event), NULL); + g_signal_connect(tree1_w, "key_press_event", + G_CALLBACK(on_treeview2_key_press_event), NULL); + tree2_w = glade_xml_get_widget(xml, "treeview2"); + g_signal_connect(tree2_w, "cursor_changed", + G_CALLBACK(on_treeview2_cursor_changed), NULL); + g_signal_connect(tree2_w, "button_press_event", + G_CALLBACK(on_treeview2_button_press_event), NULL); + g_signal_connect(tree2_w, "key_press_event", + G_CALLBACK(on_treeview2_key_press_event), NULL); + text_w = glade_xml_get_widget(xml, "textview3"); /* menubar */ + widget = glade_xml_get_widget(xml, "load1"); + 
g_signal_connect(widget, "activate", + G_CALLBACK(on_load1_activate), NULL); + save_menu_item = glade_xml_get_widget(xml, "save1"); + g_signal_connect(save_menu_item, "activate", + G_CALLBACK(on_save_activate), NULL); + + widget = glade_xml_get_widget(xml, "save_as1"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_save_as1_activate), NULL); + + widget = glade_xml_get_widget(xml, "quit1"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_quit1_activate), NULL); widget = glade_xml_get_widget(xml, "show_name1"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_show_name1_activate), NULL); gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, show_name); widget = glade_xml_get_widget(xml, "show_range1"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_show_range1_activate), NULL); gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, show_range); widget = glade_xml_get_widget(xml, "show_data1"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_show_data1_activate), NULL); gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, show_value); + widget = glade_xml_get_widget(xml, "set_option_mode1"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_set_option_mode1_activate), NULL); + + widget = glade_xml_get_widget(xml, "set_option_mode2"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_set_option_mode2_activate), NULL); + + widget = glade_xml_get_widget(xml, "set_option_mode3"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_set_option_mode3_activate), NULL); + + widget = glade_xml_get_widget(xml, "introduction1"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_introduction1_activate), NULL); + + widget = glade_xml_get_widget(xml, "about1"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_about1_activate), NULL); + + widget = glade_xml_get_widget(xml, "license1"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_license1_activate), NULL); + /* toolbar */ back_btn = 
glade_xml_get_widget(xml, "button1"); + g_signal_connect(back_btn, "clicked", + G_CALLBACK(on_back_clicked), NULL); gtk_widget_set_sensitive(back_btn, FALSE); + widget = glade_xml_get_widget(xml, "button2"); + g_signal_connect(widget, "clicked", + G_CALLBACK(on_load_clicked), NULL); + save_btn = glade_xml_get_widget(xml, "button3"); + g_signal_connect(save_btn, "clicked", + G_CALLBACK(on_save_activate), NULL); style = gtk_widget_get_style(main_wnd); single_btn = glade_xml_get_widget(xml, "button4"); + g_signal_connect(single_btn, "clicked", + G_CALLBACK(on_single_clicked), NULL); replace_button_icon(xml, main_wnd->window, style, "button4", (gchar **) xpm_single_view); split_btn = glade_xml_get_widget(xml, "button5"); + g_signal_connect(split_btn, "clicked", + G_CALLBACK(on_split_clicked), NULL); replace_button_icon(xml, main_wnd->window, style, "button5", (gchar **) xpm_split_view); full_btn = glade_xml_get_widget(xml, "button6"); + g_signal_connect(full_btn, "clicked", + G_CALLBACK(on_full_clicked), NULL); replace_button_icon(xml, main_wnd->window, style, "button6", (gchar **) xpm_tree_view); + widget = glade_xml_get_widget(xml, "button7"); + g_signal_connect(widget, "clicked", + G_CALLBACK(on_collapse_clicked), NULL); + + widget = glade_xml_get_widget(xml, "button8"); + g_signal_connect(widget, "clicked", + G_CALLBACK(on_expand_clicked), NULL); + txtbuf = gtk_text_view_get_buffer(GTK_TEXT_VIEW(text_w)); tag1 = gtk_text_buffer_create_tag(txtbuf, "mytag1", "foreground", "red", diff --git a/scripts/kconfig/gconf.glade b/scripts/kconfig/gconf.glade index 19b80f2ec1ff..8519104a3c2b 100644 --- a/scripts/kconfig/gconf.glade +++ b/scripts/kconfig/gconf.glade @@ -17,9 +17,6 @@ False GDK_WINDOW_TYPE_HINT_NORMAL GDK_GRAVITY_NORTH_WEST - - - @@ -46,7 +43,6 @@ Load a config file _Load True - @@ -57,7 +53,6 @@ Save the config in .config _Save True - @@ -68,7 +63,6 @@ Save the config in a file Save _as True - @@ -83,7 +77,6 @@ True _Quit True - @@ -108,7 +101,6 @@ Show _name 
True False - @@ -119,7 +111,6 @@ Show _range True False - @@ -130,7 +121,6 @@ Show _data True False - @@ -147,7 +137,6 @@ Show normal options True True - @@ -159,7 +148,6 @@ True False set_option_mode1 - @@ -171,7 +159,6 @@ True False set_option_mode1 - @@ -194,7 +181,6 @@ True _Introduction True - @@ -204,7 +190,6 @@ True _About True - @@ -214,7 +199,6 @@ True _License True - @@ -254,7 +238,6 @@ True True False - False @@ -291,7 +274,6 @@ True True False - False @@ -309,7 +291,6 @@ True True False - False @@ -346,7 +327,6 @@ True True False - False @@ -364,7 +344,6 @@ True True False - False @@ -382,7 +361,6 @@ True True False - False @@ -419,7 +397,6 @@ True True False - False @@ -437,7 +414,6 @@ True True False - False @@ -477,9 +453,6 @@ False False False - - - @@ -512,9 +485,6 @@ False False False - - - From e06b176bf1b4a779f76b686ab5230dce45a8360e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:18 +0900 Subject: [PATCH 0476/2411] kconfig: gconf: remove glade_xml_signal_autoconnect() call Now that all signals are connected manually, this is no longer necessary. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 3b4bd897856c..1c2fd71369f0 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -1121,7 +1121,6 @@ static void init_main_window(const gchar *glade_file) xml = glade_xml_new(glade_file, "window1", NULL); if (!xml) g_error("GUI loading failed !\n"); - glade_xml_signal_autoconnect(xml); main_wnd = glade_xml_get_widget(xml, "window1"); g_signal_connect(main_wnd, "destroy", From 3beae8659513550b6b82a4ccdc4d25be9497f208 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:19 +0900 Subject: [PATCH 0477/2411] kconfig: gconf: make key_press_event work in left pane too Currently, on_treeview2_key_press_event() returns early for the tree1 widget. 
We can make it work on the left pane as well by avoiding the hardcoded use of model2. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 1c2fd71369f0..4acbcf912c6e 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -639,6 +639,7 @@ static gboolean on_treeview2_key_press_event(GtkWidget *widget, gpointer user_data) { GtkTreeView *view = GTK_TREE_VIEW(widget); + GtkTreeModel *model = gtk_tree_view_get_model(view); GtkTreePath *path; GtkTreeIter iter; struct menu *menu; @@ -655,11 +656,9 @@ static gboolean on_treeview2_key_press_event(GtkWidget *widget, gtk_tree_view_expand_row(view, path, FALSE); return TRUE; } - if (widget == tree1_w) - return FALSE; - gtk_tree_model_get_iter(model2, &iter, path); - gtk_tree_model_get(model2, &iter, COL_MENU, &menu, -1); + gtk_tree_model_get_iter(model, &iter, path); + gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); if (!strcasecmp(event->string, "n")) col = COL_NO; From cae9cdbcd9af044810bcceeb43a87accca47c71d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:20 +0900 Subject: [PATCH 0478/2411] kconfig: gconf: avoid hardcoding model2 in on_treeview2_cursor_changed() The on_treeview2_cursor_changed() handler is connected to both the left and right tree views, but it hardcodes model2 (the GtkTreeModel of the right tree view). This is incorrect. Get the associated model from the view. 
Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 4acbcf912c6e..d7aa7bad965f 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -678,13 +678,14 @@ static gboolean on_treeview2_key_press_event(GtkWidget *widget, static void on_treeview2_cursor_changed(GtkTreeView *treeview, gpointer user_data) { + GtkTreeModel *model = gtk_tree_view_get_model(treeview); GtkTreeSelection *selection; GtkTreeIter iter; struct menu *menu; selection = gtk_tree_view_get_selection(treeview); - if (gtk_tree_selection_get_selected(selection, &model2, &iter)) { - gtk_tree_model_get(model2, &iter, COL_MENU, &menu, -1); + if (gtk_tree_selection_get_selected(selection, &model, &iter)) { + gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); text_insert_help(menu); } } From e6991e8004bf1ff8fc31b14833c4995672f18b04 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:21 +0900 Subject: [PATCH 0479/2411] kconfig: gconf: avoid hardcoding model2 in renderer_edited() Although this is only used in the right tree view, it is better not to hardcode model2 for consistency. 
Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index d7aa7bad965f..df822f4e13c5 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -487,19 +487,21 @@ static void renderer_edited(GtkCellRendererText * cell, const gchar * path_string, const gchar * new_text, gpointer user_data) { + GtkTreeView *view = GTK_TREE_VIEW(user_data); + GtkTreeModel *model = gtk_tree_view_get_model(view); GtkTreePath *path = gtk_tree_path_new_from_string(path_string); GtkTreeIter iter; const char *old_def, *new_def; struct menu *menu; struct symbol *sym; - if (!gtk_tree_model_get_iter(model2, &iter, path)) + if (!gtk_tree_model_get_iter(model, &iter, path)) goto free; - gtk_tree_model_get(model2, &iter, COL_MENU, &menu, -1); + gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); sym = menu->sym; - gtk_tree_model_get(model2, &iter, COL_VALUE, &old_def, -1); + gtk_tree_model_get(model, &iter, COL_VALUE, &old_def, -1); new_def = new_text; sym_set_string_value(sym, new_def); @@ -1399,7 +1401,7 @@ static void init_right_tree(void) "foreground-gdk", COL_COLOR, NULL); g_signal_connect(G_OBJECT(renderer), "edited", - G_CALLBACK(renderer_edited), NULL); + G_CALLBACK(renderer_edited), tree2_w); for (i = 0; i < COL_VALUE; i++) { column = gtk_tree_view_get_column(view, i); From 59adbcd8051a222023f52cfac0143d927735b194 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:22 +0900 Subject: [PATCH 0480/2411] kconfig: gconf: avoid hardcoding model* in on_treeview*_button_press_event() It is better not to hardcode model1 or model2 for consistency. 
Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index df822f4e13c5..f03e94cd5fa3 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -591,6 +591,7 @@ static gboolean on_treeview2_button_press_event(GtkWidget *widget, gpointer user_data) { GtkTreeView *view = GTK_TREE_VIEW(widget); + GtkTreeModel *model = gtk_tree_view_get_model(view); GtkTreePath *path; GtkTreeViewColumn *column; GtkTreeIter iter; @@ -603,9 +604,9 @@ static gboolean on_treeview2_button_press_event(GtkWidget *widget, if (path == NULL) return FALSE; - if (!gtk_tree_model_get_iter(model2, &iter, path)) + if (!gtk_tree_model_get_iter(model, &iter, path)) return FALSE; - gtk_tree_model_get(model2, &iter, COL_MENU, &menu, -1); + gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); col = column2index(column); if (event->type == GDK_2BUTTON_PRESS) { @@ -699,6 +700,7 @@ static gboolean on_treeview1_button_press_event(GtkWidget *widget, gpointer user_data) { GtkTreeView *view = GTK_TREE_VIEW(widget); + GtkTreeModel *model = gtk_tree_view_get_model(view); GtkTreePath *path; GtkTreeIter iter; struct menu *menu; @@ -709,8 +711,8 @@ static gboolean on_treeview1_button_press_event(GtkWidget *widget, if (path == NULL) return FALSE; - gtk_tree_model_get_iter(model1, &iter, path); - gtk_tree_model_get(model1, &iter, COL_MENU, &menu, -1); + gtk_tree_model_get_iter(model, &iter, path); + gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); if (event->type == GDK_2BUTTON_PRESS) { toggle_sym_value(menu); From 4d89059a722d9a562bfe6ee1e3941ccc3c6c70b6 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:23 +0900 Subject: [PATCH 0481/2411] kconfig: gconf: add on_save_clicked() event handler The "clicked" event handler for GtkToolButton takes the GtkToolButton* as the first parameter. 
This is different from the existing on_save_activate() handler. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index f03e94cd5fa3..4e21cf46ff01 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -376,6 +376,11 @@ static void on_load_clicked(GtkButton *button, gpointer user_data) on_load1_activate(NULL, user_data); } +static void on_save_clicked(GtkButton *button, gpointer user_data) +{ + on_save_activate(NULL, user_data); +} + static void on_single_clicked(GtkButton *button, gpointer user_data) { set_view_mode(SINGLE_VIEW); @@ -1225,7 +1230,7 @@ static void init_main_window(const gchar *glade_file) save_btn = glade_xml_get_widget(xml, "button3"); g_signal_connect(save_btn, "clicked", - G_CALLBACK(on_save_activate), NULL); + G_CALLBACK(on_save_clicked), NULL); style = gtk_widget_get_style(main_wnd); From 30dda0fdf7a6655bf180b61063087aed28812007 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:24 +0900 Subject: [PATCH 0482/2411] kconfig: gconf: use GtkFileChooser in on_load1_activate() gtk_file_selection_new() is deprecated, and gtk_file_chooser_dialog_new() should be used instead. 
[1] [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/docs/reference/gtk/tmpl/gtkfilesel.sgml?ref_type=tags#L156 Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/gconf.c | 58 ++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 4e21cf46ff01..20a20a5888b9 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -158,38 +158,38 @@ static void set_view_mode(enum view_mode mode) /* Menu & Toolbar Callbacks */ - -static void -load_filename(GtkFileSelection * file_selector, gpointer user_data) -{ - const gchar *fn; - - fn = gtk_file_selection_get_filename(GTK_FILE_SELECTION - (user_data)); - - if (conf_read(fn)) - text_insert_msg("Error", "Unable to load configuration !"); - else - display_tree_part(); -} - static void on_load1_activate(GtkMenuItem *menuitem, gpointer user_data) { - GtkWidget *fs; + GtkWidget *dialog; + GtkFileChooser *chooser; + gint res; - fs = gtk_file_selection_new("Load file..."); - g_signal_connect(GTK_OBJECT(GTK_FILE_SELECTION(fs)->ok_button), - "clicked", - G_CALLBACK(load_filename), (gpointer) fs); - g_signal_connect_swapped(GTK_OBJECT - (GTK_FILE_SELECTION(fs)->ok_button), - "clicked", G_CALLBACK(gtk_widget_destroy), - (gpointer) fs); - g_signal_connect_swapped(GTK_OBJECT - (GTK_FILE_SELECTION(fs)->cancel_button), - "clicked", G_CALLBACK(gtk_widget_destroy), - (gpointer) fs); - gtk_widget_show(fs); + dialog = gtk_file_chooser_dialog_new("Load file...", + GTK_WINDOW(user_data), + GTK_FILE_CHOOSER_ACTION_OPEN, + "_Cancel", GTK_RESPONSE_CANCEL, + "_Open", GTK_RESPONSE_ACCEPT, + NULL); + + chooser = GTK_FILE_CHOOSER(dialog); + gtk_file_chooser_set_filename(chooser, conf_get_configname()); + + res = gtk_dialog_run(GTK_DIALOG(dialog)); + if (res == GTK_RESPONSE_ACCEPT) { + char *filename; + + filename = gtk_file_chooser_get_filename(chooser); + + if (conf_read(filename)) + 
text_insert_msg("Error", + "Unable to load configuration!"); + else + display_tree_part(); + + g_free(filename); + } + + gtk_widget_destroy(GTK_WIDGET(dialog)); } static void on_save_activate(GtkMenuItem *menuitem, gpointer user_data) From fd7fd8024c32ae2037f98d53198e42d5c597ca0b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:25 +0900 Subject: [PATCH 0483/2411] kconfig: gconf: use GtkFileChooser in on_save_as1_activate() gtk_file_selection_new() is deprecated, and gtk_file_chooser_dialog_new() should be used instead. [1] [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/docs/reference/gtk/tmpl/gtkfilesel.sgml?ref_type=tags#L156 Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/gconf.c | 56 ++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 20a20a5888b9..82e8edb8a82c 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -199,38 +199,36 @@ static void on_save_activate(GtkMenuItem *menuitem, gpointer user_data) conf_write_autoconf(0); } - -static void -store_filename(GtkFileSelection * file_selector, gpointer user_data) -{ - const gchar *fn; - - fn = gtk_file_selection_get_filename(GTK_FILE_SELECTION - (user_data)); - - if (conf_write(fn)) - text_insert_msg("Error", "Unable to save configuration !"); - - gtk_widget_destroy(GTK_WIDGET(user_data)); -} - static void on_save_as1_activate(GtkMenuItem *menuitem, gpointer user_data) { - GtkWidget *fs; + GtkWidget *dialog; + GtkFileChooser *chooser; + gint res; - fs = gtk_file_selection_new("Save file as..."); - g_signal_connect(GTK_OBJECT(GTK_FILE_SELECTION(fs)->ok_button), - "clicked", - G_CALLBACK(store_filename), (gpointer) fs); - g_signal_connect_swapped(GTK_OBJECT - (GTK_FILE_SELECTION(fs)->ok_button), - "clicked", G_CALLBACK(gtk_widget_destroy), - (gpointer) fs); - g_signal_connect_swapped(GTK_OBJECT - 
(GTK_FILE_SELECTION(fs)->cancel_button), - "clicked", G_CALLBACK(gtk_widget_destroy), - (gpointer) fs); - gtk_widget_show(fs); + dialog = gtk_file_chooser_dialog_new("Save file as...", + GTK_WINDOW(user_data), + GTK_FILE_CHOOSER_ACTION_SAVE, + "_Cancel", GTK_RESPONSE_CANCEL, + "_Save", GTK_RESPONSE_ACCEPT, + NULL); + + chooser = GTK_FILE_CHOOSER(dialog); + gtk_file_chooser_set_filename(chooser, conf_get_configname()); + + res = gtk_dialog_run(GTK_DIALOG(dialog)); + if (res == GTK_RESPONSE_ACCEPT) { + char *filename; + + filename = gtk_file_chooser_get_filename(chooser); + + if (conf_write(filename)) + text_insert_msg("Error", + "Unable to save configuration !"); + + g_free(filename); + } + + gtk_widget_destroy(dialog); } static void on_show_name1_activate(GtkMenuItem *menuitem, gpointer user_data) From 9517f47dbf8ab7a7e554e7b34563982cfc63c366 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:26 +0900 Subject: [PATCH 0484/2411] kconfig: gconf: use GdkPixbuf in replace_button_icon() gdk_pixmap_create_from_xpm_d has been deprecated since version 2.22. Use a GdkPixbuf instead. You can use gdk_pixbuf_new_from_xpm_data() to create it. 
[1] [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/gdk/gdkpixmap.c#L742 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 82e8edb8a82c..2eac486cec5b 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -1103,17 +1103,16 @@ static void fixup_rootmenu(struct menu *menu) static void replace_button_icon(GladeXML *xml, GdkDrawable *window, GtkStyle *style, gchar *btn_name, gchar **xpm) { - GdkPixmap *pixmap; - GdkBitmap *mask; - GtkToolButton *button; + GdkPixbuf *pixbuf; GtkWidget *image; + GtkToolButton *button; - pixmap = gdk_pixmap_create_from_xpm_d(window, &mask, - &style->bg[GTK_STATE_NORMAL], - xpm); + pixbuf = gdk_pixbuf_new_from_xpm_data((const char **)xpm); + image = gtk_image_new_from_pixbuf(pixbuf); + g_object_unref(pixbuf); button = GTK_TOOL_BUTTON(glade_xml_get_widget(xml, btn_name)); - image = gtk_image_new_from_pixmap(pixmap, mask); + gtk_widget_show(image); gtk_tool_button_set_icon_widget(button, image); } From b8f660aabcf2dbbd7e8f84c6341e015124bbfc4e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:27 +0900 Subject: [PATCH 0485/2411] kconfig: gconf: refactor replace_button_icon() The "window" and "style" arguments for replace_button_icon() are now unused. Remove them and refactor the function accordingly. 
Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 2eac486cec5b..045729d76feb 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -1100,21 +1100,17 @@ static void fixup_rootmenu(struct menu *menu) } /* Main Window Initialization */ -static void replace_button_icon(GladeXML *xml, GdkDrawable *window, - GtkStyle *style, gchar *btn_name, gchar **xpm) +static void replace_button_icon(GtkWidget *widget, const char * const xpm[]) { GdkPixbuf *pixbuf; GtkWidget *image; - GtkToolButton *button; pixbuf = gdk_pixbuf_new_from_xpm_data((const char **)xpm); image = gtk_image_new_from_pixbuf(pixbuf); g_object_unref(pixbuf); - button = GTK_TOOL_BUTTON(glade_xml_get_widget(xml, btn_name)); - gtk_widget_show(image); - gtk_tool_button_set_icon_widget(button, image); + gtk_tool_button_set_icon_widget(GTK_TOOL_BUTTON(widget), image); } static void init_main_window(const gchar *glade_file) @@ -1122,7 +1118,6 @@ static void init_main_window(const gchar *glade_file) GladeXML *xml; GtkWidget *widget; GtkTextBuffer *txtbuf; - GtkStyle *style; xml = glade_xml_new(glade_file, "window1", NULL); if (!xml) @@ -1229,25 +1224,20 @@ static void init_main_window(const gchar *glade_file) g_signal_connect(save_btn, "clicked", G_CALLBACK(on_save_clicked), NULL); - style = gtk_widget_get_style(main_wnd); - single_btn = glade_xml_get_widget(xml, "button4"); g_signal_connect(single_btn, "clicked", G_CALLBACK(on_single_clicked), NULL); - replace_button_icon(xml, main_wnd->window, style, - "button4", (gchar **) xpm_single_view); + replace_button_icon(single_btn, xpm_single_view); split_btn = glade_xml_get_widget(xml, "button5"); g_signal_connect(split_btn, "clicked", G_CALLBACK(on_split_clicked), NULL); - replace_button_icon(xml, main_wnd->window, style, - "button5", (gchar **) xpm_split_view); + replace_button_icon(split_btn, 
xpm_split_view); full_btn = glade_xml_get_widget(xml, "button6"); g_signal_connect(full_btn, "clicked", G_CALLBACK(on_full_clicked), NULL); - replace_button_icon(xml, main_wnd->window, style, - "button6", (gchar **) xpm_tree_view); + replace_button_icon(full_btn, xpm_tree_view); widget = glade_xml_get_widget(xml, "button7"); g_signal_connect(widget, "clicked", From 20f375cbfe4f7e3870226f68877c9285bd8401fe Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:28 +0900 Subject: [PATCH 0486/2411] kconfig: gconf: make introduction, about, license dialogs modal These are modal dialogs in xconfig. Make them modal in gconfig as well. Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/gconf.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 045729d76feb..0e9e078f9c34 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -313,10 +313,8 @@ static void on_introduction1_activate(GtkMenuItem *menuitem, gpointer user_data) GTK_DIALOG_DESTROY_WITH_PARENT, GTK_MESSAGE_INFO, GTK_BUTTONS_CLOSE, "%s", intro_text); - g_signal_connect_swapped(GTK_OBJECT(dialog), "response", - G_CALLBACK(gtk_widget_destroy), - GTK_OBJECT(dialog)); - gtk_widget_show_all(dialog); + gtk_dialog_run(GTK_DIALOG(dialog)); + gtk_widget_destroy(dialog); } static void on_about1_activate(GtkMenuItem *menuitem, gpointer user_data) @@ -330,10 +328,8 @@ static void on_about1_activate(GtkMenuItem *menuitem, gpointer user_data) GTK_DIALOG_DESTROY_WITH_PARENT, GTK_MESSAGE_INFO, GTK_BUTTONS_CLOSE, "%s", about_text); - g_signal_connect_swapped(GTK_OBJECT(dialog), "response", - G_CALLBACK(gtk_widget_destroy), - GTK_OBJECT(dialog)); - gtk_widget_show_all(dialog); + gtk_dialog_run(GTK_DIALOG(dialog)); + gtk_widget_destroy(dialog); } static void on_license1_activate(GtkMenuItem *menuitem, gpointer user_data) @@ -348,10 +344,8 @@ static void 
on_license1_activate(GtkMenuItem *menuitem, gpointer user_data) GTK_DIALOG_DESTROY_WITH_PARENT, GTK_MESSAGE_INFO, GTK_BUTTONS_CLOSE, "%s", license_text); - g_signal_connect_swapped(GTK_OBJECT(dialog), "response", - G_CALLBACK(gtk_widget_destroy), - GTK_OBJECT(dialog)); - gtk_widget_show_all(dialog); + gtk_dialog_run(GTK_DIALOG(dialog)); + gtk_widget_destroy(dialog); } /* toolbar handlers */ From 1bd81df0b1cc8d17b7818889c4c1cdf53415e606 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:29 +0900 Subject: [PATCH 0487/2411] kconfig: gconf: remove global 'tree' variable Pass the tree store as a function parameter to make it clearer which tree is being updated. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 42 ++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 0e9e078f9c34..b0dffca142a0 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -43,7 +43,7 @@ static GtkWidget *save_menu_item; static GtkTextTag *tag1, *tag2; -static GtkTreeStore *tree1, *tree2, *tree; +static GtkTreeStore *tree1, *tree2; static GtkTreeModel *model1, *model2; static struct menu *current; // current node for SINGLE view @@ -57,7 +57,7 @@ enum { }; static void display_list(void); -static void display_tree(struct menu *menu); +static void display_tree(GtkTreeStore *store, struct menu *menu); static void display_tree_part(void); static void update_tree(struct menu *src, GtkTreeIter * dst); @@ -147,7 +147,7 @@ static void set_view_mode(enum view_mode mode) break; case FULL_VIEW: gtk_tree_store_clear(tree2); - display_tree(&rootmenu); + display_tree(tree2, &rootmenu); gtk_widget_set_sensitive(full_btn, FALSE); break; } @@ -273,7 +273,7 @@ static void on_set_option_mode1_activate(GtkMenuItem *menuitem, { opt_mode = OPT_NORMAL; gtk_tree_store_clear(tree2); - display_tree(&rootmenu); /* instead of update_tree to speed-up */ + 
display_tree(tree2, &rootmenu); /* instead of update_tree to speed-up */ } static void on_set_option_mode2_activate(GtkMenuItem *menuitem, @@ -281,7 +281,7 @@ static void on_set_option_mode2_activate(GtkMenuItem *menuitem, { opt_mode = OPT_ALL; gtk_tree_store_clear(tree2); - display_tree(&rootmenu); /* instead of update_tree to speed-up */ + display_tree(tree2, &rootmenu); /* instead of update_tree to speed-up */ } static void on_set_option_mode3_activate(GtkMenuItem *menuitem, @@ -289,7 +289,7 @@ static void on_set_option_mode3_activate(GtkMenuItem *menuitem, { opt_mode = OPT_PROMPT; gtk_tree_store_clear(tree2); - display_tree(&rootmenu); /* instead of update_tree to speed-up */ + display_tree(tree2, &rootmenu); /* instead of update_tree to speed-up */ } static void on_introduction1_activate(GtkMenuItem *menuitem, gpointer user_data) @@ -853,7 +853,8 @@ static gchar **fill_row(struct menu *menu) /* Set the node content with a row of strings */ -static void set_node(GtkTreeIter * node, struct menu *menu, gchar ** row) +static void set_node(GtkTreeStore *tree, GtkTreeIter *node, + struct menu *menu, gchar **row) { GdkColor color; gboolean success; @@ -977,7 +978,7 @@ static void update_tree(struct menu *src, GtkTreeIter * dst) gtk_tree_store_insert_before(tree2, child2, dst, sibling); - set_node(child2, menu1, fill_row(menu1)); + set_node(tree2, child2, menu1, fill_row(menu1)); if (menu2 == NULL) valid = TRUE; } else { // remove node @@ -991,7 +992,7 @@ static void update_tree(struct menu *src, GtkTreeIter * dst) goto reparse; // next child } } else if (sym && (child1->flags & MENU_CHANGED)) { - set_node(child2, menu1, fill_row(menu1)); + set_node(tree2, child2, menu1, fill_row(menu1)); } update_tree(child1, child2); @@ -1002,7 +1003,8 @@ static void update_tree(struct menu *src, GtkTreeIter * dst) /* Display the whole tree (single/split/full view) */ -static void _display_tree(struct menu *menu, GtkTreeIter *parent) +static void _display_tree(GtkTreeStore *tree, 
struct menu *menu, + GtkTreeIter *parent) { struct property *prop; struct menu *child; @@ -1030,7 +1032,7 @@ static void _display_tree(struct menu *menu, GtkTreeIter *parent) (opt_mode == OPT_PROMPT && menu_has_prompt(child)) || (opt_mode == OPT_ALL && menu_get_prompt(child))) { gtk_tree_store_append(tree, &iter, parent); - set_node(&iter, child, fill_row(child)); + set_node(tree, &iter, child, fill_row(child)); } if ((view_mode != FULL_VIEW) && (ptype == P_MENU) @@ -1044,13 +1046,13 @@ static void _display_tree(struct menu *menu, GtkTreeIter *parent) if (((view_mode == SINGLE_VIEW) && (menu->flags & MENU_ROOT)) || (view_mode == FULL_VIEW) || (view_mode == SPLIT_VIEW)) - _display_tree(child, &iter); + _display_tree(tree, child, &iter); } } -static void display_tree(struct menu *menu) +static void display_tree(GtkTreeStore *store, struct menu *menu) { - _display_tree(menu, NULL); + _display_tree(store, menu, NULL); } /* Display a part of the tree starting at current node (single/split view) */ @@ -1058,11 +1060,11 @@ static void display_tree_part(void) { gtk_tree_store_clear(tree2); if (view_mode == SINGLE_VIEW) - display_tree(current); + display_tree(tree2, current); else if (view_mode == SPLIT_VIEW) - display_tree(browsed); + display_tree(tree2, browsed); else if (view_mode == FULL_VIEW) - display_tree(&rootmenu); + display_tree(tree2, &rootmenu); gtk_tree_view_expand_all(GTK_TREE_VIEW(tree2_w)); } @@ -1071,10 +1073,8 @@ static void display_list(void) { gtk_tree_store_clear(tree1); - tree = tree1; - display_tree(&rootmenu); + display_tree(tree1, &rootmenu); gtk_tree_view_expand_all(GTK_TREE_VIEW(tree1_w)); - tree = tree2; } static void fixup_rootmenu(struct menu *menu) @@ -1259,7 +1259,7 @@ static void init_main_window(const gchar *glade_file) static void init_tree_model(void) { - tree = tree2 = gtk_tree_store_new(COL_NUMBER, + tree2 = gtk_tree_store_new(COL_NUMBER, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, From 
ecaa87d4e9c2820a376270955cd166cd77f82891 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:30 +0900 Subject: [PATCH 0488/2411] kconfig: gconf: merge 'current' and 'browsed' global variables The 'current' (SINGLE view) and 'browsed' (SPLIT_VIEW) variables serve similar purposes and are not needed at the same time. Merge them. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index b0dffca142a0..bebd18ae07b0 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -46,8 +46,7 @@ static GtkTextTag *tag1, *tag2; static GtkTreeStore *tree1, *tree2; static GtkTreeModel *model1, *model2; -static struct menu *current; // current node for SINGLE view -static struct menu *browsed; // browsed node for SPLIT view +static struct menu *browsed; // browsed menu for SINGLE/SPLIT view enum { COL_OPTION, COL_NAME, COL_NO, COL_MOD, COL_YES, COL_VALUE, @@ -136,7 +135,7 @@ static void set_view_mode(enum view_mode mode) switch (mode) { case SINGLE_VIEW: - current = &rootmenu; + browsed = &rootmenu; display_tree_part(); gtk_widget_set_sensitive(single_btn, FALSE); break; @@ -353,13 +352,13 @@ static void on_back_clicked(GtkButton *button, gpointer user_data) { enum prop_type ptype; - current = current->parent; - ptype = current->prompt ? current->prompt->type : P_UNKNOWN; + browsed = browsed->parent; + ptype = browsed->prompt ? 
browsed->prompt->type : P_UNKNOWN; if (ptype != P_MENU) - current = current->parent; + browsed = browsed->parent; display_tree_part(); - if (current == &rootmenu) + if (browsed == &rootmenu) gtk_widget_set_sensitive(back_btn, FALSE); } @@ -612,7 +611,7 @@ static gboolean on_treeview2_button_press_event(GtkWidget *widget, if (ptype == P_MENU && view_mode != FULL_VIEW && col == COL_OPTION) { // goes down into menu - current = menu; + browsed = menu; display_tree_part(); gtk_widget_set_sensitive(back_btn, TRUE); } else if (col == COL_OPTION) { @@ -711,14 +710,11 @@ static gboolean on_treeview1_button_press_event(GtkWidget *widget, gtk_tree_model_get_iter(model, &iter, path); gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); - if (event->type == GDK_2BUTTON_PRESS) { + if (event->type == GDK_2BUTTON_PRESS) toggle_sym_value(menu); - current = menu; - display_tree_part(); - } else { - browsed = menu; - display_tree_part(); - } + + browsed = menu; + display_tree_part(); gtk_tree_view_set_cursor(view, path, NULL, FALSE); gtk_widget_grab_focus(tree2_w); @@ -1012,7 +1008,7 @@ static void _display_tree(GtkTreeStore *tree, struct menu *menu, GtkTreeIter iter; if (menu == &rootmenu) - current = &rootmenu; + browsed = &rootmenu; for (child = menu->list; child; child = child->next) { prop = child->prompt; @@ -1059,9 +1055,7 @@ static void display_tree(GtkTreeStore *store, struct menu *menu) static void display_tree_part(void) { gtk_tree_store_clear(tree2); - if (view_mode == SINGLE_VIEW) - display_tree(tree2, current); - else if (view_mode == SPLIT_VIEW) + if (view_mode == SINGLE_VIEW || view_mode == SPLIT_VIEW) display_tree(tree2, browsed); else if (view_mode == FULL_VIEW) display_tree(tree2, &rootmenu); From ab026457d3f8132b62f6855840817467ea92671e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:31 +0900 Subject: [PATCH 0489/2411] kconfig: gconf: preserve menu selection when switching view mode Preserve the current menu selection when switching 
to a different view mode, as it improves usability. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 87 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 79 insertions(+), 8 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index bebd18ae07b0..eed6a10660eb 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -47,6 +47,7 @@ static GtkTreeStore *tree1, *tree2; static GtkTreeModel *model1, *model2; static struct menu *browsed; // browsed menu for SINGLE/SPLIT view +static struct menu *selected; // selected entry enum { COL_OPTION, COL_NAME, COL_NO, COL_MOD, COL_YES, COL_VALUE, @@ -114,6 +115,49 @@ static void text_insert_msg(const char *title, const char *message) NULL); } +static void _select_menu(GtkTreeView *view, GtkTreeModel *model, + GtkTreeIter *parent, struct menu *match) +{ + GtkTreeIter iter; + gboolean valid; + + valid = gtk_tree_model_iter_children(model, &iter, parent); + while (valid) { + struct menu *menu; + + gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); + + if (menu == match) { + GtkTreeSelection *selection; + GtkTreePath *path; + + /* + * Expand parents to reflect the selection, and + * scroll down to it. 
+ */ + path = gtk_tree_model_get_path(model, &iter); + gtk_tree_view_expand_to_path(view, path); + gtk_tree_view_scroll_to_cell(view, path, NULL, TRUE, + 0.5, 0.0); + gtk_tree_path_free(path); + + selection = gtk_tree_view_get_selection(view); + gtk_tree_selection_select_iter(selection, &iter); + + text_insert_help(menu); + } + + _select_menu(view, model, &iter, match); + + valid = gtk_tree_model_iter_next(model, &iter); + } +} + +static void select_menu(GtkTreeView *view, struct menu *match) +{ + _select_menu(view, gtk_tree_view_get_model(view), NULL, match); +} + static void set_view_mode(enum view_mode mode) { view_mode = mode; @@ -135,24 +179,39 @@ static void set_view_mode(enum view_mode mode) switch (mode) { case SINGLE_VIEW: - browsed = &rootmenu; + if (selected) + browsed = menu_get_parent_menu(selected) ?: &rootmenu; + else + browsed = &rootmenu; display_tree_part(); + text_insert_msg("", ""); + select_menu(GTK_TREE_VIEW(tree2_w), selected); gtk_widget_set_sensitive(single_btn, FALSE); break; case SPLIT_VIEW: + browsed = selected; + while (browsed && !(browsed->flags & MENU_ROOT)) + browsed = browsed->parent; gtk_tree_store_clear(tree2); display_list(); + if (browsed) + display_tree(tree2, browsed); + text_insert_msg("", ""); + select_menu(GTK_TREE_VIEW(tree1_w), browsed); + select_menu(GTK_TREE_VIEW(tree2_w), selected); gtk_widget_set_sensitive(split_btn, FALSE); break; case FULL_VIEW: gtk_tree_store_clear(tree2); display_tree(tree2, &rootmenu); + text_insert_msg("", ""); + select_menu(GTK_TREE_VIEW(tree2_w), selected); gtk_widget_set_sensitive(full_btn, FALSE); break; } - if (mode != SINGLE_VIEW) - gtk_widget_set_sensitive(back_btn, FALSE); + gtk_widget_set_sensitive(back_btn, + mode == SINGLE_VIEW && browsed != &rootmenu); } /* Menu & Toolbar Callbacks */ @@ -604,6 +663,8 @@ static gboolean on_treeview2_button_press_event(GtkWidget *widget, return FALSE; gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); + selected = menu; + col = 
column2index(column); if (event->type == GDK_2BUTTON_PRESS) { enum prop_type ptype; @@ -713,8 +774,12 @@ static gboolean on_treeview1_button_press_event(GtkWidget *widget, if (event->type == GDK_2BUTTON_PRESS) toggle_sym_value(menu); - browsed = menu; - display_tree_part(); + selected = menu; + + if (menu->type == M_MENU) { + browsed = menu; + display_tree_part(); + } gtk_tree_view_set_cursor(view, path, NULL, FALSE); gtk_widget_grab_focus(tree2_w); @@ -1007,10 +1072,16 @@ static void _display_tree(GtkTreeStore *tree, struct menu *menu, enum prop_type ptype; GtkTreeIter iter; - if (menu == &rootmenu) - browsed = &rootmenu; - for (child = menu->list; child; child = child->next) { + /* + * REVISIT: + * menu_finalize() creates empty "if" entries. + * Do not confuse gtk_tree_model_get(), which would otherwise + * return "if" menu entry. + */ + if (child->type == M_IF) + continue; + prop = child->prompt; ptype = prop ? prop->type : P_UNKNOWN; From e3795479366833d576f8e096be8ef9f42f9d124e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:32 +0900 Subject: [PATCH 0490/2411] kconfig: gconf: use GtkTreeModelFilter to control row visibility Currently, update_tree() adds/removes entries to show/hide rows. This approach is extremely complicated. Use the tree model filter to control row visibility instead. Do not toggle the MENU_CHANGED flag, as it is hard to control this correctly. 
Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 217 +++++++++++++++------------------------- 1 file changed, 80 insertions(+), 137 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index eed6a10660eb..e2b98b220513 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -59,7 +59,9 @@ enum { static void display_list(void); static void display_tree(GtkTreeStore *store, struct menu *menu); static void display_tree_part(void); -static void update_tree(struct menu *src, GtkTreeIter * dst); +static gchar **fill_row(struct menu *menu); +static void set_node(GtkTreeStore *tree, GtkTreeIter *node, + struct menu *menu, gchar **row); static void conf_changed(bool dirty) { @@ -158,6 +160,47 @@ static void select_menu(GtkTreeView *view, struct menu *match) _select_menu(view, gtk_tree_view_get_model(view), NULL, match); } +static void _update_row_visibility(GtkTreeView *view) +{ + GtkTreeModelFilter *filter = GTK_TREE_MODEL_FILTER(gtk_tree_view_get_model(view)); + + gtk_tree_model_filter_refilter(filter); +} + +static void update_row_visibility(void) +{ + if (view_mode == SPLIT_VIEW) + _update_row_visibility(GTK_TREE_VIEW(tree1_w)); + _update_row_visibility(GTK_TREE_VIEW(tree2_w)); +} + +static void _update_tree(GtkTreeStore *store, GtkTreeIter *parent) +{ + GtkTreeModel *model = GTK_TREE_MODEL(store); + GtkTreeIter iter; + gboolean valid; + + valid = gtk_tree_model_iter_children(model, &iter, parent); + while (valid) { + struct menu *menu; + + gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); + + if (menu) + set_node(store, &iter, menu, fill_row(menu)); + + _update_tree(store, &iter); + + valid = gtk_tree_model_iter_next(model, &iter); + } +} + +static void update_tree(GtkTreeStore *store) +{ + _update_tree(store, NULL); + update_row_visibility(); +} + static void set_view_mode(enum view_mode mode) { view_mode = mode; @@ -330,24 +373,21 @@ static void on_set_option_mode1_activate(GtkMenuItem *menuitem, gpointer 
user_data) { opt_mode = OPT_NORMAL; - gtk_tree_store_clear(tree2); - display_tree(tree2, &rootmenu); /* instead of update_tree to speed-up */ + update_row_visibility(); } static void on_set_option_mode2_activate(GtkMenuItem *menuitem, gpointer user_data) { opt_mode = OPT_ALL; - gtk_tree_store_clear(tree2); - display_tree(tree2, &rootmenu); /* instead of update_tree to speed-up */ + update_row_visibility(); } static void on_set_option_mode3_activate(GtkMenuItem *menuitem, gpointer user_data) { opt_mode = OPT_PROMPT; - gtk_tree_store_clear(tree2); - display_tree(tree2, &rootmenu); /* instead of update_tree to speed-up */ + update_row_visibility(); } static void on_introduction1_activate(GtkMenuItem *menuitem, gpointer user_data) @@ -561,7 +601,7 @@ static void renderer_edited(GtkCellRendererText * cell, sym_set_string_value(sym, new_def); - update_tree(&rootmenu, NULL); + update_tree(tree2); free: gtk_tree_path_free(path); @@ -592,9 +632,9 @@ static void change_sym_value(struct menu *menu, gint col) newval = yes; sym_set_tristate_value(sym, newval); if (view_mode == FULL_VIEW) - update_tree(&rootmenu, NULL); + update_tree(tree2); else if (view_mode == SPLIT_VIEW) { - update_tree(browsed, NULL); + update_tree(tree2); display_list(); } else if (view_mode == SINGLE_VIEW) @@ -615,9 +655,9 @@ static void toggle_sym_value(struct menu *menu) sym_toggle_tristate_value(menu->sym); if (view_mode == FULL_VIEW) - update_tree(&rootmenu, NULL); + update_tree(tree2); else if (view_mode == SPLIT_VIEW) { - update_tree(browsed, NULL); + update_tree(tree2); display_list(); } else if (view_mode == SINGLE_VIEW) @@ -844,7 +884,6 @@ static gchar **fill_row(struct menu *menu) row[COL_NAME] = g_strdup(sym->name); sym_calc_value(sym); - menu->flags &= ~MENU_CHANGED; if (sym_is_choice(sym)) { // parse childs for getting final value struct menu *child; @@ -949,120 +988,6 @@ static void set_node(GtkTreeStore *tree, GtkTreeIter *node, g_object_unref(pix); } -/* Find a node in the GTK+ tree */ 
-static GtkTreeIter found; - -/* - * Find a menu in the GtkTree starting at parent. - */ -static GtkTreeIter *gtktree_iter_find_node(GtkTreeIter *parent, - struct menu *tofind) -{ - GtkTreeIter iter; - GtkTreeIter *child = &iter; - gboolean valid; - GtkTreeIter *ret; - - valid = gtk_tree_model_iter_children(model2, child, parent); - while (valid) { - struct menu *menu; - - gtk_tree_model_get(model2, child, 6, &menu, -1); - - if (menu == tofind) { - memcpy(&found, child, sizeof(GtkTreeIter)); - return &found; - } - - ret = gtktree_iter_find_node(child, tofind); - if (ret) - return ret; - - valid = gtk_tree_model_iter_next(model2, child); - } - - return NULL; -} - - -/* - * Update the tree by adding/removing entries - * Does not change other nodes - */ -static void update_tree(struct menu *src, GtkTreeIter * dst) -{ - struct menu *child1; - GtkTreeIter iter, tmp; - GtkTreeIter *child2 = &iter; - gboolean valid; - GtkTreeIter *sibling; - struct symbol *sym; - struct menu *menu1, *menu2; - - valid = gtk_tree_model_iter_children(model2, child2, dst); - for (child1 = src->list; child1; child1 = child1->next) { - - sym = child1->sym; - - reparse: - menu1 = child1; - if (valid) - gtk_tree_model_get(model2, child2, COL_MENU, - &menu2, -1); - else - menu2 = NULL; // force adding of a first child - - if ((opt_mode == OPT_NORMAL && !menu_is_visible(child1)) || - (opt_mode == OPT_PROMPT && !menu_has_prompt(child1)) || - (opt_mode == OPT_ALL && !menu_get_prompt(child1))) { - - /* remove node */ - if (gtktree_iter_find_node(dst, menu1) != NULL) { - memcpy(&tmp, child2, sizeof(GtkTreeIter)); - valid = gtk_tree_model_iter_next(model2, - child2); - gtk_tree_store_remove(tree2, &tmp); - if (!valid) - return; /* next parent */ - else - goto reparse; /* next child */ - } else - continue; - } - - if (menu1 != menu2) { - if (gtktree_iter_find_node(dst, menu1) == NULL) { // add node - if (!valid && !menu2) - sibling = NULL; - else - sibling = child2; - gtk_tree_store_insert_before(tree2, 
- child2, - dst, sibling); - set_node(tree2, child2, menu1, fill_row(menu1)); - if (menu2 == NULL) - valid = TRUE; - } else { // remove node - memcpy(&tmp, child2, sizeof(GtkTreeIter)); - valid = gtk_tree_model_iter_next(model2, - child2); - gtk_tree_store_remove(tree2, &tmp); - if (!valid) - return; // next parent - else - goto reparse; // next child - } - } else if (sym && (child1->flags & MENU_CHANGED)) { - set_node(tree2, child2, menu1, fill_row(menu1)); - } - - update_tree(child1, child2); - - valid = gtk_tree_model_iter_next(model2, child2); - } -} - - /* Display the whole tree (single/split/full view) */ static void _display_tree(GtkTreeStore *tree, struct menu *menu, GtkTreeIter *parent) @@ -1085,8 +1010,6 @@ static void _display_tree(GtkTreeStore *tree, struct menu *menu, prop = child->prompt; ptype = prop ? prop->type : P_UNKNOWN; - menu->flags &= ~MENU_CHANGED; - if ((view_mode == SPLIT_VIEW) && !(child->flags & MENU_ROOT) && (tree == tree1)) continue; @@ -1095,12 +1018,8 @@ static void _display_tree(GtkTreeStore *tree, struct menu *menu, && (tree == tree2)) continue; - if ((opt_mode == OPT_NORMAL && menu_is_visible(child)) || - (opt_mode == OPT_PROMPT && menu_has_prompt(child)) || - (opt_mode == OPT_ALL && menu_get_prompt(child))) { - gtk_tree_store_append(tree, &iter, parent); - set_node(tree, &iter, child, fill_row(child)); - } + gtk_tree_store_append(tree, &iter, parent); + set_node(tree, &iter, child, fill_row(child)); if ((view_mode != FULL_VIEW) && (ptype == P_MENU) && (tree == tree2)) @@ -1322,6 +1241,20 @@ static void init_main_window(const gchar *glade_file) conf_set_changed_callback(conf_changed); } +static gboolean visible_func(GtkTreeModel *model, GtkTreeIter *iter, + gpointer data) +{ + struct menu *menu; + + gtk_tree_model_get(model, iter, COL_MENU, &menu, -1); + + if (!menu) + return FALSE; + + return menu_is_visible(menu) || opt_mode == OPT_ALL || + (opt_mode == OPT_PROMPT && menu_has_prompt(menu)); +} + static void init_tree_model(void) 
{ tree2 = gtk_tree_store_new(COL_NUMBER, @@ -1353,8 +1286,13 @@ static void init_left_tree(void) GtkCellRenderer *renderer; GtkTreeSelection *sel; GtkTreeViewColumn *column; + GtkTreeModel *filter; - gtk_tree_view_set_model(view, model1); + filter = gtk_tree_model_filter_new(model1, NULL); + + gtk_tree_model_filter_set_visible_func(GTK_TREE_MODEL_FILTER(filter), + visible_func, NULL, NULL); + gtk_tree_view_set_model(view, filter); column = gtk_tree_view_column_new(); gtk_tree_view_append_column(view, column); @@ -1388,9 +1326,14 @@ static void init_right_tree(void) GtkCellRenderer *renderer; GtkTreeSelection *sel; GtkTreeViewColumn *column; + GtkTreeModel *filter; gint i; - gtk_tree_view_set_model(view, model2); + filter = gtk_tree_model_filter_new(model2, NULL); + + gtk_tree_model_filter_set_visible_func(GTK_TREE_MODEL_FILTER(filter), + visible_func, NULL, NULL); + gtk_tree_view_set_model(view, filter); column = gtk_tree_view_column_new(); gtk_tree_view_append_column(view, column); From b674af6ec8541151750b424520fc1acec32deae9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:33 +0900 Subject: [PATCH 0491/2411] kconfig: gconf: remove global 'model1' and 'model2' variables These variables are unnecessary because the current model can be retrieved using gtk_tree_view_get_model(). 
Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap --- scripts/kconfig/gconf.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index e2b98b220513..bf3151382e5d 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -44,7 +44,6 @@ static GtkWidget *save_menu_item; static GtkTextTag *tag1, *tag2; static GtkTreeStore *tree1, *tree2; -static GtkTreeModel *model1, *model2; static struct menu *browsed; // browsed menu for SINGLE/SPLIT view static struct menu *selected; // selected entry @@ -1266,7 +1265,6 @@ static void init_tree_model(void) G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN); - model2 = GTK_TREE_MODEL(tree2); tree1 = gtk_tree_store_new(COL_NUMBER, G_TYPE_STRING, G_TYPE_STRING, @@ -1277,7 +1275,6 @@ static void init_tree_model(void) G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN); - model1 = GTK_TREE_MODEL(tree1); } static void init_left_tree(void) @@ -1288,7 +1285,7 @@ static void init_left_tree(void) GtkTreeViewColumn *column; GtkTreeModel *filter; - filter = gtk_tree_model_filter_new(model1, NULL); + filter = gtk_tree_model_filter_new(GTK_TREE_MODEL(tree1), NULL); gtk_tree_model_filter_set_visible_func(GTK_TREE_MODEL_FILTER(filter), visible_func, NULL, NULL); @@ -1329,7 +1326,7 @@ static void init_right_tree(void) GtkTreeModel *filter; gint i; - filter = gtk_tree_model_filter_new(model2, NULL); + filter = gtk_tree_model_filter_new(GTK_TREE_MODEL(tree2), NULL); gtk_tree_model_filter_set_visible_func(GTK_TREE_MODEL_FILTER(filter), visible_func, NULL, NULL); From ed332436f3ca8a130ee9fc49d0882af5fbc344ef Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:34 +0900 Subject: [PATCH 0492/2411] kconfig: gconf: remove init_tree_model() Move the relevant code into init_left_tree() or init_right_tree(). 
Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index bf3151382e5d..8c024e93c302 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -1254,17 +1254,13 @@ static gboolean visible_func(GtkTreeModel *model, GtkTreeIter *iter, (opt_mode == OPT_PROMPT && menu_has_prompt(menu)); } -static void init_tree_model(void) +static void init_left_tree(void) { - tree2 = gtk_tree_store_new(COL_NUMBER, - G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_POINTER, GDK_TYPE_COLOR, - G_TYPE_BOOLEAN, GDK_TYPE_PIXBUF, - G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, - G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, - G_TYPE_BOOLEAN); + GtkTreeView *view = GTK_TREE_VIEW(tree1_w); + GtkCellRenderer *renderer; + GtkTreeSelection *sel; + GtkTreeViewColumn *column; + GtkTreeModel *filter; tree1 = gtk_tree_store_new(COL_NUMBER, G_TYPE_STRING, G_TYPE_STRING, @@ -1275,15 +1271,6 @@ static void init_tree_model(void) G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN); -} - -static void init_left_tree(void) -{ - GtkTreeView *view = GTK_TREE_VIEW(tree1_w); - GtkCellRenderer *renderer; - GtkTreeSelection *sel; - GtkTreeViewColumn *column; - GtkTreeModel *filter; filter = gtk_tree_model_filter_new(GTK_TREE_MODEL(tree1), NULL); @@ -1326,6 +1313,16 @@ static void init_right_tree(void) GtkTreeModel *filter; gint i; + tree2 = gtk_tree_store_new(COL_NUMBER, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_POINTER, GDK_TYPE_COLOR, + G_TYPE_BOOLEAN, GDK_TYPE_PIXBUF, + G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, + G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, + G_TYPE_BOOLEAN); + filter = gtk_tree_model_filter_new(GTK_TREE_MODEL(tree2), NULL); gtk_tree_model_filter_set_visible_func(GTK_TREE_MODEL_FILTER(filter), @@ -1448,7 +1445,6 @@ int main(int ac, 
char *av[]) /* Load the interface and connect signals */ init_main_window(glade_file); - init_tree_model(); init_left_tree(); init_right_tree(); From 64285dc5c41fc7a031695c2c286a2bfef9eaf2c6 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:35 +0900 Subject: [PATCH 0493/2411] kconfig: gconf: inline fill_row() into set_node() The row[] array is used to prepare data passed to set_node(), but this indirection is unnecessary. Squash fill_row() into set_node() and call gtk_tree_store_set() directly. Also, calling gdk_pixbuf_new_from_xpm_data() for every row is inefficient. Call it once and store the resulting pixbuf in a global variable. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 272 ++++++++++++++++------------------------ 1 file changed, 106 insertions(+), 166 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 8c024e93c302..9df32f47bf6b 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -44,6 +44,7 @@ static GtkWidget *save_menu_item; static GtkTextTag *tag1, *tag2; static GtkTreeStore *tree1, *tree2; +static GdkPixbuf *pix_menu; static struct menu *browsed; // browsed menu for SINGLE/SPLIT view static struct menu *selected; // selected entry @@ -58,9 +59,6 @@ enum { static void display_list(void); static void display_tree(GtkTreeStore *store, struct menu *menu); static void display_tree_part(void); -static gchar **fill_row(struct menu *menu); -static void set_node(GtkTreeStore *tree, GtkTreeIter *node, - struct menu *menu, gchar **row); static void conf_changed(bool dirty) { @@ -173,6 +171,104 @@ static void update_row_visibility(void) _update_row_visibility(GTK_TREE_VIEW(tree2_w)); } +static void set_node(GtkTreeStore *tree, GtkTreeIter *node, struct menu *menu) +{ + struct symbol *sym = menu->sym; + tristate val; + gchar *option; + const gchar *_no = ""; + const gchar *_mod = ""; + const gchar *_yes = ""; + const gchar *value = ""; + GdkColor color; + gboolean editable = 
FALSE; + gboolean btnvis = FALSE; + + option = g_strdup_printf("%s %s %s %s", + menu->type == M_COMMENT ? "***" : "", + menu_get_prompt(menu), + menu->type == M_COMMENT ? "***" : "", + sym && !sym_has_value(sym) ? "(NEW)" : ""); + + gdk_color_parse(menu_is_visible(menu) ? "Black" : "DarkGray", &color); + + if (!sym) + goto set; + + sym_calc_value(sym); + + if (menu->type == M_CHOICE) { // parse children to get a final value + struct symbol *def_sym = sym_calc_choice(menu); + struct menu *def_menu = NULL; + + for (struct menu *child = menu->list; child; child = child->next) { + if (menu_is_visible(child) && child->sym == def_sym) + def_menu = child; + } + + if (def_menu) + value = menu_get_prompt(def_menu); + + goto set; + } + + switch (sym_get_type(sym)) { + case S_BOOLEAN: + case S_TRISTATE: + + btnvis = TRUE; + + val = sym_get_tristate_value(sym); + switch (val) { + case no: + _no = "N"; + value = "N"; + break; + case mod: + _mod = "M"; + value = "M"; + break; + case yes: + _yes = "Y"; + value = "Y"; + break; + } + + if (val != no && sym_tristate_within_range(sym, no)) + _no = "_"; + if (val != mod && sym_tristate_within_range(sym, mod)) + _mod = "_"; + if (val != yes && sym_tristate_within_range(sym, yes)) + _yes = "_"; + break; + default: + value = sym_get_string_value(sym); + editable = TRUE; + break; + } + +set: + gtk_tree_store_set(tree, node, + COL_OPTION, option, + COL_NAME, sym ? 
sym->name : "", + COL_NO, _no, + COL_MOD, _mod, + COL_YES, _yes, + COL_VALUE, value, + COL_MENU, (gpointer) menu, + COL_COLOR, &color, + COL_EDIT, editable, + COL_PIXBUF, pix_menu, + COL_PIXVIS, view_mode == SINGLE_VIEW && menu->type == M_MENU, + COL_BTNVIS, btnvis, + COL_BTNACT, _yes[0] == 'Y', + COL_BTNINC, _mod[0] == 'M', + COL_BTNRAD, sym && sym_is_choice_value(sym), + -1); + + g_free(option); +} + static void _update_tree(GtkTreeStore *store, GtkTreeIter *parent) { GtkTreeModel *model = GTK_TREE_MODEL(store); @@ -186,7 +282,7 @@ static void _update_tree(GtkTreeStore *store, GtkTreeIter *parent) gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); if (menu) - set_node(store, &iter, menu, fill_row(menu)); + set_node(store, &iter, menu); _update_tree(store, &iter); @@ -565,6 +661,9 @@ static gboolean on_window1_delete_event(GtkWidget *widget, GdkEvent *event, gtk_widget_destroy(dialog); + if (!ret) + g_object_unref(pix_menu); + return ret; } @@ -826,167 +925,6 @@ static gboolean on_treeview1_button_press_event(GtkWidget *widget, return FALSE; } - -/* Fill a row of strings */ -static gchar **fill_row(struct menu *menu) -{ - static gchar *row[COL_NUMBER]; - struct symbol *sym = menu->sym; - const char *def; - int stype; - tristate val; - enum prop_type ptype; - int i; - - for (i = COL_OPTION; i <= COL_COLOR; i++) - g_free(row[i]); - bzero(row, sizeof(row)); - - ptype = menu->prompt ? menu->prompt->type : P_UNKNOWN; - - row[COL_OPTION] = - g_strdup_printf("%s %s %s %s", - ptype == P_COMMENT ? "***" : "", - menu_get_prompt(menu), - ptype == P_COMMENT ? "***" : "", - sym && !sym_has_value(sym) ? 
"(NEW)" : ""); - - if (opt_mode == OPT_ALL && !menu_is_visible(menu)) - row[COL_COLOR] = g_strdup("DarkGray"); - else if (opt_mode == OPT_PROMPT && - menu_has_prompt(menu) && !menu_is_visible(menu)) - row[COL_COLOR] = g_strdup("DarkGray"); - else - row[COL_COLOR] = g_strdup("Black"); - - switch (ptype) { - case P_MENU: - row[COL_PIXBUF] = (gchar *) xpm_menu; - if (view_mode == SINGLE_VIEW) - row[COL_PIXVIS] = GINT_TO_POINTER(TRUE); - row[COL_BTNVIS] = GINT_TO_POINTER(FALSE); - break; - case P_COMMENT: - row[COL_PIXBUF] = (gchar *) xpm_void; - row[COL_PIXVIS] = GINT_TO_POINTER(FALSE); - row[COL_BTNVIS] = GINT_TO_POINTER(FALSE); - break; - default: - row[COL_PIXBUF] = (gchar *) xpm_void; - row[COL_PIXVIS] = GINT_TO_POINTER(FALSE); - row[COL_BTNVIS] = GINT_TO_POINTER(TRUE); - break; - } - - if (!sym) - return row; - row[COL_NAME] = g_strdup(sym->name); - - sym_calc_value(sym); - - if (sym_is_choice(sym)) { // parse childs for getting final value - struct menu *child; - struct symbol *def_sym = sym_calc_choice(menu); - struct menu *def_menu = NULL; - - for (child = menu->list; child; child = child->next) { - if (menu_is_visible(child) - && child->sym == def_sym) - def_menu = child; - } - - if (def_menu) - row[COL_VALUE] = - g_strdup(menu_get_prompt(def_menu)); - - row[COL_BTNVIS] = GINT_TO_POINTER(FALSE); - return row; - } - if (sym_is_choice_value(sym)) - row[COL_BTNRAD] = GINT_TO_POINTER(TRUE); - - stype = sym_get_type(sym); - switch (stype) { - case S_BOOLEAN: - case S_TRISTATE: - val = sym_get_tristate_value(sym); - switch (val) { - case no: - row[COL_NO] = g_strdup("N"); - row[COL_VALUE] = g_strdup("N"); - row[COL_BTNACT] = GINT_TO_POINTER(FALSE); - row[COL_BTNINC] = GINT_TO_POINTER(FALSE); - break; - case mod: - row[COL_MOD] = g_strdup("M"); - row[COL_VALUE] = g_strdup("M"); - row[COL_BTNINC] = GINT_TO_POINTER(TRUE); - break; - case yes: - row[COL_YES] = g_strdup("Y"); - row[COL_VALUE] = g_strdup("Y"); - row[COL_BTNACT] = GINT_TO_POINTER(TRUE); - row[COL_BTNINC] 
= GINT_TO_POINTER(FALSE); - break; - } - - if (val != no && sym_tristate_within_range(sym, no)) - row[COL_NO] = g_strdup("_"); - if (val != mod && sym_tristate_within_range(sym, mod)) - row[COL_MOD] = g_strdup("_"); - if (val != yes && sym_tristate_within_range(sym, yes)) - row[COL_YES] = g_strdup("_"); - break; - case S_INT: - case S_HEX: - case S_STRING: - def = sym_get_string_value(sym); - row[COL_VALUE] = g_strdup(def); - row[COL_EDIT] = GINT_TO_POINTER(TRUE); - row[COL_BTNVIS] = GINT_TO_POINTER(FALSE); - break; - } - - return row; -} - - -/* Set the node content with a row of strings */ -static void set_node(GtkTreeStore *tree, GtkTreeIter *node, - struct menu *menu, gchar **row) -{ - GdkColor color; - gboolean success; - GdkPixbuf *pix; - - pix = gdk_pixbuf_new_from_xpm_data((const char **) - row[COL_PIXBUF]); - - gdk_color_parse(row[COL_COLOR], &color); - gdk_colormap_alloc_colors(gdk_colormap_get_system(), &color, 1, - FALSE, FALSE, &success); - - gtk_tree_store_set(tree, node, - COL_OPTION, row[COL_OPTION], - COL_NAME, row[COL_NAME], - COL_NO, row[COL_NO], - COL_MOD, row[COL_MOD], - COL_YES, row[COL_YES], - COL_VALUE, row[COL_VALUE], - COL_MENU, (gpointer) menu, - COL_COLOR, &color, - COL_EDIT, GPOINTER_TO_INT(row[COL_EDIT]), - COL_PIXBUF, pix, - COL_PIXVIS, GPOINTER_TO_INT(row[COL_PIXVIS]), - COL_BTNVIS, GPOINTER_TO_INT(row[COL_BTNVIS]), - COL_BTNACT, GPOINTER_TO_INT(row[COL_BTNACT]), - COL_BTNINC, GPOINTER_TO_INT(row[COL_BTNINC]), - COL_BTNRAD, GPOINTER_TO_INT(row[COL_BTNRAD]), - -1); - - g_object_unref(pix); -} - /* Display the whole tree (single/split/full view) */ static void _display_tree(GtkTreeStore *tree, struct menu *menu, GtkTreeIter *parent) @@ -1018,7 +956,7 @@ static void _display_tree(GtkTreeStore *tree, struct menu *menu, continue; gtk_tree_store_append(tree, &iter, parent); - set_node(tree, &iter, child, fill_row(child)); + set_node(tree, &iter, child); if ((view_mode != FULL_VIEW) && (ptype == P_MENU) && (tree == tree2)) @@ -1393,6 
+1331,8 @@ static void init_right_tree(void) g_signal_connect(G_OBJECT(renderer), "edited", G_CALLBACK(renderer_edited), tree2_w); + pix_menu = gdk_pixbuf_new_from_xpm_data((const char **)xpm_menu); + for (i = 0; i < COL_VALUE; i++) { column = gtk_tree_view_get_column(view, i); gtk_tree_view_column_set_resizable(column, TRUE); From 035c2f56f57432caa78378e3ab498a5fb9bd276b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:36 +0900 Subject: [PATCH 0494/2411] kconfig: gconf: do not reconstruct tree store when a symbol is changed There is no need to reconstruct the entire tree store when a symbol's value changes. Simply call gtk_tree_store_set() to update the row data. Introduce update_trees() to factor out the common update logic. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 9df32f47bf6b..73736f79ddcb 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -296,6 +296,13 @@ static void update_tree(GtkTreeStore *store) update_row_visibility(); } +static void update_trees(void) +{ + if (view_mode == SPLIT_VIEW) + update_tree(tree1); + update_tree(tree2); +} + static void set_view_mode(enum view_mode mode) { view_mode = mode; @@ -380,7 +387,7 @@ static void on_load1_activate(GtkMenuItem *menuitem, gpointer user_data) text_insert_msg("Error", "Unable to load configuration!"); else - display_tree_part(); + update_trees(); g_free(filename); } @@ -699,7 +706,7 @@ static void renderer_edited(GtkCellRendererText * cell, sym_set_string_value(sym, new_def); - update_tree(tree2); + update_trees(); free: gtk_tree_path_free(path); @@ -729,14 +736,7 @@ static void change_sym_value(struct menu *menu, gint col) if (!sym_tristate_within_range(sym, newval)) newval = yes; sym_set_tristate_value(sym, newval); - if (view_mode == FULL_VIEW) - update_tree(tree2); - else if (view_mode == 
SPLIT_VIEW) { - update_tree(tree2); - display_list(); - } - else if (view_mode == SINGLE_VIEW) - display_tree_part(); //fixme: keep exp/coll + update_trees(); break; case S_INT: case S_HEX: @@ -752,14 +752,7 @@ static void toggle_sym_value(struct menu *menu) return; sym_toggle_tristate_value(menu->sym); - if (view_mode == FULL_VIEW) - update_tree(tree2); - else if (view_mode == SPLIT_VIEW) { - update_tree(tree2); - display_list(); - } - else if (view_mode == SINGLE_VIEW) - display_tree_part(); //fixme: keep exp/coll + update_trees(); } static gint column2index(GtkTreeViewColumn * column) From 063a274a5e297720e18b3a1d7bbfe2d039e12192 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:37 +0900 Subject: [PATCH 0495/2411] kconfig: gconf: inline display_list() into set_view_mode() This function is now only called by set_view_mode(), so inline it for simplicity. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 73736f79ddcb..3c2e6be30c00 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -56,7 +56,6 @@ enum { COL_NUMBER }; -static void display_list(void); static void display_tree(GtkTreeStore *store, struct menu *menu); static void display_tree_part(void); @@ -337,8 +336,10 @@ static void set_view_mode(enum view_mode mode) browsed = selected; while (browsed && !(browsed->flags & MENU_ROOT)) browsed = browsed->parent; + gtk_tree_store_clear(tree1); + display_tree(tree1, &rootmenu); + gtk_tree_view_expand_all(GTK_TREE_VIEW(tree1_w)); gtk_tree_store_clear(tree2); - display_list(); if (browsed) display_tree(tree2, browsed); text_insert_msg("", ""); @@ -982,15 +983,6 @@ static void display_tree_part(void) gtk_tree_view_expand_all(GTK_TREE_VIEW(tree2_w)); } -/* Display the list in the left frame (split view) */ -static void display_list(void) -{ - gtk_tree_store_clear(tree1); - - 
display_tree(tree1, &rootmenu); - gtk_tree_view_expand_all(GTK_TREE_VIEW(tree1_w)); -} - static void fixup_rootmenu(struct menu *menu) { struct menu *child; From bf5792da5ac14c5e95f1e8612df70096ee5a44d1 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:38 +0900 Subject: [PATCH 0496/2411] kconfig: gconf: remove dead code in display_tree_part() This function is no longer called in the FULL_VIEW mode, so remove the dead code. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 3c2e6be30c00..6afdba85158a 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -976,10 +976,7 @@ static void display_tree(GtkTreeStore *store, struct menu *menu) static void display_tree_part(void) { gtk_tree_store_clear(tree2); - if (view_mode == SINGLE_VIEW || view_mode == SPLIT_VIEW) - display_tree(tree2, browsed); - else if (view_mode == FULL_VIEW) - display_tree(tree2, &rootmenu); + display_tree(tree2, browsed); gtk_tree_view_expand_all(GTK_TREE_VIEW(tree2_w)); } From 475c878f971661511fb3911af96c0ee0cb533527 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:42 +0900 Subject: [PATCH 0497/2411] kconfig: gconf: replace GDK_space with GDK_KEY_space In GTK3, keysyms changed to have a KEY_ prefix. 
[1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/gdk/gdkkeysyms-compat.h#L24 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 6afdba85158a..b24d02972090 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -841,7 +841,7 @@ static gboolean on_treeview2_key_press_event(GtkWidget *widget, if (path == NULL) return FALSE; - if (event->keyval == GDK_space) { + if (event->keyval == GDK_KEY_space) { if (gtk_tree_view_row_expanded(view, path)) gtk_tree_view_collapse_row(view, path); else From 7783290143b37c568538d8e699d81d1e6c8af72b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:43 +0900 Subject: [PATCH 0498/2411] kconfig: gconf: replace GTK_STOCK_{OK,NO,CANCEL} These are deprecated with GTK 3.10. [1] Use "_OK", "_No", "_Cancel". [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/3.10.0/gtk/deprecated/gtkstock.h#L827 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index b24d02972090..085a06667a21 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -640,11 +640,11 @@ static gboolean on_window1_delete_event(GtkWidget *widget, GdkEvent *event, (GtkDialogFlags) (GTK_DIALOG_MODAL | GTK_DIALOG_DESTROY_WITH_PARENT), - GTK_STOCK_OK, + "_OK", GTK_RESPONSE_YES, - GTK_STOCK_NO, + "_No", GTK_RESPONSE_NO, - GTK_STOCK_CANCEL, + "_Cancel", GTK_RESPONSE_CANCEL, NULL); gtk_dialog_set_default_response(GTK_DIALOG(dialog), GTK_RESPONSE_CANCEL); From ad452c27aeb80d1c3ee449250c3f790e7bd8ffaa Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:44 +0900 Subject: [PATCH 0499/2411] kconfig: gconf: remove "tooltips" property from glade The tips are still displayed without this. This property does not exist in GtkBuilder with GTK 3.
Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.glade | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/kconfig/gconf.glade b/scripts/kconfig/gconf.glade index 8519104a3c2b..c0ada331a5bf 100644 --- a/scripts/kconfig/gconf.glade +++ b/scripts/kconfig/gconf.glade @@ -225,7 +225,6 @@ True GTK_ORIENTATION_HORIZONTAL GTK_TOOLBAR_BOTH - True True From 9d0e47c4c879dfbaa1f407ecbd4176682f711871 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:45 +0900 Subject: [PATCH 0500/2411] kconfig: gconf: replace "tooltip" property with "tooltip-text" This is no longer available in GTK 3. Use "tooltip-text" instead. Also reword "Goes up of one level" to "Goes up one level" while I am here. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.glade | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/scripts/kconfig/gconf.glade b/scripts/kconfig/gconf.glade index c0ada331a5bf..35d5257f33c9 100644 --- a/scripts/kconfig/gconf.glade +++ b/scripts/kconfig/gconf.glade @@ -40,7 +40,7 @@ True - Load a config file + Load a config file _Load True @@ -50,7 +50,7 @@ True - Save the config in .config + Save the config in .config _Save True @@ -60,7 +60,7 @@ True - Save the config in a file + Save the config in a file Save _as True @@ -97,7 +97,7 @@ True - Show name + Show name Show _name True False @@ -107,7 +107,7 @@ True - Show range (Y/M/N) + Show range (Y/M/N) Show _range True False @@ -117,7 +117,7 @@ True - Show value of the option + Show value of the option Show _data True False @@ -133,7 +133,7 @@ True - Show normal options + Show normal options Show normal options True True @@ -143,7 +143,7 @@ True - Show all options + Show all options Show all _options True False @@ -154,7 +154,7 @@ True - Show all options with prompts + Show all options with prompts Show all prompt options True False @@ -230,7 +230,7 @@ True - Goes up of one level (single view) + Goes up one level (single view) Back True 
gtk-undo @@ -266,7 +266,7 @@ True - Load a config file + Load a config file Load True gtk-open @@ -283,7 +283,7 @@ True - Save a config file + Save a config file Save True gtk-save @@ -319,7 +319,7 @@ True - Single view + Single view Single True gtk-missing-image @@ -336,7 +336,7 @@ True - Split view + Split view Split True gtk-missing-image @@ -353,7 +353,7 @@ True - Full view + Full view Full True gtk-missing-image @@ -389,7 +389,7 @@ True - Collapse the whole tree in the right frame + Collapse the whole tree in the right frame Collapse True gtk-remove @@ -406,7 +406,7 @@ True - Expand the whole tree in the right frame + Expand the whole tree in the right frame Expand True gtk-add From ea1438f720aa2fa287237fbeb7e76a1e83a10af8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:46 +0900 Subject: [PATCH 0501/2411] kconfig: gconf: remove unnecessary default message in text view This message looks odd because it is displayed when nothing is selected. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.glade | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/kconfig/gconf.glade b/scripts/kconfig/gconf.glade index 35d5257f33c9..488342e6fce1 100644 --- a/scripts/kconfig/gconf.glade +++ b/scripts/kconfig/gconf.glade @@ -517,7 +517,6 @@ 0 0 0 - Sorry, no help available for this option yet. From 07944f94fc8c02344f283e461b1ea817a9108e17 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:47 +0900 Subject: [PATCH 0502/2411] kconfig: gconf: use gtk_check_menu_item_get_active() accessor GTK 3 removes many implementation details and struct members from its public headers. Use the gtk_check_menu_item_get_active() accessor. 
[1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/docs/reference/gtk/compiling.sgml#L85 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 085a06667a21..a02078926274 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -439,7 +439,7 @@ static void on_show_name1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkTreeViewColumn *col; - show_name = GTK_CHECK_MENU_ITEM(menuitem)->active; + show_name = gtk_check_menu_item_get_active(GTK_CHECK_MENU_ITEM(menuitem)); col = gtk_tree_view_get_column(GTK_TREE_VIEW(tree2_w), COL_NAME); if (col) gtk_tree_view_column_set_visible(col, show_name); @@ -449,7 +449,7 @@ static void on_show_range1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkTreeViewColumn *col; - show_range = GTK_CHECK_MENU_ITEM(menuitem)->active; + show_range = gtk_check_menu_item_get_active(GTK_CHECK_MENU_ITEM(menuitem)); col = gtk_tree_view_get_column(GTK_TREE_VIEW(tree2_w), COL_NO); if (col) gtk_tree_view_column_set_visible(col, show_range); @@ -466,7 +466,7 @@ static void on_show_data1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkTreeViewColumn *col; - show_value = GTK_CHECK_MENU_ITEM(menuitem)->active; + show_value = gtk_check_menu_item_get_active(GTK_CHECK_MENU_ITEM(menuitem)); col = gtk_tree_view_get_column(GTK_TREE_VIEW(tree2_w), COL_VALUE); if (col) gtk_tree_view_column_set_visible(col, show_value); From c34d632370592cb503991728afdf5287b2d1f7ed Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:48 +0900 Subject: [PATCH 0503/2411] kconfig: gconf: use gtk_dialog_get_content_area() accessor GTK 3 removes many implementation details and struct members from its public headers. Use the gtk_dialog_get_content_area() accessor.
[1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/docs/reference/gtk/compiling.sgml#L85 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index a02078926274..28c4b5b37448 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -628,7 +628,7 @@ static void on_window1_size_request(GtkWidget *widget, static gboolean on_window1_delete_event(GtkWidget *widget, GdkEvent *event, gpointer user_data) { - GtkWidget *dialog, *label; + GtkWidget *dialog, *label, *content_area; gint result; gint ret = FALSE; @@ -650,7 +650,8 @@ static gboolean on_window1_delete_event(GtkWidget *widget, GdkEvent *event, GTK_RESPONSE_CANCEL); label = gtk_label_new("\nSave configuration ?\n"); - gtk_container_add(GTK_CONTAINER(GTK_DIALOG(dialog)->vbox), label); + content_area = gtk_dialog_get_content_area(GTK_DIALOG(dialog)); + gtk_container_add(GTK_CONTAINER(content_area), label); gtk_widget_show(label); result = gtk_dialog_run(GTK_DIALOG(dialog)); From 9e0bd6db622c7c19aec5a8b248bb34493c9998e6 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:49 +0900 Subject: [PATCH 0504/2411] kconfig: gconf: remove GtkHandleBox from glade GtkHandleBox is deprecated with GTK 3.4. 
[1] [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/3.4.0/gtk/deprecated/gtkhandlebox.c#L426 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.glade | 9 --------- 1 file changed, 9 deletions(-) diff --git a/scripts/kconfig/gconf.glade b/scripts/kconfig/gconf.glade index 488342e6fce1..cd714e64cff1 100644 --- a/scripts/kconfig/gconf.glade +++ b/scripts/kconfig/gconf.glade @@ -214,13 +214,6 @@ - - True - GTK_SHADOW_OUT - GTK_POS_LEFT - GTK_POS_TOP - - True GTK_ORIENTATION_HORIZONTAL @@ -420,8 +413,6 @@ - - 0 False From 93e20e2b7a556e7c1699e48ebd4dd6b2ce929d43 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 24 Jun 2025 05:11:14 +0000 Subject: [PATCH 0505/2411] pinctrl: renesas: Sort Renesas Kconfig configs Current Renesas Kconfig is randomly arranged. Let's sort it by alphabetical/number order, same as Makefile. Signed-off-by: Kuninori Morimoto Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/87cyatrafh.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Geert Uytterhoeven --- drivers/pinctrl/renesas/Kconfig | 236 ++++++++++++++++---------------- 1 file changed, 118 insertions(+), 118 deletions(-) diff --git a/drivers/pinctrl/renesas/Kconfig b/drivers/pinctrl/renesas/Kconfig index e16034fc1bbf..5a30134edd54 100644 --- a/drivers/pinctrl/renesas/Kconfig +++ b/drivers/pinctrl/renesas/Kconfig @@ -86,16 +86,52 @@ config PINCTRL_PFC_EMEV2 bool "pin control support for Emma Mobile EV2" if COMPILE_TEST select PINCTRL_SH_PFC -config PINCTRL_PFC_R8A77995 - bool "pin control support for R-Car D3" if COMPILE_TEST +config PINCTRL_PFC_R8A73A4 + bool "pin control support for R-Mobile APE6" if COMPILE_TEST + select PINCTRL_SH_PFC_GPIO + +config PINCTRL_PFC_R8A7740 + bool "pin control support for R-Mobile A1" if COMPILE_TEST + select PINCTRL_SH_PFC_GPIO + +config PINCTRL_PFC_R8A7742 + bool "pin control support for RZ/G1H" if COMPILE_TEST select PINCTRL_SH_PFC -config PINCTRL_PFC_R8A7794 - bool "pin control support for R-Car E2" if COMPILE_TEST +config 
PINCTRL_PFC_R8A7743 + bool "pin control support for RZ/G1M" if COMPILE_TEST select PINCTRL_SH_PFC -config PINCTRL_PFC_R8A77990 - bool "pin control support for R-Car E3" if COMPILE_TEST +config PINCTRL_PFC_R8A7744 + bool "pin control support for RZ/G1N" if COMPILE_TEST + select PINCTRL_SH_PFC + +config PINCTRL_PFC_R8A7745 + bool "pin control support for RZ/G1E" if COMPILE_TEST + select PINCTRL_SH_PFC + +config PINCTRL_PFC_R8A77470 + bool "pin control support for RZ/G1C" if COMPILE_TEST + select PINCTRL_SH_PFC + +config PINCTRL_PFC_R8A774A1 + bool "pin control support for RZ/G2M" if COMPILE_TEST + select PINCTRL_SH_PFC + +config PINCTRL_PFC_R8A774B1 + bool "pin control support for RZ/G2N" if COMPILE_TEST + select PINCTRL_SH_PFC + +config PINCTRL_PFC_R8A774C0 + bool "pin control support for RZ/G2E" if COMPILE_TEST + select PINCTRL_SH_PFC + +config PINCTRL_PFC_R8A774E1 + bool "pin control support for RZ/G2H" if COMPILE_TEST + select PINCTRL_SH_PFC + +config PINCTRL_PFC_R8A7778 + bool "pin control support for R-Car M1A" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A7779 @@ -106,24 +142,24 @@ config PINCTRL_PFC_R8A7790 bool "pin control support for R-Car H2" if COMPILE_TEST select PINCTRL_SH_PFC -config PINCTRL_PFC_R8A77951 - bool "pin control support for R-Car H3 ES2.0+" if COMPILE_TEST +config PINCTRL_PFC_R8A7791 + bool "pin control support for R-Car M2-W" if COMPILE_TEST select PINCTRL_SH_PFC -config PINCTRL_PFC_R8A7778 - bool "pin control support for R-Car M1A" if COMPILE_TEST +config PINCTRL_PFC_R8A7792 + bool "pin control support for R-Car V2H" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A7793 bool "pin control support for R-Car M2-N" if COMPILE_TEST select PINCTRL_SH_PFC -config PINCTRL_PFC_R8A7791 - bool "pin control support for R-Car M2-W" if COMPILE_TEST +config PINCTRL_PFC_R8A7794 + bool "pin control support for R-Car E2" if COMPILE_TEST select PINCTRL_SH_PFC -config PINCTRL_PFC_R8A77965 - bool "pin control support for R-Car M3-N" 
if COMPILE_TEST +config PINCTRL_PFC_R8A77951 + bool "pin control support for R-Car H3 ES2.0+" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A77960 @@ -134,26 +170,34 @@ config PINCTRL_PFC_R8A77961 bool "pin control support for R-Car M3-W+" if COMPILE_TEST select PINCTRL_SH_PFC -config PINCTRL_PFC_R8A779F0 - bool "pin control support for R-Car S4-8" if COMPILE_TEST - select PINCTRL_SH_PFC - -config PINCTRL_PFC_R8A7792 - bool "pin control support for R-Car V2H" if COMPILE_TEST - select PINCTRL_SH_PFC - -config PINCTRL_PFC_R8A77980 - bool "pin control support for R-Car V3H" if COMPILE_TEST +config PINCTRL_PFC_R8A77965 + bool "pin control support for R-Car M3-N" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A77970 bool "pin control support for R-Car V3M" if COMPILE_TEST select PINCTRL_SH_PFC +config PINCTRL_PFC_R8A77980 + bool "pin control support for R-Car V3H" if COMPILE_TEST + select PINCTRL_SH_PFC + +config PINCTRL_PFC_R8A77990 + bool "pin control support for R-Car E3" if COMPILE_TEST + select PINCTRL_SH_PFC + +config PINCTRL_PFC_R8A77995 + bool "pin control support for R-Car D3" if COMPILE_TEST + select PINCTRL_SH_PFC + config PINCTRL_PFC_R8A779A0 bool "pin control support for R-Car V3U" if COMPILE_TEST select PINCTRL_SH_PFC +config PINCTRL_PFC_R8A779F0 + bool "pin control support for R-Car S4-8" if COMPILE_TEST + select PINCTRL_SH_PFC + config PINCTRL_PFC_R8A779G0 bool "pin control support for R-Car V4H" if COMPILE_TEST select PINCTRL_SH_PFC @@ -162,13 +206,58 @@ config PINCTRL_PFC_R8A779H0 bool "pin control support for R-Car V4M" if COMPILE_TEST select PINCTRL_SH_PFC -config PINCTRL_PFC_R8A7740 - bool "pin control support for R-Mobile A1" if COMPILE_TEST - select PINCTRL_SH_PFC_GPIO +config PINCTRL_PFC_SH7203 + bool "pin control support for SH7203" if COMPILE_TEST + select PINCTRL_SH_FUNC_GPIO -config PINCTRL_PFC_R8A73A4 - bool "pin control support for R-Mobile APE6" if COMPILE_TEST +config PINCTRL_PFC_SH7264 + bool "pin control support 
for SH7264" if COMPILE_TEST + select PINCTRL_SH_FUNC_GPIO + +config PINCTRL_PFC_SH7269 + bool "pin control support for SH7269" if COMPILE_TEST + select PINCTRL_SH_FUNC_GPIO + +config PINCTRL_PFC_SH73A0 + bool "pin control support for SH-Mobile AG5" if COMPILE_TEST select PINCTRL_SH_PFC_GPIO + select REGULATOR + +config PINCTRL_PFC_SH7720 + bool "pin control support for SH7720" if COMPILE_TEST + select PINCTRL_SH_FUNC_GPIO + +config PINCTRL_PFC_SH7722 + bool "pin control support for SH7722" if COMPILE_TEST + select PINCTRL_SH_FUNC_GPIO + +config PINCTRL_PFC_SH7723 + bool "pin control support for SH-Mobile R2" if COMPILE_TEST + select PINCTRL_SH_FUNC_GPIO + +config PINCTRL_PFC_SH7724 + bool "pin control support for SH-Mobile R2R" if COMPILE_TEST + select PINCTRL_SH_FUNC_GPIO + +config PINCTRL_PFC_SH7734 + bool "pin control support for SH7734" if COMPILE_TEST + select PINCTRL_SH_FUNC_GPIO + +config PINCTRL_PFC_SH7757 + bool "pin control support for SH7757" if COMPILE_TEST + select PINCTRL_SH_FUNC_GPIO + +config PINCTRL_PFC_SH7785 + bool "pin control support for SH7785" if COMPILE_TEST + select PINCTRL_SH_FUNC_GPIO + +config PINCTRL_PFC_SH7786 + bool "pin control support for SH7786" if COMPILE_TEST + select PINCTRL_SH_FUNC_GPIO + +config PINCTRL_PFC_SHX3 + bool "pin control support for SH-X3" if COMPILE_TEST + select PINCTRL_SH_FUNC_GPIO config PINCTRL_RZA1 bool "pin control support for RZ/A1" @@ -204,42 +293,6 @@ config PINCTRL_RZG2L This selects GPIO and pinctrl driver for Renesas RZ/{G2L,G2UL,V2L} platforms. 
-config PINCTRL_PFC_R8A77470 - bool "pin control support for RZ/G1C" if COMPILE_TEST - select PINCTRL_SH_PFC - -config PINCTRL_PFC_R8A7745 - bool "pin control support for RZ/G1E" if COMPILE_TEST - select PINCTRL_SH_PFC - -config PINCTRL_PFC_R8A7742 - bool "pin control support for RZ/G1H" if COMPILE_TEST - select PINCTRL_SH_PFC - -config PINCTRL_PFC_R8A7743 - bool "pin control support for RZ/G1M" if COMPILE_TEST - select PINCTRL_SH_PFC - -config PINCTRL_PFC_R8A7744 - bool "pin control support for RZ/G1N" if COMPILE_TEST - select PINCTRL_SH_PFC - -config PINCTRL_PFC_R8A774C0 - bool "pin control support for RZ/G2E" if COMPILE_TEST - select PINCTRL_SH_PFC - -config PINCTRL_PFC_R8A774E1 - bool "pin control support for RZ/G2H" if COMPILE_TEST - select PINCTRL_SH_PFC - -config PINCTRL_PFC_R8A774A1 - bool "pin control support for RZ/G2M" if COMPILE_TEST - select PINCTRL_SH_PFC - -config PINCTRL_PFC_R8A774B1 - bool "pin control support for RZ/G2N" if COMPILE_TEST - select PINCTRL_SH_PFC - config PINCTRL_RZN1 bool "pin control support for RZ/N1" depends on OF @@ -261,57 +314,4 @@ config PINCTRL_RZV2M This selects GPIO and pinctrl driver for Renesas RZ/V2M platforms. 
-config PINCTRL_PFC_SH7203 - bool "pin control support for SH7203" if COMPILE_TEST - select PINCTRL_SH_FUNC_GPIO - -config PINCTRL_PFC_SH7264 - bool "pin control support for SH7264" if COMPILE_TEST - select PINCTRL_SH_FUNC_GPIO - -config PINCTRL_PFC_SH7269 - bool "pin control support for SH7269" if COMPILE_TEST - select PINCTRL_SH_FUNC_GPIO - -config PINCTRL_PFC_SH7720 - bool "pin control support for SH7720" if COMPILE_TEST - select PINCTRL_SH_FUNC_GPIO - -config PINCTRL_PFC_SH7722 - bool "pin control support for SH7722" if COMPILE_TEST - select PINCTRL_SH_FUNC_GPIO - -config PINCTRL_PFC_SH7734 - bool "pin control support for SH7734" if COMPILE_TEST - select PINCTRL_SH_FUNC_GPIO - -config PINCTRL_PFC_SH7757 - bool "pin control support for SH7757" if COMPILE_TEST - select PINCTRL_SH_FUNC_GPIO - -config PINCTRL_PFC_SH7785 - bool "pin control support for SH7785" if COMPILE_TEST - select PINCTRL_SH_FUNC_GPIO - -config PINCTRL_PFC_SH7786 - bool "pin control support for SH7786" if COMPILE_TEST - select PINCTRL_SH_FUNC_GPIO - -config PINCTRL_PFC_SH73A0 - bool "pin control support for SH-Mobile AG5" if COMPILE_TEST - select PINCTRL_SH_PFC_GPIO - select REGULATOR - -config PINCTRL_PFC_SH7723 - bool "pin control support for SH-Mobile R2" if COMPILE_TEST - select PINCTRL_SH_FUNC_GPIO - -config PINCTRL_PFC_SH7724 - bool "pin control support for SH-Mobile R2R" if COMPILE_TEST - select PINCTRL_SH_FUNC_GPIO - -config PINCTRL_PFC_SHX3 - bool "pin control support for SH-X3" if COMPILE_TEST - select PINCTRL_SH_FUNC_GPIO - endmenu From 8ca43e41fc94b72e274301365f8f32c2536515c7 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 24 Jun 2025 05:11:31 +0000 Subject: [PATCH 0506/2411] pinctrl: renesas: Unify config naming Renesas SoCs have both a chip number and a chip name. In the current Renesas pin control Kconfig, some SoCs are described by chip number and others by chip name. Let's unify them as "pin control support for ${CHIP_NUMBER} (${CHIP_NAME})".
Signed-off-by: Kuninori Morimoto Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/87bjqdraf1.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Geert Uytterhoeven --- drivers/pinctrl/renesas/Kconfig | 66 ++++++++++++++++----------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/drivers/pinctrl/renesas/Kconfig b/drivers/pinctrl/renesas/Kconfig index 5a30134edd54..b955fe395a65 100644 --- a/drivers/pinctrl/renesas/Kconfig +++ b/drivers/pinctrl/renesas/Kconfig @@ -87,123 +87,123 @@ config PINCTRL_PFC_EMEV2 select PINCTRL_SH_PFC config PINCTRL_PFC_R8A73A4 - bool "pin control support for R-Mobile APE6" if COMPILE_TEST + bool "pin control support for R8A73A4 (R-Mobile APE6)" if COMPILE_TEST select PINCTRL_SH_PFC_GPIO config PINCTRL_PFC_R8A7740 - bool "pin control support for R-Mobile A1" if COMPILE_TEST + bool "pin control support for R8A7740 (R-Mobile A1)" if COMPILE_TEST select PINCTRL_SH_PFC_GPIO config PINCTRL_PFC_R8A7742 - bool "pin control support for RZ/G1H" if COMPILE_TEST + bool "pin control support for R8A7742 (RZ/G1H)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A7743 - bool "pin control support for RZ/G1M" if COMPILE_TEST + bool "pin control support for R8A7743 (RZ/G1M)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A7744 - bool "pin control support for RZ/G1N" if COMPILE_TEST + bool "pin control support for R8A7744 (RZ/G1N)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A7745 - bool "pin control support for RZ/G1E" if COMPILE_TEST + bool "pin control support for R8A7745 (RZ/G1E)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A77470 - bool "pin control support for RZ/G1C" if COMPILE_TEST + bool "pin control support for R8A77470 (RZ/G1C)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A774A1 - bool "pin control support for RZ/G2M" if COMPILE_TEST + bool "pin control support for R8A774A1 (RZ/G2M)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A774B1 
- bool "pin control support for RZ/G2N" if COMPILE_TEST + bool "pin control support for R8A774B1 (RZ/G2N)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A774C0 - bool "pin control support for RZ/G2E" if COMPILE_TEST + bool "pin control support for R8A774C0 (RZ/G2E)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A774E1 - bool "pin control support for RZ/G2H" if COMPILE_TEST + bool "pin control support for R8A774E1 (RZ/G2H)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A7778 - bool "pin control support for R-Car M1A" if COMPILE_TEST + bool "pin control support for R8A7778 (R-Car M1A)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A7779 - bool "pin control support for R-Car H1" if COMPILE_TEST + bool "pin control support for R8A7779 (R-Car H1)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A7790 - bool "pin control support for R-Car H2" if COMPILE_TEST + bool "pin control support for R8A7790 (R-Car H2)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A7791 - bool "pin control support for R-Car M2-W" if COMPILE_TEST + bool "pin control support for R8A7791 (R-Car M2-W)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A7792 - bool "pin control support for R-Car V2H" if COMPILE_TEST + bool "pin control support for R8A7792 (R-Car V2H)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A7793 - bool "pin control support for R-Car M2-N" if COMPILE_TEST + bool "pin control support for R8A7793 (R-Car M2-N)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A7794 - bool "pin control support for R-Car E2" if COMPILE_TEST + bool "pin control support for R8A7794 (R-Car E2)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A77951 - bool "pin control support for R-Car H3 ES2.0+" if COMPILE_TEST + bool "pin control support for R8A77951 (R-Car H3 ES2.0+)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A77960 - bool "pin control support for R-Car M3-W" if 
COMPILE_TEST + bool "pin control support for R8A77960 (R-Car M3-W)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A77961 - bool "pin control support for R-Car M3-W+" if COMPILE_TEST + bool "pin control support for R8A77961 (R-Car M3-W+)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A77965 - bool "pin control support for R-Car M3-N" if COMPILE_TEST + bool "pin control support for R8A77965 (R-Car M3-N)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A77970 - bool "pin control support for R-Car V3M" if COMPILE_TEST + bool "pin control support for R8A77970 (R-Car V3M)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A77980 - bool "pin control support for R-Car V3H" if COMPILE_TEST + bool "pin control support for R8A77980 (R-Car V3H)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A77990 - bool "pin control support for R-Car E3" if COMPILE_TEST + bool "pin control support for R8A77990 (R-Car E3)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A77995 - bool "pin control support for R-Car D3" if COMPILE_TEST + bool "pin control support for R8A77995 (R-Car D3)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A779A0 - bool "pin control support for R-Car V3U" if COMPILE_TEST + bool "pin control support for R8A779A0 (R-Car V3U)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A779F0 - bool "pin control support for R-Car S4-8" if COMPILE_TEST + bool "pin control support for R8A779F0 (R-Car S4-8)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A779G0 - bool "pin control support for R-Car V4H" if COMPILE_TEST + bool "pin control support for R8A779G0 (R-Car V4H)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_R8A779H0 - bool "pin control support for R-Car V4M" if COMPILE_TEST + bool "pin control support for R8A779H0 (R-Car V4M)" if COMPILE_TEST select PINCTRL_SH_PFC config PINCTRL_PFC_SH7203 @@ -219,7 +219,7 @@ config PINCTRL_PFC_SH7269 select PINCTRL_SH_FUNC_GPIO 
config PINCTRL_PFC_SH73A0 - bool "pin control support for SH-Mobile AG5" if COMPILE_TEST + bool "pin control support for SH73A0 (SH-Mobile AG5)" if COMPILE_TEST select PINCTRL_SH_PFC_GPIO select REGULATOR @@ -232,11 +232,11 @@ config PINCTRL_PFC_SH7722 select PINCTRL_SH_FUNC_GPIO config PINCTRL_PFC_SH7723 - bool "pin control support for SH-Mobile R2" if COMPILE_TEST + bool "pin control support for SH7723 (SH-Mobile R2)" if COMPILE_TEST select PINCTRL_SH_FUNC_GPIO config PINCTRL_PFC_SH7724 - bool "pin control support for SH-Mobile R2R" if COMPILE_TEST + bool "pin control support for SH7724 (SH-Mobile R2R)" if COMPILE_TEST select PINCTRL_SH_FUNC_GPIO config PINCTRL_PFC_SH7734 From 7000167796a00d64322dc3ed0c0970e31d481ed6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 26 Jun 2025 15:50:39 +0200 Subject: [PATCH 0507/2411] pinctrl: renesas: Simplify PINCTRL_RZV2M logic PINCTRL_RZV2M is selected by ARCH_R9A09G011, hence there is no need to depend on the latter. Move the dependency on COMPILE_TEST to the symbol prompt, like is done for all other auto-selected pin control symbols. Signed-off-by: Geert Uytterhoeven Reviewed-by: Lad Prabhakar Link: https://lore.kernel.org/d74843e06f73cd4c6e822d65f606e6042a50a0b7.1750945516.git.geert+renesas@glider.be --- drivers/pinctrl/renesas/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pinctrl/renesas/Kconfig b/drivers/pinctrl/renesas/Kconfig index b955fe395a65..99ae34a56871 100644 --- a/drivers/pinctrl/renesas/Kconfig +++ b/drivers/pinctrl/renesas/Kconfig @@ -303,9 +303,8 @@ config PINCTRL_RZN1 This selects pinctrl driver for Renesas RZ/N1 devices. 
config PINCTRL_RZV2M - bool "pin control support for RZ/V2M" + bool "pin control support for RZ/V2M" if COMPILE_TEST depends on OF - depends on ARCH_R9A09G011 || COMPILE_TEST select GPIOLIB select GENERIC_PINCTRL_GROUPS select GENERIC_PINMUX_FUNCTIONS From 1fdf938168c4d26fa279d4f204768690d1f9c4ae Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 1 Jul 2025 13:10:27 -0700 Subject: [PATCH 0508/2411] perf tools: Fix use-after-free in help_unknown_cmd() Currently perf aborts when it finds an invalid command. I guess it depends on the environment as I have some custom commands in the path. $ perf bad-command perf: 'bad-command' is not a perf-command. See 'perf --help'. Aborted (core dumped) It's because the exclude_cmds() in libsubcmd has a use-after-free when it removes some entries. After copying one to another entry, it keeps the pointer in the both position. And the next copy operation will free the later one but it's the same entry in the previous one. For example, let's say cmds = { A, B, C, D, E } and excludes = { B, E }. ci cj ei cmds-name excludes -----------+-------------------- 0 0 0 | A B : cmp < 0, ci == cj 1 1 0 | B B : cmp == 0 2 1 1 | C E : cmp < 0, ci != cj At this point, it frees cmds->names[1] and cmds->names[1] is assigned to cmds->names[2]. 3 2 1 | D E : cmp < 0, ci != cj Now it frees cmds->names[2] but it's the same as cmds->names[1]. So accessing cmds->names[1] will be invalid. This makes the subcmd tests succeed. 
$ perf test subcmd 69: libsubcmd help tests : 69.1: Load subcmd names : Ok 69.2: Uniquify subcmd names : Ok 69.3: Exclude duplicate subcmd names : Ok Fixes: 4b96679170c6 ("libsubcmd: Avoid SEGV/use-after-free when commands aren't excluded") Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250701201027.1171561-3-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/lib/subcmd/help.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c index 8561b0f01a24..9ef569492560 100644 --- a/tools/lib/subcmd/help.c +++ b/tools/lib/subcmd/help.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "subcmd-util.h" #include "help.h" #include "exec-cmd.h" @@ -82,10 +83,11 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) ci++; cj++; } else { - zfree(&cmds->names[cj]); - cmds->names[cj++] = cmds->names[ci++]; + cmds->names[cj++] = cmds->names[ci]; + cmds->names[ci++] = NULL; } } else if (cmp == 0) { + zfree(&cmds->names[ci]); ci++; ei++; } else if (cmp > 0) { @@ -94,12 +96,12 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) } if (ci != cj) { while (ci < cmds->cnt) { - zfree(&cmds->names[cj]); - cmds->names[cj++] = cmds->names[ci++]; + cmds->names[cj++] = cmds->names[ci]; + cmds->names[ci++] = NULL; } } for (ci = cj; ci < cmds->cnt; ci++) - zfree(&cmds->names[ci]); + assert(cmds->names[ci] == NULL); cmds->cnt = cj; } From 508b228942b291cb69f11027c07ca17ab2ac03bc Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 23 Jun 2025 15:27:31 +0200 Subject: [PATCH 0509/2411] perf list: Add IBM z17 event descriptions Update IBM z17 counter description using document SA23-2260-08: "The Load-Program-Parameter and the CPU-Measurement Facilities" released in May 2025 to include counter definitions for IBM z17 counter sets: * Basic counter set * Problem/user counter set * Crypto counter set. 
Use document SA23-2261-09: "The CPU-Measurement Facility Extended Counters Definition for z10, z196/z114, zEC12/zBC12, z13/z13s, z14, z15, z16 and z17" released in April 2025 to include counter definitions for IBM z17 * Extended counter set * MT-Diagnostic counter set. Use document SA22-7832-14: "z/Architecture Principles of Operation." released in April 2025 to include counter definitions for IBM z17 * PAI-Crypto counter set * PAI-Extension counter set. Use document "CPU MF Formulas and Updates April 2025" released in April 2025 to include metric calculations. Signed-off-by: Thomas Richter Acked-by: Sumanth Korikkar Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250623132731.899525-1-tmricht@linux.ibm.com Signed-off-by: Namhyung Kim --- .../pmu-events/arch/s390/cf_z17/basic.json | 58 + .../pmu-events/arch/s390/cf_z17/crypto6.json | 142 ++ .../pmu-events/arch/s390/cf_z17/extended.json | 541 ++++++++ .../arch/s390/cf_z17/pai_crypto.json | 1213 +++++++++++++++++ .../pmu-events/arch/s390/cf_z17/pai_ext.json | 261 ++++ .../arch/s390/cf_z17/transaction.json | 72 + tools/perf/pmu-events/arch/s390/mapfile.csv | 1 + 7 files changed, 2288 insertions(+) create mode 100644 tools/perf/pmu-events/arch/s390/cf_z17/basic.json create mode 100644 tools/perf/pmu-events/arch/s390/cf_z17/crypto6.json create mode 100644 tools/perf/pmu-events/arch/s390/cf_z17/extended.json create mode 100644 tools/perf/pmu-events/arch/s390/cf_z17/pai_crypto.json create mode 100644 tools/perf/pmu-events/arch/s390/cf_z17/pai_ext.json create mode 100644 tools/perf/pmu-events/arch/s390/cf_z17/transaction.json diff --git a/tools/perf/pmu-events/arch/s390/cf_z17/basic.json b/tools/perf/pmu-events/arch/s390/cf_z17/basic.json new file mode 100644 index 000000000000..1023d47028ce --- /dev/null +++ b/tools/perf/pmu-events/arch/s390/cf_z17/basic.json @@ -0,0 +1,58 @@ +[ + { + "Unit": "CPU-M-CF", + "EventCode": "0", + "EventName": "CPU_CYCLES", + "BriefDescription": "Cycle Count", + "PublicDescription": 
"This counter counts the total number of CPU cycles, excluding the number of cycles while the CPU is in the wait state." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "1", + "EventName": "INSTRUCTIONS", + "BriefDescription": "Instruction Count", + "PublicDescription": "This counter counts the total number of instructions executed by the CPU." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "2", + "EventName": "L1I_DIR_WRITES", + "BriefDescription": "Level-1 I-Cache Directory Write Count", + "PublicDescription": "This counter counts the total number of level-1 instruction-cache or unified-cache directory writes." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "3", + "EventName": "L1I_PENALTY_CYCLES", + "BriefDescription": "Level-1 I-Cache Penalty Cycle Count", + "PublicDescription": "This counter counts the total number of cache penalty cycles for level-1 instruction cache or unified cache." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "4", + "EventName": "L1D_DIR_WRITES", + "BriefDescription": "Level-1 D-Cache Directory Write Count", + "PublicDescription": "This counter counts the total number of level-1 data-cache directory writes." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "5", + "EventName": "L1D_PENALTY_CYCLES", + "BriefDescription": "Level-1 D-Cache Penalty Cycle Count", + "PublicDescription": "This counter counts the total number of cache penalty cycles for level-1 data cache." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "32", + "EventName": "PROBLEM_STATE_CPU_CYCLES", + "BriefDescription": "Problem-State Cycle Count", + "PublicDescription": "This counter counts the total number of CPU cycles when the CPU is in the problem state, excluding the number of cycles while the CPU is in the wait state." 
+ }, + { + "Unit": "CPU-M-CF", + "EventCode": "33", + "EventName": "PROBLEM_STATE_INSTRUCTIONS", + "BriefDescription": "Problem-State Instruction Count", + "PublicDescription": "This counter counts the total number of instructions executed by the CPU while in the problem state." + } +] diff --git a/tools/perf/pmu-events/arch/s390/cf_z17/crypto6.json b/tools/perf/pmu-events/arch/s390/cf_z17/crypto6.json new file mode 100644 index 000000000000..8b4380b8e489 --- /dev/null +++ b/tools/perf/pmu-events/arch/s390/cf_z17/crypto6.json @@ -0,0 +1,142 @@ +[ + { + "Unit": "CPU-M-CF", + "EventCode": "64", + "EventName": "PRNG_FUNCTIONS", + "BriefDescription": "PRNG Function Count", + "PublicDescription": "This counter counts the total number of the pseudorandom-number-generation functions issued by the CPU." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "65", + "EventName": "PRNG_CYCLES", + "BriefDescription": "PRNG Cycle Count", + "PublicDescription": "This counter counts the total number of CPU cycles when the DEA/AES/SHA coprocessor is busy performing the pseudorandom-number-generation functions issued by the CPU." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "66", + "EventName": "PRNG_BLOCKED_FUNCTIONS", + "BriefDescription": "PRNG Blocked Function Count", + "PublicDescription": "This counter counts the total number of the pseudorandom-number-generation functions that are issued by the CPU and are blocked because the DEA/AES/SHA coprocessor is busy performing a function issued by another CPU." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "67", + "EventName": "PRNG_BLOCKED_CYCLES", + "BriefDescription": "PRNG Blocked Cycle Count", + "PublicDescription": "This counter counts the total number of CPU cycles blocked for the pseudorandom-number-generation functions issued by the CPU because the DEA/AES/SHA coprocessor is busy performing a function issued by another CPU." 
+ }, + { + "Unit": "CPU-M-CF", + "EventCode": "68", + "EventName": "SHA_FUNCTIONS", + "BriefDescription": "SHA Function Count", + "PublicDescription": "This counter counts the total number of the SHA functions issued by the CPU." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "69", + "EventName": "SHA_CYCLES", + "BriefDescription": "SHA Cycle Count", + "PublicDescription": "This counter counts the total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "70", + "EventName": "SHA_BLOCKED_FUNCTIONS", + "BriefDescription": "SHA Blocked Function Count", + "PublicDescription": "This counter counts the total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "71", + "EventName": "SHA_BLOCKED_CYCLES", + "BriefDescription": "SHA Blocked Cycle Count", + "PublicDescription": "This counter counts the total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "72", + "EventName": "DEA_FUNCTIONS", + "BriefDescription": "DEA Function Count", + "PublicDescription": "This counter counts the total number of the DEA functions issued by the CPU." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "73", + "EventName": "DEA_CYCLES", + "BriefDescription": "DEA Cycle Count", + "PublicDescription": "This counter counts the total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU." 
+ }, + { + "Unit": "CPU-M-CF", + "EventCode": "74", + "EventName": "DEA_BLOCKED_FUNCTIONS", + "BriefDescription": "DEA Blocked Function Count", + "PublicDescription": "This counter counts the total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "75", + "EventName": "DEA_BLOCKED_CYCLES", + "BriefDescription": "DEA Blocked Cycle Count", + "PublicDescription": "This counter counts the total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "76", + "EventName": "AES_FUNCTIONS", + "BriefDescription": "AES Function Count", + "PublicDescription": "This counter counts the total number of the AES functions issued by the CPU." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "77", + "EventName": "AES_CYCLES", + "BriefDescription": "AES Cycle Count", + "PublicDescription": "This counter counts the total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "78", + "EventName": "AES_BLOCKED_FUNCTIONS", + "BriefDescription": "AES Blocked Function Count", + "PublicDescription": "This counter counts the total number of the AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "79", + "EventName": "AES_BLOCKED_CYCLES", + "BriefDescription": "AES Blocked Cycle Count", + "PublicDescription": "This counter counts the total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU." 
+ }, + { + "Unit": "CPU-M-CF", + "EventCode": "80", + "EventName": "ECC_FUNCTION_COUNT", + "BriefDescription": "ECC Function Count", + "PublicDescription": "This counter counts the total number of the elliptic-curve cryptography (ECC) functions issued by the CPU." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "81", + "EventName": "ECC_CYCLES_COUNT", + "BriefDescription": "ECC Cycles Count", + "PublicDescription": "This counter counts the total number of CPU cycles when the ECC coprocessor is busy performing the elliptic-curve cryptography (ECC) functions issued by the CPU." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "82", + "EventName": "ECC_BLOCKED_FUNCTION_COUNT", + "BriefDescription": "ECC Blocked Function Count", + "PublicDescription": "This counter counts the total number of the elliptic-curve cryptography (ECC) functions that are issued by the CPU and are blocked because the ECC coprocessor is busy performing a function issued by another CPU." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "83", + "EventName": "ECC_BLOCKED_CYCLES_COUNT", + "BriefDescription": "ECC Blocked Cycles Count", + "PublicDescription": "This counter counts the total number of CPU cycles blocked for the elliptic-curve cryptography (ECC) functions issued by the CPU because the ECC coprocessor is busy performing a function issued by another CPU." + } +] diff --git a/tools/perf/pmu-events/arch/s390/cf_z17/extended.json b/tools/perf/pmu-events/arch/s390/cf_z17/extended.json new file mode 100644 index 000000000000..e139482e217f --- /dev/null +++ b/tools/perf/pmu-events/arch/s390/cf_z17/extended.json @@ -0,0 +1,541 @@ +[ + { + "Unit": "CPU-M-CF", + "EventCode": "128", + "EventName": "L1D_RO_EXCL_WRITES", + "BriefDescription": "L1D Read-only Exclusive Writes", + "PublicDescription": "A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line." 
+ }, + { + "Unit": "CPU-M-CF", + "EventCode": "129", + "EventName": "DTLB2_WRITES", + "BriefDescription": "DTLB2 Writes", + "PublicDescription": "A translation has been written into the Translation Lookaside Buffer 2 (TLB2) and the request was made by the Level-1 Data cache. This is a replacement for what was provided for the DTLB on z13 and prior machines." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "130", + "EventName": "DTLB2_MISSES", + "BriefDescription": "DTLB2 Misses", + "PublicDescription": "A TLB2 miss is in progress for a request made by the Level-1 Data cache. Incremented by one for every TLB2 miss in progress for the Level-1 Data cache on this cycle. This is a replacement for what was provided for the DTLB on z13 and prior machines." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "131", + "EventName": "CRSTE_1MB_WRITES", + "BriefDescription": "One Megabyte CRSTE writes", + "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "132", + "EventName": "DTLB2_GPAGE_WRITES", + "BriefDescription": "DTLB2 Two-Gigabyte Page Writes", + "PublicDescription": "A translation entry for a two-gigabyte page was written into the Level-2 TLB." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "134", + "EventName": "ITLB2_WRITES", + "BriefDescription": "ITLB2 Writes", + "PublicDescription": "A translation entry has been written into the Translation Lookaside Buffer 2 (TLB2) and the request was made by the Level-1 Instruction cache. This is a replacement for what was provided for the ITLB on z13 and prior machines." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "135", + "EventName": "ITLB2_MISSES", + "BriefDescription": "ITLB2 Misses", + "PublicDescription": "A TLB2 miss is in progress for a request made by the Level-1 Instruction cache. Incremented by one for every TLB2 miss in progress for the Level-1 Instruction cache in a cycle. 
This is a replacement for what was provided for the ITLB on z13 and prior machines." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "137", + "EventName": "TLB2_PTE_WRITES", + "BriefDescription": "TLB2 Page Table Entry Writes", + "PublicDescription": "A translation entry was written into the Page Table Entry array in the Level-2 TLB." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "138", + "EventName": "TLB2_CRSTE_WRITES", + "BriefDescription": "TLB2 Combined Region and Segment Entry Writes", + "PublicDescription": "Translation entries were written into the Combined Region and Segment Table Entry array and the Page Table Entry array in the Level-2 TLB." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "139", + "EventName": "TLB2_ENGINES_BUSY", + "BriefDescription": "TLB2 Engines Busy", + "PublicDescription": "The number of Level-2 TLB translation engines busy in a cycle." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "140", + "EventName": "TX_C_TEND", + "BriefDescription": "Completed TEND instructions in constrained TX mode", + "PublicDescription": "A TEND instruction has completed in a constrained transactional-execution mode." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "141", + "EventName": "TX_NC_TEND", + "BriefDescription": "Completed TEND instructions in non-constrained TX mode", + "PublicDescription": "A TEND instruction has completed in a non-constrained transactional-execution mode." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "143", + "EventName": "L1C_TLB2_MISSES", + "BriefDescription": "L1C TLB2 Misses", + "PublicDescription": "Increments by one for any cycle where a Level-1 cache or Level-2 TLB miss is in progress." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "145", + "EventName": "DCW_REQ", + "BriefDescription": "Directory Write Level 1 Data Cache from L2-Cache", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the requestors Level-2 cache." 
+ }, + { + "Unit": "CPU-M-CF", + "EventCode": "146", + "EventName": "DCW_REQ_IV", + "BriefDescription": "Directory Write Level 1 Data Cache from L2-Cache with Intervention", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the requestors Level-2 cache with intervention." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "147", + "EventName": "DCW_REQ_CHIP_HIT", + "BriefDescription": "Directory Write Level 1 Data Cache from L2-Cache with Chip HP Hit", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the requestors Level-2 cache after using chip level horizontal persistence, Chip-HP hit." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "148", + "EventName": "DCW_REQ_DRAWER_HIT", + "BriefDescription": "Directory Write Level 1 Data Cache from L2-Cache with Drawer HP Hit", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the requestors Level-2 cache after using drawer level horizontal persistence, Drawer-HP hit." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "149", + "EventName": "DCW_ON_CHIP", + "BriefDescription": "Directory Write Level 1 Data Cache from On-Chip L2-Cache", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-2 cache." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "150", + "EventName": "DCW_ON_CHIP_IV", + "BriefDescription": "Directory Write Level 1 Data Cache from On-Chip L2-Cache with Intervention", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-2 cache with intervention." 
+ }, + { + "Unit": "CPU-M-CF", + "EventCode": "151", + "EventName": "DCW_ON_CHIP_CHIP_HIT", + "BriefDescription": "Directory Write Level 1 Data Cache from On-Chip L2-Cache with Chip HP Hit", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-2 cache after using chip level horizontal persistence, Chip-HP hit." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "152", + "EventName": "DCW_ON_CHIP_DRAWER_HIT", + "BriefDescription": "Directory Write Level 1 Data Cache from On-Chip L2-Cache with Drawer HP Hit", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-2 cache after using drawer level horizontal persistence, Drawer-HP hit." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "153", + "EventName": "DCW_ON_MODULE", + "BriefDescription": "Directory Write Level 1 Data Cache from On-Module L2-Cache", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Module Level-2 cache." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "154", + "EventName": "DCW_ON_DRAWER", + "BriefDescription": "Directory Write Level 1 Data Cache from On-Drawer L2-Cache", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Drawer Level-2 cache." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "155", + "EventName": "DCW_OFF_DRAWER", + "BriefDescription": "Directory Write Level 1 Data Cache from Off-Drawer L2-Cache", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-2 cache." 
+ }, + { + "Unit": "CPU-M-CF", + "EventCode": "156", + "EventName": "DCW_ON_CHIP_MEMORY", + "BriefDescription": "Directory Write Level 1 Cache from On-Chip Memory", + "PublicDescription": "A directory write to the Level-1 Data or Level-1 Instruction cache directory where the returned cache line was sourced from On-Chip memory." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "157", + "EventName": "DCW_ON_MODULE_MEMORY", + "BriefDescription": "Directory Write Level 1 Cache from On-Module Memory", + "PublicDescription": "A directory write to the Level-1 Data or Level-1 Instruction cache directory where the returned cache line was sourced from On-Module memory." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "158", + "EventName": "DCW_ON_DRAWER_MEMORY", + "BriefDescription": "Directory Write Level 1 Cache from On-Drawer Memory", + "PublicDescription": "A directory write to the Level-1 Data or Level-1 Instruction cache directory where the returned cache line was sourced from On-Drawer memory." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "159", + "EventName": "DCW_OFF_DRAWER_MEMORY", + "BriefDescription": "Directory Write Level 1 Cache from Off-Drawer Memory", + "PublicDescription": "A directory write to the Level-1 Data or Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer memory." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "160", + "EventName": "IDCW_ON_MODULE_IV", + "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from On-Module Memory L2-Cache with Intervention", + "PublicDescription": "A directory write to the Level-1 Data or Level-1 Instruction cache directory where the returned cache line was sourced from an On-Module Level-2 cache with intervention." 
+ }, + { + "Unit": "CPU-M-CF", + "EventCode": "161", + "EventName": "IDCW_ON_MODULE_CHIP_HIT", + "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from On-Module Memory L2-Cache with Chip Hit", + "PublicDescription": "A directory write to the Level-1 Data or Level-1 Instruction cache directory where the returned cache line was sourced from an On-Module Level-2 cache after using chip level horizontal persistence, Chip-HP hit." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "162", + "EventName": "IDCW_ON_MODULE_DRAWER_HIT", + "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from On-Module Memory L2-Cache with Drawer Hit", + "PublicDescription": "A directory write to the Level-1 Data or Level-1 Instruction cache directory where the returned cache line was sourced from an On-Module Level-2 cache after using drawer level horizontal persistence, Drawer-HP hit." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "163", + "EventName": "IDCW_ON_DRAWER_IV", + "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from On-Drawer L2-Cache with Intervention", + "PublicDescription": "A directory write to the Level-1 Data or Level-1 Instruction cache directory where the returned cache line was sourced from an On-Drawer Level-2 cache with intervention." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "164", + "EventName": "IDCW_ON_DRAWER_CHIP_HIT", + "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from On-Drawer L2-Cache with Chip Hit", + "PublicDescription": "A directory write to the Level-1 Data or Level-1 instruction cache directory where the returned cache line was sourced from an On-Drawer Level-2 cache after using chip level horizontal persistence, Chip-HP hit." 
+ }, + { + "Unit": "CPU-M-CF", + "EventCode": "165", + "EventName": "IDCW_ON_DRAWER_DRAWER_HIT", + "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from On-Drawer L2-Cache with Drawer Hit", + "PublicDescription": "A directory write to the Level-1 Data or Level-1 instruction cache directory where the returned cache line was sourced from an On-Drawer Level-2 cache after using drawer level horizontal persistence, Drawer-HP hit." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "166", + "EventName": "IDCW_OFF_DRAWER_IV", + "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from Off-Drawer L2-Cache with Intervention", + "PublicDescription": "A directory write to the Level-1 Data or Level-1 instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-2 cache with intervention." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "167", + "EventName": "IDCW_OFF_DRAWER_CHIP_HIT", + "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from Off-Drawer L2-Cache with Chip Hit", + "PublicDescription": "A directory write to the Level-1 Data or Level-1 instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-2 cache after using chip level horizontal persistence, Chip-HP hit." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "168", + "EventName": "IDCW_OFF_DRAWER_DRAWER_HIT", + "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from Off-Drawer L2-Cache with Drawer Hit", + "PublicDescription": "A directory write to the Level-1 Data or Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-2 cache after using drawer level horizontal persistence, Drawer-HP hit." 
+ }, + { + "Unit": "CPU-M-CF", + "EventCode": "169", + "EventName": "ICW_REQ", + "BriefDescription": "Directory Write Level 1 Instruction Cache from L2-Cache", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the requestors Level-2 cache." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "170", + "EventName": "ICW_REQ_IV", + "BriefDescription": "Directory Write Level 1 Instruction Cache from L2-Cache with Intervention", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the requestors Level-2 cache with intervention." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "171", + "EventName": "ICW_REQ_CHIP_HIT", + "BriefDescription": "Directory Write Level 1 Instruction Cache from L2-Cache with Chip HP Hit", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the requestors Level-2 cache after using chip level horizontal persistence, Chip-HP hit." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "172", + "EventName": "ICW_REQ_DRAWER_HIT", + "BriefDescription": "Directory Write Level 1 Instruction Cache from L2-Cache with Drawer HP Hit", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the requestors Level-2 cache after using drawer level horizontal persistence, Drawer-HP hit." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "173", + "EventName": "ICW_ON_CHIP", + "BriefDescription": "Directory Write Level 1 Instruction Cache from On-Chip L2-Cache", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip Level-2 cache."
+ }, + { + "Unit": "CPU-M-CF", + "EventCode": "174", + "EventName": "ICW_ON_CHIP_IV", + "BriefDescription": "Directory Write Level 1 Instruction Cache from On-Chip L2-Cache with Intervention", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip Level-2 cache with intervention." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "175", + "EventName": "ICW_ON_CHIP_CHIP_HIT", + "BriefDescription": "Directory Write Level 1 Instruction Cache from On-Chip L2-Cache with Chip HP Hit", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip Level-2 cache after using chip level horizontal persistence, Chip-HP hit." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "176", + "EventName": "ICW_ON_CHIP_DRAWER_HIT", + "BriefDescription": "Directory Write Level 1 Instruction Cache from On-Chip L2-Cache with Drawer HP Hit", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip level 2 cache after using drawer level horizontal persistence, Drawer-HP hit." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "177", + "EventName": "ICW_ON_MODULE", + "BriefDescription": "Directory Write Level 1 Instruction Cache from On-Module L2-Cache", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Module Level-2 cache." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "178", + "EventName": "ICW_ON_DRAWER", + "BriefDescription": "Directory Write Level 1 Instruction Cache from On-Drawer L2-Cache", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Drawer Level-2 cache." 
+ }, + { + "Unit": "CPU-M-CF", + "EventCode": "179", + "EventName": "ICW_OFF_DRAWER", + "BriefDescription": "Directory Write Level 1 Instruction Cache from Off-Drawer L2-Cache", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-2 cache." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "202", + "EventName": "CYCLES_SAMETHRD", + "BriefDescription": "CPU is not in wait state and CPU is running by itself", + "PublicDescription": "The number of cycles the CPU is not in wait state and the CPU is running by itself on the Core." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "203", + "EventName": "CYCLES_DIFFTHRD", + "BriefDescription": "CPU is not in wait state and CPU is running with another thread", + "PublicDescription": "The number of cycles the CPU is not in wait state and the CPU is running with another thread on the Core." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "204", + "EventName": "INST_SAMETHRD", + "BriefDescription": "Instructions executed on CPU by itself", + "PublicDescription": "The number of instructions executed on the CPU and the CPU is running by itself on the Core." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "205", + "EventName": "INST_DIFFTHRD", + "BriefDescription": "Instructions executed on CPU with another thread", + "PublicDescription": "The number of instructions executed on the CPU and the CPU is running with another thread on the Core." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "206", + "EventName": "WRONG_BRANCH_PREDICTION", + "BriefDescription": "Incorrect branch prediction on core", + "PublicDescription": "A count of the number of branches that were predicted incorrectly by the branch prediction logic in the Core. This includes incorrectly predicted branches that are executed in Firmware. Examples of instructions implemented in Firmware are complicated instructions like MVCL (Move Character Long) and PC (Program Call)."
+ }, + { + "Unit": "CPU-M-CF", + "EventCode": "225", + "EventName": "VX_BCD_EXECUTION_SLOTS", + "BriefDescription": "Count finished vector arithmetic Binary Coded Decimal instructions", + "PublicDescription": "Count of floating point execution slots used for finished vector arithmetic Binary Coded Decimal instructions. Instructions: VAP, VSP, VMP, VMSP, VDP, VSDP, VRP, VLIP, VSRP, VPSOP, VCP, VTP, VPKZ, VUPKZ, VCVB, VCVBG, VCVD, VCVDG, VSCHP, VSCSHP, VCSPH, VCLZDP, VPKZR, VSRPR, VUPKZH, VUPKZL, VTZ, VUPH, VUPL, VCVBX, VCVDX." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "226", + "EventName": "DECIMAL_INSTRUCTIONS", + "BriefDescription": "Decimal instruction dispatched", + "PublicDescription": "Decimal instruction dispatched. Instructions: CVB, CVD, AP, CP, DP, ED, EDMK, MP, SRP, SP, ZAP, TP." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "232", + "EventName": "LAST_HOST_TRANSLATIONS", + "BriefDescription": "Last host translation done", + "PublicDescription": "Last Host Translation done." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "244", + "EventName": "TX_NC_TABORT", + "BriefDescription": "Aborted transactions in unconstrained TX mode", + "PublicDescription": "A transaction abort has occurred in a non-constrained transactional-execution mode." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "245", + "EventName": "TX_C_TABORT_NO_SPECIAL", + "BriefDescription": "Aborted transactions in constrained TX mode", + "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is not using any special logic to allow the transaction to complete." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "246", + "EventName": "TX_C_TABORT_SPECIAL", + "BriefDescription": "Aborted transactions in constrained TX mode using special completion logic", + "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete." 
+ }, + { + "Unit": "CPU-M-CF", + "EventCode": "248", + "EventName": "DFLT_ACCESS", + "BriefDescription": "Cycles CPU spent obtaining access to Deflate unit", + "PublicDescription": "Cycles CPU spent obtaining access to Deflate unit." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "253", + "EventName": "DFLT_CYCLES", + "BriefDescription": "Cycles CPU is using Deflate unit", + "PublicDescription": "Cycles CPU is using Deflate unit." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "256", + "EventName": "SORTL", + "BriefDescription": "Count SORTL instructions", + "PublicDescription": "Increments by one for every SORT LISTS (SORTL) instruction executed." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "265", + "EventName": "DFLT_CC", + "BriefDescription": "Increments DEFLATE CONVERSION CALL", + "PublicDescription": "Increments by one for every DEFLATE CONVERSION CALL (DFLTCC) instruction executed." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "266", + "EventName": "DFLT_CCFINISH", + "BriefDescription": "Increments completed DEFLATE CONVERSION CALL", + "PublicDescription": "Increments by one for every DEFLATE CONVERSION CALL (DFLTCC) instruction executed that ended in Condition Codes 0, 1 or 2." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "267", + "EventName": "NNPA_INVOCATIONS", + "BriefDescription": "NNPA Total invocations", + "PublicDescription": "Increments by one for every NEURAL NETWORK PROCESSING ASSIST (NNPA) instruction executed." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "268", + "EventName": "NNPA_COMPLETIONS", + "BriefDescription": "NNPA Total completions", + "PublicDescription": "Increments by one for every NEURAL NETWORK PROCESSING ASSIST (NNPA) instruction executed that ended in Condition Code 0." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "269", + "EventName": "NNPA_WAIT_LOCK", + "BriefDescription": "Cycles spent obtaining NNPA lock", + "PublicDescription": "Cycles CPU spent obtaining access to IBM Z Integrated Accelerator for AI." 
+ }, + { + "Unit": "CPU-M-CF", + "EventCode": "270", + "EventName": "NNPA_HOLD_LOCK", + "BriefDescription": "Cycles spent holding NNPA lock", + "PublicDescription": "Cycles CPU is using IBM Z Integrated Accelerator for AI." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "272", + "EventName": "NNPA_INST_ONCHIP", + "BriefDescription": "NNPA instructions used on-chip Integrated Accelerator", + "PublicDescription": "A NEURAL NETWORK PROCESSING ASSIST (NNPA) instruction has used the Local On-Chip IBM Z Integrated Accelerator for AI during its execution." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "273", + "EventName": "NNPA_INST_OFFCHIP", + "BriefDescription": "NNPA instructions used off-chip Integrated Accelerator", + "PublicDescription": "A NEURAL NETWORK PROCESSING ASSIST (NNPA) instruction has used an Off-Chip IBM Z Integrated Accelerator for AI during its execution." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "274", + "EventName": "NNPA_INST_DIFF", + "BriefDescription": "NNPA instructions used different Integrated Accelerator", + "PublicDescription": "A NEURAL NETWORK PROCESSING ASSIST (NNPA) instruction has used a different IBM Z Integrated Accelerator for AI since it was last executed." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "276", + "EventName": "NNPA_4K_PREFETCH", + "BriefDescription": "Number of 4K prefetches for Integrated Accelerator", + "PublicDescription": "Number of 4K prefetches done for a remote IBM Z Integrated Accelerator for AI." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "277", + "EventName": "NNPA_COMPL_LOCK", + "BriefDescription": "A Perform Locked Operation has completed", + "PublicDescription": "A PERFORM LOCKED OPERATION (PLO) has completed."
+ }, + { + "Unit": "CPU-M-CF", + "EventCode": "278", + "EventName": "NNPA_RETRY_LOCK", + "BriefDescription": "A Perform Locked Operation has been retried", + "PublicDescription": "A PERFORM LOCKED OPERATION (PLO) has been retried and the CPU did not use any special logic to allow the PLO to complete." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "279", + "EventName": "NNPA_RETRY_LOCK_WITH_PLO", + "BriefDescription": "A Perform Locked Operation has been retried using special logic", + "PublicDescription": "A PERFORM LOCKED OPERATION (PLO) has been retried and the CPU is using special logic to allow PLO to complete." + }, + { + "Unit": "CPU-M-CF", + "EventCode": "448", + "EventName": "MT_DIAG_CYCLES_ONE_THR_ACTIVE", + "BriefDescription": "Cycle count with one thread active", + "PublicDescription": "Cycle count with one thread active" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "449", + "EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE", + "BriefDescription": "Cycle count with two threads active", + "PublicDescription": "Cycle count with two threads active" + } +] diff --git a/tools/perf/pmu-events/arch/s390/cf_z17/pai_crypto.json b/tools/perf/pmu-events/arch/s390/cf_z17/pai_crypto.json new file mode 100644 index 000000000000..a7176c988b8a --- /dev/null +++ b/tools/perf/pmu-events/arch/s390/cf_z17/pai_crypto.json @@ -0,0 +1,1213 @@ +[ + { + "Unit": "PAI-CRYPTO", + "EventCode": "4096", + "EventName": "CRYPTO_ALL", + "BriefDescription": "CRYPTO ALL", + "PublicDescription": "Sums of all non zero cryptography counters" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4097", + "EventName": "KM_DEA", + "BriefDescription": "KM DEA", + "PublicDescription": "KM-DEA function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4098", + "EventName": "KM_TDEA_128", + "BriefDescription": "KM TDEA 128", + "PublicDescription": "KM-TDEA-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4099", + "EventName": "KM_TDEA_192", + "BriefDescription": 
"KM TDEA 192", + "PublicDescription": "KM-TDEA-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4100", + "EventName": "KM_ENCRYPTED_DEA", + "BriefDescription": "KM ENCRYPTED DEA", + "PublicDescription": "KM-Encrypted-DEA function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4101", + "EventName": "KM_ENCRYPTED_TDEA_128", + "BriefDescription": "KM ENCRYPTED TDEA 128", + "PublicDescription": "KM-Encrypted-TDEA-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4102", + "EventName": "KM_ENCRYPTED_TDEA_192", + "BriefDescription": "KM ENCRYPTED TDEA 192", + "PublicDescription": "KM-Encrypted-TDEA-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4103", + "EventName": "KM_AES_128", + "BriefDescription": "KM AES 128", + "PublicDescription": "KM-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4104", + "EventName": "KM_AES_192", + "BriefDescription": "KM AES 192", + "PublicDescription": "KM-AES-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4105", + "EventName": "KM_AES_256", + "BriefDescription": "KM AES 256", + "PublicDescription": "KM-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4106", + "EventName": "KM_ENCRYPTED_AES_128", + "BriefDescription": "KM ENCRYPTED AES 128", + "PublicDescription": "KM-Encrypted-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4107", + "EventName": "KM_ENCRYPTED_AES_192", + "BriefDescription": "KM ENCRYPTED AES 192", + "PublicDescription": "KM-Encrypted-AES-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4108", + "EventName": "KM_ENCRYPTED_AES_256", + "BriefDescription": "KM ENCRYPTED AES 256", + "PublicDescription": "KM-Encrypted-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4109", + "EventName": "KM_XTS_AES_128", + 
"BriefDescription": "KM XTS AES 128", + "PublicDescription": "KM-XTS-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4110", + "EventName": "KM_XTS_AES_256", + "BriefDescription": "KM XTS AES 256", + "PublicDescription": "KM-XTS-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4111", + "EventName": "KM_XTS_ENCRYPTED_AES_128", + "BriefDescription": "KM XTS ENCRYPTED AES 128", + "PublicDescription": "KM-XTS-Encrypted-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4112", + "EventName": "KM_XTS_ENCRYPTED_AES_256", + "BriefDescription": "KM XTS ENCRYPTED AES 256", + "PublicDescription": "KM-XTS-Encrypted-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4113", + "EventName": "KMC_DEA", + "BriefDescription": "KMC DEA", + "PublicDescription": "KMC-DEA function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4114", + "EventName": "KMC_TDEA_128", + "BriefDescription": "KMC TDEA 128", + "PublicDescription": "KMC-TDEA-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4115", + "EventName": "KMC_TDEA_192", + "BriefDescription": "KMC TDEA 192", + "PublicDescription": "KMC-TDEA-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4116", + "EventName": "KMC_ENCRYPTED_DEA", + "BriefDescription": "KMC ENCRYPTED DEA", + "PublicDescription": "KMC-Encrypted-DEA function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4117", + "EventName": "KMC_ENCRYPTED_TDEA_128", + "BriefDescription": "KMC ENCRYPTED TDEA 128", + "PublicDescription": "KMC-Encrypted-TDEA-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4118", + "EventName": "KMC_ENCRYPTED_TDEA_192", + "BriefDescription": "KMC ENCRYPTED TDEA 192", + "PublicDescription": "KMC-Encrypted-TDEA-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4119", 
+ "EventName": "KMC_AES_128", + "BriefDescription": "KMC AES 128", + "PublicDescription": "KMC-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4120", + "EventName": "KMC_AES_192", + "BriefDescription": "KMC AES 192", + "PublicDescription": "KMC-AES-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4121", + "EventName": "KMC_AES_256", + "BriefDescription": "KMC AES 256", + "PublicDescription": "KMC-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4122", + "EventName": "KMC_ENCRYPTED_AES_128", + "BriefDescription": "KMC ENCRYPTED AES 128", + "PublicDescription": "KMC-Encrypted-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4123", + "EventName": "KMC_ENCRYPTED_AES_192", + "BriefDescription": "KMC ENCRYPTED AES 192", + "PublicDescription": "KMC-Encrypted-AES-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4124", + "EventName": "KMC_ENCRYPTED_AES_256", + "BriefDescription": "KMC ENCRYPTED AES 256", + "PublicDescription": "KMC-Encrypted-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4125", + "EventName": "KMC_PRNG", + "BriefDescription": "KMC PRNG", + "PublicDescription": "KMC-PRNG function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4126", + "EventName": "KMA_GCM_AES_128", + "BriefDescription": "KMA GCM AES 128", + "PublicDescription": "KMA-GCM-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4127", + "EventName": "KMA_GCM_AES_192", + "BriefDescription": "KMA GCM AES 192", + "PublicDescription": "KMA-GCM-AES-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4128", + "EventName": "KMA_GCM_AES_256", + "BriefDescription": "KMA GCM AES 256", + "PublicDescription": "KMA-GCM-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4129", + "EventName": 
"KMA_GCM_ENCRYPTED_AES_128", + "BriefDescription": "KMA GCM ENCRYPTED AES 128", + "PublicDescription": "KMA-GCM-Encrypted-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4130", + "EventName": "KMA_GCM_ENCRYPTED_AES_192", + "BriefDescription": "KMA GCM ENCRYPTED AES 192", + "PublicDescription": "KMA-GCM-Encrypted-AES-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4131", + "EventName": "KMA_GCM_ENCRYPTED_AES_256", + "BriefDescription": "KMA GCM ENCRYPTED AES 256", + "PublicDescription": "KMA-GCM-Encrypted-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4132", + "EventName": "KMF_DEA", + "BriefDescription": "KMF DEA", + "PublicDescription": "KMF-DEA function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4133", + "EventName": "KMF_TDEA_128", + "BriefDescription": "KMF TDEA 128", + "PublicDescription": "KMF-TDEA-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4134", + "EventName": "KMF_TDEA_192", + "BriefDescription": "KMF TDEA 192", + "PublicDescription": "KMF-TDEA-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4135", + "EventName": "KMF_ENCRYPTED_DEA", + "BriefDescription": "KMF ENCRYPTED DEA", + "PublicDescription": "KMF-Encrypted-DEA function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4136", + "EventName": "KMF_ENCRYPTED_TDEA_128", + "BriefDescription": "KMF ENCRYPTED TDEA 128", + "PublicDescription": "KMF-Encrypted-TDEA-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4137", + "EventName": "KMF_ENCRYPTED_TDEA_192", + "BriefDescription": "KMF ENCRYPTED TDEA 192", + "PublicDescription": "KMF-Encrypted-TDEA-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4138", + "EventName": "KMF_AES_128", + "BriefDescription": "KMF AES 128", + "PublicDescription": "KMF-AES-128 function ending with CC=0" + }, + 
{ + "Unit": "PAI-CRYPTO", + "EventCode": "4139", + "EventName": "KMF_AES_192", + "BriefDescription": "KMF AES 192", + "PublicDescription": "KMF-AES-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4140", + "EventName": "KMF_AES_256", + "BriefDescription": "KMF AES 256", + "PublicDescription": "KMF-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4141", + "EventName": "KMF_ENCRYPTED_AES_128", + "BriefDescription": "KMF ENCRYPTED AES 128", + "PublicDescription": "KMF-Encrypted-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4142", + "EventName": "KMF_ENCRYPTED_AES_192", + "BriefDescription": "KMF ENCRYPTED AES 192", + "PublicDescription": "KMF-Encrypted-AES-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4143", + "EventName": "KMF_ENCRYPTED_AES_256", + "BriefDescription": "KMF ENCRYPTED AES 256", + "PublicDescription": "KMF-Encrypted-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4144", + "EventName": "KMCTR_DEA", + "BriefDescription": "KMCTR DEA", + "PublicDescription": "KMCTR-DEA function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4145", + "EventName": "KMCTR_TDEA_128", + "BriefDescription": "KMCTR TDEA 128", + "PublicDescription": "KMCTR-TDEA-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4146", + "EventName": "KMCTR_TDEA_192", + "BriefDescription": "KMCTR TDEA 192", + "PublicDescription": "KMCTR-TDEA-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4147", + "EventName": "KMCTR_ENCRYPTED_DEA", + "BriefDescription": "KMCTR ENCRYPTED DEA", + "PublicDescription": "KMCTR-Encrypted-DEA function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4148", + "EventName": "KMCTR_ENCRYPTED_TDEA_128", + "BriefDescription": "KMCTR ENCRYPTED TDEA 128", + "PublicDescription": "KMCTR-Encrypted-TDEA-128 function 
ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4149", + "EventName": "KMCTR_ENCRYPTED_TDEA_192", + "BriefDescription": "KMCTR ENCRYPTED TDEA 192", + "PublicDescription": "KMCTR-Encrypted-TDEA-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4150", + "EventName": "KMCTR_AES_128", + "BriefDescription": "KMCTR AES 128", + "PublicDescription": "KMCTR-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4151", + "EventName": "KMCTR_AES_192", + "BriefDescription": "KMCTR AES 192", + "PublicDescription": "KMCTR-AES-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4152", + "EventName": "KMCTR_AES_256", + "BriefDescription": "KMCTR AES 256", + "PublicDescription": "KMCTR-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4153", + "EventName": "KMCTR_ENCRYPTED_AES_128", + "BriefDescription": "KMCTR ENCRYPTED AES 128", + "PublicDescription": "KMCTR-Encrypted-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4154", + "EventName": "KMCTR_ENCRYPTED_AES_192", + "BriefDescription": "KMCTR ENCRYPTED AES 192", + "PublicDescription": "KMCTR-Encrypted-AES-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4155", + "EventName": "KMCTR_ENCRYPTED_AES_256", + "BriefDescription": "KMCTR ENCRYPTED AES 256", + "PublicDescription": "KMCTR-Encrypted-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4156", + "EventName": "KMO_DEA", + "BriefDescription": "KMO DEA", + "PublicDescription": "KMO-DEA function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4157", + "EventName": "KMO_TDEA_128", + "BriefDescription": "KMO TDEA 128", + "PublicDescription": "KMO-TDEA-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4158", + "EventName": "KMO_TDEA_192", + "BriefDescription": "KMO TDEA 192", + "PublicDescription": 
"KMO-TDEA-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4159", + "EventName": "KMO_ENCRYPTED_DEA", + "BriefDescription": "KMO ENCRYPTED DEA", + "PublicDescription": "KMO-Encrypted-DEA function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4160", + "EventName": "KMO_ENCRYPTED_TDEA_128", + "BriefDescription": "KMO ENCRYPTED TDEA 128", + "PublicDescription": "KMO-Encrypted-TDEA-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4161", + "EventName": "KMO_ENCRYPTED_TDEA_192", + "BriefDescription": "KMO ENCRYPTED TDEA 192", + "PublicDescription": "KMO-Encrypted-TDEA-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4162", + "EventName": "KMO_AES_128", + "BriefDescription": "KMO AES 128", + "PublicDescription": "KMO-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4163", + "EventName": "KMO_AES_192", + "BriefDescription": "KMO AES 192", + "PublicDescription": "KMO-AES-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4164", + "EventName": "KMO_AES_256", + "BriefDescription": "KMO AES 256", + "PublicDescription": "KMO-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4165", + "EventName": "KMO_ENCRYPTED_AES_128", + "BriefDescription": "KMO ENCRYPTED AES 128", + "PublicDescription": "KMO-Encrypted-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4166", + "EventName": "KMO_ENCRYPTED_AES_192", + "BriefDescription": "KMO ENCRYPTED AES 192", + "PublicDescription": "KMO-Encrypted-AES-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4167", + "EventName": "KMO_ENCRYPTED_AES_256", + "BriefDescription": "KMO ENCRYPTED AES 256", + "PublicDescription": "KMO-Encrypted-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4168", + "EventName": "KIMD_SHA_1", + "BriefDescription": 
"KIMD SHA 1", + "PublicDescription": "KIMD-SHA-1 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4169", + "EventName": "KIMD_SHA_256", + "BriefDescription": "KIMD SHA 256", + "PublicDescription": "KIMD-SHA-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4170", + "EventName": "KIMD_SHA_512", + "BriefDescription": "KIMD SHA 512", + "PublicDescription": "KIMD-SHA-512 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4171", + "EventName": "KIMD_SHA3_224", + "BriefDescription": "KIMD SHA3 224", + "PublicDescription": "KIMD-SHA3-224 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4172", + "EventName": "KIMD_SHA3_256", + "BriefDescription": "KIMD SHA3 256", + "PublicDescription": "KIMD-SHA3-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4173", + "EventName": "KIMD_SHA3_384", + "BriefDescription": "KIMD SHA3 384", + "PublicDescription": "KIMD-SHA3-384 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4174", + "EventName": "KIMD_SHA3_512", + "BriefDescription": "KIMD SHA3 512", + "PublicDescription": "KIMD-SHA3-512 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4175", + "EventName": "KIMD_SHAKE_128", + "BriefDescription": "KIMD SHAKE 128", + "PublicDescription": "KIMD-SHAKE-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4176", + "EventName": "KIMD_SHAKE_256", + "BriefDescription": "KIMD SHAKE 256", + "PublicDescription": "KIMD-SHAKE-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4177", + "EventName": "KIMD_GHASH", + "BriefDescription": "KIMD GHASH", + "PublicDescription": "KIMD-GHASH function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4178", + "EventName": "KLMD_SHA_1", + "BriefDescription": "KLMD SHA 1", + "PublicDescription": "KLMD-SHA-1 function ending with CC=0" + }, + { + "Unit": 
"PAI-CRYPTO", + "EventCode": "4179", + "EventName": "KLMD_SHA_256", + "BriefDescription": "KLMD SHA 256", + "PublicDescription": "KLMD-SHA-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4180", + "EventName": "KLMD_SHA_512", + "BriefDescription": "KLMD SHA 512", + "PublicDescription": "KLMD-SHA-512 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4181", + "EventName": "KLMD_SHA3_224", + "BriefDescription": "KLMD SHA3 224", + "PublicDescription": "KLMD-SHA3-224 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4182", + "EventName": "KLMD_SHA3_256", + "BriefDescription": "KLMD SHA3 256", + "PublicDescription": "KLMD-SHA3-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4183", + "EventName": "KLMD_SHA3_384", + "BriefDescription": "KLMD SHA3 384", + "PublicDescription": "KLMD-SHA3-384 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4184", + "EventName": "KLMD_SHA3_512", + "BriefDescription": "KLMD SHA3 512", + "PublicDescription": "KLMD-SHA3-512 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4185", + "EventName": "KLMD_SHAKE_128", + "BriefDescription": "KLMD SHAKE 128", + "PublicDescription": "KLMD-SHAKE-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4186", + "EventName": "KLMD_SHAKE_256", + "BriefDescription": "KLMD SHAKE 256", + "PublicDescription": "KLMD-SHAKE-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4187", + "EventName": "KMAC_DEA", + "BriefDescription": "KMAC DEA", + "PublicDescription": "KMAC-DEA function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4188", + "EventName": "KMAC_TDEA_128", + "BriefDescription": "KMAC TDEA 128", + "PublicDescription": "KMAC-TDEA-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4189", + "EventName": "KMAC_TDEA_192", + "BriefDescription": 
"KMAC TDEA 192", + "PublicDescription": "KMAC-TDEA-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4190", + "EventName": "KMAC_ENCRYPTED_DEA", + "BriefDescription": "KMAC ENCRYPTED DEA", + "PublicDescription": "KMAC-Encrypted-DEA function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4191", + "EventName": "KMAC_ENCRYPTED_TDEA_128", + "BriefDescription": "KMAC ENCRYPTED TDEA 128", + "PublicDescription": "KMAC-Encrypted-TDEA-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4192", + "EventName": "KMAC_ENCRYPTED_TDEA_192", + "BriefDescription": "KMAC ENCRYPTED TDEA 192", + "PublicDescription": "KMAC-Encrypted-TDEA-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4193", + "EventName": "KMAC_AES_128", + "BriefDescription": "KMAC AES 128", + "PublicDescription": "KMAC-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4194", + "EventName": "KMAC_AES_192", + "BriefDescription": "KMAC AES 192", + "PublicDescription": "KMAC-AES-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4195", + "EventName": "KMAC_AES_256", + "BriefDescription": "KMAC AES 256", + "PublicDescription": "KMAC-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4196", + "EventName": "KMAC_ENCRYPTED_AES_128", + "BriefDescription": "KMAC ENCRYPTED AES 128", + "PublicDescription": "KMAC-Encrypted-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4197", + "EventName": "KMAC_ENCRYPTED_AES_192", + "BriefDescription": "KMAC ENCRYPTED AES 192", + "PublicDescription": "KMAC-Encrypted-AES-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4198", + "EventName": "KMAC_ENCRYPTED_AES_256", + "BriefDescription": "KMAC ENCRYPTED AES 256", + "PublicDescription": "KMAC-Encrypted-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + 
"EventCode": "4199", + "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_DEA", + "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING DEA", + "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-DEA function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4200", + "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_128", + "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING TDEA 128", + "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-TDEA-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4201", + "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_192", + "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING TDEA 192", + "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-TDEA-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4202", + "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_DEA", + "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED DEA", + "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-DEA function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4203", + "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_128", + "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED TDEA 128", + "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-TDEA-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4204", + "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_192", + "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED TDEA 192", + "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-TDEA-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4205", + "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_128", + "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING AES 128", + "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-AES-128 function ending with CC=0" + }, + { + "Unit": 
"PAI-CRYPTO", + "EventCode": "4206", + "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_192", + "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING AES 192", + "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-AES-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4207", + "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_256", + "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING AES 256", + "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4208", + "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_128", + "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED AES 128", + "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4209", + "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_192", + "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED AES 192", + "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-AES-192 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4210", + "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256A", + "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED AES 256A", + "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-AES-256A function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4211", + "EventName": "PCC_COMPUTE_XTS_PARAMETER_USING_AES_128", + "BriefDescription": "PCC COMPUTE XTS PARAMETER USING AES 128", + "PublicDescription": "PCC-Compute-XTS-Parameter-Using-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4212", + "EventName": "PCC_COMPUTE_XTS_PARAMETER_USING_AES_256", + "BriefDescription": "PCC COMPUTE XTS PARAMETER USING AES 256", + "PublicDescription": "PCC-Compute-XTS-Parameter-Using-AES-256 function ending with CC=0" + }, 
+ { + "Unit": "PAI-CRYPTO", + "EventCode": "4213", + "EventName": "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_128", + "BriefDescription": "PCC COMPUTE XTS PARAMETER USING ENCRYPTED AES 128", + "PublicDescription": "PCC-Compute-XTS-Parameter-Using-Encrypted-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4214", + "EventName": "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_256", + "BriefDescription": "PCC COMPUTE XTS PARAMETER USING ENCRYPTED AES 256", + "PublicDescription": "PCC-Compute-XTS-Parameter-Using-Encrypted-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4215", + "EventName": "PCC_SCALAR_MULTIPLY_P256", + "BriefDescription": "PCC SCALAR MULTIPLY P256", + "PublicDescription": "PCC-Scalar-Multiply-P256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4216", + "EventName": "PCC_SCALAR_MULTIPLY_P384", + "BriefDescription": "PCC SCALAR MULTIPLY P384", + "PublicDescription": "PCC-Scalar-Multiply-P384 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4217", + "EventName": "PCC_SCALAR_MULTIPLY_P521", + "BriefDescription": "PCC SCALAR MULTIPLY P521", + "PublicDescription": "PCC-Scalar-Multiply-P521 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4218", + "EventName": "PCC_SCALAR_MULTIPLY_ED25519", + "BriefDescription": "PCC SCALAR MULTIPLY ED25519", + "PublicDescription": "PCC-Scalar-Multiply-Ed25519 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4219", + "EventName": "PCC_SCALAR_MULTIPLY_ED448", + "BriefDescription": "PCC SCALAR MULTIPLY ED448", + "PublicDescription": "PCC-Scalar-Multiply-Ed448 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4220", + "EventName": "PCC_SCALAR_MULTIPLY_X25519", + "BriefDescription": "PCC SCALAR MULTIPLY X25519", + "PublicDescription": "PCC-Scalar-Multiply-X25519 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + 
"EventCode": "4221", + "EventName": "PCC_SCALAR_MULTIPLY_X448", + "BriefDescription": "PCC SCALAR MULTIPLY X448", + "PublicDescription": "PCC-Scalar-Multiply-X448 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4222", + "EventName": "PRNO_SHA_512_DRNG", + "BriefDescription": "PRNO SHA 512 DRNG", + "PublicDescription": "PRNO-SHA-512-DRNG function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4223", + "EventName": "PRNO_TRNG_QUERY_RAW_TO_CONDITIONED_RATIO", + "BriefDescription": "PRNO TRNG QUERY RAW TO CONDITIONED RATIO", + "PublicDescription": "PRNO-TRNG-Query-Raw-to-Conditioned-Ratio function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4224", + "EventName": "PRNO_TRNG", + "BriefDescription": "PRNO TRNG", + "PublicDescription": "PRNO-TRNG function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4225", + "EventName": "KDSA_ECDSA_VERIFY_P256", + "BriefDescription": "KDSA ECDSA VERIFY P256", + "PublicDescription": "KDSA-ECDSA-Verify-P256 function ending with CC=0 or CC=2" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4226", + "EventName": "KDSA_ECDSA_VERIFY_P384", + "BriefDescription": "KDSA ECDSA VERIFY P384", + "PublicDescription": "KDSA-ECDSA-Verify-P384 function ending with CC=0 or CC=2" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4227", + "EventName": "KDSA_ECDSA_VERIFY_P521", + "BriefDescription": "KDSA ECDSA VERIFY P521", + "PublicDescription": "KDSA-ECDSA-Verify-P521 function ending with CC=0 or CC=2" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4228", + "EventName": "KDSA_ECDSA_SIGN_P256", + "BriefDescription": "KDSA ECDSA SIGN P256", + "PublicDescription": "KDSA-ECDSA-Sign-P256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4229", + "EventName": "KDSA_ECDSA_SIGN_P384", + "BriefDescription": "KDSA ECDSA SIGN P384", + "PublicDescription": "KDSA-ECDSA-Sign-P384 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + 
"EventCode": "4230", + "EventName": "KDSA_ECDSA_SIGN_P521", + "BriefDescription": "KDSA ECDSA SIGN P521", + "PublicDescription": "KDSA-ECDSA-Sign-P521 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4231", + "EventName": "KDSA_ENCRYPTED_ECDSA_SIGN_P256", + "BriefDescription": "KDSA ENCRYPTED ECDSA SIGN P256", + "PublicDescription": "KDSA-Encrypted-ECDSA-Sign-P256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4232", + "EventName": "KDSA_ENCRYPTED_ECDSA_SIGN_P384", + "BriefDescription": "KDSA ENCRYPTED ECDSA SIGN P384", + "PublicDescription": "KDSA-Encrypted-ECDSA-Sign-P384 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4233", + "EventName": "KDSA_ENCRYPTED_ECDSA_SIGN_P521", + "BriefDescription": "KDSA ENCRYPTED ECDSA SIGN P521", + "PublicDescription": "KDSA-Encrypted-ECDSA-Sign-P521 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4234", + "EventName": "KDSA_EDDSA_VERIFY_ED25519", + "BriefDescription": "KDSA EDDSA VERIFY ED25519", + "PublicDescription": "KDSA-EdDSA-Verify-Ed25519 function ending with CC=0 or CC=2" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4235", + "EventName": "KDSA_EDDSA_VERIFY_ED448", + "BriefDescription": "KDSA EDDSA VERIFY ED448", + "PublicDescription": "KDSA-EdDSA-Verify-Ed448 function ending with CC=0 or CC=2" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4236", + "EventName": "KDSA_EDDSA_SIGN_ED25519", + "BriefDescription": "KDSA EDDSA SIGN ED25519", + "PublicDescription": "KDSA-EdDSA-Sign-Ed25519 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4237", + "EventName": "KDSA_EDDSA_SIGN_ED448", + "BriefDescription": "KDSA EDDSA SIGN ED448", + "PublicDescription": "KDSA-EdDSA-Sign-Ed448 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4238", + "EventName": "KDSA_ENCRYPTED_EDDSA_SIGN_ED25519", + "BriefDescription": "KDSA ENCRYPTED EDDSA SIGN ED25519", + 
"PublicDescription": "KDSA-Encrypted-EdDSA-Sign-Ed25519 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4239", + "EventName": "KDSA_ENCRYPTED_EDDSA_SIGN_ED448", + "BriefDescription": "KDSA ENCRYPTED EDDSA SIGN ED448", + "PublicDescription": "KDSA-Encrypted-EdDSA-Sign-Ed448 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4240", + "EventName": "PCKMO_ENCRYPT_DEA_KEY", + "BriefDescription": "PCKMO ENCRYPT DEA KEY", + "PublicDescription": "PCKMO-Encrypt-DEA-key function" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4241", + "EventName": "PCKMO_ENCRYPT_TDEA_128_KEY", + "BriefDescription": "PCKMO ENCRYPT TDEA 128 KEY", + "PublicDescription": "PCKMO-Encrypt-TDEA-128-key function" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4242", + "EventName": "PCKMO_ENCRYPT_TDEA_192_KEY", + "BriefDescription": "PCKMO ENCRYPT TDEA 192 KEY", + "PublicDescription": "PCKMO-Encrypt-TDEA-192-key function" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4243", + "EventName": "PCKMO_ENCRYPT_AES_128_KEY", + "BriefDescription": "PCKMO ENCRYPT AES 128 KEY", + "PublicDescription": "PCKMO-Encrypt-AES-128-key function" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4244", + "EventName": "PCKMO_ENCRYPT_AES_192_KEY", + "BriefDescription": "PCKMO ENCRYPT AES 192 KEY", + "PublicDescription": "PCKMO-Encrypt-AES-192-key function" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4245", + "EventName": "PCKMO_ENCRYPT_AES_256_KEY", + "BriefDescription": "PCKMO ENCRYPT AES 256 KEY", + "PublicDescription": "PCKMO-Encrypt-AES-256-key function" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4246", + "EventName": "PCKMO_ENCRYPT_ECC_P256_KEY", + "BriefDescription": "PCKMO ENCRYPT ECC P256 KEY", + "PublicDescription": "PCKMO-Encrypt-ECC-P256-key function" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4247", + "EventName": "PCKMO_ENCRYPT_ECC_P384_KEY", + "BriefDescription": "PCKMO ENCRYPT ECC P384 KEY", + "PublicDescription": 
"PCKMO-Encrypt-ECC-P384-key function" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4248", + "EventName": "PCKMO_ENCRYPT_ECC_P521_KEY", + "BriefDescription": "PCKMO ENCRYPT ECC P521 KEY", + "PublicDescription": "PCKMO-Encrypt-ECC-P521-key function" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4249", + "EventName": "PCKMO_ENCRYPT_ECC_ED25519_KEY", + "BriefDescription": "PCKMO ENCRYPT ECC ED25519 KEY", + "PublicDescription": "PCKMO-Encrypt-ECC-Ed25519-key function" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4250", + "EventName": "PCKMO_ENCRYPT_ECC_ED448_KEY", + "BriefDescription": "PCKMO ENCRYPT ECC ED448 KEY", + "PublicDescription": "PCKMO-Encrypt-ECC-Ed448-key function" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4251", + "EventName": "IBM_RESERVED_155", + "BriefDescription": "IBM RESERVED_155", + "PublicDescription": "Reserved for IBM use" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4252", + "EventName": "IBM_RESERVED_156", + "BriefDescription": "IBM RESERVED_156", + "PublicDescription": "Reserved for IBM use" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4253", + "EventName": "KM_FULL_XTS_AES_128", + "BriefDescription": "KM FULL XTS AES 128", + "PublicDescription": "KM-Full-XTS-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4254", + "EventName": "KM_FULL_XTS_AES_256", + "BriefDescription": "KM FULL XTS AES 256", + "PublicDescription": "KM-Full-XTS-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4255", + "EventName": "KM_FULL_XTS_ENCRYPTED_AES_128", + "BriefDescription": "KM FULL XTS ENCRYPTED AES 128", + "PublicDescription": "KM-Full-XTS-Encrypted-AES-128 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4256", + "EventName": "KM_FULL_XTS_ENCRYPTED_AES_256", + "BriefDescription": "KM FULL XTS ENCRYPTED AES 256", + "PublicDescription": "KM-FULL-XTS-ENCRYPTED-AES-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": 
"4257", + "EventName": "KMAC_HMAC_SHA_224", + "BriefDescription": "KMAC HMAC SHA 224", + "PublicDescription": "KMAC-HMAC-SHA-224 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4258", + "EventName": "KMAC_HMAC_SHA_256", + "BriefDescription": "KMAC HMAC SHA 256", + "PublicDescription": "KMAC-HMAC-SHA-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4259", + "EventName": "KMAC_HMAC_SHA_384", + "BriefDescription": "KMAC HMAC SHA 384", + "PublicDescription": "KMAC-HMAC-SHA-384 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4260", + "EventName": "KMAC_HMAC_SHA_512", + "BriefDescription": "KMAC HMAC SHA 512", + "PublicDescription": "KMAC-HMAC-SHA-512 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4261", + "EventName": "KMAC_HMAC_ENCRYPTED_SHA_224", + "BriefDescription": "KMAC HMAC ENCRYPTED SHA 224", + "PublicDescription": "KMAC-HMAC-Encrypted-SHA-224 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4262", + "EventName": "KMAC_HMAC_ENCRYPTED_SHA_256", + "BriefDescription": "KMAC HMAC ENCRYPTED SHA 256", + "PublicDescription": "KMAC-HMAC-Encrypted-SHA-256 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4263", + "EventName": "KMAC_HMAC_ENCRYPTED_SHA_384", + "BriefDescription": "KMAC HMAC ENCRYPTED SHA 384", + "PublicDescription": "KMAC-HMAC-Encrypted-SHA-384 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4264", + "EventName": "KMAC_HMAC_ENCRYPTED_SHA_512", + "BriefDescription": "KMAC HMAC ENCRYPTED SHA 512", + "PublicDescription": "KMAC-HMAC-Encrypted-SHA-512 function ending with CC=0" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4265", + "EventName": "PCKMO_ENCRYPT_HMAC_512_KEY", + "BriefDescription": "PCKMO ENCRYPT HMAC 512 KEY", + "PublicDescription": "PCKMO-Encrypt-HMAC-512-Key function" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4266", + "EventName": 
"PCKMO_ENCRYPT_HMAC_1024_KEY", + "BriefDescription": "PCKMO ENCRYPT HMAC 1024 KEY", + "PublicDescription": "PCKMO-Encrypt-HMAC-1024-Key function" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4267", + "EventName": "PCKMO_ENCRYPT_AES_XTS_128", + "BriefDescription": "PCKMO ENCRYPT AES XTS Double Key 128", + "PublicDescription": "PCKMO-ENCRYPT-AES-XTS-128 Double Key function" + }, + { + "Unit": "PAI-CRYPTO", + "EventCode": "4268", + "EventName": "PCKMO_ENCRYPT_AES_XTS_256", + "BriefDescription": "PCKMO ENCRYPT AES XTS Double Key 256", + "PublicDescription": "PCKMO-ENCRYPT-AES-XTS-256 Double Key function" + } +] diff --git a/tools/perf/pmu-events/arch/s390/cf_z17/pai_ext.json b/tools/perf/pmu-events/arch/s390/cf_z17/pai_ext.json new file mode 100644 index 000000000000..935e9f5763b4 --- /dev/null +++ b/tools/perf/pmu-events/arch/s390/cf_z17/pai_ext.json @@ -0,0 +1,261 @@ +[ + { + "Unit": "PAI-EXT", + "EventCode": "6144", + "EventName": "NNPA_ALL", + "BriefDescription": "NNPA ALL", + "PublicDescription": "Sums of all non zero NNPA counters" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6145", + "EventName": "NNPA_ADD", + "BriefDescription": "NNPA ADD function", + "PublicDescription": "NNPA-ADD function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6146", + "EventName": "NNPA_SUB", + "BriefDescription": "NNPA SUB function", + "PublicDescription": "NNPA-SUB function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6147", + "EventName": "NNPA_MUL", + "BriefDescription": "NNPA MUL function", + "PublicDescription": "NNPA-MUL function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6148", + "EventName": "NNPA_DIV", + "BriefDescription": "NNPA_DIV function", + "PublicDescription": "NNPA-DIV function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6149", + "EventName": "NNPA_MIN", + "BriefDescription": "NNPA MIN function", + "PublicDescription": "NNPA-MIN function ending with CC=0" + }, + { + "Unit": "PAI-EXT", 
+ "EventCode": "6150", + "EventName": "NNPA_MAX", + "BriefDescription": "NNPA MAX function", + "PublicDescription": "NNPA-MAX function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6151", + "EventName": "NNPA_LOG", + "BriefDescription": "NNPA LOG function", + "PublicDescription": "NNPA Log function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6152", + "EventName": "NNPA_EXP", + "BriefDescription": "NNPA EXP function", + "PublicDescription": "NNPA-EXP function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6153", + "EventName": "NNPA_IBM_RESERVED_9", + "BriefDescription": "Reserved for IBM use", + "PublicDescription": "Reserved for IBM use" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6154", + "EventName": "NNPA_RELU", + "BriefDescription": "NNPA RELU function", + "PublicDescription": "NNPA-RELU function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6155", + "EventName": "NNPA_TANH", + "BriefDescription": "NNPA TANH function", + "PublicDescription": "NNPA-TANH function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6156", + "EventName": "NNPA_SIGMOID", + "BriefDescription": "NNPA SIGMOID function", + "PublicDescription": "NNPA-SIGMOID function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6157", + "EventName": "NNPA_SOFTMAX", + "BriefDescription": "NNPA SOFTMAX function", + "PublicDescription": "NNPA-SOFTMAX function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6158", + "EventName": "NNPA_BATCHNORM", + "BriefDescription": "NNPA BATCHNORM function", + "PublicDescription": "NNPA-BATCHNORM function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6159", + "EventName": "NNPA_MAXPOOL2D", + "BriefDescription": "NNPA MAXPOOL2D function", + "PublicDescription": "NNPA-MAXPOOL2D function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6160", + "EventName": "NNPA_AVGPOOL2D", + "BriefDescription": "NNPA_AVGPOOL2D 
function", + "PublicDescription": "NNPA-AVGPOOL2D function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6161", + "EventName": "NNPA_LSTMACT", + "BriefDescription": "NNPA LSTMACT function", + "PublicDescription": "NNPA-LSTMACT function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6162", + "EventName": "NNPA_GRUACT", + "BriefDescription": "NNPA GRUACT function", + "PublicDescription": "NNPA-GRUACT function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6163", + "EventName": "NNPA_CONVOLUTION", + "BriefDescription": "NNPA CONVOLUTION function", + "PublicDescription": "NNPA-CONVOLUTION function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6164", + "EventName": "NNPA_MATMUL_OP", + "BriefDescription": "NNPA MATMUL OP function", + "PublicDescription": "NNPA-MATMUL-OP function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6165", + "EventName": "NNPA_MATMUL_OP_BCAST23", + "BriefDescription": "NNPA MATMUL OP BCAST23 function", + "PublicDescription": "NNPA-MATMUL-OP-BCAST23 function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6166", + "EventName": "NNPA_SMALLBATCH", + "BriefDescription": "NNPA Counter 22", + "PublicDescription": "NNPA function with conditions as described in Principles of Operation" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6167", + "EventName": "NNPA_LARGEDIM", + "BriefDescription": "NNPA Counter 23", + "PublicDescription": "NNPA function with conditions as described in Principles of Operation" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6168", + "EventName": "NNPA_SMALLTENSOR", + "BriefDescription": "NNPA Counter 24", + "PublicDescription": "NNPA function with conditions as described in Principles of Operation" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6169", + "EventName": "NNPA_1MFRAME", + "BriefDescription": "NNPA Counter 25", + "PublicDescription": "NNPA function with conditions as described in Principles of Operation" + }, + { + 
"Unit": "PAI-EXT", + "EventCode": "6170", + "EventName": "NNPA_2GFRAME", + "BriefDescription": "NNPA Counter 26", + "PublicDescription": "NNPA function with conditions as described in Principles of Operation" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6171", + "EventName": "NNPA_ACCESSEXCEPT", + "BriefDescription": "NNPA Counter 27", + "PublicDescription": "NNPA function with conditions as described in Principles of Operation" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6172", + "EventName": "NNPA_TRANSFORM", + "BriefDescription": "NNPA-TRANSFORM function", + "PublicDescription": "NNPA-TRANSFORM function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6173", + "EventName": "NNPA_GELU", + "BriefDescription": "NNPA-GELU function", + "PublicDescription": "NNPA-GELU function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6174", + "EventName": "NNPA_MOMENTS", + "BriefDescription": "NNPA-MOMENTS function", + "PublicDescription": "NNPA-MOMENTS function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6175", + "EventName": "NNPA_LAYERNORM", + "BriefDescription": "NNPA-LAYERNORM function", + "PublicDescription": "NNPA-LAYERNORM function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6176", + "EventName": "NNPA_MATMUL_OP_BCAST1", + "BriefDescription": "NNPA-MATMUL_OP_BCAST1 function", + "PublicDescription": "NNPA-MATMUL-OP-BCAST1 function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6177", + "EventName": "NNPA_SQRT", + "BriefDescription": "NNPA-SQRT function", + "PublicDescription": "NNPA-SQRT function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6178", + "EventName": "NNPA_INVSQRT", + "BriefDescription": "NNPA-INVSQRT function", + "PublicDescription": "NNPA-INVSQRT function ending with CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6179", + "EventName": "NNPA_NORM", + "BriefDescription": "NNPA-NORM function", + "PublicDescription": "NNPA-NORM function ending with 
CC=0" + }, + { + "Unit": "PAI-EXT", + "EventCode": "6180", + "EventName": "NNPA_REDUCE", + "BriefDescription": "NNPA-REDUCE function", + "PublicDescription": "NNPA-REDUCE function ending with CC=0" + } +] diff --git a/tools/perf/pmu-events/arch/s390/cf_z17/transaction.json b/tools/perf/pmu-events/arch/s390/cf_z17/transaction.json new file mode 100644 index 000000000000..74df533c8b6f --- /dev/null +++ b/tools/perf/pmu-events/arch/s390/cf_z17/transaction.json @@ -0,0 +1,72 @@ +[ + { + "BriefDescription": "Transaction count", + "MetricName": "transaction", + "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL if has_event(TX_C_TEND) else 0" + }, + { + "BriefDescription": "Cycles per Instruction", + "MetricName": "cpi", + "MetricExpr": "CPU_CYCLES / INSTRUCTIONS if has_event(INSTRUCTIONS) else 0" + }, + { + "BriefDescription": "Problem State Instruction Ratio", + "MetricName": "prbstate", + "MetricExpr": "(PROBLEM_STATE_INSTRUCTIONS / INSTRUCTIONS) * 100 if has_event(INSTRUCTIONS) else 0" + }, + { + "BriefDescription": "Level One Miss per 100 Instructions", + "MetricName": "l1mp", + "MetricExpr": "((L1I_DIR_WRITES + L1D_DIR_WRITES) / INSTRUCTIONS) * 100 if has_event(INSTRUCTIONS) else 0" + }, + { + "BriefDescription": "Percentage sourced from Level 2 cache", + "MetricName": "l2p", + "MetricExpr": "((DCW_REQ + DCW_REQ_IV + ICW_REQ + ICW_REQ_IV) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_REQ) else 0" + }, + { + "BriefDescription": "Percentage sourced from Level 3 on same chip cache", + "MetricName": "l3p", + "MetricExpr": "((DCW_REQ_CHIP_HIT + DCW_ON_CHIP + DCW_ON_CHIP_IV + DCW_ON_CHIP_CHIP_HIT + ICW_REQ_CHIP_HIT + ICW_ON_CHIP + ICW_ON_CHIP_IV + ICW_ON_CHIP_CHIP_HIT) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_REQ_CHIP_HIT) else 0" + }, + { + "BriefDescription": "Percentage sourced from Level 4 Local cache on same drawer", + "MetricName": "l4lp", + "MetricExpr": "((DCW_REQ_DRAWER_HIT + 
DCW_ON_CHIP_DRAWER_HIT + DCW_ON_MODULE + DCW_ON_DRAWER + IDCW_ON_MODULE_IV + IDCW_ON_MODULE_CHIP_HIT + IDCW_ON_MODULE_DRAWER_HIT + IDCW_ON_DRAWER_IV + IDCW_ON_DRAWER_CHIP_HIT + IDCW_ON_DRAWER_DRAWER_HIT + ICW_REQ_DRAWER_HIT + ICW_ON_CHIP_DRAWER_HIT + ICW_ON_MODULE + ICW_ON_DRAWER) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_REQ_DRAWER_HIT) else 0" + }, + { + "BriefDescription": "Percentage sourced from Level 4 Remote cache on different book", + "MetricName": "l4rp", + "MetricExpr": "((DCW_OFF_DRAWER + IDCW_OFF_DRAWER_IV + IDCW_OFF_DRAWER_CHIP_HIT + IDCW_OFF_DRAWER_DRAWER_HIT + ICW_OFF_DRAWER) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_OFF_DRAWER) else 0" + }, + { + "BriefDescription": "Percentage sourced from memory", + "MetricName": "memp", + "MetricExpr": "((DCW_ON_CHIP_MEMORY + DCW_ON_MODULE_MEMORY + DCW_ON_DRAWER_MEMORY + DCW_OFF_DRAWER_MEMORY) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_ON_CHIP_MEMORY) else 0" + }, + { + "BriefDescription": "Cycles per Instructions from Finite cache/memory", + "MetricName": "finite_cpi", + "MetricExpr": "L1C_TLB2_MISSES / INSTRUCTIONS if has_event(L1C_TLB2_MISSES) else 0" + }, + { + "BriefDescription": "Estimated Instruction Complexity CPI infinite Level 1", + "MetricName": "est_cpi", + "MetricExpr": "(CPU_CYCLES / INSTRUCTIONS) - (L1C_TLB2_MISSES / INSTRUCTIONS) if has_event(INSTRUCTIONS) else 0" + }, + { + "BriefDescription": "Estimated Sourcing Cycles per Level 1 Miss", + "MetricName": "scpl1m", + "MetricExpr": "L1C_TLB2_MISSES / (L1I_DIR_WRITES + L1D_DIR_WRITES) if has_event(L1C_TLB2_MISSES) else 0" + }, + { + "BriefDescription": "Estimated TLB CPU percentage of Total CPU", + "MetricName": "tlb_percent", + "MetricExpr": "((DTLB2_MISSES + ITLB2_MISSES) / CPU_CYCLES) * (L1C_TLB2_MISSES / (L1I_PENALTY_CYCLES + L1D_PENALTY_CYCLES)) * 100 if has_event(CPU_CYCLES) else 0" + }, + { + "BriefDescription": "Estimated Cycles per TLB Miss", + "MetricName": "tlb_miss", + "MetricExpr": 
"((DTLB2_MISSES + ITLB2_MISSES) / (DTLB2_WRITES + ITLB2_WRITES)) * (L1C_TLB2_MISSES / (L1I_PENALTY_CYCLES + L1D_PENALTY_CYCLES)) if has_event(DTLB2_MISSES) else 0" + } +] diff --git a/tools/perf/pmu-events/arch/s390/mapfile.csv b/tools/perf/pmu-events/arch/s390/mapfile.csv index b22648d12751..6fdede50e7b2 100644 --- a/tools/perf/pmu-events/arch/s390/mapfile.csv +++ b/tools/perf/pmu-events/arch/s390/mapfile.csv @@ -6,3 +6,4 @@ Family-model,Version,Filename,EventType ^IBM.390[67].*[13]\.[1-5].[[:xdigit:]]+$,3,cf_z14,core ^IBM.856[12].*3\.6.[[:xdigit:]]+$,3,cf_z15,core ^IBM.393[12].*$,3,cf_z16,core +^IBM.917[56].*$,3,cf_z17,core From d4ae1620c6209661ced9244d058f3582d1847dca Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 2 Jul 2025 10:54:02 -0700 Subject: [PATCH 0510/2411] perf genelf: Fix NO_LIBDW=1 build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With NO_LIBDW=1 a new unused-parameter warning/error has appeared: ``` util/genelf.c: In function ‘jit_write_elf’: util/genelf.c:163:32: error: unused parameter ‘load_addr’ [-Werror=unused-parameter] 163 | jit_write_elf(int fd, uint64_t load_addr, const char *sym, ``` Fixes: e3f612c1d8f3 ("perf genelf: Remove libcrypto dependency and use built-in sha1()") Signed-off-by: Ian Rogers Tested-by: Namhyung Kim Link: https://lore.kernel.org/r/20250702175402.761818-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/genelf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index fcf86a27f69e..591548b10e34 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -160,7 +160,7 @@ jit_add_eh_frame_info(Elf *e, void* unwinding, uint64_t unwinding_header_size, * csize: the code size in bytes */ int -jit_write_elf(int fd, uint64_t load_addr, const char *sym, +jit_write_elf(int fd, uint64_t load_addr __maybe_unused, const char *sym, const void *code, int csize, void *debug __maybe_unused, 
int nr_debug_entries __maybe_unused, void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size) From 63a088e999de3f431f87d9a367933da894ddb613 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 24 Jun 2025 12:03:21 -0700 Subject: [PATCH 0511/2411] perf dso: Add missed dso__put to dso__load_kcore The kcore loading creates a set of list nodes that have reference counted references to maps of the kcore. The list node freeing in the success path wasn't releasing the maps, add the missing puts. It is unclear why this leak was being missed by leak sanitizer. Fixes: 83720209961f ("perf map: Move map list node into symbol") Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250624190326.2038704-2-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/symbol.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 73dab94fab74..ae0bd568ac45 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1422,6 +1422,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, goto out_err; } } + map__zput(new_node->map); free(new_node); } From 7a8557fc4aa12cffc97e5c8a1b8b8fd0275464b2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 24 Jun 2025 12:03:22 -0700 Subject: [PATCH 0512/2411] perf test code-reading: Avoid a leak of cpus and threads The perf_evlist__set_maps does the necessary gets on the arguments passed, so the reference count bumping isn't necessary and creates a memory leak. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250624190326.2038704-3-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/tests/code-reading.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index cf6edbe697b2..6efb6b4bbcce 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -749,13 +749,6 @@ static int do_test_code_reading(bool try_kcore) pr_debug("perf_evlist__open() failed!\n%s\n", errbuf); } - /* - * Both cpus and threads are now owned by evlist - * and will be freed by following perf_evlist__set_maps - * call. Getting reference to keep them alive. - */ - perf_cpu_map__get(cpus); - perf_thread_map__get(threads); perf_evlist__set_maps(&evlist->core, NULL, NULL); evlist__delete(evlist); evlist = NULL; From d1f18106778b4d1af5ca6bde191e05e075c7e697 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 24 Jun 2025 12:03:23 -0700 Subject: [PATCH 0513/2411] perf hwmon_pmu: Hold path rather than fd Hold the path to the hwmon_pmu rather than the file descriptor. The file descriptor is somewhat problematic in that it reflects the directory state when opened, something that may vary in testing. Using a path simplifies testing and to some extent cleanup as the hwmon_pmu is owned by the pmus list and intentionally global and leaked when perf terminates, the file descriptor being left open looks like a leak. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250624190326.2038704-4-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/tests/hwmon_pmu.c | 11 ++++++----- tools/perf/util/hwmon_pmu.c | 38 ++++++++++++++++++++++++++---------- tools/perf/util/hwmon_pmu.h | 4 ++-- tools/perf/util/pmus.c | 2 +- tools/perf/util/pmus.h | 2 +- 5 files changed, 38 insertions(+), 19 deletions(-) diff --git a/tools/perf/tests/hwmon_pmu.c b/tools/perf/tests/hwmon_pmu.c index 0837aca1cdfa..151f02701c8c 100644 --- a/tools/perf/tests/hwmon_pmu.c +++ b/tools/perf/tests/hwmon_pmu.c @@ -93,9 +93,10 @@ static struct perf_pmu *test_pmu_get(char *dir, size_t sz) pr_err("Failed to mkdir hwmon directory\n"); goto err_out; } - hwmon_dirfd = openat(test_dirfd, "hwmon1234", O_DIRECTORY); + strncat(dir, "/hwmon1234", sz - strlen(dir)); + hwmon_dirfd = open(dir, O_PATH|O_DIRECTORY); if (hwmon_dirfd < 0) { - pr_err("Failed to open test hwmon directory \"%s/hwmon1234\"\n", dir); + pr_err("Failed to open test hwmon directory \"%s\"\n", dir); goto err_out; } file = openat(hwmon_dirfd, "name", O_WRONLY | O_CREAT, 0600); @@ -130,18 +131,18 @@ static struct perf_pmu *test_pmu_get(char *dir, size_t sz) } /* Make the PMU reading the files created above. 
*/ - hwm = perf_pmus__add_test_hwmon_pmu(hwmon_dirfd, "hwmon1234", test_hwmon_name); + hwm = perf_pmus__add_test_hwmon_pmu(dir, "hwmon1234", test_hwmon_name); if (!hwm) pr_err("Test hwmon creation failed\n"); err_out: if (!hwm) { test_pmu_put(dir, hwm); - if (hwmon_dirfd >= 0) - close(hwmon_dirfd); } if (test_dirfd >= 0) close(test_dirfd); + if (hwmon_dirfd >= 0) + close(hwmon_dirfd); return hwm; } diff --git a/tools/perf/util/hwmon_pmu.c b/tools/perf/util/hwmon_pmu.c index c25e7296f1c1..7edda010ba27 100644 --- a/tools/perf/util/hwmon_pmu.c +++ b/tools/perf/util/hwmon_pmu.c @@ -104,7 +104,7 @@ static const char *const hwmon_units[HWMON_TYPE_MAX] = { struct hwmon_pmu { struct perf_pmu pmu; struct hashmap events; - int hwmon_dir_fd; + char *hwmon_dir; }; /** @@ -245,7 +245,7 @@ static int hwmon_pmu__read_events(struct hwmon_pmu *pmu) return 0; /* Use openat so that the directory contents are refreshed. */ - io_dir__init(&dir, openat(pmu->hwmon_dir_fd, ".", O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + io_dir__init(&dir, open(pmu->hwmon_dir, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); if (dir.dirfd < 0) return -ENOENT; @@ -283,7 +283,7 @@ static int hwmon_pmu__read_events(struct hwmon_pmu *pmu) __set_bit(item, alarm ? 
value->alarm_items : value->items); if (item == HWMON_ITEM_LABEL) { char buf[128]; - int fd = openat(pmu->hwmon_dir_fd, ent->d_name, O_RDONLY); + int fd = openat(dir.dirfd, ent->d_name, O_RDONLY); ssize_t read_len; if (fd < 0) @@ -342,7 +342,8 @@ static int hwmon_pmu__read_events(struct hwmon_pmu *pmu) return err; } -struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, int hwmon_dir, const char *sysfs_name, const char *name) +struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, const char *hwmon_dir, + const char *sysfs_name, const char *name) { char buf[32]; struct hwmon_pmu *hwm; @@ -365,7 +366,11 @@ struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, int hwmon_dir, const cha return NULL; } - hwm->hwmon_dir_fd = hwmon_dir; + hwm->hwmon_dir = strdup(hwmon_dir); + if (!hwm->hwmon_dir) { + perf_pmu__delete(&hwm->pmu); + return NULL; + } hwm->pmu.alias_name = strdup(sysfs_name); if (!hwm->pmu.alias_name) { perf_pmu__delete(&hwm->pmu); @@ -399,7 +404,7 @@ void hwmon_pmu__exit(struct perf_pmu *pmu) free(value); } hashmap__clear(&hwm->events); - close(hwm->hwmon_dir_fd); + zfree(&hwm->hwmon_dir); } static size_t hwmon_pmu__describe_items(struct hwmon_pmu *hwm, char *out_buf, size_t out_buf_len, @@ -409,6 +414,10 @@ static size_t hwmon_pmu__describe_items(struct hwmon_pmu *hwm, char *out_buf, si size_t bit; char buf[64]; size_t len = 0; + int dir = open(hwm->hwmon_dir, O_CLOEXEC | O_DIRECTORY | O_RDONLY); + + if (dir < 0) + return 0; for_each_set_bit(bit, items, HWMON_ITEM__MAX) { int fd; @@ -421,7 +430,7 @@ static size_t hwmon_pmu__describe_items(struct hwmon_pmu *hwm, char *out_buf, si key.num, hwmon_item_strs[bit], is_alarm ? 
"_alarm" : ""); - fd = openat(hwm->hwmon_dir_fd, buf, O_RDONLY); + fd = openat(dir, buf, O_RDONLY); if (fd > 0) { ssize_t read_len = read(fd, buf, sizeof(buf)); @@ -443,6 +452,7 @@ static size_t hwmon_pmu__describe_items(struct hwmon_pmu *hwm, char *out_buf, si close(fd); } } + close(dir); return len; } @@ -712,6 +722,7 @@ int perf_pmus__read_hwmon_pmus(struct list_head *pmus) size_t line_len; int hwmon_dir, name_fd; struct io io; + char buf2[128]; if (class_hwmon_ent->d_type != DT_LNK) continue; @@ -730,12 +741,13 @@ int perf_pmus__read_hwmon_pmus(struct list_head *pmus) close(hwmon_dir); continue; } - io__init(&io, name_fd, buf, sizeof(buf)); + io__init(&io, name_fd, buf2, sizeof(buf2)); io__getline(&io, &line, &line_len); if (line_len > 0 && line[line_len - 1] == '\n') line[line_len - 1] = '\0'; - hwmon_pmu__new(pmus, hwmon_dir, class_hwmon_ent->d_name, line); + hwmon_pmu__new(pmus, buf, class_hwmon_ent->d_name, line); close(name_fd); + close(hwmon_dir); } free(line); close(class_hwmon_dir.dirfd); @@ -753,6 +765,10 @@ int evsel__hwmon_pmu_open(struct evsel *evsel, .type_and_num = evsel->core.attr.config, }; int idx = 0, thread = 0, nthreads, err = 0; + int dir = open(hwm->hwmon_dir, O_CLOEXEC | O_DIRECTORY | O_RDONLY); + + if (dir < 0) + return -errno; nthreads = perf_thread_map__nr(threads); for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { @@ -763,7 +779,7 @@ int evsel__hwmon_pmu_open(struct evsel *evsel, snprintf(buf, sizeof(buf), "%s%d_input", hwmon_type_strs[key.type], key.num); - fd = openat(hwm->hwmon_dir_fd, buf, O_RDONLY); + fd = openat(dir, buf, O_RDONLY); FD(evsel, idx, thread) = fd; if (fd < 0) { err = -errno; @@ -771,6 +787,7 @@ int evsel__hwmon_pmu_open(struct evsel *evsel, } } } + close(dir); return 0; out_close: if (err) @@ -784,6 +801,7 @@ int evsel__hwmon_pmu_open(struct evsel *evsel, } thread = nthreads; } while (--idx >= 0); + close(dir); return err; } diff --git a/tools/perf/util/hwmon_pmu.h b/tools/perf/util/hwmon_pmu.h index 
b3329774d2b2..dc711b289ff5 100644 --- a/tools/perf/util/hwmon_pmu.h +++ b/tools/perf/util/hwmon_pmu.h @@ -135,14 +135,14 @@ bool parse_hwmon_filename(const char *filename, * hwmon_pmu__new() - Allocate and construct a hwmon PMU. * * @pmus: The list of PMUs to be added to. - * @hwmon_dir: An O_DIRECTORY file descriptor for a hwmon directory. + * @hwmon_dir: The path to a hwmon directory. * @sysfs_name: Name of the hwmon sysfs directory like hwmon0. * @name: The contents of the "name" file in the hwmon directory. * * Exposed for testing. Regular construction should happen via * perf_pmus__read_hwmon_pmus. */ -struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, int hwmon_dir, +struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, const char *hwmon_dir, const char *sysfs_name, const char *name); void hwmon_pmu__exit(struct perf_pmu *pmu); diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c index 81c2ed689db2..409b909cfa02 100644 --- a/tools/perf/util/pmus.c +++ b/tools/perf/util/pmus.c @@ -861,7 +861,7 @@ struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name) return perf_pmu__lookup(&other_pmus, test_sysfs_dirfd, name, /*eager_load=*/true); } -struct perf_pmu *perf_pmus__add_test_hwmon_pmu(int hwmon_dir, +struct perf_pmu *perf_pmus__add_test_hwmon_pmu(const char *hwmon_dir, const char *sysfs_name, const char *name) { diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h index 33ecf765a92f..86842ee5f539 100644 --- a/tools/perf/util/pmus.h +++ b/tools/perf/util/pmus.h @@ -31,7 +31,7 @@ int perf_pmus__num_core_pmus(void); bool perf_pmus__supports_extended_type(void); struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name); -struct perf_pmu *perf_pmus__add_test_hwmon_pmu(int hwmon_dir, +struct perf_pmu *perf_pmus__add_test_hwmon_pmu(const char *hwmon_dir, const char *sysfs_name, const char *name); struct perf_pmu *perf_pmus__fake_pmu(void); From e793e2c0f188fb7a7998224f14241c0d87df5249 Mon Sep 17 
00:00:00 2001 From: Ian Rogers Date: Tue, 24 Jun 2025 12:03:24 -0700 Subject: [PATCH 0514/2411] perf dso: With ref count checking, avoid dso_data holding dso live With the dso_data embedded in a dso there is a reference counted pointer to the dso rather than using container_of with reference count checking. This data can hold the dso live meaning that no dso__put ever deletes it. Add a check for this case and close the dso_data when it happens. There isn't an infinite loop as the dso_data clears the file descriptor prior to putting on the dso. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250624190326.2038704-5-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/dso.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 057fcf4225ac..c6c1637e098c 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1612,6 +1612,10 @@ struct dso *dso__get(struct dso *dso) void dso__put(struct dso *dso) { +#ifdef REFCNT_CHECKING + if (dso && dso__data(dso) && refcount_read(&RC_CHK_ACCESS(dso)->refcnt) == 2) + dso__data_close(dso); +#endif if (dso && refcount_dec_and_test(&RC_CHK_ACCESS(dso)->refcnt)) dso__delete(dso); else From e9846f5ead26d2ed2eea0987e3991a667fc38d22 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 24 Jun 2025 12:03:25 -0700 Subject: [PATCH 0515/2411] perf test: In forked mode add check that fds aren't leaked When a test is forked no file descriptors should be open, however, parent ones may have been inherited - in particular those of the pipes of other forked child test processes. Add a loop to clean-up/close those file descriptors prior to running the test. At the end of the test assert that no additional file descriptors are present as this would indicate a file descriptor leak. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250624190326.2038704-6-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/tests/builtin-test.c | 69 +++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index e242d56523ce..85142dfb3e01 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -4,6 +4,7 @@ * * Builtin regression testing command: ever growing number of sanity tests */ +#include #include #include #ifdef HAVE_BACKTRACE_SUPPORT @@ -159,6 +160,71 @@ static struct test_workload *workloads[] = { #define test_suite__for_each_test_case(suite, idx) \ for (idx = 0; (suite)->test_cases && (suite)->test_cases[idx].name != NULL; idx++) +static void close_parent_fds(void) +{ + DIR *dir = opendir("/proc/self/fd"); + struct dirent *ent; + + while ((ent = readdir(dir))) { + char *end; + long fd; + + if (ent->d_type != DT_LNK) + continue; + + if (!isdigit(ent->d_name[0])) + continue; + + fd = strtol(ent->d_name, &end, 10); + if (*end) + continue; + + if (fd <= 3 || fd == dirfd(dir)) + continue; + + close(fd); + } + closedir(dir); +} + +static void check_leaks(void) +{ + DIR *dir = opendir("/proc/self/fd"); + struct dirent *ent; + int leaks = 0; + + while ((ent = readdir(dir))) { + char path[PATH_MAX]; + char *end; + long fd; + ssize_t len; + + if (ent->d_type != DT_LNK) + continue; + + if (!isdigit(ent->d_name[0])) + continue; + + fd = strtol(ent->d_name, &end, 10); + if (*end) + continue; + + if (fd <= 3 || fd == dirfd(dir)) + continue; + + leaks++; + len = readlinkat(dirfd(dir), ent->d_name, path, sizeof(path)); + if (len > 0 && (size_t)len < sizeof(path)) + path[len] = '\0'; + else + strncpy(path, ent->d_name, sizeof(path)); + pr_err("Leak of file descriptor %s that opened: '%s'\n", ent->d_name, path); + } + closedir(dir); + if (leaks) + abort(); +} + static int test_suite__num_test_cases(const struct test_suite *t) { int 
num; @@ -256,6 +322,8 @@ static int run_test_child(struct child_process *process) struct child_test *child = container_of(process, struct child_test, process); int err; + close_parent_fds(); + err = sigsetjmp(run_test_jmp_buf, 1); if (err) { /* Received signal. */ @@ -271,6 +339,7 @@ static int run_test_child(struct child_process *process) err = test_function(child->test, child->test_case_num)(child->test, child->test_case_num); pr_debug("---- end(%d) ----\n", err); + check_leaks(); err_out: fflush(NULL); for (size_t i = 0; i < ARRAY_SIZE(signals); i++) From 209be2857bcc4bae88ef1b0981da2db99f84dbb5 Mon Sep 17 00:00:00 2001 From: Tanmay Shah Date: Fri, 20 Jun 2025 12:57:28 -0700 Subject: [PATCH 0516/2411] remoteproc: xlnx: Add shutdown callback In case of kexec call, each driver's shutdown callback is called. Handle this call for rproc driver and shutdown/detach each core that was powered on before. This is needed for proper Life Cycle Management of remote processor. Otherwise on next linux boot, remote processor can't be started due to bad refcount of power-domain managed by platform management controller. Signed-off-by: Tanmay Shah Link: https://lore.kernel.org/r/20250620195728.3216935-1-tanmay.shah@amd.com Signed-off-by: Mathieu Poirier --- drivers/remoteproc/xlnx_r5_remoteproc.c | 40 +++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/remoteproc/xlnx_r5_remoteproc.c b/drivers/remoteproc/xlnx_r5_remoteproc.c index 5aa3fd1b0530..a51523456c6e 100644 --- a/drivers/remoteproc/xlnx_r5_remoteproc.c +++ b/drivers/remoteproc/xlnx_r5_remoteproc.c @@ -1467,6 +1467,45 @@ static void zynqmp_r5_cluster_exit(void *data) platform_set_drvdata(pdev, NULL); } +/* + * zynqmp_r5_remoteproc_shutdown() + * Follow shutdown sequence in case of kexec call. + * + * @pdev: domain platform device for cluster + * + * Return: None. 
+ */ +static void zynqmp_r5_remoteproc_shutdown(struct platform_device *pdev) +{ + const char *rproc_state_str = NULL; + struct zynqmp_r5_cluster *cluster; + struct zynqmp_r5_core *r5_core; + struct rproc *rproc; + int i, ret = 0; + + cluster = platform_get_drvdata(pdev); + + for (i = 0; i < cluster->core_count; i++) { + r5_core = cluster->r5_cores[i]; + rproc = r5_core->rproc; + + if (rproc->state == RPROC_RUNNING) { + ret = rproc_shutdown(rproc); + rproc_state_str = "shutdown"; + } else if (rproc->state == RPROC_ATTACHED) { + ret = rproc_detach(rproc); + rproc_state_str = "detach"; + } else { + ret = 0; + } + + if (ret) { + dev_err(cluster->dev, "failed to %s rproc %d\n", + rproc_state_str, rproc->index); + } + } +} + /* * zynqmp_r5_remoteproc_probe() * parse device-tree, initialize hardware and allocate required resources @@ -1528,6 +1567,7 @@ static struct platform_driver zynqmp_r5_remoteproc_driver = { .name = "zynqmp_r5_remoteproc", .of_match_table = zynqmp_r5_remoteproc_match, }, + .shutdown = zynqmp_r5_remoteproc_shutdown, }; module_platform_driver(zynqmp_r5_remoteproc_driver); From 6c21316e52959f60e9367a41a7893d8459d7dfab Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 2 Jul 2025 21:20:00 -0700 Subject: [PATCH 0517/2411] perf header: Fix pipe mode header dumping The pipe mode header dumping was accidentally removed when tracing of header feature events in pipe mode was added. Minor spelling tweak to header test failure message. 
Fixes: 61051f9a8452 ("perf header: In pipe mode dump features without --header/-I") Signed-off-by: Ian Rogers Tested-by: Namhyung Kim Link: https://lore.kernel.org/r/20250703042000.2740640-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/tests/shell/header.sh | 2 +- tools/perf/util/header.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/header.sh b/tools/perf/tests/shell/header.sh index 412263de6ed7..e1628ac0a614 100755 --- a/tools/perf/tests/shell/header.sh +++ b/tools/perf/tests/shell/header.sh @@ -42,7 +42,7 @@ check_header_output() { do if ! grep -q -E "$i" "${script_output}" then - echo "Failed to find expect $i in output" + echo "Failed to find expected $i in output" err=1 fi done diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 487f663ed2de..53d54fbda10d 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -4343,6 +4343,7 @@ int perf_event__process_feature(struct perf_session *session, int type = fe->header.type; u64 feat = fe->feat_id; int ret = 0; + bool print = dump_trace; if (type < 0 || type >= PERF_RECORD_HEADER_MAX) { pr_warning("invalid record type %d in pipe-mode\n", type); @@ -4362,8 +4363,20 @@ int perf_event__process_feature(struct perf_session *session, goto out; } - if (dump_trace) { + if (session->tool->show_feat_hdr) { + if (!feat_ops[feat].full_only || + session->tool->show_feat_hdr >= SHOW_FEAT_HEADER_FULL_INFO) { + print = true; + } else { + fprintf(stdout, "# %s info available, use -I to display\n", + feat_ops[feat].name); + } + } + + if (dump_trace) printf(", "); + + if (print) { if (feat_ops[feat].print) feat_ops[feat].print(&ff, stdout); else From 8081ca8d6be8c4b08b5d2fa06b2129f00aa95451 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 2 Jul 2025 22:36:22 -0700 Subject: [PATCH 0518/2411] perf tests make: Add NO_LIBDW=1 to minimal and add standalone test Add the missing test coverage of NO_LIBDW=1 and add NO_LIBDW=1 to the
minimal test configuration. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250703053622.3141424-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/tests/make | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/make b/tools/perf/tests/make index e3651e5b195a..c574a678c28a 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -81,6 +81,7 @@ make_no_gtk2 := NO_GTK2=1 make_no_ui := NO_SLANG=1 NO_GTK2=1 make_no_demangle := NO_DEMANGLE=1 make_no_libelf := NO_LIBELF=1 +make_no_libdw := NO_LIBDW=1 make_libunwind := LIBUNWIND=1 make_no_libdw_dwarf_unwind := NO_LIBDW_DWARF_UNWIND=1 make_no_backtrace := NO_BACKTRACE=1 @@ -119,7 +120,7 @@ make_static := LDFLAGS=-static NO_PERF_READ_VDSO32=1 NO_PERF_READ_VDSOX3 # all the NO_* variable combined make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_GTK2=1 make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_BACKTRACE=1 -make_minimal += NO_LIBNUMA=1 NO_LIBBIONIC=1 +make_minimal += NO_LIBNUMA=1 NO_LIBBIONIC=1 NO_LIBDW=1 make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 make_minimal += NO_SDT=1 NO_JVMTI=1 NO_LIBZSTD=1 make_minimal += NO_LIBCAP=1 NO_CAPSTONE=1 @@ -150,6 +151,7 @@ run += make_no_gtk2 run += make_no_ui run += make_no_demangle run += make_no_libelf +run += make_no_libdw run += make_libunwind run += make_no_libdw_dwarf_unwind run += make_no_backtrace From 10d9b89203765fb776512742c13af8dd92821842 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 2 Jul 2025 18:49:35 -0700 Subject: [PATCH 0519/2411] perf sched: Make sure it frees the usage string The parse_options_subcommand() allocates the usage string based on the given subcommands. So it should reach the end of the function to free the string to prevent memory leaks. 
Fixes: 1a5efc9e13f357ab ("libsubcmd: Don't free the usage string") Reviewed-by: Ian Rogers Tested-by: Ian Rogers Link: https://lore.kernel.org/r/20250703014942.1369397-2-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/builtin-sched.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 26ece6e9bfd1..b7bbfad0ed60 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3902,9 +3902,9 @@ int cmd_sched(int argc, const char **argv) * Aliased to 'perf script' for now: */ if (!strcmp(argv[0], "script")) { - return cmd_script(argc, argv); + ret = cmd_script(argc, argv); } else if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) { - return __cmd_record(argc, argv); + ret = __cmd_record(argc, argv); } else if (strlen(argv[0]) > 2 && strstarts("latency", argv[0])) { sched.tp_handler = &lat_ops; if (argc > 1) { @@ -3913,7 +3913,7 @@ int cmd_sched(int argc, const char **argv) usage_with_options(latency_usage, latency_options); } setup_sorting(&sched, latency_options, latency_usage); - return perf_sched__lat(&sched); + ret = perf_sched__lat(&sched); } else if (!strcmp(argv[0], "map")) { if (argc) { argc = parse_options(argc, argv, map_options, map_usage, 0); @@ -3924,13 +3924,14 @@ int cmd_sched(int argc, const char **argv) sched.map.task_names = strlist__new(sched.map.task_name, NULL); if (sched.map.task_names == NULL) { fprintf(stderr, "Failed to parse task names\n"); - return -1; + ret = -1; + goto out; } } } sched.tp_handler = &map_ops; setup_sorting(&sched, latency_options, latency_usage); - return perf_sched__map(&sched); + ret = perf_sched__map(&sched); } else if (strlen(argv[0]) > 2 && strstarts("replay", argv[0])) { sched.tp_handler = &replay_ops; if (argc) { @@ -3938,7 +3939,7 @@ int cmd_sched(int argc, const char **argv) if (argc) usage_with_options(replay_usage, replay_options); } - return perf_sched__replay(&sched); + ret = 
perf_sched__replay(&sched); } else if (!strcmp(argv[0], "timehist")) { if (argc) { argc = parse_options(argc, argv, timehist_options, @@ -3954,19 +3955,19 @@ int cmd_sched(int argc, const char **argv) parse_options_usage(NULL, timehist_options, "w", true); if (sched.show_next) parse_options_usage(NULL, timehist_options, "n", true); - return -EINVAL; + ret = -EINVAL; + goto out; } ret = symbol__validate_sym_arguments(); - if (ret) - return ret; - - return perf_sched__timehist(&sched); + if (!ret) + ret = perf_sched__timehist(&sched); } else { usage_with_options(sched_usage, sched_options); } +out: /* free usage string allocated by parse_options_subcommand */ free((void *)sched_usage[0]); - return 0; + return ret; } From aa9fdd106bab8c478d37eba5703c0950ad5c0d4f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 2 Jul 2025 18:49:36 -0700 Subject: [PATCH 0520/2411] perf sched: Free thread->priv using priv_destructor Many perf sched subcommands save a priv data structure in the thread but forget to free it. As it's an opaque 'void *' type, a destructor that knows how to free the data needs to be registered. In this case, just regular 'free()' is fine.
Fixes: 04cb4fc4d40a5bf1 ("perf thread: Allow tools to register a thread->priv destructor") Reviewed-by: Ian Rogers Tested-by: Ian Rogers Link: https://lore.kernel.org/r/20250703014942.1369397-3-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/builtin-sched.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index b7bbfad0ed60..fa4052e04020 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3898,6 +3898,8 @@ int cmd_sched(int argc, const char **argv) if (!argc) usage_with_options(sched_usage, sched_options); + thread__set_priv_destructor(free); + /* * Aliased to 'perf script' for now: */ From dc3a80c98884d86389b3b572c50ccc7f502cd41b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 2 Jul 2025 18:49:37 -0700 Subject: [PATCH 0521/2411] perf sched: Fix memory leaks in 'perf sched map' It maintains per-cpu pointers for the current thread but it doesn't release the refcounts. Fixes: 5e895278697c014e ("perf sched: Move curr_thread initialization to perf_sched__map()") Reviewed-by: Ian Rogers Tested-by: Ian Rogers Link: https://lore.kernel.org/r/20250703014942.1369397-4-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/builtin-sched.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index fa4052e04020..b73989fb6ace 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1634,6 +1634,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, const char *color = PERF_COLOR_NORMAL; char stimestamp[32]; const char *str; + int ret = -1; BUG_ON(this_cpu.cpu >= MAX_CPUS || this_cpu.cpu < 0); @@ -1664,17 +1665,20 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, sched_in = map__findnew_thread(sched, machine, -1, next_pid); sched_out = map__findnew_thread(sched, machine, -1, prev_pid); if 
(sched_in == NULL || sched_out == NULL) - return -1; + goto out; tr = thread__get_runtime(sched_in); - if (tr == NULL) { - thread__put(sched_in); - return -1; - } + if (tr == NULL) + goto out; + + thread__put(sched->curr_thread[this_cpu.cpu]); + thread__put(sched->curr_out_thread[this_cpu.cpu]); sched->curr_thread[this_cpu.cpu] = thread__get(sched_in); sched->curr_out_thread[this_cpu.cpu] = thread__get(sched_out); + ret = 0; + str = thread__comm_str(sched_in); new_shortname = 0; if (!tr->shortname[0]) { @@ -1769,12 +1773,10 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, color_fprintf(stdout, color, "\n"); out: - if (sched->map.task_name) - thread__put(sched_out); - + thread__put(sched_out); thread__put(sched_in); - return 0; + return ret; } static int process_sched_switch_event(const struct perf_tool *tool, @@ -3556,10 +3558,10 @@ static int perf_sched__map(struct perf_sched *sched) sched->curr_out_thread = calloc(MAX_CPUS, sizeof(*(sched->curr_out_thread))); if (!sched->curr_out_thread) - return rc; + goto out_free_curr_thread; if (setup_cpus_switch_event(sched)) - goto out_free_curr_thread; + goto out_free_curr_out_thread; if (setup_map_cpus(sched)) goto out_free_cpus_switch_event; @@ -3590,7 +3592,14 @@ static int perf_sched__map(struct perf_sched *sched) out_free_cpus_switch_event: free_cpus_switch_event(sched); +out_free_curr_out_thread: + for (int i = 0; i < MAX_CPUS; i++) + thread__put(sched->curr_out_thread[i]); + zfree(&sched->curr_out_thread); + out_free_curr_thread: + for (int i = 0; i < MAX_CPUS; i++) + thread__put(sched->curr_thread[i]); zfree(&sched->curr_thread); return rc; } From e2eb59260c4f6bac403491d0112891766b8650d1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 2 Jul 2025 18:49:38 -0700 Subject: [PATCH 0522/2411] perf sched: Fix thread leaks in 'perf sched timehist' Add missing thread__put() after machine__findnew_thread() or timehist_get_thread(). 
Also idle threads' last_thread should be refcounted properly. Fixes: 699b5b920db04a6f ("perf sched timehist: Save callchain when entering idle") Reviewed-by: Ian Rogers Tested-by: Ian Rogers Link: https://lore.kernel.org/r/20250703014942.1369397-5-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/builtin-sched.c | 48 +++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index b73989fb6ace..83b5a85a91b7 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2313,8 +2313,10 @@ static void save_task_callchain(struct perf_sched *sched, return; } - if (!sched->show_callchain || sample->callchain == NULL) + if (!sched->show_callchain || sample->callchain == NULL) { + thread__put(thread); return; + } cursor = get_tls_callchain_cursor(); @@ -2323,10 +2325,12 @@ static void save_task_callchain(struct perf_sched *sched, if (verbose > 0) pr_err("Failed to resolve callchain. 
Skipping\n"); + thread__put(thread); return; } callchain_cursor_commit(cursor); + thread__put(thread); while (true) { struct callchain_cursor_node *node; @@ -2403,8 +2407,17 @@ static void free_idle_threads(void) return; for (i = 0; i < idle_max_cpu; ++i) { - if ((idle_threads[i])) - thread__delete(idle_threads[i]); + struct thread *idle = idle_threads[i]; + + if (idle) { + struct idle_thread_runtime *itr; + + itr = thread__priv(idle); + if (itr) + thread__put(itr->last_thread); + + thread__delete(idle); + } } free(idle_threads); @@ -2441,7 +2454,7 @@ static struct thread *get_idle_thread(int cpu) } } - return idle_threads[cpu]; + return thread__get(idle_threads[cpu]); } static void save_idle_callchain(struct perf_sched *sched, @@ -2496,7 +2509,8 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, if (itr == NULL) return NULL; - itr->last_thread = thread; + thread__put(itr->last_thread); + itr->last_thread = thread__get(thread); /* copy task callchain when entering to idle */ if (evsel__intval(evsel, sample, "next_pid") == 0) @@ -2567,6 +2581,7 @@ static void timehist_print_wakeup_event(struct perf_sched *sched, /* show wakeup unless both awakee and awaker are filtered */ if (timehist_skip_sample(sched, thread, evsel, sample) && timehist_skip_sample(sched, awakened, evsel, sample)) { + thread__put(thread); return; } @@ -2583,6 +2598,8 @@ static void timehist_print_wakeup_event(struct perf_sched *sched, printf("awakened: %s", timehist_get_commstr(awakened)); printf("\n"); + + thread__put(thread); } static int timehist_sched_wakeup_ignore(const struct perf_tool *tool __maybe_unused, @@ -2611,8 +2628,10 @@ static int timehist_sched_wakeup_event(const struct perf_tool *tool, return -1; tr = thread__get_runtime(thread); - if (tr == NULL) + if (tr == NULL) { + thread__put(thread); return -1; + } if (tr->ready_to_run == 0) tr->ready_to_run = sample->time; @@ -2622,6 +2641,7 @@ static int timehist_sched_wakeup_event(const struct perf_tool *tool, 
!perf_time__skip_sample(&sched->ptime, sample->time)) timehist_print_wakeup_event(sched, evsel, sample, machine, thread); + thread__put(thread); return 0; } @@ -2649,6 +2669,7 @@ static void timehist_print_migration_event(struct perf_sched *sched, if (timehist_skip_sample(sched, thread, evsel, sample) && timehist_skip_sample(sched, migrated, evsel, sample)) { + thread__put(thread); return; } @@ -2676,6 +2697,7 @@ static void timehist_print_migration_event(struct perf_sched *sched, printf(" cpu %d => %d", ocpu, dcpu); printf("\n"); + thread__put(thread); } static int timehist_migrate_task_event(const struct perf_tool *tool, @@ -2695,8 +2717,10 @@ static int timehist_migrate_task_event(const struct perf_tool *tool, return -1; tr = thread__get_runtime(thread); - if (tr == NULL) + if (tr == NULL) { + thread__put(thread); return -1; + } tr->migrations++; tr->migrated = sample->time; @@ -2706,6 +2730,7 @@ static int timehist_migrate_task_event(const struct perf_tool *tool, timehist_print_migration_event(sched, evsel, sample, machine, thread); } + thread__put(thread); return 0; } @@ -2728,10 +2753,10 @@ static void timehist_update_task_prio(struct evsel *evsel, return; tr = thread__get_runtime(thread); - if (tr == NULL) - return; + if (tr != NULL) + tr->prio = next_prio; - tr->prio = next_prio; + thread__put(thread); } static int timehist_sched_change_event(const struct perf_tool *tool, @@ -2743,7 +2768,7 @@ static int timehist_sched_change_event(const struct perf_tool *tool, struct perf_sched *sched = container_of(tool, struct perf_sched, tool); struct perf_time_interval *ptime = &sched->ptime; struct addr_location al; - struct thread *thread; + struct thread *thread = NULL; struct thread_runtime *tr = NULL; u64 tprev, t = sample->time; int rc = 0; @@ -2867,6 +2892,7 @@ static int timehist_sched_change_event(const struct perf_tool *tool, evsel__save_time(evsel, sample->time, sample->cpu); + thread__put(thread); addr_location__exit(&al); return rc; } From 
117e5c33b1c44037af016d77ce6c0b086d55535f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 2 Jul 2025 18:49:39 -0700 Subject: [PATCH 0523/2411] perf sched: Fix memory leaks for evsel->priv in timehist It uses evsel->priv to save per-cpu timing information. It should be freed when the evsel is released. Add the priv destructor for evsel same as thread to handle that. Fixes: 49394a2a24c78ce0 ("perf sched timehist: Introduce timehist command") Reviewed-by: Ian Rogers Tested-by: Ian Rogers Link: https://lore.kernel.org/r/20250703014942.1369397-6-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/builtin-sched.c | 12 ++++++++++++ tools/perf/util/evsel.c | 11 +++++++++++ tools/perf/util/evsel.h | 2 ++ 3 files changed, 25 insertions(+) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 83b5a85a91b7..a6eb0462dd5b 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2020,6 +2020,16 @@ static u64 evsel__get_time(struct evsel *evsel, u32 cpu) return r->last_time[cpu]; } +static void timehist__evsel_priv_destructor(void *priv) +{ + struct evsel_runtime *r = priv; + + if (r) { + free(r->last_time); + free(r); + } +} + static int comm_width = 30; static char *timehist_get_commstr(struct thread *thread) @@ -3314,6 +3324,8 @@ static int perf_sched__timehist(struct perf_sched *sched) setup_pager(); + evsel__set_priv_destructor(timehist__evsel_priv_destructor); + /* prefer sched_waking if it is captured */ if (evlist__find_tracepoint_by_name(session->evlist, "sched:sched_waking")) handlers[1].handler = timehist_sched_wakeup_ignore; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 9c50c3960487..3896a04d90af 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1657,6 +1657,15 @@ static void evsel__free_config_terms(struct evsel *evsel) free_config_terms(&evsel->config_terms); } +static void (*evsel__priv_destructor)(void *priv); + +void evsel__set_priv_destructor(void 
(*destructor)(void *priv)) +{ + assert(evsel__priv_destructor == NULL); + + evsel__priv_destructor = destructor; +} + void evsel__exit(struct evsel *evsel) { assert(list_empty(&evsel->core.node)); @@ -1687,6 +1696,8 @@ void evsel__exit(struct evsel *evsel) hashmap__free(evsel->per_pkg_mask); evsel->per_pkg_mask = NULL; zfree(&evsel->metric_events); + if (evsel__priv_destructor) + evsel__priv_destructor(evsel->priv); perf_evsel__object.fini(evsel); if (evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME || evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 6dbc9690e0c9..b84ee274602d 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -280,6 +280,8 @@ void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx); void evsel__exit(struct evsel *evsel); void evsel__delete(struct evsel *evsel); +void evsel__set_priv_destructor(void (*destructor)(void *priv)); + struct callchain_param; void evsel__config(struct evsel *evsel, struct record_opts *opts, From 7a4002ec9e0fced907179da94f67c3082d7b4162 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 2 Jul 2025 18:49:40 -0700 Subject: [PATCH 0524/2411] perf sched: Use RC_CHK_EQUAL() to compare pointers So that it can check two pointers to the same object properly when REFCNT_CHECKING is on. 
Fixes: 78c32f4cb12f9430 ("libperf rc_check: Add RC_CHK_EQUAL") Reviewed-by: Ian Rogers Tested-by: Ian Rogers Link: https://lore.kernel.org/r/20250703014942.1369397-7-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/builtin-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index a6eb0462dd5b..087d4eaba5f7 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -994,7 +994,7 @@ thread_atoms_search(struct rb_root_cached *root, struct thread *thread, else if (cmp < 0) node = node->rb_right; else { - BUG_ON(thread != atoms->thread); + BUG_ON(!RC_CHK_EQUAL(thread, atoms->thread)); return atoms; } } From e68b1c0098b959cb88afce5c93dd6a9324e6da78 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 2 Jul 2025 18:49:41 -0700 Subject: [PATCH 0525/2411] perf sched: Fix memory leaks in 'perf sched latency' The work_atoms should be freed after use. Add free_work_atoms() to make sure to release all. It should use list_splice_init() when merging atoms to prevent accessing invalid pointers. 
Fixes: b1ffe8f3e0c96f552 ("perf sched: Finish latency => atom rename and misc cleanups") Reviewed-by: Ian Rogers Tested-by: Ian Rogers Link: https://lore.kernel.org/r/20250703014942.1369397-8-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/builtin-sched.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 087d4eaba5f7..4bbebd6ef2e4 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1111,6 +1111,21 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp) atoms->nb_atoms++; } +static void free_work_atoms(struct work_atoms *atoms) +{ + struct work_atom *atom, *tmp; + + if (atoms == NULL) + return; + + list_for_each_entry_safe(atom, tmp, &atoms->work_list, list) { + list_del(&atom->list); + free(atom); + } + thread__zput(atoms->thread); + free(atoms); +} + static int latency_switch_event(struct perf_sched *sched, struct evsel *evsel, struct perf_sample *sample, @@ -3426,13 +3441,13 @@ static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *d this->total_runtime += data->total_runtime; this->nb_atoms += data->nb_atoms; this->total_lat += data->total_lat; - list_splice(&data->work_list, &this->work_list); + list_splice_init(&data->work_list, &this->work_list); if (this->max_lat < data->max_lat) { this->max_lat = data->max_lat; this->max_lat_start = data->max_lat_start; this->max_lat_end = data->max_lat_end; } - zfree(&data); + free_work_atoms(data); return; } } @@ -3511,7 +3526,6 @@ static int perf_sched__lat(struct perf_sched *sched) work_list = rb_entry(next, struct work_atoms, node); output_lat_thread(sched, work_list); next = rb_next(next); - thread__zput(work_list->thread); } printf(" -----------------------------------------------------------------------------------------------------------------\n"); @@ -3525,6 +3539,13 @@ static int perf_sched__lat(struct perf_sched *sched) rc = 0; + while 
((next = rb_first_cached(&sched->sorted_atom_root))) { + struct work_atoms *data; + + data = rb_entry(next, struct work_atoms, node); + rb_erase_cached(next, &sched->sorted_atom_root); + free_work_atoms(data); + } out_free_cpus_switch_event: free_cpus_switch_event(sched); return rc; From 2009a2d5696944d85c34d75e691a6f3884e787c0 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 16 Jun 2025 12:34:06 +0900 Subject: [PATCH 0526/2411] rust: sync: implement `Borrow` and `BorrowMut` for `Arc` types Implement `Borrow` and `BorrowMut` for `UniqueArc`, and `Borrow` for `Arc`. This allows these containers to be used in generic APIs asking for types implementing those traits. `T` and `&mut T` also implement those traits allowing users to use either owned, shared or borrowed values. `ForeignOwnable` makes a call to its own `borrow` method which must be disambiguated. Reviewed-by: Alice Ryhl Reviewed-by: Benno Lossin Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250616-borrow_impls-v4-2-36f9beb3fe6a@nvidia.com Signed-off-by: Miguel Ojeda --- rust/kernel/sync/arc.rs | 78 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs index c7af0aa48a0a..499175f637a7 100644 --- a/rust/kernel/sync/arc.rs +++ b/rust/kernel/sync/arc.rs @@ -25,6 +25,7 @@ }; use core::{ alloc::Layout, + borrow::{Borrow, BorrowMut}, fmt, marker::PhantomData, mem::{ManuallyDrop, MaybeUninit}, @@ -406,7 +407,7 @@ unsafe fn borrow<'a>(ptr: *mut Self::PointedTo) -> ArcBorrow<'a, T> { unsafe fn borrow_mut<'a>(ptr: *mut Self::PointedTo) -> ArcBorrow<'a, T> { // SAFETY: The safety requirements for `borrow_mut` are a superset of the safety // requirements for `borrow`. 
- unsafe { Self::borrow(ptr) } + unsafe { ::borrow(ptr) } } } @@ -426,6 +427,31 @@ fn as_ref(&self) -> &T { } } +/// # Examples +/// +/// ``` +/// # use core::borrow::Borrow; +/// # use kernel::sync::Arc; +/// struct Foo>(B); +/// +/// // Owned instance. +/// let owned = Foo(1); +/// +/// // Shared instance. +/// let arc = Arc::new(1, GFP_KERNEL)?; +/// let shared = Foo(arc.clone()); +/// +/// let i = 1; +/// // Borrowed from `i`. +/// let borrowed = Foo(&i); +/// # Ok::<(), Error>(()) +/// ``` +impl Borrow for Arc { + fn borrow(&self) -> &T { + self.deref() + } +} + impl Clone for Arc { fn clone(&self) -> Self { // SAFETY: By the type invariant, there is necessarily a reference to the object, so it is @@ -834,6 +860,56 @@ fn deref_mut(&mut self) -> &mut Self::Target { } } +/// # Examples +/// +/// ``` +/// # use core::borrow::Borrow; +/// # use kernel::sync::UniqueArc; +/// struct Foo>(B); +/// +/// // Owned instance. +/// let owned = Foo(1); +/// +/// // Owned instance using `UniqueArc`. +/// let arc = UniqueArc::new(1, GFP_KERNEL)?; +/// let shared = Foo(arc); +/// +/// let i = 1; +/// // Borrowed from `i`. +/// let borrowed = Foo(&i); +/// # Ok::<(), Error>(()) +/// ``` +impl Borrow for UniqueArc { + fn borrow(&self) -> &T { + self.deref() + } +} + +/// # Examples +/// +/// ``` +/// # use core::borrow::BorrowMut; +/// # use kernel::sync::UniqueArc; +/// struct Foo>(B); +/// +/// // Owned instance. +/// let owned = Foo(1); +/// +/// // Owned instance using `UniqueArc`. +/// let arc = UniqueArc::new(1, GFP_KERNEL)?; +/// let shared = Foo(arc); +/// +/// let mut i = 1; +/// // Borrowed from `i`. 
+/// let borrowed = Foo(&mut i); +/// # Ok::<(), Error>(()) +/// ``` +impl BorrowMut for UniqueArc { + fn borrow_mut(&mut self) -> &mut T { + self.deref_mut() + } +} + impl fmt::Display for UniqueArc { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self.deref(), f) From cc4b392718dcbf64301681f8a3daa8a013cf6427 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 2 Jul 2025 18:49:42 -0700 Subject: [PATCH 0527/2411] perf test: Add more test cases to sched test $ sudo ./perf test -vv 92 92: perf sched tests: --- start --- test child forked, pid 1360101 Sched record pid 1360105's current affinity list: 0-3 pid 1360105's new affinity list: 0 pid 1360107's current affinity list: 0-3 pid 1360107's new affinity list: 0 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 4.330 MB /tmp/__perf_test_sched.perf.data.b3319 (12246 samples) ] Sched latency Sched script Sched map Sched timehist Samples of sched_switch event do not have callchains. ---- end(0) ---- 92: perf sched tests : Ok Reviewed-by: Ian Rogers Tested-by: Ian Rogers Link: https://lore.kernel.org/r/20250703014942.1369397-9-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/tests/shell/sched.sh | 41 +++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/tools/perf/tests/shell/sched.sh b/tools/perf/tests/shell/sched.sh index c030126d1a0c..b9b81eaf856e 100755 --- a/tools/perf/tests/shell/sched.sh +++ b/tools/perf/tests/shell/sched.sh @@ -56,38 +56,61 @@ cleanup_noploops() { kill "$PID1" "$PID2" } -test_sched_latency() { - echo "Sched latency" +test_sched_record() { + echo "Sched record" start_noploops perf sched record --no-inherit -o "${perfdata}" sleep 1 + + cleanup_noploops +} + +test_sched_latency() { + echo "Sched latency" + if ! 
perf sched latency -i "${perfdata}" | grep -q perf-noploop then echo "Sched latency [Failed missing output]" err=1 fi - - cleanup_noploops } test_sched_script() { echo "Sched script" - start_noploops - - perf sched record --no-inherit -o "${perfdata}" sleep 1 if ! perf sched script -i "${perfdata}" | grep -q perf-noploop then echo "Sched script [Failed missing output]" err=1 fi - - cleanup_noploops } +test_sched_map() { + echo "Sched map" + + if ! perf sched map -i "${perfdata}" | grep -q perf-noploop + then + echo "Sched map [Failed missing output]" + err=1 + fi +} + +test_sched_timehist() { + echo "Sched timehist" + + if ! perf sched timehist -i "${perfdata}" | grep -q perf-noploop + then + echo "Sched timehist [Failed missing output]" + err=1 + fi +} + +test_sched_record test_sched_latency test_sched_script +test_sched_map +test_sched_timehist cleanup exit $err From 56ffb63749f4a1e88c282b763c458f3ed73d8c27 Mon Sep 17 00:00:00 2001 From: Yuanjie Yang Date: Tue, 24 Jun 2025 17:06:00 +0800 Subject: [PATCH 0528/2411] pinctrl: qcom: add multi TLMM region option parameter Add support for selecting multiple TLMM regions using the tlmm-test tool. The current implementation only selects the TLMM Node region 0, which can lead to incorrect region selection. QCS 615 TLMM Node dts reg: tlmm: pinctrl@3100000 { compatible = "qcom,qcs615-tlmm"; reg = <0x0 0x03100000 0x0 0x300000>, <0x0 0x03500000 0x0 0x300000>, <0x0 0x03d00000 0x0 0x300000>; reg-names = "east", "west", "south"; QCS615 gpio57 is in the south region with an offset of 0x39000, and its address is 0x3d39000. However, the default region selection is region 0 (east region), resulting in a wrong calculated address of 0x3139000. Add a tlmm option parameter named tlmm_reg_name to select the region. If the user does not input the parameter, the default region is 0. 
Signed-off-by: Yuanjie Yang Link: https://lore.kernel.org/20250624090600.91063-1-quic_yuanjiey@quicinc.com Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/tlmm-test.c | 47 +++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/qcom/tlmm-test.c b/drivers/pinctrl/qcom/tlmm-test.c index 7b99e89e0f67..7d7fff538755 100644 --- a/drivers/pinctrl/qcom/tlmm-test.c +++ b/drivers/pinctrl/qcom/tlmm-test.c @@ -16,6 +16,7 @@ #include #include #include +#include /* * This TLMM test module serves the purpose of validating that the TLMM driver @@ -38,7 +39,10 @@ #define TLMM_REG_SIZE 0x1000 static int tlmm_test_gpio = -1; +static char *tlmm_reg_name = "default_region"; + module_param_named(gpio, tlmm_test_gpio, int, 0600); +module_param_named(name, tlmm_reg_name, charp, 0600); static struct { void __iomem *base; @@ -570,6 +574,47 @@ static const struct of_device_id tlmm_of_match[] = { {} }; +static int tlmm_reg_base(struct device_node *tlmm, struct resource *res) +{ + const char **reg_names; + int count; + int ret; + int i; + + count = of_property_count_strings(tlmm, "reg-names"); + if (count <= 0) { + pr_err("failed to find tlmm reg name\n"); + return count; + } + + reg_names = kcalloc(count, sizeof(char *), GFP_KERNEL); + if (!reg_names) + return -ENOMEM; + + ret = of_property_read_string_array(tlmm, "reg-names", reg_names, count); + if (ret != count) { + kfree(reg_names); + return -EINVAL; + } + + if (!strcmp(tlmm_reg_name, "default_region")) { + ret = of_address_to_resource(tlmm, 0, res); + } else { + for (i = 0; i < count; i++) { + if (!strcmp(reg_names[i], tlmm_reg_name)) { + ret = of_address_to_resource(tlmm, i, res); + break; + } + } + if (i == count) + ret = -EINVAL; + } + + kfree(reg_names); + + return ret; +} + static int tlmm_test_init_suite(struct kunit_suite *suite) { struct of_phandle_args args = {}; @@ -588,7 +633,7 @@ static int tlmm_test_init_suite(struct kunit_suite *suite) return -EINVAL; } - ret = 
of_address_to_resource(tlmm, 0, &res); + ret = tlmm_reg_base(tlmm, &res); if (ret < 0) return ret; From d3eed11b9cf84166ec38ba68ab892fcd9261b810 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 6 Jun 2025 12:03:58 -0400 Subject: [PATCH 0529/2411] dt-bindings: pinctrl: convert nxp,lpc1850-scu.txt to yaml format Convert nxp,lpc1850-scu.txt to yaml format. Additional changes: - keep child name *_cfg to align with legacy, very old platform dts files. - remove label in examples. - keep just one example. Signed-off-by: Frank Li Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/20250606160359.1356555-1-Frank.Li@nxp.com Signed-off-by: Linus Walleij --- .../bindings/pinctrl/nxp,lpc1850-scu.txt | 71 ----------------- .../bindings/pinctrl/nxp,lpc1850-scu.yaml | 79 +++++++++++++++++++ 2 files changed, 79 insertions(+), 71 deletions(-) delete mode 100644 Documentation/devicetree/bindings/pinctrl/nxp,lpc1850-scu.txt create mode 100644 Documentation/devicetree/bindings/pinctrl/nxp,lpc1850-scu.yaml diff --git a/Documentation/devicetree/bindings/pinctrl/nxp,lpc1850-scu.txt b/Documentation/devicetree/bindings/pinctrl/nxp,lpc1850-scu.txt deleted file mode 100644 index bd8b0c69fa44..000000000000 --- a/Documentation/devicetree/bindings/pinctrl/nxp,lpc1850-scu.txt +++ /dev/null @@ -1,71 +0,0 @@ -NXP LPC18xx/43xx SCU pin controller Device Tree Bindings --------------------------------------------------------- - -Required properties: -- compatible : Should be "nxp,lpc1850-scu" -- reg : Address and length of the register set for the device -- clocks : Clock specifier (see clock bindings for details) - -The lpc1850-scu driver uses the generic pin multiplexing and generic pin -configuration documented in pinctrl-bindings.txt.
- -The following generic nodes are supported: - - function - - pins - - bias-disable - - bias-pull-up - - bias-pull-down - - drive-strength - - input-enable - - input-disable - - input-schmitt-enable - - input-schmitt-disable - - slew-rate - -NXP specific properties: - - nxp,gpio-pin-interrupt : Assign pin to gpio pin interrupt controller - irq number 0 to 7. See example below. - -Not all pins support all properties so either refer to the NXP 1850/4350 -user manual or the pin table in the pinctrl-lpc18xx driver for supported -pin properties. - -Example: -pinctrl: pinctrl@40086000 { - compatible = "nxp,lpc1850-scu"; - reg = <0x40086000 0x1000>; - clocks = <&ccu1 CLK_CPU_SCU>; - - i2c0_pins: i2c0-pins { - i2c0_pins_cfg { - pins = "i2c0_scl", "i2c0_sda"; - function = "i2c0"; - input-enable; - }; - }; - - uart0_pins: uart0-pins { - uart0_rx_cfg { - pins = "pf_11"; - function = "uart0"; - bias-disable; - input-enable; - }; - - uart0_tx_cfg { - pins = "pf_10"; - function = "uart0"; - bias-disable; - }; - }; - - gpio_joystick_pins: gpio-joystick-pins { - gpio_joystick_1_cfg { - pins = "p9_0"; - function = "gpio"; - nxp,gpio-pin-interrupt = <0>; - input-enable; - bias-disable; - }; - }; -}; diff --git a/Documentation/devicetree/bindings/pinctrl/nxp,lpc1850-scu.yaml b/Documentation/devicetree/bindings/pinctrl/nxp,lpc1850-scu.yaml new file mode 100644 index 000000000000..11f41359b5c8 --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/nxp,lpc1850-scu.yaml @@ -0,0 +1,79 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pinctrl/nxp,lpc1850-scu.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NXP LPC18xx/43xx SCU pin controller + +description: + Not all pins support all pin generic node properties so either refer to + the NXP 1850/4350 user manual or the pin table in the pinctrl-lpc18xx + driver for supported pin properties. 
+ +maintainers: + - Frank Li + +properties: + compatible: + const: nxp,lpc1850-scu + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + +patternProperties: + '-pins$': + type: object + additionalProperties: false + + patternProperties: + '_cfg$': + type: object + + allOf: + - $ref: pincfg-node.yaml# + - $ref: pinmux-node.yaml# + + unevaluatedProperties: false + + properties: + nxp,gpio-pin-interrupt: + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 7 + description: + Assign pin to gpio pin interrupt controller + irq number 0 to 7. See example below. + +required: + - compatible + - reg + - clocks + +allOf: + - $ref: pinctrl.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + + pinctrl@40086000 { + compatible = "nxp,lpc1850-scu"; + reg = <0x40086000 0x1000>; + clocks = <&ccu1 CLK_CPU_SCU>; + + gpio-joystick-pins { + gpio-joystick-1_cfg { + pins = "p9_0"; + function = "gpio"; + nxp,gpio-pin-interrupt = <0>; + input-enable; + bias-disable; + }; + }; + }; From b838fb5f16a355ef851b1ed7ac31aaf4dc7f45a0 Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Tue, 10 Jun 2025 17:18:37 +0200 Subject: [PATCH 0530/2411] dt-bindings: pinctrl: stm32: Add missing blank lines Separate the properties through a blank line. 
Signed-off-by: Antonio Borneo Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/20250610151837.299244-6-antonio.borneo@foss.st.com Signed-off-by: Linus Walleij --- .../bindings/pinctrl/st,stm32-pinctrl.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml index 5d17d6487ae9..961161c2ab62 100644 --- a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml @@ -32,13 +32,16 @@ properties: '#address-cells': const: 1 + '#size-cells': const: 1 ranges: true + pins-are-numbered: $ref: /schemas/types.yaml#/definitions/flag deprecated: true + hwlocks: true interrupts: @@ -67,22 +70,29 @@ patternProperties: additionalProperties: false properties: gpio-controller: true + '#gpio-cells': const: 2 + interrupt-controller: true '#interrupt-cells': const: 2 reg: maxItems: 1 + clocks: maxItems: 1 + resets: maxItems: 1 + gpio-line-names: true + gpio-ranges: minItems: 1 maxItems: 16 + ngpios: description: Number of available gpios in a bank. @@ -187,18 +197,25 @@ patternProperties: bias-disable: type: boolean + bias-pull-down: type: boolean + bias-pull-up: type: boolean + drive-push-pull: type: boolean + drive-open-drain: type: boolean + output-low: type: boolean + output-high: type: boolean + slew-rate: description: | 0: Low speed From b306791037bc7274c10372c4f3d777b0deb08f06 Mon Sep 17 00:00:00 2001 From: Jack Ping CHNG Date: Fri, 27 Jun 2025 08:54:19 +0800 Subject: [PATCH 0531/2411] pinctrl: equilibrium: Add request and free hooks Add request and free gpio_chip hooks to support gpio allocation and release in the driver. 
Signed-off-by: Jack Ping CHNG Link: https://lore.kernel.org/20250627005419.3124660-1-jchng@maxlinear.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-equilibrium.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/pinctrl-equilibrium.c b/drivers/pinctrl/pinctrl-equilibrium.c index 3a9a0f059090..128b7efb110a 100644 --- a/drivers/pinctrl/pinctrl-equilibrium.c +++ b/drivers/pinctrl/pinctrl-equilibrium.c @@ -182,6 +182,8 @@ static int gpiochip_setup(struct device *dev, struct eqbr_gpio_ctrl *gctrl) gc = &gctrl->chip; gc->label = gctrl->name; gc->fwnode = gctrl->fwnode; + gc->request = gpiochip_generic_request; + gc->free = gpiochip_generic_free; if (!fwnode_property_read_bool(gctrl->fwnode, "interrupt-controller")) { dev_dbg(dev, "gc %s: doesn't act as interrupt controller!\n", From 1b84691e7870bc5b6a66a1e81abe0eae8359dfce Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 4 Jun 2025 12:18:27 +0200 Subject: [PATCH 0532/2411] i3c: dw: use adapter timeout value for I2C transfers I2C adapters have their own timeout value which can be changed by userspace if desired. Use it for I2C transfers. The default is 1Hz, so the default behaviour is unchanged. 
Signed-off-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250604101831.56585-2-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/dw-i3c-master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index 611c22b72c15..0c370672a4fc 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -1142,7 +1142,7 @@ static int dw_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, } dw_i3c_master_enqueue_xfer(master, xfer); - if (!wait_for_completion_timeout(&xfer->comp, XFER_TIMEOUT)) + if (!wait_for_completion_timeout(&xfer->comp, m->i2c.timeout)) dw_i3c_master_dequeue_xfer(master, xfer); ret = xfer->ret; From be27ed672878bdfb38580b491270f38cc5c36b38 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 4 Jun 2025 12:18:28 +0200 Subject: [PATCH 0533/2411] i3c: master: cdns: use adapter timeout value for I2C transfers I2C adapters have their own timeout value which can be changed by userspace if desired. Use it for I2C transfers. The default is 1Hz, so the default behaviour is unchanged. 
Signed-off-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250604101831.56585-3-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/i3c-master-cdns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c index fd3752cea654..562e49e930b4 100644 --- a/drivers/i3c/master/i3c-master-cdns.c +++ b/drivers/i3c/master/i3c-master-cdns.c @@ -863,7 +863,7 @@ static int cdns_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, } cdns_i3c_master_queue_xfer(master, xfer); - if (!wait_for_completion_timeout(&xfer->comp, msecs_to_jiffies(1000))) + if (!wait_for_completion_timeout(&xfer->comp, m->i2c.timeout)) cdns_i3c_master_unqueue_xfer(master, xfer); ret = xfer->ret; From c0a90eb55a69fc9016f4a0b19bb03708d6b1d0b7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 4 Jun 2025 12:18:29 +0200 Subject: [PATCH 0534/2411] i3c: mipi-i3c-hci: use adapter timeout value for I2C transfers I2C adapters have their own timeout value which can be changed by userspace if desired. Use it for I2C transfers. The default is 1Hz, so the default behaviour is unchanged. 
Signed-off-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250604101831.56585-4-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/mipi-i3c-hci/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c index bc4538694540..60f1175f1f37 100644 --- a/drivers/i3c/master/mipi-i3c-hci/core.c +++ b/drivers/i3c/master/mipi-i3c-hci/core.c @@ -395,7 +395,7 @@ static int i3c_hci_i2c_xfers(struct i2c_dev_desc *dev, ret = hci->io->queue_xfer(hci, xfer, nxfers); if (ret) goto out; - if (!wait_for_completion_timeout(&done, HZ) && + if (!wait_for_completion_timeout(&done, m->i2c.timeout) && hci->io->dequeue_xfer(hci, xfer, nxfers)) { ret = -ETIME; goto out; From a747e01adad2715bc002755ee15ef72360190ffc Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 4 Jun 2025 12:18:30 +0200 Subject: [PATCH 0535/2411] i3c: master: svc: use adapter timeout value for I2C transfers I2C adapters have their own timeout value which can be changed by userspace if desired. Use it for I2C transfers. The default is 1Hz, so the default behaviour is unchanged. 
Signed-off-by: Wolfram Sang Reviewed-by: Miquel Raynal Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250604101831.56585-5-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/svc-i3c-master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 7e1a7cb94b43..6d0eea80ea34 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -1708,7 +1708,7 @@ static int svc_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, mutex_lock(&master->lock); svc_i3c_master_enqueue_xfer(master, xfer); - if (!wait_for_completion_timeout(&xfer->comp, msecs_to_jiffies(1000))) + if (!wait_for_completion_timeout(&xfer->comp, m->i2c.timeout)) svc_i3c_master_dequeue_xfer(master, xfer); mutex_unlock(&master->lock); From 290ce8b2d0745e45a3155268184523a8c75996f1 Mon Sep 17 00:00:00 2001 From: Jorge Marques Date: Sun, 22 Jun 2025 12:11:07 +0200 Subject: [PATCH 0536/2411] i3c: master: Initialize ret in i3c_i2c_notifier_call() Set ret to -EINVAL if i3c_i2c_notifier_call() receives an invalid action, resolving uninitialized warning. 
Signed-off-by: Jorge Marques Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250622-i3c-master-ret-uninitialized-v1-1-aabb5625c932@analog.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index fd81871609d9..68b8ea9174b9 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -2467,6 +2467,8 @@ static int i3c_i2c_notifier_call(struct notifier_block *nb, unsigned long action case BUS_NOTIFY_DEL_DEVICE: ret = i3c_master_i2c_detach(adap, client); break; + default: + ret = -EINVAL; } i3c_bus_maintenance_unlock(&master->bus); From 4f5ee6405f8bde3d8c037531e0c57be5cd32de3d Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 5 Jun 2025 11:47:58 +0200 Subject: [PATCH 0537/2411] i3c: add patchwork entry to MAINTAINERS Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20250605094757.8655-2-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..d5a173e987c0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11468,6 +11468,7 @@ M: Alexandre Belloni R: Frank Li L: linux-i3c@lists.infradead.org (moderated for non-subscribers) S: Maintained +Q: https://patchwork.kernel.org/project/linux-i3c/list/ C: irc://chat.freenode.net/linux-i3c T: git git://git.kernel.org/pub/scm/linux/kernel/git/i3c/linux.git F: Documentation/ABI/testing/sysfs-bus-i3c From 64daf134941208e7dd30bf04c8c97764c2f0c2b1 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Tue, 1 Jul 2025 22:11:20 +0200 Subject: [PATCH 0538/2411] pinctrl: sunxi: v3s: Fix wrong comment about UART2 pinmux The original comment doesn't match the pin attribution, probably due to a hasty copy/paste. 
Signed-off-by: Paul Kocialkowski Reviewed-by: Icenowy Zheng Link: https://lore.kernel.org/20250701201124.812882-2-paulk@sys-base.io Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-sun8i-v3s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-v3s.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-v3s.c index 696d7dd8d87b..2e3bd36a4410 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun8i-v3s.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-v3s.c @@ -45,7 +45,7 @@ static const struct sunxi_desc_pin sun8i_v3s_pins[] = { SUNXI_PIN(SUNXI_PINCTRL_PIN(B, 3), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "uart2"), /* D1 */ + SUNXI_FUNCTION(0x2, "uart2"), /* CTS */ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 3)), /* PB_EINT3 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(B, 4), SUNXI_FUNCTION(0x0, "gpio_in"), From 683d532dfc9657ab8aae25204f378352ed144646 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Wed, 2 Jul 2025 13:15:24 +0100 Subject: [PATCH 0539/2411] pinctrl: samsung: Fix gs101 irq chip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When adding the dedicated gs101_wkup_irq_chip struct to support the eint wakeup mask the .eint_con, eint_mask and .eint_pend fields were missed. The result is that irqs on gs101 for the buttons etc are broken. 
Reported-by: André Draszik Fixes: 2642f55d44ce ("pinctrl: samsung: add support for gs101 wakeup mask programming") Signed-off-by: Peter Griffin Tested-by: André Draszik Link: https://lore.kernel.org/r/20250702-fix-gs101-irqchip-v1-1-ccc84b44ad72@linaro.org Signed-off-by: Krzysztof Kozlowski --- drivers/pinctrl/samsung/pinctrl-exynos.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c index 5554768d465f..81fe0b08a9af 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos.c @@ -665,6 +665,9 @@ static const struct exynos_irq_chip gs101_wkup_irq_chip __initconst = { .irq_request_resources = exynos_irq_request_resources, .irq_release_resources = exynos_irq_release_resources, }, + .eint_con = EXYNOS7_WKUP_ECON_OFFSET, + .eint_mask = EXYNOS7_WKUP_EMASK_OFFSET, + .eint_pend = EXYNOS7_WKUP_EPEND_OFFSET, .eint_num_wakeup_reg = 3, .eint_wake_mask_reg = GS101_EINT_WAKEUP_MASK, .set_eint_wakeup_mask = gs101_pinctrl_set_eint_wakeup_mask, From 2453753f395e68af947d9ac6ce37bf0eb40f7123 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 4 Jul 2025 10:54:42 +0300 Subject: [PATCH 0540/2411] power: supply: bq24190: Remove redundant pm_runtime_mark_last_busy() calls pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-reduntant explicit call to pm_runtime_mark_last_busy(). 
Signed-off-by: Sakari Ailus Link: https://lore.kernel.org/r/20250704075442.3221283-1-sakari.ailus@linux.intel.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq24190_charger.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c index 2c3804e1207e..e1510c7fdab3 100644 --- a/drivers/power/supply/bq24190_charger.c +++ b/drivers/power/supply/bq24190_charger.c @@ -504,7 +504,6 @@ static ssize_t bq24190_sysfs_show(struct device *dev, else count = sysfs_emit(buf, "%hhx\n", v); - pm_runtime_mark_last_busy(bdi->dev); pm_runtime_put_autosuspend(bdi->dev); return count; @@ -535,7 +534,6 @@ static ssize_t bq24190_sysfs_store(struct device *dev, if (ret) count = ret; - pm_runtime_mark_last_busy(bdi->dev); pm_runtime_put_autosuspend(bdi->dev); return count; @@ -562,7 +560,6 @@ static int bq24190_set_otg_vbus(struct bq24190_dev_info *bdi, bool enable) else ret = bq24190_charger_set_charge_type(bdi, &val); - pm_runtime_mark_last_busy(bdi->dev); pm_runtime_put_autosuspend(bdi->dev); return ret; @@ -605,7 +602,6 @@ static int bq24296_set_otg_vbus(struct bq24190_dev_info *bdi, bool enable) } out: - pm_runtime_mark_last_busy(bdi->dev); pm_runtime_put_autosuspend(bdi->dev); return ret; @@ -638,7 +634,6 @@ static int bq24190_vbus_is_enabled(struct regulator_dev *dev) BQ24190_REG_POC_CHG_CONFIG_MASK, BQ24190_REG_POC_CHG_CONFIG_SHIFT, &val); - pm_runtime_mark_last_busy(bdi->dev); pm_runtime_put_autosuspend(bdi->dev); if (ret) @@ -675,7 +670,6 @@ static int bq24296_vbus_is_enabled(struct regulator_dev *dev) BQ24296_REG_POC_OTG_CONFIG_MASK, BQ24296_REG_POC_OTG_CONFIG_SHIFT, &val); - pm_runtime_mark_last_busy(bdi->dev); pm_runtime_put_autosuspend(bdi->dev); if (ret) @@ -1376,7 +1370,6 @@ static int bq24190_charger_get_property(struct power_supply *psy, ret = -ENODATA; } - pm_runtime_mark_last_busy(bdi->dev); pm_runtime_put_autosuspend(bdi->dev); return ret; @@ -1419,7 +1412,6 @@ static 
int bq24190_charger_set_property(struct power_supply *psy, ret = -EINVAL; } - pm_runtime_mark_last_busy(bdi->dev); pm_runtime_put_autosuspend(bdi->dev); return ret; @@ -1682,7 +1674,6 @@ static int bq24190_battery_get_property(struct power_supply *psy, ret = -ENODATA; } - pm_runtime_mark_last_busy(bdi->dev); pm_runtime_put_autosuspend(bdi->dev); return ret; @@ -1713,7 +1704,6 @@ static int bq24190_battery_set_property(struct power_supply *psy, ret = -EINVAL; } - pm_runtime_mark_last_busy(bdi->dev); pm_runtime_put_autosuspend(bdi->dev); return ret; @@ -1861,7 +1851,6 @@ static irqreturn_t bq24190_irq_handler_thread(int irq, void *data) return IRQ_NONE; } bq24190_check_status(bdi); - pm_runtime_mark_last_busy(bdi->dev); pm_runtime_put_autosuspend(bdi->dev); bdi->irq_event = false; @@ -2188,7 +2177,6 @@ static int bq24190_probe(struct i2c_client *client) enable_irq_wake(client->irq); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; @@ -2275,7 +2263,6 @@ static __maybe_unused int bq24190_pm_suspend(struct device *dev) bq24190_register_reset(bdi); if (error >= 0) { - pm_runtime_mark_last_busy(bdi->dev); pm_runtime_put_autosuspend(bdi->dev); } @@ -2300,7 +2287,6 @@ static __maybe_unused int bq24190_pm_resume(struct device *dev) bq24190_read(bdi, BQ24190_REG_SS, &bdi->ss_reg); if (error >= 0) { - pm_runtime_mark_last_busy(bdi->dev); pm_runtime_put_autosuspend(bdi->dev); } From f9335bb4f5d4f3b913efd5872c2794d027dd85a6 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 4 Jul 2025 10:54:42 +0300 Subject: [PATCH 0541/2411] power: supply: twl4030_charger: Remove redundant pm_runtime_mark_last_busy() calls pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-reduntant explicit call to pm_runtime_mark_last_busy(). 
Signed-off-by: Sakari Ailus Link: https://lore.kernel.org/r/20250704075442.3221330-1-sakari.ailus@linux.intel.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/twl4030_charger.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c index 9dcb5457bef4..04216b2bfb6c 100644 --- a/drivers/power/supply/twl4030_charger.c +++ b/drivers/power/supply/twl4030_charger.c @@ -512,7 +512,6 @@ static int twl4030_charger_enable_usb(struct twl4030_bci *bci, bool enable) ret |= twl_i2c_write_u8(TWL_MODULE_MAIN_CHARGE, 0x2a, TWL4030_BCIMDKEY); if (bci->usb_enabled) { - pm_runtime_mark_last_busy(bci->transceiver->dev); pm_runtime_put_autosuspend(bci->transceiver->dev); bci->usb_enabled = 0; } From c1dc61aede55a571d34fe20415b1413a3c86ee24 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 1 Jul 2025 17:38:56 +0530 Subject: [PATCH 0542/2411] PCI: dwc: Make dw_pcie_ptm_ops static dw_pcie_ptm_ops is not used outside of this file, so make it static. This also fixes the sparse warning: drivers/pci/controller/dwc/pcie-designware-debugfs.c:868:27: warning: symbol 'dw_pcie_ptm_ops' was not declared. Should it be static? 
Fixes: 852a1fdd34a8 ("PCI: dwc: Add debugfs support for PTM context") Reported-by: Bjorn Helgaas Closes: https://lore.kernel.org/linux-pci/20250617231210.GA1172093@bhelgaas Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250701120856.15839-1-mani@kernel.org --- drivers/pci/controller/dwc/pcie-designware-debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-debugfs.c b/drivers/pci/controller/dwc/pcie-designware-debugfs.c index 6f438a36f840..0fbf86c0b97e 100644 --- a/drivers/pci/controller/dwc/pcie-designware-debugfs.c +++ b/drivers/pci/controller/dwc/pcie-designware-debugfs.c @@ -865,7 +865,7 @@ static bool dw_pcie_ptm_t4_visible(void *drvdata) return pci->mode == DW_PCIE_EP_TYPE; } -const struct pcie_ptm_ops dw_pcie_ptm_ops = { +static const struct pcie_ptm_ops dw_pcie_ptm_ops = { .check_capability = dw_pcie_ptm_check_capability, .context_update_write = dw_pcie_ptm_context_update_write, .context_update_read = dw_pcie_ptm_context_update_read, From 447270cdb41b1c8c3621bb14b93a6749f942556e Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 4 Jul 2025 22:44:32 +0200 Subject: [PATCH 0543/2411] i3c: don't fail if GETHDRCAP is unsupported 'I3C_BCR_HDR_CAP' is still spec v1.0 and has been renamed to 'advanced capabilities' in v1.1 onwards. The ST pressure sensor LPS22DF does not have HDR, but has the 'advanced cap' bit set. The core still wants to get additional information using the CCC 'GETHDRCAP' (or GETCAPS in v1.1 onwards). Not all controllers support this CCC and will notify the upper layers about it. For instantiating the device, we can ignore this unsupported CCC as standard communication will work. Without this patch, the device will not be instantiated at all. 
Signed-off-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250704204524.6124-1-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index 68b8ea9174b9..dfa0bad991cf 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -1439,7 +1439,7 @@ static int i3c_master_retrieve_dev_info(struct i3c_dev_desc *dev) if (dev->info.bcr & I3C_BCR_HDR_CAP) { ret = i3c_master_gethdrcap_locked(master, &dev->info); - if (ret) + if (ret && ret != -ENOTSUPP) return ret; } From d10a4c323883c41cf1b652309e61c48bce248e35 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 28 Jun 2025 21:20:28 +0200 Subject: [PATCH 0544/2411] i3c: master: replace ENOTSUPP with SUSV4-compliant EOPNOTSUPP Replace non-standard ENOTSUPP with the SUSV4-defined error code EOPNOTSUPP to fix below checkpatch warning: "ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP" Signed-off-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250628192027.3932-6-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index dfa0bad991cf..1a68acee1f13 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -837,14 +837,14 @@ static int i3c_master_send_ccc_cmd_locked(struct i3c_master_controller *master, return -EINVAL; if (!master->ops->send_ccc_cmd) - return -ENOTSUPP; + return -EOPNOTSUPP; if ((cmd->id & I3C_CCC_DIRECT) && (!cmd->dests || !cmd->ndests)) return -EINVAL; if (master->ops->supports_ccc_cmd && !master->ops->supports_ccc_cmd(master, cmd)) - return -ENOTSUPP; + return -EOPNOTSUPP; ret = master->ops->send_ccc_cmd(master, cmd); if (ret) { @@ -1439,7 +1439,7 @@ static int i3c_master_retrieve_dev_info(struct i3c_dev_desc *dev) if (dev->info.bcr 
& I3C_BCR_HDR_CAP) { ret = i3c_master_gethdrcap_locked(master, &dev->info); - if (ret && ret != -ENOTSUPP) + if (ret && ret != -EOPNOTSUPP) return ret; } @@ -2210,7 +2210,7 @@ of_i3c_master_add_i2c_boardinfo(struct i3c_master_controller *master, */ if (boardinfo->base.flags & I2C_CLIENT_TEN) { dev_err(dev, "I2C device with 10 bit address not supported."); - return -ENOTSUPP; + return -EOPNOTSUPP; } /* LVR is encoded in reg[2]. */ @@ -2340,13 +2340,13 @@ static int i3c_master_i2c_adapter_xfer(struct i2c_adapter *adap, return -EINVAL; if (!master->ops->i2c_xfers) - return -ENOTSUPP; + return -EOPNOTSUPP; /* Doing transfers to different devices is not supported. */ addr = xfers[0].addr; for (i = 1; i < nxfers; i++) { if (addr != xfers[i].addr) - return -ENOTSUPP; + return -EOPNOTSUPP; } i3c_bus_normaluse_lock(&master->bus); @@ -2768,7 +2768,7 @@ static int i3c_master_check_ops(const struct i3c_master_controller_ops *ops) * controller) * @ops: the master controller operations * @secondary: true if you are registering a secondary master. Will return - * -ENOTSUPP if set to true since secondary masters are not yet + * -EOPNOTSUPP if set to true since secondary masters are not yet * supported * * This function takes care of everything for you: @@ -2795,7 +2795,7 @@ int i3c_master_register(struct i3c_master_controller *master, /* We do not support secondary masters yet. 
*/ if (secondary) - return -ENOTSUPP; + return -EOPNOTSUPP; ret = i3c_master_check_ops(ops); if (ret) @@ -2956,7 +2956,7 @@ int i3c_dev_do_priv_xfers_locked(struct i3c_dev_desc *dev, return -EINVAL; if (!master->ops->priv_xfers) - return -ENOTSUPP; + return -EOPNOTSUPP; return master->ops->priv_xfers(dev, xfers, nxfers); } @@ -3006,7 +3006,7 @@ int i3c_dev_request_ibi_locked(struct i3c_dev_desc *dev, int ret; if (!master->ops->request_ibi) - return -ENOTSUPP; + return -EOPNOTSUPP; if (dev->ibi) return -EBUSY; From 566aebedee37789644bcc976fd6d98ccf8de375b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 28 Jun 2025 21:20:29 +0200 Subject: [PATCH 0545/2411] i3c: dw: replace ENOTSUPP with SUSV4-compliant EOPNOTSUPP Replace non-standard ENOTSUPP with the SUSV4-defined error code EOPNOTSUPP to fix below checkpatch warning: "ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP" Signed-off-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250628192027.3932-7-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/dw-i3c-master.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index 0c370672a4fc..ae1992665673 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -932,7 +932,7 @@ static int dw_i3c_master_priv_xfers(struct i3c_dev_desc *dev, return 0; if (i3c_nxfers > master->caps.cmdfifodepth) - return -ENOTSUPP; + return -EOPNOTSUPP; for (i = 0; i < i3c_nxfers; i++) { if (i3c_xfers[i].rnw) @@ -943,7 +943,7 @@ static int dw_i3c_master_priv_xfers(struct i3c_dev_desc *dev, if (ntxwords > master->caps.datafifodepth || nrxwords > master->caps.datafifodepth) - return -ENOTSUPP; + return -EOPNOTSUPP; xfer = dw_i3c_master_alloc_xfer(master, i3c_nxfers); if (!xfer) @@ -1093,7 +1093,7 @@ static int dw_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, return 0; if (i2c_nxfers > 
master->caps.cmdfifodepth) - return -ENOTSUPP; + return -EOPNOTSUPP; for (i = 0; i < i2c_nxfers; i++) { if (i2c_xfers[i].flags & I2C_M_RD) @@ -1104,7 +1104,7 @@ static int dw_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, if (ntxwords > master->caps.datafifodepth || nrxwords > master->caps.datafifodepth) - return -ENOTSUPP; + return -EOPNOTSUPP; xfer = dw_i3c_master_alloc_xfer(master, i2c_nxfers); if (!xfer) From 8d53c0d645e3b2a1e341ffb4dbea345c55035c6b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 28 Jun 2025 21:20:30 +0200 Subject: [PATCH 0546/2411] i3c: master: cdns: replace ENOTSUPP with SUSV4-compliant EOPNOTSUPP Replace non-standard ENOTSUPP with the SUSV4-defined error code EOPNOTSUPP to fix below checkpatch warning: "ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP" Signed-off-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250628192027.3932-8-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/i3c-master-cdns.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c index 562e49e930b4..449e85d7ba87 100644 --- a/drivers/i3c/master/i3c-master-cdns.c +++ b/drivers/i3c/master/i3c-master-cdns.c @@ -742,7 +742,7 @@ static int cdns_i3c_master_priv_xfers(struct i3c_dev_desc *dev, for (i = 0; i < nxfers; i++) { if (xfers[i].len > CMD0_FIFO_PL_LEN_MAX) - return -ENOTSUPP; + return -EOPNOTSUPP; } if (!nxfers) @@ -750,7 +750,7 @@ static int cdns_i3c_master_priv_xfers(struct i3c_dev_desc *dev, if (nxfers > master->caps.cmdfifodepth || nxfers > master->caps.cmdrfifodepth) - return -ENOTSUPP; + return -EOPNOTSUPP; /* * First make sure that all transactions (block of transfers separated @@ -765,7 +765,7 @@ static int cdns_i3c_master_priv_xfers(struct i3c_dev_desc *dev, if (rxslots > master->caps.rxfifodepth || txslots > master->caps.txfifodepth) - return -ENOTSUPP; + return -EOPNOTSUPP; cdns_xfer = 
cdns_i3c_master_alloc_xfer(master, nxfers); if (!cdns_xfer) @@ -822,11 +822,11 @@ static int cdns_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, int i, ret = 0; if (nxfers > master->caps.cmdfifodepth) - return -ENOTSUPP; + return -EOPNOTSUPP; for (i = 0; i < nxfers; i++) { if (xfers[i].len > CMD0_FIFO_PL_LEN_MAX) - return -ENOTSUPP; + return -EOPNOTSUPP; if (xfers[i].flags & I2C_M_RD) nrxwords += DIV_ROUND_UP(xfers[i].len, 4); @@ -836,7 +836,7 @@ static int cdns_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, if (ntxwords > master->caps.txfifodepth || nrxwords > master->caps.rxfifodepth) - return -ENOTSUPP; + return -EOPNOTSUPP; xfer = cdns_i3c_master_alloc_xfer(master, nxfers); if (!xfer) From 12aa3e0cb0c6f8d406be00bc9f5d89bfbee7b9d9 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 7 Jul 2025 13:54:08 +0200 Subject: [PATCH 0547/2411] i3c: prefix hexadecimal entries in sysfs Hexadecimal values in sysfs should be prefixed with '0x' like e.g. PCI and SCSI already do it. Also ensure the two digit length since BCR and DCR are a byte in size. 
Signed-off-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250707115409.73545-2-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index 1a68acee1f13..e00991444f31 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -141,7 +141,7 @@ static ssize_t bcr_show(struct device *dev, i3c_bus_normaluse_lock(bus); desc = dev_to_i3cdesc(dev); - ret = sprintf(buf, "%x\n", desc->info.bcr); + ret = sprintf(buf, "0x%02x\n", desc->info.bcr); i3c_bus_normaluse_unlock(bus); return ret; @@ -158,7 +158,7 @@ static ssize_t dcr_show(struct device *dev, i3c_bus_normaluse_lock(bus); desc = dev_to_i3cdesc(dev); - ret = sprintf(buf, "%x\n", desc->info.dcr); + ret = sprintf(buf, "0x%02x\n", desc->info.dcr); i3c_bus_normaluse_unlock(bus); return ret; From 590951f908f25c7e4d6822f0109e7e230d7b0a89 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sun, 8 Jun 2025 17:42:51 -0500 Subject: [PATCH 0548/2411] dt-bindings: Move sophgo,cv1800b-rtc to rtc directory The $id path for the sophgo,cv1800b-rtc binding was missing part of the path 'soc'. However, the correct place for RTC bindings (even if it's also a "syscon") is the rtc directory, so move the binding there while fixing the $id value. 
Fixes: 76517429dbfd ("dt-bindings: soc: sophgo: add RTC support for Sophgo CV1800 series") Signed-off-by: Rob Herring (Arm) Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250608224252.3902421-1-robh@kernel.org Signed-off-by: Alexandre Belloni --- .../bindings/{soc/sophgo => rtc}/sophgo,cv1800b-rtc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename Documentation/devicetree/bindings/{soc/sophgo => rtc}/sophgo,cv1800b-rtc.yaml (96%) diff --git a/Documentation/devicetree/bindings/soc/sophgo/sophgo,cv1800b-rtc.yaml b/Documentation/devicetree/bindings/rtc/sophgo,cv1800b-rtc.yaml similarity index 96% rename from Documentation/devicetree/bindings/soc/sophgo/sophgo,cv1800b-rtc.yaml rename to Documentation/devicetree/bindings/rtc/sophgo,cv1800b-rtc.yaml index 5cf186c396c9..c695d2ff9fcc 100644 --- a/Documentation/devicetree/bindings/soc/sophgo/sophgo,cv1800b-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/sophgo,cv1800b-rtc.yaml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) %YAML 1.2 --- -$id: http://devicetree.org/schemas/sophgo/sophgo,cv1800b-rtc.yaml# +$id: http://devicetree.org/schemas/rtc/sophgo,cv1800b-rtc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# title: Real Time Clock of the Sophgo CV1800 SoC From d754e2c4aaeadb342036f89d8afc78db6ba2e210 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Tue, 24 Jun 2025 16:17:32 -0400 Subject: [PATCH 0549/2411] dt-bindings: rtc: move nxp,lpc3220-rtc to separated file from trivial-rtc.yaml nxp,lpc3220-rtc have clocks property, so move it from trivial-rtc.yaml. 
Signed-off-by: Frank Li Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250624201733.2515971-1-Frank.Li@nxp.com Signed-off-by: Alexandre Belloni --- .../bindings/rtc/nxp,lpc3220-rtc.yaml | 49 +++++++++++++++++++ .../devicetree/bindings/rtc/trivial-rtc.yaml | 2 - 2 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 Documentation/devicetree/bindings/rtc/nxp,lpc3220-rtc.yaml diff --git a/Documentation/devicetree/bindings/rtc/nxp,lpc3220-rtc.yaml b/Documentation/devicetree/bindings/rtc/nxp,lpc3220-rtc.yaml new file mode 100644 index 000000000000..53353de4cb37 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/nxp,lpc3220-rtc.yaml @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/nxp,lpc3220-rtc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NXP LPC32xx SoC Real-time Clock + +maintainers: + - Frank Li + +properties: + compatible: + enum: + - nxp,lpc3220-rtc + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + interrupts: + maxItems: 1 + + start-year: true + +required: + - compatible + - reg + +allOf: + - $ref: rtc.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + #include + + rtc@40024000 { + compatible = "nxp,lpc3220-rtc"; + reg = <0x40024000 0x1000>; + interrupt-parent = <&sic1>; + interrupts = <20 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LPC32XX_CLK_RTC>; + }; + diff --git a/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml index 7330a7200831..5e0c7cd25cc6 100644 --- a/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml @@ -63,8 +63,6 @@ properties: - microcrystal,rv3029 # Real Time Clock - microcrystal,rv8523 - # NXP LPC32xx SoC Real-time Clock - - nxp,lpc3220-rtc # I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC - ricoh,r2025sd # I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC From 
0bafe291cb429d39b5ff70bcf7b2f3ab026dcb02 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 2 Jun 2025 10:28:41 -0400 Subject: [PATCH 0550/2411] dt-bindings: rtc: nxp,lpc1788-rtc: add compatible string nxp,lpc1850-rtc Add compatible string nxp,lpc1850-rtc and fallback to nxp,lpc1788-rtc. Fix below CHECK_DTB warning: arch/arm/boot/dts/nxp/lpc/lpc4337-ciaa.dtb: rtc@40046000 (nxp,lpc1850-rtc): compatible: ['nxp,lpc1850-rtc', 'nxp,lpc1788-rtc'] is too long Signed-off-by: Frank Li Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250602142842.942700-1-Frank.Li@nxp.com Signed-off-by: Alexandre Belloni --- Documentation/devicetree/bindings/rtc/nxp,lpc1788-rtc.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/rtc/nxp,lpc1788-rtc.yaml b/Documentation/devicetree/bindings/rtc/nxp,lpc1788-rtc.yaml index e88b847a1cc5..e896ba59302a 100644 --- a/Documentation/devicetree/bindings/rtc/nxp,lpc1788-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/nxp,lpc1788-rtc.yaml @@ -18,7 +18,12 @@ allOf: properties: compatible: - const: nxp,lpc1788-rtc + oneOf: + - items: + - enum: + - nxp,lpc1850-rtc + - const: nxp,lpc1788-rtc + - const: nxp,lpc1788-rtc reg: maxItems: 1 From cbbfe9f683f0f9b6a1da2eaa53b995a4b5961086 Mon Sep 17 00:00:00 2001 From: Geraldo Nascimento Date: Mon, 30 Jun 2025 19:24:41 -0300 Subject: [PATCH 0551/2411] PCI: rockchip: Use standard PCIe definitions Current code uses custom-defined register offsets and bitfields for the standard PCIe registers. This creates duplication as the PCI header already defines them. So, switch to using the standard PCIe definitions and drop the custom ones. 
Suggested-by: Bjorn Helgaas Signed-off-by: Geraldo Nascimento [mani: commit message rewording] Signed-off-by: Manivannan Sadhasivam [bhelgaas: include bitfield.h] Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/e81700ef4b49f584bc8834bfb07b6d8995fc1f42.1751322015.git.geraldogabriel@gmail.com --- drivers/pci/controller/pcie-rockchip-ep.c | 4 +- drivers/pci/controller/pcie-rockchip-host.c | 45 +++++++++++---------- drivers/pci/controller/pcie-rockchip.h | 12 +----- 3 files changed, 26 insertions(+), 35 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 55416b8311dd..300cd85fa035 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -518,9 +518,9 @@ static void rockchip_pcie_ep_retrain_link(struct rockchip_pcie *rockchip) { u32 status; - status = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_LCS); + status = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_BASE + PCI_EXP_LNKCTL); status |= PCI_EXP_LNKCTL_RL; - rockchip_pcie_write(rockchip, status, PCIE_EP_CONFIG_LCS); + rockchip_pcie_write(rockchip, status, PCIE_EP_CONFIG_BASE + PCI_EXP_LNKCTL); } static bool rockchip_pcie_ep_link_up(struct rockchip_pcie *rockchip) diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c index b9e7a8710cf0..383d20f98cc3 100644 --- a/drivers/pci/controller/pcie-rockchip-host.c +++ b/drivers/pci/controller/pcie-rockchip-host.c @@ -11,6 +11,7 @@ * ARM PCI Host generic driver. 
*/ +#include #include #include #include @@ -40,18 +41,18 @@ static void rockchip_pcie_enable_bw_int(struct rockchip_pcie *rockchip) { u32 status; - status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_LCS); + status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_CR + PCI_EXP_LNKCTL); status |= (PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); - rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_LCS); + rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_CR + PCI_EXP_LNKCTL); } static void rockchip_pcie_clr_bw_int(struct rockchip_pcie *rockchip) { u32 status; - status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_LCS); + status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_CR + PCI_EXP_LNKCTL); status |= (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS) << 16; - rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_LCS); + rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_CR + PCI_EXP_LNKCTL); } static void rockchip_pcie_update_txcredit_mui(struct rockchip_pcie *rockchip) @@ -269,7 +270,7 @@ static void rockchip_pcie_set_power_limit(struct rockchip_pcie *rockchip) scale = 3; /* 0.001x */ curr = curr / 1000; /* convert to mA */ power = (curr * 3300) / 1000; /* milliwatt */ - while (power > PCIE_RC_CONFIG_DCR_CSPL_LIMIT) { + while (power > FIELD_MAX(PCI_EXP_DEVCAP_PWR_VAL)) { if (!scale) { dev_warn(rockchip->dev, "invalid power supply\n"); return; @@ -278,10 +279,10 @@ static void rockchip_pcie_set_power_limit(struct rockchip_pcie *rockchip) power = power / 10; } - status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_DCR); - status |= (power << PCIE_RC_CONFIG_DCR_CSPL_SHIFT) | - (scale << PCIE_RC_CONFIG_DCR_CPLS_SHIFT); - rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_DCR); + status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_CR + PCI_EXP_DEVCAP); + status |= FIELD_PREP(PCI_EXP_DEVCAP_PWR_VAL, power); + status |= FIELD_PREP(PCI_EXP_DEVCAP_PWR_SCL, scale); + rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_CR + PCI_EXP_DEVCAP); } /** @@ -309,14 +310,14 @@ static 
int rockchip_pcie_host_init_port(struct rockchip_pcie *rockchip) rockchip_pcie_set_power_limit(rockchip); /* Set RC's clock architecture as common clock */ - status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_LCS); + status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_CR + PCI_EXP_LNKCTL); status |= PCI_EXP_LNKSTA_SLC << 16; - rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_LCS); + rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_CR + PCI_EXP_LNKCTL); /* Set RC's RCB to 128 */ - status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_LCS); + status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_CR + PCI_EXP_LNKCTL); status |= PCI_EXP_LNKCTL_RCB; - rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_LCS); + rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_CR + PCI_EXP_LNKCTL); /* Enable Gen1 training */ rockchip_pcie_write(rockchip, PCIE_CLIENT_LINK_TRAIN_ENABLE, @@ -341,9 +342,9 @@ static int rockchip_pcie_host_init_port(struct rockchip_pcie *rockchip) * Enable retrain for gen2. This should be configured only after * gen1 finished. 
*/ - status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_LCS); + status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_CR + PCI_EXP_LNKCTL); status |= PCI_EXP_LNKCTL_RL; - rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_LCS); + rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_CR + PCI_EXP_LNKCTL); err = readl_poll_timeout(rockchip->apb_base + PCIE_CORE_CTRL, status, PCIE_LINK_IS_GEN2(status), 20, @@ -380,15 +381,15 @@ static int rockchip_pcie_host_init_port(struct rockchip_pcie *rockchip) /* Clear L0s from RC's link cap */ if (of_property_read_bool(dev->of_node, "aspm-no-l0s")) { - status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_LINK_CAP); - status &= ~PCIE_RC_CONFIG_LINK_CAP_L0S; - rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_LINK_CAP); + status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_CR + PCI_EXP_LNKCAP); + status &= ~PCI_EXP_LNKCAP_ASPM_L0S; + rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_CR + PCI_EXP_LNKCAP); } - status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_DCSR); - status &= ~PCIE_RC_CONFIG_DCSR_MPS_MASK; - status |= PCIE_RC_CONFIG_DCSR_MPS_256; - rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_DCSR); + status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_CR + PCI_EXP_DEVCTL); + status &= ~PCI_EXP_DEVCTL_PAYLOAD; + status |= PCI_EXP_DEVCTL_PAYLOAD_256B; + rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_CR + PCI_EXP_DEVCTL); return 0; err_power_off_phy: diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index 12bc8da59d73..72a2c045f6fe 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -155,17 +155,7 @@ #define PCIE_EP_CONFIG_DID_VID (PCIE_EP_CONFIG_BASE + 0x00) #define PCIE_EP_CONFIG_LCS (PCIE_EP_CONFIG_BASE + 0xd0) #define PCIE_RC_CONFIG_RID_CCR (PCIE_RC_CONFIG_BASE + 0x08) -#define PCIE_RC_CONFIG_DCR (PCIE_RC_CONFIG_BASE + 0xc4) -#define PCIE_RC_CONFIG_DCR_CSPL_SHIFT 18 -#define PCIE_RC_CONFIG_DCR_CSPL_LIMIT 0xff 
-#define PCIE_RC_CONFIG_DCR_CPLS_SHIFT 26 -#define PCIE_RC_CONFIG_DCSR (PCIE_RC_CONFIG_BASE + 0xc8) -#define PCIE_RC_CONFIG_DCSR_MPS_MASK GENMASK(7, 5) -#define PCIE_RC_CONFIG_DCSR_MPS_256 (0x1 << 5) -#define PCIE_RC_CONFIG_LINK_CAP (PCIE_RC_CONFIG_BASE + 0xcc) -#define PCIE_RC_CONFIG_LINK_CAP_L0S BIT(10) -#define PCIE_RC_CONFIG_LCS (PCIE_RC_CONFIG_BASE + 0xd0) -#define PCIE_EP_CONFIG_LCS (PCIE_EP_CONFIG_BASE + 0xd0) +#define PCIE_RC_CONFIG_CR (PCIE_RC_CONFIG_BASE + 0xc0) #define PCIE_RC_CONFIG_L1_SUBSTATE_CTRL2 (PCIE_RC_CONFIG_BASE + 0x90c) #define PCIE_RC_CONFIG_THP_CAP (PCIE_RC_CONFIG_BASE + 0x274) #define PCIE_RC_CONFIG_THP_CAP_NEXT_MASK GENMASK(31, 20) From 114b06ee108cabc82b995fbac6672230a9776936 Mon Sep 17 00:00:00 2001 From: Geraldo Nascimento Date: Mon, 30 Jun 2025 19:24:57 -0300 Subject: [PATCH 0552/2411] PCI: rockchip: Set Target Link Speed to 5.0 GT/s before retraining Rockchip controllers can support up to 5.0 GT/s link speed. But the driver doesn't set the Target Link Speed currently. This may cause failure in retraining the link to 5.0 GT/s if supported by the endpoint. So set the Target Link Speed to 5.0 GT/s in the Link Control and Status Register 2. 
Fixes: e77f847df54c ("PCI: rockchip: Add Rockchip PCIe controller support") Signed-off-by: Geraldo Nascimento [mani: fixed whitespace warning, commit message rewording, added fixes tag] Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Tested-by: Robin Murphy Cc: stable@vger.kernel.org Link: https://patch.msgid.link/0afa6bc47b7f50e2e81b0b47d51c66feb0fb565f.1751322015.git.geraldogabriel@gmail.com --- drivers/pci/controller/pcie-rockchip-host.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c index 383d20f98cc3..fb9ae3f158a8 100644 --- a/drivers/pci/controller/pcie-rockchip-host.c +++ b/drivers/pci/controller/pcie-rockchip-host.c @@ -342,6 +342,10 @@ static int rockchip_pcie_host_init_port(struct rockchip_pcie *rockchip) * Enable retrain for gen2. This should be configured only after * gen1 finished. */ + status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_CR + PCI_EXP_LNKCTL2); + status &= ~PCI_EXP_LNKCTL2_TLS; + status |= PCI_EXP_LNKCTL2_TLS_5_0GT; + rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_CR + PCI_EXP_LNKCTL2); status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_CR + PCI_EXP_LNKCTL); status |= PCI_EXP_LNKCTL_RL; rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_CR + PCI_EXP_LNKCTL); From a292d5733c5e9c0febaf901295eacb13e2df636c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 30 Jun 2025 09:30:46 -0700 Subject: [PATCH 0553/2411] perf vendor events: Update Alderlake events Update events from v1.29 to v1.31. 
Bring in the event updates v1.31: https://github.com/intel/perfmon/commit/5a1269c8af70e32a548e74e1fda736189c398ddc https://github.com/intel/perfmon/commit/76c6d2c348c067e9ae1b616b35ee982da6d873b4 Signed-off-by: Ian Rogers Tested-by: Thomas Falcon Link: https://lore.kernel.org/r/20250630163101.1920170-2-irogers@google.com Signed-off-by: Namhyung Kim --- .../pmu-events/arch/x86/alderlake/cache.json | 56 +++++++------------ .../arch/x86/alderlake/floating-point.json | 1 - .../pmu-events/arch/x86/alderlake/other.json | 1 - .../arch/x86/alderlake/pipeline.json | 44 ++------------- .../arch/x86/alderlake/virtual-memory.json | 3 - tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- 6 files changed, 28 insertions(+), 79 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/alderlake/cache.json b/tools/perf/pmu-events/arch/x86/alderlake/cache.json index c2802fbb853b..5461576dafc7 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/cache.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/cache.json @@ -728,7 +728,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT", - "PublicDescription": "Counts the number of load uops retired that hit in DRAM. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x80", "Unit": "cpu_atom" @@ -739,7 +738,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.HITM", - "PublicDescription": "Counts the number of load uops retired that hit in the L3 cache, in which a snoop was required and modified data was forwarded from another core or module. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x20", "Unit": "cpu_atom" @@ -750,7 +748,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", - "PublicDescription": "Counts the number of load uops retired that hit in the L1 data cache. 
Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x1", "Unit": "cpu_atom" @@ -761,7 +758,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", - "PublicDescription": "Counts the number of load uops retired that miss in the L1 data cache. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x8", "Unit": "cpu_atom" @@ -772,7 +768,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", - "PublicDescription": "Counts the number of load uops retired that hit in the L2 cache. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x2", "Unit": "cpu_atom" @@ -783,7 +778,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", - "PublicDescription": "Counts the number of load uops retired that miss in the L2 cache. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x10", "Unit": "cpu_atom" @@ -794,7 +788,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", - "PublicDescription": "Counts the number of load uops retired that hit in the L3 cache. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x4", "Unit": "cpu_atom" @@ -805,7 +798,6 @@ "Data_LA": "1", "EventCode": "0xd2", "EventName": "MEM_LOAD_UOPS_RETIRED_MISC.HIT_E_F", - "PublicDescription": "Counts the number of load uops retired that hit in the L3 cache, in which a snoop was required, and non-modified data was forwarded. Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x40", "Unit": "cpu_atom" @@ -816,7 +808,6 @@ "Data_LA": "1", "EventCode": "0xd2", "EventName": "MEM_LOAD_UOPS_RETIRED_MISC.L3_MISS", - "PublicDescription": "Counts the number of load uops retired that miss in the L3 cache. 
Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x20", "Unit": "cpu_atom" @@ -873,7 +864,7 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", - "PublicDescription": "Counts the total number of load uops retired. Available PDIST counters: 0", + "PublicDescription": "Counts the total number of load uops retired.", "SampleAfterValue": "200003", "UMask": "0x81", "Unit": "cpu_atom" @@ -884,111 +875,111 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", - "PublicDescription": "Counts the total number of store uops retired. Available PDIST counters: 0", + "PublicDescription": "Counts the total number of store uops retired.", "SampleAfterValue": "200003", "UMask": "0x82", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128", "MSRIndex": "0x3F6", "MSRValue": "0x80", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. 
If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16", "MSRIndex": "0x3F6", "MSRValue": "0x10", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256", "MSRIndex": "0x3F6", "MSRValue": "0x100", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. 
If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32", "MSRIndex": "0x3F6", "MSRValue": "0x20", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). 
Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4", "MSRIndex": "0x3F6", "MSRValue": "0x4", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512", "MSRIndex": "0x3F6", "MSRValue": "0x200", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. 
If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64", "MSRIndex": "0x3F6", "MSRValue": "0x40", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8", "MSRIndex": "0x3F6", "MSRValue": "0x8", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. 
If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5", "Unit": "cpu_atom" @@ -999,7 +990,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", - "PublicDescription": "Counts the number of load uops retired that performed one or more locks. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x21", "Unit": "cpu_atom" @@ -1010,7 +1000,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", - "PublicDescription": "Counts the number of retired split load uops. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x41", "Unit": "cpu_atom" @@ -1021,7 +1010,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.STLB_MISS", - "PublicDescription": "Counts the total number of load and store uops retired that missed in the second level TLB. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x13", "Unit": "cpu_atom" @@ -1032,7 +1020,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", - "PublicDescription": "Counts the number of load ops retired that miss in the second Level TLB. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x11", "Unit": "cpu_atom" @@ -1043,7 +1030,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", - "PublicDescription": "Counts the number of store ops retired that miss in the second level TLB. 
Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x12", "Unit": "cpu_atom" @@ -1054,7 +1040,7 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY", - "PublicDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled. If PEBS is enabled and a PEBS record is generated, will populate PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled. If PEBS is enabled and a PEBS record is generated, will populate PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x6", "Unit": "cpu_atom" @@ -1478,12 +1464,12 @@ "Unit": "cpu_core" }, { - "BriefDescription": "For every cycle where the core is waiting on at least 1 outstanding Demand RFO request, increments by 1.", + "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.", "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0x20", "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO Available PDIST counters: 0", + "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. 
Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x4", "Unit": "cpu_core" diff --git a/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json b/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json index ce570b96360a..d01f1b163ed8 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json @@ -213,7 +213,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.FPDIV", - "PublicDescription": "Counts the number of floating point divide uops retired (x87 and SSE, including x87 sqrt). Available PDIST counters: 0", "SampleAfterValue": "2000003", "UMask": "0x8", "Unit": "cpu_atom" diff --git a/tools/perf/pmu-events/arch/x86/alderlake/other.json b/tools/perf/pmu-events/arch/x86/alderlake/other.json index e4e75b088ccc..5f64138edfe4 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/other.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/other.json @@ -55,7 +55,6 @@ "Deprecated": "1", "EventCode": "0xe4", "EventName": "LBR_INSERTS.ANY", - "PublicDescription": "This event is deprecated. [This event is alias to MISC_RETIRED.LBR_INSERTS] Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x1", "Unit": "cpu_atom" diff --git a/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json b/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json index 7e0e33792c45..48ef2a8cc49a 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json @@ -128,7 +128,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for. 
Available PDIST counters: 0", + "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for.", "SampleAfterValue": "200003", "Unit": "cpu_atom" }, @@ -147,7 +147,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.CALL", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_CALL Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf9", "Unit": "cpu_atom" @@ -157,7 +156,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.COND", - "PublicDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x7e", "Unit": "cpu_atom" @@ -187,7 +185,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.COND_TAKEN", - "PublicDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfe", "Unit": "cpu_atom" @@ -207,7 +204,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.FAR_BRANCH", - "PublicDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xbf", "Unit": "cpu_atom" @@ -227,7 +223,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.INDIRECT", - "PublicDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired. 
Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xeb", "Unit": "cpu_atom" @@ -247,7 +242,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.INDIRECT_CALL", - "PublicDescription": "Counts the number of near indirect CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfb", "Unit": "cpu_atom" @@ -258,7 +252,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.IND_CALL", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfb", "Unit": "cpu_atom" @@ -269,7 +262,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.JCC", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x7e", "Unit": "cpu_atom" @@ -279,7 +271,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_CALL", - "PublicDescription": "Counts the number of near CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf9", "Unit": "cpu_atom" @@ -299,7 +290,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "PublicDescription": "Counts the number of near RET branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf7", "Unit": "cpu_atom" @@ -319,7 +309,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", - "PublicDescription": "Counts the number of near taken branch instructions retired. 
Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xc0", "Unit": "cpu_atom" @@ -340,7 +329,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NON_RETURN_IND", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xeb", "Unit": "cpu_atom" @@ -350,7 +338,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.REL_CALL", - "PublicDescription": "Counts the number of near relative CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfd", "Unit": "cpu_atom" @@ -361,7 +348,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.RETURN", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_RETURN Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf7", "Unit": "cpu_atom" @@ -372,7 +358,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.TAKEN_JCC", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND_TAKEN Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfe", "Unit": "cpu_atom" @@ -382,7 +367,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. 
A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path. Available PDIST counters: 0", + "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.", "SampleAfterValue": "200003", "Unit": "cpu_atom" }, @@ -400,7 +385,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.COND", - "PublicDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x7e", "Unit": "cpu_atom" @@ -430,7 +414,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.COND_TAKEN", - "PublicDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfe", "Unit": "cpu_atom" @@ -450,7 +433,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.INDIRECT", - "PublicDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired. 
Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xeb", "Unit": "cpu_atom" @@ -470,7 +452,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.INDIRECT_CALL", - "PublicDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfb", "Unit": "cpu_atom" @@ -491,7 +472,6 @@ "Deprecated": "1", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.IND_CALL", - "PublicDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT_CALL Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfb", "Unit": "cpu_atom" @@ -502,7 +482,6 @@ "Deprecated": "1", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.JCC", - "PublicDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x7e", "Unit": "cpu_atom" @@ -512,7 +491,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "PublicDescription": "Counts the number of mispredicted near taken branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x80", "Unit": "cpu_atom" @@ -533,7 +511,6 @@ "Deprecated": "1", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.NON_RETURN_IND", - "PublicDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xeb", "Unit": "cpu_atom" @@ -553,7 +530,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.RETURN", - "PublicDescription": "Counts the number of mispredicted near RET branch instructions retired. 
Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf7", "Unit": "cpu_atom" @@ -564,7 +540,6 @@ "Deprecated": "1", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.TAKEN_JCC", - "PublicDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND_TAKEN Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfe", "Unit": "cpu_atom" @@ -934,7 +909,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc0", "EventName": "INST_RETIRED.ANY_P", - "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses a programmable general purpose performance counter. Available PDIST counters: 0", + "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses a programmable general purpose performance counter.", "SampleAfterValue": "2000003", "Unit": "cpu_atom" }, @@ -1126,7 +1101,6 @@ "Deprecated": "1", "EventCode": "0x03", "EventName": "LD_BLOCKS.4K_ALIAS", - "PublicDescription": "This event is deprecated. Refer to new event LD_BLOCKS.ADDRESS_ALIAS Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x4", "Unit": "cpu_atom" @@ -1136,7 +1110,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0x03", "EventName": "LD_BLOCKS.ADDRESS_ALIAS", - "PublicDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check. 
Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x4", "Unit": "cpu_atom" @@ -1156,7 +1129,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0x03", "EventName": "LD_BLOCKS.DATA_UNKNOWN", - "PublicDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready. Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x1", "Unit": "cpu_atom" @@ -1186,7 +1158,7 @@ "Counter": "0,1,2,3", "EventCode": "0x4c", "EventName": "LOAD_HIT_PREFETCH.SWPF", - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0", + "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0", "SampleAfterValue": "100003", "UMask": "0x1", "Unit": "cpu_core" @@ -1306,7 +1278,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xe4", "EventName": "MISC_RETIRED.LBR_INSERTS", - "PublicDescription": "Counts the number of LBR entries recorded. Requires LBRs to be enabled in IA32_LBR_CTL. This event is PDIR on GP0 and NPEBS on all other GPs [This event is alias to LBR_INSERTS.ANY] Available PDIST counters: 0", + "PublicDescription": "Counts the number of LBR entries recorded. Requires LBRs to be enabled in IA32_LBR_CTL. 
This event is PDIR on GP0 and NPEBS on all other GPs [This event is alias to LBR_INSERTS.ANY]", "SampleAfterValue": "1000003", "UMask": "0x1", "Unit": "cpu_atom" @@ -1681,7 +1653,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "TOPDOWN_RETIRING.ALL", - "PublicDescription": "Counts the total number of consumed retirement slots. Available PDIST counters: 0", "SampleAfterValue": "1000003", "Unit": "cpu_atom" }, @@ -1933,7 +1904,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.ALL", - "PublicDescription": "Counts the total number of uops retired. Available PDIST counters: 0", "SampleAfterValue": "2000003", "Unit": "cpu_atom" }, @@ -1963,7 +1933,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.IDIV", - "PublicDescription": "Counts the number of integer divide uops retired. Available PDIST counters: 0", "SampleAfterValue": "2000003", "UMask": "0x10", "Unit": "cpu_atom" @@ -1973,7 +1942,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.MS", - "PublicDescription": "Counts the number of uops that are from complex flows issued by the Microcode Sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows. Available PDIST counters: 0", + "PublicDescription": "Counts the number of uops that are from complex flows issued by the Microcode Sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.", "SampleAfterValue": "2000003", "UMask": "0x1", "Unit": "cpu_atom" @@ -2030,7 +1999,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.X87", - "PublicDescription": "Counts the number of x87 uops retired, includes those in MS flows. 
Available PDIST counters: 0", "SampleAfterValue": "2000003", "UMask": "0x2", "Unit": "cpu_atom" diff --git a/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json index 3d15275eca61..ffbbd08acc68 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json @@ -266,7 +266,6 @@ "Deprecated": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS", - "PublicDescription": "This event is deprecated. Refer to new event MEM_UOPS_RETIRED.STLB_MISS Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x13", "Unit": "cpu_atom" @@ -278,7 +277,6 @@ "Deprecated": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_LOADS", - "PublicDescription": "This event is deprecated. Refer to new event MEM_UOPS_RETIRED.STLB_MISS_LOADS Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x11", "Unit": "cpu_atom" @@ -290,7 +288,6 @@ "Deprecated": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_STORES", - "PublicDescription": "This event is deprecated. 
Refer to new event MEM_UOPS_RETIRED.STLB_MISS_STORES Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x12", "Unit": "cpu_atom" diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index bde2f32423a1..35c5a4088356 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -1,5 +1,5 @@ Family-model,Version,Filename,EventType -GenuineIntel-6-(97|9A|B7|BA|BF),v1.29,alderlake,core +GenuineIntel-6-(97|9A|B7|BA|BF),v1.31,alderlake,core GenuineIntel-6-BE,v1.29,alderlaken,core GenuineIntel-6-C[56],v1.08,arrowlake,core GenuineIntel-6-(1C|26|27|35|36),v5,bonnell,core From e393a7b9202b0958dca0398732f4e38869a71668 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 30 Jun 2025 09:30:47 -0700 Subject: [PATCH 0554/2411] perf vendor events: Update AlderlakeN events Update events from v1.29 to v1.31. Bring in the event updates v1.31: https://github.com/intel/perfmon/commit/5a1269c8af70e32a548e74e1fda736189c398ddc https://github.com/intel/perfmon/commit/76c6d2c348c067e9ae1b616b35ee982da6d873b4 Signed-off-by: Ian Rogers Tested-by: Thomas Falcon Link: https://lore.kernel.org/r/20250630163101.1920170-3-irogers@google.com Signed-off-by: Namhyung Kim --- .../pmu-events/arch/x86/alderlaken/cache.json | 52 +++++++------------ .../arch/x86/alderlaken/floating-point.json | 1 - .../pmu-events/arch/x86/alderlaken/other.json | 1 - .../arch/x86/alderlaken/pipeline.json | 42 ++------------- .../arch/x86/alderlaken/virtual-memory.json | 3 -- tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- 6 files changed, 25 insertions(+), 76 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/cache.json b/tools/perf/pmu-events/arch/x86/alderlaken/cache.json index bf691aee1ef4..669f4979b651 100644 --- a/tools/perf/pmu-events/arch/x86/alderlaken/cache.json +++ b/tools/perf/pmu-events/arch/x86/alderlaken/cache.json @@ -118,7 +118,6 @@ "Data_LA": "1", "EventCode": "0xd1", 
"EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT", - "PublicDescription": "Counts the number of load uops retired that hit in DRAM. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x80" }, @@ -128,7 +127,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.HITM", - "PublicDescription": "Counts the number of load uops retired that hit in the L3 cache, in which a snoop was required and modified data was forwarded from another core or module. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x20" }, @@ -138,7 +136,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", - "PublicDescription": "Counts the number of load uops retired that hit in the L1 data cache. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x1" }, @@ -148,7 +145,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", - "PublicDescription": "Counts the number of load uops retired that miss in the L1 data cache. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x8" }, @@ -158,7 +154,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", - "PublicDescription": "Counts the number of load uops retired that hit in the L2 cache. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x2" }, @@ -168,7 +163,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", - "PublicDescription": "Counts the number of load uops retired that miss in the L2 cache. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x10" }, @@ -178,7 +172,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", - "PublicDescription": "Counts the number of load uops retired that hit in the L3 cache. 
Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x4" }, @@ -188,7 +181,6 @@ "Data_LA": "1", "EventCode": "0xd2", "EventName": "MEM_LOAD_UOPS_RETIRED_MISC.HIT_E_F", - "PublicDescription": "Counts the number of load uops retired that hit in the L3 cache, in which a snoop was required, and non-modified data was forwarded. Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x40" }, @@ -198,7 +190,6 @@ "Data_LA": "1", "EventCode": "0xd2", "EventName": "MEM_LOAD_UOPS_RETIRED_MISC.L3_MISS", - "PublicDescription": "Counts the number of load uops retired that miss in the L3 cache. Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x20" }, @@ -240,7 +231,7 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", - "PublicDescription": "Counts the total number of load uops retired. Available PDIST counters: 0", + "PublicDescription": "Counts the total number of load uops retired.", "SampleAfterValue": "200003", "UMask": "0x81" }, @@ -250,103 +241,103 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", - "PublicDescription": "Counts the total number of store uops retired. Available PDIST counters: 0", + "PublicDescription": "Counts the total number of store uops retired.", "SampleAfterValue": "200003", "UMask": "0x82" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128", "MSRIndex": "0x3F6", "MSRValue": "0x80", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. 
If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16", "MSRIndex": "0x3F6", "MSRValue": "0x10", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). 
Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256", "MSRIndex": "0x3F6", "MSRValue": "0x100", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32", "MSRIndex": "0x3F6", "MSRValue": "0x20", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. 
If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4", "MSRIndex": "0x3F6", "MSRValue": "0x4", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512", "MSRIndex": "0x3F6", "MSRValue": "0x200", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. 
Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64", "MSRIndex": "0x3F6", "MSRValue": "0x40", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). 
Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8", "MSRIndex": "0x3F6", "MSRValue": "0x8", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5" }, @@ -356,7 +347,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", - "PublicDescription": "Counts the number of load uops retired that performed one or more locks. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x21" }, @@ -366,7 +356,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", - "PublicDescription": "Counts the number of retired split load uops. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x41" }, @@ -376,7 +365,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.STLB_MISS", - "PublicDescription": "Counts the total number of load and store uops retired that missed in the second level TLB. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x13" }, @@ -386,7 +374,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", - "PublicDescription": "Counts the number of load ops retired that miss in the second Level TLB. 
Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x11" }, @@ -396,7 +383,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", - "PublicDescription": "Counts the number of store ops retired that miss in the second level TLB. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x12" }, @@ -406,7 +392,7 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY", - "PublicDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled. If PEBS is enabled and a PEBS record is generated, will populate PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled. If PEBS is enabled and a PEBS record is generated, will populate PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x6" }, diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json b/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json index f44da31ff1f1..ed963fcb6485 100644 --- a/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json @@ -29,7 +29,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.FPDIV", - "PublicDescription": "Counts the number of floating point divide uops retired (x87 and SSE, including x87 sqrt). 
Available PDIST counters: 0", "SampleAfterValue": "2000003", "UMask": "0x8" } diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/other.json b/tools/perf/pmu-events/arch/x86/alderlaken/other.json index 8c2b5a284f2a..144d7b06f240 100644 --- a/tools/perf/pmu-events/arch/x86/alderlaken/other.json +++ b/tools/perf/pmu-events/arch/x86/alderlaken/other.json @@ -5,7 +5,6 @@ "Deprecated": "1", "EventCode": "0xe4", "EventName": "LBR_INSERTS.ANY", - "PublicDescription": "This event is deprecated. [This event is alias to MISC_RETIRED.LBR_INSERTS] Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x1" }, diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json b/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json index 9616bf0e9f1f..1dd61baec1a9 100644 --- a/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json @@ -54,7 +54,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for. Available PDIST counters: 0", + "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for.", "SampleAfterValue": "200003" }, { @@ -63,7 +63,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.CALL", - "PublicDescription": "This event is deprecated. 
Refer to new event BR_INST_RETIRED.NEAR_CALL Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf9" }, @@ -72,7 +71,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.COND", - "PublicDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x7e" }, @@ -81,7 +79,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.COND_TAKEN", - "PublicDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfe" }, @@ -90,7 +87,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.FAR_BRANCH", - "PublicDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xbf" }, @@ -99,7 +95,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.INDIRECT", - "PublicDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xeb" }, @@ -108,7 +103,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.INDIRECT_CALL", - "PublicDescription": "Counts the number of near indirect CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfb" }, @@ -118,7 +112,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.IND_CALL", - "PublicDescription": "This event is deprecated. 
Refer to new event BR_INST_RETIRED.INDIRECT_CALL Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfb" }, @@ -128,7 +121,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.JCC", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x7e" }, @@ -137,7 +129,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_CALL", - "PublicDescription": "Counts the number of near CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf9" }, @@ -146,7 +137,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "PublicDescription": "Counts the number of near RET branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf7" }, @@ -155,7 +145,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", - "PublicDescription": "Counts the number of near taken branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xc0" }, @@ -165,7 +154,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NON_RETURN_IND", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xeb" }, @@ -174,7 +162,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.REL_CALL", - "PublicDescription": "Counts the number of near relative CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfd" }, @@ -184,7 +171,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.RETURN", - "PublicDescription": "This event is deprecated. 
Refer to new event BR_INST_RETIRED.NEAR_RETURN Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf7" }, @@ -194,7 +180,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.TAKEN_JCC", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND_TAKEN Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfe" }, @@ -203,7 +188,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path. Available PDIST counters: 0", + "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. 
A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.", "SampleAfterValue": "200003" }, { @@ -211,7 +196,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.COND", - "PublicDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x7e" }, @@ -220,7 +204,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.COND_TAKEN", - "PublicDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfe" }, @@ -229,7 +212,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.INDIRECT", - "PublicDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xeb" }, @@ -238,7 +220,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.INDIRECT_CALL", - "PublicDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfb" }, @@ -248,7 +229,6 @@ "Deprecated": "1", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.IND_CALL", - "PublicDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT_CALL Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfb" }, @@ -258,7 +238,6 @@ "Deprecated": "1", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.JCC", - "PublicDescription": "This event is deprecated. 
Refer to new event BR_MISP_RETIRED.COND Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x7e" }, @@ -267,7 +246,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "PublicDescription": "Counts the number of mispredicted near taken branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x80" }, @@ -277,7 +255,6 @@ "Deprecated": "1", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.NON_RETURN_IND", - "PublicDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xeb" }, @@ -286,7 +263,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.RETURN", - "PublicDescription": "Counts the number of mispredicted near RET branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf7" }, @@ -296,7 +272,6 @@ "Deprecated": "1", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.TAKEN_JCC", - "PublicDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND_TAKEN Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfe" }, @@ -371,7 +346,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc0", "EventName": "INST_RETIRED.ANY_P", - "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses a programmable general purpose performance counter. Available PDIST counters: 0", + "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. 
This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses a programmable general purpose performance counter.", "SampleAfterValue": "2000003" }, { @@ -380,7 +355,6 @@ "Deprecated": "1", "EventCode": "0x03", "EventName": "LD_BLOCKS.4K_ALIAS", - "PublicDescription": "This event is deprecated. Refer to new event LD_BLOCKS.ADDRESS_ALIAS Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x4" }, @@ -389,7 +363,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0x03", "EventName": "LD_BLOCKS.ADDRESS_ALIAS", - "PublicDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check. Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x4" }, @@ -398,7 +371,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0x03", "EventName": "LD_BLOCKS.DATA_UNKNOWN", - "PublicDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready. Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x1" }, @@ -448,7 +420,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xe4", "EventName": "MISC_RETIRED.LBR_INSERTS", - "PublicDescription": "Counts the number of LBR entries recorded. Requires LBRs to be enabled in IA32_LBR_CTL. This event is PDIR on GP0 and NPEBS on all other GPs [This event is alias to LBR_INSERTS.ANY] Available PDIST counters: 0", + "PublicDescription": "Counts the number of LBR entries recorded. Requires LBRs to be enabled in IA32_LBR_CTL. This event is PDIR on GP0 and NPEBS on all other GPs [This event is alias to LBR_INSERTS.ANY]", "SampleAfterValue": "1000003", "UMask": "0x1" }, @@ -651,7 +623,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "TOPDOWN_RETIRING.ALL", - "PublicDescription": "Counts the total number of consumed retirement slots. 
Available PDIST counters: 0", "SampleAfterValue": "1000003" }, { @@ -667,7 +638,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.ALL", - "PublicDescription": "Counts the total number of uops retired. Available PDIST counters: 0", "SampleAfterValue": "2000003" }, { @@ -675,7 +645,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.IDIV", - "PublicDescription": "Counts the number of integer divide uops retired. Available PDIST counters: 0", "SampleAfterValue": "2000003", "UMask": "0x10" }, @@ -684,7 +653,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.MS", - "PublicDescription": "Counts the number of uops that are from complex flows issued by the Microcode Sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows. Available PDIST counters: 0", + "PublicDescription": "Counts the number of uops that are from complex flows issued by the Microcode Sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.", "SampleAfterValue": "2000003", "UMask": "0x1" }, @@ -693,7 +662,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.X87", - "PublicDescription": "Counts the number of x87 uops retired, includes those in MS flows. Available PDIST counters: 0", "SampleAfterValue": "2000003", "UMask": "0x2" } diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json b/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json index c348046696bf..d9c737a17df0 100644 --- a/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json @@ -57,7 +57,6 @@ "Deprecated": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS", - "PublicDescription": "This event is deprecated. 
Refer to new event MEM_UOPS_RETIRED.STLB_MISS Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x13" }, @@ -68,7 +67,6 @@ "Deprecated": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_LOADS", - "PublicDescription": "This event is deprecated. Refer to new event MEM_UOPS_RETIRED.STLB_MISS_LOADS Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x11" }, @@ -79,7 +77,6 @@ "Deprecated": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_STORES", - "PublicDescription": "This event is deprecated. Refer to new event MEM_UOPS_RETIRED.STLB_MISS_STORES Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x12" } diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 35c5a4088356..8a2ee64cad7e 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -1,6 +1,6 @@ Family-model,Version,Filename,EventType GenuineIntel-6-(97|9A|B7|BA|BF),v1.31,alderlake,core -GenuineIntel-6-BE,v1.29,alderlaken,core +GenuineIntel-6-BE,v1.31,alderlaken,core GenuineIntel-6-C[56],v1.08,arrowlake,core GenuineIntel-6-(1C|26|27|35|36),v5,bonnell,core GenuineIntel-6-(3D|47),v30,broadwell,core From e7c38d634cad1c71220767375c6e276de72c2dbf Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 30 Jun 2025 09:30:48 -0700 Subject: [PATCH 0555/2411] perf vendor events: Update Arrowlake events Update events from v1.08 to v1.09. 
Bring in the event updates v1.09: https://github.com/intel/perfmon/commit/cf3be6daf0a751ad270b67890dfdb2261dfc75da Signed-off-by: Ian Rogers Tested-by: Thomas Falcon Link: https://lore.kernel.org/r/20250630163101.1920170-4-irogers@google.com Signed-off-by: Namhyung Kim --- .../pmu-events/arch/x86/arrowlake/cache.json | 13 +- .../arch/x86/arrowlake/frontend.json | 135 ++++++++++++++++++ tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- 3 files changed, 148 insertions(+), 2 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/cache.json b/tools/perf/pmu-events/arch/x86/arrowlake/cache.json index 70175404540d..91929d8bcf47 100644 --- a/tools/perf/pmu-events/arch/x86/arrowlake/cache.json +++ b/tools/perf/pmu-events/arch/x86/arrowlake/cache.json @@ -237,7 +237,7 @@ "Unit": "cpu_lowpower" }, { - "BriefDescription": "Counts the number of L2 Cache Accesses that miss the L2 and get BBL reject short and long rejects (includes those counted in L2_reject_XQ.any), per core event", + "BriefDescription": "Counts the number of L2 Cache Accesses that miss the L2 and get BBL reject short and long rejects, per core event", "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x24", "EventName": "L2_REQUEST.REJECTS", @@ -728,6 +728,17 @@ "EventName": "MEM_LOAD_RETIRED.L1_HIT", "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0", "SampleAfterValue": "1000003", + "UMask": "0x101", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts retired load instructions with at least one uop that hit in the Level 0 of the L1 data cache. 
This event includes all SW prefetches and lock instructions regardless of the data source.", + "Counter": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L1_HIT_L0", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 0 of the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0", + "SampleAfterValue": "1000003", "UMask": "0x1", "Unit": "cpu_core" }, diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/frontend.json b/tools/perf/pmu-events/arch/x86/arrowlake/frontend.json index 67cc83de18d3..56cf1ec63200 100644 --- a/tools/perf/pmu-events/arch/x86/arrowlake/frontend.json +++ b/tools/perf/pmu-events/arch/x86/arrowlake/frontend.json @@ -58,6 +58,22 @@ "UMask": "0x2", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged with having preceded with frontend bound behavior", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ALL", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged with having preceded with frontend bound behavior", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ALL", + "SampleAfterValue": "1000003", + "Unit": "cpu_lowpower" + }, { "BriefDescription": "Retired ANT branches", "Counter": "0,1,2,3,4,5,6,7,8,9", @@ -82,6 +98,80 @@ "UMask": "0x3", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles/empty issue slots due to a baclear", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.BRANCH_DETECT", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instruction retired 
that are tagged after a branch instruction causes bubbles/empty issue slots due to a baclear", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.BRANCH_DETECT", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_lowpower" + }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles /empty issue slots due to a btclear", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.BRANCH_RESTEER", + "SampleAfterValue": "1000003", + "UMask": "0x40", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles /empty issue slots due to a btclear", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.BRANCH_RESTEER", + "SampleAfterValue": "1000003", + "UMask": "0x40", + "Unit": "cpu_lowpower" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged following an ms flow due to the bubble/wasted issue slot from exiting long ms flow", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.CISC", + "PublicDescription": "Counts the number of instructions retired that were tagged following an ms flow due to the bubble/wasted issue slot from exiting long ms flow", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged following an ms flow due to the bubble/wasted issue slot from exiting long ms flow", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.CISC", + "PublicDescription": "Counts the number of instructions retired that were tagged following an ms flow due to the bubble/wasted issue slot from exiting long ms flow", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_lowpower" 
+ }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged every cycle the decoder is unable to send 4 uops", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.DECODE", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged every cycle the decoder is unable to send 3 uops per cycle.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.DECODE", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_lowpower" + }, { "BriefDescription": "Retired Instructions who experienced a critical DSB miss.", "Counter": "0,1,2,3,4,5,6,7,8,9", @@ -103,6 +193,15 @@ "UMask": "0x20", "Unit": "cpu_atom" }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to icache miss", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ICACHE", + "SampleAfterValue": "1000003", + "UMask": "0x20", + "Unit": "cpu_lowpower" + }, { "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to ITLB miss", "Counter": "0,1,2,3,4,5,6,7", @@ -301,6 +400,42 @@ "UMask": "0x3", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of instruction retired tagged after a wasted issue slot if none of the previous events occurred", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.OTHER", + "SampleAfterValue": "1000003", + "UMask": "0x80", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instruction retired tagged after a wasted issue slot if none of the previous events occurred", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.OTHER", + "SampleAfterValue": "1000003", + "UMask": 
"0x80", + "Unit": "cpu_lowpower" + }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles/empty issue slots due to a predecode wrong", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.PREDECODE", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles/empty issue slots due to a predecode wrong.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.PREDECODE", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_lowpower" + }, { "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.", "Counter": "0,1,2,3,4,5,6,7,8,9", diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 8a2ee64cad7e..b2db2bb658ce 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -1,7 +1,7 @@ Family-model,Version,Filename,EventType GenuineIntel-6-(97|9A|B7|BA|BF),v1.31,alderlake,core GenuineIntel-6-BE,v1.31,alderlaken,core -GenuineIntel-6-C[56],v1.08,arrowlake,core +GenuineIntel-6-C[56],v1.09,arrowlake,core GenuineIntel-6-(1C|26|27|35|36),v5,bonnell,core GenuineIntel-6-(3D|47),v30,broadwell,core GenuineIntel-6-56,v12,broadwellde,core From 73a33656896f87962c47462ff8ebda03d9094e0a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 30 Jun 2025 09:30:49 -0700 Subject: [PATCH 0556/2411] perf vendor events: Update CascadelakeX events Update events from v1.23 to v1.25. 
Bring in the event updates v1.25: https://github.com/intel/perfmon/commit/86f146e15626b0fd3b032cab4538cafaaf2d0635 https://github.com/intel/perfmon/commit/fef03ffc333ae44d1e9d695b4e67e5bbb4429729 Signed-off-by: Ian Rogers Tested-by: Thomas Falcon Link: https://lore.kernel.org/r/20250630163101.1920170-5-irogers@google.com Signed-off-by: Namhyung Kim --- .../pmu-events/arch/x86/cascadelakex/floating-point.json | 6 +++--- tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json | 2 +- tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/floating-point.json b/tools/perf/pmu-events/arch/x86/cascadelakex/floating-point.json index 1c709983b65f..3ef6f00f1135 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/floating-point.json @@ -111,7 +111,7 @@ "Counter": "0,1,2,3", "EventCode": "0xCF", "EventName": "FP_ARITH_INST_RETIRED2.128BIT_PACKED_BF16", - "PublicDescription": "Counts once for each Intel AVX-512 computational 512-bit packed BFloat16 floating-point instruction retired. Applies to the ZMM based VDPBF16PS instruction. Each count represents 64 computation operations. This event is only supported on products formerly named Cooper Lake and is not supported on products formerly named Cascade Lake.", + "PublicDescription": "Counts once for each Intel AVX-512 computational 128-bit packed BFloat16 floating-point instruction retired. Applies to the XMM based VDPBF16PS instruction. Each count represents 16 computation operations. 
This event is only supported on products formerly named Cooper Lake and is not supported on products formerly named Cascade Lake.", "SampleAfterValue": "2000003", "UMask": "0x20" }, @@ -120,7 +120,7 @@ "Counter": "0,1,2,3", "EventCode": "0xCF", "EventName": "FP_ARITH_INST_RETIRED2.256BIT_PACKED_BF16", - "PublicDescription": "Counts once for each Intel AVX-512 computational 128-bit packed BFloat16 floating-point instruction retired. Applies to the XMM based VDPBF16PS instruction. Each count represents 16 computation operations. This event is only supported on products formerly named Cooper Lake and is not supported on products formerly named Cascade Lake.", + "PublicDescription": "Counts once for each Intel AVX-512 computational 256-bit packed BFloat16 floating-point instruction retired. Applies to the YMM based VDPBF16PS instruction. Each count represents 32 computation operations. This event is only supported on products formerly named Cooper Lake and is not supported on products formerly named Cascade Lake.", "SampleAfterValue": "2000003", "UMask": "0x40" }, @@ -129,7 +129,7 @@ "Counter": "0,1,2,3", "EventCode": "0xCF", "EventName": "FP_ARITH_INST_RETIRED2.512BIT_PACKED_BF16", - "PublicDescription": "Counts once for each Intel AVX-512 computational 256-bit packed BFloat16 floating-point instruction retired. Applies to the YMM based VDPBF16PS instruction. Each count represents 32 computation operations. This event is only supported on products formerly named Cooper Lake and is not supported on products formerly named Cascade Lake.", + "PublicDescription": "Counts once for each Intel AVX-512 computational 512-bit packed BFloat16 floating-point instruction retired. Applies to the ZMM based VDPBF16PS instruction. Each count represents 64 computation operations. 
This event is only supported on products formerly named Cooper Lake and is not supported on products formerly named Cascade Lake.", "SampleAfterValue": "2000003", "UMask": "0x80" }, diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json index 3dd296ab4d78..9a1349527b66 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json @@ -542,7 +542,7 @@ "Counter": "0,1,2,3", "EventCode": "0x4C", "EventName": "LOAD_HIT_PRE.SW_PF", - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", + "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. 
So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", "SampleAfterValue": "100003", "UMask": "0x1" }, diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index b2db2bb658ce..9a60e95a2e15 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -6,7 +6,7 @@ GenuineIntel-6-(1C|26|27|35|36),v5,bonnell,core GenuineIntel-6-(3D|47),v30,broadwell,core GenuineIntel-6-56,v12,broadwellde,core GenuineIntel-6-4F,v23,broadwellx,core -GenuineIntel-6-55-[56789ABCDEF],v1.23,cascadelakex,core +GenuineIntel-6-55-[56789ABCDEF],v1.25,cascadelakex,core GenuineIntel-6-DD,v1.00,clearwaterforest,core GenuineIntel-6-9[6C],v1.05,elkhartlake,core GenuineIntel-6-CF,v1.11,emeraldrapids,core From 31c8714cf5b91da62ae549323fc41e32609a5b4b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 30 Jun 2025 09:30:50 -0700 Subject: [PATCH 0557/2411] perf vendor events: Update EmeraldRapids events Update events from v1.11 to v1.14. 
Bring in the event updates v1.14: https://github.com/intel/perfmon/commit/6f6e4c8c906992b450cb2014d0501a9ec1cda0d0 https://github.com/intel/perfmon/commit/e363f82276c129aec60402a1d64efbbd41af844e Signed-off-by: Ian Rogers Tested-by: Thomas Falcon Link: https://lore.kernel.org/r/20250630163101.1920170-6-irogers@google.com Signed-off-by: Namhyung Kim --- .../arch/x86/emeraldrapids/pipeline.json | 2 +- .../arch/x86/emeraldrapids/uncore-io.json | 12 +++++++++++ .../arch/x86/emeraldrapids/uncore-memory.json | 20 +++++++++++++++++++ tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json index 00b05a77c289..48bec483b49a 100644 --- a/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json @@ -684,7 +684,7 @@ "Counter": "0,1,2,3", "EventCode": "0x4c", "EventName": "LOAD_HIT_PREFETCH.SWPF", - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0", + "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. 
Available PDIST counters: 0", "SampleAfterValue": "100003", "UMask": "0x1" }, diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json index 94340dee1c9c..d4cf2199d46b 100644 --- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json +++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json @@ -1821,6 +1821,18 @@ "UMask": "0x4", "Unit": "IIO" }, + { + "BriefDescription": "Posted requests sent by the integrated IO (IIO) controller to the Ubox, useful for counting message signaled interrupts (MSI).", + "Counter": "0,1,2,3", + "EventCode": "0x8e", + "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX_POSTED", + "Experimental": "1", + "FCMask": "0x01", + "PerPkg": "1", + "PortMask": "0x00FF", + "UMask": "0x4", + "Unit": "IIO" + }, { "BriefDescription": "ITC address map 1", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-memory.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-memory.json index aa06088dd26f..68be01dad7c9 100644 --- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-memory.json @@ -2145,6 +2145,16 @@ "UMask": "0x1", "Unit": "MCHBM" }, + { + "BriefDescription": "ECC Correctable Errors", + "Counter": "0,1,2,3", + "EventCode": "0x09", + "EventName": "UNC_MCHBM_ECC_CORRECTABLE_ERRORS", + "Experimental": "1", + "PerPkg": "1", + "PublicDescription": "ECC Correctable Errors. Counts the number of ECC errors detected and corrected by the iMC on this channel. This counter is only useful with ECC devices. This count will increment one time for each correction regardless of the number of bits corrected. 
The iMC can correct up to 4 bit errors in independent channel mode and 8 bit errors in lockstep mode.", + "Unit": "MCHBM" + }, { "BriefDescription": "HBM Precharge All Commands", "Counter": "0,1,2,3", @@ -2759,6 +2769,16 @@ "UMask": "0x3", "Unit": "iMC" }, + { + "BriefDescription": "ECC Correctable Errors", + "Counter": "0,1,2,3", + "EventCode": "0x09", + "EventName": "UNC_M_ECC_CORRECTABLE_ERRORS", + "Experimental": "1", + "PerPkg": "1", + "PublicDescription": "ECC Correctable Errors : Counts the number of ECC errors detected and corrected by the iMC on this channel. This counter is only useful with ECC DRAM devices. This count will increment one time for each correction regardless of the number of bits corrected. The iMC can correct up to 4 bit errors in independent channel mode and 8 bit errors in lockstep mode.", + "Unit": "iMC" + }, { "BriefDescription": "IMC Clockticks at HCLK frequency", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 9a60e95a2e15..e139a099374a 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -9,7 +9,7 @@ GenuineIntel-6-4F,v23,broadwellx,core GenuineIntel-6-55-[56789ABCDEF],v1.25,cascadelakex,core GenuineIntel-6-DD,v1.00,clearwaterforest,core GenuineIntel-6-9[6C],v1.05,elkhartlake,core -GenuineIntel-6-CF,v1.11,emeraldrapids,core +GenuineIntel-6-CF,v1.14,emeraldrapids,core GenuineIntel-6-5[CF],v13,goldmont,core GenuineIntel-6-7A,v1.01,goldmontplus,core GenuineIntel-6-B6,v1.07,grandridge,core From 25da8939d615dc6cac67a57b320b2793691b39aa Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 30 Jun 2025 09:30:51 -0700 Subject: [PATCH 0558/2411] perf vendor events: Update GrandRidge events Update events from v1.07 to v1.09. 
Bring in the event updates v1.09: https://github.com/intel/perfmon/commit/8c74d09c8544421256a79f4f21e548ad756f5b7f https://github.com/intel/perfmon/commit/18c7d2a75e45eacf5553f900ae2097a1290f5bed Signed-off-by: Ian Rogers Tested-by: Thomas Falcon Link: https://lore.kernel.org/r/20250630163101.1920170-7-irogers@google.com Signed-off-by: Namhyung Kim --- .../arch/x86/grandridge/grr-metrics.json | 30 +++++++++++++++---- .../x86/grandridge/uncore-interconnect.json | 10 +++++++ .../arch/x86/grandridge/uncore-io.json | 12 ++++++++ tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- 4 files changed, 47 insertions(+), 7 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/grandridge/grr-metrics.json b/tools/perf/pmu-events/arch/x86/grandridge/grr-metrics.json index 1c6dba7b2822..878b1caf12de 100644 --- a/tools/perf/pmu-events/arch/x86/grandridge/grr-metrics.json +++ b/tools/perf/pmu-events/arch/x86/grandridge/grr-metrics.json @@ -106,6 +106,30 @@ "MetricName": "io_bandwidth_write", "ScaleUnit": "1MB/s" }, + { + "BriefDescription": "The percent of inbound full cache line writes initiated by IO that miss the L3 cache", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM / UNC_CHA_TOR_INSERTS.IO_ITOM", + "MetricName": "io_full_write_l3_miss", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Message Signaled Interrupts (MSI) per second sent by the integrated I/O traffic controller (IIO) to System Configuration Controller (Ubox)", + "MetricExpr": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX_POSTED / duration_time", + "MetricName": "io_msi", + "ScaleUnit": "1per_sec" + }, + { + "BriefDescription": "The percent of inbound partial writes initiated by IO that miss the L3 cache", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_RFO) / (UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_RFO)", + "MetricName": "io_partial_write_l3_miss", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "The percent of inbound reads initiated by IO 
that miss the L3 cache", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR / UNC_CHA_TOR_INSERTS.IO_PCIRDCUR", + "MetricName": "io_read_l3_miss", + "ScaleUnit": "100%" + }, { "BriefDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions", "MetricExpr": "ITLB_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY", @@ -162,12 +186,6 @@ "MetricName": "llc_data_read_mpi_demand_plus_prefetch", "ScaleUnit": "1per_instr" }, - { - "BriefDescription": "Average latency of a last level cache (LLC) demand data read miss (read memory access) in nano seconds", - "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT) / (UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT) * #num_packages)) * duration_time", - "MetricName": "llc_demand_data_read_miss_latency", - "ScaleUnit": "1ns" - }, { "BriefDescription": "Load operations retired per instruction", "MetricExpr": "MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY", diff --git a/tools/perf/pmu-events/arch/x86/grandridge/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/grandridge/uncore-interconnect.json index 2c18767511f3..c7250332d8aa 100644 --- a/tools/perf/pmu-events/arch/x86/grandridge/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/grandridge/uncore-interconnect.json @@ -261,5 +261,15 @@ "PerPkg": "1", "UMask": "0x8", "Unit": "IRP" + }, + { + "BriefDescription": "Message Received : MSI", + "Counter": "0,1", + "EventCode": "0x42", + "EventName": "UNC_U_EVENT_MSG.MSI_RCVD", + "PerPkg": "1", + "PublicDescription": "Message Received : MSI : Message Signaled Interrupts - interrupts sent by devices (including PCIe via IOxAPIC) (Socket Mode only)", + "UMask": "0x2", + "Unit": "UBOX" } ] diff --git a/tools/perf/pmu-events/arch/x86/grandridge/uncore-io.json b/tools/perf/pmu-events/arch/x86/grandridge/uncore-io.json index c5b05c71c56d..764cf2f0b4a8 
100644 --- a/tools/perf/pmu-events/arch/x86/grandridge/uncore-io.json +++ b/tools/perf/pmu-events/arch/x86/grandridge/uncore-io.json @@ -907,6 +907,18 @@ "UMask": "0x4", "Unit": "IIO" }, + { + "BriefDescription": "Posted requests sent by the integrated IO (IIO) controller to the Ubox, useful for counting message signaled interrupts (MSI).", + "Counter": "0,1,2,3", + "EventCode": "0x8e", + "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX_POSTED", + "FCMask": "0x01", + "PerPkg": "1", + "PortMask": "0x0FF", + "PublicDescription": "-", + "UMask": "0x4", + "Unit": "IIO" + }, { "BriefDescription": "All 9 bits of Page Walk Tracker Occupancy", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index e139a099374a..f3fe686b6630 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -12,7 +12,7 @@ GenuineIntel-6-9[6C],v1.05,elkhartlake,core GenuineIntel-6-CF,v1.14,emeraldrapids,core GenuineIntel-6-5[CF],v13,goldmont,core GenuineIntel-6-7A,v1.01,goldmontplus,core -GenuineIntel-6-B6,v1.07,grandridge,core +GenuineIntel-6-B6,v1.09,grandridge,core GenuineIntel-6-A[DE],v1.08,graniterapids,core GenuineIntel-6-(3C|45|46),v36,haswell,core GenuineIntel-6-3F,v29,haswellx,core From 81699249168750fd7bf9101d2e98b5133d5c23f2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 30 Jun 2025 09:30:52 -0700 Subject: [PATCH 0559/2411] perf vendor events: Update GraniteRapids events Update events from v1.08 to v1.10. 
Bring in the event updates v1.10 https://github.com/intel/perfmon/commit/96259a932e2ce5f70ed7d347ca92fdeb78f83aa5 https://github.com/intel/perfmon/commit/19e315c8d2e0b44e170a6e60de44c9359062a6aa Signed-off-by: Ian Rogers Tested-by: Thomas Falcon Link: https://lore.kernel.org/r/20250630163101.1920170-8-irogers@google.com Signed-off-by: Namhyung Kim --- .../arch/x86/graniterapids/cache.json | 9 +++++ .../arch/x86/graniterapids/counter.json | 10 +++--- .../arch/x86/graniterapids/gnr-metrics.json | 36 +++++++++++++++++++ .../arch/x86/graniterapids/pipeline.json | 2 +- .../graniterapids/uncore-interconnect.json | 19 ---------- .../arch/x86/graniterapids/uncore-io.json | 27 +++++++++++++- tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- 7 files changed, 78 insertions(+), 27 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/cache.json b/tools/perf/pmu-events/arch/x86/graniterapids/cache.json index 32f99a8a3871..dbdeade6fe6f 100644 --- a/tools/perf/pmu-events/arch/x86/graniterapids/cache.json +++ b/tools/perf/pmu-events/arch/x86/graniterapids/cache.json @@ -977,6 +977,15 @@ "SampleAfterValue": "100003", "UMask": "0x4" }, + { + "BriefDescription": "Offcore Uncacheable memory data read transactions.", + "Counter": "0,1,2,3", + "EventCode": "0x21", + "EventName": "OFFCORE_REQUESTS.MEM_UC", + "PublicDescription": "This event counts noncacheable memory data read transactions. 
Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x20" + }, { "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/counter.json b/tools/perf/pmu-events/arch/x86/graniterapids/counter.json index 5d3b202eadd3..d97211a0227e 100644 --- a/tools/perf/pmu-events/arch/x86/graniterapids/counter.json +++ b/tools/perf/pmu-events/arch/x86/graniterapids/counter.json @@ -59,6 +59,11 @@ "CountersNumFixed": "0", "CountersNumGeneric": 4 }, + { + "Unit": "UBOX", + "CountersNumFixed": "0", + "CountersNumGeneric": "2" + }, { "Unit": "PCU", "CountersNumFixed": "0", @@ -73,10 +78,5 @@ "Unit": "MDF", "CountersNumFixed": "0", "CountersNumGeneric": "4" - }, - { - "Unit": "UBOX", - "CountersNumFixed": "0", - "CountersNumGeneric": "2" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json b/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json index af527f7f9d0c..9a620e1b8de8 100644 --- a/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json +++ b/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json @@ -95,6 +95,12 @@ "MetricName": "io_bandwidth_read", "ScaleUnit": "1MB/s" }, + { + "BriefDescription": "Bandwidth of inbound IO reads that are initiated by end device controllers that are requesting memory from the CPU and miss the L3 cache", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR * 64 / 1e6 / duration_time", + "MetricName": "io_bandwidth_read_l3_miss", + "ScaleUnit": "1MB/s" + }, { "BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the local CPU socket", "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_LOCAL * 64 / 1e6 / duration_time", @@ -113,6 +119,12 @@ "MetricName": "io_bandwidth_write", "ScaleUnit": "1MB/s" }, + { + "BriefDescription": 
"Bandwidth of inbound IO writes that are initiated by end device controllers that are writing memory to the CPU", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOM + UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR) * 64 / 1e6 / duration_time", + "MetricName": "io_bandwidth_write_l3_miss", + "ScaleUnit": "1MB/s" + }, { "BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the local CPU socket", "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_ITOM_LOCAL + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_LOCAL) * 64 / 1e6 / duration_time", @@ -125,6 +137,30 @@ "MetricName": "io_bandwidth_write_remote", "ScaleUnit": "1MB/s" }, + { + "BriefDescription": "The percent of inbound full cache line writes initiated by IO that miss the L3 cache", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM / UNC_CHA_TOR_INSERTS.IO_ITOM", + "MetricName": "io_full_write_l3_miss", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Message Signaled Interrupts (MSI) per second sent by the integrated I/O traffic controller (IIO) to System Configuration Controller (Ubox)", + "MetricExpr": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX_POSTED / duration_time", + "MetricName": "io_msi", + "ScaleUnit": "1per_sec" + }, + { + "BriefDescription": "The percent of inbound partial writes initiated by IO that miss the L3 cache", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_RFO) / (UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_RFO)", + "MetricName": "io_partial_write_l3_miss", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "The percent of inbound reads initiated by IO that miss the L3 cache", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR / UNC_CHA_TOR_INSERTS.IO_PCIRDCUR", + "MetricName": "io_read_l3_miss", + "ScaleUnit": "100%" + }, { "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions", "MetricExpr": 
"ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY", diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json b/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json index 1edfdad1600d..27af3bd6bacf 100644 --- a/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json @@ -738,7 +738,7 @@ "Counter": "0,1,2,3", "EventCode": "0x4c", "EventName": "LOAD_HIT_PREFETCH.SWPF", - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0", + "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0", "SampleAfterValue": "100003", "UMask": "0x1" }, diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json index e5bd11b27bcd..6667fbc50452 100644 --- a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json @@ -1915,24 +1915,6 @@ "UMask": "0x4", "Unit": "UPI" }, - { - "BriefDescription": "Tx Flit Buffer Allocations : Number of allocations into the UPI Tx Flit Buffer. Generally, when data is transmitted across UPI, it will bypass the TxQ and pass directly to the link. However, the TxQ will be used with L0p and when LLR occurs, increasing latency to transfer out to the link. 
This event can be used in conjunction with the Flit Buffer Occupancy event in order to calculate the average flit buffer lifetime.", - "Counter": "0,1,2,3", - "EventCode": "0x40", - "EventName": "UNC_UPI_TxL_INSERTS", - "Experimental": "1", - "PerPkg": "1", - "Unit": "UPI" - }, - { - "BriefDescription": "Tx Flit Buffer Occupancy : Accumulates the number of flits in the TxQ. Generally, when data is transmitted across UPI, it will bypass the TxQ and pass directly to the link. However, the TxQ will be used with L0p and when LLR occurs, increasing latency to transfer out to the link. This can be used with the cycles not empty event to track average occupancy, or the allocations event to track average lifetime in the TxQ.", - "Counter": "0,1,2,3", - "EventCode": "0x42", - "EventName": "UNC_UPI_TxL_OCCUPANCY", - "Experimental": "1", - "PerPkg": "1", - "Unit": "UPI" - }, { "BriefDescription": "Message Received : Doorbell", "Counter": "0,1", @@ -1970,7 +1952,6 @@ "Counter": "0,1", "EventCode": "0x42", "EventName": "UNC_U_EVENT_MSG.MSI_RCVD", - "Experimental": "1", "PerPkg": "1", "PublicDescription": "Message Received : MSI : Message Signaled Interrupts - interrupts sent by devices (including PCIe via IOxAPIC) (Socket Mode only)", "UMask": "0x2", diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json index 886b99a971be..f4f956966e16 100644 --- a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json +++ b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json @@ -1121,8 +1121,9 @@ "Unit": "IIO" }, { - "BriefDescription": "Occupancy of outbound request queue : To device : Counts number of outbound requests/completions IIO is currently processing", + "BriefDescription": "This event is deprecated. 
[This event is alias to UNC_IIO_NUM_OUTSTANDING_REQ_FROM_CPU.TO_IO]", "Counter": "2,3", + "Deprecated": "1", "EventCode": "0xc5", "EventName": "UNC_IIO_NUM_OUSTANDING_REQ_FROM_CPU.TO_IO", "Experimental": "1", @@ -1132,6 +1133,18 @@ "UMask": "0x8", "Unit": "IIO" }, + { + "BriefDescription": "Occupancy of outbound request queue : To device : Counts number of outbound requests/completions IIO is currently processing [This event is alias to UNC_IIO_NUM_OUSTANDING_REQ_FROM_CPU.TO_IO]", + "Counter": "2,3", + "EventCode": "0xc5", + "EventName": "UNC_IIO_NUM_OUTSTANDING_REQ_FROM_CPU.TO_IO", + "Experimental": "1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x0FF", + "UMask": "0x8", + "Unit": "IIO" + }, { "BriefDescription": "Passing data to be written", "Counter": "0,1,2,3", @@ -1300,6 +1313,18 @@ "UMask": "0x4", "Unit": "IIO" }, + { + "BriefDescription": "Posted requests sent by the integrated IO (IIO) controller to the Ubox, useful for counting message signaled interrupts (MSI).", + "Counter": "0,1,2,3", + "EventCode": "0x8e", + "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX_POSTED", + "FCMask": "0x01", + "PerPkg": "1", + "PortMask": "0x0FF", + "PublicDescription": "-", + "UMask": "0x4", + "Unit": "IIO" + }, { "BriefDescription": "All 9 bits of Page Walk Tracker Occupancy", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index f3fe686b6630..960076e3f66f 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -13,7 +13,7 @@ GenuineIntel-6-CF,v1.14,emeraldrapids,core GenuineIntel-6-5[CF],v13,goldmont,core GenuineIntel-6-7A,v1.01,goldmontplus,core GenuineIntel-6-B6,v1.09,grandridge,core -GenuineIntel-6-A[DE],v1.08,graniterapids,core +GenuineIntel-6-A[DE],v1.10,graniterapids,core GenuineIntel-6-(3C|45|46),v36,haswell,core GenuineIntel-6-3F,v29,haswellx,core GenuineIntel-6-7[DE],v1.24,icelake,core From 
0a6b21da26e22b68a43cc763253a9ad0a8a24c1a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 30 Jun 2025 09:30:53 -0700 Subject: [PATCH 0560/2411] perf vendor events: Update IcelakeX events Update events from v1.27 to v1.28. Bring in the event updates v1.28: https://github.com/intel/perfmon/commit/c52728a46cf37ba271c09b1eb7093cfc82dfbf29 Signed-off-by: Ian Rogers Tested-by: Thomas Falcon Link: https://lore.kernel.org/r/20250630163101.1920170-9-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/pmu-events/arch/x86/icelakex/pipeline.json | 2 +- tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json | 2 -- tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json b/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json index f1446f1b67c6..f3a0d7f49af4 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json @@ -477,7 +477,7 @@ "Counter": "0,1,2,3", "EventCode": "0x4c", "EventName": "LOAD_HIT_PREFETCH.SWPF", - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", + "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. 
So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", "SampleAfterValue": "100003", "UMask": "0x1" }, diff --git a/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json b/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json index 8c73708befef..6f84ad47276d 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json @@ -8193,7 +8193,6 @@ "Counter": "0,1,2,3", "EventCode": "0x35", "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_RFO", - "Experimental": "1", "PerPkg": "1", "PublicDescription": "TOR Inserts : RFOs issued by IO Devices that missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.", "UMask": "0xc803fe04", @@ -8234,7 +8233,6 @@ "Counter": "0,1,2,3", "EventCode": "0x35", "EventName": "UNC_CHA_TOR_INSERTS.IO_RFO", - "Experimental": "1", "PerPkg": "1", "PublicDescription": "TOR Inserts : RFOs issued by IO Devices : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. 
Does not include addressless requests such as locks and interrupts.", "UMask": "0xc803ff04", diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 960076e3f66f..53c0d19c51d4 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -17,7 +17,7 @@ GenuineIntel-6-A[DE],v1.10,graniterapids,core GenuineIntel-6-(3C|45|46),v36,haswell,core GenuineIntel-6-3F,v29,haswellx,core GenuineIntel-6-7[DE],v1.24,icelake,core -GenuineIntel-6-6[AC],v1.27,icelakex,core +GenuineIntel-6-6[AC],v1.28,icelakex,core GenuineIntel-6-3A,v24,ivybridge,core GenuineIntel-6-3E,v24,ivytown,core GenuineIntel-6-2D,v24,jaketown,core From efafab4f491532c3293eeb1edeb9fcb2844d46b9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 30 Jun 2025 09:30:54 -0700 Subject: [PATCH 0561/2411] perf vendor events: Update LunarLake events Update events from v1.11 to v1.14. Bring in the event updates v1.14: https://github.com/intel/perfmon/commit/95634fec10542c0c466eb2c6d9a81e0c24fb1123 https://github.com/intel/perfmon/commit/84a49938387ac592af0a622273e4e8e4997e987d Signed-off-by: Ian Rogers Tested-by: Thomas Falcon Link: https://lore.kernel.org/r/20250630163101.1920170-10-irogers@google.com Signed-off-by: Namhyung Kim --- .../pmu-events/arch/x86/lunarlake/cache.json | 11 +++++++++++ .../arch/x86/lunarlake/pipeline.json | 18 ++++++++++++++---- .../arch/x86/lunarlake/virtual-memory.json | 18 ------------------ tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- 4 files changed, 26 insertions(+), 23 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/cache.json b/tools/perf/pmu-events/arch/x86/lunarlake/cache.json index b1a6bb867a1e..ff37d49611c3 100644 --- a/tools/perf/pmu-events/arch/x86/lunarlake/cache.json +++ b/tools/perf/pmu-events/arch/x86/lunarlake/cache.json @@ -790,6 +790,17 @@ "EventName": "MEM_LOAD_RETIRED.L1_HIT", "PublicDescription": "Counts retired load instructions with at least one uop 
that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0", "SampleAfterValue": "1000003", + "UMask": "0x101", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts retired load instructions with at least one uop that hit in the Level 0 of the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", + "Counter": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L1_HIT_L0", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 0 of the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0", + "SampleAfterValue": "1000003", "UMask": "0x1", "Unit": "cpu_core" }, diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json b/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json index 4875047fb65c..6ac410510628 100644 --- a/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json @@ -1247,9 +1247,19 @@ "Unit": "cpu_core" }, { - "BriefDescription": "Counts the number of demand loads that match on a wcb (request buffer) allocated by an L1 hardware prefetch", + "BriefDescription": "Counts the number of demand loads that match on a wcb (request buffer) allocated by an L1 hardware prefetch [This event is alias to LOAD_HIT_PREFETCH.HW_PF]", "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x4c", + "EventName": "LOAD_HIT_PREFETCH.HWPF", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event is deprecated. 
[This event is alias to LOAD_HIT_PREFETCH.HWPF]", + "Counter": "0,1,2,3,4,5,6,7", + "Deprecated": "1", + "EventCode": "0x4c", "EventName": "LOAD_HIT_PREFETCH.HW_PF", "SampleAfterValue": "1000003", "UMask": "0x2", @@ -1664,7 +1674,7 @@ }, { "BriefDescription": "Fixed Counter: Counts the number of issue slots not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.", - "Counter": "36", + "Counter": "Fixed counter 4", "EventName": "TOPDOWN_BAD_SPECULATION.ALL", "PublicDescription": "Fixed Counter: Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the IQ. Also, includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.", "SampleAfterValue": "1000003", @@ -1797,7 +1807,7 @@ }, { "BriefDescription": "Fixed Counter: Counts the number of retirement slots not consumed due to front end stalls.", - "Counter": "37", + "Counter": "Fixed counter 5", "EventName": "TOPDOWN_FE_BOUND.ALL", "SampleAfterValue": "1000003", "UMask": "0x6", @@ -1903,7 +1913,7 @@ }, { "BriefDescription": "Fixed Counter: Counts the number of consumed retirement slots.", - "Counter": "38", + "Counter": "Fixed counter 6", "EventName": "TOPDOWN_RETIRING.ALL", "SampleAfterValue": "1000003", "UMask": "0x7", diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/lunarlake/virtual-memory.json index defa3a967754..e60a5e904da2 100644 --- a/tools/perf/pmu-events/arch/x86/lunarlake/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/lunarlake/virtual-memory.json @@ -36,24 +36,6 @@ "UMask": "0x320", "Unit": "cpu_core" }, - { - "BriefDescription": "Counts the number of first level TLB misses but second 
level hits due to a demand load that did not start a page walk. Account for 4k page size only. Will result in a DTLB write from STLB.", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0x08", - "EventName": "DTLB_LOAD_MISSES.STLB_HIT_4K", - "SampleAfterValue": "200003", - "UMask": "0x20", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of first level TLB misses but second level hits due to a demand load that did not start a page walk. Account for large page sizes only. Will result in a DTLB write from STLB.", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0x08", - "EventName": "DTLB_LOAD_MISSES.STLB_HIT_LGPG", - "SampleAfterValue": "200003", - "UMask": "0x40", - "Unit": "cpu_atom" - }, { "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a demand load.", "Counter": "0,1,2,3,4,5,6,7,8,9", diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 53c0d19c51d4..5f27b3700c3c 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -22,7 +22,7 @@ GenuineIntel-6-3A,v24,ivybridge,core GenuineIntel-6-3E,v24,ivytown,core GenuineIntel-6-2D,v24,jaketown,core GenuineIntel-6-(57|85),v16,knightslanding,core -GenuineIntel-6-BD,v1.11,lunarlake,core +GenuineIntel-6-BD,v1.14,lunarlake,core GenuineIntel-6-(AA|AC|B5),v1.13,meteorlake,core GenuineIntel-6-1[AEF],v4,nehalemep,core GenuineIntel-6-2E,v4,nehalemex,core From a04ab3e59d6a2be27a45d54aef706c7080c7db4e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 30 Jun 2025 09:30:55 -0700 Subject: [PATCH 0562/2411] perf vendor events: Update MeteorLake events Update events from v1.13 to v1.14. 
Bring in the event updates v1.14: https://github.com/intel/perfmon/commit/6c53969b8d1a83afe6ae90149c8dd4ee416027ef Signed-off-by: Ian Rogers Tested-by: Thomas Falcon Link: https://lore.kernel.org/r/20250630163101.1920170-11-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- .../pmu-events/arch/x86/meteorlake/cache.json | 2 +- .../arch/x86/meteorlake/frontend.json | 72 +++++++++++++++++++ .../arch/x86/meteorlake/pipeline.json | 2 +- 4 files changed, 75 insertions(+), 3 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 5f27b3700c3c..1185ea93b44a 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -23,7 +23,7 @@ GenuineIntel-6-3E,v24,ivytown,core GenuineIntel-6-2D,v24,jaketown,core GenuineIntel-6-(57|85),v16,knightslanding,core GenuineIntel-6-BD,v1.14,lunarlake,core -GenuineIntel-6-(AA|AC|B5),v1.13,meteorlake,core +GenuineIntel-6-(AA|AC|B5),v1.14,meteorlake,core GenuineIntel-6-1[AEF],v4,nehalemep,core GenuineIntel-6-2E,v4,nehalemex,core GenuineIntel-6-A7,v1.04,rocketlake,core diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/cache.json b/tools/perf/pmu-events/arch/x86/meteorlake/cache.json index c980bbee6146..82b115183924 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/cache.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/cache.json @@ -231,7 +231,7 @@ "Unit": "cpu_core" }, { - "BriefDescription": "Counts the number of L2 Cache Accesses that miss the L2 and get BBL reject short and long rejects (includes those counted in L2_reject_XQ.any), per core event", + "BriefDescription": "Counts the number of L2 Cache Accesses that miss the L2 and get BBL reject short and long rejects, per core event", "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x24", "EventName": "L2_REQUEST.REJECTS", diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json 
b/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json index 509ce68c2ea6..82727022efb6 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json @@ -49,6 +49,14 @@ "UMask": "0x2", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged with having preceded with frontend bound behavior", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ALL", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, { "BriefDescription": "Retired ANT branches", "Counter": "0,1,2,3,4,5,6,7", @@ -73,6 +81,43 @@ "UMask": "0x3", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles/empty issue slots due to a baclear", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.BRANCH_DETECT", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles /empty issue slots due to a btclear", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.BRANCH_RESTEER", + "SampleAfterValue": "1000003", + "UMask": "0x40", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged following an ms flow due to the bubble/wasted issue slot from exiting long ms flow", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.CISC", + "PublicDescription": "Counts the number of instructions retired that were tagged following an ms flow due to the bubble/wasted issue slot from exiting long ms flow", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged every 
cycle the decoder is unable to send 3 uops per cycle.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.DECODE", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_atom" + }, { "BriefDescription": "Retired Instructions who experienced a critical DSB miss.", "Counter": "0,1,2,3,4,5,6,7", @@ -85,6 +130,15 @@ "UMask": "0x3", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to icache miss", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ICACHE", + "SampleAfterValue": "1000003", + "UMask": "0x20", + "Unit": "cpu_atom" + }, { "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to ITLB miss", "Counter": "0,1,2,3,4,5,6,7", @@ -286,6 +340,24 @@ "UMask": "0x3", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of instruction retired tagged after a wasted issue slot if none of the previous events occurred", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.OTHER", + "SampleAfterValue": "1000003", + "UMask": "0x80", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles/empty issue slots due to a predecode wrong.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.PREDECODE", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, { "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.", "Counter": "0,1,2,3,4,5,6,7", diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json b/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json index a833d6f53d0e..22b25708e799 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json +++ 
b/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json @@ -1076,7 +1076,7 @@ "Counter": "0,1,2,3", "EventCode": "0x4c", "EventName": "LOAD_HIT_PREFETCH.SWPF", - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0", + "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0", "SampleAfterValue": "100003", "UMask": "0x1", "Unit": "cpu_core" From 1f9e24e4df0099c407bc7eeed931baf58d9144a9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 30 Jun 2025 09:30:56 -0700 Subject: [PATCH 0563/2411] perf vendor events: Add PantherLake events Bring in the events at v1.00: https://github.com/intel/perfmon/commit/d90a6737d0e4e6fbea4a5951e829615fd8317c24 Signed-off-by: Ian Rogers Tested-by: Thomas Falcon Link: https://lore.kernel.org/r/20250630163101.1920170-12-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/pmu-events/arch/x86/mapfile.csv | 1 + .../arch/x86/pantherlake/cache.json | 278 +++++++++++++++ .../arch/x86/pantherlake/counter.json | 12 + .../arch/x86/pantherlake/frontend.json | 30 ++ .../arch/x86/pantherlake/memory.json | 215 ++++++++++++ .../arch/x86/pantherlake/pipeline.json | 325 ++++++++++++++++++ .../arch/x86/pantherlake/virtual-memory.json | 62 ++++ 7 files changed, 923 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/pantherlake/cache.json create mode 100644 tools/perf/pmu-events/arch/x86/pantherlake/counter.json create mode 100644 
tools/perf/pmu-events/arch/x86/pantherlake/frontend.json create mode 100644 tools/perf/pmu-events/arch/x86/pantherlake/memory.json create mode 100644 tools/perf/pmu-events/arch/x86/pantherlake/pipeline.json create mode 100644 tools/perf/pmu-events/arch/x86/pantherlake/virtual-memory.json diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 1185ea93b44a..252382751fa5 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -26,6 +26,7 @@ GenuineIntel-6-BD,v1.14,lunarlake,core GenuineIntel-6-(AA|AC|B5),v1.14,meteorlake,core GenuineIntel-6-1[AEF],v4,nehalemep,core GenuineIntel-6-2E,v4,nehalemex,core +GenuineIntel-6-CC,v1.00,pantherlake,core GenuineIntel-6-A7,v1.04,rocketlake,core GenuineIntel-6-2A,v19,sandybridge,core GenuineIntel-6-8F,v1.25,sapphirerapids,core diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/cache.json b/tools/perf/pmu-events/arch/x86/pantherlake/cache.json new file mode 100644 index 000000000000..c84f3d9fdb10 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/pantherlake/cache.json @@ -0,0 +1,278 @@ +[ + { + "BriefDescription": "Counts the number of L2 cache accesses from front door requests for Code Read, Data Read, RFO, ITOM, and L2 Prefetches. 
Does not include rejects or recycles, per core event.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_REQUEST.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x1ff", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "L2 code requests", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_CODE_RD", + "PublicDescription": "Counts the total number of L2 code requests.", + "SampleAfterValue": "200003", + "UMask": "0xe4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Demand Data Read access L2 cache", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", + "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once.", + "SampleAfterValue": "200003", + "UMask": "0xe1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.MISS", + "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", + "SampleAfterValue": "1000003", + "UMask": "0x41", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Core-originated cacheable requests that missed L3 (Except hardware prefetches to the L3)", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.MISS", + "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). 
Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.", + "SampleAfterValue": "100003", + "UMask": "0x41", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of cacheable memory requests that access the LLC. Counts on a per core basis.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", + "SampleAfterValue": "1000003", + "UMask": "0x4f", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Core-originated cacheable requests that refer to L3 (Except hardware prefetches to the L3)", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.", + "SampleAfterValue": "100003", + "UMask": "0x4f", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts all retired load instructions.", + "Counter": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.ALL_LOADS", + "PublicDescription": "Counts Instructions with at least one architecturally visible load retired. 
Available PDIST counters: 0", + "SampleAfterValue": "1000003", + "UMask": "0x81", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired store instructions.", + "Counter": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.ALL_STORES", + "PublicDescription": "Counts all retired store instructions. Available PDIST counters: 0", + "SampleAfterValue": "1000003", + "UMask": "0x82", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of load ops retired.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", + "SampleAfterValue": "1000003", + "UMask": "0x81", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of store ops retired.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.ALL_STORES", + "SampleAfterValue": "1000003", + "UMask": "0x82", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_1024", + "MSRIndex": "0x3F6", + "MSRValue": "0x400", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128", + "MSRIndex": "0x3F6", + "MSRValue": "0x80", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": 
"MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16", + "MSRIndex": "0x3F6", + "MSRValue": "0x10", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_2048", + "MSRIndex": "0x3F6", + "MSRValue": "0x800", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256", + "MSRIndex": "0x3F6", + "MSRValue": "0x100", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32", + "MSRIndex": "0x3F6", + "MSRValue": "0x20", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4", + "MSRIndex": "0x3F6", + "MSRValue": "0x4", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + 
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512", + "MSRIndex": "0x3F6", + "MSRValue": "0x200", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64", + "MSRIndex": "0x3F6", + "MSRValue": "0x40", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8", + "MSRIndex": "0x3F6", + "MSRValue": "0x8", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of stores uops retired same as MEM_UOPS_RETIRED.ALL_STORES", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY", + "SampleAfterValue": "1000003", + "UMask": "0x6", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts demand data reads that have any type of response.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10001", + "PublicDescription": "Counts demand data reads that have any type of response. 
Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts demand data reads that have any type of response.", + "Counter": "0,1,2,3", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10001", + "PublicDescription": "Counts demand data reads that have any type of response. Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10002", + "PublicDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response. Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.", + "Counter": "0,1,2,3", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10002", + "PublicDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response. 
Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + } +] diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/counter.json b/tools/perf/pmu-events/arch/x86/pantherlake/counter.json new file mode 100644 index 000000000000..69f158a97707 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/pantherlake/counter.json @@ -0,0 +1,12 @@ +[ + { + "Unit": "cpu_atom", + "CountersNumFixed": "3", + "CountersNumGeneric": "39" + }, + { + "Unit": "cpu_core", + "CountersNumFixed": "4", + "CountersNumGeneric": "10" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/frontend.json b/tools/perf/pmu-events/arch/x86/pantherlake/frontend.json new file mode 100644 index 000000000000..aedf631e3c0f --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/pantherlake/frontend.json @@ -0,0 +1,30 @@ +[ + { + "BriefDescription": "Counts every time the code stream enters into a new cache line by walking sequential from the previous line or being redirected by a jump.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x80", + "EventName": "ICACHE.ACCESSES", + "SampleAfterValue": "1000003", + "UMask": "0x3", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts every time the code stream enters into a new cache line by walking sequential from the previous line or being redirected by a jump and the instruction cache registers bytes are not present. -", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x80", + "EventName": "ICACHE.MISSES", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event counts a subset of the Topdown Slots event that when no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. 
Common examples include instruction cache misses or x86 instruction decode limitations.", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x9c", + "EventName": "IDQ_BUBBLES.CORE", + "PublicDescription": "This event counts a subset of the Topdown Slots event that when no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations. Software can use this event as the numerator for the Frontend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method.", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + } +] diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/memory.json b/tools/perf/pmu-events/arch/x86/pantherlake/memory.json new file mode 100644 index 000000000000..47daee8cc00f --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/pantherlake/memory.json @@ -0,0 +1,215 @@ +[ + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_1024", + "MSRIndex": "0x3F6", + "MSRValue": "0x400", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles. Reported latency may be longer than just the memory latency. 
Available PDIST counters: 0", + "SampleAfterValue": "53", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", + "MSRIndex": "0x3F6", + "MSRValue": "0x80", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0", + "SampleAfterValue": "1009", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", + "MSRIndex": "0x3F6", + "MSRValue": "0x10", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0", + "SampleAfterValue": "20011", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_2048", + "MSRIndex": "0x3F6", + "MSRValue": "0x800", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles. Reported latency may be longer than just the memory latency. 
Available PDIST counters: 0", + "SampleAfterValue": "23", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", + "MSRIndex": "0x3F6", + "MSRValue": "0x100", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0", + "SampleAfterValue": "503", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", + "MSRIndex": "0x3F6", + "MSRValue": "0x20", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0", + "SampleAfterValue": "100007", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", + "MSRIndex": "0x3F6", + "MSRValue": "0x4", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency. 
Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", + "MSRIndex": "0x3F6", + "MSRValue": "0x200", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0", + "SampleAfterValue": "101", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", + "MSRIndex": "0x3F6", + "MSRValue": "0x40", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0", + "SampleAfterValue": "2003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", + "MSRIndex": "0x3F6", + "MSRValue": "0x8", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0", + "SampleAfterValue": "50021", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired memory store access operations. 
A PDist event for PEBS Store Latency Facility.", + "Counter": "0,1", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.STORE_SAMPLE", + "PublicDescription": "Counts Retired memory accesses with at least 1 store operation. This PEBS event is the precisely-distributed (PDist) trigger covering all stores uops for sampling by the PEBS Store Latency Facility. The facility is described in Intel SDM Volume 3 section 19.9.8 Available PDIST counters: 0", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts demand data reads that were supplied by DRAM.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x7BC000001", + "PublicDescription": "Counts demand data reads that were supplied by DRAM. Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts demand data reads that were supplied by DRAM.", + "Counter": "0,1,2,3", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.DEMAND_DATA_RD.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1E780000001", + "PublicDescription": "Counts demand data reads that were supplied by DRAM. Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x13FBFC00001", + "PublicDescription": "Counts demand data reads that were not supplied by the L3 cache. 
Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", + "Counter": "0,1,2,3", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.DEMAND_DATA_RD.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x9E7FA000001", + "PublicDescription": "Counts demand data reads that were not supplied by the L3 cache. Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x13FBFC00002", + "PublicDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache. Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", + "Counter": "0,1,2,3", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.DEMAND_RFO.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x9E7FA000002", + "PublicDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache. 
Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + } +] diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/pipeline.json b/tools/perf/pmu-events/arch/x86/pantherlake/pipeline.json new file mode 100644 index 000000000000..2caf2f85327f --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/pantherlake/pipeline.json @@ -0,0 +1,325 @@ +[ + { + "BriefDescription": "Counts the total number of branch instructions retired for all branch types.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for.", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "All branch instructions retired.", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts all branch instructions retired. Available PDIST counters: 0", + "SampleAfterValue": "400009", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. 
A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "All mispredicted branch instructions retired.", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path. Available PDIST counters: 0", + "SampleAfterValue": "400009", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles.", + "Counter": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.CORE", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Core cycles when the core is not in a halt state.", + "Counter": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.CORE", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. 
It is counted on a dedicated fixed counter, leaving the programmable counters available for other events.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles. [This event is alias to CPU_CLK_UNHALTED.THREAD_P]", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.CORE_P", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Thread cycles when thread is not in halt state [This event is alias to CPU_CLK_UNHALTED.THREAD_P]", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.CORE_P", + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time. [This event is alias to CPU_CLK_UNHALTED.THREAD_P]", + "SampleAfterValue": "2000003", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of unhalted reference clock cycles.", + "Counter": "Fixed counter 2", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "SampleAfterValue": "1000003", + "UMask": "0x3", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Reference cycles when the core is not in halt state.", + "Counter": "Fixed counter 2", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. 
This event can approximate elapsed time while the core was not in a halt state. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", + "SampleAfterValue": "2000003", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", + "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Reference cycles when the core is not in halt state.", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. 
This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles.", + "Counter": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Core cycles when the thread is not in a halt state.", + "Counter": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. 
When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the programmable counters available for other events.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles. [This event is alias to CPU_CLK_UNHALTED.CORE_P]", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Thread cycles when thread is not in halt state [This event is alias to CPU_CLK_UNHALTED.CORE_P]", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time. [This event is alias to CPU_CLK_UNHALTED.CORE_P]", + "SampleAfterValue": "2000003", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of instructions retired.", + "Counter": "Fixed counter 0", + "EventName": "INST_RETIRED.ANY", + "PublicDescription": "Fixed Counter: Counts the number of instructions retired. Available PDIST counters: 32", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Number of instructions retired. Fixed Counter - architectural event", + "Counter": "Fixed counter 0", + "EventName": "INST_RETIRED.ANY", + "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. 
Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter. Available PDIST counters: 32", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of instructions retired.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc0", + "EventName": "INST_RETIRED.ANY_P", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0xc0", + "EventName": "INST_RETIRED.ANY_P", + "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter. Available PDIST counters: 0", + "SampleAfterValue": "2000003", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of retired loads that are blocked because its address partially overlapped with an older store.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.STORE_FORWARD", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.STORE_FORWARD", + "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. 
The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.", + "SampleAfterValue": "100003", + "UMask": "0x82", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of LBR entries recorded. Requires LBRs to be enabled in IA32_LBR_CTL.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xe4", + "EventName": "MISC_RETIRED.LBR_INSERTS", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "LBR record is inserted", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0xe4", + "EventName": "MISC_RETIRED.LBR_INSERTS", + "PublicDescription": "LBR record is inserted Available PDIST counters: 0", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "This event counts a subset of the Topdown Slots event that were not consumed by the back-end pipeline due to lack of back-end resources, as a result of memory subsystem delays, execution units limitations, or other conditions.", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0xa4", + "EventName": "TOPDOWN.BACKEND_BOUND_SLOTS", + "PublicDescription": "This event counts a subset of the Topdown Slots event that were not consumed by the back-end pipeline due to lack of back-end resources, as a result of memory subsystem delays, execution units limitations, or other conditions. Software can use this event as the numerator for the Backend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method.", + "SampleAfterValue": "10000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "TMA slots available for an unhalted logical processor. 
Fixed counter - architectural event", + "Counter": "Fixed counter 3", + "EventName": "TOPDOWN.SLOTS", + "PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). Software can use this event as the denominator for the top-level metrics of the TMA method. This architectural event is counted on a designated fixed counter (Fixed Counter 3).", + "SampleAfterValue": "10000003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "TMA slots available for an unhalted logical processor. General counter - architectural event", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0xa4", + "EventName": "TOPDOWN.SLOTS_P", + "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method.", + "SampleAfterValue": "10000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.", + "Counter": "36", + "EventName": "TOPDOWN_BAD_SPECULATION.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.ALL_P", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. 
Other nukes are not accounted for. Counts all issue slots blocked during this recovery window, including relevant microcode flows, and while uops are not yet available in the instruction queue (IQ) or until an FE_BOUND event occurs besides OTHER and CISC. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls. [This event is alias to TOPDOWN_BE_BOUND.ALL_P]", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xa4", + "EventName": "TOPDOWN_BE_BOUND.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls. [This event is alias to TOPDOWN_BE_BOUND.ALL]", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xa4", + "EventName": "TOPDOWN_BE_BOUND.ALL_P", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of retirement slots not consumed due to front end stalls.", + "Counter": "37", + "EventName": "TOPDOWN_FE_BOUND.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x6", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x9c", + "EventName": "TOPDOWN_FE_BOUND.ALL_P", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of consumed retirement slots.", + "Counter": "38", + "EventName": "TOPDOWN_RETIRING.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x7", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of consumed retirement slots.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": 
"0xc2", + "EventName": "TOPDOWN_RETIRING.ALL_P", + "PublicDescription": "Counts the number of consumed retirement slots. Available PDIST counters: 0,1", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event counts a subset of the Topdown Slots event that are utilized by operations that eventually get retired (committed) by the processor pipeline. Usually, this event positively correlates with higher performance for example, as measured by the instructions-per-cycle metric.", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.SLOTS", + "PublicDescription": "This event counts a subset of the Topdown Slots event that are utilized by operations that eventually get retired (committed) by the processor pipeline. Usually, this event positively correlates with higher performance for example, as measured by the instructions-per-cycle metric. Software can use this event as the numerator for the Retiring metric (or top-level category) of the Top-down Microarchitecture Analysis method.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_core" + } +] diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/pantherlake/virtual-memory.json new file mode 100644 index 000000000000..690c5dff9d9e --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/pantherlake/virtual-memory.json @@ -0,0 +1,62 @@ +[ + { + "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. 
Includes page walks that page fault.", + "SampleAfterValue": "1000003", + "UMask": "0xe", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x12", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0xe", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "1000003", + "UMask": "0xe", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x13", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. 
The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0xe", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "1000003", + "UMask": "0xe", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x11", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0xe", + "Unit": "cpu_core" + } +] From 8704418511944eb417e35af30da5cb4a0b3676a9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 30 Jun 2025 09:30:57 -0700 Subject: [PATCH 0564/2411] perf vendor events: Update SapphireRapids events Update events from v1.25 to v1.28. 
Bring in the event updates v1.28: https://github.com/intel/perfmon/commit/990bfdff270adf08d408534d6d66ba47ec6adb34 https://github.com/intel/perfmon/commit/b7b4d7f18cf9a893438777a571abc7ecc087368b Signed-off-by: Ian Rogers Tested-by: Thomas Falcon Link: https://lore.kernel.org/r/20250630163101.1920170-13-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- .../arch/x86/sapphirerapids/pipeline.json | 2 +- .../arch/x86/sapphirerapids/uncore-io.json | 12 +++++++++++ .../x86/sapphirerapids/uncore-memory.json | 20 +++++++++++++++++++ 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 252382751fa5..13eaed97b4ac 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -29,7 +29,7 @@ GenuineIntel-6-2E,v4,nehalemex,core GenuineIntel-6-CC,v1.00,pantherlake,core GenuineIntel-6-A7,v1.04,rocketlake,core GenuineIntel-6-2A,v19,sandybridge,core -GenuineIntel-6-8F,v1.25,sapphirerapids,core +GenuineIntel-6-8F,v1.28,sapphirerapids,core GenuineIntel-6-AF,v1.09,sierraforest,core GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v59,skylake,core diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json index 00b05a77c289..48bec483b49a 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json @@ -684,7 +684,7 @@ "Counter": "0,1,2,3", "EventCode": "0x4c", "EventName": "LOAD_HIT_PREFETCH.SWPF", - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. 
So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0", + "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0", "SampleAfterValue": "100003", "UMask": "0x1" }, diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json index aab082ff9402..dac7e6c50f31 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json @@ -1901,6 +1901,18 @@ "UMask": "0x4", "Unit": "IIO" }, + { + "BriefDescription": "Posted requests sent by the integrated IO (IIO) controller to the Ubox, useful for counting message signaled interrupts (MSI).", + "Counter": "0,1,2,3", + "EventCode": "0x8e", + "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX_POSTED", + "Experimental": "1", + "FCMask": "0x01", + "PerPkg": "1", + "PortMask": "0x00FF", + "UMask": "0x4", + "Unit": "IIO" + }, { "BriefDescription": "ITC address map 1", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-memory.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-memory.json index aa06088dd26f..68be01dad7c9 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-memory.json @@ -2145,6 +2145,16 @@ "UMask": "0x1", "Unit": "MCHBM" }, + { + "BriefDescription": "ECC Correctable Errors", + "Counter": "0,1,2,3", + "EventCode": "0x09", + "EventName": "UNC_MCHBM_ECC_CORRECTABLE_ERRORS", + "Experimental": "1", + "PerPkg": "1", + "PublicDescription": "ECC 
Correctable Errors. Counts the number of ECC errors detected and corrected by the iMC on this channel. This counter is only useful with ECC devices. This count will increment one time for each correction regardless of the number of bits corrected. The iMC can correct up to 4 bit errors in independent channel mode and 8 bit errors in lockstep mode.", + "Unit": "MCHBM" + }, { "BriefDescription": "HBM Precharge All Commands", "Counter": "0,1,2,3", @@ -2759,6 +2769,16 @@ "UMask": "0x3", "Unit": "iMC" }, + { + "BriefDescription": "ECC Correctable Errors", + "Counter": "0,1,2,3", + "EventCode": "0x09", + "EventName": "UNC_M_ECC_CORRECTABLE_ERRORS", + "Experimental": "1", + "PerPkg": "1", + "PublicDescription": "ECC Correctable Errors : Counts the number of ECC errors detected and corrected by the iMC on this channel. This counter is only useful with ECC DRAM devices. This count will increment one time for each correction regardless of the number of bits corrected. The iMC can correct up to 4 bit errors in independent channel mode and 8 bit errors in lockstep mode.", + "Unit": "iMC" + }, { "BriefDescription": "IMC Clockticks at HCLK frequency", "Counter": "0,1,2,3", From 336473ad0771890fc4106da08efa3cef4b30b106 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 30 Jun 2025 09:30:58 -0700 Subject: [PATCH 0565/2411] perf vendor events: Update SierraForest events Update events from v1.09 to v1.11. 
Bring in the event updates v1.11: https://github.com/intel/perfmon/commit/6b824df1dba3948146281c8ba2a8c3e7bf7f7c51 https://github.com/intel/perfmon/commit/4b0346fbee2b04dd34526522250116aee525c922 Signed-off-by: Ian Rogers Tested-by: Thomas Falcon Link: https://lore.kernel.org/r/20250630163101.1920170-14-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- .../arch/x86/sierraforest/frontend.json | 64 +++++++++++++++++++ .../arch/x86/sierraforest/pipeline.json | 8 +++ .../arch/x86/sierraforest/srf-metrics.json | 48 ++++++++++++++ .../arch/x86/sierraforest/uncore-cache.json | 6 +- .../x86/sierraforest/uncore-interconnect.json | 53 ++++++++++++--- .../arch/x86/sierraforest/uncore-io.json | 27 +++++++- 7 files changed, 194 insertions(+), 14 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 13eaed97b4ac..54c2cfb0af9c 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -30,7 +30,7 @@ GenuineIntel-6-CC,v1.00,pantherlake,core GenuineIntel-6-A7,v1.04,rocketlake,core GenuineIntel-6-2A,v19,sandybridge,core GenuineIntel-6-8F,v1.28,sapphirerapids,core -GenuineIntel-6-AF,v1.09,sierraforest,core +GenuineIntel-6-AF,v1.11,sierraforest,core GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v59,skylake,core GenuineIntel-6-55-[01234],v1.36,skylakex,core diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/frontend.json b/tools/perf/pmu-events/arch/x86/sierraforest/frontend.json index fef5cba533bb..8a591e31d331 100644 --- a/tools/perf/pmu-events/arch/x86/sierraforest/frontend.json +++ b/tools/perf/pmu-events/arch/x86/sierraforest/frontend.json @@ -8,6 +8,54 @@ "SampleAfterValue": "200003", "UMask": "0x1" }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged with having preceded with frontend bound behavior", + "Counter": "0,1,2,3,4,5,6,7", + 
"EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ALL", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles/empty issue slots due to a baclear", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.BRANCH_DETECT", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles /empty issue slots due to a btclear", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.BRANCH_RESTEER", + "SampleAfterValue": "1000003", + "UMask": "0x40" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged following an ms flow due to the bubble/wasted issue slot from exiting long ms flow", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.CISC", + "PublicDescription": "Counts the number of instructions retired that were tagged following an ms flow due to the bubble/wasted issue slot from exiting long ms flow", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged every cycle the decoder is unable to send 3 uops per cycle.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.DECODE", + "SampleAfterValue": "1000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to icache miss", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ICACHE", + "SampleAfterValue": "1000003", + "UMask": "0x20" + }, { "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to ITLB miss", 
"Counter": "0,1,2,3,4,5,6,7", @@ -16,6 +64,22 @@ "SampleAfterValue": "1000003", "UMask": "0x10" }, + { + "BriefDescription": "Counts the number of instruction retired tagged after a wasted issue slot if none of the previous events occurred", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.OTHER", + "SampleAfterValue": "1000003", + "UMask": "0x80" + }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles/empty issue slots due to a predecode wrong.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.PREDECODE", + "SampleAfterValue": "1000003", + "UMask": "0x4" + }, { "BriefDescription": "Counts every time the code stream enters into a new cache line by walking sequential from the previous line or being redirected by a jump.", "Counter": "0,1,2,3,4,5,6,7", diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/pipeline.json b/tools/perf/pmu-events/arch/x86/sierraforest/pipeline.json index f56d8d816e53..70af13143024 100644 --- a/tools/perf/pmu-events/arch/x86/sierraforest/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/sierraforest/pipeline.json @@ -11,6 +11,7 @@ { "BriefDescription": "Counts the total number of branch instructions retired for all branch types.", "Counter": "0,1,2,3,4,5,6,7", + "Errata": "SRF6, SRF7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.ALL_BRANCHES", "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. 
All branch type instructions are accounted for.", @@ -19,6 +20,7 @@ { "BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.", "Counter": "0,1,2,3,4,5,6,7", + "Errata": "SRF7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.COND", "SampleAfterValue": "200003", @@ -35,6 +37,7 @@ { "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.", "Counter": "0,1,2,3,4,5,6,7", + "Errata": "SRF7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.FAR_BRANCH", "SampleAfterValue": "200003", @@ -43,6 +46,7 @@ { "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.", "Counter": "0,1,2,3,4,5,6,7", + "Errata": "SRF7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.INDIRECT", "SampleAfterValue": "200003", @@ -51,6 +55,7 @@ { "BriefDescription": "Counts the number of near indirect CALL branch instructions retired.", "Counter": "0,1,2,3,4,5,6,7", + "Errata": "SRF7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.INDIRECT_CALL", "SampleAfterValue": "200003", @@ -68,6 +73,7 @@ "BriefDescription": "This event is deprecated. 
Refer to new event BR_INST_RETIRED.INDIRECT_CALL", "Counter": "0,1,2,3,4,5,6,7", "Deprecated": "1", + "Errata": "SRF7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.IND_CALL", "SampleAfterValue": "200003", @@ -76,6 +82,7 @@ { "BriefDescription": "Counts the number of near CALL branch instructions retired.", "Counter": "0,1,2,3,4,5,6,7", + "Errata": "SRF6, SRF7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_CALL", "SampleAfterValue": "200003", @@ -92,6 +99,7 @@ { "BriefDescription": "Counts the number of near taken branch instructions retired.", "Counter": "0,1,2,3,4,5,6,7", + "Errata": "SRF7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", "SampleAfterValue": "200003", diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/srf-metrics.json b/tools/perf/pmu-events/arch/x86/sierraforest/srf-metrics.json index ef629e4e91ce..b9f3c611d87b 100644 --- a/tools/perf/pmu-events/arch/x86/sierraforest/srf-metrics.json +++ b/tools/perf/pmu-events/arch/x86/sierraforest/srf-metrics.json @@ -61,6 +61,18 @@ "MetricName": "cpi", "ScaleUnit": "1per_instr" }, + { + "BriefDescription": "The average number of cores that are in cstate C0 as observed by the power control unit (PCU)", + "MetricExpr": "UNC_P_POWER_STATE_OCCUPANCY_CORES_C0 / pcu_0@UNC_P_CLOCKTICKS@ * #num_packages", + "MetricGroup": "cpu_cstate", + "MetricName": "cpu_cstate_c0" + }, + { + "BriefDescription": "The average number of cores that are in cstate C6 as observed by the power control unit (PCU)", + "MetricExpr": "UNC_P_POWER_STATE_OCCUPANCY_CORES_C6 / pcu_0@UNC_P_CLOCKTICKS@ * #num_packages", + "MetricGroup": "cpu_cstate", + "MetricName": "cpu_cstate_c6" + }, { "BriefDescription": "CPU operating frequency (in GHz)", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC * #SYSTEM_TSC_FREQ / 1e9", @@ -112,6 +124,12 @@ "MetricName": "io_bandwidth_read", "ScaleUnit": "1MB/s" }, + { + "BriefDescription": "Bandwidth of inbound IO reads that are initiated by end device 
controllers that are requesting memory from the CPU and miss the L3 cache", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR * 64 / 1e6 / duration_time", + "MetricName": "io_bandwidth_read_l3_miss", + "ScaleUnit": "1MB/s" + }, { "BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the local CPU socket", "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_LOCAL * 64 / 1e6 / duration_time", @@ -130,6 +148,12 @@ "MetricName": "io_bandwidth_write", "ScaleUnit": "1MB/s" }, + { + "BriefDescription": "Bandwidth of inbound IO writes that are initiated by end device controllers that are writing memory to the CPU", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOM + UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR) * 64 / 1e6 / duration_time", + "MetricName": "io_bandwidth_write_l3_miss", + "ScaleUnit": "1MB/s" + }, { "BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the local CPU socket", "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_ITOM_LOCAL + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_LOCAL) * 64 / 1e6 / duration_time", @@ -142,6 +166,30 @@ "MetricName": "io_bandwidth_write_remote", "ScaleUnit": "1MB/s" }, + { + "BriefDescription": "The percent of inbound full cache line writes initiated by IO that miss the L3 cache", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM / UNC_CHA_TOR_INSERTS.IO_ITOM", + "MetricName": "io_full_write_l3_miss", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Message Signaled Interrupts (MSI) per second sent by the integrated I/O traffic controller (IIO) to System Configuration Controller (Ubox)", + "MetricExpr": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX_POSTED / duration_time", + "MetricName": "io_msi", + "ScaleUnit": "1per_sec" + }, + { + "BriefDescription": "The percent of inbound partial writes initiated by IO that miss the L3 cache", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_RFO) 
/ (UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_RFO)", + "MetricName": "io_partial_write_l3_miss", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "The percent of inbound reads initiated by IO that miss the L3 cache", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR / UNC_CHA_TOR_INSERTS.IO_PCIRDCUR", + "MetricName": "io_read_l3_miss", + "ScaleUnit": "100%" + }, { "BriefDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions", "MetricExpr": "ITLB_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY", diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-cache.json b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-cache.json index 7182ca00ef8d..3d1fb5f0417e 100644 --- a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-cache.json @@ -874,7 +874,7 @@ "Unit": "CHA" }, { - "BriefDescription": "Counts snoop filter capacity evictions for entries tracking exclusive lines in the cores? cache.? Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry.? Does not count clean evictions such as when a core?s cache replaces a tracked cacheline with a new cacheline.", + "BriefDescription": "Counts snoop filter capacity evictions for entries tracking exclusive lines in the core's cache. Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry. Does not count clean evictions such as when a core's cache replaces a tracked cacheline with a new cacheline.", "Counter": "0,1,2,3", "EventCode": "0x3d", "EventName": "UNC_CHA_SF_EVICTION.E_STATE", @@ -885,7 +885,7 @@ "Unit": "CHA" }, { - "BriefDescription": "Counts snoop filter capacity evictions for entries tracking modified lines in the cores? cache.? 
Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry.? Does not count clean evictions such as when a core?s cache replaces a tracked cacheline with a new cacheline.", + "BriefDescription": "Counts snoop filter capacity evictions for entries tracking modified lines in the core's cache. Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry. Does not count clean evictions such as when a core's cache replaces a tracked cacheline with a new cacheline.", "Counter": "0,1,2,3", "EventCode": "0x3d", "EventName": "UNC_CHA_SF_EVICTION.M_STATE", @@ -895,7 +895,7 @@ "Unit": "CHA" }, { - "BriefDescription": "Counts snoop filter capacity evictions for entries tracking shared lines in the cores? cache.? Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry.? Does not count clean evictions such as when a core?s cache replaces a tracked cacheline with a new cacheline.", + "BriefDescription": "Counts snoop filter capacity evictions for entries tracking shared lines in the core's cache. Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry. Does not count clean evictions such as when a core's cache replaces a tracked cacheline with a new cacheline.", "Counter": "0,1,2,3", "EventCode": "0x3d", "EventName": "UNC_CHA_SF_EVICTION.S_STATE", diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-interconnect.json index 2ccbc8bca24e..952b6de3fefc 100644 --- a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-interconnect.json @@ -1562,21 +1562,56 @@ "Unit": "UPI" }, { - "BriefDescription": "Tx Flit Buffer Allocations : Number of allocations into the UPI Tx Flit Buffer. 
Generally, when data is transmitted across UPI, it will bypass the TxQ and pass directly to the link. However, the TxQ will be used with L0p and when LLR occurs, increasing latency to transfer out to the link. This event can be used in conjunction with the Flit Buffer Occupancy event in order to calculate the average flit buffer lifetime.", - "Counter": "0,1,2,3", - "EventCode": "0x40", - "EventName": "UNC_UPI_TxL_INSERTS", + "BriefDescription": "Message Received : Doorbell", + "Counter": "0,1", + "EventCode": "0x42", + "EventName": "UNC_U_EVENT_MSG.DOORBELL_RCVD", "Experimental": "1", "PerPkg": "1", - "Unit": "UPI" + "UMask": "0x8", + "Unit": "UBOX" }, { - "BriefDescription": "Tx Flit Buffer Occupancy : Accumulates the number of flits in the TxQ. Generally, when data is transmitted across UPI, it will bypass the TxQ and pass directly to the link. However, the TxQ will be used with L0p and when LLR occurs, increasing latency to transfer out to the link. This can be used with the cycles not empty event to track average occupancy, or the allocations event to track average lifetime in the TxQ.", - "Counter": "0,1,2,3", + "BriefDescription": "Message Received : Interrupt", + "Counter": "0,1", "EventCode": "0x42", - "EventName": "UNC_UPI_TxL_OCCUPANCY", + "EventName": "UNC_U_EVENT_MSG.INT_PRIO", "Experimental": "1", "PerPkg": "1", - "Unit": "UPI" + "PublicDescription": "Message Received : Interrupt : Interrupts", + "UMask": "0x10", + "Unit": "UBOX" + }, + { + "BriefDescription": "Message Received : IPI", + "Counter": "0,1", + "EventCode": "0x42", + "EventName": "UNC_U_EVENT_MSG.IPI_RCVD", + "Experimental": "1", + "PerPkg": "1", + "PublicDescription": "Message Received : IPI : Inter Processor Interrupts", + "UMask": "0x4", + "Unit": "UBOX" + }, + { + "BriefDescription": "Message Received : MSI", + "Counter": "0,1", + "EventCode": "0x42", + "EventName": "UNC_U_EVENT_MSG.MSI_RCVD", + "PerPkg": "1", + "PublicDescription": "Message Received : MSI : Message Signaled 
Interrupts - interrupts sent by devices (including PCIe via IOxAPIC) (Socket Mode only)", + "UMask": "0x2", + "Unit": "UBOX" + }, + { + "BriefDescription": "Message Received : VLW", + "Counter": "0,1", + "EventCode": "0x42", + "EventName": "UNC_U_EVENT_MSG.VLW_RCVD", + "Experimental": "1", + "PerPkg": "1", + "PublicDescription": "Message Received : VLW : Virtual Logical Wire (legacy) messages were received from Uncore.", + "UMask": "0x1", + "Unit": "UBOX" } ] diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-io.json b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-io.json index 886b99a971be..f4f956966e16 100644 --- a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-io.json +++ b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-io.json @@ -1121,8 +1121,9 @@ "Unit": "IIO" }, { - "BriefDescription": "Occupancy of outbound request queue : To device : Counts number of outbound requests/completions IIO is currently processing", + "BriefDescription": "This event is deprecated.
[This event is alias to UNC_IIO_NUM_OUTSTANDING_REQ_FROM_CPU.TO_IO]", "Counter": "2,3", + "Deprecated": "1", "EventCode": "0xc5", "EventName": "UNC_IIO_NUM_OUSTANDING_REQ_FROM_CPU.TO_IO", "Experimental": "1", @@ -1132,6 +1133,18 @@ "UMask": "0x8", "Unit": "IIO" }, + { + "BriefDescription": "Occupancy of outbound request queue : To device : Counts number of outbound requests/completions IIO is currently processing [This event is alias to UNC_IIO_NUM_OUSTANDING_REQ_FROM_CPU.TO_IO]", + "Counter": "2,3", + "EventCode": "0xc5", + "EventName": "UNC_IIO_NUM_OUTSTANDING_REQ_FROM_CPU.TO_IO", + "Experimental": "1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x0FF", + "UMask": "0x8", + "Unit": "IIO" + }, { "BriefDescription": "Passing data to be written", "Counter": "0,1,2,3", @@ -1300,6 +1313,18 @@ "UMask": "0x4", "Unit": "IIO" }, + { + "BriefDescription": "Posted requests sent by the integrated IO (IIO) controller to the Ubox, useful for counting message signaled interrupts (MSI).", + "Counter": "0,1,2,3", + "EventCode": "0x8e", + "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX_POSTED", + "FCMask": "0x01", + "PerPkg": "1", + "PortMask": "0x0FF", + "PublicDescription": "-", + "UMask": "0x4", + "Unit": "IIO" + }, { "BriefDescription": "All 9 bits of Page Walk Tracker Occupancy", "Counter": "0,1,2,3", From 80c6b82226c1d34e6bcad8e88c4cf319b43d5d3a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 30 Jun 2025 09:30:59 -0700 Subject: [PATCH 0566/2411] perf vendor events: Update SkylakeX events Update events from v1.36 to v1.37. 
Bring in the event updates v1.37: https://github.com/intel/perfmon/commit/6ee8e4cadda8b6954bd84236e20fab95e345578f Signed-off-by: Ian Rogers Tested-by: Thomas Falcon Link: https://lore.kernel.org/r/20250630163101.1920170-15-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- tools/perf/pmu-events/arch/x86/skylakex/pipeline.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 54c2cfb0af9c..2d9699efff58 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -33,7 +33,7 @@ GenuineIntel-6-8F,v1.28,sapphirerapids,core GenuineIntel-6-AF,v1.11,sierraforest,core GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v59,skylake,core -GenuineIntel-6-55-[01234],v1.36,skylakex,core +GenuineIntel-6-55-[01234],v1.37,skylakex,core GenuineIntel-6-86,v1.23,snowridgex,core GenuineIntel-6-8[CD],v1.17,tigerlake,core GenuineIntel-6-2C,v5,westmereep-dp,core diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json index 3dd296ab4d78..9a1349527b66 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json @@ -542,7 +542,7 @@ "Counter": "0,1,2,3", "EventCode": "0x4C", "EventName": "LOAD_HIT_PRE.SW_PF", - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", + "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. 
It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", "SampleAfterValue": "100003", "UMask": "0x1" }, From 585189332afe02c99e66c6a0d328fe05e456ff6a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 30 Jun 2025 09:31:00 -0700 Subject: [PATCH 0567/2411] perf vendor events: Update TigerLake events Update events from v1.17 to v1.18. Bring in the event updates v1.18: https://github.com/intel/perfmon/commit/943fea37d0d54232605f12abf72a812ac314cd1d Signed-off-by: Ian Rogers Tested-by: Thomas Falcon Link: https://lore.kernel.org/r/20250630163101.1920170-16-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 2d9699efff58..354ce241500b 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -35,7 +35,7 @@ GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v59,skylake,core GenuineIntel-6-55-[01234],v1.37,skylakex,core GenuineIntel-6-86,v1.23,snowridgex,core -GenuineIntel-6-8[CD],v1.17,tigerlake,core +GenuineIntel-6-8[CD],v1.18,tigerlake,core GenuineIntel-6-2C,v5,westmereep-dp,core GenuineIntel-6-25,v4,westmereep-sp,core GenuineIntel-6-2F,v4,westmereex,core diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json b/tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json index 7ef1bac08463..b417e1db9e07 100644 --- a/tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json @@ -497,7 +497,7 @@ "Counter": "0,1,2,3", "EventCode": "0x4c", "EventName": "LOAD_HIT_PREFETCH.SWPF", - "PublicDescription": "Counts all not 
software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", + "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", "SampleAfterValue": "100003", "UMask": "0x1" }, From f6f9760320a93930e70ad6016afbabc475bcdd09 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 8 Jul 2025 16:18:05 +0100 Subject: [PATCH 0568/2411] char: ipmi: remove redundant variable 'type' and check The variable 'type' is assigned the value SI_INVALID, which is zero, and is later checked for being non-zero (which is always false). The variable is not referenced anywhere else, so it is redundant and so is the check, so remove these. 
Signed-off-by: Colin Ian King Message-ID: <20250708151805.1893858-1-colin.i.king@gmail.com> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_intf.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 7fe891783a37..064cc463313d 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2107,7 +2107,6 @@ static bool __init ipmi_smi_info_same(struct smi_info *e1, struct smi_info *e2) static int __init init_ipmi_si(void) { struct smi_info *e, *e2; - enum ipmi_addr_src type = SI_INVALID; if (initialized) return 0; @@ -2189,9 +2188,6 @@ static int __init init_ipmi_si(void) initialized = true; mutex_unlock(&smi_infos_lock); - if (type) - return 0; - mutex_lock(&smi_infos_lock); if (unload_when_empty && list_empty(&smi_infos)) { mutex_unlock(&smi_infos_lock); From 88c79ecfb68fac057a0201db883518b662a0417d Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Wed, 4 Jun 2025 20:06:50 -0400 Subject: [PATCH 0569/2411] tracing: Replace opencoded cpumask_next_wrap() in move_to_next_cpu() The dedicated cpumask_next_wrap() is more verbose and effective than cpumask_next() followed by cpumask_first(). 
Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250605000651.45281-1-yury.norov@gmail.com Signed-off-by: Yury Norov Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_hwlat.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index b65353ec2837..2f7b94e98317 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -325,12 +325,9 @@ static void move_to_next_cpu(void) cpus_read_lock(); cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask); - next_cpu = cpumask_next(raw_smp_processor_id(), current_mask); + next_cpu = cpumask_next_wrap(raw_smp_processor_id(), current_mask); cpus_read_unlock(); - if (next_cpu >= nr_cpu_ids) - next_cpu = cpumask_first(current_mask); - if (next_cpu >= nr_cpu_ids) /* Shouldn't happen! */ goto change_mode; From 3aceaa539cfe3a2e62bd92e6697d9fae1c20c0be Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 9 Jun 2025 13:17:32 -0400 Subject: [PATCH 0570/2411] tracing: Use queue_rcu_work() to free filters Freeing of filters requires waiting for both an RCU grace period and an RCU task trace wait period after they have been detached from their lists. The trace task period can be quite large so the freeing of the filters was moved to use the call_rcu*() routines. The problem with that is that the callback functions of call_rcu*() are run from a soft irq and can cause latencies if the callback takes a bit of time. The filters are freed per event in a system and the syscalls system contains an event per system call, which can be over 700 events. Freeing 700 filters in a bottom half is undesirable. Instead, move the freeing to use queue_rcu_work() which is done in task context. 
Link: https://lore.kernel.org/all/9a2f0cd0-1561-4206-8966-f93ccd25927f@paulmck-laptop/ Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250609131732.04fd303b@gandalf.local.home Fixes: a9d0aab5eb33 ("tracing: Fix regression of filter waiting a long time on RCU synchronization") Suggested-by: "Paul E. McKenney" Reviewed-by: Paul E. McKenney Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_filter.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 3885aadc434d..196c8bf34970 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1344,13 +1344,14 @@ struct filter_list { struct filter_head { struct list_head list; - struct rcu_head rcu; + union { + struct rcu_head rcu; + struct rcu_work rwork; + }; }; - -static void free_filter_list(struct rcu_head *rhp) +static void free_filter_list(struct filter_head *filter_list) { - struct filter_head *filter_list = container_of(rhp, struct filter_head, rcu); struct filter_list *filter_item, *tmp; list_for_each_entry_safe(filter_item, tmp, &filter_list->list, list) { @@ -1361,9 +1362,20 @@ static void free_filter_list(struct rcu_head *rhp) kfree(filter_list); } +static void free_filter_list_work(struct work_struct *work) +{ + struct filter_head *filter_list; + + filter_list = container_of(to_rcu_work(work), struct filter_head, rwork); + free_filter_list(filter_list); +} + static void free_filter_list_tasks(struct rcu_head *rhp) { - call_rcu(rhp, free_filter_list); + struct filter_head *filter_list = container_of(rhp, struct filter_head, rcu); + + INIT_RCU_WORK(&filter_list->rwork, free_filter_list_work); + queue_rcu_work(system_wq, &filter_list->rwork); } /* @@ -1460,7 +1472,7 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir, tracepoint_synchronize_unregister(); if (head) - 
free_filter_list(&head->rcu); + free_filter_list(head); list_for_each_entry(file, &tr->events, list) { if (file->system != dir || !file->filter) @@ -2305,7 +2317,7 @@ static int process_system_preds(struct trace_subsystem_dir *dir, return 0; fail: /* No call succeeded */ - free_filter_list(&filter_list->rcu); + free_filter_list(filter_list); parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0); return -EINVAL; fail_mem: @@ -2315,7 +2327,7 @@ static int process_system_preds(struct trace_subsystem_dir *dir, if (!fail) delay_free_filter(filter_list); else - free_filter_list(&filter_list->rcu); + free_filter_list(filter_list); return -ENOMEM; } From adc353c0bfb243ebfd29b6222fa3bf149169a6de Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 20 Jun 2025 13:12:12 +0200 Subject: [PATCH 0571/2411] kernel: trace: preemptirq_delay_test: use offstack cpu mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A CPU mask on the stack is broken for large values of CONFIG_NR_CPUS: kernel/trace/preemptirq_delay_test.c: In function ‘preemptirq_delay_run’: kernel/trace/preemptirq_delay_test.c:143:1: error: the frame size of 8512 bytes is larger than 1536 bytes [-Werror=frame-larger-than=] Fall back to dynamic allocation here. 
Cc: Masami Hiramatsu Cc: Song Chen Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250620111215.3365305-1-arnd@kernel.org Fixes: 4b9091e1c194 ("kernel: trace: preemptirq_delay_test: add cpu affinity") Signed-off-by: Arnd Bergmann Signed-off-by: Steven Rostedt (Google) --- kernel/trace/preemptirq_delay_test.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c index 314ffc143039..acb0c971a408 100644 --- a/kernel/trace/preemptirq_delay_test.c +++ b/kernel/trace/preemptirq_delay_test.c @@ -117,12 +117,15 @@ static int preemptirq_delay_run(void *data) { int i; int s = MIN(burst_size, NR_TEST_FUNCS); - struct cpumask cpu_mask; + cpumask_var_t cpu_mask; + + if (!alloc_cpumask_var(&cpu_mask, GFP_KERNEL)) + return -ENOMEM; if (cpu_affinity > -1) { - cpumask_clear(&cpu_mask); - cpumask_set_cpu(cpu_affinity, &cpu_mask); - if (set_cpus_allowed_ptr(current, &cpu_mask)) + cpumask_clear(cpu_mask); + cpumask_set_cpu(cpu_affinity, cpu_mask); + if (set_cpus_allowed_ptr(current, cpu_mask)) pr_err("cpu_affinity:%d, failed\n", cpu_affinity); } @@ -139,6 +142,8 @@ static int preemptirq_delay_run(void *data) __set_current_state(TASK_RUNNING); + free_cpumask_var(cpu_mask); + return 0; } From f393a761763c542761abcf978252d431269366d6 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Tue, 8 Jul 2025 14:56:23 +0200 Subject: [PATCH 0572/2411] efi: add ovmf debug log driver Recent OVMF versions (edk2-stable202508 + newer) can write their debug log to a memory buffer. This driver exposes the log content via sysfs (/sys/firmware/efi/ovmf_debug_log). 
Signed-off-by: Gerd Hoffmann Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/Kconfig | 8 ++ drivers/firmware/efi/Makefile | 1 + drivers/firmware/efi/efi.c | 8 ++ drivers/firmware/efi/ovmf-debug-log.c | 111 ++++++++++++++++++++++++++ include/linux/efi.h | 4 + 5 files changed, 132 insertions(+) create mode 100644 drivers/firmware/efi/ovmf-debug-log.c diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index db8c5c03d3a2..eb1bff6968a5 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -263,6 +263,14 @@ config EFI_COCO_SECRET virt/coco/efi_secret module to access the secrets, which in turn allows userspace programs to access the injected secrets. +config OVMF_DEBUG_LOG + bool "Expose OVMF firmware debug log via sysfs" + depends on EFI + help + Recent OVMF versions (edk2-stable202508 + newer) can write + their debug log to a memory buffer. This driver exposes the + log content via sysfs (/sys/firmware/efi/ovmf_debug_log). + config UNACCEPTED_MEMORY bool depends on EFI_STUB diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index a2d0009560d0..8efbcf699e4f 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_APPLE_PROPERTIES) += apple-properties.o obj-$(CONFIG_EFI_RCI2_TABLE) += rci2-table.o obj-$(CONFIG_EFI_EMBEDDED_FIRMWARE) += embedded-firmware.o obj-$(CONFIG_LOAD_UEFI_KEYS) += mokvar-table.o +obj-$(CONFIG_OVMF_DEBUG_LOG) += ovmf-debug-log.o obj-$(CONFIG_SYSFB) += sysfb_efi.o diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index e57bff702b5f..1ce428e2ac8a 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -45,6 +45,7 @@ struct efi __read_mostly efi = { .esrt = EFI_INVALID_TABLE_ADDR, .tpm_log = EFI_INVALID_TABLE_ADDR, .tpm_final_log = EFI_INVALID_TABLE_ADDR, + .ovmf_debug_log = EFI_INVALID_TABLE_ADDR, #ifdef CONFIG_LOAD_UEFI_KEYS .mokvar_table = EFI_INVALID_TABLE_ADDR, #endif @@ -473,6 
+474,10 @@ static int __init efisubsys_init(void) platform_device_register_simple("efi_secret", 0, NULL, 0); #endif + if (IS_ENABLED(CONFIG_OVMF_DEBUG_LOG) && + efi.ovmf_debug_log != EFI_INVALID_TABLE_ADDR) + ovmf_log_probe(efi.ovmf_debug_log); + return 0; err_remove_group: @@ -617,6 +622,9 @@ static const efi_config_table_type_t common_tables[] __initconst = { {LINUX_EFI_MEMRESERVE_TABLE_GUID, &mem_reserve, "MEMRESERVE" }, {LINUX_EFI_INITRD_MEDIA_GUID, &initrd, "INITRD" }, {EFI_RT_PROPERTIES_TABLE_GUID, &rt_prop, "RTPROP" }, +#ifdef CONFIG_OVMF_DEBUG_LOG + {OVMF_MEMORY_LOG_TABLE_GUID, &efi.ovmf_debug_log, "OvmfDebugLog" }, +#endif #ifdef CONFIG_EFI_RCI2_TABLE {DELLEMC_EFI_RCI2_TABLE_GUID, &rci2_table_phys }, #endif diff --git a/drivers/firmware/efi/ovmf-debug-log.c b/drivers/firmware/efi/ovmf-debug-log.c new file mode 100644 index 000000000000..5b2471ffaeed --- /dev/null +++ b/drivers/firmware/efi/ovmf-debug-log.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include + +#define OVMF_DEBUG_LOG_MAGIC1 0x3167646d666d766f // "ovmfmdg1" +#define OVMF_DEBUG_LOG_MAGIC2 0x3267646d666d766f // "ovmfmdg2" + +struct ovmf_debug_log_header { + u64 magic1; + u64 magic2; + u64 hdr_size; + u64 log_size; + u64 lock; // edk2 spinlock + u64 head_off; + u64 tail_off; + u64 truncated; + u8 fw_version[128]; +}; + +static struct ovmf_debug_log_header *hdr; +static u8 *logbuf; +static u64 logbufsize; + +static ssize_t ovmf_log_read(struct file *filp, struct kobject *kobj, + const struct bin_attribute *attr, char *buf, + loff_t offset, size_t count) +{ + u64 start, end; + + start = hdr->head_off + offset; + if (hdr->head_off > hdr->tail_off && start >= hdr->log_size) + start -= hdr->log_size; + + end = start + count; + if (start > hdr->tail_off) { + if (end > hdr->log_size) + end = hdr->log_size; + } else { + if (end > hdr->tail_off) + end = hdr->tail_off; + } + + if (start > logbufsize || end > logbufsize) 
+ return 0; + if (start >= end) + return 0; + + memcpy(buf, logbuf + start, end - start); + return end - start; +} + +static struct bin_attribute ovmf_log_bin_attr = { + .attr = { + .name = "ovmf_debug_log", + .mode = 0444, + }, + .read = ovmf_log_read, +}; + +int __init ovmf_log_probe(unsigned long ovmf_debug_log_table) +{ + int ret = -EINVAL; + u64 size; + + /* map + verify header */ + hdr = memremap(ovmf_debug_log_table, sizeof(*hdr), MEMREMAP_WB); + if (!hdr) { + pr_err("OVMF debug log: header map failed\n"); + return -EINVAL; + } + + if (hdr->magic1 != OVMF_DEBUG_LOG_MAGIC1 || + hdr->magic2 != OVMF_DEBUG_LOG_MAGIC2) { + printk(KERN_ERR "OVMF debug log: magic mismatch\n"); + goto err_unmap; + } + + size = hdr->hdr_size + hdr->log_size; + pr_info("OVMF debug log: firmware version: \"%s\"\n", hdr->fw_version); + pr_info("OVMF debug log: buffer size: %lluk\n", size / 1024); + + /* map complete log buffer */ + memunmap(hdr); + hdr = memremap(ovmf_debug_log_table, size, MEMREMAP_WB); + if (!hdr) { + pr_err("OVMF debug log: buffer map failed\n"); + return -EINVAL; + } + logbuf = (void *)hdr + hdr->hdr_size; + logbufsize = hdr->log_size; + + ovmf_log_bin_attr.size = size; + ret = sysfs_create_bin_file(efi_kobj, &ovmf_log_bin_attr); + if (ret != 0) { + pr_err("OVMF debug log: sysfs register failed\n"); + goto err_unmap; + } + + return 0; + +err_unmap: + memunmap(hdr); + return ret; +} diff --git a/include/linux/efi.h b/include/linux/efi.h index 7d63d1d75f22..50db7df0efab 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -439,6 +439,7 @@ void efi_native_runtime_setup(void); /* OVMF protocol GUIDs */ #define OVMF_SEV_MEMORY_ACCEPTANCE_PROTOCOL_GUID EFI_GUID(0xc5a010fe, 0x38a7, 0x4531, 0x8a, 0x4a, 0x05, 0x00, 0xd2, 0xfd, 0x16, 0x49) +#define OVMF_MEMORY_LOG_TABLE_GUID EFI_GUID(0x95305139, 0xb20f, 0x4723, 0x84, 0x25, 0x62, 0x7c, 0x88, 0x8f, 0xf1, 0x21) typedef struct { efi_guid_t guid; @@ -642,6 +643,7 @@ extern struct efi { unsigned long esrt; /* ESRT table */ 
unsigned long tpm_log; /* TPM2 Event Log table */ unsigned long tpm_final_log; /* TPM2 Final Events Log table */ + unsigned long ovmf_debug_log; unsigned long mokvar_table; /* MOK variable config table */ unsigned long coco_secret; /* Confidential computing secret table */ unsigned long unaccepted; /* Unaccepted memory table */ @@ -1344,6 +1346,8 @@ bool efi_config_table_is_usable(const efi_guid_t *guid, unsigned long table) umode_t efi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n); +int ovmf_log_probe(unsigned long ovmf_debug_log_table); + /* * efivar ops event type */ From e7cd58d2fdf8b3d2cb8c1d7a6d8eac2c67e5e18b Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 3 Jun 2025 19:03:40 +0200 Subject: [PATCH 0573/2411] PCI: endpoint: pci-epf-vntb: Allow BAR assignment via configfs The current BAR configuration for the PCI vNTB endpoint function allocates BARs in order, which lacks flexibility and does not account for platform-specific quirks. This is problematic on Renesas platforms, where BAR_4 is a fixed 256B region that ends up being used for MW1, despite being better suited for doorbells. Add new configfs attributes to allow users to specify arbitrary BAR assignments. If no configuration is provided, the driver retains its original behavior of sequential BAR allocation, preserving compatibility with existing userspace setups. This enables use cases such as assigning BAR_2 for MW1 and using the limited BAR_4 for doorbells on Renesas platforms. 
Signed-off-by: Jerome Brunet [mani: adjusted the indent of EPF_NTB_BAR_W, fixed kdoc & squashed bar fix] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Frank Li Link: https://patch.msgid.link/20250603-pci-vntb-bar-mapping-v2-3-fc685a22ad28@baylibre.com --- drivers/pci/endpoint/functions/pci-epf-vntb.c | 132 +++++++++++++++++- 1 file changed, 125 insertions(+), 7 deletions(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c index 1db87d16da8d..ac83a6dc6116 100644 --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c @@ -73,6 +73,8 @@ enum epf_ntb_bar { BAR_MW1, BAR_MW2, BAR_MW3, + BAR_MW4, + VNTB_BAR_NUM, }; /* @@ -132,7 +134,7 @@ struct epf_ntb { bool linkup; u32 spad_size; - enum pci_barno epf_ntb_bar[6]; + enum pci_barno epf_ntb_bar[VNTB_BAR_NUM]; struct epf_ntb_ctrl *reg; @@ -654,6 +656,63 @@ static void epf_ntb_epc_destroy(struct epf_ntb *ntb) pci_epc_put(ntb->epf->epc); } + +/** + * epf_ntb_is_bar_used() - Check if a bar is used in the ntb configuration + * @ntb: NTB device that facilitates communication between HOST and VHOST + * @barno: Checked bar number + * + * Returns: true if used, false if free. + */ +static bool epf_ntb_is_bar_used(struct epf_ntb *ntb, + enum pci_barno barno) +{ + int i; + + for (i = 0; i < VNTB_BAR_NUM; i++) { + if (ntb->epf_ntb_bar[i] == barno) + return true; + } + + return false; +} + +/** + * epf_ntb_find_bar() - Assign BAR number when no configuration is provided + * @ntb: NTB device that facilitates communication between HOST and VHOST + * @epc_features: The features provided by the EPC specific to this EPF + * @bar: NTB BAR index + * @barno: Bar start index + * + * When the BAR configuration was not provided through the userspace + * configuration, automatically assign BAR as it has been historically + * done by this endpoint function. + * + * Returns: the BAR number found, if any. 
-1 otherwise + */ +static int epf_ntb_find_bar(struct epf_ntb *ntb, + const struct pci_epc_features *epc_features, + enum epf_ntb_bar bar, + enum pci_barno barno) +{ + while (ntb->epf_ntb_bar[bar] < 0) { + barno = pci_epc_get_next_free_bar(epc_features, barno); + if (barno < 0) + break; /* No more BAR available */ + + /* + * Verify if the BAR found is not already assigned + * through the provided configuration + */ + if (!epf_ntb_is_bar_used(ntb, barno)) + ntb->epf_ntb_bar[bar] = barno; + + barno += 1; + } + + return barno; +} + /** * epf_ntb_init_epc_bar() - Identify BARs to be used for each of the NTB * constructs (scratchpad region, doorbell, memorywindow) @@ -676,23 +735,21 @@ static int epf_ntb_init_epc_bar(struct epf_ntb *ntb) epc_features = pci_epc_get_features(ntb->epf->epc, ntb->epf->func_no, ntb->epf->vfunc_no); /* These are required BARs which are mandatory for NTB functionality */ - for (bar = BAR_CONFIG; bar <= BAR_MW1; bar++, barno++) { - barno = pci_epc_get_next_free_bar(epc_features, barno); + for (bar = BAR_CONFIG; bar <= BAR_MW1; bar++) { + barno = epf_ntb_find_bar(ntb, epc_features, bar, barno); if (barno < 0) { dev_err(dev, "Fail to get NTB function BAR\n"); return -ENOENT; } - ntb->epf_ntb_bar[bar] = barno; } /* These are optional BARs which don't impact NTB functionality */ - for (bar = BAR_MW1, i = 1; i < num_mws; bar++, barno++, i++) { - barno = pci_epc_get_next_free_bar(epc_features, barno); + for (bar = BAR_MW1, i = 1; i < num_mws; bar++, i++) { + barno = epf_ntb_find_bar(ntb, epc_features, bar, barno); if (barno < 0) { ntb->num_mws = i; dev_dbg(dev, "BAR not available for > MW%d\n", i + 1); } - ntb->epf_ntb_bar[bar] = barno; } return 0; @@ -860,6 +917,37 @@ static ssize_t epf_ntb_##_name##_store(struct config_item *item, \ return len; \ } +#define EPF_NTB_BAR_R(_name, _id) \ + static ssize_t epf_ntb_##_name##_show(struct config_item *item, \ + char *page) \ + { \ + struct config_group *group = to_config_group(item); \ + struct epf_ntb 
*ntb = to_epf_ntb(group); \ + \ + return sprintf(page, "%d\n", ntb->epf_ntb_bar[_id]); \ + } + +#define EPF_NTB_BAR_W(_name, _id) \ + static ssize_t epf_ntb_##_name##_store(struct config_item *item, \ + const char *page, size_t len) \ + { \ + struct config_group *group = to_config_group(item); \ + struct epf_ntb *ntb = to_epf_ntb(group); \ + int val; \ + int ret; \ + \ + ret = kstrtoint(page, 0, &val); \ + if (ret) \ + return ret; \ + \ + if (val < NO_BAR || val > BAR_5) \ + return -EINVAL; \ + \ + ntb->epf_ntb_bar[_id] = val; \ + \ + return len; \ + } + static ssize_t epf_ntb_num_mws_store(struct config_item *item, const char *page, size_t len) { @@ -899,6 +987,18 @@ EPF_NTB_MW_R(mw3) EPF_NTB_MW_W(mw3) EPF_NTB_MW_R(mw4) EPF_NTB_MW_W(mw4) +EPF_NTB_BAR_R(ctrl_bar, BAR_CONFIG) +EPF_NTB_BAR_W(ctrl_bar, BAR_CONFIG) +EPF_NTB_BAR_R(db_bar, BAR_DB) +EPF_NTB_BAR_W(db_bar, BAR_DB) +EPF_NTB_BAR_R(mw1_bar, BAR_MW1) +EPF_NTB_BAR_W(mw1_bar, BAR_MW1) +EPF_NTB_BAR_R(mw2_bar, BAR_MW2) +EPF_NTB_BAR_W(mw2_bar, BAR_MW2) +EPF_NTB_BAR_R(mw3_bar, BAR_MW3) +EPF_NTB_BAR_W(mw3_bar, BAR_MW3) +EPF_NTB_BAR_R(mw4_bar, BAR_MW4) +EPF_NTB_BAR_W(mw4_bar, BAR_MW4) CONFIGFS_ATTR(epf_ntb_, spad_count); CONFIGFS_ATTR(epf_ntb_, db_count); @@ -910,6 +1010,12 @@ CONFIGFS_ATTR(epf_ntb_, mw4); CONFIGFS_ATTR(epf_ntb_, vbus_number); CONFIGFS_ATTR(epf_ntb_, vntb_pid); CONFIGFS_ATTR(epf_ntb_, vntb_vid); +CONFIGFS_ATTR(epf_ntb_, ctrl_bar); +CONFIGFS_ATTR(epf_ntb_, db_bar); +CONFIGFS_ATTR(epf_ntb_, mw1_bar); +CONFIGFS_ATTR(epf_ntb_, mw2_bar); +CONFIGFS_ATTR(epf_ntb_, mw3_bar); +CONFIGFS_ATTR(epf_ntb_, mw4_bar); static struct configfs_attribute *epf_ntb_attrs[] = { &epf_ntb_attr_spad_count, @@ -922,6 +1028,12 @@ static struct configfs_attribute *epf_ntb_attrs[] = { &epf_ntb_attr_vbus_number, &epf_ntb_attr_vntb_pid, &epf_ntb_attr_vntb_vid, + &epf_ntb_attr_ctrl_bar, + &epf_ntb_attr_db_bar, + &epf_ntb_attr_mw1_bar, + &epf_ntb_attr_mw2_bar, + &epf_ntb_attr_mw3_bar, + &epf_ntb_attr_mw4_bar, NULL, }; @@ -1379,6 +1491,7 
@@ static int epf_ntb_probe(struct pci_epf *epf, { struct epf_ntb *ntb; struct device *dev; + int i; dev = &epf->dev; @@ -1389,6 +1502,11 @@ static int epf_ntb_probe(struct pci_epf *epf, epf->header = &epf_ntb_header; ntb->epf = epf; ntb->vbus_number = 0xff; + + /* Initially, no bar is assigned */ + for (i = 0; i < VNTB_BAR_NUM; i++) + ntb->epf_ntb_bar[i] = NO_BAR; + epf_set_drvdata(epf, ntb); dev_info(dev, "pci-ep epf driver loaded\n"); From d49ac7744f578bcc8708a845cce24d3b91f86260 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 8 Jul 2025 19:37:47 +0100 Subject: [PATCH 0574/2411] MAINTAINERS: add mm folks as reviewers to rust alloc The alloc implementation is a thin wrapper over slab/vmalloc, so to help out on the mm side of things and to be cc'd on changes, add some mm people as reviewers. Signed-off-by: Lorenzo Stoakes Acked-by: Uladzislau Rezki (Sony) Acked-by: Vlastimil Babka Acked-by: Liam R. Howlett Link: https://lore.kernel.org/r/20250708183747.104286-1-lorenzo.stoakes@oracle.com Signed-off-by: Danilo Krummrich --- MAINTAINERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..f53839653660 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21708,6 +21708,10 @@ K: \b(?i:rust)\b RUST [ALLOC] M: Danilo Krummrich +R: Lorenzo Stoakes +R: Vlastimil Babka +R: Liam R. Howlett +R: Uladzislau Rezki L: rust-for-linux@vger.kernel.org S: Maintained T: git https://github.com/Rust-for-Linux/linux.git alloc-next From a12a23720c135a299ed914adf623387c7404e014 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 9 Jul 2025 09:24:52 +0200 Subject: [PATCH 0575/2411] perf list: Remove trailing A in PAI crypto event 4210 According to the z16 and z17 Principle of Operation documents SA22-7832-13 and SA22-7832-14 the event 4210 is named PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256 without a trailing 'A'. Adjust the json definition files for this event and remove the trailing 'A' character. 
PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256A Also remove a blank ' ' between the dash '-' and the number: xxx-AES- 192 ----> xxx-AES-192 Suggested-by: Ingo Franzki Signed-off-by: Thomas Richter Reviewed-by: Ian Rogers Acked-by: Sumanth Korikkar Link: https://lore.kernel.org/r/20250709072452.1595257-1-tmricht@linux.ibm.com Signed-off-by: Namhyung Kim --- .../pmu-events/arch/s390/cf_z16/pai_crypto.json | 14 +++++++------- .../pmu-events/arch/s390/cf_z17/pai_crypto.json | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/perf/pmu-events/arch/s390/cf_z16/pai_crypto.json b/tools/perf/pmu-events/arch/s390/cf_z16/pai_crypto.json index cf8563d059b9..a82674f62409 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z16/pai_crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z16/pai_crypto.json @@ -753,14 +753,14 @@ "EventCode": "4203", "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_128", "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED TDEA 128", - "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-TDEA- 128 function ending with CC=0" + "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-TDEA-128 function ending with CC=0" }, { "Unit": "PAI-CRYPTO", "EventCode": "4204", "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_192", "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED TDEA 192", - "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-TDEA- 192 function ending with CC=0" + "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-TDEA-192 function ending with CC=0" }, { "Unit": "PAI-CRYPTO", @@ -788,21 +788,21 @@ "EventCode": "4208", "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_128", "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED AES 128", - "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-AES- 128 function ending with CC=0" + "PublicDescription": 
"PCC-Compute-Last-Block-CMAC-Using-Encrypted-AES-128 function ending with CC=0" }, { "Unit": "PAI-CRYPTO", "EventCode": "4209", "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_192", "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED AES 192", - "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-AES- 192 function ending with CC=0" + "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-AES-192 function ending with CC=0" }, { "Unit": "PAI-CRYPTO", "EventCode": "4210", - "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256A", - "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED AES 256A", - "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-AES- 256A function ending with CC=0" + "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256", + "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED AES 256", + "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-AES-256 function ending with CC=0" }, { "Unit": "PAI-CRYPTO", diff --git a/tools/perf/pmu-events/arch/s390/cf_z17/pai_crypto.json b/tools/perf/pmu-events/arch/s390/cf_z17/pai_crypto.json index a7176c988b8a..fd2eb536ecc7 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z17/pai_crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z17/pai_crypto.json @@ -800,9 +800,9 @@ { "Unit": "PAI-CRYPTO", "EventCode": "4210", - "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256A", - "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED AES 256A", - "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-AES-256A function ending with CC=0" + "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256", + "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED AES 256", + "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-AES-256 function ending with CC=0" }, { "Unit": "PAI-CRYPTO", From e9705c61b1dbe7bac9dc189de434994d8a76b191 Mon Sep 17 
00:00:00 2001 From: Jiazi Li Date: Thu, 3 Jul 2025 14:13:04 +0800 Subject: [PATCH 0576/2411] f2fs: use kfree() instead of kvfree() to free some memory options in f2fs_fill_super is allocated by kstrdup: options = kstrdup((const char *)data, GFP_KERNEL) sit_bitmap[_mir], nat_bitmap[_mir] are allocated by kmemdup: sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL); sit_i->sit_bitmap_mir = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL); nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size, GFP_KERNEL); nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size, GFP_KERNEL); write_io is allocated by f2fs_kmalloc: sbi->write_io[i] = f2fs_kmalloc(sbi, array_size(n, sizeof(struct f2fs_bio_info)) Using kfree() is more efficient. Signed-off-by: Jiazi Li Signed-off-by: peixuan.qiu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++-- fs/f2fs/segment.c | 4 ++-- fs/f2fs/super.c | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 2fd287f2bca4..be3d38d1fdee 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -3408,10 +3408,10 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) } kvfree(nm_i->free_nid_count); - kvfree(nm_i->nat_bitmap); + kfree(nm_i->nat_bitmap); kvfree(nm_i->nat_bits); #ifdef CONFIG_F2FS_CHECK_FS - kvfree(nm_i->nat_bitmap_mir); + kfree(nm_i->nat_bitmap_mir); #endif sbi->nm_info = NULL; kfree(nm_i); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b89bdb867508..a9c25b498f9c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -5813,9 +5813,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) kvfree(sit_i->dirty_sentries_bitmap); SM_I(sbi)->sit_info = NULL; - kvfree(sit_i->sit_bitmap); + kfree(sit_i->sit_bitmap); #ifdef CONFIG_F2FS_CHECK_FS - kvfree(sit_i->sit_bitmap_mir); + kfree(sit_i->sit_bitmap_mir); kvfree(sit_i->invalid_segmap); #endif kfree(sit_i); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5a1b2b6e78f3..73492270ea93 
100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1718,7 +1718,7 @@ static void f2fs_put_super(struct super_block *sb) destroy_percpu_info(sbi); f2fs_destroy_iostat(sbi); for (i = 0; i < NR_PAGE_TYPE; i++) - kvfree(sbi->write_io[i]); + kfree(sbi->write_io[i]); #if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif @@ -4935,7 +4935,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto sync_free_meta; } - kvfree(options); + kfree(options); /* recover broken superblock */ if (recovery) { @@ -5018,7 +5018,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) f2fs_destroy_iostat(sbi); free_bio_info: for (i = 0; i < NR_PAGE_TYPE; i++) - kvfree(sbi->write_io[i]); + kfree(sbi->write_io[i]); #if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); @@ -5030,7 +5030,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif fscrypt_free_dummy_policy(&F2FS_OPTION(sbi).dummy_enc_policy); - kvfree(options); + kfree(options); free_sb_buf: kfree(raw_super); free_sbi: From 81b6ecca2f15922e8d653dc037df5871e754be6e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 2 Jul 2025 14:49:25 +0800 Subject: [PATCH 0577/2411] f2fs: doc: fix wrong quota mount option description We should use "{usr,grp,prj}jquota=" to disable journaled quota, rather than using off{usr,grp,prj}jquota. 
Fixes: 4b2414d04e99 ("f2fs: support journalled quota") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 440e4ae74e44..03b1efa6d3b2 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -238,9 +238,9 @@ usrjquota= Appoint specified file and type during mount, so that quota grpjquota= information can be properly updated during recovery flow, prjjquota= : must be in root directory; jqfmt= : [vfsold,vfsv0,vfsv1]. -offusrjquota Turn off user journalled quota. -offgrpjquota Turn off group journalled quota. -offprjjquota Turn off project journalled quota. +usrjquota= Turn off user journalled quota. +grpjquota= Turn off group journalled quota. +prjjquota= Turn off project journalled quota. quota Enable plain user disk quota accounting. noquota Disable all plain disk quota option. 
alloc_mode=%s Adjust block allocation policy, which supports "reuse" From 7c30d79930132466f5be7d0b57add14d1a016bda Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 8 Jul 2025 17:53:39 +0800 Subject: [PATCH 0578/2411] f2fs: fix to avoid UAF in f2fs_sync_inode_meta() syzbot reported an UAF issue as below: [1] [2] [1] https://syzkaller.appspot.com/text?tag=CrashReport&x=16594c60580000 ================================================================== BUG: KASAN: use-after-free in __list_del_entry_valid+0xa6/0x130 lib/list_debug.c:62 Read of size 8 at addr ffff888100567dc8 by task kworker/u4:0/8 CPU: 1 PID: 8 Comm: kworker/u4:0 Tainted: G W 6.1.129-syzkaller-00017-g642656a36791 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Workqueue: writeback wb_workfn (flush-7:0) Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x151/0x1b7 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:316 [inline] print_report+0x158/0x4e0 mm/kasan/report.c:427 kasan_report+0x13c/0x170 mm/kasan/report.c:531 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report_generic.c:351 __list_del_entry_valid+0xa6/0x130 lib/list_debug.c:62 __list_del_entry include/linux/list.h:134 [inline] list_del_init include/linux/list.h:206 [inline] f2fs_inode_synced+0x100/0x2e0 fs/f2fs/super.c:1553 f2fs_update_inode+0x72/0x1c40 fs/f2fs/inode.c:588 f2fs_update_inode_page+0x135/0x170 fs/f2fs/inode.c:706 f2fs_write_inode+0x416/0x790 fs/f2fs/inode.c:734 write_inode fs/fs-writeback.c:1460 [inline] __writeback_single_inode+0x4cf/0xb80 fs/fs-writeback.c:1677 writeback_sb_inodes+0xb32/0x1910 fs/fs-writeback.c:1903 __writeback_inodes_wb+0x118/0x3f0 fs/fs-writeback.c:1974 wb_writeback+0x3da/0xa00 fs/fs-writeback.c:2081 wb_check_background_flush fs/fs-writeback.c:2151 [inline] wb_do_writeback fs/fs-writeback.c:2239 [inline] wb_workfn+0xbba/0x1030 fs/fs-writeback.c:2266 process_one_work+0x73d/0xcb0 kernel/workqueue.c:2299 worker_thread+0xa60/0x1260 
kernel/workqueue.c:2446 kthread+0x26d/0x300 kernel/kthread.c:386 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 Allocated by task 298: kasan_save_stack mm/kasan/common.c:45 [inline] kasan_set_track+0x4b/0x70 mm/kasan/common.c:52 kasan_save_alloc_info+0x1f/0x30 mm/kasan/generic.c:505 __kasan_slab_alloc+0x6c/0x80 mm/kasan/common.c:333 kasan_slab_alloc include/linux/kasan.h:202 [inline] slab_post_alloc_hook+0x53/0x2c0 mm/slab.h:768 slab_alloc_node mm/slub.c:3421 [inline] slab_alloc mm/slub.c:3431 [inline] __kmem_cache_alloc_lru mm/slub.c:3438 [inline] kmem_cache_alloc_lru+0x102/0x270 mm/slub.c:3454 alloc_inode_sb include/linux/fs.h:3255 [inline] f2fs_alloc_inode+0x2d/0x350 fs/f2fs/super.c:1437 alloc_inode fs/inode.c:261 [inline] iget_locked+0x18c/0x7e0 fs/inode.c:1373 f2fs_iget+0x55/0x4ca0 fs/f2fs/inode.c:486 f2fs_lookup+0x3c1/0xb50 fs/f2fs/namei.c:484 __lookup_slow+0x2b9/0x3e0 fs/namei.c:1689 lookup_slow+0x5a/0x80 fs/namei.c:1706 walk_component+0x2e7/0x410 fs/namei.c:1997 lookup_last fs/namei.c:2454 [inline] path_lookupat+0x16d/0x450 fs/namei.c:2478 filename_lookup+0x251/0x600 fs/namei.c:2507 vfs_statx+0x107/0x4b0 fs/stat.c:229 vfs_fstatat fs/stat.c:267 [inline] vfs_lstat include/linux/fs.h:3434 [inline] __do_sys_newlstat fs/stat.c:423 [inline] __se_sys_newlstat+0xda/0x7c0 fs/stat.c:417 __x64_sys_newlstat+0x5b/0x70 fs/stat.c:417 x64_sys_call+0x52/0x9a0 arch/x86/include/generated/asm/syscalls_64.h:7 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x3b/0x80 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x68/0xd2 Freed by task 0: kasan_save_stack mm/kasan/common.c:45 [inline] kasan_set_track+0x4b/0x70 mm/kasan/common.c:52 kasan_save_free_info+0x2b/0x40 mm/kasan/generic.c:516 ____kasan_slab_free+0x131/0x180 mm/kasan/common.c:241 __kasan_slab_free+0x11/0x20 mm/kasan/common.c:249 kasan_slab_free include/linux/kasan.h:178 [inline] slab_free_hook mm/slub.c:1745 [inline] slab_free_freelist_hook mm/slub.c:1771 [inline] slab_free 
mm/slub.c:3686 [inline] kmem_cache_free+0x291/0x560 mm/slub.c:3711 f2fs_free_inode+0x24/0x30 fs/f2fs/super.c:1584 i_callback+0x4b/0x70 fs/inode.c:250 rcu_do_batch+0x552/0xbe0 kernel/rcu/tree.c:2297 rcu_core+0x502/0xf40 kernel/rcu/tree.c:2557 rcu_core_si+0x9/0x10 kernel/rcu/tree.c:2574 handle_softirqs+0x1db/0x650 kernel/softirq.c:624 __do_softirq kernel/softirq.c:662 [inline] invoke_softirq kernel/softirq.c:479 [inline] __irq_exit_rcu+0x52/0xf0 kernel/softirq.c:711 irq_exit_rcu+0x9/0x10 kernel/softirq.c:723 instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1118 [inline] sysvec_apic_timer_interrupt+0xa9/0xc0 arch/x86/kernel/apic/apic.c:1118 asm_sysvec_apic_timer_interrupt+0x1b/0x20 arch/x86/include/asm/idtentry.h:691 Last potentially related work creation: kasan_save_stack+0x3b/0x60 mm/kasan/common.c:45 __kasan_record_aux_stack+0xb4/0xc0 mm/kasan/generic.c:486 kasan_record_aux_stack_noalloc+0xb/0x10 mm/kasan/generic.c:496 __call_rcu_common kernel/rcu/tree.c:2807 [inline] call_rcu+0xdc/0x10f0 kernel/rcu/tree.c:2926 destroy_inode fs/inode.c:316 [inline] evict+0x87d/0x930 fs/inode.c:720 iput_final fs/inode.c:1834 [inline] iput+0x616/0x690 fs/inode.c:1860 do_unlinkat+0x4e1/0x920 fs/namei.c:4396 __do_sys_unlink fs/namei.c:4437 [inline] __se_sys_unlink fs/namei.c:4435 [inline] __x64_sys_unlink+0x49/0x50 fs/namei.c:4435 x64_sys_call+0x289/0x9a0 arch/x86/include/generated/asm/syscalls_64.h:88 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x3b/0x80 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x68/0xd2 The buggy address belongs to the object at ffff888100567a10 which belongs to the cache f2fs_inode_cache of size 1360 The buggy address is located 952 bytes inside of 1360-byte region [ffff888100567a10, ffff888100567f60) The buggy address belongs to the physical page: page:ffffea0004015800 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x100560 head:ffffea0004015800 order:3 compound_mapcount:0 compound_pincount:0 flags: 
0x4000000000010200(slab|head|zone=1) raw: 4000000000010200 0000000000000000 dead000000000122 ffff8881002c4d80 raw: 0000000000000000 0000000080160016 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 3, migratetype Reclaimable, gfp_mask 0xd2050(__GFP_IO|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_RECLAIMABLE), pid 298, tgid 298 (syz-executor330), ts 26489303743, free_ts 0 set_page_owner include/linux/page_owner.h:33 [inline] post_alloc_hook+0x213/0x220 mm/page_alloc.c:2637 prep_new_page+0x1b/0x110 mm/page_alloc.c:2644 get_page_from_freelist+0x3a98/0x3b10 mm/page_alloc.c:4539 __alloc_pages+0x234/0x610 mm/page_alloc.c:5837 alloc_slab_page+0x6c/0xf0 include/linux/gfp.h:-1 allocate_slab mm/slub.c:1962 [inline] new_slab+0x90/0x3e0 mm/slub.c:2015 ___slab_alloc+0x6f9/0xb80 mm/slub.c:3203 __slab_alloc+0x5d/0xa0 mm/slub.c:3302 slab_alloc_node mm/slub.c:3387 [inline] slab_alloc mm/slub.c:3431 [inline] __kmem_cache_alloc_lru mm/slub.c:3438 [inline] kmem_cache_alloc_lru+0x149/0x270 mm/slub.c:3454 alloc_inode_sb include/linux/fs.h:3255 [inline] f2fs_alloc_inode+0x2d/0x350 fs/f2fs/super.c:1437 alloc_inode fs/inode.c:261 [inline] iget_locked+0x18c/0x7e0 fs/inode.c:1373 f2fs_iget+0x55/0x4ca0 fs/f2fs/inode.c:486 f2fs_fill_super+0x5360/0x6dc0 fs/f2fs/super.c:4488 mount_bdev+0x282/0x3b0 fs/super.c:1445 f2fs_mount+0x34/0x40 fs/f2fs/super.c:4743 legacy_get_tree+0xf1/0x190 fs/fs_context.c:632 page_owner free stack trace missing Memory state around the buggy address: ffff888100567c80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888100567d00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff888100567d80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888100567e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888100567e80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== [2] 
https://syzkaller.appspot.com/text?tag=CrashLog&x=13654c60580000 [ 24.675720][ T28] audit: type=1400 audit(1745327318.732:72): avc: denied { write } for pid=298 comm="syz-executor399" name="/" dev="loop0" ino=3 scontext=root:sysadm_r:sysadm_t tcontext=system_u:object_r:unlabeled_t tclass=dir permissive=1 [ 24.705426][ T296] ------------[ cut here ]------------ [ 24.706608][ T28] audit: type=1400 audit(1745327318.732:73): avc: denied { remove_name } for pid=298 comm="syz-executor399" name="file0" dev="loop0" ino=4 scontext=root:sysadm_r:sysadm_t tcontext=system_u:object_r:unlabeled_t tclass=dir permissive=1 [ 24.711550][ T296] WARNING: CPU: 0 PID: 296 at fs/f2fs/inode.c:847 f2fs_evict_inode+0x1262/0x1540 [ 24.734141][ T28] audit: type=1400 audit(1745327318.732:74): avc: denied { rename } for pid=298 comm="syz-executor399" name="file0" dev="loop0" ino=4 scontext=root:sysadm_r:sysadm_t tcontext=system_u:object_r:unlabeled_t tclass=dir permissive=1 [ 24.742969][ T296] Modules linked in: [ 24.765201][ T28] audit: type=1400 audit(1745327318.732:75): avc: denied { add_name } for pid=298 comm="syz-executor399" name="bus" scontext=root:sysadm_r:sysadm_t tcontext=system_u:object_r:unlabeled_t tclass=dir permissive=1 [ 24.768847][ T296] CPU: 0 PID: 296 Comm: syz-executor399 Not tainted 6.1.129-syzkaller-00017-g642656a36791 #0 [ 24.799506][ T296] Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 [ 24.809401][ T296] RIP: 0010:f2fs_evict_inode+0x1262/0x1540 [ 24.815018][ T296] Code: 34 70 4a ff eb 0d e8 2d 70 4a ff 4d 89 e5 4c 8b 64 24 18 48 8b 5c 24 28 4c 89 e7 e8 78 38 03 00 e9 84 fc ff ff e8 0e 70 4a ff <0f> 0b 4c 89 f7 be 08 00 00 00 e8 7f 21 92 ff f0 41 80 0e 04 e9 61 [ 24.834584][ T296] RSP: 0018:ffffc90000db7a40 EFLAGS: 00010293 [ 24.840465][ T296] RAX: ffffffff822aca42 RBX: 0000000000000002 RCX: ffff888110948000 [ 24.848291][ T296] RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000 [ 24.856064][ T296] RBP: 
ffffc90000db7bb0 R08: ffffffff822ac6a8 R09: ffffed10200b005d [ 24.864073][ T296] R10: 0000000000000000 R11: dffffc0000000001 R12: ffff888100580000 [ 24.871812][ T296] R13: dffffc0000000000 R14: ffff88810fef4078 R15: 1ffff920001b6f5c The root cause is that, with a fuzzed image, f2fs may fail to clear the FI_DIRTY_INODE flag for the target inode; after f2fs_evict_inode(), the inode is still linked in the sbi->inode_list[DIRTY_META] global list, so once a checkpoint is triggered, f2fs_sync_inode_meta() may access the released inode. In f2fs_evict_inode(), let's always call f2fs_inode_synced() to clear the FI_DIRTY_INODE flag and drop the inode from the global dirty list to avoid this UAF issue. Fixes: 0f18b462b2e5 ("f2fs: flush inode metadata when checkpoint is doing") Closes: https://syzkaller.appspot.com/bug?extid=849174b2efaf0d8be6ba Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 083d52a42bfb..d3c6f3202b69 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -949,8 +949,12 @@ void f2fs_evict_inode(struct inode *inode) if (likely(!f2fs_cp_error(sbi) && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))) f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); - else - f2fs_inode_synced(inode); + + /* + * anyway, it needs to remove the inode from sbi->inode_list[DIRTY_META] + * list to avoid UAF in f2fs_sync_inode_meta() during checkpoint.
+ */ + f2fs_inode_synced(inode); /* for the case f2fs_new_inode() was failed, .i_ino is zero, skip it */ if (inode->i_ino) From a509a55f8eecc8970b3980c6f06886bbff0e2f68 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 8 Jul 2025 17:56:57 +0800 Subject: [PATCH 0579/2411] f2fs: fix to avoid panic in f2fs_evict_inode As syzbot [1] reported as below: R10: 0000000000000100 R11: 0000000000000206 R12: 00007ffe17473450 R13: 00007f28b1c10854 R14: 000000000000dae5 R15: 00007ffe17474520 ---[ end trace 0000000000000000 ]--- ================================================================== BUG: KASAN: use-after-free in __list_del_entry_valid+0xa6/0x130 lib/list_debug.c:62 Read of size 8 at addr ffff88812d962278 by task syz-executor/564 CPU: 1 PID: 564 Comm: syz-executor Tainted: G W 6.1.129-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Call Trace: __dump_stack+0x21/0x24 lib/dump_stack.c:88 dump_stack_lvl+0xee/0x158 lib/dump_stack.c:106 print_address_description+0x71/0x210 mm/kasan/report.c:316 print_report+0x4a/0x60 mm/kasan/report.c:427 kasan_report+0x122/0x150 mm/kasan/report.c:531 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report_generic.c:351 __list_del_entry_valid+0xa6/0x130 lib/list_debug.c:62 __list_del_entry include/linux/list.h:134 [inline] list_del_init include/linux/list.h:206 [inline] f2fs_inode_synced+0xf7/0x2e0 fs/f2fs/super.c:1531 f2fs_update_inode+0x74/0x1c40 fs/f2fs/inode.c:585 f2fs_update_inode_page+0x137/0x170 fs/f2fs/inode.c:703 f2fs_write_inode+0x4ec/0x770 fs/f2fs/inode.c:731 write_inode fs/fs-writeback.c:1460 [inline] __writeback_single_inode+0x4a0/0xab0 fs/fs-writeback.c:1677 writeback_single_inode+0x221/0x8b0 fs/fs-writeback.c:1733 sync_inode_metadata+0xb6/0x110 fs/fs-writeback.c:2789 f2fs_sync_inode_meta+0x16d/0x2a0 fs/f2fs/checkpoint.c:1159 block_operations fs/f2fs/checkpoint.c:1269 [inline] f2fs_write_checkpoint+0xca3/0x2100 fs/f2fs/checkpoint.c:1658 kill_f2fs_super+0x231/0x390 
fs/f2fs/super.c:4668 deactivate_locked_super+0x98/0x100 fs/super.c:332 deactivate_super+0xaf/0xe0 fs/super.c:363 cleanup_mnt+0x45f/0x4e0 fs/namespace.c:1186 __cleanup_mnt+0x19/0x20 fs/namespace.c:1193 task_work_run+0x1c6/0x230 kernel/task_work.c:203 exit_task_work include/linux/task_work.h:39 [inline] do_exit+0x9fb/0x2410 kernel/exit.c:871 do_group_exit+0x210/0x2d0 kernel/exit.c:1021 __do_sys_exit_group kernel/exit.c:1032 [inline] __se_sys_exit_group kernel/exit.c:1030 [inline] __x64_sys_exit_group+0x3f/0x40 kernel/exit.c:1030 x64_sys_call+0x7b4/0x9a0 arch/x86/include/generated/asm/syscalls_64.h:232 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x4c/0xa0 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x68/0xd2 RIP: 0033:0x7f28b1b8e169 Code: Unable to access opcode bytes at 0x7f28b1b8e13f. RSP: 002b:00007ffe174710a8 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 00007f28b1c10879 RCX: 00007f28b1b8e169 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000001 RBP: 0000000000000002 R08: 00007ffe1746ee47 R09: 00007ffe17472360 R10: 0000000000000009 R11: 0000000000000246 R12: 00007ffe17472360 R13: 00007f28b1c10854 R14: 000000000000dae5 R15: 00007ffe17474520 Allocated by task 569: kasan_save_stack mm/kasan/common.c:45 [inline] kasan_set_track+0x4b/0x70 mm/kasan/common.c:52 kasan_save_alloc_info+0x25/0x30 mm/kasan/generic.c:505 __kasan_slab_alloc+0x72/0x80 mm/kasan/common.c:328 kasan_slab_alloc include/linux/kasan.h:201 [inline] slab_post_alloc_hook+0x4f/0x2c0 mm/slab.h:737 slab_alloc_node mm/slub.c:3398 [inline] slab_alloc mm/slub.c:3406 [inline] __kmem_cache_alloc_lru mm/slub.c:3413 [inline] kmem_cache_alloc_lru+0x104/0x220 mm/slub.c:3429 alloc_inode_sb include/linux/fs.h:3245 [inline] f2fs_alloc_inode+0x2d/0x340 fs/f2fs/super.c:1419 alloc_inode fs/inode.c:261 [inline] iget_locked+0x186/0x880 fs/inode.c:1373 f2fs_iget+0x55/0x4c60 fs/f2fs/inode.c:483 f2fs_lookup+0x366/0xab0 fs/f2fs/namei.c:487 
__lookup_slow+0x2a3/0x3d0 fs/namei.c:1690 lookup_slow+0x57/0x70 fs/namei.c:1707 walk_component+0x2e6/0x410 fs/namei.c:1998 lookup_last fs/namei.c:2455 [inline] path_lookupat+0x180/0x490 fs/namei.c:2479 filename_lookup+0x1f0/0x500 fs/namei.c:2508 vfs_statx+0x10b/0x660 fs/stat.c:229 vfs_fstatat fs/stat.c:267 [inline] vfs_lstat include/linux/fs.h:3424 [inline] __do_sys_newlstat fs/stat.c:423 [inline] __se_sys_newlstat+0xd5/0x350 fs/stat.c:417 __x64_sys_newlstat+0x5b/0x70 fs/stat.c:417 x64_sys_call+0x393/0x9a0 arch/x86/include/generated/asm/syscalls_64.h:7 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x4c/0xa0 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x68/0xd2 Freed by task 13: kasan_save_stack mm/kasan/common.c:45 [inline] kasan_set_track+0x4b/0x70 mm/kasan/common.c:52 kasan_save_free_info+0x31/0x50 mm/kasan/generic.c:516 ____kasan_slab_free+0x132/0x180 mm/kasan/common.c:236 __kasan_slab_free+0x11/0x20 mm/kasan/common.c:244 kasan_slab_free include/linux/kasan.h:177 [inline] slab_free_hook mm/slub.c:1724 [inline] slab_free_freelist_hook+0xc2/0x190 mm/slub.c:1750 slab_free mm/slub.c:3661 [inline] kmem_cache_free+0x12d/0x2a0 mm/slub.c:3683 f2fs_free_inode+0x24/0x30 fs/f2fs/super.c:1562 i_callback+0x4c/0x70 fs/inode.c:250 rcu_do_batch+0x503/0xb80 kernel/rcu/tree.c:2297 rcu_core+0x5a2/0xe70 kernel/rcu/tree.c:2557 rcu_core_si+0x9/0x10 kernel/rcu/tree.c:2574 handle_softirqs+0x178/0x500 kernel/softirq.c:578 run_ksoftirqd+0x28/0x30 kernel/softirq.c:945 smpboot_thread_fn+0x45a/0x8c0 kernel/smpboot.c:164 kthread+0x270/0x310 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 Last potentially related work creation: kasan_save_stack+0x3a/0x60 mm/kasan/common.c:45 __kasan_record_aux_stack+0xb6/0xc0 mm/kasan/generic.c:486 kasan_record_aux_stack_noalloc+0xb/0x10 mm/kasan/generic.c:496 call_rcu+0xd4/0xf70 kernel/rcu/tree.c:2845 destroy_inode fs/inode.c:316 [inline] evict+0x7da/0x870 fs/inode.c:720 iput_final fs/inode.c:1834 
[inline] iput+0x62b/0x830 fs/inode.c:1860 do_unlinkat+0x356/0x540 fs/namei.c:4397 __do_sys_unlink fs/namei.c:4438 [inline] __se_sys_unlink fs/namei.c:4436 [inline] __x64_sys_unlink+0x49/0x50 fs/namei.c:4436 x64_sys_call+0x958/0x9a0 arch/x86/include/generated/asm/syscalls_64.h:88 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x4c/0xa0 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x68/0xd2 The buggy address belongs to the object at ffff88812d961f20 which belongs to the cache f2fs_inode_cache of size 1200 The buggy address is located 856 bytes inside of 1200-byte region [ffff88812d961f20, ffff88812d9623d0) The buggy address belongs to the physical page: page:ffffea0004b65800 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x12d960 head:ffffea0004b65800 order:2 compound_mapcount:0 compound_pincount:0 flags: 0x4000000000010200(slab|head|zone=1) raw: 4000000000010200 0000000000000000 dead000000000122 ffff88810a94c500 raw: 0000000000000000 00000000800c000c 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 2, migratetype Reclaimable, gfp_mask 0x1d2050(__GFP_IO|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_RECLAIMABLE), pid 569, tgid 568 (syz.2.16), ts 55943246141, free_ts 0 set_page_owner include/linux/page_owner.h:31 [inline] post_alloc_hook+0x1d0/0x1f0 mm/page_alloc.c:2532 prep_new_page mm/page_alloc.c:2539 [inline] get_page_from_freelist+0x2e63/0x2ef0 mm/page_alloc.c:4328 __alloc_pages+0x235/0x4b0 mm/page_alloc.c:5605 alloc_slab_page include/linux/gfp.h:-1 [inline] allocate_slab mm/slub.c:1939 [inline] new_slab+0xec/0x4b0 mm/slub.c:1992 ___slab_alloc+0x6f6/0xb50 mm/slub.c:3180 __slab_alloc+0x5e/0xa0 mm/slub.c:3279 slab_alloc_node mm/slub.c:3364 [inline] slab_alloc mm/slub.c:3406 [inline] __kmem_cache_alloc_lru mm/slub.c:3413 [inline] kmem_cache_alloc_lru+0x13f/0x220 mm/slub.c:3429 alloc_inode_sb 
include/linux/fs.h:3245 [inline] f2fs_alloc_inode+0x2d/0x340 fs/f2fs/super.c:1419 alloc_inode fs/inode.c:261 [inline] iget_locked+0x186/0x880 fs/inode.c:1373 f2fs_iget+0x55/0x4c60 fs/f2fs/inode.c:483 f2fs_fill_super+0x3ad7/0x6bb0 fs/f2fs/super.c:4293 mount_bdev+0x2ae/0x3e0 fs/super.c:1443 f2fs_mount+0x34/0x40 fs/f2fs/super.c:4642 legacy_get_tree+0xea/0x190 fs/fs_context.c:632 vfs_get_tree+0x89/0x260 fs/super.c:1573 do_new_mount+0x25a/0xa20 fs/namespace.c:3056 page_owner free stack trace missing Memory state around the buggy address: ffff88812d962100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88812d962180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88812d962200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88812d962280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88812d962300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== [1] https://syzkaller.appspot.com/x/report.txt?x=13448368580000 This bug can be reproduced w/ the reproducer [2], once we enable CONFIG_F2FS_CHECK_FS config, the reproducer will trigger panic as below, so the direct reason of this bug is the same as the one below patch [3] fixed. kernel BUG at fs/f2fs/inode.c:857! 
RIP: 0010:f2fs_evict_inode+0x1204/0x1a20 Call Trace: evict+0x32a/0x7a0 do_unlinkat+0x37b/0x5b0 __x64_sys_unlink+0xad/0x100 do_syscall_64+0x5a/0xb0 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 RIP: 0010:f2fs_evict_inode+0x1204/0x1a20 [2] https://syzkaller.appspot.com/x/repro.c?x=17495ccc580000 [3] https://lore.kernel.org/linux-f2fs-devel/20250702120321.1080759-1-chao@kernel.org Tracepoints before panic: f2fs_unlink_enter: dev = (7,0), dir ino = 3, i_size = 4096, i_blocks = 8, name = file1 f2fs_unlink_exit: dev = (7,0), ino = 7, ret = 0 f2fs_evict_inode: dev = (7,0), ino = 7, pino = 3, i_mode = 0x81ed, i_size = 10, i_nlink = 0, i_blocks = 0, i_advise = 0x0 f2fs_truncate_node: dev = (7,0), ino = 7, nid = 8, block_address = 0x3c05 f2fs_unlink_enter: dev = (7,0), dir ino = 3, i_size = 4096, i_blocks = 8, name = file3 f2fs_unlink_exit: dev = (7,0), ino = 8, ret = 0 f2fs_evict_inode: dev = (7,0), ino = 8, pino = 3, i_mode = 0x81ed, i_size = 9000, i_nlink = 0, i_blocks = 24, i_advise = 0x4 f2fs_truncate: dev = (7,0), ino = 8, pino = 3, i_mode = 0x81ed, i_size = 0, i_nlink = 0, i_blocks = 24, i_advise = 0x4 f2fs_truncate_blocks_enter: dev = (7,0), ino = 8, i_size = 0, i_blocks = 24, start file offset = 0 f2fs_truncate_blocks_exit: dev = (7,0), ino = 8, ret = -2 The root cause is: in the fuzzed image, dnode #8 belongs to inode #7, so after inode #7 was evicted, dnode #8 was dropped. However, there is a dirent that has ino #8, so once we unlink file3, in f2fs_evict_inode(), both f2fs_truncate() and f2fs_update_inode_page() fail because node #8 cannot be loaded; as a result, f2fs_inode_synced() is never called to clear the inode's dirty status. Let's fix this by calling f2fs_inode_synced() in the error path of f2fs_evict_inode().
PS: As I verified, the reproducer [2] can trigger this bug in v6.1.129, but it failed in v6.16-rc4, this is because the testcase will stop due to other corruption has been detected by f2fs: F2FS-fs (loop0): inconsistent node block, node_type:2, nid:8, node_footer[nid:8,ino:8,ofs:0,cpver:5013063228981249506,blkaddr:15366] F2FS-fs (loop0): f2fs_lookup: inode (ino=9) has zero i_nlink Fixes: 0f18b462b2e5 ("f2fs: flush inode metadata when checkpoint is doing") Closes: https://syzkaller.appspot.com/x/report.txt?x=13448368580000 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index d3c6f3202b69..fc774de1c752 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -933,6 +933,19 @@ void f2fs_evict_inode(struct inode *inode) f2fs_update_inode_page(inode); if (dquot_initialize_needed(inode)) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + + /* + * If both f2fs_truncate() and f2fs_update_inode_page() failed + * due to fuzzed corrupted inode, call f2fs_inode_synced() to + * avoid triggering later f2fs_bug_on(). + */ + if (is_inode_flag_set(inode, FI_DIRTY_INODE)) { + f2fs_warn(sbi, + "f2fs_evict_inode: inode is dirty, ino:%lu", + inode->i_ino); + f2fs_inode_synced(inode); + set_sbi_flag(sbi, SBI_NEED_FSCK); + } } if (freeze_protected) sb_end_intwrite(inode->i_sb); From c1cfc87e49525853ebe9dce2ffce6332eb811fa6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 7 Jul 2025 19:46:14 +0800 Subject: [PATCH 0580/2411] f2fs: introduce is_cur{seg,sec}() There are redundant codes in IS_CUR{SEG,SEC}() macros, let's introduce inline is_cur{seg,sec}() functions, and use a loop in it for cleanup. Meanwhile, it enhances expansibility, as it doesn't need to change is_cur{seg,sec}() when we add a new log header. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 4 ++-- fs/f2fs/gc.c | 2 +- fs/f2fs/segment.c | 22 +++++++++---------- fs/f2fs/segment.h | 54 +++++++++++++++++++++-------------------------- 4 files changed, 38 insertions(+), 44 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 3417e7e550b2..43a83bbd3bc5 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -91,7 +91,7 @@ static void update_multidevice_stats(struct f2fs_sb_info *sbi) seg_blks = get_seg_entry(sbi, j)->valid_blocks; /* update segment stats */ - if (IS_CURSEG(sbi, j)) + if (is_curseg(sbi, j)) dev_stats[i].devstats[0][DEVSTAT_INUSE]++; else if (seg_blks == BLKS_PER_SEG(sbi)) dev_stats[i].devstats[0][DEVSTAT_FULL]++; @@ -109,7 +109,7 @@ static void update_multidevice_stats(struct f2fs_sb_info *sbi) sec_blks = get_sec_entry(sbi, j)->valid_blocks; /* update section stats */ - if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, j))) + if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, j))) dev_stats[i].devstats[1][DEVSTAT_INUSE]++; else if (sec_blks == BLKS_PER_SEC(sbi)) dev_stats[i].devstats[1][DEVSTAT_FULL]++; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 30b95ebb4499..778f9ec40b70 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -2065,7 +2065,7 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi, .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), }; - if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, segno))) + if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, segno))) continue; do_garbage_collect(sbi, segno, &gc_list, FG_GC, true, false); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a9c25b498f9c..df5a1e226aa9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -773,7 +773,7 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); /* need not be added */ - if (IS_CURSEG(sbi, segno)) + if (is_curseg(sbi, segno)) return; if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) @@ -800,7 +800,7 @@ static void 
__locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, !valid_blocks) || valid_blocks == CAP_BLKS_PER_SEC(sbi)); - if (!IS_CURSEC(sbi, secno)) + if (!is_cursec(sbi, secno)) set_bit(secno, dirty_i->dirty_secmap); } } @@ -839,7 +839,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, return; } - if (!IS_CURSEC(sbi, secno)) + if (!is_cursec(sbi, secno)) set_bit(secno, dirty_i->dirty_secmap); } } @@ -856,7 +856,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) unsigned short valid_blocks, ckpt_valid_blocks; unsigned int usable_blocks; - if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno)) + if (segno == NULL_SEGNO || is_curseg(sbi, segno)) return; usable_blocks = f2fs_usable_blks_in_seg(sbi, segno); @@ -889,7 +889,7 @@ void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi) for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { if (get_valid_blocks(sbi, segno, false)) continue; - if (IS_CURSEG(sbi, segno)) + if (is_curseg(sbi, segno)) continue; __locate_dirty_segment(sbi, segno, PRE); __remove_dirty_segment(sbi, segno, DIRTY); @@ -2108,7 +2108,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, if (!force) { if (!f2fs_realtime_discard_enable(sbi) || (!se->valid_blocks && - !IS_CURSEG(sbi, cpc->trim_start)) || + !is_curseg(sbi, cpc->trim_start)) || SM_I(sbi)->dcc_info->nr_discards >= SM_I(sbi)->dcc_info->max_discards) return false; @@ -2236,7 +2236,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, next: secno = GET_SEC_FROM_SEG(sbi, start); start_segno = GET_SEG_FROM_SEC(sbi, secno); - if (!IS_CURSEC(sbi, secno) && + if (!is_cursec(sbi, secno) && !get_valid_blocks(sbi, start, true)) f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), BLKS_PER_SEC(sbi)); @@ -4107,14 +4107,14 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (!recover_curseg) { /* for recovery flow */ - if (se->valid_blocks == 0 
&& !IS_CURSEG(sbi, segno)) { + if (se->valid_blocks == 0 && !is_curseg(sbi, segno)) { if (old_blkaddr == NULL_ADDR) type = CURSEG_COLD_DATA; else type = CURSEG_WARM_DATA; } } else { - if (IS_CURSEG(sbi, segno)) { + if (is_curseg(sbi, segno)) { /* se->type is volatile as SSR allocation */ type = __f2fs_get_curseg(sbi, segno); f2fs_bug_on(sbi, type == NO_CHECK_TYPE); @@ -5150,7 +5150,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi)) continue; - if (IS_CURSEC(sbi, secno)) + if (is_cursec(sbi, secno)) continue; set_bit(secno, dirty_i->dirty_secmap); } @@ -5286,7 +5286,7 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, * Get # of valid block of the zone. */ valid_block_cnt = get_valid_blocks(sbi, zone_segno, true); - if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) { + if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) { f2fs_notice(sbi, "Open zones: valid block[0x%x,0x%x] cond[%s]", zone_segno, valid_block_cnt, blk_zone_cond_str(zone->cond)); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index db619fd2f51a..d2c73f641134 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -34,34 +34,6 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, seg_type >= NR_PERSISTENT_LOG); } -#define IS_CURSEG(sbi, seg) \ - (((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno)) - -#define IS_CURSEC(sbi, secno) \ - (((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ - 
SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno / \ - SEGS_PER_SEC(sbi))) - #define MAIN_BLKADDR(sbi) \ (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \ le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr)) @@ -318,6 +290,28 @@ static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type) return (struct curseg_info *)(SM_I(sbi)->curseg_array + type); } +static inline bool is_curseg(struct f2fs_sb_info *sbi, unsigned int segno) +{ + int i; + + for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { + if (segno == CURSEG_I(sbi, i)->segno) + return true; + } + return false; +} + +static inline bool is_cursec(struct f2fs_sb_info *sbi, unsigned int secno) +{ + int i; + + for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { + if (secno == GET_SEC_FROM_SEG(sbi, CURSEG_I(sbi, i)->segno)) + return true; + } + return false; +} + static inline struct seg_entry *get_seg_entry(struct f2fs_sb_info *sbi, unsigned int segno) { @@ -509,7 +503,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, free_i->free_segments++; - if (!inmem && IS_CURSEC(sbi, secno)) + if (!inmem && is_cursec(sbi, secno)) goto unlock_out; /* check large section */ @@ -986,7 +980,7 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) { - if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno)) + if (is_cursec(sbi, secno) || (sbi->cur_victim_sec == secno)) return true; return false; } From 878e1e94a8aafb2f93a333a1aaed5e1c5f17e339 Mon 
Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 20 Jun 2025 17:30:20 -0700 Subject: [PATCH 0581/2411] tracing/sched: Remove obsolete comment on suffixes Commit ac01fa73f530 ("tracepoint: Have tracepoints created with DECLARE_TRACE() have _tp suffix") makes it unnecessary to manually add a suffix. Remove a now obsolete comment. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Ricardo Neri Link: https://lore.kernel.org/20250620-rneri-tp-comment-fix-v1-1-e0f6495ac33c@linux.intel.com Signed-off-by: Ricardo Neri Signed-off-by: Steven Rostedt (Google) --- include/trace/events/sched.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 4e6b2910cec3..f24c373bcc44 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -829,8 +829,6 @@ TRACE_EVENT(sched_wake_idle_without_ipi, /* * Following tracepoints are not exported in tracefs and provide hooking * mechanisms only for testing and debugging purposes. - * - * Postfixed with _tp to make them easily identifiable in the code. */ DECLARE_TRACE(pelt_cfs, TP_PROTO(struct cfs_rq *cfs_rq), From 50b4233a22b1ee9ccd0e847597de66ce21329ddb Mon Sep 17 00:00:00 2001 From: Julian Vetter Date: Tue, 3 Jun 2025 15:21:21 +0200 Subject: [PATCH 0582/2411] include/linux/jhash.h: replace __get_unaligned_cpu32 in jhash function __get_unaligned_cpu32() is deprecated. So, replace it with the more generic get_unaligned() and just cast the input parameter.
Link: https://lkml.kernel.org/r/20250603132121.3674066-1-julian@outer-limits.org Signed-off-by: Julian Vetter Cc: Arnd Bergmann Cc: Wei-Hsin Yeh Signed-off-by: Andrew Morton --- include/linux/jhash.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/jhash.h b/include/linux/jhash.h index fa26a2dd3b52..7c1c1821c694 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -24,7 +24,7 @@ * Jozsef */ #include -#include +#include /* Best hash sizes are of power of two */ #define jhash_size(n) ((u32)1<<(n)) @@ -77,9 +77,9 @@ static inline u32 jhash(const void *key, u32 length, u32 initval) /* All but the last block: affect some 32 bits of (a,b,c) */ while (length > 12) { - a += __get_unaligned_cpu32(k); - b += __get_unaligned_cpu32(k + 4); - c += __get_unaligned_cpu32(k + 8); + a += get_unaligned((u32 *)k); + b += get_unaligned((u32 *)(k + 4)); + c += get_unaligned((u32 *)(k + 8)); __jhash_mix(a, b, c); length -= 12; k += 12; From 85df0d505ed64d72c86822733f648074d1ae2bca Mon Sep 17 00:00:00 2001 From: Su Hui Date: Tue, 27 May 2025 17:23:34 +0800 Subject: [PATCH 0583/2411] ocfs2: replace simple_strtol with kstrtol kstrtol() is better because simple_strtol() ignores overflow. And using kstrtol() is more concise. 
Link: https://lkml.kernel.org/r/20250527092333.1917391-1-suhui@nfschina.com Signed-off-by: Su Hui Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/stack_user.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 77edcd70f72c..0f045e45fa0c 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -360,7 +360,6 @@ static int ocfs2_control_do_setnode_msg(struct file *file, struct ocfs2_control_message_setn *msg) { long nodenum; - char *ptr = NULL; struct ocfs2_control_private *p = file->private_data; if (ocfs2_control_get_handshake_state(file) != @@ -375,8 +374,7 @@ static int ocfs2_control_do_setnode_msg(struct file *file, return -EINVAL; msg->space = msg->newline = '\0'; - nodenum = simple_strtol(msg->nodestr, &ptr, 16); - if (!ptr || *ptr) + if (kstrtol(msg->nodestr, 16, &nodenum)) return -EINVAL; if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) || @@ -391,7 +389,6 @@ static int ocfs2_control_do_setversion_msg(struct file *file, struct ocfs2_control_message_setv *msg) { long major, minor; - char *ptr = NULL; struct ocfs2_control_private *p = file->private_data; struct ocfs2_protocol_version *max = &ocfs2_user_plugin.sp_max_proto; @@ -409,11 +406,9 @@ static int ocfs2_control_do_setversion_msg(struct file *file, return -EINVAL; msg->space1 = msg->space2 = msg->newline = '\0'; - major = simple_strtol(msg->major, &ptr, 16); - if (!ptr || *ptr) + if (kstrtol(msg->major, 16, &major)) return -EINVAL; - minor = simple_strtol(msg->minor, &ptr, 16); - if (!ptr || *ptr) + if (kstrtol(msg->minor, 16, &minor)) return -EINVAL; /* @@ -441,7 +436,6 @@ static int ocfs2_control_do_down_msg(struct file *file, struct ocfs2_control_message_down *msg) { long nodenum; - char *p = NULL; if (ocfs2_control_get_handshake_state(file) != OCFS2_CONTROL_HANDSHAKE_VALID) @@ -456,8 +450,7 @@ static int 
ocfs2_control_do_down_msg(struct file *file, return -EINVAL; msg->space1 = msg->space2 = msg->newline = '\0'; - nodenum = simple_strtol(msg->nodestr, &p, 16); - if (!p || *p) + if (kstrtol(msg->nodestr, 16, &nodenum)) return -EINVAL; if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) || From 08e2153dd9440ebe6bb82d4dfd7b10bdf14c660c Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 21 May 2025 14:18:38 +0200 Subject: [PATCH 0584/2411] alpha: replace sprintf()/strcpy() with scnprintf()/strscpy() Replace sprintf() with the safer variant scnprintf() and use its return value instead of calculating the string length again using strlen(). Use strscpy() instead of the deprecated strcpy(). No functional changes intended. Link: https://github.com/KSPP/linux/issues/88 Link: https://lkml.kernel.org/r/20250521121840.5653-1-thorsten.blum@linux.dev Signed-off-by: Thorsten Blum Cc: Alexander Gordeev Cc: Geert Uytterhoeven Cc: guoweikang Cc: Matt Turner Cc: Mike Rapoport Cc: Richard Henderson Signed-off-by: Andrew Morton --- arch/alpha/kernel/core_marvel.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index b1bfbd11980d..d38f4d6759e4 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -79,10 +80,12 @@ mk_resource_name(int pe, int port, char *str) { char tmp[80]; char *name; - - sprintf(tmp, "PCI %s PE %d PORT %d", str, pe, port); - name = memblock_alloc_or_panic(strlen(tmp) + 1, SMP_CACHE_BYTES); - strcpy(name, tmp); + size_t sz; + + sz = scnprintf(tmp, sizeof(tmp), "PCI %s PE %d PORT %d", str, pe, port); + sz += 1; /* NUL terminator */ + name = memblock_alloc_or_panic(sz, SMP_CACHE_BYTES); + strscpy(name, tmp, sz); return name; } From 449e0b4ed5a16c72289a786c5333fc97520402bf Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Fri, 9 May 2025 08:29:27 +0200 Subject: [PATCH 
0585/2411] fork: clean-up naming of vm_stack/vm_struct variables in vmap stacks code There are two data types: "struct vm_struct" and "struct vm_stack" that have the same local variable names: vm_stack, or vm, or s, which makes the code confusing to read. Change the code so the naming is consistent: struct vm_struct is always called vm_area struct vm_stack is always called vm_stack One change altering vfree(vm_stack) to vfree(vm_area->addr) may look like a semantic change but it is not: vm_area->addr points to the vm_stack. This was done to improve readability. [linus.walleij@linaro.org: rebased and added new users of the variable names, address review comments] Link: https://lore.kernel.org/20240311164638.2015063-4-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20250509-fork-fixes-v3-2-e6c69dd356f2@linaro.org Signed-off-by: Pasha Tatashin Signed-off-by: Linus Walleij Acked-by: Mike Rapoport (Microsoft) Cc: Mateusz Guzik Signed-off-by: Andrew Morton --- kernel/fork.c | 60 +++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 1ee8eb11f38b..5fd893c907a5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -207,14 +207,14 @@ struct vm_stack { struct vm_struct *stack_vm_area; }; -static bool try_release_thread_stack_to_cache(struct vm_struct *vm) +static bool try_release_thread_stack_to_cache(struct vm_struct *vm_area) { unsigned int i; for (i = 0; i < NR_CACHED_STACKS; i++) { struct vm_struct *tmp = NULL; - if (this_cpu_try_cmpxchg(cached_stacks[i], &tmp, vm)) + if (this_cpu_try_cmpxchg(cached_stacks[i], &tmp, vm_area)) return true; } return false; @@ -223,11 +223,12 @@ static bool try_release_thread_stack_to_cache(struct vm_struct *vm) static void thread_stack_free_rcu(struct rcu_head *rh) { struct vm_stack *vm_stack = container_of(rh, struct vm_stack, rcu); + struct vm_struct *vm_area = vm_stack->stack_vm_area; if 
(try_release_thread_stack_to_cache(vm_stack->stack_vm_area)) return; - vfree(vm_stack); + vfree(vm_area->addr); } static void thread_stack_delayed_free(struct task_struct *tsk) @@ -240,32 +241,32 @@ static void thread_stack_delayed_free(struct task_struct *tsk) static int free_vm_stack_cache(unsigned int cpu) { - struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu); + struct vm_struct **cached_vm_stack_areas = per_cpu_ptr(cached_stacks, cpu); int i; for (i = 0; i < NR_CACHED_STACKS; i++) { - struct vm_struct *vm_stack = cached_vm_stacks[i]; + struct vm_struct *vm_area = cached_vm_stack_areas[i]; - if (!vm_stack) + if (!vm_area) continue; - vfree(vm_stack->addr); - cached_vm_stacks[i] = NULL; + vfree(vm_area->addr); + cached_vm_stack_areas[i] = NULL; } return 0; } -static int memcg_charge_kernel_stack(struct vm_struct *vm) +static int memcg_charge_kernel_stack(struct vm_struct *vm_area) { int i; int ret; int nr_charged = 0; - BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); + BUG_ON(vm_area->nr_pages != THREAD_SIZE / PAGE_SIZE); for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { - ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0); + ret = memcg_kmem_charge_page(vm_area->pages[i], GFP_KERNEL, 0); if (ret) goto err; nr_charged++; @@ -273,38 +274,35 @@ static int memcg_charge_kernel_stack(struct vm_struct *vm) return 0; err: for (i = 0; i < nr_charged; i++) - memcg_kmem_uncharge_page(vm->pages[i], 0); + memcg_kmem_uncharge_page(vm_area->pages[i], 0); return ret; } static int alloc_thread_stack_node(struct task_struct *tsk, int node) { - struct vm_struct *vm; + struct vm_struct *vm_area; void *stack; int i; for (i = 0; i < NR_CACHED_STACKS; i++) { - struct vm_struct *s; - - s = this_cpu_xchg(cached_stacks[i], NULL); - - if (!s) + vm_area = this_cpu_xchg(cached_stacks[i], NULL); + if (!vm_area) continue; /* Reset stack metadata. 
*/ - kasan_unpoison_range(s->addr, THREAD_SIZE); + kasan_unpoison_range(vm_area->addr, THREAD_SIZE); - stack = kasan_reset_tag(s->addr); + stack = kasan_reset_tag(vm_area->addr); /* Clear stale pointers from reused stack. */ memset(stack, 0, THREAD_SIZE); - if (memcg_charge_kernel_stack(s)) { - vfree(s->addr); + if (memcg_charge_kernel_stack(vm_area)) { + vfree(vm_area->addr); return -ENOMEM; } - tsk->stack_vm_area = s; + tsk->stack_vm_area = vm_area; tsk->stack = stack; return 0; } @@ -320,8 +318,8 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) if (!stack) return -ENOMEM; - vm = find_vm_area(stack); - if (memcg_charge_kernel_stack(vm)) { + vm_area = find_vm_area(stack); + if (memcg_charge_kernel_stack(vm_area)) { vfree(stack); return -ENOMEM; } @@ -330,7 +328,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) * free_thread_stack() can be called in interrupt context, * so cache the vm_struct. */ - tsk->stack_vm_area = vm; + tsk->stack_vm_area = vm_area; stack = kasan_reset_tag(stack); tsk->stack = stack; return 0; @@ -437,11 +435,11 @@ static struct kmem_cache *mm_cachep; static void account_kernel_stack(struct task_struct *tsk, int account) { if (IS_ENABLED(CONFIG_VMAP_STACK)) { - struct vm_struct *vm = task_stack_vm_area(tsk); + struct vm_struct *vm_area = task_stack_vm_area(tsk); int i; for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) - mod_lruvec_page_state(vm->pages[i], NR_KERNEL_STACK_KB, + mod_lruvec_page_state(vm_area->pages[i], NR_KERNEL_STACK_KB, account * (PAGE_SIZE / 1024)); } else { void *stack = task_stack_page(tsk); @@ -457,12 +455,12 @@ void exit_task_stack_account(struct task_struct *tsk) account_kernel_stack(tsk, -1); if (IS_ENABLED(CONFIG_VMAP_STACK)) { - struct vm_struct *vm; + struct vm_struct *vm_area; int i; - vm = task_stack_vm_area(tsk); + vm_area = task_stack_vm_area(tsk); for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) - memcg_kmem_uncharge_page(vm->pages[i], 0); + 
memcg_kmem_uncharge_page(vm_area->pages[i], 0); } } From f7b0ff2bc91d8bb2ba9fdb182da39dd9733b1c50 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 9 May 2025 09:25:09 +0200 Subject: [PATCH 0586/2411] fork: define a local GFP_VMAP_STACK The current allocation of VMAP stack memory is using (THREADINFO_GFP & ~__GFP_ACCOUNT) which is a complicated way of saying (GFP_KERNEL | __GFP_ZERO): : define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) : define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) This is an unfortunate side-effect of independent changes blurring the picture: commit 19809c2da28aee5860ad9a2eff760730a0710df0 changed (THREADINFO_GFP | __GFP_HIGHMEM) to just THREADINFO_GFP since highmem became implicit. commit 9b6f7e163cd0f468d1b9696b785659d3c27c8667 then added stack caching and rewrote the allocation to (THREADINFO_GFP & ~__GFP_ACCOUNT) as cached stacks need to be accounted separately. However that code, when it eventually accounts the memory does this: ret = memcg_kmem_charge(vm->pages[i], GFP_KERNEL, 0) so the memory is charged as a GFP_KERNEL allocation. Define a unique GFP_VMAP_STACK to use GFP_KERNEL | __GFP_ZERO and move the comment there. Link: https://lkml.kernel.org/r/20250509-gfp-stack-v1-1-82f6f7efc210@linaro.org Signed-off-by: Linus Walleij Reported-by: Mateusz Guzik Cc: Pasha Tatashin Cc: Mike Rapoport (Microsoft) Signed-off-by: Andrew Morton --- kernel/fork.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 5fd893c907a5..6616d173307a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -201,6 +201,12 @@ static inline void free_task_struct(struct task_struct *tsk) */ #define NR_CACHED_STACKS 2 static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]); +/* + * Allocated stacks are cached and later reused by new threads, so memcg + * accounting is performed by the code assigning/releasing stacks to tasks. + * We need a zeroed memory without __GFP_ACCOUNT. 
+ */ +#define GFP_VMAP_STACK (GFP_KERNEL | __GFP_ZERO) struct vm_stack { struct rcu_head rcu; @@ -307,13 +313,8 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) return 0; } - /* - * Allocated stacks are cached and later reused by new threads, - * so memcg accounting is performed manually on assigning/releasing - * stacks to tasks. Drop __GFP_ACCOUNT. - */ stack = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, - THREADINFO_GFP & ~__GFP_ACCOUNT, + GFP_VMAP_STACK, node, __builtin_return_address(0)); if (!stack) return -ENOMEM; From 0ba5a25ad1c951fa25baa8c30a526b647ab50d47 Mon Sep 17 00:00:00 2001 From: Elijah Wright Date: Tue, 10 Jun 2025 15:56:28 -0700 Subject: [PATCH 0587/2411] kernel: relay: use __GFP_ZERO in relay_alloc_buf Passing the __GFP_ZERO flag to alloc_page should result in less overhead than using memset() Link: https://lkml.kernel.org/r/20250610225639.314970-3-git@elijahs.space Signed-off-by: Elijah Wright Signed-off-by: Andrew Morton --- kernel/relay.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/relay.c b/kernel/relay.c index c0c93a04d4ce..3ee5b038d0d9 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -118,7 +118,7 @@ static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size) return NULL; for (i = 0; i < n_pages; i++) { - buf->page_array[i] = alloc_page(GFP_KERNEL); + buf->page_array[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); if (unlikely(!buf->page_array[i])) goto depopulate; set_page_private(buf->page_array[i], (unsigned long)buf); @@ -127,7 +127,6 @@ static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size) if (!mem) goto depopulate; - memset(mem, 0, *size); buf->page_count = n_pages; return mem; From ca742a822a32aca68adb8ffa75a7d9c8887c41d1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 Jun 2025 15:39:00 +0100 Subject: [PATCH 0588/2411] squashfs: pass the inode to squashfs_readahead_fragment() Patch series "squashfs: Remove page->mapping references".
We're close to being able to kill page->mapping. These two patches get us a little bit closer. This patch (of 2): Eliminate a reference to page->mapping by passing the inode from the caller. Link: https://lkml.kernel.org/r/20250612143903.2849289-1-willy@infradead.org Link: https://lkml.kernel.org/r/20250612143903.2849289-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Phillip Lougher Signed-off-by: Andrew Morton --- fs/squashfs/file.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 5ca2baa16dc2..ce7d661d5ad8 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -493,10 +493,9 @@ static int squashfs_read_folio(struct file *file, struct folio *folio) return res; } -static int squashfs_readahead_fragment(struct page **page, +static int squashfs_readahead_fragment(struct inode *inode, struct page **page, unsigned int pages, unsigned int expected, loff_t start) { - struct inode *inode = page[0]->mapping->host; struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, squashfs_i(inode)->fragment_block, squashfs_i(inode)->fragment_size); @@ -605,8 +604,8 @@ static void squashfs_readahead(struct readahead_control *ractl) if (start >> msblk->block_log == file_end && squashfs_i(inode)->fragment_block != SQUASHFS_INVALID_BLK) { - res = squashfs_readahead_fragment(pages, nr_pages, - expected, start); + res = squashfs_readahead_fragment(inode, pages, + nr_pages, expected, start); if (res) goto skip_pages; continue; From c9e3fb050e9cb0d3a833b2c62b35ea42cdd81e89 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 Jun 2025 15:39:01 +0100 Subject: [PATCH 0589/2411] squashfs: use folios in squashfs_bio_read_cached() Remove an access to page->mapping and a few calls to the old page-based APIs. This doesn't support large folios, but it's still a nice improvement. 
Link: https://lkml.kernel.org/r/20250612143903.2849289-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Phillip Lougher Signed-off-by: Andrew Morton --- fs/squashfs/block.c | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 3061043e915c..296c5a0fcc40 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -80,23 +80,22 @@ static int squashfs_bio_read_cached(struct bio *fullbio, struct address_space *cache_mapping, u64 index, int length, u64 read_start, u64 read_end, int page_count) { - struct page *head_to_cache = NULL, *tail_to_cache = NULL; + struct folio *head_to_cache = NULL, *tail_to_cache = NULL; struct block_device *bdev = fullbio->bi_bdev; int start_idx = 0, end_idx = 0; - struct bvec_iter_all iter_all; + struct folio_iter fi;; struct bio *bio = NULL; - struct bio_vec *bv; int idx = 0; int err = 0; #ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL - struct page **cache_pages = kmalloc_array(page_count, + struct folio **cache_folios = kmalloc_array(page_count, sizeof(void *), GFP_KERNEL | __GFP_ZERO); #endif - bio_for_each_segment_all(bv, fullbio, iter_all) { - struct page *page = bv->bv_page; + bio_for_each_folio_all(fi, fullbio) { + struct folio *folio = fi.folio; - if (page->mapping == cache_mapping) { + if (folio->mapping == cache_mapping) { idx++; continue; } @@ -111,13 +110,13 @@ static int squashfs_bio_read_cached(struct bio *fullbio, * adjacent blocks. 
*/ if (idx == 0 && index != read_start) - head_to_cache = page; + head_to_cache = folio; else if (idx == page_count - 1 && index + length != read_end) - tail_to_cache = page; + tail_to_cache = folio; #ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL /* Cache all pages in the BIO for repeated reads */ - else if (cache_pages) - cache_pages[idx] = page; + else if (cache_folios) + cache_folios[idx] = folio; #endif if (!bio || idx != end_idx) { @@ -150,45 +149,45 @@ static int squashfs_bio_read_cached(struct bio *fullbio, return err; if (head_to_cache) { - int ret = add_to_page_cache_lru(head_to_cache, cache_mapping, + int ret = filemap_add_folio(cache_mapping, head_to_cache, read_start >> PAGE_SHIFT, GFP_NOIO); if (!ret) { - SetPageUptodate(head_to_cache); - unlock_page(head_to_cache); + folio_mark_uptodate(head_to_cache); + folio_unlock(head_to_cache); } } if (tail_to_cache) { - int ret = add_to_page_cache_lru(tail_to_cache, cache_mapping, + int ret = filemap_add_folio(cache_mapping, tail_to_cache, (read_end >> PAGE_SHIFT) - 1, GFP_NOIO); if (!ret) { - SetPageUptodate(tail_to_cache); - unlock_page(tail_to_cache); + folio_mark_uptodate(tail_to_cache); + folio_unlock(tail_to_cache); } } #ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL - if (!cache_pages) + if (!cache_folios) goto out; for (idx = 0; idx < page_count; idx++) { - if (!cache_pages[idx]) + if (!cache_folios[idx]) continue; - int ret = add_to_page_cache_lru(cache_pages[idx], cache_mapping, + int ret = filemap_add_folio(cache_mapping, cache_folios[idx], (read_start >> PAGE_SHIFT) + idx, GFP_NOIO); if (!ret) { - SetPageUptodate(cache_pages[idx]); - unlock_page(cache_pages[idx]); + folio_mark_uptodate(cache_folios[idx]); + folio_unlock(cache_folios[idx]); } } - kfree(cache_pages); + kfree(cache_folios); out: #endif return 0; From 2489e958129ff7cbf26a34ee33cdc9ccbd68fe3c Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 12 Jun 2025 14:11:57 +0800 Subject: [PATCH 0590/2411] relayfs: abolish prev_padding Patch series "relayfs: misc 
changes", v5. The series mostly focuses on the error counters which helps every user debug their own kernel module. This patch (of 5): prev_padding represents the unused space of certain subbuffer. If the content of a call of relay_write() exceeds the limit of the remainder of this subbuffer, it will skip storing in the rest space and record the start point as buf->prev_padding in relay_switch_subbuf(). Since the buf is a per-cpu big buffer, the point of prev_padding as a global value for the whole buffer instead of a single subbuffer (whose padding info is stored in buf->padding[]) seems meaningless from the real use cases, so we don't bother to record it any more. Link: https://lkml.kernel.org/r/20250612061201.34272-1-kerneljasonxing@gmail.com Link: https://lkml.kernel.org/r/20250612061201.34272-2-kerneljasonxing@gmail.com Signed-off-by: Jason Xing Reviewed-by: Yushan Zhou Reviewed-by: Masami Hiramatsu (Google) Cc: Jens Axboe Cc: Mathieu Desnoyers Cc: Steven Rostedt Signed-off-by: Andrew Morton --- drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 3 +-- drivers/net/wwan/iosm/iosm_ipc_trace.c | 3 +-- drivers/net/wwan/t7xx/t7xx_port_trace.c | 2 +- include/linux/relay.h | 5 +---- kernel/relay.c | 14 ++++++++------ kernel/trace/blktrace.c | 2 +- 6 files changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index e8a04e476c57..09a64f224c49 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -220,8 +220,7 @@ static int guc_action_control_log(struct intel_guc *guc, bool enable, */ static int subbuf_start_callback(struct rchan_buf *buf, void *subbuf, - void *prev_subbuf, - size_t prev_padding) + void *prev_subbuf) { /* * Use no-overwrite mode by default, where relay will stop accepting diff --git a/drivers/net/wwan/iosm/iosm_ipc_trace.c b/drivers/net/wwan/iosm/iosm_ipc_trace.c index eeecfa3d10c5..9656254c1c6c 100644 --- 
a/drivers/net/wwan/iosm/iosm_ipc_trace.c +++ b/drivers/net/wwan/iosm/iosm_ipc_trace.c @@ -51,8 +51,7 @@ static int ipc_trace_remove_buf_file_handler(struct dentry *dentry) } static int ipc_trace_subbuf_start_handler(struct rchan_buf *buf, void *subbuf, - void *prev_subbuf, - size_t prev_padding) + void *prev_subbuf) { if (relay_buf_full(buf)) { pr_err_ratelimited("Relay_buf full dropping traces"); diff --git a/drivers/net/wwan/t7xx/t7xx_port_trace.c b/drivers/net/wwan/t7xx/t7xx_port_trace.c index 4ed8b4e29bf1..f16d3b01302c 100644 --- a/drivers/net/wwan/t7xx/t7xx_port_trace.c +++ b/drivers/net/wwan/t7xx/t7xx_port_trace.c @@ -33,7 +33,7 @@ static int t7xx_trace_remove_buf_file_handler(struct dentry *dentry) } static int t7xx_trace_subbuf_start_handler(struct rchan_buf *buf, void *subbuf, - void *prev_subbuf, size_t prev_padding) + void *prev_subbuf) { if (relay_buf_full(buf)) { pr_err_ratelimited("Relay_buf full dropping traces"); diff --git a/include/linux/relay.h b/include/linux/relay.h index b3224111d074..e10a0fdf4325 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -47,7 +47,6 @@ struct rchan_buf unsigned int page_count; /* number of current buffer pages */ unsigned int finalized; /* buffer has been finalized */ size_t *padding; /* padding counts per sub-buffer */ - size_t prev_padding; /* temporary variable */ size_t bytes_consumed; /* bytes consumed in cur read subbuf */ size_t early_bytes; /* bytes consumed before VFS inited */ unsigned int cpu; /* this buf's cpu */ @@ -84,7 +83,6 @@ struct rchan_callbacks * @buf: the channel buffer containing the new sub-buffer * @subbuf: the start of the new sub-buffer * @prev_subbuf: the start of the previous sub-buffer - * @prev_padding: unused space at the end of previous sub-buffer * * The client should return 1 to continue logging, 0 to stop * logging. 
@@ -100,8 +98,7 @@ struct rchan_callbacks */ int (*subbuf_start) (struct rchan_buf *buf, void *subbuf, - void *prev_subbuf, - size_t prev_padding); + void *prev_subbuf); /* * create_buf_file - create file to represent a relay channel buffer diff --git a/kernel/relay.c b/kernel/relay.c index 3ee5b038d0d9..fc6ad76b789d 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -249,13 +249,13 @@ EXPORT_SYMBOL_GPL(relay_buf_full); */ static int relay_subbuf_start(struct rchan_buf *buf, void *subbuf, - void *prev_subbuf, size_t prev_padding) + void *prev_subbuf) { if (!buf->chan->cb->subbuf_start) return !relay_buf_full(buf); return buf->chan->cb->subbuf_start(buf, subbuf, - prev_subbuf, prev_padding); + prev_subbuf); } /** @@ -301,7 +301,7 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init) for (i = 0; i < buf->chan->n_subbufs; i++) buf->padding[i] = 0; - relay_subbuf_start(buf, buf->data, NULL, 0); + relay_subbuf_start(buf, buf->data, NULL); } /** @@ -554,9 +554,11 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) goto toobig; if (buf->offset != buf->chan->subbuf_size + 1) { - buf->prev_padding = buf->chan->subbuf_size - buf->offset; + size_t prev_padding; + + prev_padding = buf->chan->subbuf_size - buf->offset; old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; - buf->padding[old_subbuf] = buf->prev_padding; + buf->padding[old_subbuf] = prev_padding; buf->subbufs_produced++; if (buf->dentry) d_inode(buf->dentry)->i_size += @@ -581,7 +583,7 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) new_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; new = buf->start + new_subbuf * buf->chan->subbuf_size; buf->offset = 0; - if (!relay_subbuf_start(buf, new, old, buf->prev_padding)) { + if (!relay_subbuf_start(buf, new, old)) { buf->offset = buf->chan->subbuf_size + 1; return 0; } diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 3f6a7bdc6edf..d3083c88474e 100644 --- a/kernel/trace/blktrace.c +++ 
b/kernel/trace/blktrace.c @@ -461,7 +461,7 @@ static const struct file_operations blk_msg_fops = { * the user space app in telling how many lost events there were. */ static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, - void *prev_subbuf, size_t prev_padding) + void *prev_subbuf) { struct blk_trace *bt; From ca01a90ae7bf9bb22137e719366bdc0f387675c2 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 12 Jun 2025 14:11:58 +0800 Subject: [PATCH 0591/2411] relayfs: support a counter tracking if per-cpu buffers is full When using relay mechanism, we often encounter the case where new data are lost or old unconsumed data are overwritten because of slow reader. Add 'full' field in per-cpu buffer structure to detect if the above case is happening. Relay has two modes: 1) non-overwrite mode, 2) overwrite mode. So buffer being full here respectively means: 1) relayfs doesn't intend to accept new data and then simply drop them, or 2) relayfs is going to start over again and overwrite old unread data with new data. Note: this counter doesn't need any explicit lock to protect from being modified by different threads for the better performance consideration. Writers calling __relay_write/relay_write should consider how to use the lock and ensure it performs under the lock protection, thus it's not necessary to add a new small lock here. 
Link: https://lkml.kernel.org/r/20250612061201.34272-3-kerneljasonxing@gmail.com Signed-off-by: Jason Xing Reviewed-by: Yushan Zhou Reviewed-by: Jens Axboe Reviewed-by: Masami Hiramatsu (Google) Cc: Mathieu Desnoyers Cc: Steven Rostedt Signed-off-by: Andrew Morton --- include/linux/relay.h | 9 +++++++++ kernel/relay.c | 8 +++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/include/linux/relay.h b/include/linux/relay.h index e10a0fdf4325..cd77eb285a48 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -28,6 +28,14 @@ */ #define RELAYFS_CHANNEL_VERSION 7 +/* + * Relay buffer statistics + */ +struct rchan_buf_stats +{ + unsigned int full_count; /* counter for buffer full */ +}; + /* * Per-cpu relay channel buffer */ @@ -43,6 +51,7 @@ struct rchan_buf struct irq_work wakeup_work; /* reader wakeup */ struct dentry *dentry; /* channel file dentry */ struct kref kref; /* channel buffer refcount */ + struct rchan_buf_stats stats; /* buffer stats */ struct page **page_array; /* array of current buffer pages */ unsigned int page_count; /* number of current buffer pages */ unsigned int finalized; /* buffer has been finalized */ diff --git a/kernel/relay.c b/kernel/relay.c index fc6ad76b789d..4b07efddc2cf 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -251,8 +251,13 @@ EXPORT_SYMBOL_GPL(relay_buf_full); static int relay_subbuf_start(struct rchan_buf *buf, void *subbuf, void *prev_subbuf) { + int full = relay_buf_full(buf); + + if (full) + buf->stats.full_count++; + if (!buf->chan->cb->subbuf_start) - return !relay_buf_full(buf); + return !full; return buf->chan->cb->subbuf_start(buf, subbuf, prev_subbuf); @@ -297,6 +302,7 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init) buf->finalized = 0; buf->data = buf->start; buf->offset = 0; + buf->stats.full_count = 0; for (i = 0; i < buf->chan->n_subbufs; i++) buf->padding[i] = 0; From a53202ce7fbafd24f854865b02eff891e246c550 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: 
Thu, 12 Jun 2025 14:11:59 +0800 Subject: [PATCH 0592/2411] relayfs: introduce getting relayfs statistics function In this version, only support getting the counter for buffer full and implement the framework of how it works. Users can pass certain flag to fetch what field/statistics they expect to know. Each time it only returns one result. So do not pass multiple flags. Link: https://lkml.kernel.org/r/20250612061201.34272-4-kerneljasonxing@gmail.com Signed-off-by: Jason Xing Reviewed-by: Yushan Zhou Reviewed-by: Masami Hiramatsu (Google) Cc: Jens Axboe Cc: Mathieu Desnoyers Cc: Steven Rostedt Signed-off-by: Andrew Morton --- include/linux/relay.h | 7 +++++++ kernel/relay.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/include/linux/relay.h b/include/linux/relay.h index cd77eb285a48..5310967f9d74 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -31,6 +31,12 @@ /* * Relay buffer statistics */ +enum { + RELAY_STATS_BUF_FULL = (1 << 0), + + RELAY_STATS_LAST = RELAY_STATS_BUF_FULL, +}; + struct rchan_buf_stats { unsigned int full_count; /* counter for buffer full */ @@ -167,6 +173,7 @@ struct rchan *relay_open(const char *base_filename, void *private_data); extern void relay_close(struct rchan *chan); extern void relay_flush(struct rchan *chan); +size_t relay_stats(struct rchan *chan, int flags); extern void relay_subbufs_consumed(struct rchan *chan, unsigned int cpu, size_t consumed); diff --git a/kernel/relay.c b/kernel/relay.c index 4b07efddc2cf..2fc27c0e771e 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -700,6 +700,36 @@ void relay_flush(struct rchan *chan) } EXPORT_SYMBOL_GPL(relay_flush); +/** + * relay_stats - get channel buffer statistics + * @chan: the channel + * @flags: select particular information to get + * + * Returns the count of certain field that caller specifies. 
+ */ +size_t relay_stats(struct rchan *chan, int flags) +{ + unsigned int i, count = 0; + struct rchan_buf *rbuf; + + if (!chan || flags > RELAY_STATS_LAST) + return 0; + + if (chan->is_global) { + rbuf = *per_cpu_ptr(chan->buf, 0); + if (flags & RELAY_STATS_BUF_FULL) + count = rbuf->stats.full_count; + } else { + for_each_online_cpu(i) { + rbuf = *per_cpu_ptr(chan->buf, i); + if (rbuf && flags & RELAY_STATS_BUF_FULL) + count += rbuf->stats.full_count; + } + } + + return count; +} + /** * relay_file_open - open file op for relay files * @inode: the inode From 7f2173894f7bfe63bcb241f419b15ed5ce79f0d1 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 12 Jun 2025 14:12:00 +0800 Subject: [PATCH 0593/2411] blktrace: use rbuf->stats.full as a drop indicator in relayfs Replace internal subbuf_start in blktrace with the default policy in relayfs. Remove dropped field from struct blktrace. Correspondingly, call the common helper in relay. By incrementing full_count to keep track of how many times we encountered a full buffer issue, user space will know how many events were lost. 
Link: https://lkml.kernel.org/r/20250612061201.34272-5-kerneljasonxing@gmail.com Signed-off-by: Jason Xing Reviewed-by: Yushan Zhou Reviewed-by: Jens Axboe Reviewed-by: Masami Hiramatsu (Google) Cc: Mathieu Desnoyers Cc: Steven Rostedt Signed-off-by: Andrew Morton --- kernel/trace/blktrace.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index d3083c88474e..5401b9006135 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -415,9 +415,10 @@ static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { struct blk_trace *bt = filp->private_data; + size_t dropped = relay_stats(bt->rchan, RELAY_STATS_BUF_FULL); char buf[16]; - snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped)); + snprintf(buf, sizeof(buf), "%zu\n", dropped); return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf)); } @@ -456,23 +457,6 @@ static const struct file_operations blk_msg_fops = { .llseek = noop_llseek, }; -/* - * Keep track of how many times we encountered a full subbuffer, to aid - * the user space app in telling how many lost events there were. 
- */ -static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, - void *prev_subbuf) -{ - struct blk_trace *bt; - - if (!relay_buf_full(buf)) - return 1; - - bt = buf->chan->private_data; - atomic_inc(&bt->dropped); - return 0; -} - static int blk_remove_buf_file_callback(struct dentry *dentry) { debugfs_remove(dentry); @@ -491,7 +475,6 @@ static struct dentry *blk_create_buf_file_callback(const char *filename, } static const struct rchan_callbacks blk_relay_callbacks = { - .subbuf_start = blk_subbuf_start_callback, .create_buf_file = blk_create_buf_file_callback, .remove_buf_file = blk_remove_buf_file_callback, }; @@ -580,7 +563,6 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, } bt->dev = dev; - atomic_set(&bt->dropped, 0); INIT_LIST_HEAD(&bt->running_list); ret = -EIO; From 19f3cb64a25b80db667a00182785577fae465b3e Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 12 Jun 2025 14:12:01 +0800 Subject: [PATCH 0594/2411] relayfs: support a counter tracking if data is too big to write It really doesn't matter if the user/admin knows what the last too big value is. Recording how many times this case is triggered would be helpful. Solve the existing issue where relay_reset() doesn't restore the value. Store the counter in the per-cpu buffer structure instead of the global buffer structure. It also solves the race condition which is likely to happen when a few of the per-cpu buffers encounter the too big data case and then access the global field last_toobig without lock protection. Remove the printk in relay_close() since kernel modules can directly call relay_stats() as they want. 
Link: https://lkml.kernel.org/r/20250612061201.34272-6-kerneljasonxing@gmail.com Signed-off-by: Jason Xing Reviewed-by: Yushan Zhou Reviewed-by: Masami Hiramatsu (Google) Cc: Jens Axboe Cc: Mathieu Desnoyers Cc: Steven Rostedt Signed-off-by: Andrew Morton --- include/linux/relay.h | 5 +++-- kernel/relay.c | 18 ++++++++++-------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/linux/relay.h b/include/linux/relay.h index 5310967f9d74..6772a7075840 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -33,13 +33,15 @@ */ enum { RELAY_STATS_BUF_FULL = (1 << 0), + RELAY_STATS_WRT_BIG = (1 << 1), - RELAY_STATS_LAST = RELAY_STATS_BUF_FULL, + RELAY_STATS_LAST = RELAY_STATS_WRT_BIG, }; struct rchan_buf_stats { unsigned int full_count; /* counter for buffer full */ + unsigned int big_count; /* counter for too big to write */ }; /* @@ -79,7 +81,6 @@ struct rchan const struct rchan_callbacks *cb; /* client callbacks */ struct kref kref; /* channel refcount */ void *private_data; /* for user-defined data */ - size_t last_toobig; /* tried to log event > subbuf size */ struct rchan_buf * __percpu *buf; /* per-cpu channel buffers */ int is_global; /* One global buffer ? 
*/ struct list_head list; /* for channel list */ diff --git a/kernel/relay.c b/kernel/relay.c index 2fc27c0e771e..8d915fe98198 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -303,6 +303,7 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init) buf->data = buf->start; buf->offset = 0; buf->stats.full_count = 0; + buf->stats.big_count = 0; for (i = 0; i < buf->chan->n_subbufs; i++) buf->padding[i] = 0; @@ -602,7 +603,7 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) return length; toobig: - buf->chan->last_toobig = length; + buf->stats.big_count++; return 0; } EXPORT_SYMBOL_GPL(relay_switch_subbuf); @@ -662,11 +663,6 @@ void relay_close(struct rchan *chan) if ((buf = *per_cpu_ptr(chan->buf, i))) relay_close_buf(buf); - if (chan->last_toobig) - printk(KERN_WARNING "relay: one or more items not logged " - "[item size (%zd) > sub-buffer size (%zd)]\n", - chan->last_toobig, chan->subbuf_size); - list_del(&chan->list); kref_put(&chan->kref, relay_destroy_channel); mutex_unlock(&relay_channels_mutex); @@ -719,11 +715,17 @@ size_t relay_stats(struct rchan *chan, int flags) rbuf = *per_cpu_ptr(chan->buf, 0); if (flags & RELAY_STATS_BUF_FULL) count = rbuf->stats.full_count; + else if (flags & RELAY_STATS_WRT_BIG) + count = rbuf->stats.big_count; } else { for_each_online_cpu(i) { rbuf = *per_cpu_ptr(chan->buf, i); - if (rbuf && flags & RELAY_STATS_BUF_FULL) - count += rbuf->stats.full_count; + if (rbuf) { + if (flags & RELAY_STATS_BUF_FULL) + count += rbuf->stats.full_count; + else if (flags & RELAY_STATS_WRT_BIG) + count += rbuf->stats.big_count; + } } } From ad2c8079e9d5637f6d66cb5ce5cf49768ae87658 Mon Sep 17 00:00:00 2001 From: Wei Nanxin Date: Sun, 15 Jun 2025 20:32:37 +0800 Subject: [PATCH 0595/2411] kcov: fix typo in comment of kcov_fault_in_area change '__santizer_cov_trace_pc()' to '__sanitizer_cov_trace_pc()' Link: https://lkml.kernel.org/r/20250615123237.110144-1-n9winx@163.com Signed-off-by: Wei Nanxin Cc: Andrey Konovalov Cc: 
Marco Elver Signed-off-by: Andrew Morton --- kernel/kcov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 187ba1b80bda..1d85597057e1 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -552,7 +552,7 @@ static int kcov_get_mode(unsigned long arg) /* * Fault in a lazily-faulted vmalloc area before it can be used by - * __santizer_cov_trace_pc(), to avoid recursion issues if any code on the + * __sanitizer_cov_trace_pc(), to avoid recursion issues if any code on the * vmalloc fault handling path is instrumented. */ static void kcov_fault_in_area(struct kcov *kcov) From d71b90e5ba83b32b4e3980f8c07ba2012ad9378a Mon Sep 17 00:00:00 2001 From: Fushuai Wang Date: Sun, 15 Jun 2025 11:09:30 +0800 Subject: [PATCH 0596/2411] exit: fix misleading comment in forget_original_parent() The commit 482a3767e508 ("exit: reparent: call forget_original_parent() under tasklist_lock") moved the comment from exit_notify() to forget_original_parent(). However, the forget_original_parent() only handles (A), while (B) is handled in kill_orphaned_pgrp(). So remove the unrelated part. Link: https://lkml.kernel.org/r/20250615030930.58051-1-wangfushuai@baidu.com Signed-off-by: Fushuai Wang Acked-by: Oleg Nesterov Cc: Andrii Nakryiko Cc: Christian Brauner Cc: Mateusz Guzik Cc: Michal Hocko Cc: Pasha Tatashin Cc: wangfushuai Signed-off-by: Andrew Morton --- kernel/exit.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index bb184a67ac73..f03caf17b214 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -692,12 +692,7 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p, } /* - * This does two things: - * - * A. Make init inherit all the child processes - * B. Check to see if any process groups have become orphaned - * as a result of our exiting, and if they have any stopped - * jobs, send them a SIGHUP and then a SIGCONT. 
(POSIX 3.2.2.2) + * Make init inherit all the child processes */ static void forget_original_parent(struct task_struct *father, struct list_head *dead) From e795000e755c309d1f9bd2a0590eca38b4625f3a Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 16 Jun 2025 15:22:44 -0400 Subject: [PATCH 0597/2411] mul_u64_u64_div_u64: fix the division-by-zero behavior MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current implementation forces a compile-time 1/0 division, which generates an undefined instruction (ud2 on x86) rather than a proper runtime division-by-zero exception. Change to trigger an actual div-by-0 exception at runtime, consistent with other division operations. Use a non-1 dividend to prevent the compiler from optimizing the division into a comparison. Link: https://lkml.kernel.org/r/q246p466-1453-qon9-29so-37105116009q@onlyvoer.pbz Signed-off-by: Nicolas Pitre Cc: Biju Das Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Weißschuh Cc: Uwe Kleine-König Cc: David Laight Signed-off-by: Andrew Morton --- lib/math/div64.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/math/div64.c b/lib/math/div64.c index 5faa29208bdb..bf77b9843175 100644 --- a/lib/math/div64.c +++ b/lib/math/div64.c @@ -212,12 +212,13 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c) #endif - /* make sure c is not zero, trigger exception otherwise */ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdiv-by-zero" - if (unlikely(c == 0)) - return 1/0; -#pragma GCC diagnostic pop + /* make sure c is not zero, trigger runtime exception otherwise */ + if (unlikely(c == 0)) { + unsigned long zero = 0; + + OPTIMIZER_HIDE_VAR(zero); + return ~0UL/zero; + } int shift = __builtin_ctzll(c); From 5eee4c2b2aebfd3c8f11d9722e49d838da4e4150 Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Mon, 16 Jun 2025 09:59:13 +0200 Subject: [PATCH 0598/2411] checkpatch: use utf-8 match for spell checking MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current code that checks for misspelling verifies, in a more complex regex, if $rawline matches [^\w]($misspellings)[^\w] Being $rawline a byte-string, a utf-8 character in $rawline can match the non-word-char [^\w]. E.g.: ./scripts/checkpatch.pl --git 81c2f059ab9 WARNING: 'ment' may be misspelled - perhaps 'meant'? #36: FILE: MAINTAINERS:14360: +M: Clément Léger ^^^^ Use a utf-8 version of $rawline for spell checking. Link: https://lkml.kernel.org/r/20250616-b4-checkpatch-upstream-v2-1-5600ce4a3b43@foss.st.com Signed-off-by: Antonio Borneo Signed-off-by: Clément Le Goffic Cc: Andy Whitcroft Cc: Dwaipayan Ray Cc: Joe Perches Cc: Lukas Bulwahn Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 664f7b7a622c..489b74d52abe 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3502,9 +3502,10 @@ sub process { # Check for various typo / spelling mistakes if (defined($misspellings) && ($in_commit_log || $line =~ /^(?:\+|Subject:)/i)) { - while ($rawline =~ /(?:^|[^\w\-'`])($misspellings)(?:[^\w\-'`]|$)/gi) { + my $rawline_utf8 = decode("utf8", $rawline); + while ($rawline_utf8 =~ /(?:^|[^\w\-'`])($misspellings)(?:[^\w\-'`]|$)/gi) { my $typo = $1; - my $blank = copy_spacing($rawline); + my $blank = copy_spacing($rawline_utf8); my $ptr = substr($blank, 0, $-[1]) . 
"^" x length($typo); my $hereptr = "$hereline$ptr\n"; my $typo_fix = $spelling_fix{lc($typo)}; From aa644c405291a419e92b112e2279c01c410e9a26 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 14 May 2025 12:18:09 +0200 Subject: [PATCH 0599/2411] uprobes: revert ref_ctr_offset in uprobe_unregister error path There's error path that could lead to inactive uprobe: 1) uprobe_register succeeds - updates instruction to int3 and changes ref_ctr from 0 to 1 2) uprobe_unregister fails - int3 stays in place, but ref_ctr is changed to 0 (it's not restored to 1 in the fail path) uprobe is leaked 3) another uprobe_register comes and re-uses the leaked uprobe and succeds - but int3 is already in place, so ref_ctr update is skipped and it stays 0 - uprobe CAN NOT be triggered now 4) uprobe_unregister fails because ref_ctr value is unexpected Fix this by reverting the updated ref_ctr value back to 1 in step 2), which is the case when uprobe_unregister fails (int3 stays in place), but we have already updated refctr. The new scenario will go as follows: 1) uprobe_register succeeds - updates instruction to int3 and changes ref_ctr from 0 to 1 2) uprobe_unregister fails - int3 stays in place and ref_ctr is reverted to 1.. 
uprobe is leaked 3) another uprobe_register comes and re-uses the leaked uprobe and succeeds - but int3 is already in place, so ref_ctr update is skipped and it stays 1 - uprobe CAN be triggered now 4) uprobe_unregister succeeds Link: https://lkml.kernel.org/r/20250514101809.2010193-1-jolsa@kernel.org Fixes: 1cc33161a83d ("uprobes: Support SDT markers having reference count (semaphore)") Signed-off-by: Jiri Olsa Acked-by: David Hildenbrand Acked-by: Oleg Nesterov Suggested-by: Oleg Nesterov Cc: Andrii Nakryiko Cc: "Masami Hiramatsu (Google)" Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- kernel/events/uprobes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 4c965ba77f9f..84ee7b590861 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -581,8 +581,8 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, out: /* Revert back reference counter if instruction update failed. */ - if (ret < 0 && is_register && ref_ctr_updated) - update_ref_ctr(uprobe, mm, -1); + if (ret < 0 && ref_ctr_updated) + update_ref_ctr(uprobe, mm, is_register ? -1 : 1); /* try collapse pmd for compound page */ if (ret > 0) From 2ae826799932ff89409f56636ad3c25578fe7cf5 Mon Sep 17 00:00:00 2001 From: Lizhi Xu Date: Mon, 16 Jun 2025 09:31:40 +0800 Subject: [PATCH 0600/2411] ocfs2: reset folio to NULL when get folio fails The reproducer uses FAULT_INJECTION to make memory allocation fail, which causes __filemap_get_folio() to fail, when initializing w_folios[i] in ocfs2_grab_folios_for_write(), it only returns an error code and the value of w_folios[i] is the error code, which causes ocfs2_unlock_and_free_folios() to recycle the invalid w_folios[i] when releasing folios. 
Link: https://lkml.kernel.org/r/20250616013140.3602219-1-lizhi.xu@windriver.com Reported-by: syzbot+c2ea94ae47cd7e3881ec@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c2ea94ae47cd7e3881ec Signed-off-by: Lizhi Xu Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/aops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 40b6bce12951..89aadc6cdd87 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1071,6 +1071,7 @@ static int ocfs2_grab_folios_for_write(struct address_space *mapping, if (IS_ERR(wc->w_folios[i])) { ret = PTR_ERR(wc->w_folios[i]); mlog_errno(ret); + wc->w_folios[i] = NULL; goto out; } } From 816a8800326833becc93f607eb706fc542c28d75 Mon Sep 17 00:00:00 2001 From: Long Li Date: Tue, 17 Jun 2025 09:25:34 +0800 Subject: [PATCH 0601/2411] ocfs2: remove redundant NULL check in rename path The code checks newfe_bh for NULL after it has already been dereferenced to access b_data. This NULL check is unnecessary for two reasons: 1. If ocfs2_inode_lock() succeeds (returns >= 0), newfe_bh is guaranteed to be valid. 2. We've already dereferenced newfe_bh to access b_data, so it must be non-NULL at this point. Remove the redundant NULL check in the trace_ocfs2_rename_over_existing() call to improve code clarity. 
Link: https://lkml.kernel.org/r/20250617012534.3458669-1-leo.lilong@huawei.com Signed-off-by: Long Li Reviewed-by: Su Yue Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 99278c8f0e24..26bc59c3a813 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -1452,8 +1452,8 @@ static int ocfs2_rename(struct mnt_idmap *idmap, newfe = (struct ocfs2_dinode *) newfe_bh->b_data; trace_ocfs2_rename_over_existing( - (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ? - (unsigned long long)newfe_bh->b_blocknr : 0ULL); + (unsigned long long)newfe_blkno, newfe_bh, + (unsigned long long)newfe_bh->b_blocknr); if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) { status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, From 64960497ea86a5d09176c296c3616aa7c8668624 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Wed, 18 Jun 2025 15:34:33 +0200 Subject: [PATCH 0602/2411] fork: clean up ifdef logic around stack allocation There is an unneeded OR in the ifdef functions that are used to allocate and free kernel stacks based on direct map or vmap. Adding dynamic stack support would complicate this logic even further. Therefore, clean up by changing the order so OR is no longer needed. 
Link: https://lkml.kernel.org/r/20250618-fork-fixes-v4-1-2e05a2e1f5fc@linaro.org Signed-off-by: Pasha Tatashin Link: https://lore.kernel.org/20240311164638.2015063-3-pasha.tatashin@soleen.com Signed-off-by: Linus Walleij Cc: Mateusz Guzik Signed-off-by: Andrew Morton --- kernel/fork.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 6616d173307a..bd8c21d64746 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -188,13 +188,7 @@ static inline void free_task_struct(struct task_struct *tsk) kmem_cache_free(task_struct_cachep, tsk); } -/* - * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a - * kmemcache based allocator. - */ -# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) - -# ifdef CONFIG_VMAP_STACK +#ifdef CONFIG_VMAP_STACK /* * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB * flush. Try to minimize the number of calls by caching stacks. @@ -344,7 +338,13 @@ static void free_thread_stack(struct task_struct *tsk) tsk->stack_vm_area = NULL; } -# else /* !CONFIG_VMAP_STACK */ +#else /* !CONFIG_VMAP_STACK */ + +/* + * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a + * kmemcache based allocator. 
+ */ +#if THREAD_SIZE >= PAGE_SIZE static void thread_stack_free_rcu(struct rcu_head *rh) { @@ -376,8 +376,7 @@ static void free_thread_stack(struct task_struct *tsk) tsk->stack = NULL; } -# endif /* CONFIG_VMAP_STACK */ -# else /* !(THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)) */ +#else /* !(THREAD_SIZE >= PAGE_SIZE) */ static struct kmem_cache *thread_stack_cache; @@ -416,7 +415,8 @@ void thread_stack_cache_init(void) BUG_ON(thread_stack_cache == NULL); } -# endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */ +#endif /* THREAD_SIZE >= PAGE_SIZE */ +#endif /* CONFIG_VMAP_STACK */ /* SLAB cache for signal_struct structures (tsk->signal) */ static struct kmem_cache *signal_cachep; From 41a7f737685eed2700654720d3faaffdf0132135 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Jun 2025 15:46:02 +0200 Subject: [PATCH 0603/2411] scripts: gdb: move MNT_* constants to gdb-parsed Since these are now no longer defines, but in an enum. Link: https://lkml.kernel.org/r/20250618134629.25700-2-johannes@sipsolutions.net Fixes: 101f2bbab541 ("fs: convert mount flags to enum") Reviewed-by: Benjamin Berg Signed-off-by: Johannes Berg Cc: Jan Kiszka Cc: Kieran Bingham Cc: Stephen Brennan Signed-off-by: Andrew Morton --- scripts/gdb/linux/constants.py.in | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in index fd6bd69c5096..d5e3069f42a7 100644 --- a/scripts/gdb/linux/constants.py.in +++ b/scripts/gdb/linux/constants.py.in @@ -73,12 +73,12 @@ if IS_BUILTIN(CONFIG_MODULES): LX_GDBPARSED(MOD_RO_AFTER_INIT) /* linux/mount.h */ -LX_VALUE(MNT_NOSUID) -LX_VALUE(MNT_NODEV) -LX_VALUE(MNT_NOEXEC) -LX_VALUE(MNT_NOATIME) -LX_VALUE(MNT_NODIRATIME) -LX_VALUE(MNT_RELATIME) +LX_GDBPARSED(MNT_NOSUID) +LX_GDBPARSED(MNT_NODEV) +LX_GDBPARSED(MNT_NOEXEC) +LX_GDBPARSED(MNT_NOATIME) +LX_GDBPARSED(MNT_NODIRATIME) +LX_GDBPARSED(MNT_RELATIME) /* linux/threads.h */ 
LX_VALUE(NR_CPUS) From 1857fcc847443b0238cb64584b43d8c3a9049a0a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 17 Mar 2025 17:02:28 +0800 Subject: [PATCH 0604/2411] lib/raid6: replace custom zero page with ZERO_PAGE Use the system-wide zero page instead of a custom zero page. [herbert@gondor.apana.org.au: update lib/raid6/recov_rvv.c, per Klara] Link: https://lkml.kernel.org/r/aFkUnXWtxcgOTVkw@gondor.apana.org.au Link: https://lkml.kernel.org/r/Z9flJNkWQICx0PXk@gondor.apana.org.au Signed-off-by: Herbert Xu Cc: Song Liu Cc: Yu Kuai Cc: Klara Modin Signed-off-by: Andrew Morton --- crypto/async_tx/async_pq.c | 2 +- crypto/async_tx/async_raid6_recov.c | 4 ++-- include/linux/raid/pq.h | 12 +++++++++++- lib/raid6/algos.c | 3 --- lib/raid6/recov.c | 6 +++--- lib/raid6/recov_avx2.c | 6 +++--- lib/raid6/recov_avx512.c | 6 +++--- lib/raid6/recov_loongarch_simd.c | 12 ++++++------ lib/raid6/recov_neon.c | 6 +++--- lib/raid6/recov_s390xc.c | 6 +++--- lib/raid6/recov_ssse3.c | 6 +++--- 11 files changed, 38 insertions(+), 31 deletions(-) diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index 5e2b2680d7db..9e4bb7fbde25 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -119,7 +119,7 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int *offsets, int disks, for (i = 0; i < disks; i++) { if (blocks[i] == NULL) { BUG_ON(i > disks - 3); /* P or Q can't be zero */ - srcs[i] = (void*)raid6_empty_zero_page; + srcs[i] = raid6_get_zero_page(); } else { srcs[i] = page_address(blocks[i]) + offsets[i]; diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c index 354b8cd5537f..539ea5b378dc 100644 --- a/crypto/async_tx/async_raid6_recov.c +++ b/crypto/async_tx/async_raid6_recov.c @@ -414,7 +414,7 @@ async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, async_tx_quiesce(&submit->depend_tx); for (i = 0; i < disks; i++) if (blocks[i] == NULL) - ptrs[i] = (void *) raid6_empty_zero_page; + 
ptrs[i] = raid6_get_zero_page(); else ptrs[i] = page_address(blocks[i]) + offs[i]; @@ -497,7 +497,7 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, async_tx_quiesce(&submit->depend_tx); for (i = 0; i < disks; i++) if (blocks[i] == NULL) - ptrs[i] = (void*)raid6_empty_zero_page; + ptrs[i] = raid6_get_zero_page(); else ptrs[i] = page_address(blocks[i]) + offs[i]; diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 72ff44cca864..2467b3be15c9 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -11,8 +11,13 @@ #ifdef __KERNEL__ #include +#include -extern const char raid6_empty_zero_page[PAGE_SIZE]; +/* This should be const but the raid6 code is too convoluted for that. */ +static inline void *raid6_get_zero_page(void) +{ + return page_address(ZERO_PAGE(0)); +} #else /* ! __KERNEL__ */ /* Used for testing in user space */ @@ -191,6 +196,11 @@ static inline uint32_t raid6_jiffies(void) return tv.tv_sec*1000 + tv.tv_usec/1000; } +static inline void *raid6_get_zero_page(void) +{ + return raid6_empty_zero_page; +} + #endif /* ! 
__KERNEL__ */ #endif /* LINUX_RAID_RAID6_H */ diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 75ce3e134b7c..799e0e5eac26 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -18,9 +18,6 @@ #else #include #include -/* In .bss so it's zeroed */ -const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); -EXPORT_SYMBOL(raid6_empty_zero_page); #endif struct raid6_calls raid6_call; diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c index a7c1b2bbe40d..b5e47c008b41 100644 --- a/lib/raid6/recov.c +++ b/lib/raid6/recov.c @@ -31,10 +31,10 @@ static void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, Use the dead data pages as temporary storage for delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -72,7 +72,7 @@ static void raid6_datap_recov_intx1(int disks, size_t bytes, int faila, /* Compute syndrome with zero for the missing data page Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_avx2.c b/lib/raid6/recov_avx2.c index 4e8095403ee2..97d598d2535c 100644 --- a/lib/raid6/recov_avx2.c +++ b/lib/raid6/recov_avx2.c @@ -28,10 +28,10 @@ static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila, Use the dead data pages as temporary storage for delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; 
raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -196,7 +196,7 @@ static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila, /* Compute syndrome with zero for the missing data page Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_avx512.c b/lib/raid6/recov_avx512.c index 310c715db313..7986120ca444 100644 --- a/lib/raid6/recov_avx512.c +++ b/lib/raid6/recov_avx512.c @@ -37,10 +37,10 @@ static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila, */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -238,7 +238,7 @@ static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila, */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_loongarch_simd.c b/lib/raid6/recov_loongarch_simd.c index 94aeac85e6f7..93dc515997a1 100644 --- a/lib/raid6/recov_loongarch_simd.c +++ b/lib/raid6/recov_loongarch_simd.c @@ -42,10 +42,10 @@ static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila, * delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -197,7 +197,7 @@ static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila, * Use the dead data 
page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -316,10 +316,10 @@ static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila, * delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -436,7 +436,7 @@ static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila, * Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_neon.c b/lib/raid6/recov_neon.c index 1bfc14174d4d..70e1404c1512 100644 --- a/lib/raid6/recov_neon.c +++ b/lib/raid6/recov_neon.c @@ -36,10 +36,10 @@ static void raid6_2data_recov_neon(int disks, size_t bytes, int faila, * delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -74,7 +74,7 @@ static void raid6_datap_recov_neon(int disks, size_t bytes, int faila, * Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_s390xc.c b/lib/raid6/recov_s390xc.c index 179eec900cea..1d32c01261be 100644 --- 
a/lib/raid6/recov_s390xc.c +++ b/lib/raid6/recov_s390xc.c @@ -35,10 +35,10 @@ static void raid6_2data_recov_s390xc(int disks, size_t bytes, int faila, Use the dead data pages as temporary storage for delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -82,7 +82,7 @@ static void raid6_datap_recov_s390xc(int disks, size_t bytes, int faila, /* Compute syndrome with zero for the missing data page Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c index 4bfa3c6b60de..2e849185c32b 100644 --- a/lib/raid6/recov_ssse3.c +++ b/lib/raid6/recov_ssse3.c @@ -30,10 +30,10 @@ static void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, Use the dead data pages as temporary storage for delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -203,7 +203,7 @@ static void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila, /* Compute syndrome with zero for the missing data page Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); From 4d71d99f361f54bd18a94370ea08e562e511c4e9 Mon Sep 17 00:00:00 2001 From: 
Andrew Morton Date: Thu, 19 Jun 2025 16:13:17 -0700 Subject: [PATCH 0605/2411] MAINTAINERS: add lib/raid6/ to "SOFTWARE RAID" Cc: Song Liu Cc: Yu Kuai Cc: Herbert Xu Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index fad6cb025a19..947ec6bf5b95 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23053,6 +23053,7 @@ F: drivers/md/md* F: drivers/md/raid* F: include/linux/raid/ F: include/uapi/linux/raid/ +F: lib/raid6/ SOLIDRUN CLEARFOG SUPPORT M: Russell King From caf728dfa7789f7096e8cd4341b4bf8f671f5c1d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 20 Jun 2025 13:19:04 +0200 Subject: [PATCH 0606/2411] lib: test_objagg: split test_hints_case() into two functions With sanitizers enabled, this function uses a lot of stack, causing a harmless warning: lib/test_objagg.c: In function 'test_hints_case.constprop': lib/test_objagg.c:994:1: error: the frame size of 1440 bytes is larger than 1408 bytes [-Werror=frame-larger-than=] Most of this is from the two 'struct world' structures. Since most of the work in this function is duplicated for the two, split it up into separate functions that each use one of them. The combined stack usage is still the same here, but there is no warning any more, and the code is still safe because of the known call chain. 
Link: https://lkml.kernel.org/r/20250620111907.3395296-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Cc: Jiri Pirko Signed-off-by: Andrew Morton --- lib/test_objagg.c | 77 +++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/lib/test_objagg.c b/lib/test_objagg.c index 222b39fc2629..ce5c4c36a084 100644 --- a/lib/test_objagg.c +++ b/lib/test_objagg.c @@ -908,50 +908,22 @@ static int check_expect_hints_stats(struct objagg_hints *objagg_hints, return err; } -static int test_hints_case(const struct hints_case *hints_case) +static int test_hints_case2(const struct hints_case *hints_case, + struct objagg_hints *hints, struct objagg *objagg) { struct objagg_obj *objagg_obj; - struct objagg_hints *hints; struct world world2 = {}; - struct world world = {}; struct objagg *objagg2; - struct objagg *objagg; const char *errmsg; int i; int err; - objagg = objagg_create(&delta_ops, NULL, &world); - if (IS_ERR(objagg)) - return PTR_ERR(objagg); - - for (i = 0; i < hints_case->key_ids_count; i++) { - objagg_obj = world_obj_get(&world, objagg, - hints_case->key_ids[i]); - if (IS_ERR(objagg_obj)) { - err = PTR_ERR(objagg_obj); - goto err_world_obj_get; - } - } - - pr_debug_stats(objagg); - err = check_expect_stats(objagg, &hints_case->expect_stats, &errmsg); - if (err) { - pr_err("Stats: %s\n", errmsg); - goto err_check_expect_stats; - } - - hints = objagg_hints_get(objagg, OBJAGG_OPT_ALGO_SIMPLE_GREEDY); - if (IS_ERR(hints)) { - err = PTR_ERR(hints); - goto err_hints_get; - } - pr_debug_hints_stats(hints); err = check_expect_hints_stats(hints, &hints_case->expect_stats_hints, &errmsg); if (err) { pr_err("Hints stats: %s\n", errmsg); - goto err_check_expect_hints_stats; + return err; } objagg2 = objagg_create(&delta_ops, hints, &world2); @@ -983,7 +955,48 @@ static int test_hints_case(const struct hints_case *hints_case) world_obj_put(&world2, objagg, hints_case->key_ids[i]); i = hints_case->key_ids_count; 
objagg_destroy(objagg2); -err_check_expect_hints_stats: + + return err; +} + +static int test_hints_case(const struct hints_case *hints_case) +{ + struct objagg_obj *objagg_obj; + struct objagg_hints *hints; + struct world world = {}; + struct objagg *objagg; + const char *errmsg; + int i; + int err; + + objagg = objagg_create(&delta_ops, NULL, &world); + if (IS_ERR(objagg)) + return PTR_ERR(objagg); + + for (i = 0; i < hints_case->key_ids_count; i++) { + objagg_obj = world_obj_get(&world, objagg, + hints_case->key_ids[i]); + if (IS_ERR(objagg_obj)) { + err = PTR_ERR(objagg_obj); + goto err_world_obj_get; + } + } + + pr_debug_stats(objagg); + err = check_expect_stats(objagg, &hints_case->expect_stats, &errmsg); + if (err) { + pr_err("Stats: %s\n", errmsg); + goto err_check_expect_stats; + } + + hints = objagg_hints_get(objagg, OBJAGG_OPT_ALGO_SIMPLE_GREEDY); + if (IS_ERR(hints)) { + err = PTR_ERR(hints); + goto err_hints_get; + } + + err = test_hints_case2(hints_case, hints, objagg); + objagg_hints_put(hints); err_hints_get: err_check_expect_stats: From fed307b67c5bbb17b72c54816cd1bce61c23b4d7 Mon Sep 17 00:00:00 2001 From: Jiazi Li Date: Fri, 20 Jun 2025 18:07:56 +0800 Subject: [PATCH 0607/2411] kthread: update comment for __to_kthread With commit 343f4c49f243 ("kthread: Don't allocate kthread_struct for init and umh") and commit 753550eb0ce1 ("fork: Explicitly set PF_KTHREAD"), umh tasks no longer have struct kthread and PF_KTHREAD flag. Update the comment to describe what the current rules are to detect if something is a kthread. Link: https://lkml.kernel.org/r/20250620100801.23185-1-jqqlijiazi@gmail.com Signed-off-by: Jiazi Li Signed-off-by: mingzhu.wang Suggested-by: Eric W. Biederman Reviewed-by: "Eric W.
Biederman" Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- kernel/kthread.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index 85fc068f0083..0e98b228a8ef 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -88,13 +88,12 @@ static inline struct kthread *to_kthread(struct task_struct *k) /* * Variant of to_kthread() that doesn't assume @p is a kthread. * - * Per construction; when: + * When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will + * always remain a kthread. For kthreads p->worker_private always + * points to a struct kthread. For tasks that are not kthreads + * p->worker_private is used to point to other things. * - * (p->flags & PF_KTHREAD) && p->worker_private - * - * the task is both a kthread and struct kthread is persistent. However - * PF_KTHREAD on it's own is not, kernel_thread() can exec() (See umh.c and - * begin_new_exec()). + * Return NULL for any task that is not a kthread. */ static inline struct kthread *__to_kthread(struct task_struct *p) { From 896f612273dacfdc7a635315394ccf285c257208 Mon Sep 17 00:00:00 2001 From: Li Chen Date: Fri, 20 Jun 2025 10:02:31 +0800 Subject: [PATCH 0608/2411] fs: fat: Prevent fsfuzzer from dominating the console fsfuzzer may make many invalid accesses to FAT-fs and generate many kmsgs like "FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb)". For platforms and OSes that enable slow hardware serial devices, this may easily cause a soft lockup. So let's ratelimit the error log.
The log as below: [11916.242560] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.254485] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.266388] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.278287] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.290180] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.302068] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.313962] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.325848] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.337732] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.349619] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.361505] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.373391] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.385272] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.397144] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.409025] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.420909] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.432791] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.444674] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.456558] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.468446] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.480352] watchdog: BUG: soft lockup - CPU#58 stuck for 26s! [cat:2446035] [11916.480357] Modules linked in: ... [11916.480503] CPU: 58 PID: 2446035 Comm: cat Kdump: loaded Tainted: ... [11916.480508] Hardware name: vclusters VSFT5000 B/VSFT5000 B, BIOS ... 
[11916.480510] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [11916.480513] pc : console_emit_next_record+0x1b4/0x288 [11916.480524] lr : console_emit_next_record+0x1ac/0x288 [11916.480525] sp : ffff80009bcdae90 [11916.480527] x29: ffff80009bcdaec0 x28: ffff800082513810 x27: 0000000000000001 [11916.480530] x26: 0000000000000001 x25: ffff800081f66000 x24: 0000000000000000 [11916.480533] x23: 0000000000000000 x22: ffff80009bcdaf8f x21: 0000000000000001 [11916.480535] x20: 0000000000000000 x19: ffff800082513810 x18: ffffffffffffffff [11916.480538] x17: 0000000000000002 x16: 0000000000000001 x15: ffff80009bcdab30 [11916.480541] x14: 0000000000000000 x13: 205d353330363434 x12: 32545b5d36343438 [11916.480543] x11: 652820544146206f x10: 7420737365636361 x9 : ffff800080159a6c [11916.480546] x8 : 69202c726f727265 x7 : 545b5d3634343836 x6 : 342e36313931315b [11916.480549] x5 : ffff800082513a01 x4 : ffff80009bcdad31 x3 : 0000000000000000 [11916.480551] x2 : 00000000ffffffff x1 : 0000000001b9b000 x0 : ffff8000836cef00 [11916.480554] Call trace: [11916.480557] console_emit_next_record+0x1b4/0x288 [11916.480560] console_flush_all+0xcc/0x190 [11916.480563] console_unlock+0x78/0x138 [11916.480565] vprintk_emit+0x1c4/0x210 [11916.480568] vprintk_default+0x40/0x58 [11916.480570] vprintk+0x84/0xc8 [11916.480572] _printk+0x68/0xa0 [11916.480578] _fat_msg+0x6c/0xa0 [fat] [11916.480593] __fat_fs_error+0xf8/0x118 [fat] [11916.480601] fat_ent_read+0x164/0x238 [fat] [11916.480609] fat_get_cluster+0x180/0x2c8 [fat] [11916.480617] fat_get_mapped_cluster+0xb8/0x170 [fat] Link: https://lkml.kernel.org/r/20250620020231.9292-1-me@linux.beauty Signed-off-by: Li Chen Acked-by: OGAWA Hirofumi Cc: Christian Brauner Signed-off-by: Andrew Morton --- fs/fat/fatent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 1db348f8f887..a7061c2ad8e4 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -356,7 +356,7 @@ int 
fat_ent_read(struct inode *inode, struct fat_entry *fatent, int entry) if (!fat_valid_entry(sbi, entry)) { fatent_brelse(fatent); - fat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", entry); + fat_fs_error_ratelimit(sb, "invalid access to FAT (entry 0x%08x)", entry); return -EIO; } From 01bda05819b89b38eebad7e2034b8ab14eee5207 Mon Sep 17 00:00:00 2001 From: Yaxin Wang Date: Thu, 19 Jun 2025 21:18:43 +0800 Subject: [PATCH 0609/2411] tools/accounting/delaytop: add delaytop to record top-n task delay Problem ======= The "getdelays" can only display the latency of a single task by specifying a PID, but it has the following limitations: 1. single-task perspective: only supports querying the latency (CPU, I/O, memory, etc.) of an individual task via PID and cannot provide a global analysis of high-latency processes across the system. 2. lack of High-Latency process awareness: when the overall system latency is high (e.g., a spike in CPU latency), there is no way to quickly identify the top N processes contributing to the highest latency. 3. poor interactivity: It lacks dynamic sorting and refresh capabilities (similar to top), making it difficult to monitor latency changes in real time. Solution ======== To address these limitations, we introduce the "delaytop" with the following capabilities: 1. system view: monitors latency metrics (CPU, I/O, memory, IRQ, etc.) for all system processes 2. supports field-based sorting (e.g., default sort by CPU latency in descending order) 3. 
dynamic interactive interface: focus on specific processes with --pid; limit displayed entries with --processes 20; control monitoring duration with --iterations; Use case ======== bash# ./delaytop Top 20 processes (sorted by CPU delay): PID TGID COMMAND CPU(ms) IO(ms) SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms) --------------------------------------------------------------------------------------------- 26 26 kworker/1:0H 5.55 0.00 0.00 0.00 0.00 0.00 0.00 0.00 32 32 kworker/2:0H-kb 2.93 0.00 0.00 0.00 0.00 0.00 0.00 0.00 38 38 kworker/3:0H-ev 2.88 0.00 0.00 0.00 0.00 0.00 0.00 0.00 84 84 kworker/R-vfio- 1.62 0.00 0.00 0.00 0.00 0.00 0.00 0.00 24 24 ksoftirqd/1 1.43 0.00 0.00 0.00 0.00 0.00 0.00 0.00 19 19 idle_inject/0 0.99 0.00 0.00 0.00 0.00 0.00 0.00 0.00 16 16 rcu_exp_par_gp_ 0.87 0.00 0.00 0.00 0.00 0.00 0.00 0.00 11 11 kworker/0:1 0.87 0.00 0.00 0.00 0.00 0.00 0.00 0.00 22 22 idle_inject/1 0.80 0.00 0.00 0.00 0.00 0.00 0.00 0.00 3 3 pool_workqueue_ 0.74 0.00 0.00 0.00 0.00 0.00 0.00 0.00 81 81 scsi_eh_1 0.59 0.00 0.00 0.00 0.00 0.00 0.00 0.00 30 30 ksoftirqd/2 0.42 0.00 0.00 0.00 0.00 0.00 0.00 0.00 36 36 ksoftirqd/3 0.37 0.00 0.00 0.00 0.00 0.00 0.00 0.00 9 9 kworker/0:0-eve 0.36 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8 8 kworker/R-netns 0.34 0.00 0.00 0.00 0.00 0.00 0.00 0.00 76 76 kworker/1:1-pm 0.32 0.00 0.00 0.00 0.00 0.00 0.00 0.00 21 21 cpuhp/1 0.30 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4 4 kworker/R-rcu_g 0.21 0.00 0.00 0.00 0.00 0.00 0.00 0.00 12 12 kworker/u16:0-i 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1 1 init 0.18 0.00 0.00 0.00 0.00 0.00 0.08 0.00 Link: https://lkml.kernel.org/r/20250619211843633h05gWrBDMFkEH6xAVm_5y@zte.com.cn Co-developed-by: Fan Yu Signed-off-by: Fan Yu Signed-off-by: Yaxin Wang Cc: Balbir Singh Cc: David Hildenbrand Cc: Peilin He Cc: Qiang Tu Cc: wangyong Cc: xu xin Cc: Yang Yang Cc: ye xingchen Cc: Yunkai Zhang Signed-off-by: Andrew Morton --- tools/accounting/Makefile | 2 +- tools/accounting/delaytop.c | 673 
++++++++++++++++++++++++++++++++++++ 2 files changed, 674 insertions(+), 1 deletion(-) create mode 100644 tools/accounting/delaytop.c diff --git a/tools/accounting/Makefile b/tools/accounting/Makefile index 11def1ad046c..20bbd461515e 100644 --- a/tools/accounting/Makefile +++ b/tools/accounting/Makefile @@ -2,7 +2,7 @@ CC := $(CROSS_COMPILE)gcc CFLAGS := -I../../usr/include -PROGS := getdelays procacct +PROGS := getdelays procacct delaytop all: $(PROGS) diff --git a/tools/accounting/delaytop.c b/tools/accounting/delaytop.c new file mode 100644 index 000000000000..23e38f39e97d --- /dev/null +++ b/tools/accounting/delaytop.c @@ -0,0 +1,673 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * delaytop.c - task delay monitoring tool. + * + * This tool provides real-time monitoring and statistics of + * system, container, and task-level delays, including CPU, + * memory, IO, and IRQ delay accounting. It supports both + * interactive (top-like) and one-shot output, and can report delay information + * for the whole system, specific containers (cgroups), or + * individual tasks (PIDs). + * + * Key features: + * - Collects per-task delay accounting statistics via taskstats. + * - Supports sorting and filtering. + * - Supports both interactive (screen refresh) and one-shot (--once) output. + * + * Copyright (C) Fan Yu, ZTE Corp. 2025 + * Copyright (C) Wang Yaxin, ZTE Corp.
2025 + * + * Compile with + * gcc -I/usr/src/linux/include delaytop.c -o delaytop + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NLA_NEXT(na) ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len))) +#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN)) +#define NLA_PAYLOAD(len) (len - NLA_HDRLEN) + +#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN)) +#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN) + +#define TASK_COMM_LEN 16 +#define MAX_MSG_SIZE 1024 +#define MAX_TASKS 1000 +#define SET_TASK_STAT(task_count, field) tasks[task_count].field = stats.field + +/* Program settings structure */ +struct config { + int delay; /* Update interval in seconds */ + int iterations; /* Number of iterations, 0 == infinite */ + int max_processes; /* Maximum number of processes to show */ + char sort_field; /* Field to sort by */ + int output_one_time; /* Output once and exit */ + int monitor_pid; /* Monitor specific PID */ + char *container_path; /* Path to container cgroup */ +}; + +/* Task delay information structure */ +struct task_info { + int pid; + int tgid; + char command[TASK_COMM_LEN]; + unsigned long long cpu_count; + unsigned long long cpu_delay_total; + unsigned long long blkio_count; + unsigned long long blkio_delay_total; + unsigned long long swapin_count; + unsigned long long swapin_delay_total; + unsigned long long freepages_count; + unsigned long long freepages_delay_total; + unsigned long long thrashing_count; + unsigned long long thrashing_delay_total; + unsigned long long compact_count; + unsigned long long compact_delay_total; + unsigned long long wpcopy_count; + unsigned long long wpcopy_delay_total; + unsigned long long irq_count; + unsigned long long irq_delay_total; +}; + +/* Container statistics structure */ +struct container_stats { 
+ int nr_sleeping; /* Number of sleeping processes */ + int nr_running; /* Number of running processes */ + int nr_stopped; /* Number of stopped processes */ + int nr_uninterruptible; /* Number of uninterruptible processes */ + int nr_io_wait; /* Number of processes in IO wait */ +}; + +/* Global variables */ +static struct config cfg; +static struct task_info tasks[MAX_TASKS]; +static int task_count; +static int running = 1; +static struct container_stats container_stats; + +/* Netlink socket variables */ +static int nl_sd = -1; +static int family_id; + +/* Set terminal to non-canonical mode for q-to-quit */ +static struct termios orig_termios; +static void enable_raw_mode(void) +{ + struct termios raw; + + tcgetattr(STDIN_FILENO, &orig_termios); + raw = orig_termios; + raw.c_lflag &= ~(ICANON | ECHO); + tcsetattr(STDIN_FILENO, TCSAFLUSH, &raw); +} +static void disable_raw_mode(void) +{ + tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios); +} + +/* Display usage information and command line options */ +static void usage(void) +{ + printf("Usage: delaytop [Options]\n" + "Options:\n" + " -h, --help Show this help message and exit\n" + " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n" + " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n" + " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n" + " -o, --once Display once and exit\n" + " -p, --pid=PID Monitor only the specified PID\n" + " -C, --container=PATH Monitor the container at specified cgroup path\n"); + exit(0); +} + +/* Parse command line arguments and set configuration */ +static void parse_args(int argc, char **argv) +{ + int c; + struct option long_options[] = { + {"help", no_argument, 0, 'h'}, + {"delay", required_argument, 0, 'd'}, + {"iterations", required_argument, 0, 'n'}, + {"pid", required_argument, 0, 'p'}, + {"once", no_argument, 0, 'o'}, + {"processes", required_argument, 0, 'P'}, + {"container", 
required_argument, 0, 'C'}, + {0, 0, 0, 0} + }; + + /* Set defaults */ + cfg.delay = 2; + cfg.iterations = 0; + cfg.max_processes = 20; + cfg.sort_field = 'c'; /* Default sort by CPU delay */ + cfg.output_one_time = 0; + cfg.monitor_pid = 0; /* 0 means monitor all PIDs */ + cfg.container_path = NULL; + + while (1) { + int option_index = 0; + + c = getopt_long(argc, argv, "hd:n:p:oP:C:", long_options, &option_index); + if (c == -1) + break; + + switch (c) { + case 'h': + usage(); + break; + case 'd': + cfg.delay = atoi(optarg); + if (cfg.delay < 1) { + fprintf(stderr, "Error: delay must be >= 1.\n"); + exit(1); + } + break; + case 'n': + cfg.iterations = atoi(optarg); + if (cfg.iterations < 0) { + fprintf(stderr, "Error: iterations must be >= 0.\n"); + exit(1); + } + break; + case 'p': + cfg.monitor_pid = atoi(optarg); + if (cfg.monitor_pid < 1) { + fprintf(stderr, "Error: pid must be >= 1.\n"); + exit(1); + } + break; + case 'o': + cfg.output_one_time = 1; + break; + case 'P': + cfg.max_processes = atoi(optarg); + if (cfg.max_processes < 1) { + fprintf(stderr, "Error: processes must be >= 1.\n"); + exit(1); + } + if (cfg.max_processes > MAX_TASKS) { + fprintf(stderr, "Warning: processes capped to %d.\n", + MAX_TASKS); + cfg.max_processes = MAX_TASKS; + } + break; + case 'C': + cfg.container_path = strdup(optarg); + break; + default: + fprintf(stderr, "Try 'delaytop --help' for more information.\n"); + exit(1); + } + } +} + +/* Create a raw netlink socket and bind */ +static int create_nl_socket(void) +{ + int fd; + struct sockaddr_nl local; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (fd < 0) + return -1; + + memset(&local, 0, sizeof(local)); + local.nl_family = AF_NETLINK; + + if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) { + close(fd); + return -1; + } + + return fd; +} + +/* Send a command via netlink */ +static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, + __u8 genl_cmd, __u16 nla_type, + void *nla_data, int nla_len) 
+{ + struct sockaddr_nl nladdr; + struct nlattr *na; + int r, buflen; + char *buf; + + struct { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[MAX_MSG_SIZE]; + } msg; + + msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); + msg.n.nlmsg_type = nlmsg_type; + msg.n.nlmsg_flags = NLM_F_REQUEST; + msg.n.nlmsg_seq = 0; + msg.n.nlmsg_pid = nlmsg_pid; + msg.g.cmd = genl_cmd; + msg.g.version = 0x1; + na = (struct nlattr *) GENLMSG_DATA(&msg); + na->nla_type = nla_type; + na->nla_len = nla_len + NLA_HDRLEN; + memcpy(NLA_DATA(na), nla_data, nla_len); + msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); + + buf = (char *) &msg; + buflen = msg.n.nlmsg_len; + memset(&nladdr, 0, sizeof(nladdr)); + nladdr.nl_family = AF_NETLINK; + while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr, + sizeof(nladdr))) < buflen) { + if (r > 0) { + buf += r; + buflen -= r; + } else if (errno != EAGAIN) + return -1; + } + return 0; +} + +/* Get family ID for taskstats via netlink */ +static int get_family_id(int sd) +{ + struct { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[256]; + } ans; + + int id = 0, rc; + struct nlattr *na; + int rep_len; + char name[100]; + + strncpy(name, TASKSTATS_GENL_NAME, sizeof(name) - 1); + name[sizeof(name) - 1] = '\0'; + rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, + CTRL_ATTR_FAMILY_NAME, (void *)name, + strlen(TASKSTATS_GENL_NAME)+1); + if (rc < 0) + return 0; + + rep_len = recv(sd, &ans, sizeof(ans), 0); + if (ans.n.nlmsg_type == NLMSG_ERROR || + (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len)) + return 0; + + na = (struct nlattr *) GENLMSG_DATA(&ans); + na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); + if (na->nla_type == CTRL_ATTR_FAMILY_ID) + id = *(__u16 *) NLA_DATA(na); + return id; +} + +static int read_comm(int pid, char *comm_buf, size_t buf_size) +{ + char path[64]; + size_t len; + FILE *fp; + + snprintf(path, sizeof(path), "/proc/%d/comm", pid); + fp = fopen(path, "r"); + if (!fp) + return -1; + if 
(fgets(comm_buf, buf_size, fp)) { + len = strlen(comm_buf); + if (len > 0 && comm_buf[len - 1] == '\n') + comm_buf[len - 1] = '\0'; + } else { + fclose(fp); + return -1; + } + fclose(fp); + return 0; +} + +static int fetch_and_fill_task_info(int pid, const char *comm) +{ + struct { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[MAX_MSG_SIZE]; + } resp; + struct taskstats stats; + struct nlattr *nested; + struct nlattr *na; + int nested_len; + int nl_len; + int rc; + + if (send_cmd(nl_sd, family_id, getpid(), TASKSTATS_CMD_GET, + TASKSTATS_CMD_ATTR_PID, &pid, sizeof(pid)) < 0) { + return -1; + } + rc = recv(nl_sd, &resp, sizeof(resp), 0); + if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) + return -1; + nl_len = GENLMSG_PAYLOAD(&resp.n); + na = (struct nlattr *) GENLMSG_DATA(&resp); + while (nl_len > 0) { + if (na->nla_type == TASKSTATS_TYPE_AGGR_PID) { + nested = (struct nlattr *) NLA_DATA(na); + nested_len = NLA_PAYLOAD(na->nla_len); + while (nested_len > 0) { + if (nested->nla_type == TASKSTATS_TYPE_STATS) { + memcpy(&stats, NLA_DATA(nested), sizeof(stats)); + if (task_count < MAX_TASKS) { + tasks[task_count].pid = pid; + tasks[task_count].tgid = pid; + strncpy(tasks[task_count].command, comm, + TASK_COMM_LEN - 1); + tasks[task_count].command[TASK_COMM_LEN - 1] = '\0'; + SET_TASK_STAT(task_count, cpu_count); + SET_TASK_STAT(task_count, cpu_delay_total); + SET_TASK_STAT(task_count, blkio_count); + SET_TASK_STAT(task_count, blkio_delay_total); + SET_TASK_STAT(task_count, swapin_count); + SET_TASK_STAT(task_count, swapin_delay_total); + SET_TASK_STAT(task_count, freepages_count); + SET_TASK_STAT(task_count, freepages_delay_total); + SET_TASK_STAT(task_count, thrashing_count); + SET_TASK_STAT(task_count, thrashing_delay_total); + SET_TASK_STAT(task_count, compact_count); + SET_TASK_STAT(task_count, compact_delay_total); + SET_TASK_STAT(task_count, wpcopy_count); + SET_TASK_STAT(task_count, wpcopy_delay_total); + SET_TASK_STAT(task_count, irq_count); + 
SET_TASK_STAT(task_count, irq_delay_total); + task_count++; + } + break; + } + nested_len -= NLA_ALIGN(nested->nla_len); + nested = NLA_NEXT(nested); + } + } + nl_len -= NLA_ALIGN(na->nla_len); + na = NLA_NEXT(na); + } + return 0; +} + +static void get_task_delays(void) +{ + char comm[TASK_COMM_LEN]; + struct dirent *entry; + DIR *dir; + int pid; + + task_count = 0; + if (cfg.monitor_pid > 0) { + if (read_comm(cfg.monitor_pid, comm, sizeof(comm)) == 0) + fetch_and_fill_task_info(cfg.monitor_pid, comm); + return; + } + + dir = opendir("/proc"); + if (!dir) { + fprintf(stderr, "Error opening /proc directory\n"); + return; + } + + while ((entry = readdir(dir)) != NULL && task_count < MAX_TASKS) { + if (!isdigit(entry->d_name[0])) + continue; + pid = atoi(entry->d_name); + if (pid == 0) + continue; + if (read_comm(pid, comm, sizeof(comm)) != 0) + continue; + fetch_and_fill_task_info(pid, comm); + } + closedir(dir); +} + +/* Calculate average delay in milliseconds */ +static double average_ms(unsigned long long total, unsigned long long count) +{ + if (count == 0) + return 0; + return (double)total / 1000000.0 / count; +} + +/* Comparison function for sorting tasks */ +static int compare_tasks(const void *a, const void *b) +{ + const struct task_info *t1 = (const struct task_info *)a; + const struct task_info *t2 = (const struct task_info *)b; + double avg1, avg2; + + switch (cfg.sort_field) { + case 'c': /* CPU */ + avg1 = average_ms(t1->cpu_delay_total, t1->cpu_count); + avg2 = average_ms(t2->cpu_delay_total, t2->cpu_count); + if (avg1 != avg2) + return avg2 > avg1 ? 1 : -1; + return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1; + + default: + return t2->cpu_delay_total > t1->cpu_delay_total ? 
1 : -1; + } +} + +/* Sort tasks by selected field */ +static void sort_tasks(void) +{ + if (task_count > 0) + qsort(tasks, task_count, sizeof(struct task_info), compare_tasks); +} + +/* Get container statistics via cgroupstats */ +static void get_container_stats(void) +{ + int rc, cfd; + struct { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[MAX_MSG_SIZE]; + } req, resp; + struct nlattr *na; + int nl_len; + struct cgroupstats stats; + + /* Check if container path is set */ + if (!cfg.container_path) + return; + + /* Open container cgroup */ + cfd = open(cfg.container_path, O_RDONLY); + if (cfd < 0) { + fprintf(stderr, "Error opening container path: %s\n", cfg.container_path); + return; + } + + /* Send request for container stats */ + if (send_cmd(nl_sd, family_id, getpid(), CGROUPSTATS_CMD_GET, + CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32)) < 0) { + fprintf(stderr, "Failed to send request for container stats\n"); + close(cfd); + return; + } + + /* Receive response */ + rc = recv(nl_sd, &resp, sizeof(resp), 0); + if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) { + fprintf(stderr, "Failed to receive response for container stats\n"); + close(cfd); + return; + } + + /* Parse response */ + nl_len = GENLMSG_PAYLOAD(&resp.n); + na = (struct nlattr *) GENLMSG_DATA(&resp); + while (nl_len > 0) { + if (na->nla_type == CGROUPSTATS_TYPE_CGROUP_STATS) { + /* Get the cgroupstats structure */ + memcpy(&stats, NLA_DATA(na), sizeof(stats)); + + /* Fill container stats */ + container_stats.nr_sleeping = stats.nr_sleeping; + container_stats.nr_running = stats.nr_running; + container_stats.nr_stopped = stats.nr_stopped; + container_stats.nr_uninterruptible = stats.nr_uninterruptible; + container_stats.nr_io_wait = stats.nr_io_wait; + break; + } + nl_len -= NLA_ALIGN(na->nla_len); + na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); + } + + close(cfd); +} + +/* Display results to stdout or log file */ +static void display_results(void) +{ + time_t now = 
time(NULL); + struct tm *tm_now = localtime(&now); + char timestamp[32]; + int i, count; + FILE *out = stdout; + + fprintf(out, "\033[H\033[J"); + + if (cfg.container_path) { + fprintf(out, "Container Information (%s):\n", cfg.container_path); + fprintf(out, "Processes: running=%d, sleeping=%d, ", + container_stats.nr_running, container_stats.nr_sleeping); + fprintf(out, "stopped=%d, uninterruptible=%d, io_wait=%d\n\n", + container_stats.nr_stopped, container_stats.nr_uninterruptible, + container_stats.nr_io_wait); + } + fprintf(out, "Top %d processes (sorted by CPU delay):\n\n", + cfg.max_processes); + fprintf(out, " PID TGID COMMAND CPU(ms) IO(ms) "); + fprintf(out, "SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms)\n"); + fprintf(out, "-----------------------------------------------"); + fprintf(out, "----------------------------------------------\n"); + count = task_count < cfg.max_processes ? task_count : cfg.max_processes; + + for (i = 0; i < count; i++) { + fprintf(out, "%5d %5d %-15s ", + tasks[i].pid, tasks[i].tgid, tasks[i].command); + fprintf(out, "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n", + average_ms(tasks[i].cpu_delay_total, tasks[i].cpu_count), + average_ms(tasks[i].blkio_delay_total, tasks[i].blkio_count), + average_ms(tasks[i].swapin_delay_total, tasks[i].swapin_count), + average_ms(tasks[i].freepages_delay_total, tasks[i].freepages_count), + average_ms(tasks[i].thrashing_delay_total, tasks[i].thrashing_count), + average_ms(tasks[i].compact_delay_total, tasks[i].compact_count), + average_ms(tasks[i].wpcopy_delay_total, tasks[i].wpcopy_count), + average_ms(tasks[i].irq_delay_total, tasks[i].irq_count)); + } + + fprintf(out, "\n"); +} + +/* Main function */ +int main(int argc, char **argv) +{ + int iterations = 0; + int use_q_quit = 0; + + /* Parse command line arguments */ + parse_args(argc, argv); + + /* Setup netlink socket */ + nl_sd = create_nl_socket(); + if (nl_sd < 0) { + fprintf(stderr, "Error creating netlink socket\n"); + exit(1); 
+ } + + /* Get family ID for taskstats via netlink */ + family_id = get_family_id(nl_sd); + if (!family_id) { + fprintf(stderr, "Error getting taskstats family ID\n"); + close(nl_sd); + exit(1); + } + + if (!cfg.output_one_time) { + use_q_quit = 1; + enable_raw_mode(); + printf("Press 'q' to quit.\n"); + fflush(stdout); + } + + /* Main loop */ + while (running) { + /* Get container stats if container path provided */ + if (cfg.container_path) + get_container_stats(); + + /* Get task delays */ + get_task_delays(); + + /* Sort tasks */ + sort_tasks(); + + /* Display results to stdout or log file */ + display_results(); + + /* Check for iterations */ + if (cfg.iterations > 0 && ++iterations >= cfg.iterations) + break; + + /* Exit if output_one_time is set */ + if (cfg.output_one_time) + break; + + /* Check for 'q' key to quit */ + if (use_q_quit) { + struct timeval tv = {cfg.delay, 0}; + fd_set readfds; + + FD_ZERO(&readfds); + FD_SET(STDIN_FILENO, &readfds); + int r = select(STDIN_FILENO+1, &readfds, NULL, NULL, &tv); + + if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) { + char ch = 0; + + read(STDIN_FILENO, &ch, 1); + if (ch == 'q' || ch == 'Q') { + running = 0; + break; + } + } + } else { + sleep(cfg.delay); + } + } + + /* Restore terminal mode */ + if (use_q_quit) + disable_raw_mode(); + + /* Cleanup */ + close(nl_sd); + if (cfg.container_path) + free(cfg.container_path); + + return 0; +} From 0c954c57f9e1fcf5d1b3e1be5320978bfaf9cbed Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 23 Jun 2025 23:54:20 +0900 Subject: [PATCH 0610/2411] ocfs2: embed actual values into ocfs2_sysfile_lock_key names Since lockdep_set_class() uses stringified key name via macro, calling lockdep_set_class() with an array causes lockdep warning messages to report the variable name rather than the actual index number. Change ocfs2_init_locked_inode() to pass actual index number for better readability of lockdep reports. This patch does not change behavior.
Before: Chain exists of: &ocfs2_sysfile_lock_key[args->fi_sysfile_type] --> jbd2_handle --> &oi->ip_xattr_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&oi->ip_xattr_sem); lock(jbd2_handle); lock(&oi->ip_xattr_sem); lock(&ocfs2_sysfile_lock_key[args->fi_sysfile_type]); *** DEADLOCK *** After: Chain exists of: &ocfs2_sysfile_lock_key[EXTENT_ALLOC_SYSTEM_INODE] --> jbd2_handle --> &oi->ip_xattr_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&oi->ip_xattr_sem); lock(jbd2_handle); lock(&oi->ip_xattr_sem); lock(&ocfs2_sysfile_lock_key[EXTENT_ALLOC_SYSTEM_INODE]); *** DEADLOCK *** Link: https://lkml.kernel.org/r/29348724-639c-443d-bbce-65c3a0a13a38@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/inode.c | 70 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 12e5d1f73325..14bf440ea4df 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -50,8 +50,6 @@ struct ocfs2_find_inode_args unsigned int fi_sysfile_type; }; -static struct lock_class_key ocfs2_sysfile_lock_key[NUM_SYSTEM_INODES]; - static int ocfs2_read_locked_inode(struct inode *inode, struct ocfs2_find_inode_args *args); static int ocfs2_init_locked_inode(struct inode *inode, void *opaque); @@ -250,14 +248,77 @@ static int ocfs2_find_actor(struct inode *inode, void *opaque) static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) { struct ocfs2_find_inode_args *args = opaque; +#ifdef CONFIG_LOCKDEP + static struct lock_class_key ocfs2_sysfile_lock_key[NUM_SYSTEM_INODES]; static struct lock_class_key ocfs2_quota_ip_alloc_sem_key, ocfs2_file_ip_alloc_sem_key; +#endif inode->i_ino = args->fi_ino; OCFS2_I(inode)->ip_blkno = args->fi_blkno; - if (args->fi_sysfile_type != 0) +#ifdef CONFIG_LOCKDEP + switch (args->fi_sysfile_type) { + 
case BAD_BLOCK_SYSTEM_INODE: + break; + case GLOBAL_INODE_ALLOC_SYSTEM_INODE: lockdep_set_class(&inode->i_rwsem, - &ocfs2_sysfile_lock_key[args->fi_sysfile_type]); + &ocfs2_sysfile_lock_key[GLOBAL_INODE_ALLOC_SYSTEM_INODE]); + break; + case SLOT_MAP_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[SLOT_MAP_SYSTEM_INODE]); + break; + case HEARTBEAT_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[HEARTBEAT_SYSTEM_INODE]); + break; + case GLOBAL_BITMAP_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[GLOBAL_BITMAP_SYSTEM_INODE]); + break; + case USER_QUOTA_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[USER_QUOTA_SYSTEM_INODE]); + break; + case GROUP_QUOTA_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[GROUP_QUOTA_SYSTEM_INODE]); + break; + case ORPHAN_DIR_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[ORPHAN_DIR_SYSTEM_INODE]); + break; + case EXTENT_ALLOC_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[EXTENT_ALLOC_SYSTEM_INODE]); + break; + case INODE_ALLOC_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[INODE_ALLOC_SYSTEM_INODE]); + break; + case JOURNAL_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[JOURNAL_SYSTEM_INODE]); + break; + case LOCAL_ALLOC_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[LOCAL_ALLOC_SYSTEM_INODE]); + break; + case TRUNCATE_LOG_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[TRUNCATE_LOG_SYSTEM_INODE]); + break; + case LOCAL_USER_QUOTA_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[LOCAL_USER_QUOTA_SYSTEM_INODE]); + break; + case LOCAL_GROUP_QUOTA_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[LOCAL_GROUP_QUOTA_SYSTEM_INODE]); + break; + default: + WARN_ONCE(1, 
"Unknown sysfile type %d\n", args->fi_sysfile_type); + } if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE || args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE || args->fi_sysfile_type == LOCAL_USER_QUOTA_SYSTEM_INODE || @@ -267,6 +328,7 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) else lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem, &ocfs2_file_ip_alloc_sem_key); +#endif return 0; } From 37d0f07bc5a2d2736021c9090ce796b8a66571ba Mon Sep 17 00:00:00 2001 From: Sachin Mokashi Date: Tue, 24 Jun 2025 10:12:20 -0400 Subject: [PATCH 0611/2411] mailmap: update Sachin Mokashi's email address As previous contributions were made with the older email address, which is no longer in use. Update my new address to map the old one. Link: https://lkml.kernel.org/r/20250624141220.1264691-1-sachin.mokashi@intel.com Signed-off-by: Sachin Mokashi Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index b0ace71968ab..c1f4381f9685 100644 --- a/.mailmap +++ b/.mailmap @@ -670,6 +670,7 @@ Muchun Song Ross Zwisler Rudolf Marek Rui Saraiva +Sachin Mokashi Sachin P Sant Sai Prakash Ranjan Sakari Ailus From ad0039db42179064f9b60eab67c797f7359fdefc Mon Sep 17 00:00:00 2001 From: Su Hui Date: Thu, 26 Jun 2025 18:54:41 +0800 Subject: [PATCH 0612/2411] fs/proc/vmcore: a few cleanups for vmcore_add_device_dump() There are two cleanups for vmcore_add_device_dump(). Return -ENOMEM directly rather than goto the label to simplify the code and use scoped_guard() to simplify the lock/unlock code. 
Link: https://lkml.kernel.org/r/20250626105440.1053139-1-suhui@nfschina.com Signed-off-by: Su Hui Reviewed-by: Dan Carpenter Cc: Baoquan He Cc: Dave Young Cc: Suhui Cc: Vivek Goyal Signed-off-by: Andrew Morton --- fs/proc/vmcore.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 10d01eb09c43..f188bd900eb2 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -1490,10 +1490,8 @@ int vmcore_add_device_dump(struct vmcoredd_data *data) return -EINVAL; dump = vzalloc(sizeof(*dump)); - if (!dump) { - ret = -ENOMEM; - goto out_err; - } + if (!dump) + return -ENOMEM; /* Keep size of the buffer page aligned so that it can be mmaped */ data_size = roundup(sizeof(struct vmcoredd_header) + data->size, @@ -1519,22 +1517,19 @@ int vmcore_add_device_dump(struct vmcoredd_data *data) dump->size = data_size; /* Add the dump to driver sysfs list and update the elfcore hdr */ - mutex_lock(&vmcore_mutex); - if (vmcore_opened) - pr_warn_once("Unexpected adding of device dump\n"); - if (vmcore_open) { - ret = -EBUSY; - goto unlock; + scoped_guard(mutex, &vmcore_mutex) { + if (vmcore_opened) + pr_warn_once("Unexpected adding of device dump\n"); + if (vmcore_open) { + ret = -EBUSY; + goto out_err; + } + + list_add_tail(&dump->list, &vmcoredd_list); + vmcoredd_update_size(data_size); } - - list_add_tail(&dump->list, &vmcoredd_list); - vmcoredd_update_size(data_size); - mutex_unlock(&vmcore_mutex); return 0; -unlock: - mutex_unlock(&vmcore_mutex); - out_err: vfree(buf); vfree(dump); From d0118d7d20bbf9a76f75840bc3b0da0f4d092da9 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 30 Jun 2025 19:21:31 +0900 Subject: [PATCH 0613/2411] ocfs2: update d_splice_alias() return code checking When commit d3556babd7fa ("ocfs2: fix d_splice_alias() return code checking") was merged into v3.18-rc3, d_splice_alias() was returning one of a valid dentry, NULL or an ERR_PTR. 
When commit b5ae6b15bd73 ("merge d_materialise_unique() into d_splice_alias()") was merged into v3.19-rc1, d_splice_alias() started returning -ELOOP as one of ERR_PTR values. Now, when syzkaller mounts a crafted ocfs2 filesystem image that hits d_splice_alias() == -ELOOP case from ocfs2_lookup(), ocfs2_lookup() fails to handle -ELOOP case and generic_shutdown_super() hits "VFS: Busy inodes after unmount" message. Instead of calling ocfs2_dentry_attach_lock() or ocfs2_dentry_attach_gen() when d_splice_alias() returned an ERR_PTR value, change ocfs2_lookup() to bail out immediately. Also, ocfs2_lookup() needs to call dput() when ocfs2_dentry_attach_lock() returned an error. Link: https://lkml.kernel.org/r/da5be67d-2a0b-4b93-85d6-42f3b7440135@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Reported-by: syzbot Closes: https://syzkaller.appspot.com/bug?extid=1134d3a5b062e9665a7a Suggested-by: Al Viro Reviewed-by: Joseph Qi Cc: Al Viro Cc: Joel Becker Cc: Mark Fasheh Cc: Richard Weinberger Cc: Tetsuo Handa Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/namei.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 26bc59c3a813..c90b254da75e 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -142,6 +142,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, bail_add: ret = d_splice_alias(inode, dentry); + if (IS_ERR(ret)) + goto bail_unlock; if (inode) { /* @@ -154,15 +156,16 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, * NOTE: This dentry already has ->d_op set from * ocfs2_get_parent() and ocfs2_get_dentry() */ - if (!IS_ERR_OR_NULL(ret)) + if (ret) dentry = ret; status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno); if (status) { mlog_errno(status); + if (ret) + dput(ret); ret = ERR_PTR(status); - goto bail_unlock; } } else ocfs2_dentry_attach_gen(dentry); From
1f04e0e65209be3148a20b4b370e01d02b7ac445 Mon Sep 17 00:00:00 2001 From: Moon Hee Lee Date: Mon, 23 Jun 2025 11:34:06 -0700 Subject: [PATCH 0614/2411] selftests: ptrace: add set_syscall_info to .gitignore Add the set_syscall_info test binary to .gitignore to avoid tracking build artifacts in the ptrace selftests directory. Link: https://lkml.kernel.org/r/20250623183405.133434-2-moonhee.lee.ca@gmail.com Signed-off-by: Moon Hee Lee Cc: "Dmitry V. Levin" Cc: Oleg Nesterov Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/ptrace/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore index b7dde152e75a..f6be8efd57ea 100644 --- a/tools/testing/selftests/ptrace/.gitignore +++ b/tools/testing/selftests/ptrace/.gitignore @@ -3,3 +3,4 @@ get_syscall_info get_set_sud peeksiginfo vmaccess +set_syscall_info From 22c2ed6996ac34df506040a069fac3e5100b5c0e Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 2 Jul 2025 16:52:00 -0700 Subject: [PATCH 0615/2411] checkpatch: check for missing sentinels in ID arrays All of the ID tables based on (of_device_id, pci_device_id, ...) require their arrays to end in an empty sentinel value. That's usually spelled with an empty initializer entry (e.g., "{}"), but also sometimes with explicit 0 entries, field initializers (e.g., '.id = ""'), or even a macro entry (like PCMCIA_DEVICE_NULL). Without a sentinel, device-matching code may read out of bounds. I've found a number of such bugs in driver reviews, and we even occasionally commit one to the tree. See commit 5751eee5c620 ("i2c: nomadik: Add missing sentinel to match table") for example. Teach checkpatch to find these ID tables, and complain if it looks like there wasn't a sentinel value. 
Test output: $ git format-patch -1 a0d15cc47f29be6d --stdout | scripts/checkpatch.pl - ERROR: missing sentinel in ID array #57: FILE: drivers/i2c/busses/i2c-nomadik.c:1073: +static const struct of_device_id nmk_i2c_eyeq_match_table[] = { { .compatible = "XXXXXXXXXXXXXXXXXX", .data = (void *)(NMK_I2C_EYEQ_FLAG_32B_BUS | NMK_I2C_EYEQ_FLAG_IS_EYEQ5), }, }; total: 1 errors, 0 warnings, 66 lines checked NOTE: For some of the reported defects, checkpatch may be able to mechanically convert to the typical style using --fix or --fix-inplace. "[PATCH] i2c: nomadik: switch from of_device_is_compatible() to" has style problems, please review. NOTE: If any of the errors are false positives, please report them to the maintainer, see CHECKPATCH in MAINTAINERS. When run across the entire tree (scripts/checkpatch.pl -q --types MISSING_SENTINEL -f ...), false positives exist: * where macros are used that hide the table from analysis (e.g., drivers/gpu/drm/radeon/radeon_drv.c / radeon_PCI_IDS). There are fewer than 5 of these. * where such tables are processed correctly via ARRAY_SIZE() (fewer than 5 instances). This is by far not the typical usage of *_device_id arrays. * some odd parsing artifacts, where ctx_statement_block() seems to quit in the middle of a block due to #if/#else/#endif. Also, not every "struct *_device_id" is in fact a sentinel-requiring structure, but even with such types, false positives are very rare. 
Link: https://lkml.kernel.org/r/20250702235245.1007351-1-briannorris@chromium.org Signed-off-by: Brian Norris Acked-by: Joe Perches Cc: Andy Whitcroft Cc: Brian Norris Cc: Dwaipayan Ray Cc: Lukas Bulwahn Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 489b74d52abe..d4c24318548c 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -685,6 +685,9 @@ our $tracing_logging_tags = qr{(?xi: [\.\!:\s]* )}; +# Device ID types like found in include/linux/mod_devicetable.h. +our $dev_id_types = qr{\b[a-z]\w*_device_id\b}; + sub edit_distance_min { my (@arr) = @_; my $len = scalar @arr; @@ -7679,6 +7682,31 @@ sub process { WARN("DUPLICATED_SYSCTL_CONST", "duplicated sysctl range checking value '$1', consider using the shared one in include/linux/sysctl.h\n" . $herecurr); } + +# Check that *_device_id tables have sentinel entries. + if (defined $stat && $line =~ /struct\s+$dev_id_types\s+\w+\s*\[\s*\]\s*=\s*\{/) { + my $stripped = $stat; + + # Strip diff line prefixes. + $stripped =~ s/(^|\n)./$1/g; + # Line continuations. + $stripped =~ s/\\\n/\n/g; + # Strip whitespace, empty strings, zeroes, and commas. + $stripped =~ s/""//g; + $stripped =~ s/0x0//g; + $stripped =~ s/[\s$;,0]//g; + # Strip field assignments. + $stripped =~ s/\.$Ident=//g; + + if (!(substr($stripped, -4) eq "{}};" || + substr($stripped, -6) eq "{{}}};" || + $stripped =~ /ISAPNP_DEVICE_SINGLE_END}};$/ || + $stripped =~ /ISAPNP_CARD_END}};$/ || + $stripped =~ /NULL};$/ || + $stripped =~ /PCMCIA_DEVICE_NULL};$/)) { + ERROR("MISSING_SENTINEL", "missing sentinel in ID array\n" . 
"$here\n$stat\n"); + } + } } # If we have no input at all, then there is nothing to report on From ec50ec378e3fd83bde9b3d622ceac3509a60b6b5 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 10 Jul 2025 05:57:26 -0700 Subject: [PATCH 0616/2411] ipmi: Use dev_warn_ratelimited() for incorrect message warnings During BMC firmware upgrades on live systems, the ipmi_msghandler generates excessive "BMC returned incorrect response" warnings while the BMC is temporarily offline. This can flood system logs in large deployments. Replace dev_warn() with dev_warn_ratelimited() to throttle these warnings and prevent log spam during BMC maintenance operations. Signed-off-by: Breno Leitao Message-ID: <20250710-ipmi_ratelimit-v1-1-6d417015ebe9@debian.org> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 064944ae9fdc..8e9050f99e9e 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -4607,10 +4607,10 @@ static int handle_one_recv_msg(struct ipmi_smi *intf, * The NetFN and Command in the response is not even * marginally correct. */ - dev_warn(intf->si_dev, - "BMC returned incorrect response, expected netfn %x cmd %x, got netfn %x cmd %x\n", - (msg->data[0] >> 2) | 1, msg->data[1], - msg->rsp[0] >> 2, msg->rsp[1]); + dev_warn_ratelimited(intf->si_dev, + "BMC returned incorrect response, expected netfn %x cmd %x, got netfn %x cmd %x\n", + (msg->data[0] >> 2) | 1, msg->data[1], + msg->rsp[0] >> 2, msg->rsp[1]); goto return_unspecified; } From 5b6031c832c2747d58d3f0130098d965ef050b9a Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 11 Jul 2025 11:23:55 +0800 Subject: [PATCH 0617/2411] cxl/core: Introduce a new helper cxl_resource_contains_addr() In CXL subsystem, many functions need to check an address availability by checking if the resource range contains the address. 
Provide a new helper function, cxl_resource_contains_addr(), to check whether the resource range contains the input address. Suggested-by: Alison Schofield Signed-off-by: Li Ming Tested-by: Shiju Jose Reviewed-by: Andy Shevchenko Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20250711032357.127355-2-ming.li@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/core.h | 1 + drivers/cxl/core/hdm.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 29b61828a847..6b78b10da3e1 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -80,6 +80,7 @@ int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size); int cxl_dpa_free(struct cxl_endpoint_decoder *cxled); resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled); resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled); +bool cxl_resource_contains_addr(const struct resource *res, const resource_size_t addr); enum cxl_rcrb { CXL_RCRB_DOWNSTREAM, diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index ab1007495f6b..088caa6b6f74 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -547,6 +547,13 @@ resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled) return base; } +bool cxl_resource_contains_addr(const struct resource *res, const resource_size_t addr) +{ + struct resource _addr = DEFINE_RES_MEM(addr, 1); + + return resource_contains(res, &_addr); +} + int cxl_dpa_free(struct cxl_endpoint_decoder *cxled) { struct cxl_port *port = cxled_to_port(cxled); From 03ff65c02559e8da32be231d7f10fe899233ceae Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 11 Jul 2025 11:23:56 +0800 Subject: [PATCH 0618/2411] cxl/edac: Fix wrong dpa checking for PPR operation Per Table 8-143.
"Get Partition Info Output Payload" in CXL r3.2 section 8.2.10.9.2.1 "Get Partition Info(Opcode 4100h)", DPA 0 is a valid address of a CXL device. However, cxl_do_ppr() considers it as an invalid address, so that user will get an -EINVAL when user calls the sysfs interface of the edac driver to trigger a Post Package Repair(PPR) operation for DPA 0 on a CXL device. The correct implementation should be checking if the input DPA is in the DPA range of the CXL device. Fixes: be9b359e056a ("cxl/edac: Add CXL memory device soft PPR control feature") Signed-off-by: Li Ming Tested-by: Shiju Jose Reviewed-by: Shiju Jose Reviewed-by: Dave Jiang Reviewed-by: Alison Schofield Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250711032357.127355-3-ming.li@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/edac.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/edac.c b/drivers/cxl/core/edac.c index cd3873750e78..1526d388740c 100644 --- a/drivers/cxl/core/edac.c +++ b/drivers/cxl/core/edac.c @@ -1923,8 +1923,11 @@ static int cxl_ppr_set_nibble_mask(struct device *dev, void *drv_data, static int cxl_do_ppr(struct device *dev, void *drv_data, u32 val) { struct cxl_ppr_context *cxl_ppr_ctx = drv_data; + struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd; + struct cxl_dev_state *cxlds = cxlmd->cxlds; - if (!cxl_ppr_ctx->dpa || val != EDAC_DO_MEM_REPAIR) + if (val != EDAC_DO_MEM_REPAIR || + !cxl_resource_contains_addr(&cxlds->dpa_res, cxl_ppr_ctx->dpa)) return -EINVAL; return cxl_mem_perform_ppr(cxl_ppr_ctx); From bdf2d9fd3a86538b8c7368989248b857b5f1bcf1 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 11 Jul 2025 11:23:57 +0800 Subject: [PATCH 0619/2411] cxl/core: Using cxl_resource_contains_addr() to check address availability Helper function cxl_resource_contains_addr() can be used to check if a resource range contains an input address. Use it to replace all code that checks whether a resource range contains a DPA/HPA/SPA. 
Signed-off-by: Li Ming Reviewed-by: Dave Jiang Reviewed-by: Alison Schofield Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250711032357.127355-4-ming.li@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/edac.c | 4 ++-- drivers/cxl/core/memdev.c | 2 +- drivers/cxl/core/region.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/cxl/core/edac.c b/drivers/cxl/core/edac.c index 1526d388740c..520121901353 100644 --- a/drivers/cxl/core/edac.c +++ b/drivers/cxl/core/edac.c @@ -1523,7 +1523,7 @@ static int cxl_mem_sparing_set_dpa(struct device *dev, void *drv_data, u64 dpa) struct cxl_memdev *cxlmd = ctx->cxlmd; struct cxl_dev_state *cxlds = cxlmd->cxlds; - if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) + if (!cxl_resource_contains_addr(&cxlds->dpa_res, dpa)) return -EINVAL; ctx->dpa = dpa; @@ -1892,7 +1892,7 @@ static int cxl_ppr_set_dpa(struct device *dev, void *drv_data, u64 dpa) struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd; struct cxl_dev_state *cxlds = cxlmd->cxlds; - if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) + if (!cxl_resource_contains_addr(&cxlds->dpa_res, dpa)) return -EINVAL; cxl_ppr_ctx->dpa = dpa; diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index f88a13adf7fa..769bd9be8b94 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -267,7 +267,7 @@ static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa) dev_dbg(cxlds->dev, "device has no dpa resource\n"); return -EINVAL; } - if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) { + if (!cxl_resource_contains_addr(&cxlds->dpa_res, dpa)) { dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n", dpa, &cxlds->dpa_res); return -EINVAL; diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 6e5e1460068d..91ff3a495fbd 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2847,7 +2847,7 @@ static int __cxl_dpa_to_region(struct device 
*dev, void *arg) if (!cxled || !cxled->dpa_res || !resource_size(cxled->dpa_res)) return 0; - if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start) + if (!cxl_resource_contains_addr(cxled->dpa_res, dpa)) return 0; /* @@ -2959,7 +2959,7 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, if (cxlrd->hpa_to_spa) hpa = cxlrd->hpa_to_spa(cxlrd, hpa); - if (hpa < p->res->start || hpa > p->res->end) { + if (!cxl_resource_contains_addr(p->res, hpa)) { dev_dbg(&cxlr->dev, "Addr trans fail: hpa 0x%llx not in region\n", hpa); return ULLONG_MAX; @@ -3499,7 +3499,7 @@ u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa) xa_for_each(&endpoint->regions, index, iter) { struct cxl_region_params *p = &iter->region->params; - if (p->res->start <= spa && spa <= p->res->end) { + if (cxl_resource_contains_addr(p->res, spa)) { if (!p->cache_size) return ~0ULL; From 0b075c011032f88d1cfde3b45d6dcf08b44140eb Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Tue, 8 Jul 2025 13:28:38 +0530 Subject: [PATCH 0620/2411] pinmux: fix race causing mux_owner NULL with active mux_usecount Commit 5a3e85c3c397 ("pinmux: Use sequential access to access desc->pinmux data") tried to address an issue where two clients of the same GPIO calling pinctrl_select_state() for the same functionality resulted in a NULL pointer dereference while accessing desc->mux_owner. However, the issue was not completely fixed due to the way it was handled, and it can still result in the same NULL pointer dereference. The issue occurs due to the following interleaving: cpu0 (process A) cpu1 (process B) pin_request() { pin_free() { mutex_lock() desc->mux_usecount--; //becomes 0 ..
mutex_unlock() mutex_lock(desc->mux) desc->mux_usecount++; // becomes 1 desc->mux_owner = owner; mutex_unlock(desc->mux) mutex_lock(desc->mux) desc->mux_owner = NULL; mutex_unlock(desc->mux) This sequence leads to a state where the pin appears to be in use (`mux_usecount == 1`) but has no owner (`mux_owner == NULL`), which can cause NULL pointer on next pin_request on the same pin. Ensure that updates to mux_usecount and mux_owner are performed atomically under the same lock. Only clear mux_owner when mux_usecount reaches zero and no new owner has been assigned. Fixes: 5a3e85c3c397 ("pinmux: Use sequential access to access desc->pinmux data") Signed-off-by: Mukesh Ojha Link: https://lore.kernel.org/20250708-pinmux-race-fix-v2-1-8ae9e8a0d1a1@oss.qualcomm.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinmux.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c index 0743190da59e..2c31e7f2a27a 100644 --- a/drivers/pinctrl/pinmux.c +++ b/drivers/pinctrl/pinmux.c @@ -236,6 +236,15 @@ static const char *pin_free(struct pinctrl_dev *pctldev, int pin, if (desc->mux_usecount) return NULL; } + + if (gpio_range) { + owner = desc->gpio_owner; + desc->gpio_owner = NULL; + } else { + owner = desc->mux_owner; + desc->mux_owner = NULL; + desc->mux_setting = NULL; + } } /* @@ -247,17 +256,6 @@ static const char *pin_free(struct pinctrl_dev *pctldev, int pin, else if (ops->free) ops->free(pctldev, pin); - scoped_guard(mutex, &desc->mux_lock) { - if (gpio_range) { - owner = desc->gpio_owner; - desc->gpio_owner = NULL; - } else { - owner = desc->mux_owner; - desc->mux_owner = NULL; - desc->mux_setting = NULL; - } - } - module_put(pctldev->owner); return owner; From ac6242b7ba0bedf4097846717ec366904aaab01b Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Wed, 9 Jul 2025 13:22:27 +0200 Subject: [PATCH 0621/2411] dt-bindings: pinctrl: qcom,pmic-gpio: Add PMIV0104 support Update the Qualcomm 
Technologies, Inc. PMIC GPIO binding documentation to include the compatible string for the PMIV0104 PMICs. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Luca Weiss Link: https://lore.kernel.org/20250709-sm7635-pmiv0104-v2-2-ebf18895edd6@fairphone.com Signed-off-by: Linus Walleij --- Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml index 055cea5452eb..e8a1f524929a 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml @@ -64,6 +64,7 @@ properties: - qcom,pmi8994-gpio - qcom,pmi8998-gpio - qcom,pmih0108-gpio + - qcom,pmiv0104-gpio - qcom,pmk8350-gpio - qcom,pmk8550-gpio - qcom,pmm8155au-gpio @@ -228,6 +229,7 @@ allOf: - qcom,pmc8180-gpio - qcom,pmc8380-gpio - qcom,pmi8994-gpio + - qcom,pmiv0104-gpio - qcom,pmm8155au-gpio then: properties: From 19dca764dbb54bf5af11311016c9d8d69c1f5131 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Wed, 9 Jul 2025 13:22:28 +0200 Subject: [PATCH 0622/2411] pinctrl: qcom: spmi: Add PMIV0104 PMIV0104 is a PMIC, featuring 10 GPIOs. Describe it. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Luca Weiss Link: https://lore.kernel.org/20250709-sm7635-pmiv0104-v2-3-ebf18895edd6@fairphone.com Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-spmi-gpio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c index bc082bfb52ef..40de5554c771 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c @@ -1244,6 +1244,7 @@ static const struct of_device_id pmic_gpio_of_match[] = { { .compatible = "qcom,pmi8994-gpio", .data = (void *) 10 }, { .compatible = "qcom,pmi8998-gpio", .data = (void *) 14 }, { .compatible = "qcom,pmih0108-gpio", .data = (void *) 18 }, + { .compatible = "qcom,pmiv0104-gpio", .data = (void *) 10 }, { .compatible = "qcom,pmk8350-gpio", .data = (void *) 4 }, { .compatible = "qcom,pmk8550-gpio", .data = (void *) 6 }, { .compatible = "qcom,pmm8155au-gpio", .data = (void *) 10 }, From 2feab53ac467d7f274830f30c218afa6ce89e39e Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Wed, 9 Jul 2025 13:46:34 +0200 Subject: [PATCH 0623/2411] dt-bindings: pinctrl: qcom,pmic-gpio: Add PM7550 support Update the Qualcomm Technologies, Inc. PMIC GPIO binding documentation to include the compatible string for the PM7550 PMICs. 
Signed-off-by: Luca Weiss Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/20250709-sm7635-pmxr2230-v2-3-09777dab0a95@fairphone.com Signed-off-by: Linus Walleij --- Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml index e8a1f524929a..5e6dfcc3fe9b 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml @@ -27,6 +27,7 @@ properties: - qcom,pm6450-gpio - qcom,pm7250b-gpio - qcom,pm7325-gpio + - qcom,pm7550-gpio - qcom,pm7550ba-gpio - qcom,pm8005-gpio - qcom,pm8018-gpio @@ -263,6 +264,7 @@ allOf: - qcom,pm660l-gpio - qcom,pm6150l-gpio - qcom,pm7250b-gpio + - qcom,pm7550-gpio - qcom,pm8038-gpio - qcom,pm8150b-gpio - qcom,pm8150l-gpio From 52e06d25bdcf8026cd1c951ff7f910e21c4afa04 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Wed, 9 Jul 2025 13:46:35 +0200 Subject: [PATCH 0624/2411] pinctrl: qcom: spmi: Add PM7550 PM7550 is a PMIC, featuring 12 GPIOs. Describe it. 
Signed-off-by: Luca Weiss Link: https://lore.kernel.org/20250709-sm7635-pmxr2230-v2-4-09777dab0a95@fairphone.com Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-spmi-gpio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c index 40de5554c771..606becc160eb 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c @@ -1206,6 +1206,7 @@ static const struct of_device_id pmic_gpio_of_match[] = { { .compatible = "qcom,pm6450-gpio", .data = (void *) 9 }, { .compatible = "qcom,pm7250b-gpio", .data = (void *) 12 }, { .compatible = "qcom,pm7325-gpio", .data = (void *) 10 }, + { .compatible = "qcom,pm7550-gpio", .data = (void *) 12 }, { .compatible = "qcom,pm7550ba-gpio", .data = (void *) 8}, { .compatible = "qcom,pm8005-gpio", .data = (void *) 4 }, { .compatible = "qcom,pm8019-gpio", .data = (void *) 6 }, From fd7dac34fda486785ce979a1d38d9760bad2b77d Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Wed, 2 Jul 2025 17:56:16 +0200 Subject: [PATCH 0625/2411] dt-bindings: pinctrl: document the Milos Top Level Mode Multiplexer Document the Top Level Mode Multiplexer on the Milos Platform. 
Signed-off-by: Luca Weiss Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/20250702-sm7635-pinctrl-v2-1-c138624b9924@fairphone.com Signed-off-by: Linus Walleij --- .../bindings/pinctrl/qcom,milos-tlmm.yaml | 133 ++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 Documentation/devicetree/bindings/pinctrl/qcom,milos-tlmm.yaml diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,milos-tlmm.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,milos-tlmm.yaml new file mode 100644 index 000000000000..0091204df20a --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/qcom,milos-tlmm.yaml @@ -0,0 +1,133 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pinctrl/qcom,milos-tlmm.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Technologies, Inc. Milos TLMM block + +maintainers: + - Luca Weiss + +description: + Top Level Mode Multiplexer pin controller in Qualcomm Milos SoC. + +allOf: + - $ref: /schemas/pinctrl/qcom,tlmm-common.yaml# + +properties: + compatible: + const: qcom,milos-tlmm + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + gpio-reserved-ranges: + minItems: 1 + maxItems: 84 + + gpio-line-names: + maxItems: 167 + +patternProperties: + "-state$": + oneOf: + - $ref: "#/$defs/qcom-milos-tlmm-state" + - patternProperties: + "-pins$": + $ref: "#/$defs/qcom-milos-tlmm-state" + additionalProperties: false + +$defs: + qcom-milos-tlmm-state: + type: object + description: + Pinctrl node's client devices use subnodes for desired pin configuration. + Client device subnodes use below standard properties. + $ref: qcom,tlmm-common.yaml#/$defs/qcom-tlmm-state + unevaluatedProperties: false + + properties: + pins: + description: + List of gpio pins affected by the properties specified in this + subnode. 
+ items: + oneOf: + - pattern: "^gpio([0-9]|[1-9][0-9]|1[0-5][0-9]|16[0-7])$" + - enum: [ ufs_reset, sdc2_clk, sdc2_cmd, sdc2_data ] + minItems: 1 + maxItems: 36 + + function: + description: + Specify the alternative function to be configured for the specified + pins. + enum: [ gpio, aoss_cti, atest_char, atest_usb, audio_ext_mclk0, + audio_ext_mclk1, audio_ref_clk, cam_mclk, cci_async_in0, + cci_i2c_scl, cci_i2c_sda, cci_timer, coex_uart1_rx, + coex_uart1_tx, dbg_out_clk, ddr_bist_complete, ddr_bist_fail, + ddr_bist_start, ddr_bist_stop, ddr_pxi0, ddr_pxi1, dp0_hot, + egpio, gcc_gp1, gcc_gp2, gcc_gp3, host2wlan_sol, i2s0_data0, + i2s0_data1, i2s0_sck, i2s0_ws, ibi_i3c, jitter_bist, mdp_vsync, + mdp_vsync0_out, mdp_vsync1_out, mdp_vsync2_out, mdp_vsync3_out, + mdp_vsync_e, nav_gpio0, nav_gpio1, nav_gpio2, pcie0_clk_req_n, + pcie1_clk_req_n, phase_flag, pll_bist_sync, pll_clk_aux, + prng_rosc0, prng_rosc1, prng_rosc2, prng_rosc3, qdss_cti, + qdss_gpio, qlink0_enable, qlink0_request, qlink0_wmss, + qlink1_enable, qlink1_request, qlink1_wmss, qspi0, qup0_se0, + qup0_se1, qup0_se2, qup0_se3, qup0_se4, qup0_se5, qup0_se6, + qup1_se0, qup1_se1, qup1_se2, qup1_se3, qup1_se4, qup1_se5, + qup1_se6, resout_gpio_n, sd_write_protect, sdc1_clk, sdc1_cmd, + sdc1_data, sdc1_rclk, sdc2_clk, sdc2_cmd, sdc2_data, + sdc2_fb_clk, tb_trig_sdc1, tb_trig_sdc2, tgu_ch0_trigout, + tgu_ch1_trigout, tmess_prng0, tmess_prng1, tmess_prng2, + tmess_prng3, tsense_pwm1, tsense_pwm2, uim0_clk, uim0_data, + uim0_present, uim0_reset, uim1_clk_mira, uim1_clk_mirb, + uim1_data_mira, uim1_data_mirb, uim1_present_mira, + uim1_present_mirb, uim1_reset_mira, uim1_reset_mirb, usb0_hs, + usb0_phy_ps, vfr_0, vfr_1, vsense_trigger_mirnat, wcn_sw, + wcn_sw_ctrl ] + + required: + - pins + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + #include + tlmm: pinctrl@f100000 { + compatible = "qcom,milos-tlmm"; + reg = <0x0f100000 0x300000>; + + interrupts = ; + + gpio-controller; 
+ #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + gpio-ranges = <&tlmm 0 0 168>; + + gpio-wo-state { + pins = "gpio1"; + function = "gpio"; + }; + + qup-uart5-default-state { + pins = "gpio25", "gpio26"; + function = "qup0_se5"; + drive-strength = <2>; + bias-disable; + }; + }; +... From 4a6cdecaa1497f1fbbd1d5307a225b6ca5a62a90 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 11 Jul 2025 12:10:15 +0100 Subject: [PATCH 0626/2411] perf tests bp_account: Fix leaked file descriptor Since the commit e9846f5ead26 ("perf test: In forked mode add check that fds aren't leaked"), the test "Breakpoint accounting" reports the error: # perf test -vvv "Breakpoint accounting" 20: Breakpoint accounting: --- start --- test child forked, pid 373 failed opening event 0 failed opening event 0 watchpoints count 4, breakpoints count 6, has_ioctl 1, share 0 wp 0 created wp 1 created wp 2 created wp 3 created wp 0 modified to bp wp max created ---- end(0) ---- Leak of file descriptor 7 that opened: 'anon_inode:[perf_event]' A watchpoint's file descriptor was not properly released. This patch fixes the leak. 
Fixes: 032db28e5fa3 ("perf tests: Add breakpoint accounting/modify test") Reported-by: Aishwarya TCV Signed-off-by: Leo Yan Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250711-perf_fix_breakpoint_accounting-v1-1-b314393023f9@arm.com Signed-off-by: Namhyung Kim --- tools/perf/tests/bp_account.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c index 4cb7d486b5c1..047433c977bc 100644 --- a/tools/perf/tests/bp_account.c +++ b/tools/perf/tests/bp_account.c @@ -104,6 +104,7 @@ static int bp_accounting(int wp_cnt, int share) fd_wp = wp_event((void *)&the_var, &attr_new); TEST_ASSERT_VAL("failed to create max wp\n", fd_wp != -1); pr_debug("wp max created\n"); + close(fd_wp); } for (i = 0; i < wp_cnt; i++) From 28f5aa8184c9c9b8eab35fa3884c416fe75e88e4 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 10 Jul 2025 16:51:14 -0700 Subject: [PATCH 0627/2411] perf hwmon_pmu: Avoid shortening hwmon PMU name Long names like ucsi_source_psy_USBC000:001 when prefixed with hwmon_ exceed the buffer size and the last digit is lost. This causes confusion with similar names like ucsi_source_psy_USBC000:002. Extend the buffer size to avoid this. 
Fixes: 53cc0b351ec9 ("perf hwmon_pmu: Add a tool PMU exposing events from hwmon in sysfs") Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250710235126.1086011-2-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/hwmon_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/hwmon_pmu.c b/tools/perf/util/hwmon_pmu.c index 7edda010ba27..416dfea9ffff 100644 --- a/tools/perf/util/hwmon_pmu.c +++ b/tools/perf/util/hwmon_pmu.c @@ -345,7 +345,7 @@ static int hwmon_pmu__read_events(struct hwmon_pmu *pmu) struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, const char *hwmon_dir, const char *sysfs_name, const char *name) { - char buf[32]; + char buf[64]; struct hwmon_pmu *hwm; __u32 type = PERF_PMU_TYPE_HWMON_START + strtoul(sysfs_name + 5, NULL, 10); From 679c098cd2db458b1899e4410150d41a550ec6d6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 10 Jul 2025 16:51:15 -0700 Subject: [PATCH 0628/2411] perf parse-events: Minor tidy up of event_type helper Add missing breakpoint and raw types. Avoid a switch, just use a lookup array. Switch the type to unsigned to avoid checking negative values. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250710235126.1086011-3-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/parse-events.c | 31 +++++++++++++------------------ tools/perf/util/parse-events.h | 2 +- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4cd64ffa4fcd..a59ae5ca0f89 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -135,26 +135,21 @@ const struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { }, }; -const char *event_type(int type) +static const char *const event_types[] = { + [PERF_TYPE_HARDWARE] = "hardware", + [PERF_TYPE_SOFTWARE] = "software", + [PERF_TYPE_TRACEPOINT] = "tracepoint", + [PERF_TYPE_HW_CACHE] = "hardware-cache", + [PERF_TYPE_RAW] = "raw", + [PERF_TYPE_BREAKPOINT] = "breakpoint", +}; + +const char *event_type(size_t type) { - switch (type) { - case PERF_TYPE_HARDWARE: - return "hardware"; + if (type >= PERF_TYPE_MAX) + return "unknown"; - case PERF_TYPE_SOFTWARE: - return "software"; - - case PERF_TYPE_TRACEPOINT: - return "tracepoint"; - - case PERF_TYPE_HW_CACHE: - return "hardware-cache"; - - default: - break; - } - - return "unknown"; + return event_types[type]; } static char *get_config_str(const struct parse_events_terms *head_terms, diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 1c20ed0879aa..b47bf2810112 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -21,7 +21,7 @@ struct option; struct perf_pmu; struct strbuf; -const char *event_type(int type); +const char *event_type(size_t type); /* Arguments encoded in opt->value. 
*/ struct parse_events_option_args { From bcc7693ad100ef9c778621edee2295b8c02f2271 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 10 Jul 2025 16:51:16 -0700 Subject: [PATCH 0629/2411] perf spark: Fix includes and add SPDX scnprintf is declared in linux/kernel.h, directly depend upon it. Add missing SPDX comments. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250710235126.1086011-4-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/spark.c | 8 +++----- tools/perf/util/spark.h | 1 + 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/spark.c b/tools/perf/util/spark.c index 70272a8b81a6..65ca253cc22e 100644 --- a/tools/perf/util/spark.c +++ b/tools/perf/util/spark.c @@ -1,9 +1,7 @@ -#include -#include -#include -#include +// SPDX-License-Identifier: GPL-2.0 #include "spark.h" -#include "stat.h" +#include +#include #define SPARK_SHIFT 8 diff --git a/tools/perf/util/spark.h b/tools/perf/util/spark.h index 25402d7d7a64..78597c38ef35 100644 --- a/tools/perf/util/spark.h +++ b/tools/perf/util/spark.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef SPARK_H #define SPARK_H 1 From 8c75dc742089c702cab6d0f21be80c5ddd3c6067 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 10 Jul 2025 16:51:17 -0700 Subject: [PATCH 0630/2411] perf pmu: Tolerate failure to read the type for wellknown PMUs If sysfs isn't mounted then we may fail to read a PMU's type. In this situation resort to lookup of wellknown types. Only applies to software, tracepoint and breakpoint PMUs. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250710235126.1086011-5-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/pmu.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index f795883c233f..23666883049d 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1182,6 +1182,32 @@ int perf_pmu__init(struct perf_pmu *pmu, __u32 type, const char *name) return 0; } +static __u32 wellknown_pmu_type(const char *pmu_name) +{ + struct { + const char *pmu_name; + __u32 type; + } wellknown_pmus[] = { + { + "software", + PERF_TYPE_SOFTWARE + }, + { + "tracepoint", + PERF_TYPE_TRACEPOINT + }, + { + "breakpoint", + PERF_TYPE_BREAKPOINT + }, + }; + for (size_t i = 0; i < ARRAY_SIZE(wellknown_pmus); i++) { + if (!strcmp(wellknown_pmus[i].pmu_name, pmu_name)) + return wellknown_pmus[i].type; + } + return PERF_TYPE_MAX; +} + struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *name, bool eager_load) { @@ -1201,8 +1227,12 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char * that type value is successfully assigned (return 1). */ if (perf_pmu__scan_file_at(pmu, dirfd, "type", "%u", &pmu->type) != 1) { - perf_pmu__delete(pmu); - return NULL; + /* Double check the PMU's name isn't wellknown. */ + pmu->type = wellknown_pmu_type(name); + if (pmu->type == PERF_TYPE_MAX) { + perf_pmu__delete(pmu); + return NULL; + } } /* From cb336b6aaeb44be281df9a03684ddeadd3afab60 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 10 Jul 2025 16:51:18 -0700 Subject: [PATCH 0631/2411] perf metricgroup: Factor out for-each function and move out printing Factor metricgroup__for_each_metric into its own function handling regular and sys metrics. Make the metric adding and printing code use it, move the printing code into print-events files. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250710235126.1086011-6-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/metricgroup.c | 241 ++++----------------------------- tools/perf/util/metricgroup.h | 3 +- tools/perf/util/print-events.c | 133 ++++++++++++++++++ tools/perf/util/print-events.h | 2 + 4 files changed, 165 insertions(+), 214 deletions(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 43d35f956a33..ddd5c362d183 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -384,107 +384,6 @@ static bool match_pm_metric_or_groups(const struct pmu_metric *pm, const char *p match_metric_or_groups(pm->metric_name, metric_or_groups); } -/** struct mep - RB-tree node for building printing information. */ -struct mep { - /** nd - RB-tree element. */ - struct rb_node nd; - /** @metric_group: Owned metric group name, separated others with ';'. */ - char *metric_group; - const char *metric_name; - const char *metric_desc; - const char *metric_long_desc; - const char *metric_expr; - const char *metric_threshold; - const char *metric_unit; - const char *pmu_name; -}; - -static int mep_cmp(struct rb_node *rb_node, const void *entry) -{ - struct mep *a = container_of(rb_node, struct mep, nd); - struct mep *b = (struct mep *)entry; - int ret; - - ret = strcmp(a->metric_group, b->metric_group); - if (ret) - return ret; - - return strcmp(a->metric_name, b->metric_name); -} - -static struct rb_node *mep_new(struct rblist *rl __maybe_unused, const void *entry) -{ - struct mep *me = malloc(sizeof(struct mep)); - - if (!me) - return NULL; - - memcpy(me, entry, sizeof(struct mep)); - return &me->nd; -} - -static void mep_delete(struct rblist *rl __maybe_unused, - struct rb_node *nd) -{ - struct mep *me = container_of(nd, struct mep, nd); - - zfree(&me->metric_group); - free(me); -} - -static struct mep *mep_lookup(struct rblist *groups, const char *metric_group, - const char *metric_name) 
-{ - struct rb_node *nd; - struct mep me = { - .metric_group = strdup(metric_group), - .metric_name = metric_name, - }; - nd = rblist__find(groups, &me); - if (nd) { - free(me.metric_group); - return container_of(nd, struct mep, nd); - } - rblist__add_node(groups, &me); - nd = rblist__find(groups, &me); - if (nd) - return container_of(nd, struct mep, nd); - return NULL; -} - -static int metricgroup__add_to_mep_groups(const struct pmu_metric *pm, - struct rblist *groups) -{ - const char *g; - char *omg, *mg; - - mg = strdup(pm->metric_group ?: pm->metric_name); - if (!mg) - return -ENOMEM; - omg = mg; - while ((g = strsep(&mg, ";")) != NULL) { - struct mep *me; - - g = skip_spaces(g); - if (strlen(g)) - me = mep_lookup(groups, g, pm->metric_name); - else - me = mep_lookup(groups, pm->metric_name, pm->metric_name); - - if (me) { - me->metric_desc = pm->desc; - me->metric_long_desc = pm->long_desc; - me->metric_expr = pm->metric_expr; - me->metric_threshold = pm->metric_threshold; - me->metric_unit = pm->unit; - me->pmu_name = pm->pmu; - } - } - free(omg); - - return 0; -} - struct metricgroup_iter_data { pmu_metric_iter_fn fn; void *data; @@ -510,54 +409,22 @@ static int metricgroup__sys_event_iter(const struct pmu_metric *pm, return 0; } -static int metricgroup__add_to_mep_groups_callback(const struct pmu_metric *pm, - const struct pmu_metrics_table *table __maybe_unused, - void *vdata) +int metricgroup__for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn, + void *data) { - struct rblist *groups = vdata; + struct metricgroup_iter_data sys_data = { + .fn = fn, + .data = data, + }; - return metricgroup__add_to_mep_groups(pm, groups); -} - -void metricgroup__print(const struct print_callbacks *print_cb, void *print_state) -{ - struct rblist groups; - const struct pmu_metrics_table *table; - struct rb_node *node, *next; - - rblist__init(&groups); - groups.node_new = mep_new; - groups.node_cmp = mep_cmp; - groups.node_delete = mep_delete; - table 
= pmu_metrics_table__find(); if (table) { - pmu_metrics_table__for_each_metric(table, - metricgroup__add_to_mep_groups_callback, - &groups); - } - { - struct metricgroup_iter_data data = { - .fn = metricgroup__add_to_mep_groups_callback, - .data = &groups, - }; - pmu_for_each_sys_metric(metricgroup__sys_event_iter, &data); + int ret = pmu_metrics_table__for_each_metric(table, fn, data); + + if (ret) + return ret; } - for (node = rb_first_cached(&groups.entries); node; node = next) { - struct mep *me = container_of(node, struct mep, nd); - - print_cb->print_metric(print_state, - me->metric_group, - me->metric_name, - me->metric_desc, - me->metric_long_desc, - me->metric_expr, - me->metric_threshold, - me->metric_unit, - me->pmu_name); - next = rb_next(node); - rblist__remove_node(&groups, node); - } + return pmu_for_each_sys_metric(metricgroup__sys_event_iter, &sys_data); } static const char *code_characters = ",-=@"; @@ -1090,29 +957,6 @@ static int add_metric(struct list_head *metric_list, return ret; } -static int metricgroup__add_metric_sys_event_iter(const struct pmu_metric *pm, - const struct pmu_metrics_table *table __maybe_unused, - void *data) -{ - struct metricgroup_add_iter_data *d = data; - int ret; - - if (!match_pm_metric_or_groups(pm, d->pmu, d->metric_name)) - return 0; - - ret = add_metric(d->metric_list, pm, d->modifier, d->metric_no_group, - d->metric_no_threshold, d->user_requested_cpu_list, - d->system_wide, d->root_metric, d->visited, d->table); - if (ret) - goto out; - - *(d->has_match) = true; - -out: - *(d->ret) = ret; - return ret; -} - /** * metric_list_cmp - list_sort comparator that sorts metrics with more events to * the front. tool events are excluded from the count. 
@@ -1216,55 +1060,26 @@ static int metricgroup__add_metric(const char *pmu, const char *metric_name, con { LIST_HEAD(list); int ret; - bool has_match = false; + struct metricgroup__add_metric_data data = { + .list = &list, + .pmu = pmu, + .metric_name = metric_name, + .modifier = modifier, + .metric_no_group = metric_no_group, + .metric_no_threshold = metric_no_threshold, + .user_requested_cpu_list = user_requested_cpu_list, + .system_wide = system_wide, + .has_match = false, + }; - { - struct metricgroup__add_metric_data data = { - .list = &list, - .pmu = pmu, - .metric_name = metric_name, - .modifier = modifier, - .metric_no_group = metric_no_group, - .metric_no_threshold = metric_no_threshold, - .user_requested_cpu_list = user_requested_cpu_list, - .system_wide = system_wide, - .has_match = false, - }; - /* - * Iterate over all metrics seeing if metric matches either the - * name or group. When it does add the metric to the list. - */ - ret = pmu_metrics_table__for_each_metric(table, metricgroup__add_metric_callback, - &data); - if (ret) - goto out; - - has_match = data.has_match; - } - { - struct metricgroup_iter_data data = { - .fn = metricgroup__add_metric_sys_event_iter, - .data = (void *) &(struct metricgroup_add_iter_data) { - .metric_list = &list, - .pmu = pmu, - .metric_name = metric_name, - .modifier = modifier, - .metric_no_group = metric_no_group, - .user_requested_cpu_list = user_requested_cpu_list, - .system_wide = system_wide, - .has_match = &has_match, - .ret = &ret, - .table = table, - }, - }; - - pmu_for_each_sys_metric(metricgroup__sys_event_iter, &data); - } - /* End of pmu events. */ - if (!has_match) + /* + * Iterate over all metrics seeing if metric matches either the + * name or group. When it does add the metric to the list. 
+ */ + ret = metricgroup__for_each_metric(table, metricgroup__add_metric_callback, &data); + if (!ret && !data.has_match) ret = -EINVAL; -out: /* * add to metric_list so that they can be released * even if it's failed diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index a04ac1afa6cc..1c07295931c1 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -84,7 +84,8 @@ int metricgroup__parse_groups_test(struct evlist *evlist, const char *str, struct rblist *metric_events); -void metricgroup__print(const struct print_callbacks *print_cb, void *print_state); +int metricgroup__for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn, + void *data); bool metricgroup__has_metric_or_groups(const char *pmu, const char *metric_or_groups); unsigned int metricgroups__topdown_max_level(void); int arch_get_runtimeparam(const struct pmu_metric *pm); diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index 83aaf7cda635..e233bacaa641 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -381,6 +381,139 @@ void print_symbol_events(const struct print_callbacks *print_cb, void *print_sta strlist__delete(evt_name_list); } +/** struct mep - RB-tree node for building printing information. */ +struct mep { + /** nd - RB-tree element. */ + struct rb_node nd; + /** @metric_group: Owned metric group name, separated others with ';'. 
*/ + char *metric_group; + const char *metric_name; + const char *metric_desc; + const char *metric_long_desc; + const char *metric_expr; + const char *metric_threshold; + const char *metric_unit; + const char *pmu_name; +}; + +static int mep_cmp(struct rb_node *rb_node, const void *entry) +{ + struct mep *a = container_of(rb_node, struct mep, nd); + struct mep *b = (struct mep *)entry; + int ret; + + ret = strcmp(a->metric_group, b->metric_group); + if (ret) + return ret; + + return strcmp(a->metric_name, b->metric_name); +} + +static struct rb_node *mep_new(struct rblist *rl __maybe_unused, const void *entry) +{ + struct mep *me = malloc(sizeof(struct mep)); + + if (!me) + return NULL; + + memcpy(me, entry, sizeof(struct mep)); + return &me->nd; +} + +static void mep_delete(struct rblist *rl __maybe_unused, + struct rb_node *nd) +{ + struct mep *me = container_of(nd, struct mep, nd); + + zfree(&me->metric_group); + free(me); +} + +static struct mep *mep_lookup(struct rblist *groups, const char *metric_group, + const char *metric_name) +{ + struct rb_node *nd; + struct mep me = { + .metric_group = strdup(metric_group), + .metric_name = metric_name, + }; + nd = rblist__find(groups, &me); + if (nd) { + free(me.metric_group); + return container_of(nd, struct mep, nd); + } + rblist__add_node(groups, &me); + nd = rblist__find(groups, &me); + if (nd) + return container_of(nd, struct mep, nd); + return NULL; +} + +static int metricgroup__add_to_mep_groups_callback(const struct pmu_metric *pm, + const struct pmu_metrics_table *table __maybe_unused, + void *vdata) +{ + struct rblist *groups = vdata; + const char *g; + char *omg, *mg; + + mg = strdup(pm->metric_group ?: pm->metric_name); + if (!mg) + return -ENOMEM; + omg = mg; + while ((g = strsep(&mg, ";")) != NULL) { + struct mep *me; + + g = skip_spaces(g); + if (strlen(g)) + me = mep_lookup(groups, g, pm->metric_name); + else + me = mep_lookup(groups, pm->metric_name, pm->metric_name); + + if (me) { + me->metric_desc = 
pm->desc; + me->metric_long_desc = pm->long_desc; + me->metric_expr = pm->metric_expr; + me->metric_threshold = pm->metric_threshold; + me->metric_unit = pm->unit; + me->pmu_name = pm->pmu; + } + } + free(omg); + + return 0; +} + +void metricgroup__print(const struct print_callbacks *print_cb, void *print_state) +{ + struct rblist groups; + struct rb_node *node, *next; + const struct pmu_metrics_table *table = pmu_metrics_table__find(); + + rblist__init(&groups); + groups.node_new = mep_new; + groups.node_cmp = mep_cmp; + groups.node_delete = mep_delete; + + metricgroup__for_each_metric(table, metricgroup__add_to_mep_groups_callback, &groups); + + for (node = rb_first_cached(&groups.entries); node; node = next) { + struct mep *me = container_of(node, struct mep, nd); + + print_cb->print_metric(print_state, + me->metric_group, + me->metric_name, + me->metric_desc, + me->metric_long_desc, + me->metric_expr, + me->metric_threshold, + me->metric_unit, + me->pmu_name); + next = rb_next(node); + rblist__remove_node(&groups, node); + } +} + /* * Print the help text for the event symbols: */ diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h index 8f19c2bea64a..48682e2d166d 100644 --- a/tools/perf/util/print-events.h +++ b/tools/perf/util/print-events.h @@ -37,7 +37,9 @@ void print_sdt_events(const struct print_callbacks *print_cb, void *print_state) void print_symbol_events(const struct print_callbacks *print_cb, void *print_state, unsigned int type, const struct event_symbol *syms, unsigned int max); + void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state); +void metricgroup__print(const struct print_callbacks *print_cb, void *print_state); bool is_event_supported(u8 type, u64 config); #endif /* __PERF_PRINT_EVENTS_H */ From faebee18d720d9e209946ece3e468c06cf13f5ec Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 10 Jul 2025 16:51:19 -0700 Subject: [PATCH 0632/2411] perf stat: Move metric list from config to 
evlist The rblist of metric_event that then have a list of associated metric_expr is moved out of the stat_config and into the evlist. This is done as part of refactoring things for python, having the state split in two places complicates that implementation. The evlist is doing the harder work of enabling and disabling events, the metrics are needed to compute a value and it doesn't seem unreasonable to hang them from the evlist. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250710235126.1086011-7-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-script.c | 3 +-- tools/perf/builtin-stat.c | 25 ++++++++++++------------- tools/perf/tests/expand-cgroup.c | 24 +++++++----------------- tools/perf/tests/parse-metric.c | 16 +++++----------- tools/perf/tests/pmu-events.c | 8 ++------ tools/perf/util/cgroup.c | 23 ++++++++--------------- tools/perf/util/cgroup.h | 3 +-- tools/perf/util/evlist.c | 3 +++ tools/perf/util/evlist.h | 6 ++++++ tools/perf/util/metricgroup.c | 20 ++++++++------------ tools/perf/util/metricgroup.h | 7 +++---- tools/perf/util/python.c | 4 ++++ tools/perf/util/stat-display.c | 16 ++++++---------- tools/perf/util/stat-shadow.c | 13 ++++++------- tools/perf/util/stat.h | 12 +++--------- 15 files changed, 75 insertions(+), 108 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 4001e621b6cb..271f22962e32 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2136,8 +2136,7 @@ static void perf_sample__fprint_metric(struct perf_script *script, perf_stat__print_shadow_stats(&stat_config, ev2, evsel_script(ev2)->val, sample->cpu, - &ctx, - NULL); + &ctx); } evsel_script(leader)->gnum = 0; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 50fc53adb7e4..77e2248fa7fc 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1863,8 +1863,7 @@ static int add_default_events(void) stat_config.metric_no_threshold, 
stat_config.user_requested_cpu_list, stat_config.system_wide, - stat_config.hardware_aware_grouping, - &stat_config.metric_events); + stat_config.hardware_aware_grouping); goto out; } @@ -1901,8 +1900,7 @@ static int add_default_events(void) stat_config.metric_no_threshold, stat_config.user_requested_cpu_list, stat_config.system_wide, - stat_config.hardware_aware_grouping, - &stat_config.metric_events); + stat_config.hardware_aware_grouping); goto out; } @@ -1939,8 +1937,7 @@ static int add_default_events(void) /*metric_no_threshold=*/true, stat_config.user_requested_cpu_list, stat_config.system_wide, - stat_config.hardware_aware_grouping, - &stat_config.metric_events) < 0) { + stat_config.hardware_aware_grouping) < 0) { ret = -1; goto out; } @@ -1989,8 +1986,7 @@ static int add_default_events(void) /*metric_no_threshold=*/true, stat_config.user_requested_cpu_list, stat_config.system_wide, - stat_config.hardware_aware_grouping, - &stat_config.metric_events) < 0) { + stat_config.hardware_aware_grouping) < 0) { ret = -1; goto out; } @@ -1999,6 +1995,9 @@ static int add_default_events(void) evsel->default_metricgroup = true; evlist__splice_list_tail(evlist, &metric_evlist->core.entries); + metricgroup__copy_metric_events(evlist, /*cgrp=*/NULL, + &evlist->metric_events, + &metric_evlist->metric_events); evlist__delete(metric_evlist); } } @@ -2053,6 +2052,9 @@ static int add_default_events(void) } parse_events_error__exit(&err); evlist__splice_list_tail(evsel_list, &evlist->core.entries); + metricgroup__copy_metric_events(evsel_list, /*cgrp=*/NULL, + &evsel_list->metric_events, + &evlist->metric_events); evlist__delete(evlist); return ret; } @@ -2739,8 +2741,7 @@ int cmd_stat(int argc, const char **argv) stat_config.metric_no_threshold, stat_config.user_requested_cpu_list, stat_config.system_wide, - stat_config.hardware_aware_grouping, - &stat_config.metric_events); + stat_config.hardware_aware_grouping); zfree(&metrics); if (ret) { @@ -2760,8 +2761,7 @@ int 
cmd_stat(int argc, const char **argv) goto out; } - if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list, - &stat_config.metric_events, true) < 0) { + if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list, true) < 0) { parse_options_usage(stat_usage, stat_options, "for-each-cgroup", 0); goto out; @@ -2936,7 +2936,6 @@ int cmd_stat(int argc, const char **argv) evlist__delete(evsel_list); - metricgroup__rblist_exit(&stat_config.metric_events); evlist__close_control(stat_config.ctl_fd, stat_config.ctl_fd_ack, &stat_config.ctl_fd_close); return status; diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c index 31966ff856f8..c7b32a220ca1 100644 --- a/tools/perf/tests/expand-cgroup.c +++ b/tools/perf/tests/expand-cgroup.c @@ -13,8 +13,7 @@ #include #include -static int test_expand_events(struct evlist *evlist, - struct rblist *metric_events) +static int test_expand_events(struct evlist *evlist) { int i, ret = TEST_FAIL; int nr_events; @@ -47,7 +46,7 @@ static int test_expand_events(struct evlist *evlist, was_group_event = evsel__is_group_event(evlist__first(evlist)); nr_members = evlist__first(evlist)->core.nr_members; - ret = evlist__expand_cgroup(evlist, cgrp_str, metric_events, false); + ret = evlist__expand_cgroup(evlist, cgrp_str, false); if (ret < 0) { pr_debug("failed to expand events for cgroups\n"); goto out; @@ -100,13 +99,11 @@ out: for (i = 0; i < nr_events; i++) static int expand_default_events(void) { int ret; - struct rblist metric_events; struct evlist *evlist = evlist__new_default(); TEST_ASSERT_VAL("failed to get evlist", evlist); - rblist__init(&metric_events); - ret = test_expand_events(evlist, &metric_events); + ret = test_expand_events(evlist); evlist__delete(evlist); return ret; } @@ -115,7 +112,6 @@ static int expand_group_events(void) { int ret; struct evlist *evlist; - struct rblist metric_events; struct parse_events_error err; const char event_str[] = "{cycles,instructions}"; @@ -132,8 +128,7 @@ 
static int expand_group_events(void) goto out; } - rblist__init(&metric_events); - ret = test_expand_events(evlist, &metric_events); + ret = test_expand_events(evlist); out: parse_events_error__exit(&err); evlist__delete(evlist); @@ -144,7 +139,6 @@ static int expand_libpfm_events(void) { int ret; struct evlist *evlist; - struct rblist metric_events; const char event_str[] = "CYCLES"; struct option opt = { .value = &evlist, @@ -166,8 +160,7 @@ static int expand_libpfm_events(void) goto out; } - rblist__init(&metric_events); - ret = test_expand_events(evlist, &metric_events); + ret = test_expand_events(evlist); out: evlist__delete(evlist); return ret; @@ -177,25 +170,22 @@ static int expand_metric_events(void) { int ret; struct evlist *evlist; - struct rblist metric_events; const char metric_str[] = "CPI"; const struct pmu_metrics_table *pme_test; evlist = evlist__new(); TEST_ASSERT_VAL("failed to get evlist", evlist); - rblist__init(&metric_events); pme_test = find_core_metrics_table("testarch", "testcpu"); - ret = metricgroup__parse_groups_test(evlist, pme_test, metric_str, &metric_events); + ret = metricgroup__parse_groups_test(evlist, pme_test, metric_str); if (ret < 0) { pr_debug("failed to parse '%s' metric\n", metric_str); goto out; } - ret = test_expand_events(evlist, &metric_events); + ret = test_expand_events(evlist); out: - metricgroup__rblist_exit(&metric_events); evlist__delete(evlist); return ret; } diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 2c28fb50dc24..66a5275917e2 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -45,15 +45,14 @@ static void load_runtime_stat(struct evlist *evlist, struct value *vals) } } -static double compute_single(struct rblist *metric_events, struct evlist *evlist, - const char *name) +static double compute_single(struct evlist *evlist, const char *name) { struct metric_expr *mexp; struct metric_event *me; struct evsel *evsel; 
evlist__for_each_entry(evlist, evsel) { - me = metricgroup__lookup(metric_events, evsel, false); + me = metricgroup__lookup(&evlist->metric_events, evsel, false); if (me != NULL) { list_for_each_entry (mexp, &me->head, nd) { if (strcmp(mexp->metric_name, name)) @@ -69,9 +68,6 @@ static int __compute_metric(const char *name, struct value *vals, const char *name1, double *ratio1, const char *name2, double *ratio2) { - struct rblist metric_events = { - .nr_entries = 0, - }; const struct pmu_metrics_table *pme_test; struct perf_cpu_map *cpus; struct evlist *evlist; @@ -95,8 +91,7 @@ static int __compute_metric(const char *name, struct value *vals, /* Parse the metric into metric_events list. */ pme_test = find_core_metrics_table("testarch", "testcpu"); - err = metricgroup__parse_groups_test(evlist, pme_test, name, - &metric_events); + err = metricgroup__parse_groups_test(evlist, pme_test, name); if (err) goto out; @@ -109,13 +104,12 @@ static int __compute_metric(const char *name, struct value *vals, /* And execute the metric */ if (name1 && ratio1) - *ratio1 = compute_single(&metric_events, evlist, name1); + *ratio1 = compute_single(evlist, name1); if (name2 && ratio2) - *ratio2 = compute_single(&metric_events, evlist, name2); + *ratio2 = compute_single(evlist, name2); out: /* ... cleanup. 
*/ - metricgroup__rblist_exit(&metric_events); evlist__free_stats(evlist); perf_cpu_map__put(cpus); evlist__delete(evlist); diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 815b40097428..8bbe0516ecc0 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -868,9 +868,6 @@ static int test__parsing_callback(const struct pmu_metric *pm, struct evlist *evlist; struct perf_cpu_map *cpus; struct evsel *evsel; - struct rblist metric_events = { - .nr_entries = 0, - }; int err = 0; if (!pm->metric_expr) @@ -895,7 +892,7 @@ static int test__parsing_callback(const struct pmu_metric *pm, perf_evlist__set_maps(&evlist->core, cpus, NULL); - err = metricgroup__parse_groups_test(evlist, table, pm->metric_name, &metric_events); + err = metricgroup__parse_groups_test(evlist, table, pm->metric_name); if (err) { if (!strcmp(pm->metric_name, "M1") || !strcmp(pm->metric_name, "M2") || !strcmp(pm->metric_name, "M3")) { @@ -922,7 +919,7 @@ static int test__parsing_callback(const struct pmu_metric *pm, k++; } evlist__for_each_entry(evlist, evsel) { - struct metric_event *me = metricgroup__lookup(&metric_events, evsel, false); + struct metric_event *me = metricgroup__lookup(&evlist->metric_events, evsel, false); if (me != NULL) { struct metric_expr *mexp; @@ -944,7 +941,6 @@ static int test__parsing_callback(const struct pmu_metric *pm, pr_debug("Broken metric %s\n", pm->metric_name); /* ... cleanup. 
*/ - metricgroup__rblist_exit(&metric_events); evlist__free_stats(evlist); perf_cpu_map__put(cpus); evlist__delete(evlist); diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index fbcc0626f9ce..25e2769b5e74 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -413,8 +413,7 @@ static bool has_pattern_string(const char *str) return !!strpbrk(str, "{}[]()|*+?^$"); } -int evlist__expand_cgroup(struct evlist *evlist, const char *str, - struct rblist *metric_events, bool open_cgroup) +int evlist__expand_cgroup(struct evlist *evlist, const char *str, bool open_cgroup) { struct evlist *orig_list, *tmp_list; struct evsel *pos, *evsel, *leader; @@ -440,12 +439,8 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, evlist__splice_list_tail(orig_list, &evlist->core.entries); evlist->core.nr_entries = 0; - if (metric_events) { - orig_metric_events = *metric_events; - rblist__init(metric_events); - } else { - rblist__init(&orig_metric_events); - } + orig_metric_events = evlist->metric_events; + metricgroup__rblist_init(&evlist->metric_events); if (has_pattern_string(str)) prefix_len = match_cgroups(str); @@ -490,12 +485,10 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, cgroup__put(cgrp); nr_cgroups++; - if (metric_events) { - if (metricgroup__copy_metric_events(tmp_list, cgrp, - metric_events, - &orig_metric_events) < 0) - goto out_err; - } + if (metricgroup__copy_metric_events(tmp_list, cgrp, + &evlist->metric_events, + &orig_metric_events) < 0) + goto out_err; evlist__splice_list_tail(evlist, &tmp_list->core.entries); tmp_list->core.nr_entries = 0; @@ -512,7 +505,7 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, out_err: evlist__delete(orig_list); evlist__delete(tmp_list); - rblist__exit(&orig_metric_events); + metricgroup__rblist_exit(&orig_metric_events); release_cgroup_list(); return ret; diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h index 
de8882d6e8d3..7b1bda22878c 100644 --- a/tools/perf/util/cgroup.h +++ b/tools/perf/util/cgroup.h @@ -28,8 +28,7 @@ struct rblist; struct cgroup *cgroup__new(const char *name, bool do_open); struct cgroup *evlist__findnew_cgroup(struct evlist *evlist, const char *name); -int evlist__expand_cgroup(struct evlist *evlist, const char *cgroups, - struct rblist *metric_events, bool open_cgroup); +int evlist__expand_cgroup(struct evlist *evlist, const char *cgroups, bool open_cgroup); void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 5664ebf6bbc6..995ad5f654d0 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -35,6 +35,7 @@ #include "util/util.h" #include "util/env.h" #include "util/intel-tpebs.h" +#include "util/metricgroup.h" #include "util/strbuf.h" #include #include @@ -83,6 +84,7 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus, evlist->ctl_fd.ack = -1; evlist->ctl_fd.pos = -1; evlist->nr_br_cntr = -1; + metricgroup__rblist_init(&evlist->metric_events); } struct evlist *evlist__new(void) @@ -173,6 +175,7 @@ static void evlist__purge(struct evlist *evlist) void evlist__exit(struct evlist *evlist) { + metricgroup__rblist_exit(&evlist->metric_events); event_enable_timer__exit(&evlist->eet); zfree(&evlist->mmap); zfree(&evlist->overwrite_mmap); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 85859708393e..fac1a01ba13f 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -12,6 +12,7 @@ #include #include "events_stats.h" #include "evsel.h" +#include "rblist.h" #include #include #include @@ -86,6 +87,11 @@ struct evlist { int pos; /* index at evlist core object to check signals */ } ctl_fd; struct event_enable_timer *eet; + /** + * @metric_events: A list of struct metric_event which each have a list + * of struct metric_expr. 
+ */ + struct rblist metric_events; }; struct evsel_str_handler { diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index ddd5c362d183..3cc6c47402bd 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -103,7 +103,7 @@ static void metric_event_delete(struct rblist *rblist __maybe_unused, free(me); } -static void metricgroup__rblist_init(struct rblist *metric_events) +void metricgroup__rblist_init(struct rblist *metric_events) { rblist__init(metric_events); metric_events->node_cmp = metric_event_cmp; @@ -1323,7 +1323,6 @@ static int parse_groups(struct evlist *perf_evlist, const char *user_requested_cpu_list, bool system_wide, bool fake_pmu, - struct rblist *metric_events_list, const struct pmu_metrics_table *table) { struct evlist *combined_evlist = NULL; @@ -1333,8 +1332,6 @@ static int parse_groups(struct evlist *perf_evlist, bool is_default = !strcmp(str, "Default"); int ret; - if (metric_events_list->nr_entries == 0) - metricgroup__rblist_init(metric_events_list); ret = metricgroup__add_metric_list(pmu, str, metric_no_group, metric_no_threshold, user_requested_cpu_list, system_wide, &metric_list, table); @@ -1425,7 +1422,8 @@ static int parse_groups(struct evlist *perf_evlist, goto out; } - me = metricgroup__lookup(metric_events_list, metric_events[0], true); + me = metricgroup__lookup(&perf_evlist->metric_events, metric_events[0], + /*create=*/true); expr = malloc(sizeof(struct metric_expr)); if (!expr) { @@ -1485,8 +1483,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist, bool metric_no_threshold, const char *user_requested_cpu_list, bool system_wide, - bool hardware_aware_grouping, - struct rblist *metric_events) + bool hardware_aware_grouping) { const struct pmu_metrics_table *table = pmu_metrics_table__find(); @@ -1497,13 +1494,12 @@ int metricgroup__parse_groups(struct evlist *perf_evlist, return parse_groups(perf_evlist, pmu, str, metric_no_group, metric_no_merge, metric_no_threshold, 
user_requested_cpu_list, system_wide, - /*fake_pmu=*/false, metric_events, table); + /*fake_pmu=*/false, table); } int metricgroup__parse_groups_test(struct evlist *evlist, const struct pmu_metrics_table *table, - const char *str, - struct rblist *metric_events) + const char *str) { return parse_groups(evlist, "all", str, /*metric_no_group=*/false, @@ -1511,7 +1507,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist, /*metric_no_threshold=*/false, /*user_requested_cpu_list=*/NULL, /*system_wide=*/false, - /*fake_pmu=*/true, metric_events, table); + /*fake_pmu=*/true, table); } struct metricgroup__has_metric_data { @@ -1596,7 +1592,7 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, evsel = evlist__find_evsel(evlist, old_me->evsel->core.idx); if (!evsel) return -EINVAL; - new_me = metricgroup__lookup(new_metric_events, evsel, true); + new_me = metricgroup__lookup(new_metric_events, evsel, /*create=*/true); if (!new_me) return -ENOMEM; diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 1c07295931c1..324880b2ed8f 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -77,18 +77,17 @@ int metricgroup__parse_groups(struct evlist *perf_evlist, bool metric_no_threshold, const char *user_requested_cpu_list, bool system_wide, - bool hardware_aware_grouping, - struct rblist *metric_events); + bool hardware_aware_grouping); int metricgroup__parse_groups_test(struct evlist *evlist, const struct pmu_metrics_table *table, - const char *str, - struct rblist *metric_events); + const char *str); int metricgroup__for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn, void *data); bool metricgroup__has_metric_or_groups(const char *pmu, const char *metric_or_groups); unsigned int metricgroups__topdown_max_level(void); int arch_get_runtimeparam(const struct pmu_metric *pm); +void metricgroup__rblist_init(struct rblist *metric_events); void 
metricgroup__rblist_exit(struct rblist *metric_events); int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 82666bcd2eda..b5ee9f7a4662 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -18,6 +18,7 @@ #include "strbuf.h" #include "thread_map.h" #include "trace-event.h" +#include "metricgroup.h" #include "mmap.h" #include "util/sample.h" #include @@ -1544,6 +1545,9 @@ static PyObject *pyrf_evlist__from_evlist(struct evlist *evlist) evlist__add(&pevlist->evlist, &pevsel->evsel); } + metricgroup__copy_metric_events(&pevlist->evlist, /*cgrp=*/NULL, + &pevlist->evlist.metric_events, + &evlist->metric_events); return (PyObject *)pevlist; } diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 9cb5245a92aa..a67b991f4e81 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -899,12 +899,11 @@ static void printout(struct perf_stat_config *config, struct outstate *os, print_noise(config, os, counter, noise, /*before_metric=*/true); print_running(config, os, run, ena, /*before_metric=*/true); from = perf_stat__print_shadow_stats_metricgroup(config, counter, aggr_idx, - &num, from, &out, - &config->metric_events); + &num, from, &out); } while (from != NULL); - } else - perf_stat__print_shadow_stats(config, counter, uval, aggr_idx, - &out, &config->metric_events); + } else { + perf_stat__print_shadow_stats(config, counter, uval, aggr_idx, &out); + } } else { pm(config, os, METRIC_THRESHOLD_UNKNOWN, /*format=*/NULL, /*unit=*/NULL, /*val=*/0); } @@ -1016,7 +1015,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, ena = aggr->counts.ena; run = aggr->counts.run; - if (perf_stat__skip_metric_event(counter, &config->metric_events, ena, run)) + if (perf_stat__skip_metric_event(counter, ena, run)) return; if (val == 0 && should_skip_zero_counter(config, counter, &id)) @@ -1275,10 +1274,7 
@@ static void print_metric_headers(struct perf_stat_config *config, os.evsel = counter; - perf_stat__print_shadow_stats(config, counter, 0, - 0, - &out, - &config->metric_events); + perf_stat__print_shadow_stats(config, counter, 0, 0, &out); } if (!config->json_output) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index d83bda5824d2..2b4950f56fae 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -15,6 +15,7 @@ #include #include "iostat.h" #include "util/hashmap.h" +#include "rblist.h" #include "tool_pmu.h" struct stats walltime_nsecs_stats; @@ -635,14 +636,14 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config, int aggr_idx, int *num, void *from, - struct perf_stat_output_ctx *out, - struct rblist *metric_events) + struct perf_stat_output_ctx *out) { struct metric_event *me; struct metric_expr *mexp = from; void *ctxp = out->ctx; bool header_printed = false; const char *name = NULL; + struct rblist *metric_events = &evsel->evlist->metric_events; me = metricgroup__lookup(metric_events, evsel, false); if (me == NULL) @@ -683,8 +684,7 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config, void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct evsel *evsel, double avg, int aggr_idx, - struct perf_stat_output_ctx *out, - struct rblist *metric_events) + struct perf_stat_output_ctx *out) { typedef void (*stat_print_function_t)(struct perf_stat_config *config, const struct evsel *evsel, @@ -735,7 +735,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, } perf_stat__print_shadow_stats_metricgroup(config, evsel, aggr_idx, - &num, NULL, out, metric_events); + &num, NULL, out); if (num == 0) { print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, @@ -748,7 +748,6 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, * if it's not running or not the metric event. 
*/ bool perf_stat__skip_metric_event(struct evsel *evsel, - struct rblist *metric_events, u64 ena, u64 run) { if (!evsel->default_metricgroup) @@ -757,5 +756,5 @@ bool perf_stat__skip_metric_event(struct evsel *evsel, if (!ena || !run) return true; - return !metricgroup__lookup(metric_events, evsel, false); + return !metricgroup__lookup(&evsel->evlist->metric_events, evsel, false); } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 1bcd7634bf47..4b0f14ae4e5f 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -7,7 +7,6 @@ #include #include #include "cpumap.h" -#include "rblist.h" #include "counts.h" struct perf_cpu_map; @@ -108,7 +107,6 @@ struct perf_stat_config { aggr_get_id_t aggr_get_id; struct cpu_aggr_map *cpus_aggr_map; u64 *walltime_run; - struct rblist metric_events; int ctl_fd; int ctl_fd_ack; bool ctl_fd_close; @@ -187,18 +185,14 @@ struct perf_stat_output_ctx { void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct evsel *evsel, double avg, int aggr_idx, - struct perf_stat_output_ctx *out, - struct rblist *metric_events); -bool perf_stat__skip_metric_event(struct evsel *evsel, - struct rblist *metric_events, - u64 ena, u64 run); + struct perf_stat_output_ctx *out); +bool perf_stat__skip_metric_event(struct evsel *evsel, u64 ena, u64 run); void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config, struct evsel *evsel, int aggr_idx, int *num, void *from, - struct perf_stat_output_ctx *out, - struct rblist *metric_events); + struct perf_stat_output_ctx *out); int evlist__alloc_stats(struct perf_stat_config *config, struct evlist *evlist, bool alloc_raw); From 3787cdaf387cdc14a9a000624742b4ee0a509244 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 10 Jul 2025 16:51:20 -0700 Subject: [PATCH 0633/2411] perf expr: Accumulate rather than replace in the context counts Metrics will fill in the context to have mappings from an event to a count. 
When counts are added they replace existing mappings which generally shouldn't exist with aggregation. Switch to accumulating to better support cases where perf stat's aggregation isn't used and we may see a counter more than once. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250710235126.1086011-8-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/expr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 6413537442aa..ca70a14c7cdf 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -166,8 +166,12 @@ int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id, data_ptr->kind = EXPR_ID_DATA__VALUE; ret = hashmap__set(ctx->ids, id, data_ptr, &old_key, &old_data); - if (ret) + if (ret) { free(data_ptr); + } else if (old_data) { + data_ptr->val.val += old_data->val.val; + data_ptr->val.source_count += old_data->val.source_count; + } free(old_key); free(old_data); return ret; From 5c255832deaf34d74c0adf2200eb50a8bba0fc00 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 10 Jul 2025 16:51:21 -0700 Subject: [PATCH 0634/2411] perf jevents: If the long_desc and desc are identical then drop the long_desc If the short and long descriptions are the same then save space and don't store both of them. When storing the desc in the perf_pmu_alias, don't duplicate the desc into the long_desc. By avoiding storing the duplicate the size of the events string in the binary on x86 is reduced by 29,840 bytes. Fix tests that expect a duplicated description. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250710235126.1086011-9-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/pmu-events/empty-pmu-events.c | 128 +++++++++++------------ tools/perf/pmu-events/jevents.py | 3 + tools/perf/tests/pmu-events.c | 22 ---- tools/perf/util/pmu.c | 3 +- 4 files changed, 68 insertions(+), 88 deletions(-) diff --git a/tools/perf/pmu-events/empty-pmu-events.c b/tools/perf/pmu-events/empty-pmu-events.c index d4017007a991..a4569a74db07 100644 --- a/tools/perf/pmu-events/empty-pmu-events.c +++ b/tools/perf/pmu-events/empty-pmu-events.c @@ -40,38 +40,38 @@ static const char *const big_c_string = /* offset=1475 */ "dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000\000\000\000" /* offset=1608 */ "eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000\000\000\000" /* offset=1726 */ "hisi_sccl,ddrc\000" -/* offset=1741 */ "uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000\000\000\000DDRC write commands\000" -/* offset=1830 */ "uncore_cbox\000" -/* offset=1842 */ "unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000\000\000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000" -/* offset=2076 */ "event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000\000\000\000UNC_CBO_HYPHEN\000" -/* offset=2144 */ "event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000\000\000\000UNC_CBO_TWO_HYPH\000" -/* offset=2218 */ "hisi_sccl,l3c\000" -/* offset=2232 */ "uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000\000\000\000Total read hits\000" -/* offset=2315 */ "uncore_imc_free_running\000" -/* offset=2339 */ 
"uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000\000\000\000Total cache misses\000" -/* offset=2437 */ "uncore_imc\000" -/* offset=2448 */ "uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000\000\000\000Total cache hits\000" -/* offset=2529 */ "uncore_sys_ddr_pmu\000" -/* offset=2548 */ "sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000\000\000\000" -/* offset=2624 */ "uncore_sys_ccn_pmu\000" -/* offset=2643 */ "sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000\000\000\000" -/* offset=2720 */ "uncore_sys_cmn_pmu\000" -/* offset=2739 */ "sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000\000\000\000" -/* offset=2882 */ "CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000" -/* offset=2904 */ "IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000" -/* offset=2967 */ "Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000" -/* offset=3133 */ "dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" -/* offset=3197 */ "icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" -/* offset=3264 */ "cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000" -/* offset=3335 */ "DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000" -/* offset=3429 */ "DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000" -/* 
offset=3563 */ "DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000" -/* offset=3627 */ "DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000" -/* offset=3695 */ "DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000" -/* offset=3765 */ "M1\000\000ipc + M2\000\000\000\000\000\000\000\00000" -/* offset=3787 */ "M2\000\000ipc + M1\000\000\000\000\000\000\000\00000" -/* offset=3809 */ "M3\000\0001 / M3\000\000\000\000\000\000\000\00000" -/* offset=3829 */ "L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000" +/* offset=1741 */ "uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000\000\000\000\000" +/* offset=1811 */ "uncore_cbox\000" +/* offset=1823 */ "unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000\000\000\000\000" +/* offset=1977 */ "event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000\000\000\000\000" +/* offset=2031 */ "event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000\000\000\000\000" +/* offset=2089 */ "hisi_sccl,l3c\000" +/* offset=2103 */ "uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000\000\000\000\000" +/* offset=2171 */ "uncore_imc_free_running\000" +/* offset=2195 */ "uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000\000\000\000\000" +/* offset=2275 */ "uncore_imc\000" +/* offset=2286 */ "uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000\000\000\000\000" +/* offset=2351 */ "uncore_sys_ddr_pmu\000" +/* offset=2370 */ "sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000\000\000\000" +/* offset=2446 */ "uncore_sys_ccn_pmu\000" +/* 
offset=2465 */ "sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000\000\000\000" +/* offset=2542 */ "uncore_sys_cmn_pmu\000" +/* offset=2561 */ "sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000\000\000\000" +/* offset=2704 */ "CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000" +/* offset=2726 */ "IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000" +/* offset=2789 */ "Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000" +/* offset=2955 */ "dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" +/* offset=3019 */ "icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" +/* offset=3086 */ "cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000" +/* offset=3157 */ "DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000" +/* offset=3251 */ "DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000" +/* offset=3385 */ "DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000" +/* offset=3449 */ "DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000" +/* offset=3517 */ "DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000" +/* offset=3587 */ "M1\000\000ipc + M2\000\000\000\000\000\000\000\00000" +/* offset=3609 */ "M2\000\000ipc + M1\000\000\000\000\000\000\000\00000" +/* offset=3631 */ 
"M3\000\0001 / M3\000\000\000\000\000\000\000\00000" +/* offset=3651 */ "L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000" ; static const struct compact_pmu_event pmu_events__common_tool[] = { @@ -107,21 +107,21 @@ static const struct compact_pmu_event pmu_events__test_soc_cpu_default_core[] = { 1373 }, /* segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000\000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_hisi_sccl_ddrc[] = { -{ 1741 }, /* uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000\000\000\000DDRC write commands\000 */ +{ 1741 }, /* uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000\000\000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_hisi_sccl_l3c[] = { -{ 2232 }, /* uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000\000\000\000Total read hits\000 */ +{ 2103 }, /* uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000\000\000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_cbox[] = { -{ 2076 }, /* event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000\000\000\000UNC_CBO_HYPHEN\000 */ -{ 2144 }, /* event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000\000\000\000UNC_CBO_TWO_HYPH\000 */ -{ 1842 }, /* unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000\000\000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000 */ +{ 1977 }, /* event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000\000\000\000\000 */ +{ 2031 }, /* event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000\000\000\000\000 */ +{ 1823 }, /* 
unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000\000\000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_imc[] = { -{ 2448 }, /* uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000\000\000\000Total cache hits\000 */ +{ 2286 }, /* uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000\000\000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_imc_free_running[] = { -{ 2339 }, /* uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000\000\000\000Total cache misses\000 */ +{ 2195 }, /* uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000\000\000\000\000 */ }; @@ -139,41 +139,41 @@ const struct pmu_table_entry pmu_events__test_soc_cpu[] = { { .entries = pmu_events__test_soc_cpu_hisi_sccl_l3c, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_hisi_sccl_l3c), - .pmu_name = { 2218 /* hisi_sccl,l3c\000 */ }, + .pmu_name = { 2089 /* hisi_sccl,l3c\000 */ }, }, { .entries = pmu_events__test_soc_cpu_uncore_cbox, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_cbox), - .pmu_name = { 1830 /* uncore_cbox\000 */ }, + .pmu_name = { 1811 /* uncore_cbox\000 */ }, }, { .entries = pmu_events__test_soc_cpu_uncore_imc, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_imc), - .pmu_name = { 2437 /* uncore_imc\000 */ }, + .pmu_name = { 2275 /* uncore_imc\000 */ }, }, { .entries = pmu_events__test_soc_cpu_uncore_imc_free_running, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_imc_free_running), - .pmu_name = { 2315 /* uncore_imc_free_running\000 */ }, + .pmu_name = { 2171 /* uncore_imc_free_running\000 */ }, }, }; static const struct compact_pmu_event pmu_metrics__test_soc_cpu_default_core[] = { -{ 2882 }, /* CPI\000\0001 / 
IPC\000\000\000\000\000\000\000\00000 */ -{ 3563 }, /* DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000 */ -{ 3335 }, /* DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000 */ -{ 3429 }, /* DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000 */ -{ 3627 }, /* DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ -{ 3695 }, /* DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ -{ 2967 }, /* Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000 */ -{ 2904 }, /* IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000 */ -{ 3829 }, /* L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000 */ -{ 3765 }, /* M1\000\000ipc + M2\000\000\000\000\000\000\000\00000 */ -{ 3787 }, /* M2\000\000ipc + M1\000\000\000\000\000\000\000\00000 */ -{ 3809 }, /* M3\000\0001 / M3\000\000\000\000\000\000\000\00000 */ -{ 3264 }, /* cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000 */ -{ 3133 }, /* dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ -{ 3197 }, /* icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ +{ 2704 }, /* CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000 */ +{ 3385 }, /* DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000 */ +{ 3157 }, /* DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + 
l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000 */ +{ 3251 }, /* DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000 */ +{ 3449 }, /* DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ +{ 3517 }, /* DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ +{ 2789 }, /* Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000 */ +{ 2726 }, /* IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000 */ +{ 3651 }, /* L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000 */ +{ 3587 }, /* M1\000\000ipc + M2\000\000\000\000\000\000\000\00000 */ +{ 3609 }, /* M2\000\000ipc + M1\000\000\000\000\000\000\000\00000 */ +{ 3631 }, /* M3\000\0001 / M3\000\000\000\000\000\000\000\00000 */ +{ 3086 }, /* cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000 */ +{ 2955 }, /* dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ +{ 3019 }, /* icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ }; @@ -186,13 +186,13 @@ const struct pmu_table_entry pmu_metrics__test_soc_cpu[] = { }; static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_ccn_pmu[] = { -{ 2643 }, /* sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000\000\000\000 */ +{ 2465 }, /* sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000\000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_cmn_pmu[] = { -{ 2739 
}, /* sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000\000\000\000 */ +{ 2561 }, /* sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000\000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_ddr_pmu[] = { -{ 2548 }, /* sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000\000\000\000 */ +{ 2370 }, /* sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000\000\000\000 */ }; @@ -200,17 +200,17 @@ const struct pmu_table_entry pmu_events__test_soc_sys[] = { { .entries = pmu_events__test_soc_sys_uncore_sys_ccn_pmu, .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_ccn_pmu), - .pmu_name = { 2624 /* uncore_sys_ccn_pmu\000 */ }, + .pmu_name = { 2446 /* uncore_sys_ccn_pmu\000 */ }, }, { .entries = pmu_events__test_soc_sys_uncore_sys_cmn_pmu, .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_cmn_pmu), - .pmu_name = { 2720 /* uncore_sys_cmn_pmu\000 */ }, + .pmu_name = { 2542 /* uncore_sys_cmn_pmu\000 */ }, }, { .entries = pmu_events__test_soc_sys_uncore_sys_ddr_pmu, .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_ddr_pmu), - .pmu_name = { 2529 /* uncore_sys_ddr_pmu\000 */ }, + .pmu_name = { 2351 /* uncore_sys_ddr_pmu\000 */ }, }, }; diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index a1899f35ec74..e821155151ec 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -397,6 +397,9 @@ class JsonEvent: self.desc += extra_desc if self.long_desc and extra_desc: self.long_desc += extra_desc + if self.desc and self.long_desc and self.desc == self.long_desc: + # Avoid duplicated descriptions. 
+ self.long_desc = None if arch_std: if arch_std.lower() in _arch_std_events: event = _arch_std_events[arch_std.lower()].event diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 8bbe0516ecc0..95fd9f671a22 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -53,7 +53,6 @@ static const struct perf_pmu_test_event bp_l1_btb_correct = { .topic = "branch", }, .alias_str = "event=0x8a", - .alias_long_desc = "L1 BTB Correction", }; static const struct perf_pmu_test_event bp_l2_btb_correct = { @@ -65,7 +64,6 @@ static const struct perf_pmu_test_event bp_l2_btb_correct = { .topic = "branch", }, .alias_str = "event=0x8b", - .alias_long_desc = "L2 BTB Correction", }; static const struct perf_pmu_test_event segment_reg_loads_any = { @@ -77,7 +75,6 @@ static const struct perf_pmu_test_event segment_reg_loads_any = { .topic = "other", }, .alias_str = "event=0x6,period=0x30d40,umask=0x80", - .alias_long_desc = "Number of segment register loads", }; static const struct perf_pmu_test_event dispatch_blocked_any = { @@ -89,7 +86,6 @@ static const struct perf_pmu_test_event dispatch_blocked_any = { .topic = "other", }, .alias_str = "event=0x9,period=0x30d40,umask=0x20", - .alias_long_desc = "Memory cluster signals to block micro-op dispatch for any reason", }; static const struct perf_pmu_test_event eist_trans = { @@ -101,7 +97,6 @@ static const struct perf_pmu_test_event eist_trans = { .topic = "other", }, .alias_str = "event=0x3a,period=0x30d40", - .alias_long_desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", }; static const struct perf_pmu_test_event l3_cache_rd = { @@ -133,11 +128,9 @@ static const struct perf_pmu_test_event uncore_hisi_ddrc_flux_wcmd = { .event = "event=2", .desc = "DDRC write commands", .topic = "uncore", - .long_desc = "DDRC write commands", .pmu = "hisi_sccl,ddrc", }, .alias_str = "event=0x2", - .alias_long_desc = "DDRC write commands", .matching_pmu = "hisi_sccl1_ddrc2", }; 
@@ -147,11 +140,9 @@ static const struct perf_pmu_test_event unc_cbo_xsnp_response_miss_eviction = { .event = "event=0x22,umask=0x81", .desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core", .topic = "uncore", - .long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core", .pmu = "uncore_cbox", }, .alias_str = "event=0x22,umask=0x81", - .alias_long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core", .matching_pmu = "uncore_cbox_0", }; @@ -161,11 +152,9 @@ static const struct perf_pmu_test_event uncore_hyphen = { .event = "event=0xe0", .desc = "UNC_CBO_HYPHEN", .topic = "uncore", - .long_desc = "UNC_CBO_HYPHEN", .pmu = "uncore_cbox", }, .alias_str = "event=0xe0", - .alias_long_desc = "UNC_CBO_HYPHEN", .matching_pmu = "uncore_cbox_0", }; @@ -175,11 +164,9 @@ static const struct perf_pmu_test_event uncore_two_hyph = { .event = "event=0xc0", .desc = "UNC_CBO_TWO_HYPH", .topic = "uncore", - .long_desc = "UNC_CBO_TWO_HYPH", .pmu = "uncore_cbox", }, .alias_str = "event=0xc0", - .alias_long_desc = "UNC_CBO_TWO_HYPH", .matching_pmu = "uncore_cbox_0", }; @@ -189,11 +176,9 @@ static const struct perf_pmu_test_event uncore_hisi_l3c_rd_hit_cpipe = { .event = "event=7", .desc = "Total read hits", .topic = "uncore", - .long_desc = "Total read hits", .pmu = "hisi_sccl,l3c", }, .alias_str = "event=0x7", - .alias_long_desc = "Total read hits", .matching_pmu = "hisi_sccl3_l3c7", }; @@ -203,11 +188,9 @@ static const struct perf_pmu_test_event uncore_imc_free_running_cache_miss = { .event = "event=0x12", .desc = "Total cache misses", .topic = "uncore", - .long_desc = "Total cache misses", .pmu = "uncore_imc_free_running", }, .alias_str = "event=0x12", - .alias_long_desc = "Total cache misses", .matching_pmu = "uncore_imc_free_running_0", }; @@ -217,11 +200,9 @@ static const struct perf_pmu_test_event uncore_imc_cache_hits = { .event = "event=0x34", .desc = "Total cache hits", 
.topic = "uncore", - .long_desc = "Total cache hits", .pmu = "uncore_imc", }, .alias_str = "event=0x34", - .alias_long_desc = "Total cache hits", .matching_pmu = "uncore_imc_0", }; @@ -246,7 +227,6 @@ static const struct perf_pmu_test_event sys_ddr_pmu_write_cycles = { .compat = "v8", }, .alias_str = "event=0x2b", - .alias_long_desc = "ddr write-cycles event", .matching_pmu = "uncore_sys_ddr_pmu0", }; @@ -260,7 +240,6 @@ static const struct perf_pmu_test_event sys_ccn_pmu_read_cycles = { .compat = "0x01", }, .alias_str = "config=0x2c", - .alias_long_desc = "ccn read-cycles event", .matching_pmu = "uncore_sys_ccn_pmu4", }; @@ -274,7 +253,6 @@ static const struct perf_pmu_test_event sys_cmn_pmu_hnf_cache_miss = { .compat = "(434|436|43c|43a).*", }, .alias_str = "eventid=0x1,type=0x5", - .alias_long_desc = "Counts total cache misses in first lookup result (high priority)", .matching_pmu = "uncore_sys_cmn_pmu0", }; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 23666883049d..b09b2ea2407a 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -623,8 +623,7 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, alias->name = strdup(name); alias->desc = desc ? strdup(desc) : NULL; - alias->long_desc = long_desc ? strdup(long_desc) : - desc ? strdup(desc) : NULL; + alias->long_desc = long_desc ? strdup(long_desc) : NULL; alias->topic = topic ? strdup(topic) : NULL; alias->pmu_name = pmu_name ? strdup(pmu_name) : NULL; if (unit) { From 7d5b635d9f4314c93bc1f9828f5d757decb860bc Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 10 Jul 2025 16:51:22 -0700 Subject: [PATCH 0635/2411] perf python: In str(evsel) use the evsel__pmu_name helper The evsel__pmu_name helper will internally use evsel__find_pmu that handles legacy events, extended types, etc. in determining a PMU and will provide a better value than just trying to access the PMU's name directly as the PMU may not have been computed. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250710235126.1086011-10-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/python.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index b5ee9f7a4662..0821205b1aaa 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -925,10 +925,7 @@ static PyObject *pyrf_evsel__str(PyObject *self) struct pyrf_evsel *pevsel = (void *)self; struct evsel *evsel = &pevsel->evsel; - if (!evsel->pmu) - return PyUnicode_FromFormat("evsel(%s)", evsel__name(evsel)); - - return PyUnicode_FromFormat("evsel(%s/%s/)", evsel->pmu->name, evsel__name(evsel)); + return PyUnicode_FromFormat("evsel(%s/%s/)", evsel__pmu_name(evsel), evsel__name(evsel)); } static PyMethodDef pyrf_evsel__methods[] = { From 64ec9b997f3a9462901a404ad60f452f76dd2d6e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 10 Jul 2025 16:51:23 -0700 Subject: [PATCH 0636/2411] perf python: Fix thread check in pyrf_evsel__read The CPU index is incorrectly checked rather than the thread index. 
Fixes: 739621f65702 ("perf python: Add evsel read method") Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250710235126.1086011-11-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/python.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 0821205b1aaa..4a3c2b4dd79f 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -910,7 +910,7 @@ static PyObject *pyrf_evsel__read(struct pyrf_evsel *pevsel, return NULL; } thread_idx = perf_thread_map__idx(evsel->core.threads, thread); - if (cpu_idx < 0) { + if (thread_idx < 0) { PyErr_Format(PyExc_TypeError, "Thread %d is not part of evsel's threads", thread); return NULL; From 6183afcba9c1c810656ddb36170106aaf3cf778c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 10 Jul 2025 16:51:24 -0700 Subject: [PATCH 0637/2411] perf python: Correct pyrf_evsel__read for tool PMUs Tool PMUs assume that stat's process_counter_values is being used to read the counters. Specifically they hold onto old values in evsel->prev_raw_counts and give the cumulative count based off of this value. Update pyrf_evsel__read to allocate counts and prev_raw_counts, use evsel__read_counter rather than perf_evsel__read so tool PMUs are read from not just perf_event_open events, make the returned pyrf_counts_values contain the delta value rather than the cumulative value. 
Fixes: 739621f65702 ("perf python: Add evsel read method") Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250710235126.1086011-12-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/python.c | 47 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 4a3c2b4dd79f..f689560192f4 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -10,6 +10,7 @@ #endif #include #include "callchain.h" +#include "counts.h" #include "evlist.h" #include "evsel.h" #include "event.h" @@ -889,12 +890,38 @@ static PyObject *pyrf_evsel__threads(struct pyrf_evsel *pevsel) return (PyObject *)pthread_map; } +/* + * Ensure evsel's counts and prev_raw_counts are allocated, the latter + * used by tool PMUs to compute the cumulative count as expected by + * stat's process_counter_values. + */ +static int evsel__ensure_counts(struct evsel *evsel) +{ + int nthreads, ncpus; + + if (evsel->counts != NULL) + return 0; + + nthreads = perf_thread_map__nr(evsel->core.threads); + ncpus = perf_cpu_map__nr(evsel->core.cpus); + + evsel->counts = perf_counts__new(ncpus, nthreads); + if (evsel->counts == NULL) + return -ENOMEM; + + evsel->prev_raw_counts = perf_counts__new(ncpus, nthreads); + if (evsel->prev_raw_counts == NULL) + return -ENOMEM; + + return 0; +} + static PyObject *pyrf_evsel__read(struct pyrf_evsel *pevsel, PyObject *args, PyObject *kwargs) { struct evsel *evsel = &pevsel->evsel; int cpu = 0, cpu_idx, thread = 0, thread_idx; - struct perf_counts_values counts; + struct perf_counts_values *old_count, *new_count; struct pyrf_counts_values *count_values = PyObject_New(struct pyrf_counts_values, &pyrf_counts_values__type); @@ -915,8 +942,22 @@ static PyObject *pyrf_evsel__read(struct pyrf_evsel *pevsel, thread); return NULL; } - perf_evsel__read(&(evsel->core), cpu_idx, thread_idx, &counts); - count_values->values = counts; + + if 
(evsel__ensure_counts(evsel)) + return PyErr_NoMemory(); + + /* Set up pointers to the old and newly read counter values. */ + old_count = perf_counts(evsel->prev_raw_counts, cpu_idx, thread_idx); + new_count = perf_counts(evsel->counts, cpu_idx, thread_idx); + /* Update the value in evsel->counts. */ + evsel__read_counter(evsel, cpu_idx, thread_idx); + /* Copy the value and turn it into the delta from old_count. */ + count_values->values = *new_count; + count_values->values.val -= old_count->val; + count_values->values.ena -= old_count->ena; + count_values->values.run -= old_count->run; + /* Save the new count over the old_count for the next read. */ + *old_count = *new_count; return (PyObject *)count_values; } From 421c5f39adcdf292ca5c7162f40ed6d120d136a8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 10 Jul 2025 16:51:25 -0700 Subject: [PATCH 0638/2411] perf python: Improve leader copying from evlist The struct pyrf_evlist embeds the evlist requiring the copying from things like parsed events. The copying logic handles the leader being the event itself, but if the leader group event is a different event in the list it will cause an evsel to point to the evsel in the list that was copied from which is bad. Fix this by adding another pass over the evlist rewriting leaders, simplified by the introduction of two evlist helpers.
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250710235126.1086011-13-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/python.c | 57 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index f689560192f4..1d9fa33d377a 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -1568,10 +1568,37 @@ static PyObject *pyrf_evsel__from_evsel(struct evsel *evsel) return (PyObject *)pevsel; } +static int evlist__pos(struct evlist *evlist, struct evsel *evsel) +{ + struct evsel *pos; + int idx = 0; + + evlist__for_each_entry(evlist, pos) { + if (evsel == pos) + return idx; + idx++; + } + return -1; +} + +static struct evsel *evlist__at(struct evlist *evlist, int idx) +{ + struct evsel *pos; + int idx2 = 0; + + evlist__for_each_entry(evlist, pos) { + if (idx == idx2) + return pos; + idx2++; + } + return NULL; +} + static PyObject *pyrf_evlist__from_evlist(struct evlist *evlist) { struct pyrf_evlist *pevlist = PyObject_New(struct pyrf_evlist, &pyrf_evlist__type); struct evsel *pos; + struct rb_node *node; if (!pevlist) return NULL; @@ -1583,9 +1610,39 @@ static PyObject *pyrf_evlist__from_evlist(struct evlist *evlist) evlist__add(&pevlist->evlist, &pevsel->evsel); } + evlist__for_each_entry(&pevlist->evlist, pos) { + struct evsel *leader = evsel__leader(pos); + + if (pos != leader) { + int idx = evlist__pos(evlist, leader); + + if (idx >= 0) + evsel__set_leader(pos, evlist__at(&pevlist->evlist, idx)); + else if (leader == NULL) + evsel__set_leader(pos, pos); + } + } metricgroup__copy_metric_events(&pevlist->evlist, /*cgrp=*/NULL, &pevlist->evlist.metric_events, &evlist->metric_events); + for (node = rb_first_cached(&pevlist->evlist.metric_events.entries); node; + node = rb_next(node)) { + struct metric_event *me = container_of(node, struct metric_event, nd); + struct list_head *mpos; + int idx = evlist__pos(evlist, me->evsel); + + if (idx >= 0) + 
me->evsel = evlist__at(&pevlist->evlist, idx); + list_for_each(mpos, &me->head) { + struct metric_expr *e = container_of(mpos, struct metric_expr, nd); + + for (int j = 0; e->metric_events[j]; j++) { + idx = evlist__pos(evlist, e->metric_events[j]); + if (idx >= 0) + e->metric_events[j] = evlist__at(&pevlist->evlist, idx); + } + } + } return (PyObject *)pevlist; } From b4aff7ed7a4c1360e8b29d545c7bc9e05af1a995 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 10 Jul 2025 16:51:26 -0700 Subject: [PATCH 0639/2411] perf python: Set index error for invalid thread/cpu map items Returning NULL for out of bound CPU or thread map items causes internal errors. Fix by correctly setting the error to be an index error. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250710235126.1086011-14-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/python.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 1d9fa33d377a..2f28f71325a8 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -529,8 +529,10 @@ static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i) { struct pyrf_cpu_map *pcpus = (void *)obj; - if (i >= perf_cpu_map__nr(pcpus->cpus)) + if (i >= perf_cpu_map__nr(pcpus->cpus)) { + PyErr_SetString(PyExc_IndexError, "Index out of range"); return NULL; + } return Py_BuildValue("i", perf_cpu_map__cpu(pcpus->cpus, i).cpu); } @@ -598,8 +600,10 @@ static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i) { struct pyrf_thread_map *pthreads = (void *)obj; - if (i >= perf_thread_map__nr(pthreads->threads)) + if (i >= perf_thread_map__nr(pthreads->threads)) { + PyErr_SetString(PyExc_IndexError, "Index out of range"); return NULL; + } return Py_BuildValue("i", perf_thread_map__pid(pthreads->threads, i)); } From b25e271b377999191b12f0afbe1861edcf57e3fe Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 18 Jun 2025 16:46:17 -0700 Subject: [PATCH 
0640/2411] vfio: Fix unbalanced vfio_df_close call in no-iommu mode For devices with no-iommu enabled in IOMMUFD VFIO compat mode, the group open path skips vfio_df_open(), leaving open_count at 0. This causes a warning in vfio_assert_device_open(device) when vfio_df_close() is called during group close. The correct behavior is to skip only the IOMMUFD bind in the device open path for no-iommu devices. Commit 6086efe73498 omitted vfio_df_open(), which was too broad. This patch restores the previous behavior, ensuring the vfio_df_open is called in the group open path. Fixes: 6086efe73498 ("vfio-iommufd: Move noiommu compat validation out of vfio_iommufd_bind()") Suggested-by: Alex Williamson Suggested-by: Jason Gunthorpe Signed-off-by: Jacob Pan Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20250618234618.1910456-1-jacob.pan@linux.microsoft.com Signed-off-by: Alex Williamson --- drivers/vfio/group.c | 7 +++---- drivers/vfio/iommufd.c | 4 ++++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c index c321d442f0da..c376a6279de0 100644 --- a/drivers/vfio/group.c +++ b/drivers/vfio/group.c @@ -192,11 +192,10 @@ static int vfio_df_group_open(struct vfio_device_file *df) * implies they expected translation to exist */ if (!capable(CAP_SYS_RAWIO) || - vfio_iommufd_device_has_compat_ioas(device, df->iommufd)) + vfio_iommufd_device_has_compat_ioas(device, df->iommufd)) { ret = -EPERM; - else - ret = 0; - goto out_put_kvm; + goto out_put_kvm; + } } ret = vfio_df_open(df); diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c index c8c3a2d53f86..a38d262c6028 100644 --- a/drivers/vfio/iommufd.c +++ b/drivers/vfio/iommufd.c @@ -25,6 +25,10 @@ int vfio_df_iommufd_bind(struct vfio_device_file *df) lockdep_assert_held(&vdev->dev_set->lock); + /* Returns 0 to permit device opening under noiommu mode */ + if (vfio_device_is_noiommu(vdev)) + return 0; + return vdev->ops->bind_iommufd(vdev, ictx, &df->devid); } 
From 982ddd59ed97dc7e63efd97ed50273ffb817bd41 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 18 Jun 2025 16:46:18 -0700 Subject: [PATCH 0641/2411] vfio: Prevent open_count decrement to negative When vfio_df_close() is called with open_count=0, it triggers a warning in vfio_assert_device_open() but still decrements open_count to -1. This allows a subsequent open to incorrectly pass the open_count == 0 check, leading to unintended behavior, such as setting df->access_granted = true. For example, running an IOMMUFD compat no-IOMMU device with VFIO tests (https://github.com/awilliam/tests/blob/master/vfio-noiommu-pci-device-open.c) results in a warning and a failed VFIO_GROUP_GET_DEVICE_FD ioctl on the first run, but the second run succeeds incorrectly. Add checks to avoid decrementing open_count below zero. Fixes: 05f37e1c03b6 ("vfio: Pass struct vfio_device_file * to vfio_device_open/close()") Reviewed-by: Jason Gunthorpe Reviewed-by: Yi Liu Signed-off-by: Jacob Pan Link: https://lore.kernel.org/r/20250618234618.1910456-2-jacob.pan@linux.microsoft.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 1fd261efc582..5046cae05222 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -583,7 +583,8 @@ void vfio_df_close(struct vfio_device_file *df) lockdep_assert_held(&device->dev_set->lock); - vfio_assert_device_open(device); + if (!vfio_assert_device_open(device)) + return; if (device->open_count == 1) vfio_df_device_last_close(df); device->open_count--; From fe24d5bc635e103a517ec201c3cb571eeab8be2f Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Wed, 2 Jul 2025 09:37:44 -0700 Subject: [PATCH 0642/2411] vfio/pds: Fix missing detach_ioas op When CONFIG_IOMMUFD is enabled and a device is bound to the pds_vfio_pci driver, the following WARN_ON() trace is seen and probe fails: WARNING: CPU: 0 PID: 5040 at 
drivers/vfio/vfio_main.c:317 __vfio_register_dev+0x130/0x140 [vfio] <...> pds_vfio_pci 0000:08:00.1: probe with driver pds_vfio_pci failed with error -22 This is because the driver's vfio_device_ops.detach_ioas isn't set. Fix this by using the generic vfio_iommufd_physical_detach_ioas function. Fixes: 38fe3975b4c2 ("vfio/pds: Initial support for pds VFIO driver") Signed-off-by: Brett Creeley Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20250702163744.69767-1-brett.creeley@amd.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/pds/vfio_dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vfio/pci/pds/vfio_dev.c b/drivers/vfio/pci/pds/vfio_dev.c index 76a80ae7087b..f6e0253a8a14 100644 --- a/drivers/vfio/pci/pds/vfio_dev.c +++ b/drivers/vfio/pci/pds/vfio_dev.c @@ -204,6 +204,7 @@ static const struct vfio_device_ops pds_vfio_ops = { .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, + .detach_ioas = vfio_iommufd_physical_detach_ioas, }; const struct vfio_device_ops *pds_vfio_ops_info(void) From e908f58b6beb337cbe4481d52c3f5c78167b1aab Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 26 Jun 2025 16:56:18 -0600 Subject: [PATCH 0643/2411] vfio/pci: Separate SR-IOV VF dev_set In the below noted Fixes commit we introduced a reflck mutex to allow better scaling between devices for open and close. The reflck was based on the hot reset granularity, device level for root bus devices which cannot support hot reset or bus/slot reset otherwise. Overlooked in this were SR-IOV VFs, where there's also no bus reset option, but the default for a non-root-bus, non-slot-based device is bus level reflck granularity. The reflck mutex has since become the dev_set mutex (via commit 2cd8b14aaa66 ("vfio/pci: Move to the device set infrastructure")) and is our de facto serialization for various operations and ioctls.
It still seems to be the case though that sets of vfio-pci devices really only need serialization relative to hot resets affecting the entire set, which is not relevant to SR-IOV VFs. As described in the Closes link below, this serialization contributes to startup latency when multiple VFs sharing the same "bus" are opened concurrently. Mark the device itself as the basis of the dev_set for SR-IOV VFs. Reported-by: Aaron Lewis Closes: https://lore.kernel.org/all/20250626180424.632628-1-aaronlewis@google.com Tested-by: Aaron Lewis Fixes: e309df5b0c9e ("vfio/pci: Parallelize device open and release") Reviewed-by: Yi Liu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20250626225623.1180952-1-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 6328c3a05bcd..261a6dc5a5fc 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -2149,7 +2149,7 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) return -EBUSY; } - if (pci_is_root_bus(pdev->bus)) { + if (pci_is_root_bus(pdev->bus) || pdev->is_virtfn) { ret = vfio_assign_device_set(&vdev->vdev, vdev); } else if (!pci_probe_reset_slot(pdev->slot)) { ret = vfio_assign_device_set(&vdev->vdev, pdev->slot); From 33927f3d0ecdcff06326d6e4edb6166aed42811c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 12 Jul 2025 06:02:31 +0100 Subject: [PATCH 0644/2411] habanalabs: fix UAF in export_dmabuf() As soon as we'd inserted a file reference into descriptor table, another thread could close it. That's fine for the case when all we are doing is returning that descriptor to userland (it's a race, but it's a userland race and there's nothing the kernel can do about it). 
However, if we follow fd_install() with any kind of access to objects that would be destroyed on close (be it the struct file itself or anything destroyed by its ->release()), we have a UAF. dma_buf_fd() is a combination of reserving a descriptor and fd_install(). habanalabs export_dmabuf() calls it and then proceeds to access the objects destroyed on close. In particular, it grabs an extra reference to another struct file that will be dropped as part of ->release() for ours; that "will be" is actually "might have already been". Fix that by reserving descriptor before anything else and do fd_install() only when everything had been set up. As a side benefit, we no longer have the failure exit with file already created, but reference to underlying file (as well as ->dmabuf_export_cnt, etc.) not grabbed yet; unlike dma_buf_fd(), fd_install() can't fail. Fixes: db1a8dd916aa ("habanalabs: add support for dma-buf exporter") Signed-off-by: Al Viro --- drivers/accel/habanalabs/common/memory.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c index 601fdbe70179..61472a381904 100644 --- a/drivers/accel/habanalabs/common/memory.c +++ b/drivers/accel/habanalabs/common/memory.c @@ -1829,9 +1829,6 @@ static void hl_release_dmabuf(struct dma_buf *dmabuf) struct hl_dmabuf_priv *hl_dmabuf = dmabuf->priv; struct hl_ctx *ctx; - if (!hl_dmabuf) - return; - ctx = hl_dmabuf->ctx; if (hl_dmabuf->memhash_hnode) @@ -1859,7 +1856,12 @@ static int export_dmabuf(struct hl_ctx *ctx, { DEFINE_DMA_BUF_EXPORT_INFO(exp_info); struct hl_device *hdev = ctx->hdev; - int rc, fd; + CLASS(get_unused_fd, fd)(flags); + + if (fd < 0) { + dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf, %d\n", fd); + return fd; + } exp_info.ops = &habanalabs_dmabuf_ops; exp_info.size = total_size; @@ -1872,13 +1874,6 @@ static int export_dmabuf(struct hl_ctx *ctx, return 
PTR_ERR(hl_dmabuf->dmabuf); } - fd = dma_buf_fd(hl_dmabuf->dmabuf, flags); - if (fd < 0) { - dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf, %d\n", fd); - rc = fd; - goto err_dma_buf_put; - } - hl_dmabuf->ctx = ctx; hl_ctx_get(hl_dmabuf->ctx); atomic_inc(&ctx->hdev->dmabuf_export_cnt); @@ -1890,13 +1885,9 @@ static int export_dmabuf(struct hl_ctx *ctx, get_file(ctx->hpriv->file_priv->filp); *dmabuf_fd = fd; + fd_install(take_fd(fd), hl_dmabuf->dmabuf->file); return 0; - -err_dma_buf_put: - hl_dmabuf->dmabuf->priv = NULL; - dma_buf_put(hl_dmabuf->dmabuf); - return rc; } static int validate_export_params_common(struct hl_device *hdev, u64 addr, u64 size, u64 offset) From 95a042a0c8ecd3c1e886648f6f6ab9c7e4403db9 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 10 Jul 2025 22:19:16 +0900 Subject: [PATCH 0645/2411] firewire: ohci: reduce the size of common context structure by extracting members into AT structure In commit 386a4153a2c1 ("firewire: ohci: cache the context run bit"), a running member was added to the context structure to cache the running state of a given DMA context. Although this member is accessible from IR, IT, and AT contexts, it is currently used only by the AT context. Additionally, the context structure includes a work item, which is also used by the AT context. Both members are unnecessary for IR and IT contexts. This commit refactors the code by moving these two members into a new structure specific to AT context. 
Link: https://lore.kernel.org/r/20250710131916.31289-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/ohci.c | 92 ++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 43 deletions(-) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 709a714fd5c8..5d8301b0f3aa 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -128,7 +128,6 @@ struct context { int total_allocation; u32 current_bus; bool running; - bool flushing; /* * List of page-sized buffers for storing DMA descriptors. @@ -157,8 +156,12 @@ struct context { int prev_z; descriptor_callback_t callback; +}; +struct at_context { + struct context context; struct work_struct work; + bool flushing; }; struct iso_context { @@ -204,8 +207,8 @@ struct fw_ohci { struct ar_context ar_request_ctx; struct ar_context ar_response_ctx; - struct context at_request_ctx; - struct context at_response_ctx; + struct at_context at_request_ctx; + struct at_context at_response_ctx; u32 it_context_support; u32 it_context_mask; /* unoccupied IT contexts */ @@ -1178,9 +1181,9 @@ static void context_retire_descriptors(struct context *ctx) static void ohci_at_context_work(struct work_struct *work) { - struct context *ctx = from_work(ctx, work, work); + struct at_context *ctx = from_work(ctx, work, work); - context_retire_descriptors(ctx); + context_retire_descriptors(&ctx->context); } static void ohci_isoc_context_work(struct work_struct *work) @@ -1382,17 +1385,17 @@ struct driver_data { * Must always be called with the ochi->lock held to ensure proper * generation handling and locking around packet queue manipulation. 
*/ -static int at_context_queue_packet(struct context *ctx, - struct fw_packet *packet) +static int at_context_queue_packet(struct at_context *ctx, struct fw_packet *packet) { - struct fw_ohci *ohci = ctx->ohci; + struct context *context = &ctx->context; + struct fw_ohci *ohci = context->ohci; dma_addr_t d_bus, payload_bus; struct driver_data *driver_data; struct descriptor *d, *last; __le32 *header; int z, tcode; - d = context_get_descriptors(ctx, 4, &d_bus); + d = context_get_descriptors(context, 4, &d_bus); if (d == NULL) { packet->ack = RCODE_SEND_ERROR; return -1; @@ -1422,7 +1425,7 @@ static int at_context_queue_packet(struct context *ctx, ohci1394_at_data_set_destination_id(header, async_header_get_destination(packet->header)); - if (ctx == &ctx->ohci->at_response_ctx) { + if (ctx == &ohci->at_response_ctx) { ohci1394_at_data_set_rcode(header, async_header_get_rcode(packet->header)); } else { ohci1394_at_data_set_destination_offset(header, @@ -1511,17 +1514,17 @@ static int at_context_queue_packet(struct context *ctx, return -1; } - context_append(ctx, d, z, 4 - z); + context_append(context, d, z, 4 - z); - if (ctx->running) - reg_write(ohci, CONTROL_SET(ctx->regs), CONTEXT_WAKE); + if (context->running) + reg_write(ohci, CONTROL_SET(context->regs), CONTEXT_WAKE); else - context_run(ctx, 0); + context_run(context, 0); return 0; } -static void at_context_flush(struct context *ctx) +static void at_context_flush(struct at_context *ctx) { // Avoid dead lock due to programming mistake. 
if (WARN_ON_ONCE(current_work() == &ctx->work)) @@ -1540,12 +1543,13 @@ static int handle_at_packet(struct context *context, struct descriptor *d, struct descriptor *last) { + struct at_context *ctx = container_of(context, struct at_context, context); + struct fw_ohci *ohci = ctx->context.ohci; struct driver_data *driver_data; struct fw_packet *packet; - struct fw_ohci *ohci = context->ohci; int evt; - if (last->transfer_status == 0 && !READ_ONCE(context->flushing)) + if (last->transfer_status == 0 && !READ_ONCE(ctx->flushing)) /* This descriptor isn't done yet, stop iteration. */ return 0; @@ -1579,7 +1583,7 @@ static int handle_at_packet(struct context *context, break; case OHCI1394_evt_missing_ack: - if (READ_ONCE(context->flushing)) + if (READ_ONCE(ctx->flushing)) packet->ack = RCODE_GENERATION; else { /* @@ -1601,7 +1605,7 @@ static int handle_at_packet(struct context *context, break; case OHCI1394_evt_no_status: - if (READ_ONCE(context->flushing)) { + if (READ_ONCE(ctx->flushing)) { packet->ack = RCODE_GENERATION; break; } @@ -1698,13 +1702,14 @@ static void handle_local_lock(struct fw_ohci *ohci, fw_core_handle_response(&ohci->card, &response); } -static void handle_local_request(struct context *ctx, struct fw_packet *packet) +static void handle_local_request(struct at_context *ctx, struct fw_packet *packet) { + struct fw_ohci *ohci = ctx->context.ohci; u64 offset, csr; - if (ctx == &ctx->ohci->at_request_ctx) { + if (ctx == &ohci->at_request_ctx) { packet->ack = ACK_PENDING; - packet->callback(packet, &ctx->ohci->card, packet->ack); + packet->callback(packet, &ohci->card, packet->ack); } offset = async_header_get_offset(packet->header); @@ -1712,54 +1717,55 @@ static void handle_local_request(struct context *ctx, struct fw_packet *packet) /* Handle config rom reads. 
*/ if (csr >= CSR_CONFIG_ROM && csr < CSR_CONFIG_ROM_END) - handle_local_rom(ctx->ohci, packet, csr); + handle_local_rom(ohci, packet, csr); else switch (csr) { case CSR_BUS_MANAGER_ID: case CSR_BANDWIDTH_AVAILABLE: case CSR_CHANNELS_AVAILABLE_HI: case CSR_CHANNELS_AVAILABLE_LO: - handle_local_lock(ctx->ohci, packet, csr); + handle_local_lock(ohci, packet, csr); break; default: - if (ctx == &ctx->ohci->at_request_ctx) - fw_core_handle_request(&ctx->ohci->card, packet); + if (ctx == &ohci->at_request_ctx) + fw_core_handle_request(&ohci->card, packet); else - fw_core_handle_response(&ctx->ohci->card, packet); + fw_core_handle_response(&ohci->card, packet); break; } - if (ctx == &ctx->ohci->at_response_ctx) { + if (ctx == &ohci->at_response_ctx) { packet->ack = ACK_COMPLETE; - packet->callback(packet, &ctx->ohci->card, packet->ack); + packet->callback(packet, &ohci->card, packet->ack); } } -static void at_context_transmit(struct context *ctx, struct fw_packet *packet) +static void at_context_transmit(struct at_context *ctx, struct fw_packet *packet) { + struct fw_ohci *ohci = ctx->context.ohci; unsigned long flags; int ret; - spin_lock_irqsave(&ctx->ohci->lock, flags); + spin_lock_irqsave(&ohci->lock, flags); - if (async_header_get_destination(packet->header) == ctx->ohci->node_id && - ctx->ohci->generation == packet->generation) { - spin_unlock_irqrestore(&ctx->ohci->lock, flags); + if (async_header_get_destination(packet->header) == ohci->node_id && + ohci->generation == packet->generation) { + spin_unlock_irqrestore(&ohci->lock, flags); // Timestamping on behalf of the hardware. - packet->timestamp = cycle_time_to_ohci_tstamp(get_cycle_time(ctx->ohci)); + packet->timestamp = cycle_time_to_ohci_tstamp(get_cycle_time(ohci)); handle_local_request(ctx, packet); return; } ret = at_context_queue_packet(ctx, packet); - spin_unlock_irqrestore(&ctx->ohci->lock, flags); + spin_unlock_irqrestore(&ohci->lock, flags); if (ret < 0) { // Timestamping on behalf of the hardware. 
- packet->timestamp = cycle_time_to_ohci_tstamp(get_cycle_time(ctx->ohci)); + packet->timestamp = cycle_time_to_ohci_tstamp(get_cycle_time(ohci)); - packet->callback(packet, &ctx->ohci->card, packet->ack); + packet->callback(packet, &ohci->card, packet->ack); } } @@ -2138,8 +2144,8 @@ static void bus_reset_work(struct work_struct *work) // FIXME: Document how the locking works. scoped_guard(spinlock_irq, &ohci->lock) { ohci->generation = -1; // prevent AT packet queueing - context_stop(&ohci->at_request_ctx); - context_stop(&ohci->at_response_ctx); + context_stop(&ohci->at_request_ctx.context); + context_stop(&ohci->at_response_ctx.context); } /* @@ -2683,7 +2689,7 @@ static void ohci_send_response(struct fw_card *card, struct fw_packet *packet) static int ohci_cancel_packet(struct fw_card *card, struct fw_packet *packet) { struct fw_ohci *ohci = fw_ohci(card); - struct context *ctx = &ohci->at_request_ctx; + struct at_context *ctx = &ohci->at_request_ctx; struct driver_data *driver_data = packet->driver_data; int ret = -ENOENT; @@ -3767,13 +3773,13 @@ static int pci_probe(struct pci_dev *dev, if (err < 0) return err; - err = context_init(&ohci->at_request_ctx, ohci, + err = context_init(&ohci->at_request_ctx.context, ohci, OHCI1394_AsReqTrContextControlSet, handle_at_packet); if (err < 0) return err; INIT_WORK(&ohci->at_request_ctx.work, ohci_at_context_work); - err = context_init(&ohci->at_response_ctx, ohci, + err = context_init(&ohci->at_response_ctx.context, ohci, OHCI1394_AsRspTrContextControlSet, handle_at_packet); if (err < 0) return err; From 7b41a2341fa62babda5d5c7a32c632e9eba2ee11 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 7 Jul 2025 01:26:58 +0200 Subject: [PATCH 0646/2411] power: supply: core: fix static checker warning The static checker complains that the block already breaks if IS_ERR(np), so the extra !IS_ERR(np) check in the while condition is superfluous. Avoid the extra check by using while(true) instead.
This should not change the runtime behavior at all and I expect the binary to be more or less the same for an optimizing compiler. Fixes: f368f87b22da ("power: supply: core: convert to fwnnode") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-pm/285c9c39-482c-480c-8b0b-07111e39fdfe@sabinyo.mountain/ Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250707-fix-psy-static-checker-warning-v1-1-42d555c2b68a@collabora.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index aedb20c1d276..7c3913155dc0 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -212,7 +212,7 @@ static int __power_supply_populate_supplied_from(struct power_supply *epsy, break; } fwnode_handle_put(np); - } while (!IS_ERR(np)); + } while (true); return 0; } From 8da881d39c1b7fd4a211587ba40f1c936909a11a Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 16 Jun 2025 12:41:47 +0000 Subject: [PATCH 0647/2411] rust: uaccess: add strncpy_from_user This patch adds a direct wrapper around the C function of the same name. It's not really intended for direct use by Rust code since strncpy_from_user has a somewhat unfortunate API where it only nul-terminates the buffer if there's space for the nul-terminator. This means that a direct Rust wrapper around it could not return a &CStr since the buffer may not be a cstring. However, we still add the method to build more convenient APIs on top of it, which will happen in subsequent patches. Reviewed-by: Danilo Krummrich Reviewed-by: Greg Kroah-Hartman Reviewed-by: Boqun Feng Reviewed-by: Benno Lossin Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20250616-strncpy-from-user-v5-1-2d3fb0e1f5af@google.com [ Reworded title. 
- Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/uaccess.rs | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/rust/kernel/uaccess.rs b/rust/kernel/uaccess.rs index 4ef13cf13a78..c917d3e51bb0 100644 --- a/rust/kernel/uaccess.rs +++ b/rust/kernel/uaccess.rs @@ -8,7 +8,7 @@ alloc::{Allocator, Flags}, bindings, error::Result, - ffi::c_void, + ffi::{c_char, c_void}, prelude::*, transmute::{AsBytes, FromBytes}, }; @@ -367,3 +367,37 @@ pub fn write(&mut self, value: &T) -> Result { Ok(()) } } + +/// Reads a nul-terminated string into `dst` and returns the length. +/// +/// This reads from userspace until a NUL byte is encountered, or until `dst.len()` bytes have been +/// read. Fails with [`EFAULT`] if a read happens on a bad address (some data may have been +/// copied). When the end of the buffer is encountered, no NUL byte is added, so the string is +/// *not* guaranteed to be NUL-terminated when `Ok(dst.len())` is returned. +/// +/// # Guarantees +/// +/// When this function returns `Ok(len)`, it is guaranteed that the first `len` bytes of `dst` are +/// initialized and non-zero. Furthermore, if `len < dst.len()`, then `dst[len]` is a NUL byte. +#[inline] +#[expect(dead_code)] +fn raw_strncpy_from_user(dst: &mut [MaybeUninit], src: UserPtr) -> Result { + // CAST: Slice lengths are guaranteed to be `<= isize::MAX`. + let len = dst.len() as isize; + + // SAFETY: `dst` is valid for writing `dst.len()` bytes. + let res = unsafe { + bindings::strncpy_from_user(dst.as_mut_ptr().cast::(), src as *const c_char, len) + }; + + if res < 0 { + return Err(Error::from_errno(res as i32)); + } + + #[cfg(CONFIG_RUST_OVERFLOW_CHECKS)] + assert!(res <= len); + + // GUARANTEES: `strncpy_from_user` was successful, so `dst` has contents in accordance with the + // guarantees of this function. 
+ Ok(res as usize) +} From 17bbbefbf6715a543ff4713e26f7b8e6b7a876d6 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 16 Jun 2025 12:41:48 +0000 Subject: [PATCH 0648/2411] rust: uaccess: add UserSliceReader::strcpy_into_buf This patch adds a more convenient method for reading C strings from userspace. Logic is added to NUL-terminate the buffer when necessary so that a &CStr can be returned. Note that we treat attempts to read past `self.length` as a fault, so this returns EFAULT if that limit is exceeded before `buf.len()` is reached. Reviewed-by: Danilo Krummrich Signed-off-by: Alice Ryhl Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250616-strncpy-from-user-v5-2-2d3fb0e1f5af@google.com [ Use `from_mut` to clean `clippy::ref_as_ptr` lint. Reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/uaccess.rs | 60 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/rust/kernel/uaccess.rs b/rust/kernel/uaccess.rs index c917d3e51bb0..85097eee81d9 100644 --- a/rust/kernel/uaccess.rs +++ b/rust/kernel/uaccess.rs @@ -291,6 +291,65 @@ pub fn read_all(mut self, buf: &mut Vec, flags: Flags) -> R unsafe { buf.inc_len(len) }; Ok(()) } + + /// Read a NUL-terminated string from userspace and return it. + /// + /// The string is read into `buf` and a NUL-terminator is added if the end of `buf` is reached. + /// Since there must be space to add a NUL-terminator, the buffer must not be empty. The + /// returned `&CStr` points into `buf`. + /// + /// Fails with [`EFAULT`] if the read happens on a bad address (some data may have been + /// copied). + #[doc(alias = "strncpy_from_user")] + pub fn strcpy_into_buf<'buf>(self, buf: &'buf mut [u8]) -> Result<&'buf CStr> { + if buf.is_empty() { + return Err(EINVAL); + } + + // SAFETY: The types are compatible and `strncpy_from_user` doesn't write uninitialized + // bytes to `buf`. 
+ let mut dst = unsafe { &mut *(core::ptr::from_mut(buf) as *mut [MaybeUninit]) }; + + // We never read more than `self.length` bytes. + if dst.len() > self.length { + dst = &mut dst[..self.length]; + } + + let mut len = raw_strncpy_from_user(dst, self.ptr)?; + if len < dst.len() { + // Add one to include the NUL-terminator. + len += 1; + } else if len < buf.len() { + // This implies that `len == dst.len() < buf.len()`. + // + // This means that we could not fill the entire buffer, but we had to stop reading + // because we hit the `self.length` limit of this `UserSliceReader`. Since we did not + // fill the buffer, we treat this case as if we tried to read past the `self.length` + // limit and received a page fault, which is consistent with other `UserSliceReader` + // methods that also return page faults when you exceed `self.length`. + return Err(EFAULT); + } else { + // This implies that `len == buf.len()`. + // + // This means that we filled the buffer exactly. In this case, we add a NUL-terminator + // and return it. Unlike the `len < dst.len()` branch, don't modify `len` because it + // already represents the length including the NUL-terminator. + // + // SAFETY: Due to the check at the beginning, the buffer is not empty. + unsafe { *buf.last_mut().unwrap_unchecked() = 0 }; + } + + // This method consumes `self`, so it can only be called once, thus we do not need to + // update `self.length`. This sidesteps concerns such as whether `self.length` should be + // incremented by `len` or `len-1` in the `len == buf.len()` case. + + // SAFETY: There are two cases: + // * If we hit the `len < dst.len()` case, then `raw_strncpy_from_user` guarantees that + // this slice contains exactly one NUL byte at the end of the string. + // * Otherwise, `raw_strncpy_from_user` guarantees that the string contained no NUL bytes, + // and we have since added a NUL byte at the end. 
+ Ok(unsafe { CStr::from_bytes_with_nul_unchecked(&buf[..len]) }) + } } /// A writer for [`UserSlice`]. @@ -380,7 +439,6 @@ pub fn write(&mut self, value: &T) -> Result { /// When this function returns `Ok(len)`, it is guaranteed that the first `len` bytes of `dst` are /// initialized and non-zero. Furthermore, if `len < dst.len()`, then `dst[len]` is a NUL byte. #[inline] -#[expect(dead_code)] fn raw_strncpy_from_user(dst: &mut [MaybeUninit], src: UserPtr) -> Result { // CAST: Slice lengths are guaranteed to be `<= isize::MAX`. let len = dst.len() as isize; From 620d3d1025581b9f1b883452788b6f409ff04170 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Wed, 2 Jul 2025 17:56:17 +0200 Subject: [PATCH 0649/2411] pinctrl: qcom: Add Milos pinctrl driver Add pinctrl driver for TLMM block found in the Milos SoC. Signed-off-by: Luca Weiss Reviewed-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/20250702-sm7635-pinctrl-v2-2-c138624b9924@fairphone.com Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/Kconfig.msm | 8 + drivers/pinctrl/qcom/Makefile | 1 + drivers/pinctrl/qcom/pinctrl-milos.c | 1339 ++++++++++++++++++++++++++ 3 files changed, 1348 insertions(+) create mode 100644 drivers/pinctrl/qcom/pinctrl-milos.c diff --git a/drivers/pinctrl/qcom/Kconfig.msm b/drivers/pinctrl/qcom/Kconfig.msm index 0bb44c9a4c06..6dad942b00a3 100644 --- a/drivers/pinctrl/qcom/Kconfig.msm +++ b/drivers/pinctrl/qcom/Kconfig.msm @@ -371,6 +371,14 @@ config PINCTRL_SM7150 Qualcomm Technologies Inc TLMM block found on the Qualcomm Technologies Inc SM7150 platform. +config PINCTRL_MILOS + tristate "Qualcomm Technologies Inc Milos pin controller driver" + depends on ARM64 || COMPILE_TEST + help + This is the pinctrl, pinmux, pinconf and gpiolib driver for the + Qualcomm Technologies Inc TLMM block found on the Qualcomm + Technologies Inc Milos platform. 
+ config PINCTRL_SM8150 tristate "Qualcomm Technologies Inc SM8150 pin controller driver" depends on ARM64 || COMPILE_TEST diff --git a/drivers/pinctrl/qcom/Makefile b/drivers/pinctrl/qcom/Makefile index 954f5291cc37..2acff520a285 100644 --- a/drivers/pinctrl/qcom/Makefile +++ b/drivers/pinctrl/qcom/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_PINCTRL_QCS8300) += pinctrl-qcs8300.o obj-$(CONFIG_PINCTRL_QDF2XXX) += pinctrl-qdf2xxx.o obj-$(CONFIG_PINCTRL_MDM9607) += pinctrl-mdm9607.o obj-$(CONFIG_PINCTRL_MDM9615) += pinctrl-mdm9615.o +obj-$(CONFIG_PINCTRL_MILOS) += pinctrl-milos.o obj-$(CONFIG_PINCTRL_QCOM_SPMI_PMIC) += pinctrl-spmi-gpio.o obj-$(CONFIG_PINCTRL_QCOM_SPMI_PMIC) += pinctrl-spmi-mpp.o obj-$(CONFIG_PINCTRL_QCOM_SSBI_PMIC) += pinctrl-ssbi-gpio.o diff --git a/drivers/pinctrl/qcom/pinctrl-milos.c b/drivers/pinctrl/qcom/pinctrl-milos.c new file mode 100644 index 000000000000..d11a7bbcd733 --- /dev/null +++ b/drivers/pinctrl/qcom/pinctrl-milos.c @@ -0,0 +1,1339 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * Copyright (c) 2025, Luca Weiss + */ + +#include +#include +#include + +#include "pinctrl-msm.h" + +#define REG_SIZE 0x1000 + +#define PINGROUP(id, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11) \ + { \ + .grp = PINCTRL_PINGROUP("gpio" #id, \ + gpio##id##_pins, \ + ARRAY_SIZE(gpio##id##_pins)), \ + .funcs = (int[]){ \ + msm_mux_gpio, /* gpio mode */ \ + msm_mux_##f1, \ + msm_mux_##f2, \ + msm_mux_##f3, \ + msm_mux_##f4, \ + msm_mux_##f5, \ + msm_mux_##f6, \ + msm_mux_##f7, \ + msm_mux_##f8, \ + msm_mux_##f9, \ + msm_mux_##f10, \ + msm_mux_##f11 /* egpio mode */ \ + }, \ + .nfuncs = 12, \ + .ctl_reg = REG_SIZE * id, \ + .io_reg = 0x4 + REG_SIZE * id, \ + .intr_cfg_reg = 0x8 + REG_SIZE * id, \ + .intr_status_reg = 0xc + REG_SIZE * id, \ + .intr_target_reg = 0x8 + REG_SIZE * id, \ + .mux_bit = 2, \ + .pull_bit = 0, \ + .drv_bit = 6, \ + .i2c_pull_bit = 13, \ + .egpio_enable = 12, \ + .egpio_present = 11, \ + .oe_bit = 9, \ + .in_bit = 0, \ + .out_bit = 1, \ + .intr_enable_bit = 0, \ + .intr_status_bit = 0, \ + .intr_target_bit = 8, \ + .intr_wakeup_enable_bit = 7, \ + .intr_wakeup_present_bit = 6, \ + .intr_target_kpss_val = 3, \ + .intr_raw_status_bit = 4, \ + .intr_polarity_bit = 1, \ + .intr_detection_bit = 2, \ + .intr_detection_width = 2, \ + } + +#define SDC_QDSD_PINGROUP(pg_name, ctl, pull, drv) \ + { \ + .grp = PINCTRL_PINGROUP(#pg_name, \ + pg_name##_pins, \ + ARRAY_SIZE(pg_name##_pins)), \ + .ctl_reg = ctl, \ + .io_reg = 0, \ + .intr_cfg_reg = 0, \ + .intr_status_reg = 0, \ + .intr_target_reg = 0, \ + .mux_bit = -1, \ + .pull_bit = pull, \ + .drv_bit = drv, \ + .oe_bit = -1, \ + .in_bit = -1, \ + .out_bit = -1, \ + .intr_enable_bit = -1, \ + .intr_status_bit = -1, \ + .intr_target_bit = -1, \ + .intr_raw_status_bit = -1, \ + .intr_polarity_bit = -1, \ + .intr_detection_bit = -1, \ + .intr_detection_width = -1, \ + } + +#define UFS_RESET(pg_name, ctl, io) \ + { \ + .grp = PINCTRL_PINGROUP(#pg_name, \ + pg_name##_pins, \ + ARRAY_SIZE(pg_name##_pins)), \ + 
.ctl_reg = ctl, \ + .io_reg = io, \ + .intr_cfg_reg = 0, \ + .intr_status_reg = 0, \ + .intr_target_reg = 0, \ + .mux_bit = -1, \ + .pull_bit = 3, \ + .drv_bit = 0, \ + .oe_bit = -1, \ + .in_bit = -1, \ + .out_bit = 0, \ + .intr_enable_bit = -1, \ + .intr_status_bit = -1, \ + .intr_target_bit = -1, \ + .intr_raw_status_bit = -1, \ + .intr_polarity_bit = -1, \ + .intr_detection_bit = -1, \ + .intr_detection_width = -1, \ + } + +static const struct pinctrl_pin_desc milos_pins[] = { + PINCTRL_PIN(0, "GPIO_0"), + PINCTRL_PIN(1, "GPIO_1"), + PINCTRL_PIN(2, "GPIO_2"), + PINCTRL_PIN(3, "GPIO_3"), + PINCTRL_PIN(4, "GPIO_4"), + PINCTRL_PIN(5, "GPIO_5"), + PINCTRL_PIN(6, "GPIO_6"), + PINCTRL_PIN(7, "GPIO_7"), + PINCTRL_PIN(8, "GPIO_8"), + PINCTRL_PIN(9, "GPIO_9"), + PINCTRL_PIN(10, "GPIO_10"), + PINCTRL_PIN(11, "GPIO_11"), + PINCTRL_PIN(12, "GPIO_12"), + PINCTRL_PIN(13, "GPIO_13"), + PINCTRL_PIN(14, "GPIO_14"), + PINCTRL_PIN(15, "GPIO_15"), + PINCTRL_PIN(16, "GPIO_16"), + PINCTRL_PIN(17, "GPIO_17"), + PINCTRL_PIN(18, "GPIO_18"), + PINCTRL_PIN(19, "GPIO_19"), + PINCTRL_PIN(20, "GPIO_20"), + PINCTRL_PIN(21, "GPIO_21"), + PINCTRL_PIN(22, "GPIO_22"), + PINCTRL_PIN(23, "GPIO_23"), + PINCTRL_PIN(24, "GPIO_24"), + PINCTRL_PIN(25, "GPIO_25"), + PINCTRL_PIN(26, "GPIO_26"), + PINCTRL_PIN(27, "GPIO_27"), + PINCTRL_PIN(28, "GPIO_28"), + PINCTRL_PIN(29, "GPIO_29"), + PINCTRL_PIN(30, "GPIO_30"), + PINCTRL_PIN(31, "GPIO_31"), + PINCTRL_PIN(32, "GPIO_32"), + PINCTRL_PIN(33, "GPIO_33"), + PINCTRL_PIN(34, "GPIO_34"), + PINCTRL_PIN(35, "GPIO_35"), + PINCTRL_PIN(36, "GPIO_36"), + PINCTRL_PIN(37, "GPIO_37"), + PINCTRL_PIN(38, "GPIO_38"), + PINCTRL_PIN(39, "GPIO_39"), + PINCTRL_PIN(40, "GPIO_40"), + PINCTRL_PIN(41, "GPIO_41"), + PINCTRL_PIN(42, "GPIO_42"), + PINCTRL_PIN(43, "GPIO_43"), + PINCTRL_PIN(44, "GPIO_44"), + PINCTRL_PIN(45, "GPIO_45"), + PINCTRL_PIN(46, "GPIO_46"), + PINCTRL_PIN(47, "GPIO_47"), + PINCTRL_PIN(48, "GPIO_48"), + PINCTRL_PIN(49, "GPIO_49"), + PINCTRL_PIN(50, "GPIO_50"), + 
PINCTRL_PIN(51, "GPIO_51"), + PINCTRL_PIN(52, "GPIO_52"), + PINCTRL_PIN(53, "GPIO_53"), + PINCTRL_PIN(54, "GPIO_54"), + PINCTRL_PIN(55, "GPIO_55"), + PINCTRL_PIN(56, "GPIO_56"), + PINCTRL_PIN(57, "GPIO_57"), + PINCTRL_PIN(58, "GPIO_58"), + PINCTRL_PIN(59, "GPIO_59"), + PINCTRL_PIN(60, "GPIO_60"), + PINCTRL_PIN(61, "GPIO_61"), + PINCTRL_PIN(62, "GPIO_62"), + PINCTRL_PIN(63, "GPIO_63"), + PINCTRL_PIN(64, "GPIO_64"), + PINCTRL_PIN(65, "GPIO_65"), + PINCTRL_PIN(66, "GPIO_66"), + PINCTRL_PIN(67, "GPIO_67"), + PINCTRL_PIN(68, "GPIO_68"), + PINCTRL_PIN(69, "GPIO_69"), + PINCTRL_PIN(70, "GPIO_70"), + PINCTRL_PIN(71, "GPIO_71"), + PINCTRL_PIN(72, "GPIO_72"), + PINCTRL_PIN(73, "GPIO_73"), + PINCTRL_PIN(74, "GPIO_74"), + PINCTRL_PIN(75, "GPIO_75"), + PINCTRL_PIN(76, "GPIO_76"), + PINCTRL_PIN(77, "GPIO_77"), + PINCTRL_PIN(78, "GPIO_78"), + PINCTRL_PIN(79, "GPIO_79"), + PINCTRL_PIN(80, "GPIO_80"), + PINCTRL_PIN(81, "GPIO_81"), + PINCTRL_PIN(82, "GPIO_82"), + PINCTRL_PIN(83, "GPIO_83"), + PINCTRL_PIN(84, "GPIO_84"), + PINCTRL_PIN(85, "GPIO_85"), + PINCTRL_PIN(86, "GPIO_86"), + PINCTRL_PIN(87, "GPIO_87"), + PINCTRL_PIN(88, "GPIO_88"), + PINCTRL_PIN(89, "GPIO_89"), + PINCTRL_PIN(90, "GPIO_90"), + PINCTRL_PIN(91, "GPIO_91"), + PINCTRL_PIN(92, "GPIO_92"), + PINCTRL_PIN(93, "GPIO_93"), + PINCTRL_PIN(94, "GPIO_94"), + PINCTRL_PIN(95, "GPIO_95"), + PINCTRL_PIN(96, "GPIO_96"), + PINCTRL_PIN(97, "GPIO_97"), + PINCTRL_PIN(98, "GPIO_98"), + PINCTRL_PIN(99, "GPIO_99"), + PINCTRL_PIN(100, "GPIO_100"), + PINCTRL_PIN(101, "GPIO_101"), + PINCTRL_PIN(102, "GPIO_102"), + PINCTRL_PIN(103, "GPIO_103"), + PINCTRL_PIN(104, "GPIO_104"), + PINCTRL_PIN(105, "GPIO_105"), + PINCTRL_PIN(106, "GPIO_106"), + PINCTRL_PIN(107, "GPIO_107"), + PINCTRL_PIN(108, "GPIO_108"), + PINCTRL_PIN(109, "GPIO_109"), + PINCTRL_PIN(110, "GPIO_110"), + PINCTRL_PIN(111, "GPIO_111"), + PINCTRL_PIN(112, "GPIO_112"), + PINCTRL_PIN(113, "GPIO_113"), + PINCTRL_PIN(114, "GPIO_114"), + PINCTRL_PIN(115, "GPIO_115"), + PINCTRL_PIN(116, 
"GPIO_116"), + PINCTRL_PIN(117, "GPIO_117"), + PINCTRL_PIN(118, "GPIO_118"), + PINCTRL_PIN(119, "GPIO_119"), + PINCTRL_PIN(120, "GPIO_120"), + PINCTRL_PIN(121, "GPIO_121"), + PINCTRL_PIN(122, "GPIO_122"), + PINCTRL_PIN(123, "GPIO_123"), + PINCTRL_PIN(124, "GPIO_124"), + PINCTRL_PIN(125, "GPIO_125"), + PINCTRL_PIN(126, "GPIO_126"), + PINCTRL_PIN(127, "GPIO_127"), + PINCTRL_PIN(128, "GPIO_128"), + PINCTRL_PIN(129, "GPIO_129"), + PINCTRL_PIN(130, "GPIO_130"), + PINCTRL_PIN(131, "GPIO_131"), + PINCTRL_PIN(132, "GPIO_132"), + PINCTRL_PIN(133, "GPIO_133"), + PINCTRL_PIN(134, "GPIO_134"), + PINCTRL_PIN(135, "GPIO_135"), + PINCTRL_PIN(136, "GPIO_136"), + PINCTRL_PIN(137, "GPIO_137"), + PINCTRL_PIN(138, "GPIO_138"), + PINCTRL_PIN(139, "GPIO_139"), + PINCTRL_PIN(140, "GPIO_140"), + PINCTRL_PIN(141, "GPIO_141"), + PINCTRL_PIN(142, "GPIO_142"), + PINCTRL_PIN(143, "GPIO_143"), + PINCTRL_PIN(144, "GPIO_144"), + PINCTRL_PIN(145, "GPIO_145"), + PINCTRL_PIN(146, "GPIO_146"), + PINCTRL_PIN(147, "GPIO_147"), + PINCTRL_PIN(148, "GPIO_148"), + PINCTRL_PIN(149, "GPIO_149"), + PINCTRL_PIN(150, "GPIO_150"), + PINCTRL_PIN(151, "GPIO_151"), + PINCTRL_PIN(152, "GPIO_152"), + PINCTRL_PIN(153, "GPIO_153"), + PINCTRL_PIN(154, "GPIO_154"), + PINCTRL_PIN(155, "GPIO_155"), + PINCTRL_PIN(156, "GPIO_156"), + PINCTRL_PIN(157, "GPIO_157"), + PINCTRL_PIN(158, "GPIO_158"), + PINCTRL_PIN(159, "GPIO_159"), + PINCTRL_PIN(160, "GPIO_160"), + PINCTRL_PIN(161, "GPIO_161"), + PINCTRL_PIN(162, "GPIO_162"), + PINCTRL_PIN(163, "GPIO_163"), + PINCTRL_PIN(164, "GPIO_164"), + PINCTRL_PIN(165, "GPIO_165"), + PINCTRL_PIN(166, "GPIO_166"), + PINCTRL_PIN(167, "UFS_RESET"), + PINCTRL_PIN(168, "SDC2_CLK"), + PINCTRL_PIN(169, "SDC2_CMD"), + PINCTRL_PIN(170, "SDC2_DATA"), +}; + +#define DECLARE_MSM_GPIO_PINS(pin) \ + static const unsigned int gpio##pin##_pins[] = { pin } +DECLARE_MSM_GPIO_PINS(0); +DECLARE_MSM_GPIO_PINS(1); +DECLARE_MSM_GPIO_PINS(2); +DECLARE_MSM_GPIO_PINS(3); +DECLARE_MSM_GPIO_PINS(4); 
+DECLARE_MSM_GPIO_PINS(5); +DECLARE_MSM_GPIO_PINS(6); +DECLARE_MSM_GPIO_PINS(7); +DECLARE_MSM_GPIO_PINS(8); +DECLARE_MSM_GPIO_PINS(9); +DECLARE_MSM_GPIO_PINS(10); +DECLARE_MSM_GPIO_PINS(11); +DECLARE_MSM_GPIO_PINS(12); +DECLARE_MSM_GPIO_PINS(13); +DECLARE_MSM_GPIO_PINS(14); +DECLARE_MSM_GPIO_PINS(15); +DECLARE_MSM_GPIO_PINS(16); +DECLARE_MSM_GPIO_PINS(17); +DECLARE_MSM_GPIO_PINS(18); +DECLARE_MSM_GPIO_PINS(19); +DECLARE_MSM_GPIO_PINS(20); +DECLARE_MSM_GPIO_PINS(21); +DECLARE_MSM_GPIO_PINS(22); +DECLARE_MSM_GPIO_PINS(23); +DECLARE_MSM_GPIO_PINS(24); +DECLARE_MSM_GPIO_PINS(25); +DECLARE_MSM_GPIO_PINS(26); +DECLARE_MSM_GPIO_PINS(27); +DECLARE_MSM_GPIO_PINS(28); +DECLARE_MSM_GPIO_PINS(29); +DECLARE_MSM_GPIO_PINS(30); +DECLARE_MSM_GPIO_PINS(31); +DECLARE_MSM_GPIO_PINS(32); +DECLARE_MSM_GPIO_PINS(33); +DECLARE_MSM_GPIO_PINS(34); +DECLARE_MSM_GPIO_PINS(35); +DECLARE_MSM_GPIO_PINS(36); +DECLARE_MSM_GPIO_PINS(37); +DECLARE_MSM_GPIO_PINS(38); +DECLARE_MSM_GPIO_PINS(39); +DECLARE_MSM_GPIO_PINS(40); +DECLARE_MSM_GPIO_PINS(41); +DECLARE_MSM_GPIO_PINS(42); +DECLARE_MSM_GPIO_PINS(43); +DECLARE_MSM_GPIO_PINS(44); +DECLARE_MSM_GPIO_PINS(45); +DECLARE_MSM_GPIO_PINS(46); +DECLARE_MSM_GPIO_PINS(47); +DECLARE_MSM_GPIO_PINS(48); +DECLARE_MSM_GPIO_PINS(49); +DECLARE_MSM_GPIO_PINS(50); +DECLARE_MSM_GPIO_PINS(51); +DECLARE_MSM_GPIO_PINS(52); +DECLARE_MSM_GPIO_PINS(53); +DECLARE_MSM_GPIO_PINS(54); +DECLARE_MSM_GPIO_PINS(55); +DECLARE_MSM_GPIO_PINS(56); +DECLARE_MSM_GPIO_PINS(57); +DECLARE_MSM_GPIO_PINS(58); +DECLARE_MSM_GPIO_PINS(59); +DECLARE_MSM_GPIO_PINS(60); +DECLARE_MSM_GPIO_PINS(61); +DECLARE_MSM_GPIO_PINS(62); +DECLARE_MSM_GPIO_PINS(63); +DECLARE_MSM_GPIO_PINS(64); +DECLARE_MSM_GPIO_PINS(65); +DECLARE_MSM_GPIO_PINS(66); +DECLARE_MSM_GPIO_PINS(67); +DECLARE_MSM_GPIO_PINS(68); +DECLARE_MSM_GPIO_PINS(69); +DECLARE_MSM_GPIO_PINS(70); +DECLARE_MSM_GPIO_PINS(71); +DECLARE_MSM_GPIO_PINS(72); +DECLARE_MSM_GPIO_PINS(73); +DECLARE_MSM_GPIO_PINS(74); +DECLARE_MSM_GPIO_PINS(75); 
+DECLARE_MSM_GPIO_PINS(76); +DECLARE_MSM_GPIO_PINS(77); +DECLARE_MSM_GPIO_PINS(78); +DECLARE_MSM_GPIO_PINS(79); +DECLARE_MSM_GPIO_PINS(80); +DECLARE_MSM_GPIO_PINS(81); +DECLARE_MSM_GPIO_PINS(82); +DECLARE_MSM_GPIO_PINS(83); +DECLARE_MSM_GPIO_PINS(84); +DECLARE_MSM_GPIO_PINS(85); +DECLARE_MSM_GPIO_PINS(86); +DECLARE_MSM_GPIO_PINS(87); +DECLARE_MSM_GPIO_PINS(88); +DECLARE_MSM_GPIO_PINS(89); +DECLARE_MSM_GPIO_PINS(90); +DECLARE_MSM_GPIO_PINS(91); +DECLARE_MSM_GPIO_PINS(92); +DECLARE_MSM_GPIO_PINS(93); +DECLARE_MSM_GPIO_PINS(94); +DECLARE_MSM_GPIO_PINS(95); +DECLARE_MSM_GPIO_PINS(96); +DECLARE_MSM_GPIO_PINS(97); +DECLARE_MSM_GPIO_PINS(98); +DECLARE_MSM_GPIO_PINS(99); +DECLARE_MSM_GPIO_PINS(100); +DECLARE_MSM_GPIO_PINS(101); +DECLARE_MSM_GPIO_PINS(102); +DECLARE_MSM_GPIO_PINS(103); +DECLARE_MSM_GPIO_PINS(104); +DECLARE_MSM_GPIO_PINS(105); +DECLARE_MSM_GPIO_PINS(106); +DECLARE_MSM_GPIO_PINS(107); +DECLARE_MSM_GPIO_PINS(108); +DECLARE_MSM_GPIO_PINS(109); +DECLARE_MSM_GPIO_PINS(110); +DECLARE_MSM_GPIO_PINS(111); +DECLARE_MSM_GPIO_PINS(112); +DECLARE_MSM_GPIO_PINS(113); +DECLARE_MSM_GPIO_PINS(114); +DECLARE_MSM_GPIO_PINS(115); +DECLARE_MSM_GPIO_PINS(116); +DECLARE_MSM_GPIO_PINS(117); +DECLARE_MSM_GPIO_PINS(118); +DECLARE_MSM_GPIO_PINS(119); +DECLARE_MSM_GPIO_PINS(120); +DECLARE_MSM_GPIO_PINS(121); +DECLARE_MSM_GPIO_PINS(122); +DECLARE_MSM_GPIO_PINS(123); +DECLARE_MSM_GPIO_PINS(124); +DECLARE_MSM_GPIO_PINS(125); +DECLARE_MSM_GPIO_PINS(126); +DECLARE_MSM_GPIO_PINS(127); +DECLARE_MSM_GPIO_PINS(128); +DECLARE_MSM_GPIO_PINS(129); +DECLARE_MSM_GPIO_PINS(130); +DECLARE_MSM_GPIO_PINS(131); +DECLARE_MSM_GPIO_PINS(132); +DECLARE_MSM_GPIO_PINS(133); +DECLARE_MSM_GPIO_PINS(134); +DECLARE_MSM_GPIO_PINS(135); +DECLARE_MSM_GPIO_PINS(136); +DECLARE_MSM_GPIO_PINS(137); +DECLARE_MSM_GPIO_PINS(138); +DECLARE_MSM_GPIO_PINS(139); +DECLARE_MSM_GPIO_PINS(140); +DECLARE_MSM_GPIO_PINS(141); +DECLARE_MSM_GPIO_PINS(142); +DECLARE_MSM_GPIO_PINS(143); +DECLARE_MSM_GPIO_PINS(144); 
+DECLARE_MSM_GPIO_PINS(145); +DECLARE_MSM_GPIO_PINS(146); +DECLARE_MSM_GPIO_PINS(147); +DECLARE_MSM_GPIO_PINS(148); +DECLARE_MSM_GPIO_PINS(149); +DECLARE_MSM_GPIO_PINS(150); +DECLARE_MSM_GPIO_PINS(151); +DECLARE_MSM_GPIO_PINS(152); +DECLARE_MSM_GPIO_PINS(153); +DECLARE_MSM_GPIO_PINS(154); +DECLARE_MSM_GPIO_PINS(155); +DECLARE_MSM_GPIO_PINS(156); +DECLARE_MSM_GPIO_PINS(157); +DECLARE_MSM_GPIO_PINS(158); +DECLARE_MSM_GPIO_PINS(159); +DECLARE_MSM_GPIO_PINS(160); +DECLARE_MSM_GPIO_PINS(161); +DECLARE_MSM_GPIO_PINS(162); +DECLARE_MSM_GPIO_PINS(163); +DECLARE_MSM_GPIO_PINS(164); +DECLARE_MSM_GPIO_PINS(165); +DECLARE_MSM_GPIO_PINS(166); + +static const unsigned int ufs_reset_pins[] = { 167 }; +static const unsigned int sdc2_clk_pins[] = { 168 }; +static const unsigned int sdc2_cmd_pins[] = { 169 }; +static const unsigned int sdc2_data_pins[] = { 170 }; + +enum milos_functions { + msm_mux_gpio, + msm_mux_aoss_cti, + msm_mux_atest_char, + msm_mux_atest_usb, + msm_mux_audio_ext_mclk0, + msm_mux_audio_ext_mclk1, + msm_mux_audio_ref_clk, + msm_mux_cam_mclk, + msm_mux_cci_async_in0, + msm_mux_cci_i2c_scl, + msm_mux_cci_i2c_sda, + msm_mux_cci_timer, + msm_mux_coex_uart1_rx, + msm_mux_coex_uart1_tx, + msm_mux_dbg_out_clk, + msm_mux_ddr_bist_complete, + msm_mux_ddr_bist_fail, + msm_mux_ddr_bist_start, + msm_mux_ddr_bist_stop, + msm_mux_ddr_pxi0, + msm_mux_ddr_pxi1, + msm_mux_dp0_hot, + msm_mux_egpio, + msm_mux_gcc_gp1, + msm_mux_gcc_gp2, + msm_mux_gcc_gp3, + msm_mux_host2wlan_sol, + msm_mux_i2s0_data0, + msm_mux_i2s0_data1, + msm_mux_i2s0_sck, + msm_mux_i2s0_ws, + msm_mux_ibi_i3c, + msm_mux_jitter_bist, + msm_mux_mdp_vsync, + msm_mux_mdp_vsync0_out, + msm_mux_mdp_vsync1_out, + msm_mux_mdp_vsync2_out, + msm_mux_mdp_vsync3_out, + msm_mux_mdp_vsync_e, + msm_mux_nav_gpio0, + msm_mux_nav_gpio1, + msm_mux_nav_gpio2, + msm_mux_pcie0_clk_req_n, + msm_mux_pcie1_clk_req_n, + msm_mux_phase_flag, + msm_mux_pll_bist_sync, + msm_mux_pll_clk_aux, + msm_mux_prng_rosc0, + msm_mux_prng_rosc1, + 
msm_mux_prng_rosc2, + msm_mux_prng_rosc3, + msm_mux_qdss_cti, + msm_mux_qdss_gpio, + msm_mux_qlink0_enable, + msm_mux_qlink0_request, + msm_mux_qlink0_wmss, + msm_mux_qlink1_enable, + msm_mux_qlink1_request, + msm_mux_qlink1_wmss, + msm_mux_qspi0, + msm_mux_qup0_se0, + msm_mux_qup0_se1, + msm_mux_qup0_se2, + msm_mux_qup0_se3, + msm_mux_qup0_se4, + msm_mux_qup0_se5, + msm_mux_qup0_se6, + msm_mux_qup1_se0, + msm_mux_qup1_se1, + msm_mux_qup1_se2, + msm_mux_qup1_se3, + msm_mux_qup1_se4, + msm_mux_qup1_se5, + msm_mux_qup1_se6, + msm_mux_resout_gpio_n, + msm_mux_sd_write_protect, + msm_mux_sdc1_clk, + msm_mux_sdc1_cmd, + msm_mux_sdc1_data, + msm_mux_sdc1_rclk, + msm_mux_sdc2_clk, + msm_mux_sdc2_cmd, + msm_mux_sdc2_data, + msm_mux_sdc2_fb_clk, + msm_mux_tb_trig_sdc1, + msm_mux_tb_trig_sdc2, + msm_mux_tgu_ch0_trigout, + msm_mux_tgu_ch1_trigout, + msm_mux_tmess_prng0, + msm_mux_tmess_prng1, + msm_mux_tmess_prng2, + msm_mux_tmess_prng3, + msm_mux_tsense_pwm1, + msm_mux_tsense_pwm2, + msm_mux_uim0_clk, + msm_mux_uim0_data, + msm_mux_uim0_present, + msm_mux_uim0_reset, + msm_mux_uim1_clk_mira, + msm_mux_uim1_clk_mirb, + msm_mux_uim1_data_mira, + msm_mux_uim1_data_mirb, + msm_mux_uim1_present_mira, + msm_mux_uim1_present_mirb, + msm_mux_uim1_reset_mira, + msm_mux_uim1_reset_mirb, + msm_mux_usb0_hs, + msm_mux_usb0_phy_ps, + msm_mux_vfr_0, + msm_mux_vfr_1, + msm_mux_vsense_trigger_mirnat, + msm_mux_wcn_sw, + msm_mux_wcn_sw_ctrl, + msm_mux__, +}; + +static const char *const gpio_groups[] = { + "gpio0", "gpio1", "gpio2", "gpio3", "gpio4", "gpio5", + "gpio6", "gpio7", "gpio8", "gpio9", "gpio10", "gpio11", + "gpio12", "gpio13", "gpio14", "gpio15", "gpio16", "gpio17", + "gpio18", "gpio19", "gpio20", "gpio21", "gpio22", "gpio23", + "gpio24", "gpio25", "gpio26", "gpio27", "gpio28", "gpio29", + "gpio30", "gpio31", "gpio32", "gpio33", "gpio34", "gpio35", + "gpio36", "gpio37", "gpio38", "gpio39", "gpio40", "gpio41", + "gpio42", "gpio43", "gpio44", "gpio45", "gpio46", "gpio47", + "gpio48", 
"gpio49", "gpio50", "gpio51", "gpio52", "gpio53", + "gpio54", "gpio55", "gpio56", "gpio57", "gpio58", "gpio59", + "gpio60", "gpio61", "gpio62", "gpio63", "gpio64", "gpio65", + "gpio66", "gpio67", "gpio68", "gpio69", "gpio70", "gpio71", + "gpio72", "gpio73", "gpio74", "gpio75", "gpio76", "gpio77", + "gpio78", "gpio79", "gpio80", "gpio81", "gpio82", "gpio83", + "gpio84", "gpio85", "gpio86", "gpio87", "gpio88", "gpio89", + "gpio90", "gpio91", "gpio92", "gpio93", "gpio94", "gpio95", + "gpio96", "gpio97", "gpio98", "gpio99", "gpio100", "gpio101", + "gpio102", "gpio103", "gpio104", "gpio105", "gpio106", "gpio107", + "gpio108", "gpio109", "gpio110", "gpio111", "gpio112", "gpio113", + "gpio114", "gpio115", "gpio116", "gpio117", "gpio118", "gpio119", + "gpio120", "gpio121", "gpio122", "gpio123", "gpio124", "gpio125", + "gpio126", "gpio127", "gpio128", "gpio129", "gpio130", "gpio131", + "gpio132", "gpio133", "gpio134", "gpio135", "gpio136", "gpio137", + "gpio138", "gpio139", "gpio140", "gpio141", "gpio142", "gpio143", + "gpio144", "gpio145", "gpio146", "gpio147", "gpio148", "gpio149", + "gpio150", "gpio151", "gpio152", "gpio153", "gpio154", "gpio155", + "gpio156", "gpio157", "gpio158", "gpio159", "gpio160", "gpio161", + "gpio162", "gpio163", "gpio164", "gpio165", "gpio166", +}; +static const char *const resout_gpio_n_groups[] = { + "gpio39", +}; +static const char *const sdc1_clk_groups[] = { + "gpio77", +}; +static const char *const sdc1_cmd_groups[] = { + "gpio78", +}; +static const char *const sdc1_data_groups[] = { + "gpio73", "gpio74", "gpio75", "gpio76", "gpio79", "gpio80", + "gpio81", "gpio82", +}; +static const char *const sdc1_rclk_groups[] = { + "gpio72", +}; +static const char *const aoss_cti_groups[] = { + "gpio0", + "gpio1", + "gpio4", + "gpio5", +}; +static const char *const atest_char_groups[] = { + "gpio44", "gpio45", "gpio46", "gpio47", "gpio63", +}; +static const char *const atest_usb_groups[] = { + "gpio23", "gpio24", "gpio60", +}; +static const char 
*const audio_ext_mclk0_groups[] = { + "gpio23", +}; +static const char *const audio_ext_mclk1_groups[] = { + "gpio24", +}; +static const char *const audio_ref_clk_groups[] = { + "gpio24", +}; +static const char *const cam_mclk_groups[] = { + "gpio83", "gpio84", "gpio85", "gpio86", "gpio87", +}; +static const char *const cci_async_in0_groups[] = { + "gpio86", +}; +static const char *const cci_i2c_scl_groups[] = { + "gpio89", "gpio91", "gpio93", "gpio95", +}; +static const char *const cci_i2c_sda_groups[] = { + "gpio88", "gpio90", "gpio92", "gpio94", +}; +static const char *const cci_timer_groups[] = { + "gpio77", "gpio83", "gpio84", "gpio85", +}; +static const char *const coex_uart1_rx_groups[] = { + "gpio64", +}; +static const char *const coex_uart1_tx_groups[] = { + "gpio63", +}; +static const char *const dbg_out_clk_groups[] = { + "gpio24", +}; +static const char *const ddr_bist_complete_groups[] = { + "gpio137", +}; +static const char *const ddr_bist_fail_groups[] = { + "gpio56", +}; +static const char *const ddr_bist_start_groups[] = { + "gpio133", +}; +static const char *const ddr_bist_stop_groups[] = { + "gpio47", +}; +static const char *const ddr_pxi0_groups[] = { + "gpio23", + "gpio24", +}; +static const char *const ddr_pxi1_groups[] = { + "gpio50", + "gpio51", +}; +static const char *const dp0_hot_groups[] = { + "gpio75", +}; +static const char *const egpio_groups[] = { + "gpio132", "gpio133", "gpio134", "gpio135", "gpio136", "gpio137", + "gpio138", "gpio139", "gpio140", "gpio141", "gpio142", "gpio143", + "gpio144", "gpio145", "gpio146", "gpio147", "gpio148", "gpio149", + "gpio150", "gpio151", "gpio152", "gpio153", "gpio154", "gpio155", + "gpio156", "gpio157", "gpio158", "gpio159", "gpio160", "gpio161", + "gpio162", "gpio163", "gpio164", "gpio165", "gpio166", +}; +static const char *const gcc_gp1_groups[] = { + "gpio29", + "gpio32", +}; +static const char *const gcc_gp2_groups[] = { + "gpio28", + "gpio30", +}; +static const char *const gcc_gp3_groups[] = { 
+ "gpio31", + "gpio33", +}; +static const char *const host2wlan_sol_groups[] = { + "gpio46", +}; +static const char *const i2s0_data0_groups[] = { + "gpio16", +}; +static const char *const i2s0_data1_groups[] = { + "gpio17", +}; +static const char *const i2s0_sck_groups[] = { + "gpio15", +}; +static const char *const i2s0_ws_groups[] = { + "gpio18", +}; +static const char *const ibi_i3c_groups[] = { + "gpio0", "gpio1", "gpio4", "gpio5", + "gpio32", "gpio33", "gpio36", "gpio37", +}; +static const char *const jitter_bist_groups[] = { + "gpio141", +}; +static const char *const mdp_vsync_groups[] = { + "gpio19", + "gpio37", + "gpio72", + "gpio129", +}; +static const char *const mdp_vsync0_out_groups[] = { + "gpio12", +}; +static const char *const mdp_vsync1_out_groups[] = { + "gpio12", +}; +static const char *const mdp_vsync2_out_groups[] = { + "gpio40", +}; +static const char *const mdp_vsync3_out_groups[] = { + "gpio40", +}; +static const char *const mdp_vsync_e_groups[] = { + "gpio45", +}; +static const char *const nav_gpio0_groups[] = { + "gpio124", +}; +static const char *const nav_gpio1_groups[] = { + "gpio125", +}; +static const char *const nav_gpio2_groups[] = { + "gpio126", +}; +static const char *const pcie0_clk_req_n_groups[] = { + "gpio67", +}; +static const char *const pcie1_clk_req_n_groups[] = { + "gpio70", +}; +static const char *const phase_flag_groups[] = { + "gpio8", "gpio9", "gpio11", "gpio12", "gpio13", "gpio14", + "gpio15", "gpio16", "gpio18", "gpio26", "gpio38", "gpio39", + "gpio40", "gpio41", "gpio42", "gpio43", "gpio44", "gpio45", + "gpio46", "gpio47", "gpio48", "gpio49", "gpio63", "gpio64", + "gpio127", "gpio138", "gpio139", "gpio140", "gpio142", "gpio143", + "gpio144", "gpio147", +}; +static const char *const pll_bist_sync_groups[] = { + "gpio26", +}; +static const char *const pll_clk_aux_groups[] = { + "gpio36", +}; +static const char *const prng_rosc0_groups[] = { + "gpio66", +}; +static const char *const prng_rosc1_groups[] = { + "gpio67", 
+}; +static const char *const prng_rosc2_groups[] = { + "gpio68", +}; +static const char *const prng_rosc3_groups[] = { + "gpio69", +}; +static const char *const qdss_cti_groups[] = { + "gpio4", "gpio5", "gpio6", "gpio7", + "gpio44", "gpio45", "gpio54", "gpio87", +}; +static const char *const qdss_gpio_groups[] = { + "gpio40", "gpio41", "gpio42", "gpio43", "gpio46", "gpio47", + "gpio48", "gpio49", "gpio50", "gpio51", "gpio52", "gpio53", + "gpio83", "gpio84", "gpio85", "gpio86", "gpio88", "gpio89", + "gpio138", "gpio139", "gpio140", "gpio141", "gpio149", "gpio150", + "gpio155", "gpio156", "gpio157", "gpio158", "gpio159", "gpio160", + "gpio161", "gpio162", "gpio163", "gpio164", "gpio165", "gpio166", +}; +static const char *const qlink0_enable_groups[] = { + "gpio105", +}; +static const char *const qlink0_request_groups[] = { + "gpio104", +}; +static const char *const qlink0_wmss_groups[] = { + "gpio106", +}; +static const char *const qlink1_enable_groups[] = { + "gpio108", +}; +static const char *const qlink1_request_groups[] = { + "gpio107", +}; +static const char *const qlink1_wmss_groups[] = { + "gpio109", +}; +static const char *const qspi0_groups[] = { + "gpio8", "gpio9", "gpio10", "gpio11", "gpio12", "gpio13", "gpio14", +}; +static const char *const qup0_se0_groups[] = { + "gpio0", "gpio1", "gpio2", "gpio3", +}; +static const char *const qup0_se1_groups[] = { + "gpio4", "gpio5", "gpio6", "gpio7", +}; +static const char *const qup0_se2_groups[] = { + "gpio8", "gpio9", "gpio10", "gpio11", "gpio12", "gpio13", "gpio14", +}; +static const char *const qup0_se3_groups[] = { + "gpio15", "gpio16", "gpio17", "gpio18", "gpio23", "gpio24", "gpio26", +}; +static const char *const qup0_se4_groups[] = { + "gpio19", "gpio20", "gpio21", "gpio22", +}; +static const char *const qup0_se5_groups[] = { + "gpio23", "gpio24", "gpio25", "gpio26", +}; +static const char *const qup0_se6_groups[] = { + "gpio27", "gpio28", "gpio29", "gpio30", "gpio31", +}; +static const char *const 
qup1_se0_groups[] = { + "gpio32", "gpio33", "gpio94", "gpio95", +}; +static const char *const qup1_se1_groups[] = { + "gpio36", "gpio37", "gpio38", "gpio39", +}; +static const char *const qup1_se2_groups[] = { + "gpio36", "gpio37", "gpio38", "gpio40", "gpio41", "gpio42", "gpio43", +}; +static const char *const qup1_se3_groups[] = { + "gpio92", "gpio93", "gpio94", "gpio95", +}; +static const char *const qup1_se4_groups[] = { + "gpio48", "gpio49", "gpio50", "gpio51", "gpio52", "gpio53", "gpio54", +}; +static const char *const qup1_se5_groups[] = { + "gpio55", "gpio56", "gpio59", "gpio60", +}; +static const char *const qup1_se6_groups[] = { + "gpio55", "gpio56", "gpio59", "gpio60", "gpio90", "gpio91", +}; +static const char *const sd_write_protect_groups[] = { + "gpio4", +}; +static const char *const sdc2_data_groups[] = { + "gpio34", + "gpio35", + "gpio57", + "gpio58", +}; +static const char *const sdc2_clk_groups[] = { + "gpio62", +}; +static const char *const sdc2_cmd_groups[] = { + "gpio61", +}; +static const char *const sdc2_fb_clk_groups[] = { + "gpio128", +}; +static const char *const tb_trig_sdc1_groups[] = { + "gpio87", +}; +static const char *const tb_trig_sdc2_groups[] = { + "gpio78", +}; +static const char *const tgu_ch0_trigout_groups[] = { + "gpio87", +}; +static const char *const tgu_ch1_trigout_groups[] = { + "gpio88", +}; +static const char *const tmess_prng0_groups[] = { + "gpio86", +}; +static const char *const tmess_prng1_groups[] = { + "gpio83", +}; +static const char *const tmess_prng2_groups[] = { + "gpio84", +}; +static const char *const tmess_prng3_groups[] = { + "gpio85", +}; +static const char *const tsense_pwm1_groups[] = { + "gpio17", +}; +static const char *const tsense_pwm2_groups[] = { + "gpio17", +}; +static const char *const uim0_clk_groups[] = { + "gpio97", +}; +static const char *const uim0_data_groups[] = { + "gpio96", +}; +static const char *const uim0_present_groups[] = { + "gpio99", +}; +static const char *const 
uim0_reset_groups[] = { + "gpio98", +}; +static const char *const uim1_clk_mira_groups[] = { + "gpio111", +}; +static const char *const uim1_clk_mirb_groups[] = { + "gpio101", +}; +static const char *const uim1_data_mira_groups[] = { + "gpio110", +}; +static const char *const uim1_data_mirb_groups[] = { + "gpio100", +}; +static const char *const uim1_present_mira_groups[] = { + "gpio113", +}; +static const char *const uim1_present_mirb_groups[] = { + "gpio103", +}; +static const char *const uim1_reset_mira_groups[] = { + "gpio112", +}; +static const char *const uim1_reset_mirb_groups[] = { + "gpio102", +}; +static const char *const usb0_hs_groups[] = { + "gpio125", +}; +static const char *const usb0_phy_ps_groups[] = { + "gpio131", +}; +static const char *const vfr_0_groups[] = { + "gpio56", +}; +static const char *const vfr_1_groups[] = { + "gpio126", +}; +static const char *const vsense_trigger_mirnat_groups[] = { + "gpio94", +}; +static const char *const wcn_sw_groups[] = { + "gpio52", +}; +static const char *const wcn_sw_ctrl_groups[] = { + "gpio45", +}; + +static const struct pinfunction milos_functions[] = { + MSM_PIN_FUNCTION(gpio), + MSM_PIN_FUNCTION(aoss_cti), + MSM_PIN_FUNCTION(atest_char), + MSM_PIN_FUNCTION(atest_usb), + MSM_PIN_FUNCTION(audio_ext_mclk0), + MSM_PIN_FUNCTION(audio_ext_mclk1), + MSM_PIN_FUNCTION(audio_ref_clk), + MSM_PIN_FUNCTION(cam_mclk), + MSM_PIN_FUNCTION(cci_async_in0), + MSM_PIN_FUNCTION(cci_i2c_scl), + MSM_PIN_FUNCTION(cci_i2c_sda), + MSM_PIN_FUNCTION(cci_timer), + MSM_PIN_FUNCTION(coex_uart1_rx), + MSM_PIN_FUNCTION(coex_uart1_tx), + MSM_PIN_FUNCTION(dbg_out_clk), + MSM_PIN_FUNCTION(ddr_bist_complete), + MSM_PIN_FUNCTION(ddr_bist_fail), + MSM_PIN_FUNCTION(ddr_bist_start), + MSM_PIN_FUNCTION(ddr_bist_stop), + MSM_PIN_FUNCTION(ddr_pxi0), + MSM_PIN_FUNCTION(ddr_pxi1), + MSM_PIN_FUNCTION(dp0_hot), + MSM_PIN_FUNCTION(egpio), + MSM_PIN_FUNCTION(gcc_gp1), + MSM_PIN_FUNCTION(gcc_gp2), + MSM_PIN_FUNCTION(gcc_gp3), + 
MSM_PIN_FUNCTION(host2wlan_sol), + MSM_PIN_FUNCTION(i2s0_data0), + MSM_PIN_FUNCTION(i2s0_data1), + MSM_PIN_FUNCTION(i2s0_sck), + MSM_PIN_FUNCTION(i2s0_ws), + MSM_PIN_FUNCTION(ibi_i3c), + MSM_PIN_FUNCTION(jitter_bist), + MSM_PIN_FUNCTION(mdp_vsync), + MSM_PIN_FUNCTION(mdp_vsync0_out), + MSM_PIN_FUNCTION(mdp_vsync1_out), + MSM_PIN_FUNCTION(mdp_vsync2_out), + MSM_PIN_FUNCTION(mdp_vsync3_out), + MSM_PIN_FUNCTION(mdp_vsync_e), + MSM_PIN_FUNCTION(nav_gpio0), + MSM_PIN_FUNCTION(nav_gpio1), + MSM_PIN_FUNCTION(nav_gpio2), + MSM_PIN_FUNCTION(pcie0_clk_req_n), + MSM_PIN_FUNCTION(pcie1_clk_req_n), + MSM_PIN_FUNCTION(phase_flag), + MSM_PIN_FUNCTION(pll_bist_sync), + MSM_PIN_FUNCTION(pll_clk_aux), + MSM_PIN_FUNCTION(prng_rosc0), + MSM_PIN_FUNCTION(prng_rosc1), + MSM_PIN_FUNCTION(prng_rosc2), + MSM_PIN_FUNCTION(prng_rosc3), + MSM_PIN_FUNCTION(qdss_cti), + MSM_PIN_FUNCTION(qdss_gpio), + MSM_PIN_FUNCTION(qlink0_enable), + MSM_PIN_FUNCTION(qlink0_request), + MSM_PIN_FUNCTION(qlink0_wmss), + MSM_PIN_FUNCTION(qlink1_enable), + MSM_PIN_FUNCTION(qlink1_request), + MSM_PIN_FUNCTION(qlink1_wmss), + MSM_PIN_FUNCTION(qspi0), + MSM_PIN_FUNCTION(qup0_se0), + MSM_PIN_FUNCTION(qup0_se1), + MSM_PIN_FUNCTION(qup0_se2), + MSM_PIN_FUNCTION(qup0_se3), + MSM_PIN_FUNCTION(qup0_se4), + MSM_PIN_FUNCTION(qup0_se5), + MSM_PIN_FUNCTION(qup0_se6), + MSM_PIN_FUNCTION(qup1_se0), + MSM_PIN_FUNCTION(qup1_se1), + MSM_PIN_FUNCTION(qup1_se2), + MSM_PIN_FUNCTION(qup1_se3), + MSM_PIN_FUNCTION(qup1_se4), + MSM_PIN_FUNCTION(qup1_se5), + MSM_PIN_FUNCTION(qup1_se6), + MSM_PIN_FUNCTION(resout_gpio_n), + MSM_PIN_FUNCTION(sd_write_protect), + MSM_PIN_FUNCTION(sdc1_clk), + MSM_PIN_FUNCTION(sdc1_cmd), + MSM_PIN_FUNCTION(sdc1_data), + MSM_PIN_FUNCTION(sdc1_rclk), + MSM_PIN_FUNCTION(sdc2_clk), + MSM_PIN_FUNCTION(sdc2_cmd), + MSM_PIN_FUNCTION(sdc2_data), + MSM_PIN_FUNCTION(sdc2_fb_clk), + MSM_PIN_FUNCTION(tb_trig_sdc1), + MSM_PIN_FUNCTION(tb_trig_sdc2), + MSM_PIN_FUNCTION(tgu_ch0_trigout), + MSM_PIN_FUNCTION(tgu_ch1_trigout), + 
MSM_PIN_FUNCTION(tmess_prng0), + MSM_PIN_FUNCTION(tmess_prng1), + MSM_PIN_FUNCTION(tmess_prng2), + MSM_PIN_FUNCTION(tmess_prng3), + MSM_PIN_FUNCTION(tsense_pwm1), + MSM_PIN_FUNCTION(tsense_pwm2), + MSM_PIN_FUNCTION(uim0_clk), + MSM_PIN_FUNCTION(uim0_data), + MSM_PIN_FUNCTION(uim0_present), + MSM_PIN_FUNCTION(uim0_reset), + MSM_PIN_FUNCTION(uim1_clk_mira), + MSM_PIN_FUNCTION(uim1_clk_mirb), + MSM_PIN_FUNCTION(uim1_data_mira), + MSM_PIN_FUNCTION(uim1_data_mirb), + MSM_PIN_FUNCTION(uim1_present_mira), + MSM_PIN_FUNCTION(uim1_present_mirb), + MSM_PIN_FUNCTION(uim1_reset_mira), + MSM_PIN_FUNCTION(uim1_reset_mirb), + MSM_PIN_FUNCTION(usb0_hs), + MSM_PIN_FUNCTION(usb0_phy_ps), + MSM_PIN_FUNCTION(vfr_0), + MSM_PIN_FUNCTION(vfr_1), + MSM_PIN_FUNCTION(vsense_trigger_mirnat), + MSM_PIN_FUNCTION(wcn_sw), + MSM_PIN_FUNCTION(wcn_sw_ctrl), +}; + +/* + * Every pin is maintained as a single group, and missing or non-existing pin + * would be maintained as dummy group to synchronize pin group index with + * pin descriptor registered with pinctrl core. + * Clients would not be able to request these dummy pin groups. 
+ */ +static const struct msm_pingroup milos_groups[] = { + [0] = PINGROUP(0, qup0_se0, ibi_i3c, aoss_cti, _, _, _, _, _, _, _, _), + [1] = PINGROUP(1, qup0_se0, ibi_i3c, aoss_cti, _, _, _, _, _, _, _, _), + [2] = PINGROUP(2, qup0_se0, _, _, _, _, _, _, _, _, _, _), + [3] = PINGROUP(3, qup0_se0, _, _, _, _, _, _, _, _, _, _), + [4] = PINGROUP(4, qup0_se1, ibi_i3c, aoss_cti, sd_write_protect, qdss_cti, _, _, _, _, _, _), + [5] = PINGROUP(5, qup0_se1, ibi_i3c, aoss_cti, qdss_cti, _, _, _, _, _, _, _), + [6] = PINGROUP(6, qup0_se1, qdss_cti, _, _, _, _, _, _, _, _, _), + [7] = PINGROUP(7, qup0_se1, qdss_cti, _, _, _, _, _, _, _, _, _), + [8] = PINGROUP(8, qup0_se2, qspi0, _, phase_flag, _, _, _, _, _, _, _), + [9] = PINGROUP(9, qup0_se2, qspi0, _, phase_flag, _, _, _, _, _, _, _), + [10] = PINGROUP(10, qup0_se2, qspi0, _, _, _, _, _, _, _, _, _), + [11] = PINGROUP(11, qup0_se2, qspi0, _, phase_flag, _, _, _, _, _, _, _), + [12] = PINGROUP(12, qup0_se2, qspi0, mdp_vsync0_out, mdp_vsync1_out, _, phase_flag, _, _, _, _, _), + [13] = PINGROUP(13, qup0_se2, qspi0, _, phase_flag, _, _, _, _, _, _, _), + [14] = PINGROUP(14, qup0_se2, qspi0, _, phase_flag, _, _, _, _, _, _, _), + [15] = PINGROUP(15, qup0_se3, i2s0_sck, _, phase_flag, _, _, _, _, _, _, _), + [16] = PINGROUP(16, qup0_se3, i2s0_data0, _, phase_flag, _, _, _, _, _, _, _), + [17] = PINGROUP(17, qup0_se3, i2s0_data1, tsense_pwm1, tsense_pwm2, _, _, _, _, _, _, _), + [18] = PINGROUP(18, qup0_se3, i2s0_ws, _, phase_flag, _, _, _, _, _, _, _), + [19] = PINGROUP(19, qup0_se4, mdp_vsync, _, _, _, _, _, _, _, _, _), + [20] = PINGROUP(20, qup0_se4, _, _, _, _, _, _, _, _, _, _), + [21] = PINGROUP(21, qup0_se4, _, _, _, _, _, _, _, _, _, _), + [22] = PINGROUP(22, qup0_se4, _, _, _, _, _, _, _, _, _, _), + [23] = PINGROUP(23, qup0_se5, qup0_se3, audio_ext_mclk0, _, atest_usb, ddr_pxi0, _, _, _, _, _), + [24] = PINGROUP(24, qup0_se5, qup0_se3, audio_ext_mclk1, audio_ref_clk, dbg_out_clk, _, atest_usb, ddr_pxi0, _, _, _), + 
[25] = PINGROUP(25, qup0_se5, _, _, _, _, _, _, _, _, _, _), + [26] = PINGROUP(26, qup0_se5, qup0_se3, pll_bist_sync, _, phase_flag, _, _, _, _, _, _), + [27] = PINGROUP(27, qup0_se6, _, _, _, _, _, _, _, _, _, _), + [28] = PINGROUP(28, qup0_se6, gcc_gp2, _, _, _, _, _, _, _, _, _), + [29] = PINGROUP(29, qup0_se6, gcc_gp1, _, _, _, _, _, _, _, _, _), + [30] = PINGROUP(30, qup0_se6, gcc_gp2, _, _, _, _, _, _, _, _, _), + [31] = PINGROUP(31, qup0_se6, gcc_gp3, _, _, _, _, _, _, _, _, _), + [32] = PINGROUP(32, qup1_se0, ibi_i3c, gcc_gp1, _, _, _, _, _, _, _, _), + [33] = PINGROUP(33, qup1_se0, ibi_i3c, gcc_gp3, _, _, _, _, _, _, _, _), + [34] = PINGROUP(34, sdc2_data, _, _, _, _, _, _, _, _, _, _), + [35] = PINGROUP(35, sdc2_data, _, _, _, _, _, _, _, _, _, _), + [36] = PINGROUP(36, qup1_se1, qup1_se2, ibi_i3c, pll_clk_aux, _, _, _, _, _, _, _), + [37] = PINGROUP(37, qup1_se1, qup1_se2, ibi_i3c, mdp_vsync, _, _, _, _, _, _, _), + [38] = PINGROUP(38, qup1_se1, qup1_se2, _, phase_flag, _, _, _, _, _, _, _), + [39] = PINGROUP(39, qup1_se1, resout_gpio_n, _, phase_flag, _, _, _, _, _, _, _), + [40] = PINGROUP(40, qup1_se2, mdp_vsync2_out, mdp_vsync3_out, _, phase_flag, qdss_gpio, _, _, _, _, _), + [41] = PINGROUP(41, qup1_se2, _, phase_flag, qdss_gpio, _, _, _, _, _, _, _), + [42] = PINGROUP(42, qup1_se2, _, phase_flag, qdss_gpio, _, _, _, _, _, _, _), + [43] = PINGROUP(43, qup1_se2, _, _, phase_flag, qdss_gpio, _, _, _, _, _, _), + [44] = PINGROUP(44, _, _, phase_flag, qdss_cti, atest_char, _, _, _, _, _, _), + [45] = PINGROUP(45, wcn_sw_ctrl, mdp_vsync_e, _, _, phase_flag, qdss_cti, atest_char, _, _, _, _), + [46] = PINGROUP(46, host2wlan_sol, _, phase_flag, qdss_gpio, atest_char, _, _, _, _, _, _), + [47] = PINGROUP(47, ddr_bist_stop, _, phase_flag, qdss_gpio, atest_char, _, _, _, _, _, _), + [48] = PINGROUP(48, qup1_se4, _, phase_flag, qdss_gpio, _, _, _, _, _, _, _), + [49] = PINGROUP(49, qup1_se4, _, phase_flag, qdss_gpio, _, _, _, _, _, _, _), + [50] = PINGROUP(50, 
qup1_se4, qdss_gpio, ddr_pxi1, _, _, _, _, _, _, _, _), + [51] = PINGROUP(51, qup1_se4, qdss_gpio, ddr_pxi1, _, _, _, _, _, _, _, _), + [52] = PINGROUP(52, qup1_se4, wcn_sw, qdss_gpio, _, _, _, _, _, _, _, _), + [53] = PINGROUP(53, qup1_se4, qdss_gpio, _, _, _, _, _, _, _, _, _), + [54] = PINGROUP(54, qup1_se4, qdss_cti, _, _, _, _, _, _, _, _, _), + [55] = PINGROUP(55, qup1_se5, qup1_se6, _, _, _, _, _, _, _, _, _), + [56] = PINGROUP(56, qup1_se5, qup1_se6, vfr_0, ddr_bist_fail, _, _, _, _, _, _, _), + [57] = PINGROUP(57, sdc2_data, _, _, _, _, _, _, _, _, _, _), + [58] = PINGROUP(58, sdc2_data, _, _, _, _, _, _, _, _, _, _), + [59] = PINGROUP(59, qup1_se6, _, qup1_se5, _, _, _, _, _, _, _, _), + [60] = PINGROUP(60, qup1_se6, _, qup1_se5, atest_usb, _, _, _, _, _, _, _), + [61] = PINGROUP(61, sdc2_cmd, _, _, _, _, _, _, _, _, _, _), + [62] = PINGROUP(62, sdc2_clk, _, _, _, _, _, _, _, _, _, _), + [63] = PINGROUP(63, coex_uart1_tx, _, phase_flag, atest_char, _, _, _, _, _, _, _), + [64] = PINGROUP(64, coex_uart1_rx, _, phase_flag, _, _, _, _, _, _, _, _), + [65] = PINGROUP(65, _, _, _, _, _, _, _, _, _, _, _), + [66] = PINGROUP(66, prng_rosc0, _, _, _, _, _, _, _, _, _, _), + [67] = PINGROUP(67, pcie0_clk_req_n, prng_rosc1, _, _, _, _, _, _, _, _, _), + [68] = PINGROUP(68, prng_rosc2, _, _, _, _, _, _, _, _, _, _), + [69] = PINGROUP(69, prng_rosc3, _, _, _, _, _, _, _, _, _, _), + [70] = PINGROUP(70, pcie1_clk_req_n, _, _, _, _, _, _, _, _, _, _), + [71] = PINGROUP(71, _, _, _, _, _, _, _, _, _, _, _), + [72] = PINGROUP(72, sdc1_rclk, mdp_vsync, _, _, _, _, _, _, _, _, _), + [73] = PINGROUP(73, sdc1_data, _, _, _, _, _, _, _, _, _, _), + [74] = PINGROUP(74, sdc1_data, _, _, _, _, _, _, _, _, _, _), + [75] = PINGROUP(75, sdc1_data, dp0_hot, _, _, _, _, _, _, _, _, _), + [76] = PINGROUP(76, sdc1_data, _, _, _, _, _, _, _, _, _, _), + [77] = PINGROUP(77, sdc1_clk, cci_timer, _, _, _, _, _, _, _, _, _), + [78] = PINGROUP(78, sdc1_cmd, tb_trig_sdc2, _, _, _, _, _, _, _, 
_, _), + [79] = PINGROUP(79, sdc1_data, _, _, _, _, _, _, _, _, _, _), + [80] = PINGROUP(80, sdc1_data, _, _, _, _, _, _, _, _, _, _), + [81] = PINGROUP(81, sdc1_data, _, _, _, _, _, _, _, _, _, _), + [82] = PINGROUP(82, sdc1_data, _, _, _, _, _, _, _, _, _, _), + [83] = PINGROUP(83, cam_mclk, cci_timer, tmess_prng1, qdss_gpio, _, _, _, _, _, _, _), + [84] = PINGROUP(84, cam_mclk, cci_timer, tmess_prng2, qdss_gpio, _, _, _, _, _, _, _), + [85] = PINGROUP(85, cam_mclk, cci_timer, tmess_prng3, qdss_gpio, _, _, _, _, _, _, _), + [86] = PINGROUP(86, cam_mclk, cci_async_in0, tmess_prng0, qdss_gpio, _, _, _, _, _, _, _), + [87] = PINGROUP(87, cam_mclk, tb_trig_sdc1, tgu_ch0_trigout, qdss_cti, _, _, _, _, _, _, _), + [88] = PINGROUP(88, cci_i2c_sda, tgu_ch1_trigout, _, qdss_gpio, _, _, _, _, _, _, _), + [89] = PINGROUP(89, cci_i2c_scl, _, qdss_gpio, _, _, _, _, _, _, _, _), + [90] = PINGROUP(90, cci_i2c_sda, qup1_se6, _, _, _, _, _, _, _, _, _), + [91] = PINGROUP(91, cci_i2c_scl, qup1_se6, _, _, _, _, _, _, _, _, _), + [92] = PINGROUP(92, cci_i2c_sda, qup1_se3, _, _, _, _, _, _, _, _, _), + [93] = PINGROUP(93, cci_i2c_scl, qup1_se3, _, _, _, _, _, _, _, _, _), + [94] = PINGROUP(94, cci_i2c_sda, qup1_se3, qup1_se0, _, vsense_trigger_mirnat, _, _, _, _, _, _), + [95] = PINGROUP(95, cci_i2c_scl, qup1_se3, qup1_se0, _, _, _, _, _, _, _, _), + [96] = PINGROUP(96, uim0_data, _, _, _, _, _, _, _, _, _, _), + [97] = PINGROUP(97, uim0_clk, _, _, _, _, _, _, _, _, _, _), + [98] = PINGROUP(98, uim0_reset, _, _, _, _, _, _, _, _, _, _), + [99] = PINGROUP(99, uim0_present, _, _, _, _, _, _, _, _, _, _), + [100] = PINGROUP(100, uim1_data_mirb, _, _, _, _, _, _, _, _, _, _), + [101] = PINGROUP(101, uim1_clk_mirb, _, _, _, _, _, _, _, _, _, _), + [102] = PINGROUP(102, uim1_reset_mirb, _, _, _, _, _, _, _, _, _, _), + [103] = PINGROUP(103, uim1_present_mirb, _, _, _, _, _, _, _, _, _, _), + [104] = PINGROUP(104, qlink0_request, _, _, _, _, _, _, _, _, _, _), + [105] = PINGROUP(105, 
qlink0_enable, _, _, _, _, _, _, _, _, _, _), + [106] = PINGROUP(106, qlink0_wmss, _, _, _, _, _, _, _, _, _, _), + [107] = PINGROUP(107, qlink1_request, _, _, _, _, _, _, _, _, _, _), + [108] = PINGROUP(108, qlink1_enable, _, _, _, _, _, _, _, _, _, _), + [109] = PINGROUP(109, qlink1_wmss, _, _, _, _, _, _, _, _, _, _), + [110] = PINGROUP(110, uim1_data_mira, _, _, _, _, _, _, _, _, _, _), + [111] = PINGROUP(111, uim1_clk_mira, _, _, _, _, _, _, _, _, _, _), + [112] = PINGROUP(112, uim1_reset_mira, _, _, _, _, _, _, _, _, _, _), + [113] = PINGROUP(113, uim1_present_mira, _, _, _, _, _, _, _, _, _, _), + [114] = PINGROUP(114, _, _, _, _, _, _, _, _, _, _, _), + [115] = PINGROUP(115, _, _, _, _, _, _, _, _, _, _, _), + [116] = PINGROUP(116, _, _, _, _, _, _, _, _, _, _, _), + [117] = PINGROUP(117, _, _, _, _, _, _, _, _, _, _, _), + [118] = PINGROUP(118, _, _, _, _, _, _, _, _, _, _, _), + [119] = PINGROUP(119, _, _, _, _, _, _, _, _, _, _, _), + [120] = PINGROUP(120, _, _, _, _, _, _, _, _, _, _, _), + [121] = PINGROUP(121, _, _, _, _, _, _, _, _, _, _, _), + [122] = PINGROUP(122, _, _, _, _, _, _, _, _, _, _, _), + [123] = PINGROUP(123, _, _, _, _, _, _, _, _, _, _, _), + [124] = PINGROUP(124, nav_gpio0, _, _, _, _, _, _, _, _, _, _), + [125] = PINGROUP(125, nav_gpio1, usb0_hs, _, _, _, _, _, _, _, _, _), + [126] = PINGROUP(126, _, nav_gpio2, vfr_1, _, _, _, _, _, _, _, _), + [127] = PINGROUP(127, _, _, phase_flag, _, _, _, _, _, _, _, _), + [128] = PINGROUP(128, sdc2_fb_clk, _, _, _, _, _, _, _, _, _, _), + [129] = PINGROUP(129, mdp_vsync, _, _, _, _, _, _, _, _, _, _), + [130] = PINGROUP(130, _, _, _, _, _, _, _, _, _, _, _), + [131] = PINGROUP(131, usb0_phy_ps, _, _, _, _, _, _, _, _, _, _), + [132] = PINGROUP(132, _, _, _, _, _, _, _, _, _, _, egpio), + [133] = PINGROUP(133, ddr_bist_start, _, _, _, _, _, _, _, _, _, egpio), + [134] = PINGROUP(134, _, _, _, _, _, _, _, _, _, _, egpio), + [135] = PINGROUP(135, _, _, _, _, _, _, _, _, _, _, egpio), + [136] = 
PINGROUP(136, _, _, _, _, _, _, _, _, _, _, egpio), + [137] = PINGROUP(137, ddr_bist_complete, _, _, _, _, _, _, _, _, _, egpio), + [138] = PINGROUP(138, _, phase_flag, qdss_gpio, _, _, _, _, _, _, _, egpio), + [139] = PINGROUP(139, _, phase_flag, qdss_gpio, _, _, _, _, _, _, _, egpio), + [140] = PINGROUP(140, _, phase_flag, qdss_gpio, _, _, _, _, _, _, _, egpio), + [141] = PINGROUP(141, jitter_bist, qdss_gpio, _, _, _, _, _, _, _, _, egpio), + [142] = PINGROUP(142, _, phase_flag, _, _, _, _, _, _, _, _, egpio), + [143] = PINGROUP(143, _, phase_flag, _, _, _, _, _, _, _, _, egpio), + [144] = PINGROUP(144, _, phase_flag, _, _, _, _, _, _, _, _, egpio), + [145] = PINGROUP(145, _, _, _, _, _, _, _, _, _, _, egpio), + [146] = PINGROUP(146, _, _, _, _, _, _, _, _, _, _, egpio), + [147] = PINGROUP(147, _, phase_flag, _, _, _, _, _, _, _, _, egpio), + [148] = PINGROUP(148, _, _, _, _, _, _, _, _, _, _, egpio), + [149] = PINGROUP(149, _, qdss_gpio, _, _, _, _, _, _, _, _, egpio), + [150] = PINGROUP(150, _, qdss_gpio, _, _, _, _, _, _, _, _, egpio), + [151] = PINGROUP(151, _, _, _, _, _, _, _, _, _, _, egpio), + [152] = PINGROUP(152, _, _, _, _, _, _, _, _, _, _, egpio), + [153] = PINGROUP(153, _, _, _, _, _, _, _, _, _, _, egpio), + [154] = PINGROUP(154, _, _, _, _, _, _, _, _, _, _, egpio), + [155] = PINGROUP(155, _, qdss_gpio, _, _, _, _, _, _, _, _, egpio), + [156] = PINGROUP(156, _, qdss_gpio, _, _, _, _, _, _, _, _, egpio), + [157] = PINGROUP(157, _, qdss_gpio, _, _, _, _, _, _, _, _, egpio), + [158] = PINGROUP(158, qdss_gpio, _, _, _, _, _, _, _, _, _, egpio), + [159] = PINGROUP(159, qdss_gpio, _, _, _, _, _, _, _, _, _, egpio), + [160] = PINGROUP(160, qdss_gpio, _, _, _, _, _, _, _, _, _, egpio), + [161] = PINGROUP(161, qdss_gpio, _, _, _, _, _, _, _, _, _, egpio), + [162] = PINGROUP(162, qdss_gpio, _, _, _, _, _, _, _, _, _, egpio), + [163] = PINGROUP(163, qdss_gpio, _, _, _, _, _, _, _, _, _, egpio), + [164] = PINGROUP(164, qdss_gpio, _, _, _, _, _, _, _, _, _, 
egpio), + [165] = PINGROUP(165, qdss_gpio, _, _, _, _, _, _, _, _, _, egpio), + [166] = PINGROUP(166, qdss_gpio, _, _, _, _, _, _, _, _, _, egpio), + [167] = UFS_RESET(ufs_reset, 0xb4004, 0xb5000), + [168] = SDC_QDSD_PINGROUP(sdc2_clk, 0xab000, 0, 6), + [169] = SDC_QDSD_PINGROUP(sdc2_cmd, 0xab000, 12, 3), + [170] = SDC_QDSD_PINGROUP(sdc2_data, 0xab000, 9, 0), +}; + +static const struct msm_gpio_wakeirq_map milos_pdc_map[] = { + { 0, 122 }, { 3, 95 }, { 4, 100 }, { 6, 52 }, { 7, 119 }, + { 8, 92 }, { 11, 54 }, { 12, 56 }, { 13, 64 }, { 14, 75 }, + { 15, 82 }, { 18, 89 }, { 19, 90 }, { 22, 93 }, { 23, 94 }, + { 26, 91 }, { 27, 57 }, { 30, 138 }, { 31, 96 }, { 32, 67 }, + { 34, 128 }, { 35, 98 }, { 36, 99 }, { 38, 101 }, { 39, 102 }, + { 40, 69 }, { 43, 103 }, { 44, 104 }, { 45, 126 }, { 47, 59 }, + { 48, 106 }, { 51, 107 }, { 52, 108 }, { 54, 110 }, { 55, 140 }, + { 56, 58 }, { 57, 129 }, { 58, 111 }, { 59, 112 }, { 60, 115 }, + { 61, 113 }, { 62, 114 }, { 64, 105 }, { 65, 55 }, { 67, 116 }, + { 68, 117 }, { 70, 120 }, { 71, 121 }, { 72, 97 }, { 73, 109 }, + { 74, 118 }, { 75, 132 }, { 76, 144 }, { 77, 127 }, { 78, 133 }, + { 79, 134 }, { 80, 135 }, { 81, 124 }, { 82, 136 }, { 87, 60 }, + { 91, 123 }, { 92, 125 }, { 95, 139 }, { 99, 53 }, { 103, 61 }, + { 104, 71 }, { 107, 137 }, { 113, 51 }, { 124, 72 }, { 125, 62 }, + { 126, 73 }, { 128, 63 }, { 129, 130 }, { 130, 65 }, { 131, 66 }, + { 133, 68 }, { 136, 70 }, { 143, 78 }, { 144, 79 }, { 145, 142 }, + { 148, 81 }, { 149, 76 }, { 150, 83 }, { 151, 84 }, { 153, 74 }, + { 155, 131 }, { 158, 85 }, { 159, 77 }, { 161, 80 }, { 162, 143 }, + { 163, 86 }, { 164, 87 }, { 166, 88 }, +}; + +static const struct msm_pinctrl_soc_data milos_tlmm = { + .pins = milos_pins, + .npins = ARRAY_SIZE(milos_pins), + .functions = milos_functions, + .nfunctions = ARRAY_SIZE(milos_functions), + .groups = milos_groups, + .ngroups = ARRAY_SIZE(milos_groups), + .ngpios = 168, + .wakeirq_map = milos_pdc_map, + .nwakeirq_map = 
ARRAY_SIZE(milos_pdc_map), + .egpio_func = 11, +}; + +static int milos_tlmm_probe(struct platform_device *pdev) +{ + return msm_pinctrl_probe(pdev, &milos_tlmm); +} + +static const struct of_device_id milos_tlmm_of_match[] = { + { .compatible = "qcom,milos-tlmm" }, + { /* sentinel */ } +}; + +static struct platform_driver milos_tlmm_driver = { + .driver = { + .name = "milos-tlmm", + .of_match_table = milos_tlmm_of_match, + }, + .probe = milos_tlmm_probe, +}; + +static int __init milos_tlmm_init(void) +{ + return platform_driver_register(&milos_tlmm_driver); +} +arch_initcall(milos_tlmm_init); + +static void __exit milos_tlmm_exit(void) +{ + platform_driver_unregister(&milos_tlmm_driver); +} +module_exit(milos_tlmm_exit); + +MODULE_DESCRIPTION("QTI Milos TLMM driver"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(of, milos_tlmm_of_match); From 912275c325f47dfa6a247fb845f0265e7dfa6ebd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Le=20Goffic?= Date: Fri, 11 Jul 2025 09:41:19 +0200 Subject: [PATCH 0650/2411] dt-bindings: pinctrl: stm32: Introduce HDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'HDP' stands for Hardware Debug Port; it is a hardware block in STMicroelectronics' MPUs that lets the user decide which internal SoC signal to observe. It provides 8 ports, and for each port there are up to 16 different signals that can be output. Signals are different for each MPU. 
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Clément Le Goffic Link: https://lore.kernel.org/20250711-hdp-upstream-v7-1-faeecf7aaee1@foss.st.com [Fixed up Clement's new email address] Signed-off-by: Linus Walleij --- .../bindings/pinctrl/st,stm32-hdp.yaml | 187 ++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 Documentation/devicetree/bindings/pinctrl/st,stm32-hdp.yaml diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-hdp.yaml b/Documentation/devicetree/bindings/pinctrl/st,stm32-hdp.yaml new file mode 100644 index 000000000000..845b6b7b7552 --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/st,stm32-hdp.yaml @@ -0,0 +1,187 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) STMicroelectronics 2025. +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pinctrl/st,stm32-hdp.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: STM32 Hardware Debug Port Mux/Config + +maintainers: + - Clément LE GOFFIC + +description: + STMicroelectronics' STM32 MPUs integrate a Hardware Debug Port (HDP). + It allows internal signals to be output on the SoC's GPIOs. 
+ +properties: + compatible: + enum: + - st,stm32mp131-hdp + - st,stm32mp151-hdp + - st,stm32mp251-hdp + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + +patternProperties: + "^hdp[0-7]-pins$": + type: object + $ref: pinmux-node.yaml# + additionalProperties: false + + properties: + pins: + pattern: '^HDP[0-7]$' + + function: true + + required: + - function + - pins + +allOf: + - $ref: pinctrl.yaml# + - if: + properties: + compatible: + contains: + const: st,stm32mp131-hdp + then: + patternProperties: + "^hdp[0-7]-pins$": + properties: + function: + enum: [ pwr_pwrwake_sys, pwr_stop_forbidden, pwr_stdby_wakeup, pwr_encomp_vddcore, + bsec_out_sec_niden, aiec_sys_wakeup, none, ddrctrl_lp_req, + pwr_ddr_ret_enable_n, dts_clk_ptat, sram3ctrl_tamp_erase_act, gpoval0, + pwr_sel_vth_vddcpu, pwr_mpu_ram_lowspeed, ca7_naxierrirq, pwr_okin_mr, + bsec_out_sec_dbgen, aiec_c1_wakeup, rcc_pwrds_mpu, ddrctrl_dfi_ctrlupd_req, + ddrctrl_cactive_ddrc_asr, sram3ctrl_hw_erase_act, nic400_s0_bready, gpoval1, + pwr_pwrwake_mpu, pwr_mpu_clock_disable_ack, ca7_ndbgreset_i, + bsec_in_rstcore_n, bsec_out_sec_bsc_dis, ddrctrl_dfi_init_complete, + ddrctrl_perf_op_is_refresh, ddrctrl_gskp_dfi_lp_req, sram3ctrl_sw_erase_act, + nic400_s0_bvalid, gpoval2, pwr_sel_vth_vddcore, pwr_mpu_clock_disable_req, + ca7_npmuirq0, ca7_nfiqout0, bsec_out_sec_dftlock, bsec_out_sec_jtag_dis, + rcc_pwrds_sys, sram3ctrl_tamp_erase_req, ddrctrl_stat_ddrc_reg_selfref_type0, + dts_valobus1_0, dts_valobus2_0, tamp_potential_tamp_erfcfg, nic400_s0_wready, + nic400_s0_rready, gpoval3, pwr_stop2_active, ca7_nl2reset_i, + ca7_npreset_varm_i, bsec_out_sec_dften, bsec_out_sec_dbgswenable, + eth1_out_pmt_intr_o, eth2_out_pmt_intr_o, ddrctrl_stat_ddrc_reg_selfref_type1, + ddrctrl_cactive_0, dts_valobus1_1, dts_valobus2_1, tamp_nreset_sram_ercfg, + nic400_s0_wlast, nic400_s0_rlast, gpoval4, ca7_standbywfil2, + pwr_vth_vddcore_ack, ca7_ncorereset_i, ca7_nirqout0, bsec_in_pwrok, + bsec_out_sec_deviceen, eth1_out_lpi_intr_o, 
eth2_out_lpi_intr_o, + ddrctrl_cactive_ddrc, ddrctrl_wr_credit_cnt, dts_valobus1_2, dts_valobus2_2, + pka_pka_itamp_out, nic400_s0_wvalid, nic400_s0_rvalid, gpoval5, + ca7_standbywfe0, pwr_vth_vddcpu_ack, ca7_evento, bsec_in_tamper_det, + bsec_out_sec_spniden, eth1_out_mac_speed_o1, eth2_out_mac_speed_o1, + ddrctrl_csysack_ddrc, ddrctrl_lpr_credit_cnt, dts_valobus1_3, dts_valobus2_3, + saes_tamper_out, nic400_s0_awready, nic400_s0_arready, gpoval6, + ca7_standbywfi0, pwr_rcc_vcpu_rdy, ca7_eventi, ca7_dbgack0, bsec_out_fuse_ok, + bsec_out_sec_spiden, eth1_out_mac_speed_o0, eth2_out_mac_speed_o0, + ddrctrl_csysreq_ddrc, ddrctrl_hpr_credit_cnt, dts_valobus1_4, dts_valobus2_4, + rng_tamper_out, nic400_s0_awavalid, nic400_s0_aravalid, gpoval7 ] + - if: + properties: + compatible: + contains: + const: st,stm32mp151-hdp + then: + patternProperties: + "^hdp[0-7]-pins$": + properties: + function: + enum: [ pwr_pwrwake_sys, cm4_sleepdeep, pwr_stdby_wkup, pwr_encomp_vddcore, + bsec_out_sec_niden, none, rcc_cm4_sleepdeep, gpu_dbg7, ddrctrl_lp_req, + pwr_ddr_ret_enable_n, dts_clk_ptat, gpoval0, pwr_pwrwake_mcu, cm4_halted, + ca7_naxierrirq, pwr_okin_mr, bsec_out_sec_dbgen, exti_sys_wakeup, + rcc_pwrds_mpu, gpu_dbg6, ddrctrl_dfi_ctrlupd_req, ddrctrl_cactive_ddrc_asr, + gpoval1, pwr_pwrwake_mpu, cm4_rxev, ca7_npmuirq1, ca7_nfiqout1, + bsec_in_rstcore_n, exti_c2_wakeup, rcc_pwrds_mcu, gpu_dbg5, + ddrctrl_dfi_init_complete, ddrctrl_perf_op_is_refresh, + ddrctrl_gskp_dfi_lp_req, gpoval2, pwr_sel_vth_vddcore, cm4_txev, ca7_npmuirq0, + ca7_nfiqout0, bsec_out_sec_dftlock, exti_c1_wakeup, rcc_pwrds_sys, gpu_dbg4, + ddrctrl_stat_ddrc_reg_selfref_type0, ddrctrl_cactive_1, dts_valobus1_0, + dts_valobus2_0, gpoval3, pwr_mpu_pdds_not_cstbydis, cm4_sleeping, ca7_nreset1, + ca7_nirqout1, bsec_out_sec_dften, bsec_out_sec_dbgswenable, + eth_out_pmt_intr_o, gpu_dbg3, ddrctrl_stat_ddrc_reg_selfref_type1, + ddrctrl_cactive_0, dts_valobus1_1, dts_valobus2_1, gpoval4, ca7_standbywfil2, + 
pwr_vth_vddcore_ack, ca7_nreset0, ca7_nirqout0, bsec_in_pwrok, + bsec_out_sec_deviceen, eth_out_lpi_intr_o, gpu_dbg2, ddrctrl_cactive_ddrc, + ddrctrl_wr_credit_cnt, dts_valobus1_2, dts_valobus2_2, gpoval5, + ca7_standbywfi1, ca7_standbywfe1, ca7_evento, ca7_dbgack1, + bsec_out_sec_spniden, eth_out_mac_speed_o1, gpu_dbg1, ddrctrl_csysack_ddrc, + ddrctrl_lpr_credit_cnt, dts_valobus1_3, dts_valobus2_3, gpoval6, + ca7_standbywfi0, ca7_standbywfe0, ca7_dbgack0, bsec_out_fuse_ok, + bsec_out_sec_spiden, eth_out_mac_speed_o0, gpu_dbg0, ddrctrl_csysreq_ddrc, + ddrctrl_hpr_credit_cnt, dts_valobus1_4, dts_valobus2_4, gpoval7 ] + - if: + properties: + compatible: + contains: + const: st,stm32mp251-hdp + then: + patternProperties: + "^hdp[0-7]-pins$": + properties: + function: + enum: [ pwr_pwrwake_sys, cpu2_sleep_deep, bsec_out_tst_sdr_unlock_or_disable_scan, + bsec_out_nidenm, bsec_out_nidena, cpu2_state_0, rcc_pwrds_sys, gpu_dbg7, + ddrss_csysreq_ddrc, ddrss_dfi_phyupd_req, cpu3_sleep_deep, + d2_gbl_per_clk_bus_req, pcie_usb_cxpl_debug_info_ei_0, + pcie_usb_cxpl_debug_info_ei_8, d3_state_0, gpoval0, pwr_pwrwake_cpu2, + cpu2_halted, cpu2_state_1, bsec_out_dbgenm, bsec_out_dbgena, exti1_sys_wakeup, + rcc_pwrds_cpu2, gpu_dbg6, ddrss_csysack_ddrc, ddrss_dfi_phymstr_req, + cpu3_halted, d2_gbl_per_dma_req, pcie_usb_cxpl_debug_info_ei_1, + pcie_usb_cxpl_debug_info_ei_9, d3_state_1, gpoval1, pwr_pwrwake_cpu1, + cpu2_rxev, cpu1_npumirq1, cpu1_nfiqout1, bsec_out_shdbgen, exti1_cpu2_wakeup, + rcc_pwrds_cpu1, gpu_dbg5, ddrss_cactive_ddrc, ddrss_dfi_lp_req, cpu3_rxev, + hpdma1_clk_bus_req, pcie_usb_cxpl_debug_info_ei_2, + pcie_usb_cxpl_debug_info_ei_10, d3_state_2, gpoval2, pwr_sel_vth_vddcpu, + cpu2_txev, cpu1_npumirq0, cpu1_nfiqout0, bsec_out_ddbgen, exti1_cpu1_wakeup, + cpu3_state_0, gpu_dbg4, ddrss_mcdcg_en, ddrss_dfi_freq_0, cpu3_txev, + hpdma2_clk_bus_req, pcie_usb_cxpl_debug_info_ei_3, + pcie_usb_cxpl_debug_info_ei_11, d1_state_0, gpoval3, pwr_sel_vth_vddcore, + cpu2_sleeping, 
cpu1_evento, cpu1_nirqout1, bsec_out_spnidena, exti2_d3_wakeup, + eth1_out_pmt_intr_o, gpu_dbg3, ddrss_dphycg_en, ddrss_obsp0, cpu3_sleeping, + hpdma3_clk_bus_req, pcie_usb_cxpl_debug_info_ei_4, + pcie_usb_cxpl_debug_info_ei_12, d1_state_1, gpoval4, cpu1_standby_wfil2, + none, cpu1_nirqout0, bsec_out_spidena, exti2_cpu3_wakeup, eth1_out_lpi_intr_o, + gpu_dbg2, ddrctrl_dfi_init_start, ddrss_obsp1, cpu3_state_1, + d3_gbl_per_clk_bus_req, pcie_usb_cxpl_debug_info_ei_5, + pcie_usb_cxpl_debug_info_ei_13, d1_state_2, gpoval5, cpu1_standby_wfi1, + cpu1_standby_wfe1, cpu1_halted1, cpu1_naxierrirq, bsec_out_spnidenm, + exti2_cpu2_wakeup, eth2_out_pmt_intr_o, gpu_dbg1, ddrss_dfi_init_complete, + ddrss_obsp2, d2_state_0, d3_gbl_per_dma_req, pcie_usb_cxpl_debug_info_ei_6, + pcie_usb_cxpl_debug_info_ei_14, cpu1_state_0, gpoval6, cpu1_standby_wfi0, + cpu1_standby_wfe0, cpu1_halted0, bsec_out_spidenm, exti2_cpu1__wakeup, + eth2_out_lpi_intr_o, gpu_dbg0, ddrss_dfi_ctrlupd_req, ddrss_obsp3, d2_state_1, + lpdma1_clk_bus_req, pcie_usb_cxpl_debug_info_ei_7, + pcie_usb_cxpl_debug_info_ei_15, cpu1_state_1, gpoval7 ] + +required: + - compatible + - reg + - clocks + +additionalProperties: false + +examples: + - | + #include <dt-bindings/clock/stm32mp1-clks.h> + + pinctrl@54090000 { + compatible = "st,stm32mp151-hdp"; + reg = <0x54090000 0x400>; + clocks = <&rcc HDP>; + pinctrl-names = "default"; + pinctrl-0 = <&hdp2_gpo>; + hdp2_gpo: hdp2-pins { + function = "gpoval2"; + pins = "HDP2"; + }; + }; From 8eabf5ddbb08c2261de839a97c4257b79a15f60f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Le=20Goffic?= Date: Fri, 11 Jul 2025 09:41:20 +0200 Subject: [PATCH 0651/2411] pinctrl: stm32: Introduce HDP driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces the driver for the Hardware Debug Port available on STM32MP platforms. The HDP allows the observation of internal SoC signals by using multiplexers. 
Each HDP port can provide up to 16 internal signals (one of them can be software controlled as a GPO). Reviewed-by: Linus Walleij Signed-off-by: Clément Le Goffic Link: https://lore.kernel.org/20250711-hdp-upstream-v7-2-faeecf7aaee1@foss.st.com Signed-off-by: Linus Walleij --- drivers/pinctrl/stm32/Kconfig | 14 + drivers/pinctrl/stm32/Makefile | 1 + drivers/pinctrl/stm32/pinctrl-stm32-hdp.c | 720 ++++++++++++++++++++++ 3 files changed, 735 insertions(+) create mode 100644 drivers/pinctrl/stm32/pinctrl-stm32-hdp.c diff --git a/drivers/pinctrl/stm32/Kconfig b/drivers/pinctrl/stm32/Kconfig index 297a2f088bc1..5f67e1ee66dd 100644 --- a/drivers/pinctrl/stm32/Kconfig +++ b/drivers/pinctrl/stm32/Kconfig @@ -57,4 +57,18 @@ config PINCTRL_STM32MP257 depends on OF && HAS_IOMEM default MACH_STM32MP25 || (ARCH_STM32 && ARM64) select PINCTRL_STM32 + +config PINCTRL_STM32_HDP + tristate "STMicroelectronics STM32 Hardware Debug Port (HDP) pin control" + depends on OF && HAS_IOMEM + default ARCH_STM32 && !ARM_SINGLE_ARMV7M + select PINMUX + select GENERIC_PINCONF + select GPIOLIB + help + The Hardware Debug Port allows the observation of internal signals. + It uses a configurable multiplexer to route signals to a dedicated observation register. + This driver also permits the observation of signals on external SoC pins. + It permits the observation of up to 16 signals per HDP line. 
+ endif diff --git a/drivers/pinctrl/stm32/Makefile b/drivers/pinctrl/stm32/Makefile index 7b17464d8de1..98a1bbc7e16c 100644 --- a/drivers/pinctrl/stm32/Makefile +++ b/drivers/pinctrl/stm32/Makefile @@ -11,3 +11,4 @@ obj-$(CONFIG_PINCTRL_STM32H743) += pinctrl-stm32h743.o obj-$(CONFIG_PINCTRL_STM32MP135) += pinctrl-stm32mp135.o obj-$(CONFIG_PINCTRL_STM32MP157) += pinctrl-stm32mp157.o obj-$(CONFIG_PINCTRL_STM32MP257) += pinctrl-stm32mp257.o +obj-$(CONFIG_PINCTRL_STM32_HDP) += pinctrl-stm32-hdp.o diff --git a/drivers/pinctrl/stm32/pinctrl-stm32-hdp.c b/drivers/pinctrl/stm32/pinctrl-stm32-hdp.c new file mode 100644 index 000000000000..e91442eb566b --- /dev/null +++ b/drivers/pinctrl/stm32/pinctrl-stm32-hdp.c @@ -0,0 +1,720 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) STMicroelectronics 2025 - All Rights Reserved + * Author: Clément Le Goffic for STMicroelectronics. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../core.h" + +#define DRIVER_NAME "stm32_hdp" +#define HDP_CTRL_ENABLE 1 +#define HDP_CTRL_DISABLE 0 + +#define HDP_CTRL 0x000 +#define HDP_MUX 0x004 +#define HDP_VAL 0x010 +#define HDP_GPOSET 0x014 +#define HDP_GPOCLR 0x018 +#define HDP_GPOVAL 0x01c +#define HDP_VERR 0x3f4 +#define HDP_IPIDR 0x3f8 +#define HDP_SIDR 0x3fc + +#define HDP_MUX_SHIFT(n) ((n) * 4) +#define HDP_MUX_MASK(n) (GENMASK(3, 0) << HDP_MUX_SHIFT(n)) +#define HDP_MUX_GPOVAL(n) (0xf << HDP_MUX_SHIFT(n)) + +#define HDP_PIN 8 +#define HDP_FUNC 16 +#define HDP_FUNC_TOTAL (HDP_PIN * HDP_FUNC) + +struct stm32_hdp { + struct device *dev; + void __iomem *base; + struct clk *clk; + struct pinctrl_dev *pctl_dev; + struct gpio_chip gpio_chip; + u32 mux_conf; + u32 gposet_conf; + const char * const *func_name; +}; + +static const struct pinctrl_pin_desc stm32_hdp_pins[] = { + PINCTRL_PIN(0, "HDP0"), + PINCTRL_PIN(1, "HDP1"), + PINCTRL_PIN(2, "HDP2"), + PINCTRL_PIN(3, "HDP3"), + PINCTRL_PIN(4, 
"HDP4"), + PINCTRL_PIN(5, "HDP5"), + PINCTRL_PIN(6, "HDP6"), + PINCTRL_PIN(7, "HDP7"), +}; + +static const char * const func_name_mp13[] = { + //HDP0 functions: + "pwr_pwrwake_sys", + "pwr_stop_forbidden", + "pwr_stdby_wakeup", + "pwr_encomp_vddcore", + "bsec_out_sec_niden", + "aiec_sys_wakeup", + "none", + "none", + "ddrctrl_lp_req", + "pwr_ddr_ret_enable_n", + "dts_clk_ptat", + "none", + "sram3ctrl_tamp_erase_act", + "none", + "none", + "gpoval0", + //HDP1 functions: + "pwr_sel_vth_vddcpu", + "pwr_mpu_ram_lowspeed", + "ca7_naxierrirq", + "pwr_okin_mr", + "bsec_out_sec_dbgen", + "aiec_c1_wakeup", + "rcc_pwrds_mpu", + "none", + "ddrctrl_dfi_ctrlupd_req", + "ddrctrl_cactive_ddrc_asr", + "none", + "none", + "sram3ctrl_hw_erase_act", + "nic400_s0_bready", + "none", + "gpoval1", + //HDP2 functions: + "pwr_pwrwake_mpu", + "pwr_mpu_clock_disable_ack", + "ca7_ndbgreset_i", + "none", + "bsec_in_rstcore_n", + "bsec_out_sec_bsc_dis", + "none", + "none", + "ddrctrl_dfi_init_complete", + "ddrctrl_perf_op_is_refresh", + "ddrctrl_gskp_dfi_lp_req", + "none", + "sram3ctrl_sw_erase_act", + "nic400_s0_bvalid", + "none", + "gpoval2", + //HDP3 functions: + "pwr_sel_vth_vddcore", + "pwr_mpu_clock_disable_req", + "ca7_npmuirq0", + "ca7_nfiqout0", + "bsec_out_sec_dftlock", + "bsec_out_sec_jtag_dis", + "rcc_pwrds_sys", + "sram3ctrl_tamp_erase_req", + "ddrctrl_stat_ddrc_reg_selfref_type0", + "none", + "dts_valobus1_0", + "dts_valobus2_0", + "tamp_potential_tamp_erfcfg", + "nic400_s0_wready", + "nic400_s0_rready", + "gpoval3", + //HDP4 functions: + "none", + "pwr_stop2_active", + "ca7_nl2reset_i", + "ca7_npreset_varm_i", + "bsec_out_sec_dften", + "bsec_out_sec_dbgswenable", + "eth1_out_pmt_intr_o", + "eth2_out_pmt_intr_o", + "ddrctrl_stat_ddrc_reg_selfref_type1", + "ddrctrl_cactive_0", + "dts_valobus1_1", + "dts_valobus2_1", + "tamp_nreset_sram_ercfg", + "nic400_s0_wlast", + "nic400_s0_rlast", + "gpoval4", + //HDP5 functions: + "ca7_standbywfil2", + "pwr_vth_vddcore_ack", + 
"ca7_ncorereset_i", + "ca7_nirqout0", + "bsec_in_pwrok", + "bsec_out_sec_deviceen", + "eth1_out_lpi_intr_o", + "eth2_out_lpi_intr_o", + "ddrctrl_cactive_ddrc", + "ddrctrl_wr_credit_cnt", + "dts_valobus1_2", + "dts_valobus2_2", + "pka_pka_itamp_out", + "nic400_s0_wvalid", + "nic400_s0_rvalid", + "gpoval5", + //HDP6 functions: + "ca7_standbywfe0", + "pwr_vth_vddcpu_ack", + "ca7_evento", + "none", + "bsec_in_tamper_det", + "bsec_out_sec_spniden", + "eth1_out_mac_speed_o1", + "eth2_out_mac_speed_o1", + "ddrctrl_csysack_ddrc", + "ddrctrl_lpr_credit_cnt", + "dts_valobus1_3", + "dts_valobus2_3", + "saes_tamper_out", + "nic400_s0_awready", + "nic400_s0_arready", + "gpoval6", + //HDP7 functions: + "ca7_standbywfi0", + "pwr_rcc_vcpu_rdy", + "ca7_eventi", + "ca7_dbgack0", + "bsec_out_fuse_ok", + "bsec_out_sec_spiden", + "eth1_out_mac_speed_o0", + "eth2_out_mac_speed_o0", + "ddrctrl_csysreq_ddrc", + "ddrctrl_hpr_credit_cnt", + "dts_valobus1_4", + "dts_valobus2_4", + "rng_tamper_out", + "nic400_s0_awavalid", + "nic400_s0_aravalid", + "gpoval7", +}; + +static const char * const func_name_mp15[] = { + //HDP0 functions: + "pwr_pwrwake_sys", + "cm4_sleepdeep", + "pwr_stdby_wkup", + "pwr_encomp_vddcore", + "bsec_out_sec_niden", + "none", + "rcc_cm4_sleepdeep", + "gpu_dbg7", + "ddrctrl_lp_req", + "pwr_ddr_ret_enable_n", + "dts_clk_ptat", + "none", + "none", + "none", + "none", + "gpoval0", + //HDP1 functions: + "pwr_pwrwake_mcu", + "cm4_halted", + "ca7_naxierrirq", + "pwr_okin_mr", + "bsec_out_sec_dbgen", + "exti_sys_wakeup", + "rcc_pwrds_mpu", + "gpu_dbg6", + "ddrctrl_dfi_ctrlupd_req", + "ddrctrl_cactive_ddrc_asr", + "none", + "none", + "none", + "none", + "none", + "gpoval1", + //HDP2 functions: + "pwr_pwrwake_mpu", + "cm4_rxev", + "ca7_npmuirq1", + "ca7_nfiqout1", + "bsec_in_rstcore_n", + "exti_c2_wakeup", + "rcc_pwrds_mcu", + "gpu_dbg5", + "ddrctrl_dfi_init_complete", + "ddrctrl_perf_op_is_refresh", + "ddrctrl_gskp_dfi_lp_req", + "none", + "none", + "none", + "none", + "gpoval2", 
+ //HDP3 functions: + "pwr_sel_vth_vddcore", + "cm4_txev", + "ca7_npmuirq0", + "ca7_nfiqout0", + "bsec_out_sec_dftlock", + "exti_c1_wakeup", + "rcc_pwrds_sys", + "gpu_dbg4", + "ddrctrl_stat_ddrc_reg_selfref_type0", + "ddrctrl_cactive_1", + "dts_valobus1_0", + "dts_valobus2_0", + "none", + "none", + "none", + "gpoval3", + //HDP4 functions: + "pwr_mpu_pdds_not_cstbydis", + "cm4_sleeping", + "ca7_nreset1", + "ca7_nirqout1", + "bsec_out_sec_dften", + "bsec_out_sec_dbgswenable", + "eth_out_pmt_intr_o", + "gpu_dbg3", + "ddrctrl_stat_ddrc_reg_selfref_type1", + "ddrctrl_cactive_0", + "dts_valobus1_1", + "dts_valobus2_1", + "none", + "none", + "none", + "gpoval4", + //HDP5 functions: + "ca7_standbywfil2", + "pwr_vth_vddcore_ack", + "ca7_nreset0", + "ca7_nirqout0", + "bsec_in_pwrok", + "bsec_out_sec_deviceen", + "eth_out_lpi_intr_o", + "gpu_dbg2", + "ddrctrl_cactive_ddrc", + "ddrctrl_wr_credit_cnt", + "dts_valobus1_2", + "dts_valobus2_2", + "none", + "none", + "none", + "gpoval5", + //HDP6 functions: + "ca7_standbywfi1", + "ca7_standbywfe1", + "ca7_evento", + "ca7_dbgack1", + "none", + "bsec_out_sec_spniden", + "eth_out_mac_speed_o1", + "gpu_dbg1", + "ddrctrl_csysack_ddrc", + "ddrctrl_lpr_credit_cnt", + "dts_valobus1_3", + "dts_valobus2_3", + "none", + "none", + "none", + "gpoval6", + //HDP7 functions: + "ca7_standbywfi0", + "ca7_standbywfe0", + "none", + "ca7_dbgack0", + "bsec_out_fuse_ok", + "bsec_out_sec_spiden", + "eth_out_mac_speed_o0", + "gpu_dbg0", + "ddrctrl_csysreq_ddrc", + "ddrctrl_hpr_credit_cnt", + "dts_valobus1_4", + "dts_valobus2_4", + "none", + "none", + "none", + "gpoval7" +}; + +static const char * const func_name_mp25[] = { + //HDP0 functions: + "pwr_pwrwake_sys", + "cpu2_sleep_deep", + "bsec_out_tst_sdr_unlock_or_disable_scan", + "bsec_out_nidenm", + "bsec_out_nidena", + "cpu2_state_0", + "rcc_pwrds_sys", + "gpu_dbg7", + "ddrss_csysreq_ddrc", + "ddrss_dfi_phyupd_req", + "cpu3_sleep_deep", + "d2_gbl_per_clk_bus_req", + "pcie_usb_cxpl_debug_info_ei_0", + 
"pcie_usb_cxpl_debug_info_ei_8", + "d3_state_0", + "gpoval0", + //HDP1 functions: + "pwr_pwrwake_cpu2", + "cpu2_halted", + "cpu2_state_1", + "bsec_out_dbgenm", + "bsec_out_dbgena", + "exti1_sys_wakeup", + "rcc_pwrds_cpu2", + "gpu_dbg6", + "ddrss_csysack_ddrc", + "ddrss_dfi_phymstr_req", + "cpu3_halted", + "d2_gbl_per_dma_req", + "pcie_usb_cxpl_debug_info_ei_1", + "pcie_usb_cxpl_debug_info_ei_9", + "d3_state_1", + "gpoval1", + //HDP2 functions: + "pwr_pwrwake_cpu1", + "cpu2_rxev", + "cpu1_npumirq1", + "cpu1_nfiqout1", + "bsec_out_shdbgen", + "exti1_cpu2_wakeup", + "rcc_pwrds_cpu1", + "gpu_dbg5", + "ddrss_cactive_ddrc", + "ddrss_dfi_lp_req", + "cpu3_rxev", + "hpdma1_clk_bus_req", + "pcie_usb_cxpl_debug_info_ei_2", + "pcie_usb_cxpl_debug_info_ei_10", + "d3_state_2", + "gpoval2", + //HDP3 functions: + "pwr_sel_vth_vddcpu", + "cpu2_txev", + "cpu1_npumirq0", + "cpu1_nfiqout0", + "bsec_out_ddbgen", + "exti1_cpu1_wakeup", + "cpu3_state_0", + "gpu_dbg4", + "ddrss_mcdcg_en", + "ddrss_dfi_freq_0", + "cpu3_txev", + "hpdma2_clk_bus_req", + "pcie_usb_cxpl_debug_info_ei_3", + "pcie_usb_cxpl_debug_info_ei_11", + "d1_state_0", + "gpoval3", + //HDP4 functions: + "pwr_sel_vth_vddcore", + "cpu2_sleeping", + "cpu1_evento", + "cpu1_nirqout1", + "bsec_out_spnidena", + "exti2_d3_wakeup", + "eth1_out_pmt_intr_o", + "gpu_dbg3", + "ddrss_dphycg_en", + "ddrss_obsp0", + "cpu3_sleeping", + "hpdma3_clk_bus_req", + "pcie_usb_cxpl_debug_info_ei_4", + "pcie_usb_cxpl_debug_info_ei_12", + "d1_state_1", + "gpoval4", + //HDP5 functions: + "cpu1_standby_wfil2", + "none", + "none", + "cpu1_nirqout0", + "bsec_out_spidena", + "exti2_cpu3_wakeup", + "eth1_out_lpi_intr_o", + "gpu_dbg2", + "ddrctrl_dfi_init_start", + "ddrss_obsp1", + "cpu3_state_1", + "d3_gbl_per_clk_bus_req", + "pcie_usb_cxpl_debug_info_ei_5", + "pcie_usb_cxpl_debug_info_ei_13", + "d1_state_2", + "gpoval5", + //HDP6 functions: + "cpu1_standby_wfi1", + "cpu1_standby_wfe1", + "cpu1_halted1", + "cpu1_naxierrirq", + "bsec_out_spnidenm", + 
"exti2_cpu2_wakeup", + "eth2_out_pmt_intr_o", + "gpu_dbg1", + "ddrss_dfi_init_complete", + "ddrss_obsp2", + "d2_state_0", + "d3_gbl_per_dma_req", + "pcie_usb_cxpl_debug_info_ei_6", + "pcie_usb_cxpl_debug_info_ei_14", + "cpu1_state_0", + "gpoval6", + //HDP7 functions: + "cpu1_standby_wfi0", + "cpu1_standby_wfe0", + "cpu1_halted0", + "none", + "bsec_out_spidenm", + "exti2_cpu1__wakeup", + "eth2_out_lpi_intr_o", + "gpu_dbg0", + "ddrss_dfi_ctrlupd_req", + "ddrss_obsp3", + "d2_state_1", + "lpdma1_clk_bus_req", + "pcie_usb_cxpl_debug_info_ei_7", + "pcie_usb_cxpl_debug_info_ei_15", + "cpu1_state_1", + "gpoval7", +}; + +static const char * const stm32_hdp_pins_group[] = { + "HDP0", + "HDP1", + "HDP2", + "HDP3", + "HDP4", + "HDP5", + "HDP6", + "HDP7" +}; + +static int stm32_hdp_gpio_get_direction(struct gpio_chip *gc, unsigned int offset) +{ + return GPIO_LINE_DIRECTION_OUT; +} + +static int stm32_hdp_pinctrl_get_groups_count(struct pinctrl_dev *pctldev) +{ + return ARRAY_SIZE(stm32_hdp_pins); +} + +static const char *stm32_hdp_pinctrl_get_group_name(struct pinctrl_dev *pctldev, + unsigned int selector) +{ + return stm32_hdp_pins[selector].name; +} + +static int stm32_hdp_pinctrl_get_group_pins(struct pinctrl_dev *pctldev, unsigned int selector, + const unsigned int **pins, unsigned int *num_pins) +{ + *pins = &stm32_hdp_pins[selector].number; + *num_pins = 1; + + return 0; +} + +static const struct pinctrl_ops stm32_hdp_pinctrl_ops = { + .get_groups_count = stm32_hdp_pinctrl_get_groups_count, + .get_group_name = stm32_hdp_pinctrl_get_group_name, + .get_group_pins = stm32_hdp_pinctrl_get_group_pins, + .dt_node_to_map = pinconf_generic_dt_node_to_map_all, + .dt_free_map = pinconf_generic_dt_free_map, +}; + +static int stm32_hdp_pinmux_get_functions_count(struct pinctrl_dev *pctldev) +{ + return HDP_FUNC_TOTAL; +} + +static const char *stm32_hdp_pinmux_get_function_name(struct pinctrl_dev *pctldev, + unsigned int selector) +{ + struct stm32_hdp *hdp = 
pinctrl_dev_get_drvdata(pctldev); + + return hdp->func_name[selector]; +} + +static int stm32_hdp_pinmux_get_function_groups(struct pinctrl_dev *pctldev, unsigned int selector, + const char *const **groups, + unsigned int *num_groups) +{ + u32 index = selector / HDP_FUNC; + + *groups = &stm32_hdp_pins[index].name; + *num_groups = 1; + + return 0; +} + +static int stm32_hdp_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned int func_selector, + unsigned int group_selector) +{ + struct stm32_hdp *hdp = pinctrl_dev_get_drvdata(pctldev); + + unsigned int pin = stm32_hdp_pins[group_selector].number; + u32 mux; + + func_selector %= HDP_FUNC; + mux = readl_relaxed(hdp->base + HDP_MUX); + mux &= ~HDP_MUX_MASK(pin); + mux |= func_selector << HDP_MUX_SHIFT(pin); + + writel_relaxed(mux, hdp->base + HDP_MUX); + hdp->mux_conf = mux; + + return 0; +} + +static const struct pinmux_ops stm32_hdp_pinmux_ops = { + .get_functions_count = stm32_hdp_pinmux_get_functions_count, + .get_function_name = stm32_hdp_pinmux_get_function_name, + .get_function_groups = stm32_hdp_pinmux_get_function_groups, + .set_mux = stm32_hdp_pinmux_set_mux, + .gpio_set_direction = NULL, +}; + +static struct pinctrl_desc stm32_hdp_pdesc = { + .name = DRIVER_NAME, + .pins = stm32_hdp_pins, + .npins = ARRAY_SIZE(stm32_hdp_pins), + .pctlops = &stm32_hdp_pinctrl_ops, + .pmxops = &stm32_hdp_pinmux_ops, + .owner = THIS_MODULE, +}; + +static const struct of_device_id stm32_hdp_of_match[] = { + { + .compatible = "st,stm32mp131-hdp", + .data = &func_name_mp13, + }, + { + .compatible = "st,stm32mp151-hdp", + .data = &func_name_mp15, + }, + { + .compatible = "st,stm32mp251-hdp", + .data = &func_name_mp25, + }, + {} +}; +MODULE_DEVICE_TABLE(of, stm32_hdp_of_match); + +static int stm32_hdp_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct stm32_hdp *hdp; + u8 version; + int err; + + hdp = devm_kzalloc(dev, sizeof(*hdp), GFP_KERNEL); + if (!hdp) + return -ENOMEM; + hdp->dev = dev; + + 
platform_set_drvdata(pdev, hdp); + + hdp->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(hdp->base)) + return PTR_ERR(hdp->base); + + hdp->func_name = of_device_get_match_data(dev); + if (!hdp->func_name) + return dev_err_probe(dev, -ENODEV, "No function name provided\n"); + + hdp->clk = devm_clk_get_enabled(dev, NULL); + if (IS_ERR(hdp->clk)) + return dev_err_probe(dev, PTR_ERR(hdp->clk), "No HDP clock provided\n"); + + err = devm_pinctrl_register_and_init(dev, &stm32_hdp_pdesc, hdp, &hdp->pctl_dev); + if (err) + return dev_err_probe(dev, err, "Failed to register pinctrl\n"); + + err = pinctrl_enable(hdp->pctl_dev); + if (err) + return dev_err_probe(dev, err, "Failed to enable pinctrl\n"); + + hdp->gpio_chip.get_direction = stm32_hdp_gpio_get_direction; + hdp->gpio_chip.ngpio = ARRAY_SIZE(stm32_hdp_pins); + hdp->gpio_chip.can_sleep = true; + hdp->gpio_chip.names = stm32_hdp_pins_group; + + err = bgpio_init(&hdp->gpio_chip, dev, 4, + hdp->base + HDP_GPOVAL, + hdp->base + HDP_GPOSET, + hdp->base + HDP_GPOCLR, + NULL, NULL, BGPIOF_NO_INPUT); + if (err) + return dev_err_probe(dev, err, "Failed to init bgpio\n"); + + + err = devm_gpiochip_add_data(dev, &hdp->gpio_chip, hdp); + if (err) + return dev_err_probe(dev, err, "Failed to add gpiochip\n"); + + writel_relaxed(HDP_CTRL_ENABLE, hdp->base + HDP_CTRL); + + version = readl_relaxed(hdp->base + HDP_VERR); + dev_dbg(dev, "STM32 HDP version %u.%u initialized\n", version >> 4, version & 0x0f); + + return 0; +} + +static void stm32_hdp_remove(struct platform_device *pdev) +{ + struct stm32_hdp *hdp = platform_get_drvdata(pdev); + + writel_relaxed(HDP_CTRL_DISABLE, hdp->base + HDP_CTRL); +} + +static int stm32_hdp_suspend(struct device *dev) +{ + struct stm32_hdp *hdp = dev_get_drvdata(dev); + + hdp->gposet_conf = readl_relaxed(hdp->base + HDP_GPOSET); + + pinctrl_pm_select_sleep_state(dev); + + clk_disable_unprepare(hdp->clk); + + return 0; +} + +static int stm32_hdp_resume(struct device *dev) +{ + struct 
stm32_hdp *hdp = dev_get_drvdata(dev); + int err; + + err = clk_prepare_enable(hdp->clk); + if (err) { + dev_err(dev, "Failed to prepare_enable clk (%d)\n", err); + return err; + } + + writel_relaxed(HDP_CTRL_ENABLE, hdp->base + HDP_CTRL); + writel_relaxed(hdp->gposet_conf, hdp->base + HDP_GPOSET); + writel_relaxed(hdp->mux_conf, hdp->base + HDP_MUX); + + pinctrl_pm_select_default_state(dev); + + return 0; +} + +static DEFINE_SIMPLE_DEV_PM_OPS(stm32_hdp_pm_ops, stm32_hdp_suspend, stm32_hdp_resume); + +static struct platform_driver stm32_hdp_driver = { + .probe = stm32_hdp_probe, + .remove = stm32_hdp_remove, + .driver = { + .name = DRIVER_NAME, + .pm = pm_sleep_ptr(&stm32_hdp_pm_ops), + .of_match_table = stm32_hdp_of_match, + } +}; + +module_platform_driver(stm32_hdp_driver); + +MODULE_AUTHOR("Clément Le Goffic"); +MODULE_DESCRIPTION("STMicroelectronics STM32 Hardware Debug Port driver"); +MODULE_LICENSE("GPL"); From ebbe8bfe07f0c7c09276c12bfd65c8fd9941b06a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Le=20Goffic?= Date: Fri, 11 Jul 2025 09:41:21 +0200 Subject: [PATCH 0652/2411] =?UTF-8?q?MAINTAINERS:=20add=20Cl=C3=A9ment=20L?= =?UTF-8?q?e=20Goffic=20as=20STM32=20HDP=20maintainer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Clément Le Goffic as STM32 HDP maintainer. 
Signed-off-by: Clément Le Goffic Link: https://lore.kernel.org/20250711-hdp-upstream-v7-3-faeecf7aaee1@foss.st.com Signed-off-by: Linus Walleij --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 13b1226cc4b9..6576fb441541 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23458,6 +23458,12 @@ F: drivers/bus/stm32_etzpc.c F: drivers/bus/stm32_firewall.c F: drivers/bus/stm32_rifsc.c +ST STM32 HDP PINCTRL DRIVER +M: Clément Le Goffic +S: Maintained +F: Documentation/devicetree/bindings/pinctrl/st,stm32-hdp.yaml +F: drivers/pinctrl/stm32/pinctrl-stm32-hdp.c + ST STM32 I2C/SMBUS DRIVER M: Pierre-Yves MORDRET M: Alain Volmat From 5a8f77e24a30bbce2fa57926f3dede84894fd10a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 2 Jul 2025 11:35:18 +0200 Subject: [PATCH 0653/2411] PCI/IOV: Restore VF resizable BAR state after reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to regular resizable BARs, VF BARs can also be resized, e.g. by the system firmware or the PCI subsystem itself. The capability layout is the same as PCI_EXT_CAP_ID_REBAR. Add the capability ID and restore it as a part of IOV state. See PCIe r6.2, sec 7.8.7. 
Signed-off-by: Michał Winiarski Signed-off-by: Bjorn Helgaas Reviewed-by: Ilpo Järvinen Reviewed-by: Christian König Link: https://patch.msgid.link/20250702093522.518099-2-michal.winiarski@intel.com --- drivers/pci/iov.c | 30 +++++++++++++++++++++++++++++- drivers/pci/pci.h | 12 ++++++++++++ include/uapi/linux/pci_regs.h | 9 +++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 10693b5d7eb6..10ccef8afe14 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -7,6 +7,7 @@ * Copyright (C) 2009 Intel Corporation, Yu Zhao */ +#include #include #include #include @@ -850,6 +851,7 @@ static int sriov_init(struct pci_dev *dev, int pos) pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link); if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) iov->link = PCI_DEVFN(PCI_SLOT(dev->devfn), iov->link); + iov->vf_rebar_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_VF_REBAR); if (pdev) iov->dev = pci_dev_get(pdev); @@ -888,6 +890,30 @@ static void sriov_release(struct pci_dev *dev) dev->sriov = NULL; } +static void sriov_restore_vf_rebar_state(struct pci_dev *dev) +{ + unsigned int pos, nbars, i; + u32 ctrl; + + pos = pci_iov_vf_rebar_cap(dev); + if (!pos) + return; + + pci_read_config_dword(dev, pos + PCI_VF_REBAR_CTRL, &ctrl); + nbars = FIELD_GET(PCI_VF_REBAR_CTRL_NBAR_MASK, ctrl); + + for (i = 0; i < nbars; i++, pos += 8) { + int bar_idx, size; + + pci_read_config_dword(dev, pos + PCI_VF_REBAR_CTRL, &ctrl); + bar_idx = FIELD_GET(PCI_VF_REBAR_CTRL_BAR_IDX, ctrl); + size = pci_rebar_bytes_to_size(dev->sriov->barsz[bar_idx]); + ctrl &= ~PCI_VF_REBAR_CTRL_BAR_SIZE; + ctrl |= FIELD_PREP(PCI_VF_REBAR_CTRL_BAR_SIZE, size); + pci_write_config_dword(dev, pos + PCI_VF_REBAR_CTRL, ctrl); + } +} + static void sriov_restore_state(struct pci_dev *dev) { int i; @@ -1047,8 +1073,10 @@ resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno) */ void pci_restore_iov_state(struct pci_dev *dev) { - if 
(dev->is_physfn) + if (dev->is_physfn) { + sriov_restore_vf_rebar_state(dev); sriov_restore_state(dev); + } } /** diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 12215ee72afb..69258c445b28 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -486,6 +486,7 @@ struct pci_sriov { u16 subsystem_vendor; /* VF subsystem vendor */ u16 subsystem_device; /* VF subsystem device */ resource_size_t barsz[PCI_SRIOV_NUM_BARS]; /* VF BAR size */ + u16 vf_rebar_cap; /* VF Resizable BAR capability offset */ bool drivers_autoprobe; /* Auto probing of VFs by driver */ }; @@ -710,6 +711,13 @@ void pci_iov_update_resource(struct pci_dev *dev, int resno); resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno); void pci_restore_iov_state(struct pci_dev *dev); int pci_iov_bus_range(struct pci_bus *bus); +static inline u16 pci_iov_vf_rebar_cap(struct pci_dev *dev) +{ + if (!dev->is_physfn) + return 0; + + return dev->sriov->vf_rebar_cap; +} static inline bool pci_resource_is_iov(int resno) { return resno >= PCI_IOV_RESOURCES && resno <= PCI_IOV_RESOURCE_END; @@ -734,6 +742,10 @@ static inline int pci_iov_bus_range(struct pci_bus *bus) { return 0; } +static inline u16 pci_iov_vf_rebar_cap(struct pci_dev *dev) +{ + return 0; +} static inline bool pci_resource_is_iov(int resno) { return false; diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index a3a3e942dedf..f5b17745de60 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -745,6 +745,7 @@ #define PCI_EXT_CAP_ID_L1SS 0x1E /* L1 PM Substates */ #define PCI_EXT_CAP_ID_PTM 0x1F /* Precision Time Measurement */ #define PCI_EXT_CAP_ID_DVSEC 0x23 /* Designated Vendor-Specific */ +#define PCI_EXT_CAP_ID_VF_REBAR 0x24 /* VF Resizable BAR */ #define PCI_EXT_CAP_ID_DLF 0x25 /* Data Link Feature */ #define PCI_EXT_CAP_ID_PL_16GT 0x26 /* Physical Layer 16.0 GT/s */ #define PCI_EXT_CAP_ID_NPEM 0x29 /* Native PCIe Enclosure Management */ @@ -1141,6 +1142,14 @@ 
#define PCI_DVSEC_HEADER2 0x8 /* Designated Vendor-Specific Header2 */ #define PCI_DVSEC_HEADER2_ID(x) ((x) & 0xffff) +/* VF Resizable BARs, same layout as PCI_REBAR */ +#define PCI_VF_REBAR_CAP PCI_REBAR_CAP +#define PCI_VF_REBAR_CAP_SIZES PCI_REBAR_CAP_SIZES +#define PCI_VF_REBAR_CTRL PCI_REBAR_CTRL +#define PCI_VF_REBAR_CTRL_BAR_IDX PCI_REBAR_CTRL_BAR_IDX +#define PCI_VF_REBAR_CTRL_NBAR_MASK PCI_REBAR_CTRL_NBAR_MASK +#define PCI_VF_REBAR_CTRL_BAR_SIZE PCI_REBAR_CTRL_BAR_SIZE + /* Data Link Feature */ #define PCI_DLF_CAP 0x04 /* Capabilities Register */ #define PCI_DLF_EXCHANGE_ENABLE 0x80000000 /* Data Link Feature Exchange Enable */ From 535bdbeaacf96a8c4ef8d726382fb8fae97f168f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 2 Jul 2025 11:35:19 +0200 Subject: [PATCH 0654/2411] PCI/IOV: Add pci_resource_num_to_vf_bar() to convert VF BAR number to/from IOV resource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are multiple places where conversions between IOV resources and corresponding VF BAR numbers are done. Extract the logic to pci_resource_num_from_vf_bar() and pci_resource_num_to_vf_bar() helpers. 
Suggested-by: Ilpo Järvinen Signed-off-by: Michał Winiarski Signed-off-by: Bjorn Helgaas Reviewed-by: Ilpo Järvinen Acked-by: Christian König Link: https://patch.msgid.link/20250702093522.518099-3-michal.winiarski@intel.com --- drivers/pci/iov.c | 26 ++++++++++++++++---------- drivers/pci/pci.h | 18 ++++++++++++++++++ drivers/pci/setup-bus.c | 3 ++- 3 files changed, 36 insertions(+), 11 deletions(-) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 10ccef8afe14..bdac07804552 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -151,7 +151,7 @@ resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) if (!dev->is_physfn) return 0; - return dev->sriov->barsz[resno - PCI_IOV_RESOURCES]; + return dev->sriov->barsz[pci_resource_num_to_vf_bar(resno)]; } static void pci_read_vf_config_common(struct pci_dev *virtfn) @@ -342,12 +342,14 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id) virtfn->multifunction = 0; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &dev->resource[i + PCI_IOV_RESOURCES]; + int idx = pci_resource_num_from_vf_bar(i); + + res = &dev->resource[idx]; if (!res->parent) continue; virtfn->resource[i].name = pci_name(virtfn); virtfn->resource[i].flags = res->flags; - size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); + size = pci_iov_resource_size(dev, idx); resource_set_range(&virtfn->resource[i], res->start + size * id, size); rc = request_resource(res, &virtfn->resource[i]); @@ -644,8 +646,10 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) nres = 0; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - bars |= (1 << (i + PCI_IOV_RESOURCES)); - res = &dev->resource[i + PCI_IOV_RESOURCES]; + int idx = pci_resource_num_from_vf_bar(i); + + bars |= (1 << idx); + res = &dev->resource[idx]; if (res->parent) nres++; } @@ -811,8 +815,10 @@ static int sriov_init(struct pci_dev *dev, int pos) nres = 0; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &dev->resource[i + PCI_IOV_RESOURCES]; - res_name = 
pci_resource_name(dev, i + PCI_IOV_RESOURCES); + int idx = pci_resource_num_from_vf_bar(i); + + res = &dev->resource[idx]; + res_name = pci_resource_name(dev, idx); /* * If it is already FIXED, don't change it, something @@ -871,7 +877,7 @@ static int sriov_init(struct pci_dev *dev, int pos) dev->is_physfn = 0; failed: for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &dev->resource[i + PCI_IOV_RESOURCES]; + res = &dev->resource[pci_resource_num_from_vf_bar(i)]; res->flags = 0; } @@ -933,7 +939,7 @@ static void sriov_restore_state(struct pci_dev *dev) pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, ctrl); for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) - pci_update_resource(dev, i + PCI_IOV_RESOURCES); + pci_update_resource(dev, pci_resource_num_from_vf_bar(i)); pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz); pci_iov_set_numvfs(dev, iov->num_VFs); @@ -999,7 +1005,7 @@ void pci_iov_update_resource(struct pci_dev *dev, int resno) { struct pci_sriov *iov = dev->is_physfn ? 
dev->sriov : NULL; struct resource *res = pci_resource_n(dev, resno); - int vf_bar = resno - PCI_IOV_RESOURCES; + int vf_bar = pci_resource_num_to_vf_bar(resno); struct pci_bus_region region; u16 cmd; u32 new; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 69258c445b28..190e4c115014 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -722,6 +722,14 @@ static inline bool pci_resource_is_iov(int resno) { return resno >= PCI_IOV_RESOURCES && resno <= PCI_IOV_RESOURCE_END; } +static inline int pci_resource_num_from_vf_bar(int resno) +{ + return resno + PCI_IOV_RESOURCES; +} +static inline int pci_resource_num_to_vf_bar(int resno) +{ + return resno - PCI_IOV_RESOURCES; +} extern const struct attribute_group sriov_pf_dev_attr_group; extern const struct attribute_group sriov_vf_dev_attr_group; #else @@ -750,6 +758,16 @@ static inline bool pci_resource_is_iov(int resno) { return false; } +static inline int pci_resource_num_from_vf_bar(int resno) +{ + WARN_ON_ONCE(1); + return -ENODEV; +} +static inline int pci_resource_num_to_vf_bar(int resno) +{ + WARN_ON_ONCE(1); + return -ENODEV; +} #endif /* CONFIG_PCI_IOV */ #ifdef CONFIG_PCIE_TPH diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 07c3d021a47e..7853ac6999e2 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -1888,7 +1888,8 @@ static int iov_resources_unassigned(struct pci_dev *dev, void *data) bool *unassigned = data; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - struct resource *r = &dev->resource[i + PCI_IOV_RESOURCES]; + int idx = pci_resource_num_from_vf_bar(i); + struct resource *r = &dev->resource[idx]; struct pci_bus_region region; /* Not assigned or rejected by kernel? 
*/ From e200f4f7eab52bb7affcd92bf079958326c154d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 2 Jul 2025 11:35:20 +0200 Subject: [PATCH 0655/2411] PCI/IOV: Allow IOV resources to be resized in pci_resize_resource() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to regular resizable BARs, VF BARs can also be resized. The capability layout is the same as PCI_EXT_CAP_ID_REBAR, which means we can reuse most of the implementation, the only difference being resource size calculation (which is multiplied by total VFs) and memory decoding (which is controlled by a separate VF MSE field in SR-IOV cap). Extend the pci_resize_resource() function to accept IOV resources. See PCIe r6.2, sec 7.8.7. Signed-off-by: Michał Winiarski Signed-off-by: Bjorn Helgaas Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/20250702093522.518099-4-michal.winiarski@intel.com --- drivers/pci/iov.c | 21 +++++++++++++++++++++ drivers/pci/pci.c | 8 +++++++- drivers/pci/pci.h | 9 +++++++++ drivers/pci/setup-res.c | 35 ++++++++++++++++++++++++++++++----- 4 files changed, 67 insertions(+), 6 deletions(-) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index bdac07804552..852424cf2ae1 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -154,6 +154,27 @@ resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) return dev->sriov->barsz[pci_resource_num_to_vf_bar(resno)]; } +void pci_iov_resource_set_size(struct pci_dev *dev, int resno, + resource_size_t size) +{ + if (!pci_resource_is_iov(resno)) { + pci_warn(dev, "%s is not an IOV resource\n", + pci_resource_name(dev, resno)); + return; + } + + dev->sriov->barsz[pci_resource_num_to_vf_bar(resno)] = size; +} + +bool pci_iov_is_memory_decoding_enabled(struct pci_dev *dev) +{ + u16 cmd; + + pci_read_config_word(dev, dev->sriov->pos + PCI_SRIOV_CTRL, &cmd); + + return cmd & PCI_SRIOV_CTRL_MSE; +} + static void pci_read_vf_config_common(struct 
pci_dev *virtfn) { struct pci_dev *physfn = virtfn->physfn; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e9448d55113b..55695852975b 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3752,7 +3752,13 @@ static int pci_rebar_find_pos(struct pci_dev *pdev, int bar) unsigned int pos, nbars, i; u32 ctrl; - pos = pdev->rebar_cap; + if (pci_resource_is_iov(bar)) { + pos = pci_iov_vf_rebar_cap(pdev); + bar = pci_resource_num_to_vf_bar(bar); + } else { + pos = pdev->rebar_cap; + } + if (!pos) return -ENOTSUPP; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 190e4c115014..c7430afe7515 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -711,6 +711,9 @@ void pci_iov_update_resource(struct pci_dev *dev, int resno); resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno); void pci_restore_iov_state(struct pci_dev *dev); int pci_iov_bus_range(struct pci_bus *bus); +void pci_iov_resource_set_size(struct pci_dev *dev, int resno, + resource_size_t size); +bool pci_iov_is_memory_decoding_enabled(struct pci_dev *dev); static inline u16 pci_iov_vf_rebar_cap(struct pci_dev *dev) { if (!dev->is_physfn) @@ -750,6 +753,12 @@ static inline int pci_iov_bus_range(struct pci_bus *bus) { return 0; } +static inline void pci_iov_resource_set_size(struct pci_dev *dev, int resno, + resource_size_t size) { } +static inline bool pci_iov_is_memory_decoding_enabled(struct pci_dev *dev) +{ + return false; +} static inline u16 pci_iov_vf_rebar_cap(struct pci_dev *dev) { return 0; diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index c6657cdd06f6..d2b3ed51e880 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -423,13 +423,39 @@ void pci_release_resource(struct pci_dev *dev, int resno) } EXPORT_SYMBOL(pci_release_resource); +static bool pci_resize_is_memory_decoding_enabled(struct pci_dev *dev, + int resno) +{ + u16 cmd; + + if (pci_resource_is_iov(resno)) + return pci_iov_is_memory_decoding_enabled(dev); + + 
pci_read_config_word(dev, PCI_COMMAND, &cmd); + + return cmd & PCI_COMMAND_MEMORY; +} + +static void pci_resize_resource_set_size(struct pci_dev *dev, int resno, + int size) +{ + resource_size_t res_size = pci_rebar_size_to_bytes(size); + struct resource *res = pci_resource_n(dev, resno); + + if (!pci_resource_is_iov(resno)) { + resource_set_size(res, res_size); + } else { + resource_set_size(res, res_size * pci_sriov_get_totalvfs(dev)); + pci_iov_resource_set_size(dev, resno, res_size); + } +} + int pci_resize_resource(struct pci_dev *dev, int resno, int size) { struct resource *res = pci_resource_n(dev, resno); struct pci_host_bridge *host; int old, ret; u32 sizes; - u16 cmd; /* Check if we must preserve the firmware's resource assignment */ host = pci_find_host_bridge(dev->bus); @@ -440,8 +466,7 @@ int pci_resize_resource(struct pci_dev *dev, int resno, int size) if (!(res->flags & IORESOURCE_UNSET)) return -EBUSY; - pci_read_config_word(dev, PCI_COMMAND, &cmd); - if (cmd & PCI_COMMAND_MEMORY) + if (pci_resize_is_memory_decoding_enabled(dev, resno)) return -EBUSY; sizes = pci_rebar_get_possible_sizes(dev, resno); @@ -459,7 +484,7 @@ int pci_resize_resource(struct pci_dev *dev, int resno, int size) if (ret) return ret; - resource_set_size(res, pci_rebar_size_to_bytes(size)); + pci_resize_resource_set_size(dev, resno, size); /* Check if the new config works by trying to assign everything. 
*/ if (dev->bus->self) { @@ -471,7 +496,7 @@ int pci_resize_resource(struct pci_dev *dev, int resno, int size) error_resize: pci_rebar_set_size(dev, resno, old); - resource_set_size(res, pci_rebar_size_to_bytes(old)); + pci_resize_resource_set_size(dev, resno, old); return ret; } EXPORT_SYMBOL(pci_resize_resource); From e1ba95a168e6f771960c0afc4e44984cf5cf659c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 2 Jul 2025 11:35:21 +0200 Subject: [PATCH 0656/2411] PCI/IOV: Check that VF BAR fits within the reservation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the resource representing a VF MMIO BAR reservation is created, its size is always large enough to accommodate the BAR of all SR-IOV Virtual Functions that can potentially be created (total VFs). If for whatever reason it's not possible to accommodate all VFs, the resource is not assigned and no VFs can be created. An upcoming change will allow VF BAR size to be modified by drivers at a later point in time, which means that the check for resource assignment is no longer sufficient. Add an additional check that verifies that the VF BAR for all enabled VFs fits within the underlying reservation resource. 
Signed-off-by: Michał Winiarski Signed-off-by: Bjorn Helgaas Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/20250702093522.518099-5-michal.winiarski@intel.com --- drivers/pci/iov.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 852424cf2ae1..f34173c70b32 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -668,9 +668,12 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) nres = 0; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { int idx = pci_resource_num_from_vf_bar(i); + resource_size_t vf_bar_sz = pci_iov_resource_size(dev, idx); bars |= (1 << idx); res = &dev->resource[idx]; + if (vf_bar_sz * nr_virtfn > resource_size(res)) + continue; if (res->parent) nres++; } From 84f890414a12b8d1480045b92a5e4e6ac4ab3419 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 2 Jul 2025 11:35:22 +0200 Subject: [PATCH 0657/2411] PCI/IOV: Allow drivers to control VF BAR size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers could leverage the fact that the VF BAR MMIO reservation is created for the total number of VFs supported by the device by resizing the BAR to a larger size when a smaller number of VFs is enabled. Add pci_iov_vf_bar_set_size() to control the size and a pci_iov_vf_bar_get_sizes() helper to get the VF BAR sizes that will allow up to num_vfs to be successfully enabled with the current underlying reservation size.
Signed-off-by: Michał Winiarski Signed-off-by: Bjorn Helgaas Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/20250702093522.518099-6-michal.winiarski@intel.com --- drivers/pci/iov.c | 73 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 6 ++++ 2 files changed, 79 insertions(+) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index f34173c70b32..ac4375954c94 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -8,11 +8,15 @@ */ #include +#include +#include #include +#include #include #include #include #include +#include #include "pci.h" #define VIRTFN_ID_LEN 17 /* "virtfn%u\0" for 2^32 - 1 */ @@ -1313,3 +1317,72 @@ int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn) return nr_virtfn; } EXPORT_SYMBOL_GPL(pci_sriov_configure_simple); + +/** + * pci_iov_vf_bar_set_size - set a new size for a VF BAR + * @dev: the PCI device + * @resno: the resource number + * @size: new size as defined in the spec (0=1MB, 31=128TB) + * + * Set the new size of a VF BAR that supports VF resizable BAR capability. + * Unlike pci_resize_resource(), this does not cause the resource that + * reserves the MMIO space (originally up to total_VFs) to be resized, which + * means that following calls to pci_enable_sriov() can fail if the resources + * no longer fit. + * + * Return: 0 on success, or negative on failure. 
+ */ +int pci_iov_vf_bar_set_size(struct pci_dev *dev, int resno, int size) +{ + u32 sizes; + int ret; + + if (!pci_resource_is_iov(resno)) + return -EINVAL; + + if (pci_iov_is_memory_decoding_enabled(dev)) + return -EBUSY; + + sizes = pci_rebar_get_possible_sizes(dev, resno); + if (!sizes) + return -ENOTSUPP; + + if (!(sizes & BIT(size))) + return -EINVAL; + + ret = pci_rebar_set_size(dev, resno, size); + if (ret) + return ret; + + pci_iov_resource_set_size(dev, resno, pci_rebar_size_to_bytes(size)); + + return 0; +} +EXPORT_SYMBOL_GPL(pci_iov_vf_bar_set_size); + +/** + * pci_iov_vf_bar_get_sizes - get VF BAR sizes allowing to create up to num_vfs + * @dev: the PCI device + * @resno: the resource number + * @num_vfs: number of VFs + * + * Get the sizes of a VF resizable BAR that can accommodate @num_vfs within + * the currently assigned size of the resource @resno. + * + * Return: A bitmask of sizes in format defined in the spec (bit 0=1MB, + * bit 31=128TB). + */ +u32 pci_iov_vf_bar_get_sizes(struct pci_dev *dev, int resno, int num_vfs) +{ + u64 vf_len = pci_resource_len(dev, resno); + u32 sizes; + + if (!num_vfs) + return 0; + + do_div(vf_len, num_vfs); + sizes = (roundup_pow_of_two(vf_len + 1) - 1) >> ilog2(SZ_1M); + + return sizes & pci_rebar_get_possible_sizes(dev, resno); +} +EXPORT_SYMBOL_GPL(pci_iov_vf_bar_get_sizes); diff --git a/include/linux/pci.h b/include/linux/pci.h index 05e68f35f392..28f06045ab20 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2438,6 +2438,8 @@ int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); int pci_sriov_get_totalvfs(struct pci_dev *dev); int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn); resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno); +int pci_iov_vf_bar_set_size(struct pci_dev *dev, int resno, int size); +u32 pci_iov_vf_bar_get_sizes(struct pci_dev *dev, int resno, int num_vfs); void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe); /* Arch may override 
these (weak) */ @@ -2490,6 +2492,10 @@ static inline int pci_sriov_get_totalvfs(struct pci_dev *dev) #define pci_sriov_configure_simple NULL static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) { return 0; } +static inline int pci_iov_vf_bar_set_size(struct pci_dev *dev, int resno, int size) +{ return -ENODEV; } +static inline u32 pci_iov_vf_bar_get_sizes(struct pci_dev *dev, int resno, int num_vfs) +{ return 0; } static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) { } #endif From 60ecf796cdc8638c570a4ad06bae6a0d48a8986d Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 16 Jun 2025 13:45:57 +0000 Subject: [PATCH 0658/2411] rust: uaccess: use newtype for user pointers Currently, Rust code uses a typedef for unsigned long to represent userspace addresses. This is unfortunate because it means that userspace addresses could accidentally be mixed up with other integers. To alleviate that, we introduce a new UserPtr struct that wraps a raw pointer to represent a userspace address. By using a struct, type checking enforces that userspace addresses cannot be mixed up with anything else. This is similar to the __user annotation in C that detects cases where user pointers are mixed with non-user pointers. Note that unlike __user pointers in C, this type is just a pointer without a target type. This means that it can't detect cases such as mixing up which struct this user pointer references. However, that is okay due to the way this is intended to be used - generally, you create a UserPtr in your ioctl callback from the provided usize *before* dispatching on which ioctl is in use, and then after dispatching on the ioctl you pass the UserPtr into a UserSliceReader or UserSliceWriter; selecting the target type does not happen until you have obtained the UserSliceReader/Writer. The UserPtr type is not marked with #[derive(Debug)], which means that it's not possible to print values of this type. This avoids ASLR leakage. 
The type is added to the prelude as it is a fairly fundamental type similar to c_int. The wrapping_add() method is renamed to wrapping_byte_add() for consistency with the method name found on raw pointers. Reviewed-by: Benno Lossin Reviewed-by: Danilo Krummrich Reviewed-by: Christian Schrefl Reviewed-by: Boqun Feng Reviewed-by: Greg Kroah-Hartman Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20250616-userptr-newtype-v3-1-5ff7b2d18d9e@google.com [ Reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/prelude.rs | 2 + rust/kernel/uaccess.rs | 71 ++++++++++++++++++++++++++------ samples/rust/rust_misc_device.rs | 2 + 3 files changed, 63 insertions(+), 12 deletions(-) diff --git a/rust/kernel/prelude.rs b/rust/kernel/prelude.rs index 2f30a398dddd..9a1a830f605c 100644 --- a/rust/kernel/prelude.rs +++ b/rust/kernel/prelude.rs @@ -46,3 +46,5 @@ pub use super::init::InPlaceInit; pub use super::current; + +pub use super::uaccess::UserPtr; diff --git a/rust/kernel/uaccess.rs b/rust/kernel/uaccess.rs index 85097eee81d9..a8fb4764185a 100644 --- a/rust/kernel/uaccess.rs +++ b/rust/kernel/uaccess.rs @@ -14,8 +14,51 @@ }; use core::mem::{size_of, MaybeUninit}; -/// The type used for userspace addresses. -pub type UserPtr = usize; +/// A pointer into userspace. +/// +/// This is the Rust equivalent to C pointers tagged with `__user`. +#[repr(transparent)] +#[derive(Copy, Clone)] +pub struct UserPtr(*mut c_void); + +impl UserPtr { + /// Create a `UserPtr` from an integer representing the userspace address. + #[inline] + pub fn from_addr(addr: usize) -> Self { + Self(addr as *mut c_void) + } + + /// Create a `UserPtr` from a pointer representing the userspace address. + #[inline] + pub fn from_ptr(addr: *mut c_void) -> Self { + Self(addr) + } + + /// Cast this userspace pointer to a raw const void pointer. + /// + /// It is up to the caller to use the returned pointer correctly. 
+ #[inline] + pub fn as_const_ptr(self) -> *const c_void { + self.0 + } + + /// Cast this userspace pointer to a raw mutable void pointer. + /// + /// It is up to the caller to use the returned pointer correctly. + #[inline] + pub fn as_mut_ptr(self) -> *mut c_void { + self.0 + } + + /// Increment this user pointer by `add` bytes. + /// + /// This addition is wrapping, so wrapping around the address space does not result in a panic + /// even if `CONFIG_RUST_OVERFLOW_CHECKS` is enabled. + #[inline] + pub fn wrapping_byte_add(self, add: usize) -> UserPtr { + UserPtr(self.0.wrapping_byte_add(add)) + } +} /// A pointer to an area in userspace memory, which can be either read-only or read-write. /// @@ -177,7 +220,7 @@ impl UserSliceReader { pub fn skip(&mut self, num_skip: usize) -> Result { // Update `self.length` first since that's the fallible part of this operation. self.length = self.length.checked_sub(num_skip).ok_or(EFAULT)?; - self.ptr = self.ptr.wrapping_add(num_skip); + self.ptr = self.ptr.wrapping_byte_add(num_skip); Ok(()) } @@ -224,11 +267,11 @@ pub fn read_raw(&mut self, out: &mut [MaybeUninit]) -> Result { } // SAFETY: `out_ptr` points into a mutable slice of length `len`, so we may write // that many bytes to it. 
- let res = unsafe { bindings::copy_from_user(out_ptr, self.ptr as *const c_void, len) }; + let res = unsafe { bindings::copy_from_user(out_ptr, self.ptr.as_const_ptr(), len) }; if res != 0 { return Err(EFAULT); } - self.ptr = self.ptr.wrapping_add(len); + self.ptr = self.ptr.wrapping_byte_add(len); self.length -= len; Ok(()) } @@ -262,14 +305,14 @@ pub fn read(&mut self) -> Result { let res = unsafe { bindings::_copy_from_user( out.as_mut_ptr().cast::(), - self.ptr as *const c_void, + self.ptr.as_const_ptr(), len, ) }; if res != 0 { return Err(EFAULT); } - self.ptr = self.ptr.wrapping_add(len); + self.ptr = self.ptr.wrapping_byte_add(len); self.length -= len; // SAFETY: The read above has initialized all bytes in `out`, and since `T` implements // `FromBytes`, any bit-pattern is a valid value for this type. @@ -386,11 +429,11 @@ pub fn write_slice(&mut self, data: &[u8]) -> Result { } // SAFETY: `data_ptr` points into an immutable slice of length `len`, so we may read // that many bytes from it. - let res = unsafe { bindings::copy_to_user(self.ptr as *mut c_void, data_ptr, len) }; + let res = unsafe { bindings::copy_to_user(self.ptr.as_mut_ptr(), data_ptr, len) }; if res != 0 { return Err(EFAULT); } - self.ptr = self.ptr.wrapping_add(len); + self.ptr = self.ptr.wrapping_byte_add(len); self.length -= len; Ok(()) } @@ -413,7 +456,7 @@ pub fn write(&mut self, value: &T) -> Result { // is a compile-time constant. 
let res = unsafe { bindings::_copy_to_user( - self.ptr as *mut c_void, + self.ptr.as_mut_ptr(), core::ptr::from_ref(value).cast::(), len, ) @@ -421,7 +464,7 @@ pub fn write(&mut self, value: &T) -> Result { if res != 0 { return Err(EFAULT); } - self.ptr = self.ptr.wrapping_add(len); + self.ptr = self.ptr.wrapping_byte_add(len); self.length -= len; Ok(()) } @@ -445,7 +488,11 @@ fn raw_strncpy_from_user(dst: &mut [MaybeUninit], src: UserPtr) -> Result(), src as *const c_char, len) + bindings::strncpy_from_user( + dst.as_mut_ptr().cast::(), + src.as_const_ptr().cast::(), + len, + ) }; if res < 0 { diff --git a/samples/rust/rust_misc_device.rs b/samples/rust/rust_misc_device.rs index c881fd6dbd08..e7ab77448f75 100644 --- a/samples/rust/rust_misc_device.rs +++ b/samples/rust/rust_misc_device.rs @@ -176,6 +176,8 @@ fn open(_file: &File, misc: &MiscDeviceRegistration) -> Result, _file: &File, cmd: u32, arg: usize) -> Result { dev_info!(me.dev, "IOCTLing Rust Misc Device Sample\n"); + // Treat the ioctl argument as a user pointer. + let arg = UserPtr::from_addr(arg); let size = _IOC_SIZE(cmd); match cmd { From 8ffb945647f8740e2eab81ace8c87f9734c85f95 Mon Sep 17 00:00:00 2001 From: Krishna Ketan Rai Date: Sun, 29 Jun 2025 20:55:32 +0530 Subject: [PATCH 0659/2411] rust: helpers: sort includes alphabetically The helper includes should be sorted alphabetically as indicated by the comment at the top of the file, but they were not. Sort them properly. 
Suggested-by: Alice Ryhl Link: https://github.com/Rust-for-Linux/linux/issues/1174 Signed-off-by: Krishna Ketan Rai Link: https://lore.kernel.org/r/20250629152533.889-1-prafulrai522@gmail.com Signed-off-by: Miguel Ojeda --- rust/helpers/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index b15b3cddad4e..d3867d09e356 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -29,9 +29,9 @@ #include "mm.c" #include "mutex.c" #include "page.c" -#include "platform.c" #include "pci.c" #include "pid_namespace.c" +#include "platform.c" #include "rbtree.c" #include "rcu.c" #include "refcount.c" From b6f885060e8e24f1a1a9205ba41a0524964e8c30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=96zkan?= Date: Tue, 8 Jul 2025 10:58:50 +0300 Subject: [PATCH 0660/2411] rust: rbtree: simplify finding `current` in `remove_current` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous version used a verbose `match` to get `current`, which may be slightly confusing at first glance. This change makes it shorter and more clearly expresses the intent: prefer `next` if available, otherwise fall back to `prev`. Signed-off-by: Onur Özkan Reviewed-by: Alice Ryhl Reviewed-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250708075850.25789-1-work@onurozkan.dev Signed-off-by: Miguel Ojeda --- rust/kernel/rbtree.rs | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/rust/kernel/rbtree.rs b/rust/kernel/rbtree.rs index 9457134eb3af..b8fe6be6fcc4 100644 --- a/rust/kernel/rbtree.rs +++ b/rust/kernel/rbtree.rs @@ -775,23 +775,14 @@ pub fn remove_current(self) -> (Option, RBTreeNode) { // the tree cannot change. By the tree invariant, all nodes are valid. 
unsafe { bindings::rb_erase(&mut (*this).links, addr_of_mut!(self.tree.root)) }; - let current = match (prev, next) { - (_, Some(next)) => next, - (Some(prev), None) => prev, - (None, None) => { - return (None, node); - } - }; + // INVARIANT: + // - `current` is a valid node in the [`RBTree`] pointed to by `self.tree`. + let cursor = next.or(prev).map(|current| Self { + current, + tree: self.tree, + }); - ( - // INVARIANT: - // - `current` is a valid node in the [`RBTree`] pointed to by `self.tree`. - Some(Self { - current, - tree: self.tree, - }), - node, - ) + (cursor, node) } /// Remove the previous node, returning it if it exists. From 12717ebeffcf3e34063dbc1e1b7f34924150c7c9 Mon Sep 17 00:00:00 2001 From: Andreas Hindborg Date: Thu, 12 Jun 2025 15:09:43 +0200 Subject: [PATCH 0661/2411] rust: types: add FOREIGN_ALIGN to ForeignOwnable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current implementation of `ForeignOwnable` is leaking the type of the opaque pointer to consumers of the API. This allows consumers of the opaque pointer to rely on the information that can be extracted from the pointer type. To prevent this, change the API to the version suggested by Maíra Canal (link below): Remove `ForeignOwnable::PointedTo` in favor of a constant, which specifies the alignment of the pointers returned by `into_foreign`. With this change, `ArcInner` no longer needs `pub` visibility, so change it to private.
Suggested-by: Alice Ryhl Suggested-by: Maíra Canal Link: https://lore.kernel.org/r/20240309235927.168915-3-mcanal@igalia.com Acked-by: Danilo Krummrich Reviewed-by: Benno Lossin Signed-off-by: Andreas Hindborg Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250612-pointed-to-v3-1-b009006d86a1@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/alloc/kbox.rs | 41 +++++++++++++++++++---------------- rust/kernel/miscdevice.rs | 10 ++++----- rust/kernel/pci.rs | 2 +- rust/kernel/platform.rs | 2 +- rust/kernel/sync/arc.rs | 24 +++++++++++---------- rust/kernel/types.rs | 45 +++++++++++++++++++-------------------- rust/kernel/xarray.rs | 9 ++++---- 7 files changed, 70 insertions(+), 63 deletions(-) diff --git a/rust/kernel/alloc/kbox.rs b/rust/kernel/alloc/kbox.rs index c386ff771d50..bffe72f44cb3 100644 --- a/rust/kernel/alloc/kbox.rs +++ b/rust/kernel/alloc/kbox.rs @@ -15,6 +15,7 @@ use core::ptr::NonNull; use core::result::Result; +use crate::ffi::c_void; use crate::init::InPlaceInit; use crate::types::ForeignOwnable; use pin_init::{InPlaceWrite, Init, PinInit, ZeroableOption}; @@ -398,70 +399,74 @@ fn try_init(init: impl Init, flags: Flags) -> Result } } -// SAFETY: The `into_foreign` function returns a pointer that is well-aligned. +// SAFETY: The pointer returned by `into_foreign` comes from a well aligned +// pointer to `T`. unsafe impl ForeignOwnable for Box where A: Allocator, { - type PointedTo = T; + const FOREIGN_ALIGN: usize = core::mem::align_of::(); type Borrowed<'a> = &'a T; type BorrowedMut<'a> = &'a mut T; - fn into_foreign(self) -> *mut Self::PointedTo { - Box::into_raw(self) + fn into_foreign(self) -> *mut c_void { + Box::into_raw(self).cast() } - unsafe fn from_foreign(ptr: *mut Self::PointedTo) -> Self { + unsafe fn from_foreign(ptr: *mut c_void) -> Self { // SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous // call to `Self::into_foreign`. 
- unsafe { Box::from_raw(ptr) } + unsafe { Box::from_raw(ptr.cast()) } } - unsafe fn borrow<'a>(ptr: *mut Self::PointedTo) -> &'a T { + unsafe fn borrow<'a>(ptr: *mut c_void) -> &'a T { // SAFETY: The safety requirements of this method ensure that the object remains alive and // immutable for the duration of 'a. - unsafe { &*ptr } + unsafe { &*ptr.cast() } } - unsafe fn borrow_mut<'a>(ptr: *mut Self::PointedTo) -> &'a mut T { + unsafe fn borrow_mut<'a>(ptr: *mut c_void) -> &'a mut T { + let ptr = ptr.cast(); // SAFETY: The safety requirements of this method ensure that the pointer is valid and that // nothing else will access the value for the duration of 'a. unsafe { &mut *ptr } } } -// SAFETY: The `into_foreign` function returns a pointer that is well-aligned. +// SAFETY: The pointer returned by `into_foreign` comes from a well aligned +// pointer to `T`. unsafe impl ForeignOwnable for Pin> where A: Allocator, { - type PointedTo = T; + const FOREIGN_ALIGN: usize = core::mem::align_of::(); type Borrowed<'a> = Pin<&'a T>; type BorrowedMut<'a> = Pin<&'a mut T>; - fn into_foreign(self) -> *mut Self::PointedTo { + fn into_foreign(self) -> *mut c_void { // SAFETY: We are still treating the box as pinned. - Box::into_raw(unsafe { Pin::into_inner_unchecked(self) }) + Box::into_raw(unsafe { Pin::into_inner_unchecked(self) }).cast() } - unsafe fn from_foreign(ptr: *mut Self::PointedTo) -> Self { + unsafe fn from_foreign(ptr: *mut c_void) -> Self { // SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous // call to `Self::into_foreign`. - unsafe { Pin::new_unchecked(Box::from_raw(ptr)) } + unsafe { Pin::new_unchecked(Box::from_raw(ptr.cast())) } } - unsafe fn borrow<'a>(ptr: *mut Self::PointedTo) -> Pin<&'a T> { + unsafe fn borrow<'a>(ptr: *mut c_void) -> Pin<&'a T> { // SAFETY: The safety requirements for this function ensure that the object is still alive, // so it is safe to dereference the raw pointer. 
// The safety requirements of `from_foreign` also ensure that the object remains alive for // the lifetime of the returned value. - let r = unsafe { &*ptr }; + let r = unsafe { &*ptr.cast() }; // SAFETY: This pointer originates from a `Pin>`. unsafe { Pin::new_unchecked(r) } } - unsafe fn borrow_mut<'a>(ptr: *mut Self::PointedTo) -> Pin<&'a mut T> { + unsafe fn borrow_mut<'a>(ptr: *mut c_void) -> Pin<&'a mut T> { + let ptr = ptr.cast(); // SAFETY: The safety requirements for this function ensure that the object is still alive, // so it is safe to dereference the raw pointer. // The safety requirements of `from_foreign` also ensure that the object remains alive for diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs index 288f40e79906..ad51ffc549b8 100644 --- a/rust/kernel/miscdevice.rs +++ b/rust/kernel/miscdevice.rs @@ -217,7 +217,7 @@ impl MiscdeviceVTable { // type. // // SAFETY: The open call of a file can access the private data. - unsafe { (*raw_file).private_data = ptr.into_foreign().cast() }; + unsafe { (*raw_file).private_data = ptr.into_foreign() }; 0 } @@ -228,7 +228,7 @@ impl MiscdeviceVTable { /// must be associated with a `MiscDeviceRegistration`. unsafe extern "C" fn release(_inode: *mut bindings::inode, file: *mut bindings::file) -> c_int { // SAFETY: The release call of a file owns the private data. - let private = unsafe { (*file).private_data }.cast(); + let private = unsafe { (*file).private_data }; // SAFETY: The release call of a file owns the private data. let ptr = unsafe { ::from_foreign(private) }; @@ -272,7 +272,7 @@ impl MiscdeviceVTable { /// `file` must be a valid file that is associated with a `MiscDeviceRegistration`. unsafe extern "C" fn ioctl(file: *mut bindings::file, cmd: c_uint, arg: c_ulong) -> c_long { // SAFETY: The ioctl call of a file can access the private data. 
- let private = unsafe { (*file).private_data }.cast(); + let private = unsafe { (*file).private_data }; // SAFETY: Ioctl calls can borrow the private data of the file. let device = unsafe { ::borrow(private) }; @@ -297,7 +297,7 @@ impl MiscdeviceVTable { arg: c_ulong, ) -> c_long { // SAFETY: The compat ioctl call of a file can access the private data. - let private = unsafe { (*file).private_data }.cast(); + let private = unsafe { (*file).private_data }; // SAFETY: Ioctl calls can borrow the private data of the file. let device = unsafe { ::borrow(private) }; @@ -318,7 +318,7 @@ impl MiscdeviceVTable { /// - `seq_file` must be a valid `struct seq_file` that we can write to. unsafe extern "C" fn show_fdinfo(seq_file: *mut bindings::seq_file, file: *mut bindings::file) { // SAFETY: The release call of a file owns the private data. - let private = unsafe { (*file).private_data }.cast(); + let private = unsafe { (*file).private_data }; // SAFETY: Ioctl calls can borrow the private data of the file. let device = unsafe { ::borrow(private) }; // SAFETY: diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index 6b94fd7a3ce9..5ce07999168e 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -89,7 +89,7 @@ extern "C" fn probe_callback( extern "C" fn remove_callback(pdev: *mut bindings::pci_dev) { // SAFETY: The PCI bus only ever calls the remove callback with a valid pointer to a // `struct pci_dev`. 
- let ptr = unsafe { bindings::pci_get_drvdata(pdev) }.cast(); + let ptr = unsafe { bindings::pci_get_drvdata(pdev) }; // SAFETY: `remove_callback` is only ever called after a successful call to // `probe_callback`, hence it's guaranteed that `ptr` points to a valid and initialized diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index 0a6a6be732b2..e894790c510c 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -81,7 +81,7 @@ extern "C" fn probe_callback(pdev: *mut bindings::platform_device) -> kernel::ff extern "C" fn remove_callback(pdev: *mut bindings::platform_device) { // SAFETY: `pdev` is a valid pointer to a `struct platform_device`. - let ptr = unsafe { bindings::platform_get_drvdata(pdev) }.cast(); + let ptr = unsafe { bindings::platform_get_drvdata(pdev) }; // SAFETY: `remove_callback` is only ever called after a successful call to // `probe_callback`, hence it's guaranteed that `ptr` points to a valid and initialized diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs index 499175f637a7..63a66761d0c7 100644 --- a/rust/kernel/sync/arc.rs +++ b/rust/kernel/sync/arc.rs @@ -19,6 +19,7 @@ use crate::{ alloc::{AllocError, Flags, KBox}, bindings, + ffi::c_void, init::InPlaceInit, try_init, types::{ForeignOwnable, Opaque}, @@ -141,10 +142,9 @@ pub struct Arc { _p: PhantomData>, } -#[doc(hidden)] #[pin_data] #[repr(C)] -pub struct ArcInner { +struct ArcInner { refcount: Opaque, data: T, } @@ -373,20 +373,22 @@ pub fn into_unique_or_drop(self) -> Option>> { } } -// SAFETY: The `into_foreign` function returns a pointer that is well-aligned. +// SAFETY: The pointer returned by `into_foreign` comes from a well aligned +// pointer to `ArcInner`. 
unsafe impl ForeignOwnable for Arc { - type PointedTo = ArcInner; + const FOREIGN_ALIGN: usize = core::mem::align_of::>(); + type Borrowed<'a> = ArcBorrow<'a, T>; type BorrowedMut<'a> = Self::Borrowed<'a>; - fn into_foreign(self) -> *mut Self::PointedTo { - ManuallyDrop::new(self).ptr.as_ptr() + fn into_foreign(self) -> *mut c_void { + ManuallyDrop::new(self).ptr.as_ptr().cast() } - unsafe fn from_foreign(ptr: *mut Self::PointedTo) -> Self { + unsafe fn from_foreign(ptr: *mut c_void) -> Self { // SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous // call to `Self::into_foreign`. - let inner = unsafe { NonNull::new_unchecked(ptr) }; + let inner = unsafe { NonNull::new_unchecked(ptr.cast::>()) }; // SAFETY: By the safety requirement of this function, we know that `ptr` came from // a previous call to `Arc::into_foreign`, which guarantees that `ptr` is valid and @@ -394,17 +396,17 @@ unsafe fn from_foreign(ptr: *mut Self::PointedTo) -> Self { unsafe { Self::from_inner(inner) } } - unsafe fn borrow<'a>(ptr: *mut Self::PointedTo) -> ArcBorrow<'a, T> { + unsafe fn borrow<'a>(ptr: *mut c_void) -> ArcBorrow<'a, T> { // SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous // call to `Self::into_foreign`. - let inner = unsafe { NonNull::new_unchecked(ptr) }; + let inner = unsafe { NonNull::new_unchecked(ptr.cast::>()) }; // SAFETY: The safety requirements of `from_foreign` ensure that the object remains alive // for the lifetime of the returned value. unsafe { ArcBorrow::new(inner) } } - unsafe fn borrow_mut<'a>(ptr: *mut Self::PointedTo) -> ArcBorrow<'a, T> { + unsafe fn borrow_mut<'a>(ptr: *mut c_void) -> ArcBorrow<'a, T> { // SAFETY: The safety requirements for `borrow_mut` are a superset of the safety // requirements for `borrow`. 
unsafe { ::borrow(ptr) } diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index 22985b6f6982..c156808a78d3 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -2,6 +2,7 @@ //! Kernel types. +use crate::ffi::c_void; use core::{ cell::UnsafeCell, marker::{PhantomData, PhantomPinned}, @@ -21,15 +22,10 @@ /// /// # Safety /// -/// Implementers must ensure that [`into_foreign`] returns a pointer which meets the alignment -/// requirements of [`PointedTo`]. -/// -/// [`into_foreign`]: Self::into_foreign -/// [`PointedTo`]: Self::PointedTo +/// - Implementations must satisfy the guarantees of [`Self::into_foreign`]. pub unsafe trait ForeignOwnable: Sized { - /// Type used when the value is foreign-owned. In practical terms only defines the alignment of - /// the pointer. - type PointedTo; + /// The alignment of pointers returned by `into_foreign`. + const FOREIGN_ALIGN: usize; /// Type used to immutably borrow a value that is currently foreign-owned. type Borrowed<'a>; @@ -39,18 +35,20 @@ pub unsafe trait ForeignOwnable: Sized { /// Converts a Rust-owned object to a foreign-owned one. /// + /// The foreign representation is a pointer to void. Aside from the guarantees listed below, + /// there are no other guarantees for this pointer. For example, it might be invalid, dangling + /// or pointing to uninitialized memory. Using it in any way except for [`from_foreign`], + /// [`try_from_foreign`], [`borrow`], or [`borrow_mut`] can result in undefined behavior. + /// /// # Guarantees /// - /// The return value is guaranteed to be well-aligned, but there are no other guarantees for - /// this pointer. For example, it might be null, dangling, or point to uninitialized memory. - /// Using it in any way except for [`ForeignOwnable::from_foreign`], [`ForeignOwnable::borrow`], - /// [`ForeignOwnable::try_from_foreign`] can result in undefined behavior. + /// - Minimum alignment of returned pointer is [`Self::FOREIGN_ALIGN`]. 
/// /// [`from_foreign`]: Self::from_foreign /// [`try_from_foreign`]: Self::try_from_foreign /// [`borrow`]: Self::borrow /// [`borrow_mut`]: Self::borrow_mut - fn into_foreign(self) -> *mut Self::PointedTo; + fn into_foreign(self) -> *mut c_void; /// Converts a foreign-owned object back to a Rust-owned one. /// @@ -60,7 +58,7 @@ pub unsafe trait ForeignOwnable: Sized { /// must not be passed to `from_foreign` more than once. /// /// [`into_foreign`]: Self::into_foreign - unsafe fn from_foreign(ptr: *mut Self::PointedTo) -> Self; + unsafe fn from_foreign(ptr: *mut c_void) -> Self; /// Tries to convert a foreign-owned object back to a Rust-owned one. /// @@ -72,7 +70,7 @@ pub unsafe trait ForeignOwnable: Sized { /// `ptr` must either be null or satisfy the safety requirements for [`from_foreign`]. /// /// [`from_foreign`]: Self::from_foreign - unsafe fn try_from_foreign(ptr: *mut Self::PointedTo) -> Option { + unsafe fn try_from_foreign(ptr: *mut c_void) -> Option { if ptr.is_null() { None } else { @@ -95,7 +93,7 @@ unsafe fn try_from_foreign(ptr: *mut Self::PointedTo) -> Option { /// /// [`into_foreign`]: Self::into_foreign /// [`from_foreign`]: Self::from_foreign - unsafe fn borrow<'a>(ptr: *mut Self::PointedTo) -> Self::Borrowed<'a>; + unsafe fn borrow<'a>(ptr: *mut c_void) -> Self::Borrowed<'a>; /// Borrows a foreign-owned object mutably. /// @@ -123,23 +121,24 @@ unsafe fn try_from_foreign(ptr: *mut Self::PointedTo) -> Option { /// [`from_foreign`]: Self::from_foreign /// [`borrow`]: Self::borrow /// [`Arc`]: crate::sync::Arc - unsafe fn borrow_mut<'a>(ptr: *mut Self::PointedTo) -> Self::BorrowedMut<'a>; + unsafe fn borrow_mut<'a>(ptr: *mut c_void) -> Self::BorrowedMut<'a>; } -// SAFETY: The `into_foreign` function returns a pointer that is dangling, but well-aligned. +// SAFETY: The pointer returned by `into_foreign` comes from a well aligned +// pointer to `()`. 
unsafe impl ForeignOwnable for () { - type PointedTo = (); + const FOREIGN_ALIGN: usize = core::mem::align_of::<()>(); type Borrowed<'a> = (); type BorrowedMut<'a> = (); - fn into_foreign(self) -> *mut Self::PointedTo { + fn into_foreign(self) -> *mut c_void { core::ptr::NonNull::dangling().as_ptr() } - unsafe fn from_foreign(_: *mut Self::PointedTo) -> Self {} + unsafe fn from_foreign(_: *mut c_void) -> Self {} - unsafe fn borrow<'a>(_: *mut Self::PointedTo) -> Self::Borrowed<'a> {} - unsafe fn borrow_mut<'a>(_: *mut Self::PointedTo) -> Self::BorrowedMut<'a> {} + unsafe fn borrow<'a>(_: *mut c_void) -> Self::Borrowed<'a> {} + unsafe fn borrow_mut<'a>(_: *mut c_void) -> Self::BorrowedMut<'a> {} } /// Runs a cleanup function/closure when dropped. diff --git a/rust/kernel/xarray.rs b/rust/kernel/xarray.rs index 75719e7bb491..a49d6db28845 100644 --- a/rust/kernel/xarray.rs +++ b/rust/kernel/xarray.rs @@ -7,9 +7,10 @@ use crate::{ alloc, bindings, build_assert, error::{Error, Result}, + ffi::c_void, types::{ForeignOwnable, NotThreadSafe, Opaque}, }; -use core::{iter, marker::PhantomData, mem, pin::Pin, ptr::NonNull}; +use core::{iter, marker::PhantomData, pin::Pin, ptr::NonNull}; use pin_init::{pin_data, pin_init, pinned_drop, PinInit}; /// An array which efficiently maps sparse integer indices to owned objects. @@ -101,7 +102,7 @@ pub fn new(kind: AllocKind) -> impl PinInit { }) } - fn iter(&self) -> impl Iterator> + '_ { + fn iter(&self) -> impl Iterator> + '_ { let mut index = 0; // SAFETY: `self.xa` is always valid by the type invariant. @@ -179,7 +180,7 @@ fn from(value: StoreError) -> Self { impl<'a, T: ForeignOwnable> Guard<'a, T> { fn load(&self, index: usize, f: F) -> Option where - F: FnOnce(NonNull) -> U, + F: FnOnce(NonNull) -> U, { // SAFETY: `self.xa.xa` is always valid by the type invariant. 
let ptr = unsafe { bindings::xa_load(self.xa.xa.get(), index) }; @@ -230,7 +231,7 @@ pub fn store( gfp: alloc::Flags, ) -> Result, StoreError> { build_assert!( - mem::align_of::() >= 4, + T::FOREIGN_ALIGN >= 4, "pointers stored in XArray must be 4-byte aligned" ); let new = value.into_foreign(); From a68a6bef0e75fb9e5aea1399d8538f4e3584dab1 Mon Sep 17 00:00:00 2001 From: Andreas Hindborg Date: Thu, 12 Jun 2025 15:09:44 +0200 Subject: [PATCH 0662/2411] rust: types: require `ForeignOwnable::into_foreign` return non-null The intended implementations of `ForeignOwnable` will not return null pointers from `into_foreign`, as this would render the implementation of `try_from_foreign` useless. Current users of `ForeignOwnable` rely on `into_foreign` returning non-null pointers. So require `into_foreign` to return non-null pointers. Suggested-by: Benno Lossin Suggested-by: Alice Ryhl Signed-off-by: Andreas Hindborg Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250612-pointed-to-v3-2-b009006d86a1@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/types.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index c156808a78d3..63a2559a545f 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -43,6 +43,7 @@ pub unsafe trait ForeignOwnable: Sized { /// # Guarantees /// /// - Minimum alignment of returned pointer is [`Self::FOREIGN_ALIGN`]. + /// - The returned pointer is not null. /// /// [`from_foreign`]: Self::from_foreign /// [`try_from_foreign`]: Self::try_from_foreign From ce60ab3964782df9ba34f0a64c0bc766dd508bde Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 29 May 2025 06:45:45 -0400 Subject: [PATCH 0663/2411] Expand the type of nfs_fattr->valid We need to be able to track more than 32 attributes per inode. 
Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/1e3405fca54efd0be7c91c1da77917b94f5dfcc4.1748515333.git.bcodding@redhat.com Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 +- include/linux/nfs_fs_sb.h | 2 +- include/linux/nfs_xdr.h | 54 +++++++++++++++++++-------------------- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a2fa6bc4d74e..17f5dcda2a00 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2209,7 +2209,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) bool attr_changed = false; bool have_delegation; - dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", + dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%llx)\n", __func__, inode->i_sb->s_id, inode->i_ino, nfs_display_fhandle_hash(NFS_FH(inode)), atomic_read(&inode->i_count), fattr->valid); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 63141320c2a8..d7895eeccea3 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -172,8 +172,8 @@ struct nfs_server { #define NFS_MOUNT_FORCE_RDIRPLUS 0x20000000 #define NFS_MOUNT_NETUNREACH_FATAL 0x40000000 - unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ + __u64 fattr_valid; /* Valid attributes */ unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ unsigned int wsize; /* write size */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 67f6632f723b..9cacbbd14787 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -45,7 +45,7 @@ struct nfs4_threshold { }; struct nfs_fattr { - unsigned int valid; /* which fields are valid */ + __u64 valid; /* which fields are valid */ umode_t mode; __u32 nlink; kuid_t uid; @@ -80,32 +80,32 @@ struct nfs_fattr { struct nfs4_label *label; }; -#define NFS_ATTR_FATTR_TYPE (1U 
<< 0) -#define NFS_ATTR_FATTR_MODE (1U << 1) -#define NFS_ATTR_FATTR_NLINK (1U << 2) -#define NFS_ATTR_FATTR_OWNER (1U << 3) -#define NFS_ATTR_FATTR_GROUP (1U << 4) -#define NFS_ATTR_FATTR_RDEV (1U << 5) -#define NFS_ATTR_FATTR_SIZE (1U << 6) -#define NFS_ATTR_FATTR_PRESIZE (1U << 7) -#define NFS_ATTR_FATTR_BLOCKS_USED (1U << 8) -#define NFS_ATTR_FATTR_SPACE_USED (1U << 9) -#define NFS_ATTR_FATTR_FSID (1U << 10) -#define NFS_ATTR_FATTR_FILEID (1U << 11) -#define NFS_ATTR_FATTR_ATIME (1U << 12) -#define NFS_ATTR_FATTR_MTIME (1U << 13) -#define NFS_ATTR_FATTR_CTIME (1U << 14) -#define NFS_ATTR_FATTR_PREMTIME (1U << 15) -#define NFS_ATTR_FATTR_PRECTIME (1U << 16) -#define NFS_ATTR_FATTR_CHANGE (1U << 17) -#define NFS_ATTR_FATTR_PRECHANGE (1U << 18) -#define NFS_ATTR_FATTR_V4_LOCATIONS (1U << 19) -#define NFS_ATTR_FATTR_V4_REFERRAL (1U << 20) -#define NFS_ATTR_FATTR_MOUNTPOINT (1U << 21) -#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID (1U << 22) -#define NFS_ATTR_FATTR_OWNER_NAME (1U << 23) -#define NFS_ATTR_FATTR_GROUP_NAME (1U << 24) -#define NFS_ATTR_FATTR_V4_SECURITY_LABEL (1U << 25) +#define NFS_ATTR_FATTR_TYPE BIT_ULL(0) +#define NFS_ATTR_FATTR_MODE BIT_ULL(1) +#define NFS_ATTR_FATTR_NLINK BIT_ULL(2) +#define NFS_ATTR_FATTR_OWNER BIT_ULL(3) +#define NFS_ATTR_FATTR_GROUP BIT_ULL(4) +#define NFS_ATTR_FATTR_RDEV BIT_ULL(5) +#define NFS_ATTR_FATTR_SIZE BIT_ULL(6) +#define NFS_ATTR_FATTR_PRESIZE BIT_ULL(7) +#define NFS_ATTR_FATTR_BLOCKS_USED BIT_ULL(8) +#define NFS_ATTR_FATTR_SPACE_USED BIT_ULL(9) +#define NFS_ATTR_FATTR_FSID BIT_ULL(10) +#define NFS_ATTR_FATTR_FILEID BIT_ULL(11) +#define NFS_ATTR_FATTR_ATIME BIT_ULL(12) +#define NFS_ATTR_FATTR_MTIME BIT_ULL(13) +#define NFS_ATTR_FATTR_CTIME BIT_ULL(14) +#define NFS_ATTR_FATTR_PREMTIME BIT_ULL(15) +#define NFS_ATTR_FATTR_PRECTIME BIT_ULL(16) +#define NFS_ATTR_FATTR_CHANGE BIT_ULL(17) +#define NFS_ATTR_FATTR_PRECHANGE BIT_ULL(18) +#define NFS_ATTR_FATTR_V4_LOCATIONS BIT_ULL(19) +#define NFS_ATTR_FATTR_V4_REFERRAL BIT_ULL(20) 
+#define NFS_ATTR_FATTR_MOUNTPOINT BIT_ULL(21) +#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID BIT_ULL(22) +#define NFS_ATTR_FATTR_OWNER_NAME BIT_ULL(23) +#define NFS_ATTR_FATTR_GROUP_NAME BIT_ULL(24) +#define NFS_ATTR_FATTR_V4_SECURITY_LABEL BIT_ULL(25) #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \ | NFS_ATTR_FATTR_MODE \ From 1c7ae2dd3f0e6d07ec0a5a348f2561f2171b9c81 Mon Sep 17 00:00:00 2001 From: Anne Marie Merritt Date: Thu, 29 May 2025 06:45:46 -0400 Subject: [PATCH 0664/2411] nfs: Add timecreate to nfs inode Add tracking of the create time (a.k.a. btime) along with corresponding bitfields, request, and decode xdr routines. Signed-off-by: Anne Marie Merritt Signed-off-by: Lance Shelton Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/1e3677b0655fa2bbaba0817b41d111d94a06e5ee.1748515333.git.bcodding@redhat.com Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 17 +++++++++++++++-- fs/nfs/nfs4proc.c | 14 +++++++++++++- fs/nfs/nfs4xdr.c | 24 ++++++++++++++++++++++++ fs/nfs/nfstrace.h | 3 ++- include/linux/nfs_fs.h | 8 ++++++++ include/linux/nfs_xdr.h | 3 +++ 6 files changed, 65 insertions(+), 4 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 17f5dcda2a00..c5462aed6bf5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -197,6 +197,7 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) if (!(flags & NFS_INO_REVAL_FORCED)) flags &= ~(NFS_INO_INVALID_MODE | NFS_INO_INVALID_OTHER | + NFS_INO_INVALID_BTIME | NFS_INO_INVALID_XATTR); flags &= ~(NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE); } @@ -522,6 +523,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode_set_atime(inode, 0, 0); inode_set_mtime(inode, 0, 0); inode_set_ctime(inode, 0, 0); + memset(&nfsi->btime, 0, sizeof(nfsi->btime)); inode_set_iversion_raw(inode, 0); inode->i_size = 0; clear_nlink(inode); @@ -545,6 +547,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) 
inode_set_ctime_to_ts(inode, fattr->ctime); else if (fattr_supported & NFS_ATTR_FATTR_CTIME) nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME); + if (fattr->valid & NFS_ATTR_FATTR_BTIME) + nfsi->btime = fattr->btime; + else if (fattr_supported & NFS_ATTR_FATTR_BTIME) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_BTIME); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) inode_set_iversion_raw(inode, fattr->change_attr); else @@ -1943,7 +1949,7 @@ static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr, NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE | NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_OTHER | - NFS_INO_INVALID_NLINK; + NFS_INO_INVALID_NLINK | NFS_INO_INVALID_BTIME; unsigned long cache_validity = NFS_I(inode)->cache_validity; enum nfs4_change_attr_type ctype = NFS_SERVER(inode)->change_attr_type; @@ -2304,7 +2310,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK | NFS_INO_INVALID_MODE - | NFS_INO_INVALID_OTHER; + | NFS_INO_INVALID_OTHER + | NFS_INO_INVALID_BTIME; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); attr_changed = true; @@ -2338,6 +2345,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_CTIME; + if (fattr->valid & NFS_ATTR_FATTR_BTIME) + nfsi->btime = fattr->btime; + else if (fattr_supported & NFS_ATTR_FATTR_BTIME) + nfsi->cache_validity |= + save_cache_validity & NFS_INO_INVALID_BTIME; + /* Check if our cached file size is stale */ if (fattr->valid & NFS_ATTR_FATTR_SIZE) { new_isize = nfs_size_to_loff_t(fattr->size); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 341740fa293d..92f1b2601b67 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -222,6 +222,7 @@ const u32 nfs4_fattr_bitmap[3] = { | FATTR4_WORD1_RAWDEV | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS + | 
FATTR4_WORD1_TIME_CREATE | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_MOUNTED_ON_FILEID, @@ -243,6 +244,7 @@ static const u32 nfs4_pnfs_open_bitmap[3] = { | FATTR4_WORD1_RAWDEV | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS + | FATTR4_WORD1_TIME_CREATE | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_MODIFY, FATTR4_WORD2_MDSTHRESHOLD @@ -323,6 +325,9 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src, if (!(cache_validity & NFS_INO_INVALID_OTHER)) dst[1] &= ~(FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP); + if (!(cache_validity & NFS_INO_INVALID_BTIME)) + dst[1] &= ~FATTR4_WORD1_TIME_CREATE; + if (nfs_have_delegated_mtime(inode)) { if (!(cache_validity & NFS_INO_INVALID_ATIME)) dst[1] &= ~(FATTR4_WORD1_TIME_ACCESS|FATTR4_WORD1_TIME_ACCESS_SET); @@ -1307,7 +1312,8 @@ nfs4_update_changeattr_locked(struct inode *inode, NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER | NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK | - NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR; + NFS_INO_INVALID_MODE | NFS_INO_INVALID_BTIME | + NFS_INO_INVALID_XATTR; nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); } nfsi->attrtimeo_timestamp = jiffies; @@ -4047,6 +4053,10 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f server->fattr_valid &= ~NFS_ATTR_FATTR_CTIME; if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY)) server->fattr_valid &= ~NFS_ATTR_FATTR_MTIME; + if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY)) + server->fattr_valid &= ~NFS_ATTR_FATTR_MTIME; + if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_CREATE)) + server->fattr_valid &= ~NFS_ATTR_FATTR_BTIME; memcpy(server->attr_bitmask_nl, res.attr_bitmask, sizeof(server->attr_bitmask)); server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL; @@ -5781,6 +5791,8 @@ void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[], bitmask[1] |= FATTR4_WORD1_TIME_MODIFY; if (cache_validity & NFS_INO_INVALID_BLOCKS) 
bitmask[1] |= FATTR4_WORD1_SPACE_USED; + if (cache_validity & NFS_INO_INVALID_BTIME) + bitmask[1] |= FATTR4_WORD1_TIME_CREATE; if (cache_validity & NFS_INO_INVALID_SIZE) bitmask[0] |= FATTR4_WORD0_SIZE; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 318afde38057..49ff98571fa5 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1623,6 +1623,7 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg | FATTR4_WORD1_RAWDEV | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS + | FATTR4_WORD1_TIME_CREATE | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_MODIFY; attrs[2] |= FATTR4_WORD2_SECURITY_LABEL; @@ -4207,6 +4208,24 @@ static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, str return status; } +static int decode_attr_time_create(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time) +{ + int status = 0; + + time->tv_sec = 0; + time->tv_nsec = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_CREATE - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_TIME_CREATE)) { + status = decode_attr_time(xdr, time); + if (status == 0) + status = NFS_ATTR_FATTR_BTIME; + bitmap[1] &= ~FATTR4_WORD1_TIME_CREATE; + } + dprintk("%s: btime=%lld\n", __func__, time->tv_sec); + return status; +} + static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time) { int status = 0; @@ -4781,6 +4800,11 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; + status = decode_attr_time_create(xdr, bitmap, &fattr->btime); + if (status < 0) + goto xdr_error; + fattr->valid |= status; + status = decode_attr_time_metadata(xdr, bitmap, &fattr->ctime); if (status < 0) goto xdr_error; diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 7a058bd8c566..f49f064c5ee5 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -32,7 +32,8 @@ { NFS_INO_INVALID_BLOCKS, "INVALID_BLOCKS" }, \ { NFS_INO_INVALID_XATTR, 
"INVALID_XATTR" }, \ { NFS_INO_INVALID_NLINK, "INVALID_NLINK" }, \ - { NFS_INO_INVALID_MODE, "INVALID_MODE" }) + { NFS_INO_INVALID_MODE, "INVALID_MODE" }, \ + { NFS_INO_INVALID_BTIME, "INVALID_BTIME" }) #define nfs_show_nfsi_flags(v) \ __print_flags(v, "|", \ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 67ae2c3f41d2..c585939b6cd6 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -160,6 +160,12 @@ struct nfs_inode { unsigned long flags; /* atomic bit ops */ unsigned long cache_validity; /* bit mask */ + /* + * NFS Attributes not included in struct inode + */ + + struct timespec64 btime; + /* * read_cache_jiffies is when we started read-caching this inode. * attrtimeo is for how long the cached information is assumed @@ -316,10 +322,12 @@ struct nfs4_copy_state { #define NFS_INO_INVALID_XATTR BIT(15) /* xattrs are invalid */ #define NFS_INO_INVALID_NLINK BIT(16) /* cached nlinks is invalid */ #define NFS_INO_INVALID_MODE BIT(17) /* cached mode is invalid */ +#define NFS_INO_INVALID_BTIME BIT(18) /* cached btime is invalid */ #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ | NFS_INO_INVALID_CTIME \ | NFS_INO_INVALID_MTIME \ + | NFS_INO_INVALID_BTIME \ | NFS_INO_INVALID_SIZE \ | NFS_INO_INVALID_NLINK \ | NFS_INO_INVALID_MODE \ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9cacbbd14787..ac4bff6e9913 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -67,6 +67,7 @@ struct nfs_fattr { struct timespec64 atime; struct timespec64 mtime; struct timespec64 ctime; + struct timespec64 btime; __u64 change_attr; /* NFSv4 change attribute */ __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ __u64 pre_size; /* pre_op_attr.size */ @@ -106,6 +107,7 @@ struct nfs_fattr { #define NFS_ATTR_FATTR_OWNER_NAME BIT_ULL(23) #define NFS_ATTR_FATTR_GROUP_NAME BIT_ULL(24) #define NFS_ATTR_FATTR_V4_SECURITY_LABEL BIT_ULL(25) +#define NFS_ATTR_FATTR_BTIME BIT_ULL(26) #define NFS_ATTR_FATTR 
(NFS_ATTR_FATTR_TYPE \ | NFS_ATTR_FATTR_MODE \ @@ -126,6 +128,7 @@ struct nfs_fattr { | NFS_ATTR_FATTR_SPACE_USED) #define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \ | NFS_ATTR_FATTR_SPACE_USED \ + | NFS_ATTR_FATTR_BTIME \ | NFS_ATTR_FATTR_V4_SECURITY_LABEL) /* From 4b5427414749233fb2315a89c9fa64328cf5ec03 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 29 May 2025 06:45:47 -0400 Subject: [PATCH 0665/2411] NFS: Return the file btime in the statx results when appropriate If the server supports the NFSv4.x "create_time" attribute, then return it as part of the statx results. Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/eae27d6467e08aaa67e0ac6ae7119263a0f83349.1748515333.git.bcodding@redhat.com Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 15 +++++++++++++-- fs/nfs/nfs4trace.h | 3 ++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c5462aed6bf5..4c7fa4f2bd5e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -937,6 +937,7 @@ static void nfs_readdirplus_parent_cache_hit(struct dentry *dentry) static u32 nfs_get_valid_attrmask(struct inode *inode) { + u64 fattr_valid = NFS_SERVER(inode)->fattr_valid; unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); u32 reply_mask = STATX_INO | STATX_TYPE; @@ -956,6 +957,9 @@ static u32 nfs_get_valid_attrmask(struct inode *inode) reply_mask |= STATX_UID | STATX_GID; if (!(cache_validity & NFS_INO_INVALID_BLOCKS)) reply_mask |= STATX_BLOCKS; + if (!(cache_validity & NFS_INO_INVALID_BTIME) && + (fattr_valid & NFS_ATTR_FATTR_BTIME)) + reply_mask |= STATX_BTIME; if (!(cache_validity & NFS_INO_INVALID_CHANGE)) reply_mask |= STATX_CHANGE_COOKIE; return reply_mask; @@ -966,6 +970,7 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, { struct inode *inode = d_inode(path->dentry); struct nfs_server *server = NFS_SERVER(inode); + u64 fattr_valid = server->fattr_valid; unsigned long cache_validity; 
int err = 0; bool force_sync = query_flags & AT_STATX_FORCE_SYNC; @@ -976,9 +981,12 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, request_mask &= STATX_TYPE | STATX_MODE | STATX_NLINK | STATX_UID | STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME | - STATX_INO | STATX_SIZE | STATX_BLOCKS | + STATX_INO | STATX_SIZE | STATX_BLOCKS | STATX_BTIME | STATX_CHANGE_COOKIE; + if (!(fattr_valid & NFS_ATTR_FATTR_BTIME)) + request_mask &= ~STATX_BTIME; + if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) { if (readdirplus_enabled) nfs_readdirplus_parent_cache_hit(path->dentry); @@ -1010,7 +1018,7 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, /* Is the user requesting attributes that might need revalidation? */ if (!(request_mask & (STATX_MODE|STATX_NLINK|STATX_ATIME|STATX_CTIME| STATX_MTIME|STATX_UID|STATX_GID| - STATX_SIZE|STATX_BLOCKS| + STATX_SIZE|STATX_BLOCKS|STATX_BTIME| STATX_CHANGE_COOKIE))) goto out_no_revalidate; @@ -1034,6 +1042,8 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, do_update |= cache_validity & NFS_INO_INVALID_OTHER; if (request_mask & STATX_BLOCKS) do_update |= cache_validity & NFS_INO_INVALID_BLOCKS; + if (request_mask & STATX_BTIME) + do_update |= cache_validity & NFS_INO_INVALID_BTIME; if (do_update) { if (readdirplus_enabled) @@ -1055,6 +1065,7 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC; if (S_ISDIR(inode->i_mode)) stat->blksize = NFS_SERVER(inode)->dtsize; + stat->btime = NFS_I(inode)->btime; out: trace_nfs_getattr_exit(inode, err); return err; diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index deab4c0e21a0..553e45502588 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -30,7 +30,8 @@ { NFS_ATTR_FATTR_CTIME, "CTIME" }, \ { NFS_ATTR_FATTR_CHANGE, "CHANGE" }, \ { NFS_ATTR_FATTR_OWNER_NAME, "OWNER_NAME" }, \ - { NFS_ATTR_FATTR_GROUP_NAME, "GROUP_NAME" }) + { NFS_ATTR_FATTR_GROUP_NAME, 
"GROUP_NAME" }, \ + { NFS_ATTR_FATTR_BTIME, "BTIME" }) DECLARE_EVENT_CLASS(nfs4_clientid_event, TP_PROTO( From c1b0b9d79fdf8a86451eac914fe6f5cf39bbfc5f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 9 Jun 2025 08:15:17 +1000 Subject: [PATCH 0666/2411] nfs: use lock_two_nondirectories() Rather than open-coding this function call it to make intention clear and to use "correct" nesting levels (parent and child are for directories). This is purely cosmetic with no expected change in behaviour. Signed-off-by: NeilBrown Link: https://lore.kernel.org/r/174942091741.608730.3327223511347232829@noble.neil.brown.name Signed-off-by: Trond Myklebust --- fs/nfs/nfs4file.c | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 5e9d66f3466c..53a958746bb0 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -253,7 +253,6 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off, struct nfs_server *server = NFS_SERVER(dst_inode); struct inode *src_inode = file_inode(src_file); unsigned int bs = server->clone_blksize; - bool same_inode = false; int ret; /* NFS does not support deduplication. */ @@ -275,20 +274,8 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off, goto out; } - if (src_inode == dst_inode) - same_inode = true; - /* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? 
*/ - if (same_inode) { - inode_lock(src_inode); - } else if (dst_inode < src_inode) { - inode_lock_nested(dst_inode, I_MUTEX_PARENT); - inode_lock_nested(src_inode, I_MUTEX_CHILD); - } else { - inode_lock_nested(src_inode, I_MUTEX_PARENT); - inode_lock_nested(dst_inode, I_MUTEX_CHILD); - } - + lock_two_nondirectories(src_inode, dst_inode); /* flush all pending writes on both src and dst so that server * has the latest data */ ret = nfs_sync_inode(src_inode); @@ -306,15 +293,7 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off, truncate_inode_pages_range(&dst_inode->i_data, dst_off, dst_off + count - 1); out_unlock: - if (same_inode) { - inode_unlock(src_inode); - } else if (dst_inode < src_inode) { - inode_unlock(src_inode); - inode_unlock(dst_inode); - } else { - inode_unlock(dst_inode); - inode_unlock(src_inode); - } + unlock_two_nondirectories(src_inode, dst_inode); out: return ret < 0 ? ret : count; } From a9e21837208d63f42a3387bdf826605c1904be9b Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Tue, 10 Jun 2025 17:12:46 +0200 Subject: [PATCH 0667/2411] pnfs: add pnfs_ds_connect trace point This tracepoint aims to expose pnfs DS connect status Signed-off-by: Tigran Mkrtchyan Reviewed-by: Benjamin Coddington Link: https://lore.kernel.org/r/20250610151246.9147-1-tigran.mkrtchyan@desy.de Signed-off-by: Trond Myklebust --- fs/nfs/nfs4trace.c | 1 + fs/nfs/nfs4trace.h | 26 ++++++++++++++++++++++++++ fs/nfs/pnfs_nfs.c | 14 +++++++++----- 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c index 389941ccc9c9..436763a559cd 100644 --- a/fs/nfs/nfs4trace.c +++ b/fs/nfs/nfs4trace.c @@ -26,6 +26,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_done); EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_done); EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_pagelist); EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_pagelist); +EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_ds_connect); 
EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_read_error); EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_write_error); diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 553e45502588..9f69d7e14925 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -274,6 +274,32 @@ TRACE_EVENT(nfs4_cb_offload, show_nfs_stable_how(__entry->cb_how) ) ); + +TRACE_EVENT(pnfs_ds_connect, + TP_PROTO( + char *ds_remotestr, + int status + ), + + TP_ARGS(ds_remotestr, status), + + TP_STRUCT__entry( + __string(ds_ips, ds_remotestr) + __field(int, status) + ), + + TP_fast_assign( + __assign_str(ds_ips); + __entry->status = status; + ), + + TP_printk( + "ds_ips=%s, status=%d", + __get_str(ds_ips), + __entry->status + ) +); + #endif /* CONFIG_NFS_V4_1 */ TRACE_EVENT(nfs4_setup_sequence, diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index b4ccdf78d4dd..7b32afb29782 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -17,6 +17,7 @@ #include "internal.h" #include "pnfs.h" #include "netns.h" +#include "nfs4trace.h" #define NFSDBG_FACILITY NFSDBG_PNFS @@ -1007,8 +1008,10 @@ int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, err = nfs4_wait_ds_connect(ds); if (err || ds->ds_clp) goto out; - if (nfs4_test_deviceid_unavailable(devid)) - return -ENODEV; + if (nfs4_test_deviceid_unavailable(devid)) { + err = -ENODEV; + goto out; + } } while (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) != 0); if (ds->ds_clp) @@ -1038,11 +1041,12 @@ int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) { WARN_ON_ONCE(ds->ds_clp || !nfs4_test_deviceid_unavailable(devid)); - return -EINVAL; - } - err = nfs_client_init_status(ds->ds_clp); + err = -EINVAL; + } else + err = nfs_client_init_status(ds->ds_clp); } + trace_pnfs_ds_connect(ds->ds_remotestr, err); return err; } EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect); From 0715a72ee9a38461eac4b34388b772914f269119 Mon Sep 17 00:00:00 2001 From: Anthony 
Iliopoulos Date: Fri, 13 Jun 2025 11:44:37 +0200 Subject: [PATCH 0668/2411] NFS: remove unused wpages field from struct nfs_server The wpages field is not serving any purpose since commit c63c7b051395 ("NFS: Fix a race when doing NFS write coalescing") which was merged in v2.6.22-rc1. Remove it completely. Signed-off-by: Anthony Iliopoulos Link: https://lore.kernel.org/r/20250613094439.82338-2-ailiop@suse.com Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 - include/linux/nfs_fs_sb.h | 1 - 2 files changed, 2 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index cf35ad3f818a..23dafc590476 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -814,7 +814,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->wsize = max_rpc_payload; if (server->wsize > NFS_MAX_FILE_IO_SIZE) server->wsize = NFS_MAX_FILE_IO_SIZE; - server->wpages = (server->wsize + PAGE_SIZE - 1) >> PAGE_SHIFT; server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d7895eeccea3..7048f9b867ab 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -177,7 +177,6 @@ struct nfs_server { unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ unsigned int wsize; /* write size */ - unsigned int wpages; /* write size (in pages) */ unsigned int wtmult; /* server disk block size */ unsigned int dtsize; /* readdir size */ unsigned short port; /* "port=" setting */ From 74a33326cfe8e62ebe0a65ba01ea8a8bceb532f8 Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Fri, 13 Jun 2025 11:44:38 +0200 Subject: [PATCH 0669/2411] NFS: remove unused time_delta field from struct nfs_server The last code that was using this was removed via commit ca0daa277aca ("NFS: Cache aggressively when file is open for writing") which was merged in v4.8-rc1, so it can be removed completely. 
Signed-off-by: Anthony Iliopoulos Link: https://lore.kernel.org/r/20250613094439.82338-3-ailiop@suse.com Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 - include/linux/nfs_fs_sb.h | 1 - 2 files changed, 2 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 23dafc590476..47258dc3af70 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -830,7 +830,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->maxfilesize = fsinfo->maxfilesize; - server->time_delta = fsinfo->time_delta; server->change_attr_type = fsinfo->change_attr_type; server->clone_blksize = fsinfo->clone_blksize; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 7048f9b867ab..e1b2cf57e765 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -202,7 +202,6 @@ struct nfs_server { struct nfs_fsid fsid; int s_sysfs_id; /* sysfs dentry index */ __u64 maxfilesize; /* maximum file size */ - struct timespec64 time_delta; /* smallest time granularity */ unsigned long mount_time; /* when this fs was mounted */ struct super_block *super; /* VFS super block */ dev_t s_dev; /* superblock dev numbers */ From 2c665d91c2a2d8b5bdf1374d1253b3c89fca4ede Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Fri, 13 Jun 2025 11:44:39 +0200 Subject: [PATCH 0670/2411] NFS: remove unused pnfs_ld_data field from struct nfs_server The last code that was using this was removed via commit 20d655d6197d ("pnfs/blocklayout: use the device id cache") which was merged in v3.18-rc1, so it can be removed completely. 
Signed-off-by: Anthony Iliopoulos Link: https://lore.kernel.org/r/20250613094439.82338-4-ailiop@suse.com Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index e1b2cf57e765..d2d36711a119 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -246,7 +246,6 @@ struct nfs_server { filesystem */ struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ struct rpc_wait_queue roc_rpcwaitq; - void *pnfs_ld_data; /* per mount point data */ /* the following fields are protected by nfs_client->cl_lock */ struct rb_root state_owners; From 8c206b0a121e4195f892f298628791f3b848fef0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 18 Jun 2025 09:19:12 -0400 Subject: [PATCH 0671/2411] nfs: add cache_validity to the nfs_inode_event tracepoints Managing the cache_validity flags is the deep voodoo of NFS cache coherency. Let's have a little extra visibility into that value via the nfs_inode_event tracepoints. 
Reviewed-by: Benjamin Coddington Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20250618-nfs-tracepoints-v2-1-540c9fb48da2@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfstrace.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index f49f064c5ee5..96b1323318c2 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -57,6 +57,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event, __field(u32, fhandle) __field(u64, fileid) __field(u64, version) + __field(unsigned long, cache_validity) ), TP_fast_assign( @@ -65,14 +66,17 @@ DECLARE_EVENT_CLASS(nfs_inode_event, __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); + __entry->cache_validity = nfsi->cache_validity; ), TP_printk( - "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu ", + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu cache_validity=0x%lx (%s)", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - (unsigned long long)__entry->version + (unsigned long long)__entry->version, + __entry->cache_validity, + nfs_show_cache_validity(__entry->cache_validity) ) ); From 0139a30ada76d154ff48dfe7a1d2056f7d7ae023 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 18 Jun 2025 09:19:13 -0400 Subject: [PATCH 0672/2411] nfs: add a tracepoint to nfs_inode_detach_delegation_locked We have tracepoints for setting a delegation and reclaiming them. Add a tracepoint for when the delegation is being detached from the inode. 
Reviewed-by: Benjamin Coddington Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20250618-nfs-tracepoints-v2-2-540c9fb48da2@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 2 ++ fs/nfs/nfs4trace.h | 1 + 2 files changed, 3 insertions(+) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 10ef46e29b25..78a97d340bbd 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -355,6 +355,8 @@ nfs_detach_delegation_locked(struct nfs_inode *nfsi, rcu_dereference_protected(nfsi->delegation, lockdep_is_held(&clp->cl_lock)); + trace_nfs4_detach_delegation(&nfsi->vfs_inode, delegation->type); + if (deleg_cur == NULL || delegation != deleg_cur) return NULL; diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 9f69d7e14925..674f15e91c54 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -983,6 +983,7 @@ DECLARE_EVENT_CLASS(nfs4_set_delegation_event, TP_ARGS(inode, fmode)) DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_set_delegation); DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_reclaim_delegation); +DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_detach_delegation); TRACE_EVENT(nfs4_delegreturn_exit, TP_PROTO( From 5dd03d14b3a9595ea320a55c499ebf85b422392f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 18 Jun 2025 09:19:14 -0400 Subject: [PATCH 0673/2411] nfs: new tracepoint in nfs_delegation_need_return Add a tracepoint in the function that decides whether to return a delegation to the server. 
Reviewed-by: Benjamin Coddington Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20250618-nfs-tracepoints-v2-3-540c9fb48da2@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 2 ++ fs/nfs/nfs4trace.h | 47 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 78a97d340bbd..6f136c47eed7 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -594,6 +594,8 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation) { bool ret = false; + trace_nfs_delegation_need_return(delegation); + if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) ret = true; if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) || diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 674f15e91c54..6ab05261ce34 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -14,6 +14,8 @@ #include #include +#include "delegation.h" + #define show_nfs_fattr_flags(valid) \ __print_flags((unsigned long)valid, "|", \ { NFS_ATTR_FATTR_TYPE, "TYPE" }, \ @@ -985,6 +987,51 @@ DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_set_delegation); DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_reclaim_delegation); DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_detach_delegation); +#define show_delegation_flags(flags) \ + __print_flags(flags, "|", \ + { BIT(NFS_DELEGATION_NEED_RECLAIM), "NEED_RECLAIM" }, \ + { BIT(NFS_DELEGATION_RETURN), "RETURN" }, \ + { BIT(NFS_DELEGATION_RETURN_IF_CLOSED), "RETURN_IF_CLOSED" }, \ + { BIT(NFS_DELEGATION_REFERENCED), "REFERENCED" }, \ + { BIT(NFS_DELEGATION_RETURNING), "RETURNING" }, \ + { BIT(NFS_DELEGATION_REVOKED), "REVOKED" }, \ + { BIT(NFS_DELEGATION_TEST_EXPIRED), "TEST_EXPIRED" }, \ + { BIT(NFS_DELEGATION_INODE_FREEING), "INODE_FREEING" }, \ + { BIT(NFS_DELEGATION_RETURN_DELAYED), "RETURN_DELAYED" }) + +DECLARE_EVENT_CLASS(nfs4_delegation_event, + TP_PROTO( + const struct nfs_delegation *delegation + ), + + TP_ARGS(delegation), + + 
TP_STRUCT__entry( + __field(u32, fhandle) + __field(unsigned int, fmode) + __field(unsigned long, flags) + ), + + TP_fast_assign( + __entry->fhandle = nfs_fhandle_hash(NFS_FH(delegation->inode)); + __entry->fmode = delegation->type; + __entry->flags = delegation->flags; + ), + + TP_printk( + "fhandle=0x%08x fmode=%s flags=%s", + __entry->fhandle, show_fs_fmode_flags(__entry->fmode), + show_delegation_flags(__entry->flags) + ) +); +#define DEFINE_NFS4_DELEGATION_EVENT(name) \ + DEFINE_EVENT(nfs4_delegation_event, name, \ + TP_PROTO( \ + const struct nfs_delegation *delegation \ + ), \ + TP_ARGS(delegation)) +DEFINE_NFS4_DELEGATION_EVENT(nfs_delegation_need_return); + TRACE_EVENT(nfs4_delegreturn_exit, TP_PROTO( const struct nfs4_delegreturnargs *args, From b0b7cdc99431655aec3f3afcf05e3eeca0f8dd79 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 18 Jun 2025 09:19:15 -0400 Subject: [PATCH 0674/2411] nfs: new tracepoint in match_stateid operation Add new tracepoints in the NFSv4 match_stateid minorversion op that show the info in both stateids. 
Reviewed-by: Benjamin Coddington Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20250618-nfs-tracepoints-v2-4-540c9fb48da2@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 ++++ fs/nfs/nfs4trace.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 92f1b2601b67..ef2077e185b6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10692,6 +10692,8 @@ nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) static bool nfs41_match_stateid(const nfs4_stateid *s1, const nfs4_stateid *s2) { + trace_nfs41_match_stateid(s1, s2); + if (s1->type != s2->type) return false; @@ -10709,6 +10711,8 @@ static bool nfs41_match_stateid(const nfs4_stateid *s1, static bool nfs4_match_stateid(const nfs4_stateid *s1, const nfs4_stateid *s2) { + trace_nfs4_match_stateid(s1, s2); + return nfs4_stateid_match(s1, s2); } diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 6ab05261ce34..fe419147f60f 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -1524,6 +1524,63 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_recall); DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_layoutrecall_file); +#define show_stateid_type(type) \ + __print_symbolic(type, \ + { NFS4_INVALID_STATEID_TYPE, "INVALID" }, \ + { NFS4_SPECIAL_STATEID_TYPE, "SPECIAL" }, \ + { NFS4_OPEN_STATEID_TYPE, "OPEN" }, \ + { NFS4_LOCK_STATEID_TYPE, "LOCK" }, \ + { NFS4_DELEGATION_STATEID_TYPE, "DELEGATION" }, \ + { NFS4_LAYOUT_STATEID_TYPE, "LAYOUT" }, \ + { NFS4_PNFS_DS_STATEID_TYPE, "PNFS_DS" }, \ + { NFS4_REVOKED_STATEID_TYPE, "REVOKED" }, \ + { NFS4_FREED_STATEID_TYPE, "FREED" }) + +DECLARE_EVENT_CLASS(nfs4_match_stateid_event, + TP_PROTO( + const nfs4_stateid *s1, + const nfs4_stateid *s2 + ), + + TP_ARGS(s1, s2), + + TP_STRUCT__entry( + __field(int, s1_seq) + __field(int, s2_seq) + __field(u32, s1_hash) + 
__field(u32, s2_hash) + __field(int, s1_type) + __field(int, s2_type) + ), + + TP_fast_assign( + __entry->s1_seq = s1->seqid; + __entry->s1_hash = nfs_stateid_hash(s1); + __entry->s1_type = s1->type; + __entry->s2_seq = s2->seqid; + __entry->s2_hash = nfs_stateid_hash(s2); + __entry->s2_type = s2->type; + ), + + TP_printk( + "s1=%s:%x:%u s2=%s:%x:%u", + show_stateid_type(__entry->s1_type), + __entry->s1_hash, __entry->s1_seq, + show_stateid_type(__entry->s2_type), + __entry->s2_hash, __entry->s2_seq + ) +); + +#define DEFINE_NFS4_MATCH_STATEID_EVENT(name) \ + DEFINE_EVENT(nfs4_match_stateid_event, name, \ + TP_PROTO( \ + const nfs4_stateid *s1, \ + const nfs4_stateid *s2 \ + ), \ + TP_ARGS(s1, s2)) +DEFINE_NFS4_MATCH_STATEID_EVENT(nfs41_match_stateid); +DEFINE_NFS4_MATCH_STATEID_EVENT(nfs4_match_stateid); + DECLARE_EVENT_CLASS(nfs4_idmap_event, TP_PROTO( const char *name, From 72508db0fe1762f2cfcff1cb4cf28a8e645bdd43 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 24 Jun 2025 17:32:09 -0400 Subject: [PATCH 0675/2411] NFS: Allow folio migration for the case of mode == MIGRATE_SYNC When the mode is MIGRATE_SYNC, we are allowed to call nfs_wb_folio() under the folio lock. Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 374fc6b34c79..0dd22983f0d0 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -2113,8 +2113,12 @@ int nfs_migrate_folio(struct address_space *mapping, struct folio *dst, * that we can safely release the inode reference while holding * the folio lock. 
*/ - if (folio_test_private(src)) - return -EBUSY; + if (folio_test_private(src)) { + if (mode == MIGRATE_SYNC) + nfs_wb_folio(src->mapping->host, src); + if (folio_test_private(src)) + return -EBUSY; + } if (folio_test_private_2(src)) { /* [DEPRECATED] */ if (mode == MIGRATE_ASYNC) From 90c9550a8d65fb9b1bf87baf97a04ed91bf61b33 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 May 2025 13:50:55 +0200 Subject: [PATCH 0676/2411] NFS: support the kernel keyring for TLS Allow tlshd to use a per-mount key from the kernel keyring similar to NVMe over TCP. Note that tlshd expects keys and certificates stored in the kernel keyring to be in DER format, not the PEM format used for file based keys and certificates, so they need to be converted before they are added to the keyring, which is a bit unexpected. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Link: https://lore.kernel.org/r/20250515115107.33052-2-hch@lst.de Signed-off-by: Trond Myklebust --- fs/nfs/fs_context.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 13f71ca8c974..9e94d18448ff 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -96,6 +96,8 @@ enum nfs_param { Opt_wsize, Opt_write, Opt_xprtsec, + Opt_cert_serial, + Opt_privkey_serial, }; enum { @@ -221,6 +223,8 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = { fsparam_enum ("write", Opt_write, nfs_param_enums_write), fsparam_u32 ("wsize", Opt_wsize), fsparam_string("xprtsec", Opt_xprtsec), + fsparam_s32("cert_serial", Opt_cert_serial), + fsparam_s32("privkey_serial", Opt_privkey_serial), {} }; @@ -551,6 +555,32 @@ static int nfs_parse_version_string(struct fs_context *fc, return 0; } +#ifdef CONFIG_KEYS +static int nfs_tls_key_verify(key_serial_t key_id) +{ + struct key *key = key_lookup(key_id); + int error = 0; + + if (IS_ERR(key)) { + pr_err("key id %08x not found\n", key_id); + return PTR_ERR(key); + } + if 
(test_bit(KEY_FLAG_REVOKED, &key->flags) || + test_bit(KEY_FLAG_INVALIDATED, &key->flags)) { + pr_err("key id %08x revoked\n", key_id); + error = -EKEYREVOKED; + } + + key_put(key); + return error; +} +#else +static inline int nfs_tls_key_verify(key_serial_t key_id) +{ + return -ENOENT; +} +#endif /* CONFIG_KEYS */ + /* * Parse a single mount parameter. */ @@ -807,6 +837,18 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, if (ret < 0) return ret; break; + case Opt_cert_serial: + ret = nfs_tls_key_verify(result.int_32); + if (ret < 0) + return ret; + ctx->xprtsec.cert_serial = result.int_32; + break; + case Opt_privkey_serial: + ret = nfs_tls_key_verify(result.int_32); + if (ret < 0) + return ret; + ctx->xprtsec.privkey_serial = result.int_32; + break; case Opt_proto: if (!param->string) From 87268f7a4f1fb7243bba5a4aa6199720b54f72dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 May 2025 13:50:56 +0200 Subject: [PATCH 0677/2411] nfs: create a kernel keyring Create a kernel .nfs keyring similar to the nvme .nvme one. Unlike for a userspace-created keyring, tlshd is a possessor of the keys with this and thus the keys don't need user read permissions.
Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Link: https://lore.kernel.org/r/20250515115107.33052-3-hch@lst.de Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 4c7fa4f2bd5e..60fa0c8ff04e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2649,6 +2649,35 @@ static struct pernet_operations nfs_net_ops = { .size = sizeof(struct nfs_net), }; +#ifdef CONFIG_KEYS +static struct key *nfs_keyring; + +static int __init nfs_init_keyring(void) +{ + nfs_keyring = keyring_alloc(".nfs", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, + current_cred(), + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + (KEY_USR_ALL & ~KEY_USR_SETATTR), + KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); + return PTR_ERR_OR_ZERO(nfs_keyring); +} + +static void __exit nfs_exit_keyring(void) +{ + key_put(nfs_keyring); +} +#else +static inline int nfs_init_keyring(void) +{ + return 0; +} + +static inline void nfs_exit_keyring(void) +{ +} +#endif /* CONFIG_KEYS */ + /* * Initialize NFS */ @@ -2656,6 +2685,10 @@ static int __init init_nfs_fs(void) { int err; + err = nfs_init_keyring(); + if (err) + return err; + err = nfs_sysfs_init(); if (err < 0) goto out10; @@ -2716,6 +2749,7 @@ static int __init init_nfs_fs(void) out9: nfs_sysfs_exit(); out10: + nfs_exit_keyring(); return err; } @@ -2731,6 +2765,7 @@ static void __exit exit_nfs_fs(void) nfs_fs_proc_exit(); nfsiod_stop(); nfs_sysfs_exit(); + nfs_exit_keyring(); } /* Not quite true; I just maintain it */ From 48693d119b2114f8eaf8b8f972b29e05ae581ad4 Mon Sep 17 00:00:00 2001 From: "Dr. 
David Alan Gilbert" Date: Sun, 13 Jul 2025 00:30:06 +0100 Subject: [PATCH 0678/2411] SUNRPC: Remove unused xdr functions Remove a bunch of unused xdr_*decode* functions: The last use of xdr_decode_netobj() was removed in 2021 by: commit 7cf96b6d0104 ("lockd: Update the NLMv4 SHARE arguments decoder to use struct xdr_stream") The last use of xdr_decode_string_inplace() was removed in 2021 by: commit 3049e974a7c7 ("lockd: Update the NLMv4 FREE_ALL arguments decoder to use struct xdr_stream") The last use of xdr_stream_decode_opaque() was removed in 2024 by: commit fed8a17c61ff ("xdrgen: typedefs should use the built-in string and opaque functions") The functions xdr_stream_decode_string() and xdr_stream_decode_opaque_dup() were both added in 2018 by the commit 0e779aa70308 ("SUNRPC: Add helpers for decoding opaque and string types") but never used. Remove them. Signed-off-by: Dr. David Alan Gilbert Link: https://lore.kernel.org/r/20250712233006.403226-1-linux@treblig.org Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 9 --- net/sunrpc/xdr.c | 110 ------------------------------------- 2 files changed, 119 deletions(-) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index a2ab813a9800..e370886632b0 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -128,10 +128,7 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) __be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int len); __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int len); __be32 *xdr_encode_string(__be32 *p, const char *s); -__be32 *xdr_decode_string_inplace(__be32 *p, char **sp, unsigned int *lenp, - unsigned int maxlen); __be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *); -__be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); void xdr_inline_pages(struct xdr_buf *, unsigned int, struct page **, unsigned int, unsigned int); @@ -341,12 +338,6 @@ xdr_stream_remaining(const struct xdr_stream 
*xdr) return xdr->nwords << 2; } -ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr, - size_t size); -ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr, - size_t maxlen, gfp_t gfp_flags); -ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, - size_t size); ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, size_t maxlen, gfp_t gfp_flags); ssize_t xdr_stream_decode_opaque_auth(struct xdr_stream *xdr, u32 *flavor, diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 2ea00e354ba6..a0aae1144212 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -37,19 +37,6 @@ xdr_encode_netobj(__be32 *p, const struct xdr_netobj *obj) } EXPORT_SYMBOL_GPL(xdr_encode_netobj); -__be32 * -xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj) -{ - unsigned int len; - - if ((len = be32_to_cpu(*p++)) > XDR_MAX_NETOBJ) - return NULL; - obj->len = len; - obj->data = (u8 *) p; - return p + XDR_QUADLEN(len); -} -EXPORT_SYMBOL_GPL(xdr_decode_netobj); - /** * xdr_encode_opaque_fixed - Encode fixed length opaque data * @p: pointer to current position in XDR buffer. 
@@ -102,21 +89,6 @@ xdr_encode_string(__be32 *p, const char *string) } EXPORT_SYMBOL_GPL(xdr_encode_string); -__be32 * -xdr_decode_string_inplace(__be32 *p, char **sp, - unsigned int *lenp, unsigned int maxlen) -{ - u32 len; - - len = be32_to_cpu(*p++); - if (len > maxlen) - return NULL; - *lenp = len; - *sp = (char *) p; - return p + XDR_QUADLEN(len); -} -EXPORT_SYMBOL_GPL(xdr_decode_string_inplace); - /** * xdr_terminate_string - '\0'-terminate a string residing in an xdr_buf * @buf: XDR buffer where string resides @@ -2247,88 +2219,6 @@ int xdr_process_buf(const struct xdr_buf *buf, unsigned int offset, } EXPORT_SYMBOL_GPL(xdr_process_buf); -/** - * xdr_stream_decode_opaque - Decode variable length opaque - * @xdr: pointer to xdr_stream - * @ptr: location to store opaque data - * @size: size of storage buffer @ptr - * - * Return values: - * On success, returns size of object stored in *@ptr - * %-EBADMSG on XDR buffer overflow - * %-EMSGSIZE on overflow of storage buffer @ptr - */ -ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr, size_t size) -{ - ssize_t ret; - void *p; - - ret = xdr_stream_decode_opaque_inline(xdr, &p, size); - if (ret <= 0) - return ret; - memcpy(ptr, p, ret); - return ret; -} -EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque); - -/** - * xdr_stream_decode_opaque_dup - Decode and duplicate variable length opaque - * @xdr: pointer to xdr_stream - * @ptr: location to store pointer to opaque data - * @maxlen: maximum acceptable object size - * @gfp_flags: GFP mask to use - * - * Return values: - * On success, returns size of object stored in *@ptr - * %-EBADMSG on XDR buffer overflow - * %-EMSGSIZE if the size of the object would exceed @maxlen - * %-ENOMEM on memory allocation failure - */ -ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr, - size_t maxlen, gfp_t gfp_flags) -{ - ssize_t ret; - void *p; - - ret = xdr_stream_decode_opaque_inline(xdr, &p, maxlen); - if (ret > 0) { - *ptr = kmemdup(p, ret, 
gfp_flags); - if (*ptr != NULL) - return ret; - ret = -ENOMEM; - } - *ptr = NULL; - return ret; -} -EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque_dup); - -/** - * xdr_stream_decode_string - Decode variable length string - * @xdr: pointer to xdr_stream - * @str: location to store string - * @size: size of storage buffer @str - * - * Return values: - * On success, returns length of NUL-terminated string stored in *@str - * %-EBADMSG on XDR buffer overflow - * %-EMSGSIZE on overflow of storage buffer @str - */ -ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, size_t size) -{ - ssize_t ret; - void *p; - - ret = xdr_stream_decode_opaque_inline(xdr, &p, size); - if (ret > 0) { - memcpy(str, p, ret); - str[ret] = '\0'; - return strlen(str); - } - *str = '\0'; - return ret; -} -EXPORT_SYMBOL_GPL(xdr_stream_decode_string); - /** * xdr_stream_decode_string_dup - Decode and duplicate variable length string * @xdr: pointer to xdr_stream From 3b3bc9a1f730dc24f9765dc70de65ad10888333e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Tue, 18 Feb 2025 21:52:50 +0000 Subject: [PATCH 0679/2411] NFS: Remove unused function nfs_umount nfs_umount() has been unused since 2013's commit 4580a92d44e2 ("NFS: Use server-recommended security flavor by default (NFSv3)") Remove it. Signed-off-by: Dr. 
David Alan Gilbert Link: https://lore.kernel.org/r/20250218215250.263709-1-linux@treblig.org Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 1 - fs/nfs/mount_clnt.c | 68 --------------------------------------------- 2 files changed, 69 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 69c2c10ee658..d55dce8bf043 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -207,7 +207,6 @@ struct nfs_mount_request { }; extern int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans); -extern void nfs_umount(const struct nfs_mount_request *info); /* client.c */ extern const struct rpc_program nfs_program; diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 57c9dd700b58..db8dfb920394 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -223,74 +223,6 @@ int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans) goto out; } -/** - * nfs_umount - Notify a server that we have unmounted this export - * @info: pointer to umount request arguments - * - * MOUNTPROC_UMNT is advisory, so we set a short timeout, and always - * use UDP. 
- */ -void nfs_umount(const struct nfs_mount_request *info) -{ - static const struct rpc_timeout nfs_umnt_timeout = { - .to_initval = 1 * HZ, - .to_maxval = 3 * HZ, - .to_retries = 2, - }; - struct rpc_create_args args = { - .net = info->net, - .protocol = IPPROTO_UDP, - .address = (struct sockaddr *)info->sap, - .addrsize = info->salen, - .timeout = &nfs_umnt_timeout, - .servername = info->hostname, - .program = &mnt_program, - .version = info->version, - .authflavor = RPC_AUTH_UNIX, - .flags = RPC_CLNT_CREATE_NOPING, - .cred = current_cred(), - }; - struct rpc_message msg = { - .rpc_argp = info->dirpath, - }; - struct rpc_clnt *clnt; - int status; - - if (strlen(info->dirpath) > MNTPATHLEN) - return; - - if (info->noresvport) - args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; - - clnt = rpc_create(&args); - if (IS_ERR(clnt)) - goto out_clnt_err; - - dprintk("NFS: sending UMNT request for %s:%s\n", - (info->hostname ? info->hostname : "server"), info->dirpath); - - if (info->version == NFS_MNT3_VERSION) - msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC3_UMNT]; - else - msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC_UMNT]; - - status = rpc_call_sync(clnt, &msg, 0); - rpc_shutdown_client(clnt); - - if (unlikely(status < 0)) - goto out_call_err; - - return; - -out_clnt_err: - dprintk("NFS: failed to create UMNT RPC client, status=%ld\n", - PTR_ERR(clnt)); - return; - -out_call_err: - dprintk("NFS: UMNT request failed, status=%d\n", status); -} - /* * XDR encode/decode functions for MOUNT */ From 9768797c219326699778fba9cd3b607b2f1e7950 Mon Sep 17 00:00:00 2001 From: Sergey Bashirov Date: Mon, 30 Jun 2025 21:35:26 +0300 Subject: [PATCH 0680/2411] pNFS: Fix uninited ptr deref in block/scsi layout The error occurs on the third attempt to encode extents. When function ext_tree_prepare_commit() reallocates a larger buffer to retry encoding extents, the "layoutupdate_pages" page array is initialized only after the retry loop. 
But ext_tree_free_commitdata() is called on every iteration and tries to put pages in the array, thus dereferencing uninitialized pointers. An additional problem is that there is no limit on the maximum possible buffer_size. When there are too many extents, the client may create a layoutcommit that is larger than the maximum possible RPC size accepted by the server. During testing, we observed two typical scenarios. First, one memory page for extents is enough when we work with small files, append data to the end of the file, or preallocate extents before writing. But when we fill a new large file without preallocating, the number of extents can be huge, and counting the number of written extents in ext_tree_encode_commit() does not help much. Since this number increases even more between unlocking and locking of ext_tree, the reallocated buffer may not be large enough again and again. Co-developed-by: Konstantin Evtushenko Signed-off-by: Konstantin Evtushenko Signed-off-by: Sergey Bashirov Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250630183537.196479-2-sergeybashirov@gmail.com Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/extent_tree.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c index 8f7cff7a4293..0add0f329816 100644 --- a/fs/nfs/blocklayout/extent_tree.c +++ b/fs/nfs/blocklayout/extent_tree.c @@ -552,6 +552,15 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, return ret; } +/** + * ext_tree_prepare_commit - encode extents that need to be committed + * @arg: layout commit data + * + * Return values: + * %0: Success, all required extents are encoded + * %-ENOSPC: Some extents are encoded, but not all, due to RPC size limit + * %-ENOMEM: Out of memory, extents not encoded + */ int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) { @@ -568,12 +577,12 @@ ext_tree_prepare_commit(struct 
nfs4_layoutcommit_args *arg) start_p = page_address(arg->layoutupdate_page); arg->layoutupdate_pages = &arg->layoutupdate_page; -retry: - ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten); + ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, + &count, &arg->lastbytewritten); if (unlikely(ret)) { ext_tree_free_commitdata(arg, buffer_size); - buffer_size = ext_tree_layoutupdate_size(bl, count); + buffer_size = NFS_SERVER(arg->inode)->wsize; count = 0; arg->layoutupdate_pages = @@ -588,7 +597,8 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) return -ENOMEM; } - goto retry; + ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, + &count, &arg->lastbytewritten); } *start_p = cpu_to_be32(count); @@ -608,7 +618,7 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) } dprintk("%s found %zu ranges\n", __func__, count); - return 0; + return ret; } void From d84c4754f8740915da9977a282f72a3b2b0e0ac9 Mon Sep 17 00:00:00 2001 From: Sergey Bashirov Date: Mon, 30 Jun 2025 21:35:27 +0300 Subject: [PATCH 0681/2411] pNFS: Fix extent encoding in block/scsi layout The ext_tree_encode_commit() function may be called multiple times for the same file, layout, and last written byte if the provided buffer is not large enough to encode all extents in it. The first problem is that the last written byte field must be zeroed only on a successful call, otherwise we will lose its actual value and get an integer overflow on the next encoding attempt. The second problem is that we can't count and encode in one pass. The extent state changes during encoding, so if we return -ENOSPC but have already encoded some extents into a small buffer, they will not be re-encoded into a new larger buffer on the next try. As a result, the client never commits these extents to the server. 
Co-developed-by: Konstantin Evtushenko Signed-off-by: Konstantin Evtushenko Signed-off-by: Sergey Bashirov Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250630183537.196479-3-sergeybashirov@gmail.com Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/extent_tree.c | 80 +++++++++++++++++++++++++++++--- 1 file changed, 74 insertions(+), 6 deletions(-) diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c index 0add0f329816..faccd5caa149 100644 --- a/fs/nfs/blocklayout/extent_tree.c +++ b/fs/nfs/blocklayout/extent_tree.c @@ -520,10 +520,71 @@ static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p) return xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT); } -static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, +/** + * ext_tree_try_encode_commit - try to encode all extents into the buffer + * @bl: pointer to the layout + * @p: pointer to the output buffer + * @buffer_size: size of the output buffer + * @count: output pointer to the number of encoded extents + * @lastbyte: output pointer to the last written byte + * + * Return values: + * %0: Success, all required extents encoded, outputs are valid + * %-ENOSPC: Buffer too small, nothing encoded, outputs are invalid + */ +static int +ext_tree_try_encode_commit(struct pnfs_block_layout *bl, __be32 *p, size_t buffer_size, size_t *count, __u64 *lastbyte) { struct pnfs_block_extent *be; + + spin_lock(&bl->bl_ext_lock); + for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) { + if (be->be_state != PNFS_BLOCK_INVALID_DATA || + be->be_tag != EXTENT_WRITTEN) + continue; + + (*count)++; + if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) { + spin_unlock(&bl->bl_ext_lock); + return -ENOSPC; + } + } + for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) { + if (be->be_state != PNFS_BLOCK_INVALID_DATA || + be->be_tag != EXTENT_WRITTEN) + continue; + + if (bl->bl_scsi_layout) + p = encode_scsi_range(be, 
p); + else + p = encode_block_extent(be, p); + be->be_tag = EXTENT_COMMITTING; + } + *lastbyte = (bl->bl_lwb != 0) ? bl->bl_lwb - 1 : U64_MAX; + bl->bl_lwb = 0; + spin_unlock(&bl->bl_ext_lock); + + return 0; +} + +/** + * ext_tree_encode_commit - encode as much as possible extents into the buffer + * @bl: pointer to the layout + * @p: pointer to the output buffer + * @buffer_size: size of the output buffer + * @count: output pointer to the number of encoded extents + * @lastbyte: output pointer to the last written byte + * + * Return values: + * %0: Success, all required extents encoded, outputs are valid + * %-ENOSPC: Buffer too small, some extents are encoded, outputs are valid + */ +static int +ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, + size_t buffer_size, size_t *count, __u64 *lastbyte) +{ + struct pnfs_block_extent *be, *be_prev; int ret = 0; spin_lock(&bl->bl_ext_lock); @@ -534,9 +595,9 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, (*count)++; if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) { - /* keep counting.. */ + (*count)--; ret = -ENOSPC; - continue; + break; } if (bl->bl_scsi_layout) @@ -544,9 +605,16 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, else p = encode_block_extent(be, p); be->be_tag = EXTENT_COMMITTING; + be_prev = be; + } + if (!ret) { + *lastbyte = (bl->bl_lwb != 0) ? 
bl->bl_lwb - 1 : U64_MAX; + bl->bl_lwb = 0; + } else { + *lastbyte = be_prev->be_f_offset + be_prev->be_length; + *lastbyte <<= SECTOR_SHIFT; + *lastbyte -= 1; } - *lastbyte = bl->bl_lwb - 1; - bl->bl_lwb = 0; spin_unlock(&bl->bl_ext_lock); return ret; @@ -577,7 +645,7 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) start_p = page_address(arg->layoutupdate_page); arg->layoutupdate_pages = &arg->layoutupdate_page; - ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, + ret = ext_tree_try_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten); if (unlikely(ret)) { ext_tree_free_commitdata(arg, buffer_size); From 66642bbee595e5fa8fc4ce7c8706c3697da239fe Mon Sep 17 00:00:00 2001 From: Sergey Bashirov Date: Mon, 30 Jun 2025 21:35:28 +0300 Subject: [PATCH 0682/2411] pNFS: Add prepare commit trace to block/scsi layout Replace dprintk with trace event in ext_tree_prepare_commit() function. Co-developed-by: Konstantin Evtushenko Signed-off-by: Konstantin Evtushenko Signed-off-by: Sergey Bashirov Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250630183537.196479-4-sergeybashirov@gmail.com Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/extent_tree.c | 6 +++--- fs/nfs/nfs4trace.c | 1 + fs/nfs/nfs4trace.h | 34 ++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c index faccd5caa149..315949a7e92d 100644 --- a/fs/nfs/blocklayout/extent_tree.c +++ b/fs/nfs/blocklayout/extent_tree.c @@ -6,6 +6,7 @@ #include #include "blocklayout.h" +#include "../nfs4trace.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -637,8 +638,6 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) __be32 *start_p; int ret; - dprintk("%s enter\n", __func__); - arg->layoutupdate_page = alloc_page(GFP_NOFS); if (!arg->layoutupdate_page) return -ENOMEM; @@ -685,7 +684,8 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) } 
} - dprintk("%s found %zu ranges\n", __func__, count); + trace_bl_ext_tree_prepare_commit(ret, count, + arg->lastbytewritten, !!ret); return ret; } diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c index 436763a559cd..987c92d6364b 100644 --- a/fs/nfs/nfs4trace.c +++ b/fs/nfs/nfs4trace.c @@ -32,6 +32,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_read_error); EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_write_error); EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_commit_error); +EXPORT_TRACEPOINT_SYMBOL_GPL(bl_ext_tree_prepare_commit); EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg); EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg_err); EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_unreg); diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index fe419147f60f..9776d220cec3 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -2295,6 +2295,40 @@ TRACE_EVENT(ff_layout_commit_error, ) ); +TRACE_EVENT(bl_ext_tree_prepare_commit, + TP_PROTO( + int ret, + size_t count, + u64 lwb, + bool not_all_ranges + ), + + TP_ARGS(ret, count, lwb, not_all_ranges), + + TP_STRUCT__entry( + __field(int, ret) + __field(size_t, count) + __field(u64, lwb) + __field(bool, not_all_ranges) + ), + + TP_fast_assign( + __entry->ret = ret; + __entry->count = count; + __entry->lwb = lwb; + __entry->not_all_ranges = not_all_ranges; + ), + + TP_printk( + "ret=%d, found %zu ranges, lwb=%llu%s", + __entry->ret, + __entry->count, + __entry->lwb, + __entry->not_all_ranges ? ", not all ranges encoded" : + "" + ) +); + DECLARE_EVENT_CLASS(pnfs_bl_pr_key_class, TP_PROTO( const struct block_device *bdev, From d897d81671bc4615c80f4f3bd5e6b218f59df50c Mon Sep 17 00:00:00 2001 From: Sergey Bashirov Date: Mon, 30 Jun 2025 21:35:29 +0300 Subject: [PATCH 0683/2411] pNFS: Handle RPC size limit for layoutcommits When there are too many block extents for a layoutcommit, they may not all fit into the maximum-sized RPC. 
This patch allows the generic pnfs code to properly handle -ENOSPC returned by the block/scsi layout driver and trigger additional layoutcommits if necessary. Co-developed-by: Konstantin Evtushenko Signed-off-by: Konstantin Evtushenko Signed-off-by: Sergey Bashirov Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250630183537.196479-5-sergeybashirov@gmail.com Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 1a7ec68bde15..3fd0971bf16f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -3340,6 +3340,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) struct nfs_inode *nfsi = NFS_I(inode); loff_t end_pos; int status; + bool mark_as_dirty = false; if (!pnfs_layoutcommit_outstanding(inode)) return 0; @@ -3391,19 +3392,23 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) if (ld->prepare_layoutcommit) { status = ld->prepare_layoutcommit(&data->args); if (status) { - put_cred(data->cred); + if (status != -ENOSPC) + put_cred(data->cred); spin_lock(&inode->i_lock); set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); if (end_pos > nfsi->layout->plh_lwb) nfsi->layout->plh_lwb = end_pos; - goto out_unlock; + if (status != -ENOSPC) + goto out_unlock; + spin_unlock(&inode->i_lock); + mark_as_dirty = true; } } status = nfs4_proc_layoutcommit(data, sync); out: - if (status) + if (status || mark_as_dirty) mark_inode_dirty_sync(inode); dprintk("<-- %s status %d\n", __func__, status); return status; From 81438498a285759f31e843ac4800f82a5ce6521f Mon Sep 17 00:00:00 2001 From: Sergey Bashirov Date: Tue, 1 Jul 2025 15:21:48 +0300 Subject: [PATCH 0684/2411] pNFS: Fix stripe mapping in block/scsi layout Because of integer division, we need to carefully calculate the disk offset. Consider the example below for a stripe of 6 volumes, a chunk size of 4096, and an offset of 70000. 
chunk = div_u64(offset, dev->chunk_size) = 70000 / 4096 = 17 offset = chunk * dev->chunk_size = 17 * 4096 = 69632 disk_offset_wrong = div_u64(offset, dev->nr_children) = 69632 / 6 = 11605 disk_chunk = div_u64(chunk, dev->nr_children) = 17 / 6 = 2 disk_offset = disk_chunk * dev->chunk_size = 2 * 4096 = 8192 Signed-off-by: Sergey Bashirov Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250701122341.199112-1-sergeybashirov@gmail.com Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index cab8809f0e0f..44306ac22353 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -257,10 +257,11 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, struct pnfs_block_dev *child; u64 chunk; u32 chunk_idx; + u64 disk_chunk; u64 disk_offset; chunk = div_u64(offset, dev->chunk_size); - div_u64_rem(chunk, dev->nr_children, &chunk_idx); + disk_chunk = div_u64_rem(chunk, dev->nr_children, &chunk_idx); if (chunk_idx >= dev->nr_children) { dprintk("%s: invalid chunk idx %d (%lld/%lld)\n", @@ -273,7 +274,7 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, offset = chunk * dev->chunk_size; /* disk offset of the stripe */ - disk_offset = div_u64(offset, dev->nr_children); + disk_offset = disk_chunk * dev->chunk_size; child = &dev->children[chunk_idx]; child->map(child, disk_offset, map); From 7db6e66663681abda54f81d5916db3a3b8b1a13d Mon Sep 17 00:00:00 2001 From: Sergey Bashirov Date: Wed, 2 Jul 2025 16:32:21 +0300 Subject: [PATCH 0685/2411] pNFS: Fix disk addr range check in block/scsi layout At the end of the isect translation, disk_addr represents the physical disk offset. Thus, end calculated from disk_addr is also a physical disk offset. Therefore, range checking should be done using map->disk_offset, not map->start. 
Signed-off-by: Sergey Bashirov Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250702133226.212537-1-sergeybashirov@gmail.com Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 47189476b553..5d6edafbed20 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -149,8 +149,8 @@ do_add_page_to_bio(struct bio *bio, int npg, enum req_op op, sector_t isect, /* limit length to what the device mapping allows */ end = disk_addr + *len; - if (end >= map->start + map->len) - *len = map->start + map->len - disk_addr; + if (end >= map->disk_offset + map->len) + *len = map->disk_offset + map->len - disk_addr; retry: if (!bio) { From 86bc643afd72c28c25831c87df6e6d0b016c5004 Mon Sep 17 00:00:00 2001 From: Aaron Kling Date: Tue, 8 Jul 2025 02:30:42 -0500 Subject: [PATCH 0686/2411] efistub: Lower default log level Some uefi implementations will write the efistub logs to the display over a splash image. This is not desirable for debug and info logs, so lower the default efi log level to exclude them. 
Suggested-by: Ard Biesheuvel Signed-off-by: Aaron Kling Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/printk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/printk.c b/drivers/firmware/efi/libstub/printk.c index 3a67a2cea7bd..bc599212c05d 100644 --- a/drivers/firmware/efi/libstub/printk.c +++ b/drivers/firmware/efi/libstub/printk.c @@ -5,13 +5,13 @@ #include #include #include -#include /* For CONSOLE_LOGLEVEL_* */ +#include #include #include #include "efistub.h" -int efi_loglevel = CONSOLE_LOGLEVEL_DEFAULT; +int efi_loglevel = LOGLEVEL_NOTICE; /** * efi_char16_puts() - Write a UCS-2 encoded string to the console From 02eb7a8eee20b9ec6aafd5e17c5c41b53e8b13ef Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Fri, 11 Jul 2025 07:44:46 +0200 Subject: [PATCH 0687/2411] efi: add API doc entry for ovmf_debug_log Document the newly added sysfs ABI for accessing the in-memory debug log provided by OVMF EFI firmware (when enabled) Signed-off-by: Gerd Hoffmann Acked-by: Jonathan Corbet Signed-off-by: Ard Biesheuvel --- Documentation/ABI/testing/sysfs-firmware-efi | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-firmware-efi b/Documentation/ABI/testing/sysfs-firmware-efi index 5e4d0b27cdfe..927e362d4974 100644 --- a/Documentation/ABI/testing/sysfs-firmware-efi +++ b/Documentation/ABI/testing/sysfs-firmware-efi @@ -36,3 +36,10 @@ Description: Displays the content of the Runtime Configuration Interface Table version 2 on Dell EMC PowerEdge systems in binary format Users: It is used by Dell EMC OpenManage Server Administrator tool to populate BIOS setup page. + +What: /sys/firmware/efi/ovmf_debug_log +Date: July 2025 +Contact: Gerd Hoffmann , linux-efi@vger.kernel.org +Description: Displays the content of the OVMF debug log buffer. The file is + only present in case the firmware supports logging to a memory + buffer. 
From 8db1d772484dfa959044dd43dc28482c8c543b74 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 13 Jul 2025 22:21:43 -0700 Subject: [PATCH 0688/2411] perf ftrace latency: Add -e option to measure time between two events In addition to the function latency, it can measure events latencies. Some kernel tracepoints are paired and it's meaningful to measure how long it takes between the two events. The latency is tracked for the same thread. Currently it only uses BPF to do the work but it can be lifted later. Instead of having a separate BPF program for each tracepoint, it only uses generic 'event_begin' and 'event_end' programs to attach to any (raw) tracepoints. $ sudo perf ftrace latency -a -b --hide-empty \ -e i915_request_wait_begin,i915_request_wait_end -- sleep 1 # DURATION | COUNT | GRAPH | 256 - 512 us | 4 | ###### | 2 - 4 ms | 2 | ### | 4 - 8 ms | 12 | ################### | 8 - 16 ms | 10 | ################ | # statistics (in usec) total time: 194915 avg time: 6961 max time: 12855 min time: 373 count: 28 Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250714052143.342851-1-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/Documentation/perf-ftrace.txt | 6 + tools/perf/builtin-ftrace.c | 50 +++++- tools/perf/util/bpf_ftrace.c | 75 ++++++--- tools/perf/util/bpf_skel/func_latency.bpf.c | 166 ++++++++++++-------- tools/perf/util/ftrace.h | 1 + 5 files changed, 214 insertions(+), 84 deletions(-) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index b77f58c4d2fd..914457853bcf 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -139,6 +139,12 @@ OPTIONS for 'perf ftrace latency' Set the function name to get the histogram. Unlike perf ftrace trace, it only allows single function to calculate the histogram. +-e:: +--events=:: + Set the pair of events to get the histogram. 
The histogram is calculated + by the time difference between the two events from the same thread. This + requires -b/--use-bpf option. + -b:: --use-bpf:: Use BPF to measure function latency instead of using the ftrace (it diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 3a253a1b9f45..e1f2f3fb1b08 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -1549,6 +1549,33 @@ static void delete_filter_func(struct list_head *head) } } +static int parse_filter_event(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + struct list_head *head = opt->value; + struct filter_entry *entry; + char *s, *p; + int ret = -ENOMEM; + + s = strdup(str); + if (s == NULL) + return -ENOMEM; + + while ((p = strsep(&s, ",")) != NULL) { + entry = malloc(sizeof(*entry) + strlen(p) + 1); + if (entry == NULL) + goto out; + + strcpy(entry->name, p); + list_add_tail(&entry->list, head); + } + ret = 0; + +out: + free(s); + return ret; +} + static int parse_buffer_size(const struct option *opt, const char *str, int unset) { @@ -1711,6 +1738,8 @@ int cmd_ftrace(int argc, const char **argv) const struct option latency_options[] = { OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func", "Show latency of given function", parse_filter_func), + OPT_CALLBACK('e', "events", &ftrace.event_pair, "event1,event2", + "Show latency between the two events", parse_filter_event), #ifdef HAVE_BPF_SKEL OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf, "Use BPF to measure function latency"), @@ -1763,6 +1792,7 @@ int cmd_ftrace(int argc, const char **argv) INIT_LIST_HEAD(&ftrace.notrace); INIT_LIST_HEAD(&ftrace.graph_funcs); INIT_LIST_HEAD(&ftrace.nograph_funcs); + INIT_LIST_HEAD(&ftrace.event_pair); signal(SIGINT, sig_handler); signal(SIGUSR1, sig_handler); @@ -1817,9 +1847,24 @@ int cmd_ftrace(int argc, const char **argv) cmd_func = __cmd_ftrace; break; case PERF_FTRACE_LATENCY: - if (list_empty(&ftrace.filters)) { - pr_err("Should provide 
a function to measure\n"); + if (list_empty(&ftrace.filters) && list_empty(&ftrace.event_pair)) { + pr_err("Should provide a function or events to measure\n"); parse_options_usage(ftrace_usage, options, "T", 1); + parse_options_usage(NULL, options, "e", 1); + ret = -EINVAL; + goto out_delete_filters; + } + if (!list_empty(&ftrace.filters) && !list_empty(&ftrace.event_pair)) { + pr_err("Please specify either of function or events\n"); + parse_options_usage(ftrace_usage, options, "T", 1); + parse_options_usage(NULL, options, "e", 1); + ret = -EINVAL; + goto out_delete_filters; + } + if (!list_empty(&ftrace.event_pair) && !ftrace.target.use_bpf) { + pr_err("Event processing needs BPF\n"); + parse_options_usage(ftrace_usage, options, "b", 1); + parse_options_usage(NULL, options, "e", 1); ret = -EINVAL; goto out_delete_filters; } @@ -1910,6 +1955,7 @@ int cmd_ftrace(int argc, const char **argv) delete_filter_func(&ftrace.notrace); delete_filter_func(&ftrace.graph_funcs); delete_filter_func(&ftrace.nograph_funcs); + delete_filter_func(&ftrace.event_pair); return ret; } diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c index 7324668cc83e..0cb02412043c 100644 --- a/tools/perf/util/bpf_ftrace.c +++ b/tools/perf/util/bpf_ftrace.c @@ -21,16 +21,27 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) { int fd, err; int i, ncpus = 1, ntasks = 1; - struct filter_entry *func; + struct filter_entry *func = NULL; - if (!list_is_singular(&ftrace->filters)) { - pr_err("ERROR: %s target function(s).\n", - list_empty(&ftrace->filters) ? 
"No" : "Too many"); - return -1; + if (!list_empty(&ftrace->filters)) { + if (!list_is_singular(&ftrace->filters)) { + pr_err("ERROR: Too many target functions.\n"); + return -1; + } + func = list_first_entry(&ftrace->filters, struct filter_entry, list); + } else { + int count = 0; + struct list_head *pos; + + list_for_each(pos, &ftrace->event_pair) + count++; + + if (count != 2) { + pr_err("ERROR: Needs two target events.\n"); + return -1; + } } - func = list_first_entry(&ftrace->filters, struct filter_entry, list); - skel = func_latency_bpf__open(); if (!skel) { pr_err("Failed to open func latency skeleton\n"); @@ -93,20 +104,44 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) skel->bss->min = INT64_MAX; - skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin, - false, func->name); - if (IS_ERR(skel->links.func_begin)) { - pr_err("Failed to attach fentry program\n"); - err = PTR_ERR(skel->links.func_begin); - goto out; - } + if (func) { + skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin, + false, func->name); + if (IS_ERR(skel->links.func_begin)) { + pr_err("Failed to attach fentry program\n"); + err = PTR_ERR(skel->links.func_begin); + goto out; + } - skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end, - true, func->name); - if (IS_ERR(skel->links.func_end)) { - pr_err("Failed to attach fexit program\n"); - err = PTR_ERR(skel->links.func_end); - goto out; + skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end, + true, func->name); + if (IS_ERR(skel->links.func_end)) { + pr_err("Failed to attach fexit program\n"); + err = PTR_ERR(skel->links.func_end); + goto out; + } + } else { + struct filter_entry *event; + + event = list_first_entry(&ftrace->event_pair, struct filter_entry, list); + + skel->links.event_begin = bpf_program__attach_raw_tracepoint(skel->progs.event_begin, + event->name); + if (IS_ERR(skel->links.event_begin)) { + pr_err("Failed to attach first 
tracepoint program\n"); + err = PTR_ERR(skel->links.event_begin); + goto out; + } + + event = list_next_entry(event, list); + + skel->links.event_end = bpf_program__attach_raw_tracepoint(skel->progs.event_end, + event->name); + if (IS_ERR(skel->links.event_end)) { + pr_err("Failed to attach second tracepoint program\n"); + err = PTR_ERR(skel->links.event_end); + goto out; + } } /* XXX: we don't actually use this fd - just for poll() */ diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c index e731a79a753a..621e2022c8bc 100644 --- a/tools/perf/util/bpf_skel/func_latency.bpf.c +++ b/tools/perf/util/bpf_skel/func_latency.bpf.c @@ -52,34 +52,89 @@ const volatile unsigned int min_latency; const volatile unsigned int max_latency; const volatile unsigned int bucket_num = NUM_BUCKET; -SEC("kprobe/func") -int BPF_PROG(func_begin) +static bool can_record(void) { - __u64 key, now; - - if (!enabled) - return 0; - - key = bpf_get_current_pid_tgid(); - if (has_cpu) { __u32 cpu = bpf_get_smp_processor_id(); __u8 *ok; ok = bpf_map_lookup_elem(&cpu_filter, &cpu); if (!ok) - return 0; + return false; } if (has_task) { - __u32 pid = key & 0xffffffff; + __u32 pid = bpf_get_current_pid_tgid(); __u8 *ok; ok = bpf_map_lookup_elem(&task_filter, &pid); if (!ok) - return 0; + return false; + } + return true; +} + +static void update_latency(__s64 delta) +{ + __u64 val = delta; + __u32 key = 0; + __u64 *hist; + __u64 cmp_base = use_nsec ? 1 : 1000; + + if (delta < 0) + return; + + if (bucket_range != 0) { + val = delta / cmp_base; + + if (min_latency > 0) { + if (val > min_latency) + val -= min_latency; + else + goto do_lookup; + } + + // Less than 1 unit (ms or ns), or, in the future, + // than the min latency desired. + if (val > 0) { // 1st entry: [ 1 unit .. 
bucket_range units ) + key = val / bucket_range + 1; + if (key >= bucket_num) + key = bucket_num - 1; + } + + goto do_lookup; + } + // calculate index using delta + for (key = 0; key < (bucket_num - 1); key++) { + if (delta < (cmp_base << key)) + break; } +do_lookup: + hist = bpf_map_lookup_elem(&latency, &key); + if (!hist) + return; + + __sync_fetch_and_add(hist, 1); + + __sync_fetch_and_add(&total, delta); // always in nsec + __sync_fetch_and_add(&count, 1); + + if (delta > max) + max = delta; + if (delta < min) + min = delta; +} + +SEC("kprobe/func") +int BPF_PROG(func_begin) +{ + __u64 key, now; + + if (!enabled || !can_record()) + return 0; + + key = bpf_get_current_pid_tgid(); now = bpf_ktime_get_ns(); // overwrite timestamp for nested functions @@ -92,7 +147,6 @@ int BPF_PROG(func_end) { __u64 tid; __u64 *start; - __u64 cmp_base = use_nsec ? 1 : 1000; if (!enabled) return 0; @@ -101,56 +155,44 @@ int BPF_PROG(func_end) start = bpf_map_lookup_elem(&functime, &tid); if (start) { - __s64 delta = bpf_ktime_get_ns() - *start; - __u64 val = delta; - __u32 key = 0; - __u64 *hist; - + update_latency(bpf_ktime_get_ns() - *start); + bpf_map_delete_elem(&functime, &tid); + } + + return 0; +} + +SEC("raw_tp") +int BPF_PROG(event_begin) +{ + __u64 key, now; + + if (!enabled || !can_record()) + return 0; + + key = bpf_get_current_pid_tgid(); + now = bpf_ktime_get_ns(); + + // overwrite timestamp for nested events + bpf_map_update_elem(&functime, &key, &now, BPF_ANY); + return 0; +} + +SEC("raw_tp") +int BPF_PROG(event_end) +{ + __u64 tid; + __u64 *start; + + if (!enabled) + return 0; + + tid = bpf_get_current_pid_tgid(); + + start = bpf_map_lookup_elem(&functime, &tid); + if (start) { + update_latency(bpf_ktime_get_ns() - *start); bpf_map_delete_elem(&functime, &tid); - - if (delta < 0) - return 0; - - if (bucket_range != 0) { - val = delta / cmp_base; - - if (min_latency > 0) { - if (val > min_latency) - val -= min_latency; - else - goto do_lookup; - } - - // Less than 
1 unit (ms or ns), or, in the future, - // than the min latency desired. - if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units ) - key = val / bucket_range + 1; - if (key >= bucket_num) - key = bucket_num - 1; - } - - goto do_lookup; - } - // calculate index using delta - for (key = 0; key < (bucket_num - 1); key++) { - if (delta < (cmp_base << key)) - break; - } - -do_lookup: - hist = bpf_map_lookup_elem(&latency, &key); - if (!hist) - return 0; - - __sync_fetch_and_add(hist, 1); - - __sync_fetch_and_add(&total, delta); // always in nsec - __sync_fetch_and_add(&count, 1); - - if (delta > max) - max = delta; - if (delta < min) - min = delta; } return 0; diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h index a9bc47da83a5..3f5094ac5908 100644 --- a/tools/perf/util/ftrace.h +++ b/tools/perf/util/ftrace.h @@ -17,6 +17,7 @@ struct perf_ftrace { struct list_head notrace; struct list_head graph_funcs; struct list_head nograph_funcs; + struct list_head event_pair; struct hashmap *profile_hash; unsigned long percpu_buffer_size; bool inherit; From 12b3d697c812aaf356e82d9e1f351fbb2ea97500 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 11 Jul 2025 17:15:27 +0200 Subject: [PATCH 0689/2411] cxl: Remove core/acpi.c and cxl core dependency on ACPI From Dave [1]: """ It was a mistake to introduce core/acpi.c and putting ACPI dependency on cxl_core when adding the extended linear cache support. """ Current implementation calls hmat_get_extended_linear_cache_size() of the ACPI subsystem. That external reference causes issue running cxl_test as there is no way to "mock" that function and ignore it when using cxl test. Instead of working around that using cxlrd ops and extensively expanding cxl_test code [1], just move HMAT calls out of the core module to cxl_acpi. Implement this by adding a @cache_size member to struct cxl_root_decoder. During initialization the cache size is determined and added to the root decoder object in cxl_acpi. 
Later on in cxl_core the cache_size parameter is used to setup extended linear caching. [1] https://patch.msgid.link/20250610172938.139428-1-dave.jiang@intel.com [ dj: Remove core/acpi.o from tools/testing/cxl/Kbuild ] [ dj: Add kdoc for cxlrd->cache_size ] Cc: Dave Jiang Signed-off-by: Robert Richter Reviewed-by: Alison Schofield Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20250711151529.787470-1-rrichter@amd.com Signed-off-by: Dave Jiang --- drivers/cxl/acpi.c | 59 +++++++++++++++++++++++++++++++++++++++ drivers/cxl/core/Makefile | 1 - drivers/cxl/core/acpi.c | 11 -------- drivers/cxl/core/core.h | 2 -- drivers/cxl/core/region.c | 7 +---- drivers/cxl/cxl.h | 2 ++ tools/testing/cxl/Kbuild | 1 - 7 files changed, 62 insertions(+), 21 deletions(-) delete mode 100644 drivers/cxl/core/acpi.c diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index a1a99ec3f12c..712624cba2b6 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -335,6 +335,63 @@ static int add_or_reset_cxl_resource(struct resource *parent, struct resource *r return rc; } +static int cxl_acpi_set_cache_size(struct cxl_root_decoder *cxlrd) +{ + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; + struct range *hpa = &cxld->hpa_range; + resource_size_t size = range_len(hpa); + resource_size_t start = hpa->start; + resource_size_t cache_size; + struct resource res; + int nid, rc; + + res = DEFINE_RES(start, size, 0); + nid = phys_to_target_node(start); + + rc = hmat_get_extended_linear_cache_size(&res, nid, &cache_size); + if (rc) + return rc; + + /* + * The cache range is expected to be within the CFMWS. + * Currently there is only support cache_size == cxl_size. CXL + * size is then half of the total CFMWS window size. + */ + size = size >> 1; + if (cache_size && size != cache_size) { + dev_warn(&cxld->dev, + "Extended Linear Cache size %pa != CXL size %pa. 
No Support!", + &cache_size, &size); + return -ENXIO; + } + + cxlrd->cache_size = cache_size; + + return 0; +} + +static void cxl_setup_extended_linear_cache(struct cxl_root_decoder *cxlrd) +{ + int rc; + + rc = cxl_acpi_set_cache_size(cxlrd); + if (!rc) + return; + + if (rc != -EOPNOTSUPP) { + /* + * Failing to support extended linear cache region resize does not + * prevent the region from functioning. Only causes cxl list showing + * incorrect region size. + */ + dev_warn(cxlrd->cxlsd.cxld.dev.parent, + "Extended linear cache calculation failed rc:%d\n", rc); + } + + /* Ignoring return code */ + cxlrd->cache_size = 0; +} + DEFINE_FREE(put_cxlrd, struct cxl_root_decoder *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->cxlsd.cxld.dev)) DEFINE_FREE(del_cxl_resource, struct resource *, if (_T) del_cxl_resource(_T)) @@ -394,6 +451,8 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, ig = CXL_DECODER_MIN_GRANULARITY; cxld->interleave_granularity = ig; + cxl_setup_extended_linear_cache(cxlrd); + if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) { if (ways != 1 && ways != 3) { cxims_ctx = (struct cxl_cxims_context) { diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 79e2ef81fde8..5ad8fef210b5 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -15,7 +15,6 @@ cxl_core-y += hdm.o cxl_core-y += pmu.o cxl_core-y += cdat.o cxl_core-y += ras.o -cxl_core-y += acpi.o cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o cxl_core-$(CONFIG_CXL_MCE) += mce.o diff --git a/drivers/cxl/core/acpi.c b/drivers/cxl/core/acpi.c deleted file mode 100644 index f13b4dae6ac5..000000000000 --- a/drivers/cxl/core/acpi.c +++ /dev/null @@ -1,11 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright(c) 2024 Intel Corporation. All rights reserved. 
*/ -#include -#include "cxl.h" -#include "core.h" - -int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res, - int nid, resource_size_t *size) -{ - return hmat_get_extended_linear_cache_size(backing_res, nid, size); -} diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 6b78b10da3e1..2250c05cecc3 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -121,8 +121,6 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, int cxl_ras_init(void); void cxl_ras_exit(void); int cxl_gpf_port_setup(struct cxl_dport *dport); -int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res, - int nid, resource_size_t *size); #ifdef CONFIG_CXL_FEATURES struct cxl_feat_entry * diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 91ff3a495fbd..08ac7f483562 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3282,15 +3282,10 @@ static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr, { struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_region_params *p = &cxlr->params; - int nid = phys_to_target_node(res->start); resource_size_t size = resource_size(res); resource_size_t cache_size, start; - int rc; - - rc = cxl_acpi_get_extended_linear_cache_size(res, nid, &cache_size); - if (rc) - return rc; + cache_size = cxlrd->cache_size; if (!cache_size) return 0; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index e7b66ca1d423..0730f92df038 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -423,6 +423,7 @@ typedef u64 (*cxl_hpa_to_spa_fn)(struct cxl_root_decoder *cxlrd, u64 hpa); /** * struct cxl_root_decoder - Static platform CXL address decoder * @res: host / parent resource for region allocations + * @cache_size: extended linear cache size if exists, otherwise zero. 
* @region_id: region id for next region provisioning event * @hpa_to_spa: translate CXL host-physical-address to Platform system-physical-address * @platform_data: platform specific configuration data @@ -432,6 +433,7 @@ typedef u64 (*cxl_hpa_to_spa_fn)(struct cxl_root_decoder *cxlrd, u64 hpa); */ struct cxl_root_decoder { struct resource *res; + resource_size_t cache_size; atomic_t region_id; cxl_hpa_to_spa_fn hpa_to_spa; void *platform_data; diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 31a2d73c963f..d07f14cb7aa4 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -62,7 +62,6 @@ cxl_core-y += $(CXL_CORE_SRC)/hdm.o cxl_core-y += $(CXL_CORE_SRC)/pmu.o cxl_core-y += $(CXL_CORE_SRC)/cdat.o cxl_core-y += $(CXL_CORE_SRC)/ras.o -cxl_core-y += $(CXL_CORE_SRC)/acpi.o cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o From 91703041697c9d2e8dffe5b3a159198ba0dd24e7 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 4 Jul 2025 09:38:33 +0200 Subject: [PATCH 0690/2411] PCI: Allow built-in drivers to use async initial probing The PCI core has historically not allowed built-in drivers to opt in to async initial probing: Drivers may set "PROBE_PREFER_ASYNCHRONOUS", but initial probing always happens synchronously. That's because the PCI core uses device_attach() instead of device_initial_probe(). Should a driver return -EPROBE_DEFER on initial probe, reprobing later on does honor the PROBE_PREFER_ASYNCHRONOUS setting. Modular drivers are also allowed to probe asynchronously, which is inconsistent. The choice of device_attach() is likely not deliberate: It was introduced in 2013 with commit 58d9a38f6fac ("PCI: Skip attaching driver in device_add()"), but asynchronous probing was added two years later with commit 765230b5f084 ("driver-core: add asynchronous probing support for drivers"). 
According to the kernel-doc of "enum probe_type", "the end goal is to switch the kernel to use asynchronous probing by default". To this end, use device_initial_probe() to allow asynchronous initial probing. The function returns void, making the return value check unnecessary. Initial PCI probing often takes on the order of seconds even on laptops, so this may speed up booting significantly. A small number of PCI drivers already opt in to asynchronous probing. Their maintainers (who are all cc'ed) should watch out for issues, now that asynchronous probing is not just allowed for deferred and modular probing, but also initial probing: hl_pci_driver drivers/accel/habanalabs/common/habanalabs_drv.c cxl_pci_driver drivers/cxl/pci.c quicki2c_driver drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c quickspi_driver drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c i801_driver drivers/i2c/busses/i2c-i801.c mei_me_driver drivers/misc/mei/pci-me.c mei_vsc_drv drivers/misc/mei/platform-vsc.c sdhci_driver drivers/mmc/host/sdhci-pci-core.c nvme_driver drivers/nvme/host/pci.c ehci_pci_driver drivers/usb/host/ehci-pci.c hvfb_pci_stub_driver drivers/video/fbdev/hyperv_fb.c All other driver maintainers may test asynchronous probing by specifying the command line parameter "driver_async_probe=drv_name1,drv_name2,...", and on success setting "probe_type = PROBE_PREFER_ASYNCHRONOUS" in the pci_driver struct. 
Signed-off-by: Lukas Wunner [bhelgaas: updated commit log per https://lore.kernel.org/r/aHYUh7WoDlhHckxd@wunner.de] Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/53abe6f5ac7c631f95f5d061aa748b192eda0379.1751614426.git.lukas@wunner.de --- drivers/pci/bus.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 69048869ef1c..b77fd30bbfd9 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -341,7 +341,6 @@ void pci_bus_add_device(struct pci_dev *dev) { struct device_node *dn = dev->dev.of_node; struct platform_device *pdev; - int retval; /* * Can not put in pci_device_add yet because resources @@ -372,9 +371,7 @@ void pci_bus_add_device(struct pci_dev *dev) if (!dn || of_device_is_available(dn)) pci_dev_allow_binding(dev); - retval = device_attach(&dev->dev); - if (retval < 0 && retval != -EPROBE_DEFER) - pci_warn(dev, "device attach failed (%d)\n", retval); + device_initial_probe(&dev->dev); pci_dev_assign_added(dev); } From c523fa63ac1d452abeeb4e699560ec3365037f32 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Tue, 8 Jul 2025 17:10:02 +0800 Subject: [PATCH 0691/2411] PCI: imx6: Add IMX8MQ_EP third 64-bit BAR in epc_features IMX8MQ_EP has three programmable, 64-bit capable BARs: BAR 0, BAR 2 and BAR 4. For IMX8MQ_EP, use imx8q_pcie_epc_features (64-bit BARs 0, 2, 4) instead of imx8m_pcie_epc_features (64-bit BARs 0, 2).
Fixes: 75c2f26da03f ("PCI: imx6: Add i.MX PCIe EP mode support") Signed-off-by: Richard Zhu [bhelgaas: add details in subject] Signed-off-by: Bjorn Helgaas Reviewed-by: Frank Li Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250708091003.2582846-2-hongxing.zhu@nxp.com --- drivers/pci/controller/dwc/pci-imx6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 5a38cfaf989b..7d15bcb7c107 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -1912,7 +1912,7 @@ static const struct imx_pcie_drvdata drvdata[] = { .mode_mask[0] = IMX6Q_GPR12_DEVICE_TYPE, .mode_off[1] = IOMUXC_GPR12, .mode_mask[1] = IMX8MQ_GPR12_PCIE2_CTRL_DEVICE_TYPE, - .epc_features = &imx8m_pcie_epc_features, + .epc_features = &imx8q_pcie_epc_features, .init_phy = imx8mq_pcie_init_phy, .enable_ref_clk = imx8mm_pcie_enable_ref_clk, }, From 399444a87acdea5d21c218bc8e9b621fea1cd218 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Tue, 8 Jul 2025 17:10:03 +0800 Subject: [PATCH 0692/2411] PCI: imx6: Add IMX8MM_EP and IMX8MP_EP fixed 256-byte BAR 4 in epc_features For IMX8MM_EP and IMX8MP_EP, add fixed 256-byte BAR 4 and reserved BAR 5 in imx8m_pcie_epc_features. 
Fixes: 75c2f26da03f ("PCI: imx6: Add i.MX PCIe EP mode support") Signed-off-by: Richard Zhu [bhelgaas: add details in subject] Signed-off-by: Bjorn Helgaas Reviewed-by: Frank Li Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250708091003.2582846-3-hongxing.zhu@nxp.com --- drivers/pci/controller/dwc/pci-imx6.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 7d15bcb7c107..9754cc6e09b9 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -1385,6 +1385,8 @@ static const struct pci_epc_features imx8m_pcie_epc_features = { .msix_capable = false, .bar[BAR_1] = { .type = BAR_RESERVED, }, .bar[BAR_3] = { .type = BAR_RESERVED, }, + .bar[BAR_4] = { .type = BAR_FIXED, .fixed_size = SZ_256, }, + .bar[BAR_5] = { .type = BAR_RESERVED, }, .align = SZ_64K, }; From 0c927d478486fbc96a225aeae6705e7152700c87 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 1 Jul 2025 10:55:38 -0700 Subject: [PATCH 0693/2411] ARM: dts: broadcom: Fix bcm7445 memory controller compatible The memory controller node compatible string was incompletely specified and used the fallback compatible. After commit 501be7cecec9 ("dt-bindings: memory-controller: Define fallback compatible") however, we need to fully specify the compatible string. 
Fixes: 501be7cecec9 ("dt-bindings: memory-controller: Define fallback compatible") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507011302.ZqNlBKWX-lkp@intel.com/ Link: https://lore.kernel.org/r/20250701175538.1633435-1-florian.fainelli@broadcom.com Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/broadcom/bcm7445.dtsi | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/broadcom/bcm7445.dtsi b/arch/arm/boot/dts/broadcom/bcm7445.dtsi index 5ac2042515b8..c6307c7437e3 100644 --- a/arch/arm/boot/dts/broadcom/bcm7445.dtsi +++ b/arch/arm/boot/dts/broadcom/bcm7445.dtsi @@ -237,7 +237,8 @@ memc@0 { ranges = <0x0 0x0 0x80000>; memc-ddr@2000 { - compatible = "brcm,brcmstb-memc-ddr"; + compatible = "brcm,brcmstb-memc-ddr-rev-b.1.x", + "brcm,brcmstb-memc-ddr"; reg = <0x2000 0x800>; }; @@ -259,7 +260,8 @@ memc@80000 { ranges = <0x0 0x80000 0x80000>; memc-ddr@2000 { - compatible = "brcm,brcmstb-memc-ddr"; + compatible = "brcm,brcmstb-memc-ddr-rev-b.1.x", + "brcm,brcmstb-memc-ddr"; reg = <0x2000 0x800>; }; @@ -281,7 +283,8 @@ memc@100000 { ranges = <0x0 0x100000 0x80000>; memc-ddr@2000 { - compatible = "brcm,brcmstb-memc-ddr"; + compatible = "brcm,brcmstb-memc-ddr-rev-b.1.x", + "brcm,brcmstb-memc-ddr"; reg = <0x2000 0x800>; }; From d7c7c051e8e5f781c98310709f3feaf7e634251b Mon Sep 17 00:00:00 2001 From: Mayank Rana Date: Mon, 16 Jun 2025 15:42:58 -0700 Subject: [PATCH 0694/2411] dt-bindings: PCI: qcom,pcie-sa8255p: Document ECAM compliant PCIe root complex Document the required configuration to enable the PCIe Root Complex on SA8255p, which is managed by firmware using power-domain based handling and configured as ECAM compliant. 
Signed-off-by: Mayank Rana Signed-off-by: Manivannan Sadhasivam [bhelgaas: add "ECAM" in reg description] Signed-off-by: Bjorn Helgaas Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20250616224259.3549811-4-mayank.rana@oss.qualcomm.com --- .../bindings/pci/qcom,pcie-sa8255p.yaml | 122 ++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 Documentation/devicetree/bindings/pci/qcom,pcie-sa8255p.yaml diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie-sa8255p.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie-sa8255p.yaml new file mode 100644 index 000000000000..ef705a02fcd9 --- /dev/null +++ b/Documentation/devicetree/bindings/pci/qcom,pcie-sa8255p.yaml @@ -0,0 +1,122 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pci/qcom,pcie-sa8255p.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm SA8255p based firmware managed and ECAM compliant PCIe Root Complex + +maintainers: + - Bjorn Andersson + - Manivannan Sadhasivam + +description: + Qualcomm SA8255p SoC PCIe root complex controller is based on the Synopsys + DesignWare PCIe IP which is managed by firmware, and configured in ECAM mode. + +properties: + compatible: + const: qcom,pcie-sa8255p + + reg: + description: + The base address and size of the ECAM area for accessing PCI + Configuration Space, as accessed from the parent bus. The base + address corresponds to the first bus in the "bus-range" property. If + no "bus-range" is specified, this will be bus 0 (the default). + maxItems: 1 + + ranges: + description: + As described in IEEE Std 1275-1994, but must provide at least a + definition of non-prefetchable memory. A prefetchable memory range + may also be provided.
+ minItems: 1 + maxItems: 2 + + interrupts: + minItems: 8 + maxItems: 8 + + interrupt-names: + items: + - const: msi0 + - const: msi1 + - const: msi2 + - const: msi3 + - const: msi4 + - const: msi5 + - const: msi6 + - const: msi7 + + power-domains: + maxItems: 1 + + dma-coherent: true + iommu-map: true + +required: + - compatible + - reg + - ranges + - power-domains + - interrupts + - interrupt-names + +allOf: + - $ref: /schemas/pci/pci-host-bridge.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + + soc { + #address-cells = <2>; + #size-cells = <2>; + + pci@1c00000 { + compatible = "qcom,pcie-sa8255p"; + reg = <0x4 0x00000000 0 0x10000000>; + device_type = "pci"; + #address-cells = <3>; + #size-cells = <2>; + ranges = <0x02000000 0x0 0x40100000 0x0 0x40100000 0x0 0x1ff00000>, + <0x43000000 0x4 0x10100000 0x4 0x10100000 0x0 0x40000000>; + bus-range = <0x00 0xff>; + dma-coherent; + linux,pci-domain = <0>; + power-domains = <&scmi5_pd 0>; + iommu-map = <0x0 &pcie_smmu 0x0000 0x1>, + <0x100 &pcie_smmu 0x0001 0x1>; + interrupt-parent = <&intc>; + interrupts = , + , + , + , + , + , + , + ; + interrupt-names = "msi0", "msi1", "msi2", "msi3", + "msi4", "msi5", "msi6", "msi7"; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 0 0x7>; + interrupt-map = <0 0 0 1 &intc GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 2 &intc GIC_SPI 149 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 3 &intc GIC_SPI 150 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 4 &intc GIC_SPI 151 IRQ_TYPE_LEVEL_HIGH>; + + pcie@0 { + device_type = "pci"; + reg = <0x0 0x0 0x0 0x0 0x0>; + bus-range = <0x01 0xff>; + + #address-cells = <3>; + #size-cells = <2>; + ranges; + }; + }; + }; From 7d944c0f146986a532087e15abb66a27c7890ca1 Mon Sep 17 00:00:00 2001 From: Mayank Rana Date: Mon, 16 Jun 2025 15:42:59 -0700 Subject: [PATCH 0695/2411] PCI: qcom: Add support for Qualcomm SA8255p based PCIe Root Complex Add functionality to enable resource management (like clocks, regulators, PHY) through firmware and enumerate ECAM 
compliant Root Complex on SA8255p SoC, where the PCIe Root Complex is firmware managed and configured into ECAM compliant mode. Signed-off-by: Mayank Rana [mani: minor code cleanups and commit message rewording] Signed-off-by: Manivannan Sadhasivam [bhelgaas: add "ECAM" in comment] Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250616224259.3549811-5-mayank.rana@oss.qualcomm.com --- drivers/pci/controller/dwc/Kconfig | 1 + drivers/pci/controller/dwc/pcie-qcom.c | 123 ++++++++++++++++++++++--- 2 files changed, 112 insertions(+), 12 deletions(-) diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig index d9f0386396ed..ce04ee6fbd99 100644 --- a/drivers/pci/controller/dwc/Kconfig +++ b/drivers/pci/controller/dwc/Kconfig @@ -296,6 +296,7 @@ config PCIE_QCOM select PCIE_DW_HOST select CRC8 select PCIE_QCOM_COMMON + select PCI_HOST_COMMON help Say Y here to enable PCIe controller support on Qualcomm SoCs. The PCIe controller uses the DesignWare core plus Qualcomm-specific diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index c789e3f85655..3014a0db022d 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -21,7 +21,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -34,6 +36,7 @@ #include #include "../../pci.h" +#include "../pci-host-common.h" #include "pcie-designware.h" #include "pcie-qcom-common.h" @@ -255,10 +258,12 @@ struct qcom_pcie_ops { * @ops: qcom PCIe ops structure * @override_no_snoop: Override NO_SNOOP attribute in TLP to enable cache * snooping + * @firmware_managed: Set if the Root Complex is firmware managed */ struct qcom_pcie_cfg { const struct qcom_pcie_ops *ops; bool override_no_snoop; + bool firmware_managed; bool no_l0s; }; @@ -1426,6 +1431,10 @@ static const struct qcom_pcie_cfg cfg_sc8280xp = { .no_l0s = true, }; +static const struct qcom_pcie_cfg cfg_fw_managed = { + 
.firmware_managed = true, +}; + static const struct dw_pcie_ops dw_pcie_ops = { .link_up = qcom_pcie_link_up, .start_link = qcom_pcie_start_link, @@ -1579,6 +1588,49 @@ static irqreturn_t qcom_pcie_global_irq_thread(int irq, void *data) return IRQ_HANDLED; } +static void qcom_pci_free_msi(void *ptr) +{ + struct dw_pcie_rp *pp = (struct dw_pcie_rp *)ptr; + + if (pp && pp->has_msi_ctrl) + dw_pcie_free_msi(pp); +} + +static int qcom_pcie_ecam_host_init(struct pci_config_window *cfg) +{ + struct device *dev = cfg->parent; + struct dw_pcie_rp *pp; + struct dw_pcie *pci; + int ret; + + pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL); + if (!pci) + return -ENOMEM; + + pci->dev = dev; + pp = &pci->pp; + pci->dbi_base = cfg->win; + pp->num_vectors = MSI_DEF_NUM_VECTORS; + + ret = dw_pcie_msi_host_init(pp); + if (ret) + return ret; + + pp->has_msi_ctrl = true; + dw_pcie_msi_init(pp); + + return devm_add_action_or_reset(dev, qcom_pci_free_msi, pp); +} + +static const struct pci_ecam_ops pci_qcom_ecam_ops = { + .init = qcom_pcie_ecam_host_init, + .pci_ops = { + .map_bus = pci_ecam_map_bus, + .read = pci_generic_config_read, + .write = pci_generic_config_write, + } +}; + static int qcom_pcie_probe(struct platform_device *pdev) { const struct qcom_pcie_cfg *pcie_cfg; @@ -1593,24 +1645,62 @@ static int qcom_pcie_probe(struct platform_device *pdev) char *name; pcie_cfg = of_device_get_match_data(dev); - if (!pcie_cfg || !pcie_cfg->ops) { - dev_err(dev, "Invalid platform data\n"); - return -EINVAL; + if (!pcie_cfg) { + dev_err(dev, "No platform data\n"); + return -ENODATA; } - pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); - if (!pcie) - return -ENOMEM; - - pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL); - if (!pci) - return -ENOMEM; + if (!pcie_cfg->firmware_managed && !pcie_cfg->ops) { + dev_err(dev, "No platform ops\n"); + return -ENODATA; + } pm_runtime_enable(dev); ret = pm_runtime_get_sync(dev); if (ret < 0) goto err_pm_runtime_put; + if 
(pcie_cfg->firmware_managed) { + struct pci_host_bridge *bridge; + struct pci_config_window *cfg; + + bridge = devm_pci_alloc_host_bridge(dev, 0); + if (!bridge) { + ret = -ENOMEM; + goto err_pm_runtime_put; + } + + /* Parse and map our ECAM configuration space area */ + cfg = pci_host_common_ecam_create(dev, bridge, + &pci_qcom_ecam_ops); + if (IS_ERR(cfg)) { + ret = PTR_ERR(cfg); + goto err_pm_runtime_put; + } + + bridge->sysdata = cfg; + bridge->ops = (struct pci_ops *)&pci_qcom_ecam_ops.pci_ops; + bridge->msi_domain = true; + + ret = pci_host_probe(bridge); + if (ret) + goto err_pm_runtime_put; + + return 0; + } + + pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); + if (!pcie) { + ret = -ENOMEM; + goto err_pm_runtime_put; + } + + pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL); + if (!pci) { + ret = -ENOMEM; + goto err_pm_runtime_put; + } + pci->dev = dev; pci->ops = &dw_pcie_ops; pp = &pci->pp; @@ -1756,9 +1846,13 @@ static int qcom_pcie_probe(struct platform_device *pdev) static int qcom_pcie_suspend_noirq(struct device *dev) { - struct qcom_pcie *pcie = dev_get_drvdata(dev); + struct qcom_pcie *pcie; int ret = 0; + pcie = dev_get_drvdata(dev); + if (!pcie) + return 0; + /* * Set minimum bandwidth required to keep data path functional during * suspend. 
@@ -1812,9 +1906,13 @@ static int qcom_pcie_suspend_noirq(struct device *dev) static int qcom_pcie_resume_noirq(struct device *dev) { - struct qcom_pcie *pcie = dev_get_drvdata(dev); + struct qcom_pcie *pcie; int ret; + pcie = dev_get_drvdata(dev); + if (!pcie) + return 0; + if (pm_suspend_target_state != PM_SUSPEND_MEM) { ret = icc_enable(pcie->icc_cpu); if (ret) { @@ -1849,6 +1947,7 @@ static const struct of_device_id qcom_pcie_match[] = { { .compatible = "qcom,pcie-ipq9574", .data = &cfg_2_9_0 }, { .compatible = "qcom,pcie-msm8996", .data = &cfg_2_3_2 }, { .compatible = "qcom,pcie-qcs404", .data = &cfg_2_4_0 }, + { .compatible = "qcom,pcie-sa8255p", .data = &cfg_fw_managed }, { .compatible = "qcom,pcie-sa8540p", .data = &cfg_sc8280xp }, { .compatible = "qcom,pcie-sa8775p", .data = &cfg_1_34_0}, { .compatible = "qcom,pcie-sc7280", .data = &cfg_1_9_0 }, From 38fcbfbd4207ec3fe47f66c2a16df7f5a857e198 Mon Sep 17 00:00:00 2001 From: Krishna Chaitanya Chundru Date: Wed, 2 Jul 2025 16:50:41 +0530 Subject: [PATCH 0696/2411] dt-bindings: PCI: qcom: Move PHY & reset GPIO to Root Port node Move the phys, phy-names, reset-gpios properties to the PCIe Root Port node from Host Bridge node, as agreed upon here [1]. Update the qcom,pcie-common.yaml to include the 'phys' property in the Root Port node. 'phy-names' property is not needed in Root Port since each Root Port supports only one PHY. Also, there is already 'reset-gpios' property defined for PERST# in pci-bus-common.yaml, so use that property instead of 'perst-gpios'. For backward compatibility, do not remove any existing properties in the bridge node, but mark them as 'deprecated' instead. 
[1] https://lore.kernel.org/linux-pci/20241211192014.GA3302752@bhelgaas/ Signed-off-by: Krishna Chaitanya Chundru [mani: commit message rewording] Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20250702-perst-v5-1-920b3d1f6ee1@qti.qualcomm.com --- .../bindings/pci/qcom,pcie-common.yaml | 32 +++++++++++++++++-- .../bindings/pci/qcom,pcie-sc7280.yaml | 16 +++++++--- 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie-common.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie-common.yaml index 0480c58f7d99..ab2509ec1c4b 100644 --- a/Documentation/devicetree/bindings/pci/qcom,pcie-common.yaml +++ b/Documentation/devicetree/bindings/pci/qcom,pcie-common.yaml @@ -51,10 +51,18 @@ properties: phys: maxItems: 1 + deprecated: true + description: + This property is deprecated, instead of referencing this property from + the host bridge node, use the property from the PCIe root port node. phy-names: items: - const: pciephy + deprecated: true + description: + Phandle to the register map node. This property is deprecated, and not + required to add in the root port also, as the root port has only one phy. power-domains: maxItems: 1 @@ -71,12 +79,18 @@ properties: maxItems: 12 perst-gpios: - description: GPIO controlled connection to PERST# signal + description: GPIO controlled connection to PERST# signal. This property is + deprecated, instead of referencing this property from the host bridge node, + use the reset-gpios property from the root port node. maxItems: 1 + deprecated: true wake-gpios: - description: GPIO controlled connection to WAKE# signal + description: GPIO controlled connection to WAKE# signal. This property is + deprecated, instead of referencing this property from the host bridge node, + use the property from the PCIe root port node. 
maxItems: 1 + deprecated: true vddpe-3v3-supply: description: PCIe endpoint power supply @@ -85,6 +99,20 @@ properties: opp-table: type: object +patternProperties: + "^pcie@": + type: object + $ref: /schemas/pci/pci-pci-bridge.yaml# + + properties: + reg: + maxItems: 1 + + phys: + maxItems: 1 + + unevaluatedProperties: false + required: - reg - reg-names diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie-sc7280.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie-sc7280.yaml index ff508f592a1a..4d0a91556603 100644 --- a/Documentation/devicetree/bindings/pci/qcom,pcie-sc7280.yaml +++ b/Documentation/devicetree/bindings/pci/qcom,pcie-sc7280.yaml @@ -165,9 +165,6 @@ examples: iommu-map = <0x0 &apps_smmu 0x1c80 0x1>, <0x100 &apps_smmu 0x1c81 0x1>; - phys = <&pcie1_phy>; - phy-names = "pciephy"; - pinctrl-names = "default"; pinctrl-0 = <&pcie1_clkreq_n>; @@ -176,7 +173,18 @@ examples: resets = <&gcc GCC_PCIE_1_BCR>; reset-names = "pci"; - perst-gpios = <&tlmm 2 GPIO_ACTIVE_LOW>; vddpe-3v3-supply = <&pp3300_ssd>; + pcie1_port0: pcie@0 { + device_type = "pci"; + reg = <0x0 0x0 0x0 0x0 0x0>; + bus-range = <0x01 0xff>; + + #address-cells = <3>; + #size-cells = <2>; + ranges; + phys = <&pcie1_phy>; + + reset-gpios = <&tlmm 2 GPIO_ACTIVE_LOW>; + }; }; }; From a2fbecdbbb9d7706fd3ec25f0dead83a2d542943 Mon Sep 17 00:00:00 2001 From: Krishna Chaitanya Chundru Date: Wed, 2 Jul 2025 16:50:42 +0530 Subject: [PATCH 0697/2411] PCI: qcom: Add support for parsing the new Root Port binding The DT binding has moved the PHY, PERST# properties to Root Port node from the Host Bridge node. So add support for parsing the new binding. The new binding uses 'reset-gpios' property for PERST#, hence parse the same property in the driver instead of the legacy 'perst-gpios'. To maintain DT backwards compatibility, fallback to the legacy method of parsing the host bridge node if the properties are not present in the Root Port node. 
Signed-off-by: Krishna Chaitanya Chundru [mani: refactored the root port parsing code, fixed a bug & commit message rewording] Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250702-perst-v5-2-920b3d1f6ee1@qti.qualcomm.com --- drivers/pci/controller/dwc/pcie-qcom.c | 203 +++++++++++++++++++++---- 1 file changed, 176 insertions(+), 27 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 3014a0db022d..29b5db7aefe9 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -267,6 +267,12 @@ struct qcom_pcie_cfg { bool no_l0s; }; +struct qcom_pcie_port { + struct list_head list; + struct gpio_desc *reset; + struct phy *phy; +}; + struct qcom_pcie { struct dw_pcie *pci; void __iomem *parf; /* DT parf */ @@ -279,24 +285,37 @@ struct qcom_pcie { struct icc_path *icc_cpu; const struct qcom_pcie_cfg *cfg; struct dentry *debugfs; + struct list_head ports; bool suspended; bool use_pm_opp; }; #define to_qcom_pcie(x) dev_get_drvdata((x)->dev) +static void qcom_perst_assert(struct qcom_pcie *pcie, bool assert) +{ + struct qcom_pcie_port *port; + int val = assert ? 
1 : 0; + + if (list_empty(&pcie->ports)) + gpiod_set_value_cansleep(pcie->reset, val); + else + list_for_each_entry(port, &pcie->ports, list) + gpiod_set_value_cansleep(port->reset, val); + + usleep_range(PERST_DELAY_US, PERST_DELAY_US + 500); +} + static void qcom_ep_reset_assert(struct qcom_pcie *pcie) { - gpiod_set_value_cansleep(pcie->reset, 1); - usleep_range(PERST_DELAY_US, PERST_DELAY_US + 500); + qcom_perst_assert(pcie, true); } static void qcom_ep_reset_deassert(struct qcom_pcie *pcie) { /* Ensure that PERST has been asserted for at least 100 ms */ msleep(PCIE_T_PVPERL_MS); - gpiod_set_value_cansleep(pcie->reset, 0); - usleep_range(PERST_DELAY_US, PERST_DELAY_US + 500); + qcom_perst_assert(pcie, false); } static int qcom_pcie_start_link(struct dw_pcie *pci) @@ -1234,6 +1253,59 @@ static bool qcom_pcie_link_up(struct dw_pcie *pci) return val & PCI_EXP_LNKSTA_DLLLA; } +static void qcom_pcie_phy_exit(struct qcom_pcie *pcie) +{ + struct qcom_pcie_port *port; + + if (list_empty(&pcie->ports)) + phy_exit(pcie->phy); + else + list_for_each_entry(port, &pcie->ports, list) + phy_exit(port->phy); +} + +static void qcom_pcie_phy_power_off(struct qcom_pcie *pcie) +{ + struct qcom_pcie_port *port; + + if (list_empty(&pcie->ports)) { + phy_power_off(pcie->phy); + } else { + list_for_each_entry(port, &pcie->ports, list) + phy_power_off(port->phy); + } +} + +static int qcom_pcie_phy_power_on(struct qcom_pcie *pcie) +{ + struct qcom_pcie_port *port; + int ret = 0; + + if (list_empty(&pcie->ports)) { + ret = phy_set_mode_ext(pcie->phy, PHY_MODE_PCIE, PHY_MODE_PCIE_RC); + if (ret) + return ret; + + ret = phy_power_on(pcie->phy); + if (ret) + return ret; + } else { + list_for_each_entry(port, &pcie->ports, list) { + ret = phy_set_mode_ext(port->phy, PHY_MODE_PCIE, PHY_MODE_PCIE_RC); + if (ret) + return ret; + + ret = phy_power_on(port->phy); + if (ret) { + qcom_pcie_phy_power_off(pcie); + return ret; + } + } + } + + return ret; +} + static int qcom_pcie_host_init(struct 
dw_pcie_rp *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); @@ -1246,11 +1318,7 @@ static int qcom_pcie_host_init(struct dw_pcie_rp *pp) if (ret) return ret; - ret = phy_set_mode_ext(pcie->phy, PHY_MODE_PCIE, PHY_MODE_PCIE_RC); - if (ret) - goto err_deinit; - - ret = phy_power_on(pcie->phy); + ret = qcom_pcie_phy_power_on(pcie); if (ret) goto err_deinit; @@ -1273,7 +1341,7 @@ static int qcom_pcie_host_init(struct dw_pcie_rp *pp) err_assert_reset: qcom_ep_reset_assert(pcie); err_disable_phy: - phy_power_off(pcie->phy); + qcom_pcie_phy_power_off(pcie); err_deinit: pcie->cfg->ops->deinit(pcie); @@ -1286,7 +1354,7 @@ static void qcom_pcie_host_deinit(struct dw_pcie_rp *pp) struct qcom_pcie *pcie = to_qcom_pcie(pci); qcom_ep_reset_assert(pcie); - phy_power_off(pcie->phy); + qcom_pcie_phy_power_off(pcie); pcie->cfg->ops->deinit(pcie); } @@ -1631,10 +1699,85 @@ static const struct pci_ecam_ops pci_qcom_ecam_ops = { } }; +static int qcom_pcie_parse_port(struct qcom_pcie *pcie, struct device_node *node) +{ + struct device *dev = pcie->pci->dev; + struct qcom_pcie_port *port; + struct gpio_desc *reset; + struct phy *phy; + int ret; + + reset = devm_fwnode_gpiod_get(dev, of_fwnode_handle(node), + "reset", GPIOD_OUT_HIGH, "PERST#"); + if (IS_ERR(reset)) + return PTR_ERR(reset); + + phy = devm_of_phy_get(dev, node, NULL); + if (IS_ERR(phy)) + return PTR_ERR(phy); + + port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL); + if (!port) + return -ENOMEM; + + ret = phy_init(phy); + if (ret) + return ret; + + port->reset = reset; + port->phy = phy; + INIT_LIST_HEAD(&port->list); + list_add_tail(&port->list, &pcie->ports); + + return 0; +} + +static int qcom_pcie_parse_ports(struct qcom_pcie *pcie) +{ + struct device *dev = pcie->pci->dev; + struct qcom_pcie_port *port, *tmp; + int ret = -ENOENT; + + for_each_available_child_of_node_scoped(dev->of_node, of_port) { + ret = qcom_pcie_parse_port(pcie, of_port); + if (ret) + goto err_port_del; + } + + return ret; + +err_port_del: + 
list_for_each_entry_safe(port, tmp, &pcie->ports, list) + list_del(&port->list); + + return ret; +} + +static int qcom_pcie_parse_legacy_binding(struct qcom_pcie *pcie) +{ + struct device *dev = pcie->pci->dev; + int ret; + + pcie->phy = devm_phy_optional_get(dev, "pciephy"); + if (IS_ERR(pcie->phy)) + return PTR_ERR(pcie->phy); + + pcie->reset = devm_gpiod_get_optional(dev, "perst", GPIOD_OUT_HIGH); + if (IS_ERR(pcie->reset)) + return PTR_ERR(pcie->reset); + + ret = phy_init(pcie->phy); + if (ret) + return ret; + + return 0; +} + static int qcom_pcie_probe(struct platform_device *pdev) { const struct qcom_pcie_cfg *pcie_cfg; unsigned long max_freq = ULONG_MAX; + struct qcom_pcie_port *port, *tmp; struct device *dev = &pdev->dev; struct dev_pm_opp *opp; struct qcom_pcie *pcie; @@ -1701,6 +1844,8 @@ static int qcom_pcie_probe(struct platform_device *pdev) goto err_pm_runtime_put; } + INIT_LIST_HEAD(&pcie->ports); + pci->dev = dev; pci->ops = &dw_pcie_ops; pp = &pci->pp; @@ -1709,12 +1854,6 @@ static int qcom_pcie_probe(struct platform_device *pdev) pcie->cfg = pcie_cfg; - pcie->reset = devm_gpiod_get_optional(dev, "perst", GPIOD_OUT_HIGH); - if (IS_ERR(pcie->reset)) { - ret = PTR_ERR(pcie->reset); - goto err_pm_runtime_put; - } - pcie->parf = devm_platform_ioremap_resource_byname(pdev, "parf"); if (IS_ERR(pcie->parf)) { ret = PTR_ERR(pcie->parf); @@ -1737,12 +1876,6 @@ static int qcom_pcie_probe(struct platform_device *pdev) } } - pcie->phy = devm_phy_optional_get(dev, "pciephy"); - if (IS_ERR(pcie->phy)) { - ret = PTR_ERR(pcie->phy); - goto err_pm_runtime_put; - } - /* OPP table is optional */ ret = devm_pm_opp_of_add_table(dev); if (ret && ret != -ENODEV) { @@ -1789,9 +1922,23 @@ static int qcom_pcie_probe(struct platform_device *pdev) pp->ops = &qcom_pcie_dw_ops; - ret = phy_init(pcie->phy); - if (ret) - goto err_pm_runtime_put; + ret = qcom_pcie_parse_ports(pcie); + if (ret) { + if (ret != -ENOENT) { + dev_err_probe(pci->dev, ret, + "Failed to parse Root Port: 
%d\n", ret); + goto err_pm_runtime_put; + } + + /* + * In the case of properties not populated in Root Port node, + * fallback to the legacy method of parsing the Host Bridge + * node. This is to maintain DT backwards compatibility. + */ + ret = qcom_pcie_parse_legacy_binding(pcie); + if (ret) + goto err_pm_runtime_put; + } platform_set_drvdata(pdev, pcie); @@ -1836,7 +1983,9 @@ static int qcom_pcie_probe(struct platform_device *pdev) err_host_deinit: dw_pcie_host_deinit(pp); err_phy_exit: - phy_exit(pcie->phy); + qcom_pcie_phy_exit(pcie); + list_for_each_entry_safe(port, tmp, &pcie->ports, list) + list_del(&port->list); err_pm_runtime_put: pm_runtime_put(dev); pm_runtime_disable(dev); From 8802e168437840ea0b1d5ca571cd3e95681e9e2b Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 24 Jun 2025 15:27:55 +0000 Subject: [PATCH 0698/2411] rust: types: add Opaque::cast_from Since commit b20fbbc08a36 ("rust: check type of `$ptr` in `container_of!`") we have enforced that the field pointer passed to container_of! must match the declared field. This caused mismatches when using a pointer to bindings::x for fields of type Opaque. This situation encourages the user to simply pass field.cast() to the container_of! macro, but this is not great because you might accidentally pass a *mut bindings::y when the field type is Opaque, which would be wrong. To help catch this kind of mistake, add a new Opaque::cast_from that wraps a raw pointer in Opaque without changing the inner type. Also update the docs to reflect this as well as some existing users. 
Signed-off-by: Alice Ryhl Acked-by: Andreas Hindborg Acked-by: Boqun Feng Reviewed-by: Danilo Krummrich Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250624-opaque-from-raw-v2-1-e4da40bdc59c@google.com Signed-off-by: Miguel Ojeda --- rust/kernel/drm/device.rs | 4 +--- rust/kernel/drm/gem/mod.rs | 4 +--- rust/kernel/lib.rs | 7 +++++++ rust/kernel/types.rs | 5 +++++ 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index b7ee3c464a12..e598c4274f29 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -135,11 +135,9 @@ pub(crate) fn as_raw(&self) -> *mut bindings::drm_device { /// /// `ptr` must be a valid pointer to a `struct device` embedded in `Self`. unsafe fn from_drm_device(ptr: *const bindings::drm_device) -> *mut Self { - let ptr: *const Opaque = ptr.cast(); - // SAFETY: By the safety requirements of this function `ptr` is a valid pointer to a // `struct drm_device` embedded in `Self`. 
- unsafe { crate::container_of!(ptr, Self, dev) }.cast_mut() + unsafe { crate::container_of!(Opaque::cast_from(ptr), Self, dev) }.cast_mut() } /// Not intended to be called externally, except via declare_drm_ioctls!() diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs index 4cd69fa84318..6f914ae0a5aa 100644 --- a/rust/kernel/drm/gem/mod.rs +++ b/rust/kernel/drm/gem/mod.rs @@ -125,11 +125,9 @@ fn as_raw(&self) -> *mut bindings::drm_gem_object { } unsafe fn as_ref<'a>(self_ptr: *mut bindings::drm_gem_object) -> &'a Self { - let self_ptr: *mut Opaque = self_ptr.cast(); - // SAFETY: `obj` is guaranteed to be in an `Object` via the safety contract of this // function - unsafe { &*crate::container_of!(self_ptr, Object, obj) } + unsafe { &*crate::container_of!(Opaque::cast_from(self_ptr), Object, obj) } } } diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 6b4774b2b1c3..529ce9074996 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -204,6 +204,13 @@ fn panic(info: &core::panic::PanicInfo<'_>) -> ! { /// Produces a pointer to an object from a pointer to one of its fields. /// +/// If you encounter a type mismatch due to the [`Opaque`] type, then use [`Opaque::raw_get`] or +/// [`Opaque::cast_from`] to resolve the mismatch. +/// +/// [`Opaque`]: crate::types::Opaque +/// [`Opaque::raw_get`]: crate::types::Opaque::raw_get +/// [`Opaque::cast_from`]: crate::types::Opaque::cast_from +/// /// # Safety /// /// The pointer passed to this macro, and the pointer returned by this macro, must both be in diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index 63a2559a545f..9de8e0011b1d 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -413,6 +413,11 @@ pub const fn get(&self) -> *mut T { pub const fn raw_get(this: *const Self) -> *mut T { UnsafeCell::raw_get(this.cast::>>()).cast::() } + + /// The opposite operation of [`Opaque::raw_get`]. 
+ pub const fn cast_from(this: *const T) -> *const Self { + this.cast() + } } /// Types that are _always_ reference counted. From b3060198483bac43ec113c62ae3837076f61f5de Mon Sep 17 00:00:00 2001 From: Artem Sadovnikov Date: Tue, 1 Jul 2025 14:40:17 +0000 Subject: [PATCH 0699/2411] vfio/mlx5: fix possible overflow in tracking max message size MLX cap pg_track_log_max_msg_size consists of 5 bits, value of which is used as power of 2 for max_msg_size. This can lead to multiplication overflow between max_msg_size (u32) and integer constant, and afterwards incorrect value is being written to rq_size. Fix this issue by extending integer constant to u64 type. Found by Linux Verification Center (linuxtesting.org) with SVACE. Suggested-by: Alex Williamson Signed-off-by: Artem Sadovnikov Reviewed-by: Yishai Hadas Link: https://lore.kernel.org/r/20250701144017.2410-2-a.sadovnikov@ispras.ru Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index 5b919a0b2524..a92b095b90f6 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -1523,8 +1523,8 @@ int mlx5vf_start_page_tracker(struct vfio_device *vdev, log_max_msg_size = MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_max_msg_size); max_msg_size = (1ULL << log_max_msg_size); /* The RQ must hold at least 4 WQEs/messages for successful QP creation */ - if (rq_size < 4 * max_msg_size) - rq_size = 4 * max_msg_size; + if (rq_size < 4ULL * max_msg_size) + rq_size = 4ULL * max_msg_size; memset(tracker, 0, sizeof(*tracker)); tracker->uar = mlx5_get_uars_page(mdev); From 78447d4545b2ea76ee04f4e46d473639483158b2 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 25 Apr 2025 14:39:29 +0100 Subject: [PATCH 0700/2411] PCI: Fix driver_managed_dma check Since it's not currently safe to take device_lock() in the IOMMU probe path, that can race against really_probe() setting 
dev->driver before attempting to bind. The race itself isn't so bad, since we're only concerned with dereferencing dev->driver itself anyway, but sadly my attempt to implement the check with minimal churn leads to a kind of Time-of-Check to Time-of-Use (TOCTOU) issue, where dev->driver becomes valid after to_pci_driver(NULL) is already computed, and thus the check fails to work as intended. Will and I both hit this with the platform bus, but the pattern here is the same, so fix it for correctness too. Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path") Reported-by: Will McVicker Signed-off-by: Robin Murphy Signed-off-by: Bjorn Helgaas Reviewed-by: Will McVicker Link: https://patch.msgid.link/20250425133929.646493-4-robin.murphy@arm.com --- drivers/pci/pci-driver.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 67db34fd10ee..01e6aea1b0c7 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1628,7 +1628,7 @@ static int pci_bus_num_vf(struct device *dev) */ static int pci_dma_configure(struct device *dev) { - struct pci_driver *driver = to_pci_driver(dev->driver); + const struct device_driver *drv = READ_ONCE(dev->driver); struct device *bridge; int ret = 0; @@ -1645,8 +1645,8 @@ static int pci_dma_configure(struct device *dev) pci_put_host_bridge_device(bridge); - /* @driver may not be valid when we're called from the IOMMU layer */ - if (!ret && dev->driver && !driver->driver_managed_dma) { + /* @drv may not be valid when we're called from the IOMMU layer */ + if (!ret && drv && !to_pci_driver(drv)->driver_managed_dma) { ret = iommu_device_use_default_domain(dev); if (ret) arch_teardown_dma_ops(dev); From 64fb810bce03a4e2b4d3ecbba04bb97da3536dd8 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 24 Jun 2025 15:27:56 +0000 Subject: [PATCH 0701/2411] rust: types: rename Opaque::raw_get to cast_into In the previous patch we added 
Opaque::cast_from() that performs the opposite operation to Opaque::raw_get(). For consistency with this naming, rename raw_get() to cast_into(). There are a few other options such as calling cast_from() something closer to raw_get() rather than renaming this method. However, I could not find a great naming scheme that works with raw_get(). The previous version of this patch used from_raw(), but functions of that name typically have a different signature, so that's not a great option. Suggested-by: Danilo Krummrich Signed-off-by: Alice Ryhl Acked-by: Benno Lossin Acked-by: Andreas Hindborg Acked-by: Boqun Feng Reviewed-by: Danilo Krummrich Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250624-opaque-from-raw-v2-2-e4da40bdc59c@google.com [ Removed `HrTimer::raw_get` change. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/configfs.rs | 2 +- rust/kernel/init.rs | 6 +++--- rust/kernel/lib.rs | 4 ++-- rust/kernel/list.rs | 2 +- rust/kernel/list/impl_list_item_mod.rs | 4 ++-- rust/kernel/time/hrtimer.rs | 2 +- rust/kernel/types.rs | 8 ++++---- rust/kernel/workqueue.rs | 2 +- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/rust/kernel/configfs.rs b/rust/kernel/configfs.rs index d287e11e4233..2736b798cdc6 100644 --- a/rust/kernel/configfs.rs +++ b/rust/kernel/configfs.rs @@ -279,7 +279,7 @@ pub fn new( // within the `group` field. unsafe impl HasGroup for Group { unsafe fn group(this: *const Self) -> *const bindings::config_group { - Opaque::raw_get( + Opaque::cast_into( // SAFETY: By impl and function safety requirements this field // projection is within bounds of the allocation. unsafe { &raw const (*this).group }, diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 49a61fa3dee8..75d3d99aed6a 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -100,13 +100,13 @@ //! let foo = addr_of_mut!((*slot).foo); //! //! // Initialize the `foo` -//! bindings::init_foo(Opaque::raw_get(foo)); +//! 
bindings::init_foo(Opaque::cast_into(foo)); //! //! // Try to enable it. -//! let err = bindings::enable_foo(Opaque::raw_get(foo), flags); +//! let err = bindings::enable_foo(Opaque::cast_into(foo), flags); //! if err != 0 { //! // Enabling has failed, first clean up the foo and then return the error. -//! bindings::destroy_foo(Opaque::raw_get(foo)); +//! bindings::destroy_foo(Opaque::cast_into(foo)); //! return Err(Error::from_errno(err)); //! } //! diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 529ce9074996..f61ac6f81f5d 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -204,11 +204,11 @@ fn panic(info: &core::panic::PanicInfo<'_>) -> ! { /// Produces a pointer to an object from a pointer to one of its fields. /// -/// If you encounter a type mismatch due to the [`Opaque`] type, then use [`Opaque::raw_get`] or +/// If you encounter a type mismatch due to the [`Opaque`] type, then use [`Opaque::cast_into`] or /// [`Opaque::cast_from`] to resolve the mismatch. /// /// [`Opaque`]: crate::types::Opaque -/// [`Opaque::raw_get`]: crate::types::Opaque::raw_get +/// [`Opaque::cast_into`]: crate::types::Opaque::cast_into /// [`Opaque::cast_from`]: crate::types::Opaque::cast_from /// /// # Safety diff --git a/rust/kernel/list.rs b/rust/kernel/list.rs index fe58a3920e70..7ebb81b2a3d4 100644 --- a/rust/kernel/list.rs +++ b/rust/kernel/list.rs @@ -284,7 +284,7 @@ pub fn new() -> impl PinInit { #[inline] unsafe fn fields(me: *mut Self) -> *mut ListLinksFields { // SAFETY: The caller promises that the pointer is valid. 
- unsafe { Opaque::raw_get(ptr::addr_of!((*me).inner)) } + unsafe { Opaque::cast_into(ptr::addr_of!((*me).inner)) } } /// # Safety diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index 1f9498c1458f..c1edba0a9501 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -209,7 +209,7 @@ unsafe fn prepare_to_insert(me: *const Self) -> *mut $crate::list::ListLinks<$nu // the pointer stays in bounds of the allocation. let self_ptr = unsafe { (links_field as *const u8).add(spoff) } as *const $crate::types::Opaque<*const Self>; - let cell_inner = $crate::types::Opaque::raw_get(self_ptr); + let cell_inner = $crate::types::Opaque::cast_into(self_ptr); // SAFETY: This value is not accessed in any other places than `prepare_to_insert`, // `post_remove`, or `view_value`. By the safety requirements of those methods, @@ -252,7 +252,7 @@ unsafe fn view_value(links_field: *mut $crate::list::ListLinks<$num>) -> *const // the pointer stays in bounds of the allocation. let self_ptr = unsafe { (links_field as *const u8).add(spoff) } as *const ::core::cell::UnsafeCell<*const Self>; - let cell_inner = ::core::cell::UnsafeCell::raw_get(self_ptr); + let cell_inner = ::core::cell::UnsafeCell::cast_into(self_ptr); // SAFETY: This is not a data race, because the only function that writes to this // value is `prepare_to_insert`, but by the safety requirements the // `prepare_to_insert` method may not be called in parallel with `view_value` or diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs index 36e1290cd079..113463e64815 100644 --- a/rust/kernel/time/hrtimer.rs +++ b/rust/kernel/time/hrtimer.rs @@ -148,7 +148,7 @@ unsafe fn raw_get(this: *const Self) -> *mut bindings::hrtimer { // SAFETY: The field projection to `timer` does not go out of bounds, // because the caller of this function promises that `this` points to an // allocation of at least the size of `Self`. 
- unsafe { Opaque::raw_get(core::ptr::addr_of!((*this).timer)) } + unsafe { Opaque::cast_into(core::ptr::addr_of!((*this).timer)) } } /// Cancel an initialized and potentially running timer. diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index 9de8e0011b1d..49a0e8e9326b 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -377,7 +377,7 @@ pub fn ffi_init(init_func: impl FnOnce(*mut T)) -> impl PinInit { // initialize the `T`. unsafe { pin_init::pin_init_from_closure::<_, ::core::convert::Infallible>(move |slot| { - init_func(Self::raw_get(slot)); + init_func(Self::cast_into(slot)); Ok(()) }) } @@ -397,7 +397,7 @@ pub fn try_ffi_init( // SAFETY: We contain a `MaybeUninit`, so it is OK for the `init_func` to not fully // initialize the `T`. unsafe { - pin_init::pin_init_from_closure::<_, E>(move |slot| init_func(Self::raw_get(slot))) + pin_init::pin_init_from_closure::<_, E>(move |slot| init_func(Self::cast_into(slot))) } } @@ -410,11 +410,11 @@ pub const fn get(&self) -> *mut T { /// /// This function is useful to get access to the value without creating intermediate /// references. - pub const fn raw_get(this: *const Self) -> *mut T { + pub const fn cast_into(this: *const Self) -> *mut T { UnsafeCell::raw_get(this.cast::>>()).cast::() } - /// The opposite operation of [`Opaque::raw_get`]. + /// The opposite operation of [`Opaque::cast_into`]. pub const fn cast_from(this: *const T) -> *const Self { this.cast() } diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index cce23684af24..c90b7431bbba 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -403,7 +403,7 @@ pub unsafe fn raw_get(ptr: *const Self) -> *mut bindings::work_struct { // // A pointer cast would also be ok due to `#[repr(transparent)]`. We use `addr_of!` so that // the compiler does not complain that the `work` field is unused. 
- unsafe { Opaque::raw_get(core::ptr::addr_of!((*ptr).work)) } + unsafe { Opaque::cast_into(core::ptr::addr_of!((*ptr).work)) } } } From 7c098cd5eaae557934f4e4ea0b2809a9972f6a5a Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Fri, 11 Jul 2025 07:59:40 +0000 Subject: [PATCH 0702/2411] workqueue: rust: add delayed work items This patch is being sent for use in the various Rust GPU drivers that are under development. It provides the additional feature of work items that are executed after a delay. The design of the existing workqueue is rather extensible, as most of the logic is reused for delayed work items even though a different work item type is required. The new logic consists of: * A new DelayedWork struct that wraps struct delayed_work. * A new impl_has_delayed_work! macro that provides adjusted versions of the container_of logic, that is suitable with delayed work items. * A `enqueue_delayed` method that can enqueue a delayed work item. This patch does *not* rely on the fact that `struct delayed_work` contains `struct work_struct` at offset zero. It will continue to work even if the layout is changed to hold the `work` field at a different offset. Please see the example introduced at the top of the file for example usage of delayed work items. Acked-by: Tejun Heo Reviewed-by: Boqun Feng Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20250711-workqueue-delay-v3-1-3fe17b18b9d1@google.com [ Replaced `as _` with `as ffi::c_int` to clean warning. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/workqueue.rs | 330 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 327 insertions(+), 3 deletions(-) diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index c90b7431bbba..b9343d5bc00f 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -131,10 +131,69 @@ //! # print_2_later(MyStruct::new(41, 42).unwrap()); //! ``` //! +//! This example shows how you can schedule delayed work items: +//! +//! ``` +//! 
use kernel::sync::Arc; +//! use kernel::workqueue::{self, impl_has_delayed_work, new_delayed_work, DelayedWork, WorkItem}; +//! +//! #[pin_data] +//! struct MyStruct { +//! value: i32, +//! #[pin] +//! work: DelayedWork, +//! } +//! +//! impl_has_delayed_work! { +//! impl HasDelayedWork for MyStruct { self.work } +//! } +//! +//! impl MyStruct { +//! fn new(value: i32) -> Result> { +//! Arc::pin_init( +//! pin_init!(MyStruct { +//! value, +//! work <- new_delayed_work!("MyStruct::work"), +//! }), +//! GFP_KERNEL, +//! ) +//! } +//! } +//! +//! impl WorkItem for MyStruct { +//! type Pointer = Arc; +//! +//! fn run(this: Arc) { +//! pr_info!("The value is: {}\n", this.value); +//! } +//! } +//! +//! /// This method will enqueue the struct for execution on the system workqueue, where its value +//! /// will be printed 12 jiffies later. +//! fn print_later(val: Arc) { +//! let _ = workqueue::system().enqueue_delayed(val, 12); +//! } +//! +//! /// It is also possible to use the ordinary `enqueue` method together with `DelayedWork`. This +//! /// is equivalent to calling `enqueue_delayed` with a delay of zero. +//! fn print_now(val: Arc) { +//! let _ = workqueue::system().enqueue(val); +//! } +//! # print_later(MyStruct::new(42).unwrap()); +//! # print_now(MyStruct::new(42).unwrap()); +//! ``` +//! //! C header: [`include/linux/workqueue.h`](srctree/include/linux/workqueue.h) -use crate::alloc::{AllocError, Flags}; -use crate::{prelude::*, sync::Arc, sync::LockClassKey, types::Opaque}; +use crate::{ + alloc::{AllocError, Flags}, + container_of, + prelude::*, + sync::Arc, + sync::LockClassKey, + time::Jiffies, + types::Opaque, +}; use core::marker::PhantomData; /// Creates a [`Work`] initialiser with the given name and a newly-created lock class. @@ -146,6 +205,33 @@ macro_rules! new_work { } pub use new_work; +/// Creates a [`DelayedWork`] initialiser with the given name and a newly-created lock class. +#[macro_export] +macro_rules! 
new_delayed_work { + () => { + $crate::workqueue::DelayedWork::new( + $crate::optional_name!(), + $crate::static_lock_class!(), + $crate::c_str!(::core::concat!( + ::core::file!(), + ":", + ::core::line!(), + "_timer" + )), + $crate::static_lock_class!(), + ) + }; + ($name:literal) => { + $crate::workqueue::DelayedWork::new( + $crate::c_str!($name), + $crate::static_lock_class!(), + $crate::c_str!(::core::concat!($name, "_timer")), + $crate::static_lock_class!(), + ) + }; +} +pub use new_delayed_work; + /// A kernel work queue. /// /// Wraps the kernel's C `struct workqueue_struct`. @@ -206,6 +292,42 @@ pub fn enqueue(&self, w: W) -> W::EnqueueOutput } } + /// Enqueues a delayed work item. + /// + /// This may fail if the work item is already enqueued in a workqueue. + /// + /// The work item will be submitted using `WORK_CPU_UNBOUND`. + pub fn enqueue_delayed(&self, w: W, delay: Jiffies) -> W::EnqueueOutput + where + W: RawDelayedWorkItem + Send + 'static, + { + let queue_ptr = self.0.get(); + + // SAFETY: We only return `false` if the `work_struct` is already in a workqueue. The other + // `__enqueue` requirements are not relevant since `W` is `Send` and static. + // + // The call to `bindings::queue_delayed_work_on` will dereference the provided raw pointer, + // which is ok because `__enqueue` guarantees that the pointer is valid for the duration of + // this closure, and the safety requirements of `RawDelayedWorkItem` expands this + // requirement to apply to the entire `delayed_work`. + // + // Furthermore, if the C workqueue code accesses the pointer after this call to + // `__enqueue`, then the work item was successfully enqueued, and + // `bindings::queue_delayed_work_on` will have returned true. In this case, `__enqueue` + // promises that the raw pointer will stay valid until we call the function pointer in the + // `work_struct`, so the access is ok. 
+ unsafe { + w.__enqueue(move |work_ptr| { + bindings::queue_delayed_work_on( + bindings::wq_misc_consts_WORK_CPU_UNBOUND as ffi::c_int, + queue_ptr, + container_of!(work_ptr, bindings::delayed_work, work), + delay, + ) + }) + } + } + /// Tries to spawn the given function or closure as a work item. /// /// This method can fail because it allocates memory to store the work item. @@ -298,6 +420,16 @@ unsafe fn __enqueue(self, queue_work_on: F) -> Self::EnqueueOutput F: FnOnce(*mut bindings::work_struct) -> bool; } +/// A raw delayed work item. +/// +/// # Safety +/// +/// If the `__enqueue` method in the `RawWorkItem` implementation calls the closure, then the +/// provided pointer must point at the `work` field of a valid `delayed_work`, and the guarantees +/// that `__enqueue` provides about accessing the `work_struct` must also apply to the rest of the +/// `delayed_work` struct. +pub unsafe trait RawDelayedWorkItem: RawWorkItem {} + /// Defines the method that should be called directly when a work item is executed. /// /// This trait is implemented by `Pin>` and [`Arc`], and is mainly intended to be @@ -407,7 +539,7 @@ pub unsafe fn raw_get(ptr: *const Self) -> *mut bindings::work_struct { } } -/// Declares that a type has a [`Work`] field. +/// Declares that a type contains a [`Work`]. /// /// The intended way of using this trait is via the [`impl_has_work!`] macro. You can use the macro /// like this: @@ -506,6 +638,178 @@ unsafe fn work_container_of( impl{T} HasWork for ClosureWork { self.work } } +/// Links for a delayed work item. +/// +/// This struct contains a function pointer to the [`run`] function from the [`WorkItemPointer`] +/// trait, and defines the linked list pointers necessary to enqueue a work item in a workqueue in +/// a delayed manner. +/// +/// Wraps the kernel's C `struct delayed_work`. +/// +/// This is a helper type used to associate a `delayed_work` with the [`WorkItem`] that uses it. 
+/// +/// [`run`]: WorkItemPointer::run +#[pin_data] +#[repr(transparent)] +pub struct DelayedWork { + #[pin] + dwork: Opaque, + _inner: PhantomData, +} + +// SAFETY: Kernel work items are usable from any thread. +// +// We do not need to constrain `T` since the work item does not actually contain a `T`. +unsafe impl Send for DelayedWork {} +// SAFETY: Kernel work items are usable from any thread. +// +// We do not need to constrain `T` since the work item does not actually contain a `T`. +unsafe impl Sync for DelayedWork {} + +impl DelayedWork { + /// Creates a new instance of [`DelayedWork`]. + #[inline] + pub fn new( + work_name: &'static CStr, + work_key: Pin<&'static LockClassKey>, + timer_name: &'static CStr, + timer_key: Pin<&'static LockClassKey>, + ) -> impl PinInit + where + T: WorkItem, + { + pin_init!(Self { + dwork <- Opaque::ffi_init(|slot: *mut bindings::delayed_work| { + // SAFETY: The `WorkItemPointer` implementation promises that `run` can be used as + // the work item function. + unsafe { + bindings::init_work_with_key( + core::ptr::addr_of_mut!((*slot).work), + Some(T::Pointer::run), + false, + work_name.as_char_ptr(), + work_key.as_ptr(), + ) + } + + // SAFETY: The `delayed_work_timer_fn` function pointer can be used here because + // the timer is embedded in a `struct delayed_work`, and only ever scheduled via + // the core workqueue code, and configured to run in irqsafe context. + unsafe { + bindings::timer_init_key( + core::ptr::addr_of_mut!((*slot).timer), + Some(bindings::delayed_work_timer_fn), + bindings::TIMER_IRQSAFE, + timer_name.as_char_ptr(), + timer_key.as_ptr(), + ) + } + }), + _inner: PhantomData, + }) + } + + /// Get a pointer to the inner `delayed_work`. + /// + /// # Safety + /// + /// The provided pointer must not be dangling and must be properly aligned. (But the memory + /// need not be initialized.) 
+ #[inline] + pub unsafe fn raw_as_work(ptr: *const Self) -> *mut Work { + // SAFETY: The caller promises that the pointer is aligned and not dangling. + let dw: *mut bindings::delayed_work = + unsafe { Opaque::cast_into(core::ptr::addr_of!((*ptr).dwork)) }; + // SAFETY: The caller promises that the pointer is aligned and not dangling. + let wrk: *mut bindings::work_struct = unsafe { core::ptr::addr_of_mut!((*dw).work) }; + // CAST: Work and work_struct have compatible layouts. + wrk.cast() + } +} + +/// Declares that a type contains a [`DelayedWork`]. +/// +/// # Safety +/// +/// The `HasWork` implementation must return a `work_struct` that is stored in the `work` +/// field of a `delayed_work` with the same access rules as the `work_struct`. +pub unsafe trait HasDelayedWork: HasWork {} + +/// Used to safely implement the [`HasDelayedWork`] trait. +/// +/// This macro also implements the [`HasWork`] trait, so you do not need to use [`impl_has_work!`] +/// when using this macro. +/// +/// # Examples +/// +/// ``` +/// use kernel::sync::Arc; +/// use kernel::workqueue::{self, impl_has_delayed_work, DelayedWork}; +/// +/// struct MyStruct<'a, T, const N: usize> { +/// work_field: DelayedWork, 17>, +/// f: fn(&'a [T; N]), +/// } +/// +/// impl_has_delayed_work! { +/// impl{'a, T, const N: usize} HasDelayedWork, 17> +/// for MyStruct<'a, T, N> { self.work_field } +/// } +/// ``` +#[macro_export] +macro_rules! impl_has_delayed_work { + ($(impl$({$($generics:tt)*})? + HasDelayedWork<$work_type:ty $(, $id:tt)?> + for $self:ty + { self.$field:ident } + )*) => {$( + // SAFETY: The implementation of `raw_get_work` only compiles if the field has the right + // type. + unsafe impl$(<$($generics)+>)? + $crate::workqueue::HasDelayedWork<$work_type $(, $id)?> for $self {} + + // SAFETY: The implementation of `raw_get_work` only compiles if the field has the right + // type. + unsafe impl$(<$($generics)+>)? 
$crate::workqueue::HasWork<$work_type $(, $id)?> for $self { + #[inline] + unsafe fn raw_get_work( + ptr: *mut Self + ) -> *mut $crate::workqueue::Work<$work_type $(, $id)?> { + // SAFETY: The caller promises that the pointer is not dangling. + let ptr: *mut $crate::workqueue::DelayedWork<$work_type $(, $id)?> = unsafe { + ::core::ptr::addr_of_mut!((*ptr).$field) + }; + + // SAFETY: The caller promises that the pointer is not dangling. + unsafe { $crate::workqueue::DelayedWork::raw_as_work(ptr) } + } + + #[inline] + unsafe fn work_container_of( + ptr: *mut $crate::workqueue::Work<$work_type $(, $id)?>, + ) -> *mut Self { + // SAFETY: The caller promises that the pointer points at a field of the right type + // in the right kind of struct. + let ptr = unsafe { $crate::workqueue::Work::raw_get(ptr) }; + + // SAFETY: The caller promises that the pointer points at a field of the right type + // in the right kind of struct. + let delayed_work = unsafe { + $crate::container_of!(ptr, $crate::bindings::delayed_work, work) + }; + + let delayed_work: *mut $crate::workqueue::DelayedWork<$work_type $(, $id)?> = + delayed_work.cast(); + + // SAFETY: The caller promises that the pointer points at a field of the right type + // in the right kind of struct. + unsafe { $crate::container_of!(delayed_work, Self, $field) } + } + } + )*}; +} +pub use impl_has_delayed_work; + // SAFETY: The `__enqueue` implementation in RawWorkItem uses a `work_struct` initialized with the // `run` method of this trait as the function pointer because: // - `__enqueue` gets the `work_struct` from the `Work` field, using `T::raw_get_work`. @@ -567,6 +871,16 @@ unsafe fn __enqueue(self, queue_work_on: F) -> Self::EnqueueOutput } } +// SAFETY: By the safety requirements of `HasDelayedWork`, the `work_struct` returned by methods in +// `HasWork` provides a `work_struct` that is the `work` field of a `delayed_work`, and the rest of +// the `delayed_work` has the same access rules as its `work` field. 
+unsafe impl RawDelayedWorkItem for Arc +where + T: WorkItem, + T: HasDelayedWork, +{ +} + // SAFETY: TODO. unsafe impl WorkItemPointer for Pin> where @@ -617,6 +931,16 @@ unsafe fn __enqueue(self, queue_work_on: F) -> Self::EnqueueOutput } } +// SAFETY: By the safety requirements of `HasDelayedWork`, the `work_struct` returned by methods in +// `HasWork` provides a `work_struct` that is the `work` field of a `delayed_work`, and the rest of +// the `delayed_work` has the same access rules as its `work` field. +unsafe impl RawDelayedWorkItem for Pin> +where + T: WorkItem, + T: HasDelayedWork, +{ +} + /// Returns the system work queue (`system_wq`). /// /// It is the one used by `schedule[_delayed]_work[_on]()`. Multi-CPU multi-threaded. There are From aff426f35966e6e77ecfe065984344a7d834eaa9 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 23 May 2025 21:04:51 -0700 Subject: [PATCH 0703/2411] apparmor: mitigate parser generating large xtables Some versions of the parser are generating an xtable transition per state in the state machine, even when the state machine isn't using the transition table. The parser bug is triggered by commit 2e12c5f06017 ("apparmor: add additional flags to extended permission.") In addition to fixing this in userspace, mitigate this in the kernel as part of the policy verification checks by detecting this situation and adjusting to what is actually used, or if not used at all freeing it, so we are not wasting unneeded memory on policy. 
Fixes: 2e12c5f06017 ("apparmor: add additional flags to extended permission.") Signed-off-by: John Johansen --- security/apparmor/include/lib.h | 1 + security/apparmor/lib.c | 23 +++++++++++++++++++++++ security/apparmor/policy_unpack.c | 27 +++++++++++++++++++++------ 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index e60bfa410e55..200cf36c5e0a 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -125,6 +125,7 @@ struct aa_str_table { }; void aa_free_str_table(struct aa_str_table *table); +bool aa_resize_str_table(struct aa_str_table *t, int newsize, gfp_t gfp); struct counted_str { struct kref count; diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 7cdf430762a8..f51e79cc36d4 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -116,6 +116,29 @@ int aa_print_debug_params(char *buffer) aa_g_debug); } +bool aa_resize_str_table(struct aa_str_table *t, int newsize, gfp_t gfp) +{ + char **n; + int i; + + if (t->size == newsize) + return true; + n = kcalloc(newsize, sizeof(*n), gfp); + if (!n) + return false; + for (i = 0; i < min(t->size, newsize); i++) + n[i] = t->table[i]; + for (; i < t->size; i++) + kfree_sensitive(t->table[i]); + if (newsize > t->size) + memset(&n[t->size], 0, (newsize-t->size)*sizeof(*n)); + kfree_sensitive(t->table); + t->table = n; + t->size = newsize; + + return true; +} + /** * aa_free_str_table - free entries str table * @t: the string table to free (MAYBE NULL) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 588dd1d5d364..58c106b63727 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -802,8 +802,12 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy, if (!pdb->dfa && pdb->trans.table) aa_free_str_table(&pdb->trans); - /* TODO: move compat mapping here, requires dfa merging first */ - /* 
TODO: move verify here, it has to be done after compat mappings */ + /* TODO: + * - move compat mapping here, requires dfa merging first + * - move verify here, it has to be done after compat mappings + * - move free of unneeded trans table here, has to be done + * after perm mapping. + */ out: *policy = pdb; return 0; @@ -1242,21 +1246,32 @@ static bool verify_perm(struct aa_perms *perm) static bool verify_perms(struct aa_policydb *pdb) { int i; + int xidx, xmax = -1; for (i = 0; i < pdb->size; i++) { if (!verify_perm(&pdb->perms[i])) return false; /* verify indexes into str table */ - if ((pdb->perms[i].xindex & AA_X_TYPE_MASK) == AA_X_TABLE && - (pdb->perms[i].xindex & AA_X_INDEX_MASK) >= pdb->trans.size) - return false; + if ((pdb->perms[i].xindex & AA_X_TYPE_MASK) == AA_X_TABLE) { + xidx = pdb->perms[i].xindex & AA_X_INDEX_MASK; + if (xidx >= pdb->trans.size) + return false; + if (xmax < xidx) + xmax = xidx; + } if (pdb->perms[i].tag && pdb->perms[i].tag >= pdb->trans.size) return false; if (pdb->perms[i].label && pdb->perms[i].label >= pdb->trans.size) return false; } - + /* deal with incorrectly constructed string tables */ + if (xmax == -1) { + aa_free_str_table(&pdb->trans); + } else if (pdb->trans.size > xmax + 1) { + if (!aa_resize_str_table(&pdb->trans, xmax + 1, GFP_KERNEL)) + return false; + } return true; } From 37a3741d27b64012ab6a5d9c92b514b977349dbb Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 30 Jun 2025 00:06:22 -0700 Subject: [PATCH 0704/2411] Revert "apparmor: use SHA-256 library API instead of crypto_shash API" This reverts commit e9ed1eb8f6217e53843d82ecf2d50f8d1a93e77c. Eric has requested that this patch be taken through the libcrypto-next tree, instead. 
Signed-off-by: John Johansen --- security/apparmor/Kconfig | 3 +- security/apparmor/crypto.c | 85 ++++++++++++++++++++++++++++++++------ 2 files changed, 75 insertions(+), 13 deletions(-) diff --git a/security/apparmor/Kconfig b/security/apparmor/Kconfig index 1e3bd44643da..64cc3044a42c 100644 --- a/security/apparmor/Kconfig +++ b/security/apparmor/Kconfig @@ -59,7 +59,8 @@ config SECURITY_APPARMOR_INTROSPECT_POLICY config SECURITY_APPARMOR_HASH bool "Enable introspection of sha256 hashes for loaded profiles" depends on SECURITY_APPARMOR_INTROSPECT_POLICY - select CRYPTO_LIB_SHA256 + select CRYPTO + select CRYPTO_SHA256 default y help This option selects whether introspection of loaded policy diff --git a/security/apparmor/crypto.c b/security/apparmor/crypto.c index 40e17e153f1e..aad486b2fca6 100644 --- a/security/apparmor/crypto.c +++ b/security/apparmor/crypto.c @@ -11,52 +11,113 @@ * it should be. */ -#include +#include #include "include/apparmor.h" #include "include/crypto.h" +static unsigned int apparmor_hash_size; + +static struct crypto_shash *apparmor_tfm; + unsigned int aa_hash_size(void) { - return SHA256_DIGEST_SIZE; + return apparmor_hash_size; } char *aa_calc_hash(void *data, size_t len) { + SHASH_DESC_ON_STACK(desc, apparmor_tfm); char *hash; + int error; - hash = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); + if (!apparmor_tfm) + return NULL; + + hash = kzalloc(apparmor_hash_size, GFP_KERNEL); if (!hash) return ERR_PTR(-ENOMEM); - sha256(data, len, hash); + desc->tfm = apparmor_tfm; + + error = crypto_shash_init(desc); + if (error) + goto fail; + error = crypto_shash_update(desc, (u8 *) data, len); + if (error) + goto fail; + error = crypto_shash_final(desc, hash); + if (error) + goto fail; + return hash; + +fail: + kfree(hash); + + return ERR_PTR(error); } int aa_calc_profile_hash(struct aa_profile *profile, u32 version, void *start, size_t len) { - struct sha256_state state; + SHASH_DESC_ON_STACK(desc, apparmor_tfm); + int error; __le32 le32_version = 
cpu_to_le32(version); if (!aa_g_hash_policy) return 0; - profile->hash = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); + if (!apparmor_tfm) + return 0; + + profile->hash = kzalloc(apparmor_hash_size, GFP_KERNEL); if (!profile->hash) return -ENOMEM; - sha256_init(&state); - sha256_update(&state, (u8 *)&le32_version, 4); - sha256_update(&state, (u8 *)start, len); - sha256_final(&state, profile->hash); + desc->tfm = apparmor_tfm; + + error = crypto_shash_init(desc); + if (error) + goto fail; + error = crypto_shash_update(desc, (u8 *) &le32_version, 4); + if (error) + goto fail; + error = crypto_shash_update(desc, (u8 *) start, len); + if (error) + goto fail; + error = crypto_shash_final(desc, profile->hash); + if (error) + goto fail; + return 0; + +fail: + kfree(profile->hash); + profile->hash = NULL; + + return error; } static int __init init_profile_hash(void) { - if (apparmor_initialized) - aa_info_message("AppArmor sha256 policy hashing enabled"); + struct crypto_shash *tfm; + + if (!apparmor_initialized) + return 0; + + tfm = crypto_alloc_shash("sha256", 0, 0); + if (IS_ERR(tfm)) { + int error = PTR_ERR(tfm); + AA_ERROR("failed to setup profile sha256 hashing: %d\n", error); + return error; + } + apparmor_tfm = tfm; + apparmor_hash_size = crypto_shash_digestsize(apparmor_tfm); + + aa_info_message("AppArmor sha256 policy hashing enabled"); + return 0; } + late_initcall(init_profile_hash); From 87cc7b00114f6f751d25f6a5f05128dc27ef64db Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 18 Mar 2025 23:06:41 +0100 Subject: [PATCH 0705/2411] apparmor: make __begin_current_label_crit_section() indicate whether put is needed Same as aa_get_newest_cred_label_condref(). This avoids a bunch of work overall and allows the compiler to note when no clean up is necessary, allowing for tail calls. 
This in particular happens in apparmor_file_permission(), which manages to tail call aa_file_perm() 105 bytes in (vs a regular call 112 bytes in followed by branches to figure out if clean up is needed). Signed-off-by: Mateusz Guzik Signed-off-by: John Johansen --- security/apparmor/include/cred.h | 21 ++++++--- security/apparmor/lsm.c | 75 ++++++++++++++++++++------------ security/apparmor/policy.c | 12 ++--- 3 files changed, 67 insertions(+), 41 deletions(-) diff --git a/security/apparmor/include/cred.h b/security/apparmor/include/cred.h index 674af3175905..de6ec4969598 100644 --- a/security/apparmor/include/cred.h +++ b/security/apparmor/include/cred.h @@ -114,7 +114,12 @@ static inline struct aa_label *aa_get_current_label(void) return aa_get_label(l); } -#define __end_current_label_crit_section(X) end_current_label_crit_section(X) +static inline void __end_current_label_crit_section(struct aa_label *label, + bool needput) +{ + if (unlikely(needput)) + aa_put_label(label); +} /** * end_current_label_crit_section - put a reference found with begin_current_label.. @@ -142,13 +147,16 @@ static inline void end_current_label_crit_section(struct aa_label *label) * critical section between __begin_current_label_crit_section() .. 
* __end_current_label_crit_section() */ -static inline struct aa_label *__begin_current_label_crit_section(void) +static inline struct aa_label *__begin_current_label_crit_section(bool *needput) { struct aa_label *label = aa_current_raw_label(); - if (label_is_stale(label)) - label = aa_get_newest_label(label); + if (label_is_stale(label)) { + *needput = true; + return aa_get_newest_label(label); + } + *needput = false; return label; } @@ -184,10 +192,11 @@ static inline struct aa_ns *aa_get_current_ns(void) { struct aa_label *label; struct aa_ns *ns; + bool needput; - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); ns = aa_get_ns(labels_ns(label)); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return ns; } diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 74e2f31ac2d8..990211381319 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -127,14 +127,15 @@ static int apparmor_ptrace_access_check(struct task_struct *child, struct aa_label *tracer, *tracee; const struct cred *cred; int error; + bool needput; cred = get_task_cred(child); tracee = cred_label(cred); /* ref count on cred */ - tracer = __begin_current_label_crit_section(); + tracer = __begin_current_label_crit_section(&needput); error = aa_may_ptrace(current_cred(), tracer, cred, tracee, (mode & PTRACE_MODE_READ) ? 
AA_PTRACE_READ : AA_PTRACE_TRACE); - __end_current_label_crit_section(tracer); + __end_current_label_crit_section(tracer, needput); put_cred(cred); return error; @@ -145,14 +146,15 @@ static int apparmor_ptrace_traceme(struct task_struct *parent) struct aa_label *tracer, *tracee; const struct cred *cred; int error; + bool needput; - tracee = __begin_current_label_crit_section(); + tracee = __begin_current_label_crit_section(&needput); cred = get_task_cred(parent); tracer = cred_label(cred); /* ref count on cred */ error = aa_may_ptrace(cred, tracer, current_cred(), tracee, AA_PTRACE_TRACE); put_cred(cred); - __end_current_label_crit_section(tracee); + __end_current_label_crit_section(tracee, needput); return error; } @@ -221,12 +223,13 @@ static int common_perm(const char *op, const struct path *path, u32 mask, { struct aa_label *label; int error = 0; + bool needput; - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); if (!unconfined(label)) error = aa_path_perm(op, current_cred(), label, path, 0, mask, cond); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return error; } @@ -524,14 +527,15 @@ static int common_file_perm(const char *op, struct file *file, u32 mask, { struct aa_label *label; int error = 0; + bool needput; /* don't reaudit files closed during inheritance */ - if (file->f_path.dentry == aa_null.dentry) + if (unlikely(file->f_path.dentry == aa_null.dentry)) return -EACCES; - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); error = aa_file_perm(op, current_cred(), label, file, mask, in_atomic); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return error; } @@ -664,15 +668,16 @@ static int apparmor_uring_override_creds(const struct cred *new) struct aa_profile *profile; struct aa_label *label; int error; + bool needput; DEFINE_AUDIT_DATA(ad, 
LSM_AUDIT_DATA_NONE, AA_CLASS_IO_URING, OP_URING_OVERRIDE); ad.uring.target = cred_label(new); - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); error = fn_for_each(label, profile, profile_uring(profile, AA_MAY_OVERRIDE_CRED, cred_label(new), CAP_SYS_ADMIN, &ad)); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return error; } @@ -688,14 +693,15 @@ static int apparmor_uring_sqpoll(void) struct aa_profile *profile; struct aa_label *label; int error; + bool needput; DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_IO_URING, OP_URING_SQPOLL); - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); error = fn_for_each(label, profile, profile_uring(profile, AA_MAY_CREATE_SQPOLL, NULL, CAP_SYS_ADMIN, &ad)); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return error; } @@ -706,6 +712,7 @@ static int apparmor_sb_mount(const char *dev_name, const struct path *path, { struct aa_label *label; int error = 0; + bool needput; /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) @@ -713,7 +720,7 @@ static int apparmor_sb_mount(const char *dev_name, const struct path *path, flags &= ~AA_MS_IGNORE_MASK; - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); if (!unconfined(label)) { if (flags & MS_REMOUNT) error = aa_remount(current_cred(), label, path, flags, @@ -732,7 +739,7 @@ static int apparmor_sb_mount(const char *dev_name, const struct path *path, error = aa_new_mount(current_cred(), label, dev_name, path, type, flags, data); } - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return error; } @@ -742,12 +749,13 @@ static int apparmor_move_mount(const struct path *from_path, { struct aa_label *label; int error = 0; + bool needput; - label = __begin_current_label_crit_section(); + 
label = __begin_current_label_crit_section(&needput); if (!unconfined(label)) error = aa_move_mount(current_cred(), label, from_path, to_path); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return error; } @@ -756,11 +764,12 @@ static int apparmor_sb_umount(struct vfsmount *mnt, int flags) { struct aa_label *label; int error = 0; + bool needput; - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); if (!unconfined(label)) error = aa_umount(current_cred(), label, mnt, flags); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return error; } @@ -984,10 +993,12 @@ static void apparmor_bprm_committed_creds(const struct linux_binprm *bprm) static void apparmor_current_getlsmprop_subj(struct lsm_prop *prop) { - struct aa_label *label = __begin_current_label_crit_section(); + struct aa_label *label; + bool needput; + label = __begin_current_label_crit_section(&needput); prop->apparmor.label = label; - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); } static void apparmor_task_getlsmprop_obj(struct task_struct *p, @@ -1002,13 +1013,16 @@ static void apparmor_task_getlsmprop_obj(struct task_struct *p, static int apparmor_task_setrlimit(struct task_struct *task, unsigned int resource, struct rlimit *new_rlim) { - struct aa_label *label = __begin_current_label_crit_section(); + struct aa_label *label; int error = 0; + bool needput; + + label = __begin_current_label_crit_section(&needput); if (!unconfined(label)) error = aa_task_setrlimit(current_cred(), label, task, resource, new_rlim); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return error; } @@ -1019,6 +1033,7 @@ static int apparmor_task_kill(struct task_struct *target, struct kernel_siginfo const struct cred *tc; struct aa_label *cl, *tl; int error; + bool needput; tc = 
get_task_cred(target); tl = aa_get_newest_cred_label(tc); @@ -1030,9 +1045,9 @@ static int apparmor_task_kill(struct task_struct *target, struct kernel_siginfo error = aa_may_signal(cred, cl, tc, tl, sig); aa_put_label(cl); } else { - cl = __begin_current_label_crit_section(); + cl = __begin_current_label_crit_section(&needput); error = aa_may_signal(current_cred(), cl, tc, tl, sig); - __end_current_label_crit_section(cl); + __end_current_label_crit_section(cl, needput); } aa_put_label(tl); put_cred(tc); @@ -1133,10 +1148,11 @@ static int apparmor_unix_stream_connect(struct sock *sk, struct sock *peer_sk, struct aa_sk_ctx *new_ctx = aa_sock(newsk); struct aa_label *label; int error; + bool needput; - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); error = unix_connect_perm(current_cred(), label, sk, peer_sk); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); if (error) return error; @@ -1163,8 +1179,9 @@ static int apparmor_unix_may_send(struct socket *sock, struct socket *peer) struct aa_sk_ctx *peer_ctx = aa_sock(peer->sk); struct aa_label *label; int error; + bool needput; - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); error = xcheck(aa_unix_peer_perm(current_cred(), label, OP_SENDMSG, AA_MAY_SEND, sock->sk, peer->sk, NULL), @@ -1172,7 +1189,7 @@ static int apparmor_unix_may_send(struct socket *sock, struct socket *peer) peer_ctx->label, OP_SENDMSG, AA_MAY_RECEIVE, peer->sk, sock->sk, label)); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return error; } diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 1f532fe48a1c..a60bb7d9b583 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -870,11 +870,11 @@ bool aa_policy_admin_capable(const struct cred *subj_cred, bool aa_current_policy_view_capable(struct aa_ns *ns) { 
struct aa_label *label; - bool res; + bool needput, res; - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); res = aa_policy_view_capable(current_cred(), label, ns); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return res; } @@ -882,11 +882,11 @@ bool aa_current_policy_view_capable(struct aa_ns *ns) bool aa_current_policy_admin_capable(struct aa_ns *ns) { struct aa_label *label; - bool res; + bool needput, res; - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); res = aa_policy_admin_capable(current_cred(), label, ns); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return res; } From 6afb0a7bc95a61e40c38c58e2bcf6c88fff68d67 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sun, 22 Jun 2025 04:09:06 -0700 Subject: [PATCH 0706/2411] apparmor: update kernel doc comments for xxx_label_crit_section Add a kernel doc header for __end_current_label_crit_section(), and update the header for __begin_current_label_crit_section(). Fixes: b42ecc5f58ef ("apparmor: make __begin_current_label_crit_section() indicate whether put is needed") Signed-off-by: John Johansen --- security/apparmor/include/cred.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/security/apparmor/include/cred.h b/security/apparmor/include/cred.h index de6ec4969598..b028e4c13b6f 100644 --- a/security/apparmor/include/cred.h +++ b/security/apparmor/include/cred.h @@ -114,6 +114,13 @@ static inline struct aa_label *aa_get_current_label(void) return aa_get_label(l); } +/** + * __end_current_label_crit_section - end crit section begun with __begin_... 
+ * @label: label obtained from __begin_current_label_crit_section + * @needput: output: bool set by __begin_current_label_crit_section + * + * Returns: label to use for this crit section + */ static inline void __end_current_label_crit_section(struct aa_label *label, bool needput) { @@ -137,6 +144,7 @@ static inline void end_current_label_crit_section(struct aa_label *label) /** * __begin_current_label_crit_section - current's confining label + * @needput: store whether the label needs to be put when ending crit section * * Returns: up to date confining label or the ns unconfined label (NOT NULL) * From bc6e5f6933b8e7b74858ac830d5b9b4ca10a099a Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 1 Apr 2025 15:28:13 -0700 Subject: [PATCH 0707/2411] apparmor: Remove use of the double lock The use of the double lock is not necessary and problematic. Instead pull the bits that need locks into their own sections and grab the needed references. Fixes: c05e705812d1 ("apparmor: add fine grained af_unix mediation") Signed-off-by: John Johansen --- security/apparmor/af_unix.c | 197 ++++++++++++++-------------- security/apparmor/include/af_unix.h | 1 + security/apparmor/include/audit.h | 1 - security/apparmor/lsm.c | 4 +- security/apparmor/net.c | 3 - 5 files changed, 104 insertions(+), 102 deletions(-) diff --git a/security/apparmor/af_unix.c b/security/apparmor/af_unix.c index ed4b34b88e38..53ccf9becdf7 100644 --- a/security/apparmor/af_unix.c +++ b/security/apparmor/af_unix.c @@ -30,11 +30,10 @@ static inline struct sock *aa_unix_sk(struct unix_sock *u) } static int unix_fs_perm(const char *op, u32 mask, const struct cred *subj_cred, - struct aa_label *label, struct unix_sock *u) + struct aa_label *label, struct path *path) { AA_BUG(!label); - AA_BUG(!u); - AA_BUG(!is_unix_fs(aa_unix_sk(u))); + AA_BUG(!path); if (unconfined(label) || !label_mediates(label, AA_CLASS_FILE)) return 0; @@ -43,13 +42,13 @@ static int unix_fs_perm(const char *op, u32 mask, const struct cred 
*subj_cred, /* if !u->path.dentry socket is being shutdown - implicit delegation * until obj delegation is supported */ - if (u->path.dentry) { + if (path->dentry) { /* the sunpath may not be valid for this ns so use the path */ - struct path_cond cond = { u->path.dentry->d_inode->i_uid, - u->path.dentry->d_inode->i_mode + struct path_cond cond = { path->dentry->d_inode->i_uid, + path->dentry->d_inode->i_mode }; - return aa_path_perm(op, subj_cred, label, &u->path, + return aa_path_perm(op, subj_cred, label, path, PATH_SOCK_COND, mask, &cond); } /* else implicitly delegated */ @@ -102,18 +101,27 @@ static aa_state_t match_to_local(struct aa_policydb *policy, return state; } +struct sockaddr_un *aa_sunaddr(const struct unix_sock *u, int *addrlen) +{ + struct unix_address *addr; + + /* memory barrier is sufficient see note in net/unix/af_unix.c */ + addr = smp_load_acquire(&u->addr); + if (addr) { + *addrlen = addr->len; + return addr->name; + } + *addrlen = 0; + return NULL; +} + static aa_state_t match_to_sk(struct aa_policydb *policy, aa_state_t state, u32 request, struct unix_sock *u, struct aa_perms **p, const char **info) { - struct sockaddr_un *addr = NULL; - int addrlen = 0; - - if (u->addr) { - addr = u->addr->name; - addrlen = u->addr->len; - } + int addrlen; + struct sockaddr_un *addr = aa_sunaddr(u, &addrlen); return match_to_local(policy, state, request, u->sk.sk_type, u->sk.sk_protocol, addr, addrlen, p, info); @@ -363,7 +371,8 @@ static int profile_opt_perm(struct aa_profile *profile, u32 request, /* null peer_label is allowed, in which case the peer_sk label is used */ static int profile_peer_perm(struct aa_profile *profile, u32 request, - struct sock *sk, struct sock *peer_sk, + struct sock *sk, struct sockaddr_un *peer_addr, + int peer_addrlen, struct aa_label *peer_label, struct apparmor_audit_data *ad) { @@ -375,26 +384,16 @@ static int profile_peer_perm(struct aa_profile *profile, u32 request, AA_BUG(!profile); 
AA_BUG(profile_unconfined(profile)); AA_BUG(!sk); - AA_BUG(!peer_sk); + AA_BUG(!peer_label); AA_BUG(!ad); - AA_BUG(is_unix_fs(peer_sk)); /* currently always calls unix_fs_perm */ state = RULE_MEDIATES_v9NET(rules); if (state) { - struct aa_sk_ctx *peer_ctx = aa_sock(peer_sk); struct aa_profile *peerp; - struct sockaddr_un *addr = NULL; - int len = 0; - if (unix_sk(peer_sk)->addr) { - addr = unix_sk(peer_sk)->addr->name; - len = unix_sk(peer_sk)->addr->len; - } state = match_to_peer(rules->policy, state, request, unix_sk(sk), - addr, len, &p, &ad->info); - if (!peer_label) - peer_label = peer_ctx->label; + peer_addr, peer_addrlen, &p, &ad->info); return fn_for_each_in_ns(peer_label, peerp, match_label(profile, rules, state, request, @@ -422,9 +421,8 @@ int aa_unix_create_perm(struct aa_label *label, int family, int type, return 0; } -int aa_unix_label_sk_perm(const struct cred *subj_cred, - struct aa_label *label, const char *op, u32 request, - struct sock *sk) +int aa_unix_label_sk_perm(const struct cred *subj_cred, struct aa_label *label, + const char *op, u32 request, struct sock *sk) { if (!unconfined(label)) { struct aa_profile *profile; @@ -436,19 +434,6 @@ int aa_unix_label_sk_perm(const struct cred *subj_cred, return 0; } -static int unix_label_sock_perm(const struct cred *subj_cred, - struct aa_label *label, const char *op, - u32 request, struct socket *sock) -{ - if (unconfined(label)) - return 0; - if (is_unix_fs(sock->sk)) - return unix_fs_perm(op, request, subj_cred, label, - unix_sk(sock->sk)); - - return aa_unix_label_sk_perm(subj_cred, label, op, request, sock->sk); -} - /* revalidation, get/set attr, shutdown */ int aa_unix_sock_perm(const char *op, u32 request, struct socket *sock) { @@ -456,7 +441,12 @@ int aa_unix_sock_perm(const char *op, u32 request, struct socket *sock) int error; label = begin_current_label_crit_section(); - error = unix_label_sock_perm(current_cred(), label, op, request, sock); + if (is_unix_fs(sock->sk)) + error = 
unix_fs_perm(op, request, current_cred(), label, + &unix_sk(sock->sk)->path); + else + error = aa_unix_label_sk_perm(current_cred(), label, op, + request, sock->sk); end_current_label_crit_section(label); return error; @@ -464,7 +454,7 @@ int aa_unix_sock_perm(const char *op, u32 request, struct socket *sock) static int valid_addr(struct sockaddr *addr, int addr_len) { - struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; + struct sockaddr_un *sunaddr = unix_addr(addr); /* addr_len == offsetof(struct sockaddr_un, sun_path) is autobind */ if (addr_len < offsetof(struct sockaddr_un, sun_path) || @@ -586,6 +576,22 @@ int aa_unix_opt_perm(const char *op, u32 request, struct socket *sock, return error; } +static int unix_peer_perm(const struct cred *subj_cred, + struct aa_label *label, const char *op, u32 request, + struct sock *sk, struct sockaddr_un *peer_addr, + int peer_addrlen, struct aa_label *peer_label) +{ + struct aa_profile *profile; + DEFINE_AUDIT_SK(ad, op, subj_cred, sk); + + ad.net.addr = peer_addr; + ad.net.addrlen = peer_addrlen; + + return fn_for_each_confined(label, profile, + profile_peer_perm(profile, request, sk, + peer_addr, peer_addrlen, peer_label, &ad)); +} + /** * * Requires: lock held on both @sk and @peer_sk @@ -602,58 +608,37 @@ int aa_unix_peer_perm(const struct cred *subj_cred, AA_BUG(!label); AA_BUG(!sk); AA_BUG(!peer_sk); + AA_BUG(!peer_label); if (is_unix_fs(aa_unix_sk(peeru))) { - return unix_fs_perm(op, request, subj_cred, label, peeru); + return unix_fs_perm(op, request, subj_cred, label, + &peeru->path); } else if (is_unix_fs(aa_unix_sk(u))) { - return unix_fs_perm(op, request, subj_cred, label, u); + return unix_fs_perm(op, request, subj_cred, label, &u->path); } else if (!unconfined(label)) { - struct aa_profile *profile; - DEFINE_AUDIT_SK(ad, op, subj_cred, sk); + int plen; + struct sockaddr_un *paddr = aa_sunaddr(unix_sk(peer_sk), + &plen); - ad.net.peer_sk = peer_sk; - - return fn_for_each_confined(label, profile, - 
profile_peer_perm(profile, request, sk, - peer_sk, peer_label, &ad)); + return unix_peer_perm(subj_cred, label, op, request, + sk, paddr, plen, peer_label); } return 0; } -static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) -{ - if (unlikely(sk1 == sk2) || !sk2) { - unix_state_lock(sk1); - return; - } - if (sk1 < sk2) { - unix_state_lock(sk1); - unix_state_lock(sk2); - } else { - unix_state_lock(sk2); - unix_state_lock(sk1); - } -} - -static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) -{ - if (unlikely(sk1 == sk2) || !sk2) { - unix_state_unlock(sk1); - return; - } - unix_state_unlock(sk1); - unix_state_unlock(sk2); -} - -/* TODO: examine replacing double lock with cached addr */ - +/* This fn is only checked if something has changed in the security + * boundaries. Otherwise cached info off file is sufficient + */ int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, const char *op, u32 request, struct file *file) { struct socket *sock = (struct socket *) file->private_data; + struct sockaddr_un *addr, *peer_addr; + int addrlen, peer_addrlen; struct sock *peer_sk = NULL; u32 sk_req = request & ~NET_PEER_MASK; + struct path path; bool is_sk_fs; int error = 0; @@ -663,40 +648,60 @@ int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, AA_BUG(sock->sk->sk_family != PF_UNIX); /* TODO: update sock label with new task label */ + /* investigate only using lock via unix_peer_get() + * addr only needs the memory barrier, but need to investigate + * path + */ unix_state_lock(sock->sk); peer_sk = unix_peer(sock->sk); if (peer_sk) sock_hold(peer_sk); is_sk_fs = is_unix_fs(sock->sk); + addr = aa_sunaddr(unix_sk(sock->sk), &addrlen); + path = unix_sk(sock->sk)->path; + unix_state_unlock(sock->sk); + if (is_sk_fs && peer_sk) sk_req = request; - if (sk_req) - error = unix_label_sock_perm(subj_cred, label, op, sk_req, - sock); - unix_state_unlock(sock->sk); + if (sk_req) { + if (is_sk_fs) + error = 
unix_fs_perm(op, sk_req, subj_cred, label, + &path); + else + error = aa_unix_label_sk_perm(subj_cred, label, op, + sk_req, sock->sk); + } if (!peer_sk) - return error; + goto out; - unix_state_double_lock(sock->sk, peer_sk); + peer_addr = aa_sunaddr(unix_sk(peer_sk), &peer_addrlen); + + struct path peer_path; + + peer_path = unix_sk(peer_sk)->path; if (!is_sk_fs && is_unix_fs(peer_sk)) { last_error(error, unix_fs_perm(op, request, subj_cred, label, - unix_sk(peer_sk))); + &peer_path)); } else if (!is_sk_fs) { struct aa_sk_ctx *pctx = aa_sock(peer_sk); + /* no fs check of aa_unix_peer_perm because conditions above + * ensure they will never be done + */ last_error(error, - xcheck(aa_unix_peer_perm(subj_cred, label, op, - MAY_READ | MAY_WRITE, - sock->sk, peer_sk, NULL), - aa_unix_peer_perm(file->f_cred, pctx->label, op, - MAY_READ | MAY_WRITE, - peer_sk, sock->sk, label))); + xcheck(unix_peer_perm(subj_cred, label, op, + MAY_READ | MAY_WRITE, sock->sk, + peer_addr, peer_addrlen, + pctx->label), + unix_peer_perm(file->f_cred, pctx->label, op, + MAY_READ | MAY_WRITE, peer_sk, + addr, addrlen, label))); } - unix_state_double_unlock(sock->sk, peer_sk); - sock_put(peer_sk); +out: + return error; } diff --git a/security/apparmor/include/af_unix.h b/security/apparmor/include/af_unix.h index 28390eec3204..760d98132392 100644 --- a/security/apparmor/include/af_unix.h +++ b/security/apparmor/include/af_unix.h @@ -31,6 +31,7 @@ #define is_unix_connected(S) ((S)->state == SS_CONNECTED) +struct sockaddr_un *aa_sunaddr(const struct unix_sock *u, int *addrlen); int aa_unix_peer_perm(const struct cred *subj_cred, struct aa_label *label, const char *op, u32 request, struct sock *sk, struct sock *peer_sk, diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index e27229349abb..365bc67dd150 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -138,7 +138,6 @@ struct apparmor_audit_data { }; struct { int type, 
protocol; - struct sock *peer_sk; void *addr; int addrlen; } net; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 990211381319..0b53ac1c2d70 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1112,7 +1112,7 @@ static int unix_connect_perm(const struct cred *cred, struct aa_label *label, error = aa_unix_peer_perm(cred, label, OP_CONNECT, (AA_MAY_CONNECT | AA_MAY_SEND | AA_MAY_RECEIVE), - sk, peer_sk, NULL); + sk, peer_sk, peer_ctx->label); if (!is_unix_fs(peer_sk)) { last_error(error, aa_unix_peer_perm(cred, @@ -1184,7 +1184,7 @@ static int apparmor_unix_may_send(struct socket *sock, struct socket *peer) label = __begin_current_label_crit_section(&needput); error = xcheck(aa_unix_peer_perm(current_cred(), label, OP_SENDMSG, AA_MAY_SEND, - sock->sk, peer->sk, NULL), + sock->sk, peer->sk, peer_ctx->label), aa_unix_peer_perm(peer->file ? peer->file->f_cred : NULL, peer_ctx->label, OP_SENDMSG, AA_MAY_RECEIVE, diff --git a/security/apparmor/net.c b/security/apparmor/net.c index a256a4664826..e6f9e11eaa6a 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -148,9 +148,6 @@ void audit_net_cb(struct audit_buffer *ab, void *va) audit_unix_addr(ab, "peer_addr", unix_addr(ad->net.addr), ad->net.addrlen); - else - audit_unix_sk_addr(ab, "peer_addr", - ad->net.peer_sk); } } if (ad->peer) { From a30a9fdb66319466a7c76b455524d27c75d2b05b Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sat, 14 Jun 2025 13:49:02 -0700 Subject: [PATCH 0708/2411] apparmor: fix af_unix auditing to include all address information The auditing of addresses currently doesn't include the source address and mixes source and foreign/peer under the same audit name. Fix this so source is always addr, and the foreign/peer is peer_addr. 
Fixes: c05e705812d1 ("apparmor: add fine grained af_unix mediation") Signed-off-by: John Johansen --- security/apparmor/af_unix.c | 4 ++-- security/apparmor/include/audit.h | 4 ++++ security/apparmor/net.c | 20 ++++++++++++-------- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/security/apparmor/af_unix.c b/security/apparmor/af_unix.c index 53ccf9becdf7..03d44fa19d12 100644 --- a/security/apparmor/af_unix.c +++ b/security/apparmor/af_unix.c @@ -584,8 +584,8 @@ static int unix_peer_perm(const struct cred *subj_cred, struct aa_profile *profile; DEFINE_AUDIT_SK(ad, op, subj_cred, sk); - ad.net.addr = peer_addr; - ad.net.addrlen = peer_addrlen; + ad.net.peer.addr = peer_addr; + ad.net.peer.addrlen = peer_addrlen; return fn_for_each_confined(label, profile, profile_peer_perm(profile, request, sk, diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index 365bc67dd150..1a71a94ea19c 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -140,6 +140,10 @@ struct apparmor_audit_data { int type, protocol; void *addr; int addrlen; + struct { + void *addr; + int addrlen; + } peer; } net; }; }; diff --git a/security/apparmor/net.c b/security/apparmor/net.c index e6f9e11eaa6a..2da554cc3a35 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -99,10 +99,15 @@ static void audit_unix_sk_addr(struct audit_buffer *ab, const char *str, { const struct unix_sock *u = unix_sk(sk); - if (u && u->addr) - audit_unix_addr(ab, str, u->addr->name, u->addr->len); - else + if (u && u->addr) { + int addrlen; + struct sockaddr_un *addr = aa_sunaddr(u, &addrlen); + + audit_unix_addr(ab, str, addr, addrlen); + } else { audit_unix_addr(ab, str, NULL, 0); + + } } /* audit callback for net specific fields */ @@ -137,17 +142,16 @@ void audit_net_cb(struct audit_buffer *ab, void *va) } } if (ad->common.u.net->family == PF_UNIX) { - if ((ad->request & ~NET_PEER_MASK) && ad->net.addr) + if (ad->net.addr || 
!ad->common.u.net->sk) audit_unix_addr(ab, "addr", unix_addr(ad->net.addr), ad->net.addrlen); else audit_unix_sk_addr(ab, "addr", ad->common.u.net->sk); if (ad->request & NET_PEER_MASK) { - if (ad->net.addr) - audit_unix_addr(ab, "peer_addr", - unix_addr(ad->net.addr), - ad->net.addrlen); + audit_unix_addr(ab, "peer_addr", + unix_addr(ad->net.peer.addr), + ad->net.peer.addrlen); } } if (ad->peer) { From 50d56a1a366a3a5e7e41d9efff1a5e4ee7bf98a7 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sat, 14 Jun 2025 13:49:34 -0700 Subject: [PATCH 0709/2411] apparmor: fix AA_DEBUG_LABEL() AA_DEBUG_LABEL() was not specifying its vargs, which are needed so it can output debug parameters. Fixes: 71e6cff3e0dd ("apparmor: Improve debug print infrastructure") Signed-off-by: John Johansen --- security/apparmor/include/lib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index 200cf36c5e0a..444197075fd6 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -42,7 +42,7 @@ extern struct aa_dfa *stacksplitdfa; if (aa_g_debug & opt) \ pr_warn_ratelimited("%s: " fmt, __func__, ##args); \ } while (0) -#define AA_DEBUG_LABEL(LAB, X, fmt, args) \ +#define AA_DEBUG_LABEL(LAB, X, fmt, args...) \ do { \ if ((LAB)->flags & FLAG_DEBUG1) \ AA_DEBUG(X, fmt, args); \ From 6456ccbd2ff72814b3c1b2e2a3a2145a2ced858d Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 4 Jun 2025 01:45:05 -0700 Subject: [PATCH 0710/2411] apparmor: fix regression in fs based unix sockets when using old abi Policy loaded using abi 7 socket mediation was not being applied correctly in all cases. In some cases with fs based unix sockets a subset of permissions were allowed when they should have been denied. This was happening because the check for whether the socket was an fs based unix socket came before the abi check.
But the abi check is where the correct path is selected, so having the fs unix socket check occur early would cause the wrong code path to be used. Fix this by moving the fs unix socket check so that it is done after the abi check. Fixes: dcd7a559411e ("apparmor: gate make fine grained unix mediation behind v9 abi") Signed-off-by: John Johansen --- security/apparmor/af_unix.c | 119 +++++++++++++++++----------- security/apparmor/include/af_unix.h | 3 - 2 files changed, 71 insertions(+), 51 deletions(-) diff --git a/security/apparmor/af_unix.c b/security/apparmor/af_unix.c index 03d44fa19d12..dc25f1afe819 100644 --- a/security/apparmor/af_unix.c +++ b/security/apparmor/af_unix.c @@ -221,7 +221,7 @@ static int profile_create_perm(struct aa_profile *profile, int family, static int profile_sk_perm(struct aa_profile *profile, struct apparmor_audit_data *ad, - u32 request, struct sock *sk) + u32 request, struct sock *sk, struct path *path) { struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), @@ -231,11 +231,15 @@ static int profile_sk_perm(struct aa_profile *profile, AA_BUG(!profile); AA_BUG(!sk); - AA_BUG(is_unix_fs(sk)); AA_BUG(profile_unconfined(profile)); state = RULE_MEDIATES_v9NET(rules); if (state) { + if (is_unix_fs(sk)) + return unix_fs_perm(ad->op, request, ad->subj_cred, + &profile->label, + &unix_sk(sk)->path); + state = match_to_sk(rules->policy, state, request, unix_sk(sk), &p, &ad->info); @@ -261,6 +265,9 @@ static int profile_bind_perm(struct aa_profile *profile, struct sock *sk, state = RULE_MEDIATES_v9NET(rules); if (state) { + if (is_unix_addr_fs(ad->net.addr, ad->net.addrlen)) + /* under v7-9 fs hook handles bind */ + return 0; /* bind for abstract socket */ state = match_to_local(rules->policy, state, AA_MAY_BIND, sk->sk_type, sk->sk_protocol, @@ -285,7 +292,6 @@ static int profile_listen_perm(struct aa_profile *profile, struct sock *sk, AA_BUG(!profile); AA_BUG(!sk); - AA_BUG(is_unix_fs(sk)); AA_BUG(!ad); AA_BUG(profile_unconfined(profile)); @@
-293,6 +299,11 @@ static int profile_listen_perm(struct aa_profile *profile, struct sock *sk, if (state) { __be16 b = cpu_to_be16(backlog); + if (is_unix_fs(sk)) + return unix_fs_perm(ad->op, AA_MAY_LISTEN, + ad->subj_cred, &profile->label, + &unix_sk(sk)->path); + state = match_to_cmd(rules->policy, state, AA_MAY_LISTEN, unix_sk(sk), CMD_LISTEN, &p, &ad->info); if (state && !p) { @@ -319,12 +330,16 @@ static int profile_accept_perm(struct aa_profile *profile, AA_BUG(!profile); AA_BUG(!sk); - AA_BUG(is_unix_fs(sk)); AA_BUG(!ad); AA_BUG(profile_unconfined(profile)); state = RULE_MEDIATES_v9NET(rules); if (state) { + if (is_unix_fs(sk)) + return unix_fs_perm(ad->op, AA_MAY_ACCEPT, + ad->subj_cred, &profile->label, + &unix_sk(sk)->path); + state = match_to_sk(rules->policy, state, AA_MAY_ACCEPT, unix_sk(sk), &p, &ad->info); @@ -346,13 +361,16 @@ static int profile_opt_perm(struct aa_profile *profile, u32 request, AA_BUG(!profile); AA_BUG(!sk); - AA_BUG(is_unix_fs(sk)); AA_BUG(!ad); AA_BUG(profile_unconfined(profile)); state = RULE_MEDIATES_v9NET(rules); if (state) { __be16 b = cpu_to_be16(optname); + if (is_unix_fs(sk)) + return unix_fs_perm(ad->op, request, + ad->subj_cred, &profile->label, + &unix_sk(sk)->path); state = match_to_cmd(rules->policy, state, request, unix_sk(sk), CMD_OPT, &p, &ad->info); @@ -371,8 +389,9 @@ static int profile_opt_perm(struct aa_profile *profile, u32 request, /* null peer_label is allowed, in which case the peer_sk label is used */ static int profile_peer_perm(struct aa_profile *profile, u32 request, - struct sock *sk, struct sockaddr_un *peer_addr, - int peer_addrlen, + struct sock *sk, struct path *path, + struct sockaddr_un *peer_addr, + int peer_addrlen, struct path *peer_path, struct aa_label *peer_label, struct apparmor_audit_data *ad) { @@ -391,6 +410,12 @@ static int profile_peer_perm(struct aa_profile *profile, u32 request, if (state) { struct aa_profile *peerp; + if (peer_path) + return unix_fs_perm(ad->op, request, 
ad->subj_cred, + &profile->label, peer_path); + else if (path) + return unix_fs_perm(ad->op, request, ad->subj_cred, + &profile->label, path); state = match_to_peer(rules->policy, state, request, unix_sk(sk), peer_addr, peer_addrlen, &p, &ad->info); @@ -421,15 +446,18 @@ int aa_unix_create_perm(struct aa_label *label, int family, int type, return 0; } -int aa_unix_label_sk_perm(const struct cred *subj_cred, struct aa_label *label, - const char *op, u32 request, struct sock *sk) +static int aa_unix_label_sk_perm(const struct cred *subj_cred, + struct aa_label *label, + const char *op, u32 request, struct sock *sk, + struct path *path) { if (!unconfined(label)) { struct aa_profile *profile; DEFINE_AUDIT_SK(ad, op, subj_cred, sk); return fn_for_each_confined(label, profile, - profile_sk_perm(profile, &ad, request, sk)); + profile_sk_perm(profile, &ad, request, sk, + path)); } return 0; } @@ -441,12 +469,9 @@ int aa_unix_sock_perm(const char *op, u32 request, struct socket *sock) int error; label = begin_current_label_crit_section(); - if (is_unix_fs(sock->sk)) - error = unix_fs_perm(op, request, current_cred(), label, - &unix_sk(sock->sk)->path); - else - error = aa_unix_label_sk_perm(current_cred(), label, op, - request, sock->sk); + error = aa_unix_label_sk_perm(current_cred(), label, op, + request, sock->sk, + is_unix_fs(sock->sk) ? 
&unix_sk(sock->sk)->path : NULL); end_current_label_crit_section(label); return error; @@ -476,7 +501,7 @@ int aa_unix_bind_perm(struct socket *sock, struct sockaddr *addr, label = begin_current_label_crit_section(); /* fs bind is handled by mknod */ - if (!(unconfined(label) || is_unix_addr_fs(addr, addrlen))) { + if (!unconfined(label)) { DEFINE_AUDIT_SK(ad, OP_BIND, current_cred(), sock->sk); ad.net.addr = unix_addr(addr); @@ -510,7 +535,7 @@ int aa_unix_listen_perm(struct socket *sock, int backlog) int error = 0; label = begin_current_label_crit_section(); - if (!(unconfined(label) || is_unix_fs(sock->sk))) { + if (!unconfined(label)) { DEFINE_AUDIT_SK(ad, OP_LISTEN, current_cred(), sock->sk); error = fn_for_each_confined(label, profile, @@ -531,7 +556,7 @@ int aa_unix_accept_perm(struct socket *sock, struct socket *newsock) int error = 0; label = begin_current_label_crit_section(); - if (!(unconfined(label) || is_unix_fs(sock->sk))) { + if (!unconfined(label)) { DEFINE_AUDIT_SK(ad, OP_ACCEPT, current_cred(), sock->sk); error = fn_for_each_confined(label, profile, @@ -564,12 +589,12 @@ int aa_unix_opt_perm(const char *op, u32 request, struct socket *sock, int error = 0; label = begin_current_label_crit_section(); - if (!(unconfined(label) || is_unix_fs(sock->sk))) { + if (!unconfined(label)) { DEFINE_AUDIT_SK(ad, op, current_cred(), sock->sk); error = fn_for_each_confined(label, profile, - profile_opt_perm(profile, request, - sock->sk, optname, &ad)); + profile_opt_perm(profile, request, sock->sk, + optname, &ad)); } end_current_label_crit_section(label); @@ -578,8 +603,9 @@ int aa_unix_opt_perm(const char *op, u32 request, struct socket *sock, static int unix_peer_perm(const struct cred *subj_cred, struct aa_label *label, const char *op, u32 request, - struct sock *sk, struct sockaddr_un *peer_addr, - int peer_addrlen, struct aa_label *peer_label) + struct sock *sk, struct path *path, + struct sockaddr_un *peer_addr, int peer_addrlen, + struct path *peer_path, 
struct aa_label *peer_label) { struct aa_profile *profile; DEFINE_AUDIT_SK(ad, op, subj_cred, sk); @@ -588,8 +614,9 @@ static int unix_peer_perm(const struct cred *subj_cred, ad.net.peer.addrlen = peer_addrlen; return fn_for_each_confined(label, profile, - profile_peer_perm(profile, request, sk, - peer_addr, peer_addrlen, peer_label, &ad)); + profile_peer_perm(profile, request, sk, path, + peer_addr, peer_addrlen, peer_path, + peer_label, &ad)); } /** @@ -604,27 +631,19 @@ int aa_unix_peer_perm(const struct cred *subj_cred, { struct unix_sock *peeru = unix_sk(peer_sk); struct unix_sock *u = unix_sk(sk); + int plen; + struct sockaddr_un *paddr = aa_sunaddr(unix_sk(peer_sk), &plen); AA_BUG(!label); AA_BUG(!sk); AA_BUG(!peer_sk); AA_BUG(!peer_label); - if (is_unix_fs(aa_unix_sk(peeru))) { - return unix_fs_perm(op, request, subj_cred, label, - &peeru->path); - } else if (is_unix_fs(aa_unix_sk(u))) { - return unix_fs_perm(op, request, subj_cred, label, &u->path); - } else if (!unconfined(label)) { - int plen; - struct sockaddr_un *paddr = aa_sunaddr(unix_sk(peer_sk), - &plen); - - return unix_peer_perm(subj_cred, label, op, request, - sk, paddr, plen, peer_label); - } - - return 0; + return unix_peer_perm(subj_cred, label, op, request, sk, + is_unix_fs(sk) ? &u->path : NULL, + paddr, plen, + is_unix_fs(peer_sk) ? &peeru->path : NULL, + peer_label); } /* This fn is only checked if something has changed in the security @@ -665,12 +684,9 @@ int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, if (is_sk_fs && peer_sk) sk_req = request; if (sk_req) { - if (is_sk_fs) - error = unix_fs_perm(op, sk_req, subj_cred, label, - &path); - else error = aa_unix_label_sk_perm(subj_cred, label, op, - sk_req, sock->sk); + sk_req, sock->sk, + is_sk_fs ? 
&path : NULL); } if (!peer_sk) goto out; @@ -683,7 +699,7 @@ int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, if (!is_sk_fs && is_unix_fs(peer_sk)) { last_error(error, unix_fs_perm(op, request, subj_cred, label, - &peer_path)); + is_unix_fs(peer_sk) ? &peer_path : NULL)); } else if (!is_sk_fs) { struct aa_sk_ctx *pctx = aa_sock(peer_sk); @@ -693,11 +709,18 @@ int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, last_error(error, xcheck(unix_peer_perm(subj_cred, label, op, MAY_READ | MAY_WRITE, sock->sk, + is_sk_fs ? &path : NULL, peer_addr, peer_addrlen, + is_unix_fs(peer_sk) ? + &peer_path : NULL, pctx->label), unix_peer_perm(file->f_cred, pctx->label, op, MAY_READ | MAY_WRITE, peer_sk, - addr, addrlen, label))); + is_unix_fs(peer_sk) ? + &peer_path : NULL, + addr, addrlen, + is_sk_fs ? &path : NULL, + label))); } sock_put(peer_sk); diff --git a/security/apparmor/include/af_unix.h b/security/apparmor/include/af_unix.h index 760d98132392..4a62e600d82b 100644 --- a/security/apparmor/include/af_unix.h +++ b/security/apparmor/include/af_unix.h @@ -36,9 +36,6 @@ int aa_unix_peer_perm(const struct cred *subj_cred, struct aa_label *label, const char *op, u32 request, struct sock *sk, struct sock *peer_sk, struct aa_label *peer_label); -int aa_unix_label_sk_perm(const struct cred *subj_cred, - struct aa_label *label, const char *op, u32 request, - struct sock *sk); int aa_unix_sock_perm(const char *op, u32 request, struct socket *sock); int aa_unix_create_perm(struct aa_label *label, int family, int type, int protocol); From 8b61d8ca751bc15875b50e0ff6ac3ba0cf95a529 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 28 May 2025 23:22:19 +0300 Subject: [PATCH 0711/2411] watchdog: ziirave_wdt: check record length in ziirave_firm_verify() The "rec->len" value comes from the firmware. We generally do trust firmware, but it's always better to double check. 
If the length value is too large it would lead to memory corruption when we set "data[i] = ret;" Fixes: 217209db0204 ("watchdog: ziirave_wdt: Add support to upload the firmware.") Signed-off-by: Dan Carpenter Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/3b58b453f0faa8b968c90523f52c11908b56c346.1748463049.git.dan.carpenter@linaro.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index fcc1ba02e75b..5c6e3fa001d8 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -302,6 +302,9 @@ static int ziirave_firm_verify(struct watchdog_device *wdd, const u16 len = be16_to_cpu(rec->len); const u32 addr = be32_to_cpu(rec->addr); + if (len > sizeof(data)) + return -EINVAL; + if (ziirave_firm_addr_readonly(addr)) continue; From 0987760b27834548052bd716e040681cec9e822d Mon Sep 17 00:00:00 2001 From: Frank Li Date: Tue, 24 Jun 2025 16:12:27 -0400 Subject: [PATCH 0712/2411] dt-bindings: watchdog: nxp,pnx4008-wdt: allow clocks property Allow clocks property to fix below CHECK_DTB warning: arch/arm/boot/dts/nxp/lpc/lpc3250-phy3250.dtb: watchdog@4003c000 (nxp,pnx4008-wdt): Unevaluated properties are not allowed ('clocks' was unexpected) Signed-off-by: Frank Li Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250624201227.2515275-1-Frank.Li@nxp.com Signed-off-by: Wim Van Sebroeck --- .../devicetree/bindings/watchdog/nxp,pnx4008-wdt.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/watchdog/nxp,pnx4008-wdt.yaml b/Documentation/devicetree/bindings/watchdog/nxp,pnx4008-wdt.yaml index 35ef940cbabe..8964c1c5d522 100644 --- a/Documentation/devicetree/bindings/watchdog/nxp,pnx4008-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/nxp,pnx4008-wdt.yaml @@ -19,6 +19,9 @@ properties: reg: maxItems: 1 + clocks: + maxItems: 1 + 
required: - compatible - reg From 3b3643e1cd6f276810640ee04e41c04e7a753c0f Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 3 Jul 2025 13:35:18 -0500 Subject: [PATCH 0713/2411] watchdog: rti_wdt: Use of_reserved_mem_region_to_resource() for "memory-region" Use the newly added of_reserved_mem_region_to_resource() function to handle "memory-region" properties. The error handling is a bit different. "memory-region" is optional, so failed lookup is not an error. But then an error in of_address_to_resource() is treated as an error. However, that distinction is not really important. Either the region is available and usable or it is not. So now, it is just of_reserved_mem_region_to_resource() which is checked for an error. Signed-off-by: Rob Herring (Arm) Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20250703183518.2075108-1-robh@kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/rti_wdt.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/watchdog/rti_wdt.c b/drivers/watchdog/rti_wdt.c index d1f9ce4100a8..be7d7db47591 100644 --- a/drivers/watchdog/rti_wdt.c +++ b/drivers/watchdog/rti_wdt.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -214,7 +214,6 @@ static int rti_wdt_probe(struct platform_device *pdev) struct rti_wdt_device *wdt; struct clk *clk; u32 last_ping = 0; - struct device_node *node; u32 reserved_mem_size; struct resource res; u32 *vaddr; @@ -299,15 +298,8 @@ static int rti_wdt_probe(struct platform_device *pdev) } } - node = of_parse_phandle(pdev->dev.of_node, "memory-region", 0); - if (node) { - ret = of_address_to_resource(node, 0, &res); - of_node_put(node); - if (ret) { - dev_err(dev, "No memory address assigned to the region.\n"); - goto err_iomap; - } - + ret = of_reserved_mem_region_to_resource(pdev->dev.of_node, 0, &res); + if (!ret) { /* * If reserved memory is defined for watchdog reset cause. 
* Readout the Power-on(PON) reason and pass to bootstatus. From 40efc43eb7ffb5a4e2f998c13b8cfb555e671b92 Mon Sep 17 00:00:00 2001 From: Ziyan Fu Date: Fri, 4 Jul 2025 15:35:18 +0800 Subject: [PATCH 0714/2411] watchdog: iTCO_wdt: Report error if timeout configuration fails The driver probes with the invalid timeout value when 'iTCO_wdt_set_timeout()' fails, as its return value is not checked. In this case, when executing "wdctl", we may get: Device: /dev/watchdog0 Timeout: 30 seconds Timeleft: 613 seconds The timeout value is the value of "heartbeat" or "WATCHDOG_TIMEOUT", and the timeleft value is calculated from the register value we actually read (0xffff) by masking with 0x3ff and converting ticks to seconds (* 6 / 10). Add error handling to return the failure code if 'iTCO_wdt_set_timeout()' fails, ensuring the driver probe fails and prevents invalid operation. Signed-off-by: Ziyan Fu Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20250704073518.7838-1-13281011316@163.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/iTCO_wdt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c index 9ab769aa0244..4ab3405ef8e6 100644 --- a/drivers/watchdog/iTCO_wdt.c +++ b/drivers/watchdog/iTCO_wdt.c @@ -577,7 +577,11 @@ static int iTCO_wdt_probe(struct platform_device *pdev) /* Check that the heartbeat value is within it's range; if not reset to the default */ if (iTCO_wdt_set_timeout(&p->wddev, heartbeat)) { - iTCO_wdt_set_timeout(&p->wddev, WATCHDOG_TIMEOUT); + ret = iTCO_wdt_set_timeout(&p->wddev, WATCHDOG_TIMEOUT); + if (ret != 0) { + dev_err(dev, "Failed to set watchdog timeout (%d)\n", WATCHDOG_TIMEOUT); + return ret; + } dev_info(dev, "timeout value out of range, using %d\n", WATCHDOG_TIMEOUT); heartbeat = WATCHDOG_TIMEOUT; From 801c6592bf4c9892389352586ba36173ae9e8f9e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 9 Jul 2025 
21:08:05 +0200 Subject: [PATCH 0715/2411] watchdog: renesas_wdt: Convert to DEFINE_SIMPLE_DEV_PM_OPS() Convert the Renesas WDT watchdog driver from SIMPLE_DEV_PM_OPS() to DEFINE_SIMPLE_DEV_PM_OPS() and pm_sleep_ptr(). This lets us drop the __maybe_unused annotations from its suspend and resume callbacks, and reduces kernel size in case CONFIG_PM or CONFIG_PM_SLEEP is disabled. Signed-off-by: Geert Uytterhoeven Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/3d6d46ff56c908880a167ffb2a74c713060a1a57.1752088043.git.geert+renesas@glider.be Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/renesas_wdt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/watchdog/renesas_wdt.c b/drivers/watchdog/renesas_wdt.c index c0b2a9c5250d..97bcd32bade5 100644 --- a/drivers/watchdog/renesas_wdt.c +++ b/drivers/watchdog/renesas_wdt.c @@ -300,7 +300,7 @@ static void rwdt_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); } -static int __maybe_unused rwdt_suspend(struct device *dev) +static int rwdt_suspend(struct device *dev) { struct rwdt_priv *priv = dev_get_drvdata(dev); @@ -310,7 +310,7 @@ static int __maybe_unused rwdt_suspend(struct device *dev) return 0; } -static int __maybe_unused rwdt_resume(struct device *dev) +static int rwdt_resume(struct device *dev) { struct rwdt_priv *priv = dev_get_drvdata(dev); @@ -320,7 +320,7 @@ static int __maybe_unused rwdt_resume(struct device *dev) return 0; } -static SIMPLE_DEV_PM_OPS(rwdt_pm_ops, rwdt_suspend, rwdt_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(rwdt_pm_ops, rwdt_suspend, rwdt_resume); static const struct of_device_id rwdt_ids[] = { { .compatible = "renesas,rcar-gen2-wdt", }, @@ -334,7 +334,7 @@ static struct platform_driver rwdt_driver = { .driver = { .name = "renesas_wdt", .of_match_table = rwdt_ids, - .pm = &rwdt_pm_ops, + .pm = pm_sleep_ptr(&rwdt_pm_ops), }, .probe = rwdt_probe, .remove = rwdt_remove, From 
95d692f9aba7c13b5b3e8d842656c47bde7e551f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 15 Jul 2025 17:46:35 -0700 Subject: [PATCH 0716/2411] perf flamegraph: Fix minor pylint/type hint issues Switch to assuming python3. Fix minor pylint issues on line length, repeated compares, not using f-strings and variable case. Add type hints and check with mypy. Signed-off-by: Ian Rogers Tested-by: Namhyung Kim Link: https://lore.kernel.org/r/20250716004635.31161-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/scripts/python/flamegraph.py | 61 +++++++++++++------------ 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/tools/perf/scripts/python/flamegraph.py b/tools/perf/scripts/python/flamegraph.py index e49ff242b779..ad735990c5be 100755 --- a/tools/perf/scripts/python/flamegraph.py +++ b/tools/perf/scripts/python/flamegraph.py @@ -18,7 +18,6 @@ # pylint: disable=missing-class-docstring # pylint: disable=missing-function-docstring -from __future__ import print_function import argparse import hashlib import io @@ -26,9 +25,10 @@ import json import os import subprocess import sys +from typing import Dict, Optional, Union import urllib.request -minimal_html = """ +MINIMAL_HTML = """ @@ -50,20 +50,20 @@ minimal_html = """ # pylint: disable=too-few-public-methods class Node: - def __init__(self, name, libtype): + def __init__(self, name: str, libtype: str): self.name = name # "root" | "kernel" | "" # "" indicates user space self.libtype = libtype - self.value = 0 - self.children = [] + self.value: int = 0 + self.children: list[Node] = [] - def to_json(self): + def to_json(self) -> Dict[str, Union[str, int, list[Dict]]]: return { "n": self.name, "l": self.libtype, "v": self.value, - "c": self.children + "c": [x.to_json() for x in self.children] } @@ -73,7 +73,7 @@ class FlameGraphCLI: self.stack = Node("all", "root") @staticmethod - def get_libtype_from_dso(dso): + def get_libtype_from_dso(dso: Optional[str]) -> str: """ when kernel-debuginfo is 
installed, dso points to /usr/lib/debug/lib/modules/*/vmlinux @@ -84,7 +84,7 @@ class FlameGraphCLI: return "" @staticmethod - def find_or_create_node(node, name, libtype): + def find_or_create_node(node: Node, name: str, libtype: str) -> Node: for child in node.children: if child.name == name: return child @@ -93,7 +93,7 @@ class FlameGraphCLI: node.children.append(child) return child - def process_event(self, event): + def process_event(self, event) -> None: # ignore events where the event name does not match # the one specified by the user if self.args.event_name and event.get("ev_name") != self.args.event_name: @@ -106,7 +106,7 @@ class FlameGraphCLI: comm = event["comm"] libtype = "kernel" else: - comm = "{} ({})".format(event["comm"], pid) + comm = f"{event['comm']} ({pid})" libtype = "" node = self.find_or_create_node(self.stack, comm, libtype) @@ -121,7 +121,7 @@ class FlameGraphCLI: node = self.find_or_create_node(node, name, libtype) node.value += 1 - def get_report_header(self): + def get_report_header(self) -> str: if self.args.input == "-": # when this script is invoked with "perf script flamegraph", # no perf.data is created and we cannot read the header of it @@ -131,7 +131,8 @@ class FlameGraphCLI: # if the file name other than perf.data is given, # we read the header of that file if self.args.input: - output = subprocess.check_output(["perf", "report", "--header-only", "-i", self.args.input]) + output = subprocess.check_output(["perf", "report", "--header-only", + "-i", self.args.input]) else: output = subprocess.check_output(["perf", "report", "--header-only"]) @@ -140,10 +141,10 @@ class FlameGraphCLI: result += "\nFocused event: " + self.args.event_name return result except Exception as err: # pylint: disable=broad-except - print("Error reading report header: {}".format(err), file=sys.stderr) + print(f"Error reading report header: {err}", file=sys.stderr) return "" - def trace_end(self): + def trace_end(self) -> None: stacks_json = 
json.dumps(self.stack, default=lambda x: x.to_json()) if self.args.format == "html": @@ -167,7 +168,8 @@ graph template (--template PATH) or use another output format (--format FORMAT).""", file=sys.stderr) if self.args.input == "-": - print("""Not attempting to download Flame Graph template as script command line + print( +"""Not attempting to download Flame Graph template as script command line input is disabled due to using live mode. If you want to download the template retry without live mode. For example, use 'perf record -a -g -F 99 sleep 60' and 'perf script report flamegraph'. Alternatively, @@ -176,37 +178,40 @@ https://cdn.jsdelivr.net/npm/d3-flame-graph@4.1.3/dist/templates/d3-flamegraph-b and place it at: /usr/share/d3-flame-graph/d3-flamegraph-base.html""", file=sys.stderr) - quit() + sys.exit(1) s = None - while s != "y" and s != "n": - s = input("Do you wish to download a template from cdn.jsdelivr.net? (this warning can be suppressed with --allow-download) [yn] ").lower() + while s not in ["y", "n"]: + s = input("Do you wish to download a template from cdn.jsdelivr.net?" 
+ + "(this warning can be suppressed with --allow-download) [yn] " + ).lower() if s == "n": - quit() - template = "https://cdn.jsdelivr.net/npm/d3-flame-graph@4.1.3/dist/templates/d3-flamegraph-base.html" + sys.exit(1) + template = ("https://cdn.jsdelivr.net/npm/d3-flame-graph@4.1.3/dist/templates/" + "d3-flamegraph-base.html") template_md5sum = "143e0d06ba69b8370b9848dcd6ae3f36" try: - with urllib.request.urlopen(template) as template: + with urllib.request.urlopen(template) as url_template: output_str = "".join([ - l.decode("utf-8") for l in template.readlines() + l.decode("utf-8") for l in url_template.readlines() ]) except Exception as err: print(f"Error reading template {template}: {err}\n" "a minimal flame graph will be generated", file=sys.stderr) - output_str = minimal_html + output_str = MINIMAL_HTML template_md5sum = None if template_md5sum: download_md5sum = hashlib.md5(output_str.encode("utf-8")).hexdigest() if download_md5sum != template_md5sum: s = None - while s != "y" and s != "n": + while s not in ["y", "n"]: s = input(f"""Unexpected template md5sum. 
{download_md5sum} != {template_md5sum}, for: {output_str} continue?[yn] """).lower() if s == "n": - quit() + sys.exit(1) output_str = output_str.replace("/** @options_json **/", options_json) output_str = output_str.replace("/** @flamegraph_json **/", stacks_json) @@ -220,12 +225,12 @@ continue?[yn] """).lower() with io.open(sys.stdout.fileno(), "w", encoding="utf-8", closefd=False) as out: out.write(output_str) else: - print("dumping data to {}".format(output_fn)) + print(f"dumping data to {output_fn}") try: with io.open(output_fn, "w", encoding="utf-8") as out: out.write(output_str) except IOError as err: - print("Error writing output file: {}".format(err), file=sys.stderr) + print(f"Error writing output file: {err}", file=sys.stderr) sys.exit(1) From 857d18f23ab17284d1b6de6f61f4e74958596376 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jul 2025 16:49:25 -0700 Subject: [PATCH 0717/2411] cleanup: Introduce ACQUIRE() and ACQUIRE_ERR() for conditional locks scoped_cond_guard(), automatic cleanup for conditional locks, has a couple pain points: * It causes existing straight-line code to be re-indented into a new bracketed scope. While this can be mitigated by a new helper function to contain the scope, that is not always a comfortable conversion. * The return code from the conditional lock is tossed in favor of a scheme to pass a 'return err;' statement to the macro. Other attempts to clean this up, to behave more like guard() [1], got hung up trying to both establish and evaluate the conditional lock in one statement. ACQUIRE() solves this by reflecting the result of the condition in the automatic variable established by the lock CLASS(). The result is separately retrieved with the ACQUIRE_ERR() helper, effectively a PTR_ERR() operation. 
Link: http://lore.kernel.org/all/Z1LBnX9TpZLR5Dkf@gmail.com [1] Link: http://patch.msgid.link/20250512105026.GP4439@noisy.programming.kicks-ass.net Link: http://patch.msgid.link/20250512185817.GA1808@noisy.programming.kicks-ass.net Cc: Ingo Molnar Cc: Linus Torvalds Cc: David Lechner Cc: Fabio M. De Francesco Signed-off-by: Peter Zijlstra (Intel) [djbw: wrap Peter's proposal with changelog and comments] Co-developed-by: Dan Williams Signed-off-by: Dan Williams Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250711234932.671292-2-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- include/linux/cleanup.h | 95 +++++++++++++++++++++++++++++++++++------ include/linux/mutex.h | 2 +- include/linux/rwsem.h | 2 +- 3 files changed, 83 insertions(+), 16 deletions(-) diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 7093e1d08af0..4eb83dd71cfe 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -3,6 +3,8 @@ #define _LINUX_CLEANUP_H #include +#include +#include /** * DOC: scope-based cleanup helpers @@ -61,9 +63,21 @@ * Observe the lock is held for the remainder of the "if ()" block not * the remainder of "func()". * - * Now, when a function uses both __free() and guard(), or multiple - * instances of __free(), the LIFO order of variable definition order - * matters. GCC documentation says: + * The ACQUIRE() macro can be used in all places that guard() can be + * used and additionally support conditional locks + * + * + * DEFINE_GUARD_COND(pci_dev, _try, pci_dev_trylock(_T)) + * ... + * ACQUIRE(pci_dev_try, lock)(dev); + * rc = ACQUIRE_ERR(pci_dev_try, &lock); + * if (rc) + * return rc; + * // @lock is held + * + * Now, when a function uses both __free() and guard()/ACQUIRE(), or + * multiple instances of __free(), the LIFO order of variable definition + * order matters. 
GCC documentation says: * * "When multiple variables in the same scope have cleanup attributes, * at exit from the scope their associated cleanup functions are run in @@ -305,14 +319,46 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ * acquire fails. * * Only for conditional locks. + * + * ACQUIRE(name, var): + * a named instance of the (guard) class, suitable for conditional + * locks when paired with ACQUIRE_ERR(). + * + * ACQUIRE_ERR(name, &var): + * a helper that is effectively a PTR_ERR() conversion of the guard + * pointer. Returns 0 when the lock was acquired and a negative + * error code otherwise. */ #define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond) \ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond -#define __DEFINE_GUARD_LOCK_PTR(_name, _exp) \ - static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ - { return (void *)(__force unsigned long)*(_exp); } +#define __GUARD_IS_ERR(_ptr) \ + ({ \ + unsigned long _rc = (__force unsigned long)(_ptr); \ + unlikely((_rc - 1) >= -MAX_ERRNO - 1); \ + }) + +#define __DEFINE_GUARD_LOCK_PTR(_name, _exp) \ + static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \ + { \ + void *_ptr = (void *)(__force unsigned long)*(_exp); \ + if (IS_ERR(_ptr)) { \ + _ptr = NULL; \ + } \ + return _ptr; \ + } \ + static inline int class_##_name##_lock_err(class_##_name##_t *_T) \ + { \ + long _rc = (__force unsigned long)*(_exp); \ + if (!_rc) { \ + _rc = -EBUSY; \ + } \ + if (!IS_ERR_VALUE(_rc)) { \ + _rc = 0; \ + } \ + return _rc; \ + } #define DEFINE_CLASS_IS_GUARD(_name) \ __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ @@ -323,23 +369,37 @@ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond __DEFINE_GUARD_LOCK_PTR(_name, _T) #define DEFINE_GUARD(_name, _type, _lock, _unlock) \ - DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ + DEFINE_CLASS(_name, _type, if (!__GUARD_IS_ERR(_T)) { 
_unlock; }, ({ _lock; _T; }), _type _T); \ DEFINE_CLASS_IS_GUARD(_name) -#define DEFINE_GUARD_COND(_name, _ext, _condlock) \ +#define DEFINE_GUARD_COND_4(_name, _ext, _lock, _cond) \ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ EXTEND_CLASS(_name, _ext, \ - ({ void *_t = _T; if (_T && !(_condlock)) _t = NULL; _t; }), \ + ({ void *_t = _T; int _RET = (_lock); if (_T && !(_cond)) _t = ERR_PTR(_RET); _t; }), \ class_##_name##_t _T) \ static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ - { return class_##_name##_lock_ptr(_T); } + { return class_##_name##_lock_ptr(_T); } \ + static inline int class_##_name##_ext##_lock_err(class_##_name##_t *_T) \ + { return class_##_name##_lock_err(_T); } + +/* + * Default binary condition; success on 'true'. + */ +#define DEFINE_GUARD_COND_3(_name, _ext, _lock) \ + DEFINE_GUARD_COND_4(_name, _ext, _lock, _RET) + +#define DEFINE_GUARD_COND(X...) CONCATENATE(DEFINE_GUARD_COND_, COUNT_ARGS(X))(X) #define guard(_name) \ CLASS(_name, __UNIQUE_ID(guard)) #define __guard_ptr(_name) class_##_name##_lock_ptr +#define __guard_err(_name) class_##_name##_lock_err #define __is_cond_ptr(_name) class_##_name##_is_conditional +#define ACQUIRE(_name, _var) CLASS(_name, _var) +#define ACQUIRE_ERR(_name, _var) __guard_err(_name)(_var) + /* * Helper macro for scoped_guard(). 
* @@ -401,7 +461,7 @@ typedef struct { \ \ static inline void class_##_name##_destructor(class_##_name##_t *_T) \ { \ - if (_T->lock) { _unlock; } \ + if (!__GUARD_IS_ERR(_T->lock)) { _unlock; } \ } \ \ __DEFINE_GUARD_LOCK_PTR(_name, &_T->lock) @@ -433,15 +493,22 @@ __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ __DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \ __DEFINE_LOCK_GUARD_0(_name, _lock) -#define DEFINE_LOCK_GUARD_1_COND(_name, _ext, _condlock) \ +#define DEFINE_LOCK_GUARD_1_COND_4(_name, _ext, _lock, _cond) \ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ EXTEND_CLASS(_name, _ext, \ ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\ - if (_T->lock && !(_condlock)) _T->lock = NULL; \ + int _RET = (_lock); \ + if (_T->lock && !(_cond)) _T->lock = ERR_PTR(_RET);\ _t; }), \ typeof_member(class_##_name##_t, lock) l) \ static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ - { return class_##_name##_lock_ptr(_T); } + { return class_##_name##_lock_ptr(_T); } \ + static inline int class_##_name##_ext##_lock_err(class_##_name##_t *_T) \ + { return class_##_name##_lock_err(_T); } +#define DEFINE_LOCK_GUARD_1_COND_3(_name, _ext, _lock) \ + DEFINE_LOCK_GUARD_1_COND_4(_name, _ext, _lock, _RET) + +#define DEFINE_LOCK_GUARD_1_COND(X...) 
CONCATENATE(DEFINE_LOCK_GUARD_1_COND_, COUNT_ARGS(X))(X) #endif /* _LINUX_CLEANUP_H */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index a039fa8c1780..9d5d7ed5c101 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -224,7 +224,7 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T)) -DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T) == 0) +DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T), _RET == 0) extern unsigned long mutex_get_owner(struct mutex *lock); diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index c8b543d428b0..c810deb88d13 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -240,7 +240,7 @@ extern void up_write(struct rw_semaphore *sem); DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T)) DEFINE_GUARD_COND(rwsem_read, _try, down_read_trylock(_T)) -DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T) == 0) +DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T), _RET == 0) DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) DEFINE_GUARD_COND(rwsem_write, _try, down_write_trylock(_T)) From 683513084acb978fb7f401b9e4dce7e3866af172 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 11 Jul 2025 16:49:26 -0700 Subject: [PATCH 0718/2411] cxl/mbox: Convert poison list mutex to ACQUIRE() Towards removing all explicit unlock calls in the CXL subsystem, convert the conditional poison list mutex to use a conditional lock guard. Rename the lock to have the compiler validate that all existing call sites are converted. 
Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Davidlohr Bueso Cc: Jonathan Cameron Cc: Dave Jiang Cc: Alison Schofield Cc: Vishal Verma Cc: Ira Weiny Acked-by: Peter Zijlstra (Intel) Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Alison Schofield Signed-off-by: Dan Williams Link: https://patch.msgid.link/20250711234932.671292-3-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 7 +++---- drivers/cxl/cxlmem.h | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 2689e6453c5a..81b21effe8cf 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1401,8 +1401,8 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, int nr_records = 0; int rc; - rc = mutex_lock_interruptible(&mds->poison.lock); - if (rc) + ACQUIRE(mutex_intr, lock)(&mds->poison.mutex); + if ((rc = ACQUIRE_ERR(mutex_intr, &lock))) return rc; po = mds->poison.list_out; @@ -1437,7 +1437,6 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, } } while (po->flags & CXL_POISON_FLAG_MORE); - mutex_unlock(&mds->poison.lock); return rc; } EXPORT_SYMBOL_NS_GPL(cxl_mem_get_poison, "CXL"); @@ -1473,7 +1472,7 @@ int cxl_poison_state_init(struct cxl_memdev_state *mds) return rc; } - mutex_init(&mds->poison.lock); + mutex_init(&mds->poison.mutex); return 0; } EXPORT_SYMBOL_NS_GPL(cxl_poison_state_init, "CXL"); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 551b0ba2caa1..f5b20641e57c 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -254,7 +254,7 @@ enum security_cmd_enabled_bits { * @max_errors: Maximum media error records held in device cache * @enabled_cmds: All poison commands enabled in the CEL * @list_out: The poison list payload returned by device - * @lock: Protect reads of the poison list + * @mutex: Protect reads of the poison list * * Reads of the poison list are synchronized to ensure that a reader * does not 
get an incomplete list because their request overlapped @@ -265,7 +265,7 @@ struct cxl_poison_state { u32 max_errors; DECLARE_BITMAP(enabled_cmds, CXL_POISON_ENABLED_MAX); struct cxl_mbox_poison_out *list_out; - struct mutex lock; /* Protect reads of poison list */ + struct mutex mutex; /* Protect reads of poison list */ }; /* From 7cb3b42a6bce4e604ca948e6ede543542b49fb54 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 11 Jul 2025 16:49:27 -0700 Subject: [PATCH 0719/2411] cxl/decoder: Move decoder register programming to a helper In preparation for converting to rw_semaphore_acquire semantics move the contents of an open-coded {down,up}_read(&cxl_dpa_rwsem) section to a helper function. Cc: Davidlohr Bueso Cc: Jonathan Cameron Cc: Dave Jiang Cc: Alison Schofield Cc: Vishal Verma Cc: Ira Weiny Acked-by: Peter Zijlstra (Intel) Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Alison Schofield Signed-off-by: Dan Williams Link: https://patch.msgid.link/20250711234932.671292-4-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/hdm.c | 79 +++++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index ab1007495f6b..81556d12e9b8 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -764,46 +764,12 @@ static int cxld_await_commit(void __iomem *hdm, int id) return -ETIMEDOUT; } -static int cxl_decoder_commit(struct cxl_decoder *cxld) +static void setup_hw_decoder(struct cxl_decoder *cxld, void __iomem *hdm) { - struct cxl_port *port = to_cxl_port(cxld->dev.parent); - struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev); - void __iomem *hdm = cxlhdm->regs.hdm_decoder; - int id = cxld->id, rc; + int id = cxld->id; u64 base, size; u32 ctrl; - if (cxld->flags & CXL_DECODER_F_ENABLE) - return 0; - - if (cxl_num_decoders_committed(port) != id) { - dev_dbg(&port->dev, - "%s: out of order commit, expected decoder%d.%d\n", - 
dev_name(&cxld->dev), port->id, - cxl_num_decoders_committed(port)); - return -EBUSY; - } - - /* - * For endpoint decoders hosted on CXL memory devices that - * support the sanitize operation, make sure sanitize is not in-flight. - */ - if (is_endpoint_decoder(&cxld->dev)) { - struct cxl_endpoint_decoder *cxled = - to_cxl_endpoint_decoder(&cxld->dev); - struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - struct cxl_memdev_state *mds = - to_cxl_memdev_state(cxlmd->cxlds); - - if (mds && mds->security.sanitize_active) { - dev_dbg(&cxlmd->dev, - "attempted to commit %s during sanitize\n", - dev_name(&cxld->dev)); - return -EBUSY; - } - } - - down_read(&cxl_dpa_rwsem); /* common decoder settings */ ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(cxld->id)); cxld_set_interleave(cxld, &ctrl); @@ -837,6 +803,47 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld) } writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id)); +} + +static int cxl_decoder_commit(struct cxl_decoder *cxld) +{ + struct cxl_port *port = to_cxl_port(cxld->dev.parent); + struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev); + void __iomem *hdm = cxlhdm->regs.hdm_decoder; + int id = cxld->id, rc; + + if (cxld->flags & CXL_DECODER_F_ENABLE) + return 0; + + if (cxl_num_decoders_committed(port) != id) { + dev_dbg(&port->dev, + "%s: out of order commit, expected decoder%d.%d\n", + dev_name(&cxld->dev), port->id, + cxl_num_decoders_committed(port)); + return -EBUSY; + } + + /* + * For endpoint decoders hosted on CXL memory devices that + * support the sanitize operation, make sure sanitize is not in-flight. 
+ */ + if (is_endpoint_decoder(&cxld->dev)) { + struct cxl_endpoint_decoder *cxled = + to_cxl_endpoint_decoder(&cxld->dev); + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_memdev_state *mds = + to_cxl_memdev_state(cxlmd->cxlds); + + if (mds && mds->security.sanitize_active) { + dev_dbg(&cxlmd->dev, + "attempted to commit %s during sanitize\n", + dev_name(&cxld->dev)); + return -EBUSY; + } + } + + down_read(&cxl_dpa_rwsem); + setup_hw_decoder(cxld, hdm); up_read(&cxl_dpa_rwsem); port->commit_end++; From 55a89d9c99a9a79a7c2c7cb88c2ae9e86868a60b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 11 Jul 2025 16:49:28 -0700 Subject: [PATCH 0720/2411] cxl/decoder: Drop pointless locking cxl_dpa_rwsem coordinates changes to dpa allocation settings for a given decoder. cxl_decoder_reset() has no need for a consistent snapshot of the dpa settings since it is merely clearing out whatever was there previously. Otherwise, cxl_region_rwsem protects against 'reset' racing 'setup'. In preparation for converting to rw_semaphore_acquire semantics, drop this locking. 
Cc: Davidlohr Bueso Cc: Jonathan Cameron Cc: Dave Jiang Cc: Alison Schofield Cc: Vishal Verma Cc: Ira Weiny Acked-by: Peter Zijlstra (Intel) Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Alison Schofield Reviewed-by: Davidlohr Bueso Signed-off-by: Dan Williams Link: https://patch.msgid.link/20250711234932.671292-5-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/hdm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 81556d12e9b8..e9cb34e30248 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -914,7 +914,6 @@ static void cxl_decoder_reset(struct cxl_decoder *cxld) "%s: out of order reset, expected decoder%d.%d\n", dev_name(&cxld->dev), port->id, port->commit_end); - down_read(&cxl_dpa_rwsem); ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id)); ctrl &= ~CXL_HDM_DECODER0_CTRL_COMMIT; writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id)); @@ -923,7 +922,6 @@ static void cxl_decoder_reset(struct cxl_decoder *cxld) writel(0, hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(id)); writel(0, hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(id)); writel(0, hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(id)); - up_read(&cxl_dpa_rwsem); cxld->flags &= ~CXL_DECODER_F_ENABLE; From a235d7d963e82ac026eca968b71da376534dc9b9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 11 Jul 2025 16:49:29 -0700 Subject: [PATCH 0721/2411] cxl/region: Split commit_store() into __commit() and queue_reset() helpers The complexity of dropping the lock is removed in favor of splitting commit operations to a helper, and leaving all the complexities of "decommit" for commit_store() to coordinate the different locking contexts. The CPU cache-invalidation in the decommit path is solely handled now by cxl_region_decode_reset(). Previously the CPU caches were being needlessly flushed twice in the decommit path where the first flush had no guarantee that the memory would not be immediately re-dirtied. 
Cc: Davidlohr Bueso Cc: Jonathan Cameron Cc: Dave Jiang Cc: Alison Schofield Cc: Vishal Verma Cc: Ira Weiny Acked-by: Peter Zijlstra (Intel) Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Signed-off-by: Dan Williams Reviewed-by: Fabio M. De Francesco Link: https://patch.msgid.link/20250711234932.671292-6-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 103 ++++++++++++++++++++++++++------------ 1 file changed, 72 insertions(+), 31 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 6e5e1460068d..3a77aec2c447 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -349,30 +349,42 @@ static int cxl_region_decode_commit(struct cxl_region *cxlr) return rc; } -static ssize_t commit_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t len) +static int queue_reset(struct cxl_region *cxlr) { - struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; - bool commit; - ssize_t rc; - - rc = kstrtobool(buf, &commit); - if (rc) - return rc; + int rc; rc = down_write_killable(&cxl_region_rwsem); if (rc) return rc; /* Already in the requested state? */ - if (commit && p->state >= CXL_CONFIG_COMMIT) + if (p->state < CXL_CONFIG_COMMIT) goto out; - if (!commit && p->state < CXL_CONFIG_COMMIT) + + p->state = CXL_CONFIG_RESET_PENDING; + +out: + up_write(&cxl_region_rwsem); + + return rc; +} + +static int __commit(struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + int rc; + + rc = down_write_killable(&cxl_region_rwsem); + if (rc) + return rc; + + /* Already in the requested state? */ + if (p->state >= CXL_CONFIG_COMMIT) goto out; /* Not ready to commit? 
*/ - if (commit && p->state < CXL_CONFIG_ACTIVE) { + if (p->state < CXL_CONFIG_ACTIVE) { rc = -ENXIO; goto out; } @@ -385,31 +397,60 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr, if (rc) goto out; - if (commit) { - rc = cxl_region_decode_commit(cxlr); - if (rc == 0) - p->state = CXL_CONFIG_COMMIT; - } else { - p->state = CXL_CONFIG_RESET_PENDING; - up_write(&cxl_region_rwsem); - device_release_driver(&cxlr->dev); - down_write(&cxl_region_rwsem); - - /* - * The lock was dropped, so need to revalidate that the reset is - * still pending. - */ - if (p->state == CXL_CONFIG_RESET_PENDING) { - cxl_region_decode_reset(cxlr, p->interleave_ways); - p->state = CXL_CONFIG_ACTIVE; - } - } + rc = cxl_region_decode_commit(cxlr); + if (rc == 0) + p->state = CXL_CONFIG_COMMIT; out: up_write(&cxl_region_rwsem); + return rc; +} + +static ssize_t commit_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region_params *p = &cxlr->params; + bool commit; + ssize_t rc; + + rc = kstrtobool(buf, &commit); if (rc) return rc; + + if (commit) { + rc = __commit(cxlr); + if (rc) + return rc; + return len; + } + + rc = queue_reset(cxlr); + if (rc) + return rc; + + /* + * Unmap the region and depend on the reset-pending state to ensure + * it does not go active again until post reset + */ + device_release_driver(&cxlr->dev); + + /* + * With the reset pending take cxl_region_rwsem unconditionally + * to ensure the reset gets handled before returning. + */ + guard(rwsem_write)(&cxl_region_rwsem); + + /* + * Revalidate that the reset is still pending in case another + * thread already handled this reset. 
+ */ + if (p->state == CXL_CONFIG_RESET_PENDING) { + cxl_region_decode_reset(cxlr, p->interleave_ways); + p->state = CXL_CONFIG_ACTIVE; + } + return len; } From 695d9455af282056b53baf9782da5bcec3409a57 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 11 Jul 2025 16:49:30 -0700 Subject: [PATCH 0722/2411] cxl/region: Move ready-to-probe state check to a helper Rather than unlocking the region rwsem in the middle of cxl_region_probe() create a helper for determining when the region is ready-to-probe. Cc: Davidlohr Bueso Cc: Jonathan Cameron Cc: Dave Jiang Cc: Alison Schofield Cc: Vishal Verma Cc: Ira Weiny Acked-by: Peter Zijlstra (Intel) Reviewed-by: Dave Jiang Signed-off-by: Dan Williams Reviewed-by: Jonathan Cameron Reviewed-by: Fabio M. De Francesco Link: https://patch.msgid.link/20250711234932.671292-7-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 3a77aec2c447..2a97fa9a394f 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3572,9 +3572,8 @@ static void shutdown_notifiers(void *_cxlr) unregister_mt_adistance_algorithm(&cxlr->adist_notifier); } -static int cxl_region_probe(struct device *dev) +static int cxl_region_can_probe(struct cxl_region *cxlr) { - struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; int rc; @@ -3597,15 +3596,28 @@ static int cxl_region_probe(struct device *dev) goto out; } - /* - * From this point on any path that changes the region's state away from - * CXL_CONFIG_COMMIT is also responsible for releasing the driver. 
- */ out: up_read(&cxl_region_rwsem); if (rc) return rc; + return 0; +} + +static int cxl_region_probe(struct device *dev) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region_params *p = &cxlr->params; + int rc; + + rc = cxl_region_can_probe(cxlr); + if (rc) + return rc; + + /* + * From this point on any path that changes the region's state away from + * CXL_CONFIG_COMMIT is also responsible for releasing the driver. + */ cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback; cxlr->memory_notifier.priority = CXL_CALLBACK_PRI; From b3a88225519cfd05d71b99946d37476c941145b8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 11 Jul 2025 16:49:31 -0700 Subject: [PATCH 0723/2411] cxl/region: Consolidate cxl_decoder_kill_region() and cxl_region_detach() Both detach_target() and cxld_unregister() want to tear down a cxl_region when an endpoint decoder is either detached or destroyed. When a region is to be destroyed cxl_region_detach() releases cxl_region_rwsem unbinds the cxl_region driver and re-acquires the rwsem. This "reverse" locking pattern is difficult to reason about, not amenable to scope-based cleanup, and the minor differences in the calling context of detach_target() and cxld_unregister() currently results in the cxl_decoder_kill_region() wrapper. Introduce cxl_decoder_detach() to wrap a core __cxl_decoder_detach() that serves both cases. I.e. either detaching a known position in a region (interruptible), or detaching an endpoint decoder if it is found to be a member of a region (uninterruptible). Cc: Davidlohr Bueso Cc: Jonathan Cameron Cc: Dave Jiang Cc: Alison Schofield Cc: Vishal Verma Cc: Ira Weiny Acked-by: Peter Zijlstra (Intel) Signed-off-by: Dan Williams Reviewed-by: Fabio M. 
De Francesco Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Alison Schofield Reviewed-by: Davidlohr Bueso Link: https://patch.msgid.link/20250711234932.671292-8-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/core.h | 15 +++++- drivers/cxl/core/port.c | 9 ++-- drivers/cxl/core/region.c | 103 ++++++++++++++++++++++---------------- 3 files changed, 75 insertions(+), 52 deletions(-) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 29b61828a847..2be37084409f 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -12,6 +12,11 @@ extern const struct device_type cxl_pmu_type; extern struct attribute_group cxl_base_attribute_group; +enum cxl_detach_mode { + DETACH_ONLY, + DETACH_INVALIDATE, +}; + #ifdef CONFIG_CXL_REGION extern struct device_attribute dev_attr_create_pmem_region; extern struct device_attribute dev_attr_create_ram_region; @@ -20,7 +25,11 @@ extern struct device_attribute dev_attr_region; extern const struct device_type cxl_pmem_region_type; extern const struct device_type cxl_dax_region_type; extern const struct device_type cxl_region_type; -void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled); + +int cxl_decoder_detach(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos, + enum cxl_detach_mode mode); + #define CXL_REGION_ATTR(x) (&dev_attr_##x.attr) #define CXL_REGION_TYPE(x) (&cxl_region_type) #define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr), @@ -48,7 +57,9 @@ static inline int cxl_get_poison_by_endpoint(struct cxl_port *port) { return 0; } -static inline void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled) +static inline int cxl_decoder_detach(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, + int pos, enum cxl_detach_mode mode) { } static inline int cxl_region_init(void) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index eb46c6764d20..087a20a9ee1c 100644 --- a/drivers/cxl/core/port.c +++ 
b/drivers/cxl/core/port.c @@ -2001,12 +2001,9 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, "CXL"); static void cxld_unregister(void *dev) { - struct cxl_endpoint_decoder *cxled; - - if (is_endpoint_decoder(dev)) { - cxled = to_cxl_endpoint_decoder(dev); - cxl_decoder_kill_region(cxled); - } + if (is_endpoint_decoder(dev)) + cxl_decoder_detach(NULL, to_cxl_endpoint_decoder(dev), -1, + DETACH_INVALIDATE); device_unregister(dev); } diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 2a97fa9a394f..4314aaed8ad8 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2135,27 +2135,43 @@ static int cxl_region_attach(struct cxl_region *cxlr, return 0; } -static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) +static struct cxl_region * +__cxl_decoder_detach(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos, + enum cxl_detach_mode mode) { - struct cxl_port *iter, *ep_port = cxled_to_port(cxled); - struct cxl_region *cxlr = cxled->cxld.region; struct cxl_region_params *p; - int rc = 0; lockdep_assert_held_write(&cxl_region_rwsem); - if (!cxlr) - return 0; + if (!cxled) { + p = &cxlr->params; - p = &cxlr->params; - get_device(&cxlr->dev); + if (pos >= p->interleave_ways) { + dev_dbg(&cxlr->dev, "position %d out of range %d\n", + pos, p->interleave_ways); + return ERR_PTR(-ENXIO); + } + + if (!p->targets[pos]) + return NULL; + cxled = p->targets[pos]; + } else { + cxlr = cxled->cxld.region; + if (!cxlr) + return NULL; + p = &cxlr->params; + } + + if (mode == DETACH_INVALIDATE) + cxled->part = -1; if (p->state > CXL_CONFIG_ACTIVE) { cxl_region_decode_reset(cxlr, p->interleave_ways); p->state = CXL_CONFIG_ACTIVE; } - for (iter = ep_port; !is_cxl_root(iter); + for (struct cxl_port *iter = cxled_to_port(cxled); !is_cxl_root(iter); iter = to_cxl_port(iter->dev.parent)) cxl_port_detach_region(iter, cxlr, cxled); @@ -2166,7 +2182,7 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) 
dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), cxled->pos); - goto out; + return NULL; } if (p->state == CXL_CONFIG_ACTIVE) { @@ -2180,21 +2196,42 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) .end = -1, }; - /* notify the region driver that one of its targets has departed */ - up_write(&cxl_region_rwsem); - device_release_driver(&cxlr->dev); - down_write(&cxl_region_rwsem); -out: - put_device(&cxlr->dev); - return rc; + get_device(&cxlr->dev); + return cxlr; } -void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled) +/* + * Cleanup a decoder's interest in a region. There are 2 cases to + * handle, removing an unknown @cxled from a known position in a region + * (detach_target()) or removing a known @cxled from an unknown @cxlr + * (cxld_unregister()) + * + * When the detachment finds a region release the region driver. + */ +int cxl_decoder_detach(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos, + enum cxl_detach_mode mode) { - down_write(&cxl_region_rwsem); - cxled->part = -1; - cxl_region_detach(cxled); + struct cxl_region *detach; + + /* when the decoder is being destroyed lock unconditionally */ + if (mode == DETACH_INVALIDATE) + down_write(&cxl_region_rwsem); + else { + int rc = down_write_killable(&cxl_region_rwsem); + + if (rc) + return rc; + } + + detach = __cxl_decoder_detach(cxlr, cxled, pos, mode); up_write(&cxl_region_rwsem); + + if (detach) { + device_release_driver(&detach->dev); + put_device(&detach->dev); + } + return 0; } static int attach_target(struct cxl_region *cxlr, @@ -2225,29 +2262,7 @@ static int attach_target(struct cxl_region *cxlr, static int detach_target(struct cxl_region *cxlr, int pos) { - struct cxl_region_params *p = &cxlr->params; - int rc; - - rc = down_write_killable(&cxl_region_rwsem); - if (rc) - return rc; - - if (pos >= p->interleave_ways) { - dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos, - 
p->interleave_ways); - rc = -ENXIO; - goto out; - } - - if (!p->targets[pos]) { - rc = 0; - goto out; - } - - rc = cxl_region_detach(p->targets[pos]); -out: - up_write(&cxl_region_rwsem); - return rc; + return cxl_decoder_detach(cxlr, NULL, pos, DETACH_ONLY); } static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos, From d03fcf50ba56f4479685b951506422eeca230853 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 11 Jul 2025 16:49:32 -0700 Subject: [PATCH 0724/2411] cxl: Convert to ACQUIRE() for conditional rwsem locking Use ACQUIRE() to cleanup conditional locking paths in the CXL driver The ACQUIRE() macro and its associated ACQUIRE_ERR() helpers, like scoped_cond_guard(), arrange for scoped-based conditional locking. Unlike scoped_cond_guard(), these macros arrange for an ERR_PTR() to be retrieved representing the state of the conditional lock. The goal of this conversion is to complete the removal of all explicit unlock calls in the subsystem. I.e. the methods to acquire a lock are solely via guard(), scoped_guard() (for limited cases), or ACQUIRE(). All unlock is implicit / scope-based. In order to make sure all lock sites are converted, the existing rwsem's are consolidated and renamed in 'struct cxl_rwsem'. While that makes the patch noisier it gives a clean cut-off between old-world (explicit unlock allowed), and new world (explicit unlock deleted). Cc: David Lechner Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Ingo Molnar Cc: Fabio M. De Francesco Cc: Davidlohr Bueso Cc: Jonathan Cameron Cc: Dave Jiang Cc: Alison Schofield Cc: Vishal Verma Cc: Ira Weiny Cc: Shiju Jose Acked-by: Peter Zijlstra (Intel) Signed-off-by: Dan Williams Reviewed-by: Jonathan Cameron Reviewed-by: Fabio M. 
De Francesco Reviewed-by: Dave Jiang Tested-by: Shiju Jose Link: https://patch.msgid.link/20250711234932.671292-9-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/cdat.c | 6 +- drivers/cxl/core/core.h | 17 ++- drivers/cxl/core/edac.c | 44 +++--- drivers/cxl/core/hdm.c | 41 +++--- drivers/cxl/core/mbox.c | 6 +- drivers/cxl/core/memdev.c | 50 +++---- drivers/cxl/core/port.c | 18 +-- drivers/cxl/core/region.c | 295 ++++++++++++++++---------------------- drivers/cxl/cxl.h | 13 +- include/linux/rwsem.h | 1 + 10 files changed, 212 insertions(+), 279 deletions(-) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index 0ccef2f2a26a..c0af645425f4 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -336,7 +336,7 @@ static int match_cxlrd_hb(struct device *dev, void *data) cxlrd = to_cxl_root_decoder(dev); cxlsd = &cxlrd->cxlsd; - guard(rwsem_read)(&cxl_region_rwsem); + guard(rwsem_read)(&cxl_rwsem.region); for (int i = 0; i < cxlsd->nr_targets; i++) { if (host_bridge == cxlsd->target[i]->dport_dev) return 1; @@ -987,7 +987,7 @@ void cxl_region_shared_upstream_bandwidth_update(struct cxl_region *cxlr) bool is_root; int rc; - lockdep_assert_held(&cxl_dpa_rwsem); + lockdep_assert_held(&cxl_rwsem.dpa); struct xarray *usp_xa __free(free_perf_xa) = kzalloc(sizeof(*usp_xa), GFP_KERNEL); @@ -1057,7 +1057,7 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr, { struct cxl_dpa_perf *perf; - lockdep_assert_held(&cxl_dpa_rwsem); + lockdep_assert_held(&cxl_rwsem.dpa); perf = cxled_get_dpa_perf(cxled); if (IS_ERR(perf)) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 2be37084409f..f796731deedf 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -5,6 +5,7 @@ #define __CXL_CORE_H__ #include +#include extern const struct device_type cxl_nvdimm_bridge_type; extern const struct device_type cxl_nvdimm_type; @@ -107,8 +108,20 @@ u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb); 
#define PCI_RCRB_CAP_HDR_NEXT_MASK GENMASK(15, 8) #define PCI_CAP_EXP_SIZEOF 0x3c -extern struct rw_semaphore cxl_dpa_rwsem; -extern struct rw_semaphore cxl_region_rwsem; +struct cxl_rwsem { + /* + * All changes to HPA (interleave configuration) occur with this + * lock held for write. + */ + struct rw_semaphore region; + /* + * All changes to a device DPA space occur with this lock held + * for write. + */ + struct rw_semaphore dpa; +}; + +extern struct cxl_rwsem cxl_rwsem; int cxl_memdev_init(void); void cxl_memdev_exit(void); diff --git a/drivers/cxl/core/edac.c b/drivers/cxl/core/edac.c index 623aaa4439c4..9ed1b670efb8 100644 --- a/drivers/cxl/core/edac.c +++ b/drivers/cxl/core/edac.c @@ -115,10 +115,9 @@ static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx, flags, min_cycle); } - struct rw_semaphore *region_lock __free(rwsem_read_release) = - rwsem_read_intr_acquire(&cxl_region_rwsem); - if (!region_lock) - return -EINTR; + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((ret = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) + return ret; cxlr = cxl_ps_ctx->cxlr; p = &cxlr->params; @@ -158,10 +157,9 @@ static int cxl_scrub_set_attrbs_region(struct device *dev, struct cxl_region *cxlr; int ret, i; - struct rw_semaphore *region_lock __free(rwsem_read_release) = - rwsem_read_intr_acquire(&cxl_region_rwsem); - if (!region_lock) - return -EINTR; + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((ret = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) + return ret; cxlr = cxl_ps_ctx->cxlr; p = &cxlr->params; @@ -1340,16 +1338,15 @@ cxl_mem_perform_sparing(struct device *dev, struct cxl_memdev_sparing_in_payload sparing_pi; struct cxl_event_dram *rec = NULL; u16 validity_flags = 0; + int ret; - struct rw_semaphore *region_lock __free(rwsem_read_release) = - rwsem_read_intr_acquire(&cxl_region_rwsem); - if (!region_lock) - return -EINTR; + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((ret = ACQUIRE_ERR(rwsem_read_intr, 
&region_rwsem))) + return ret; - struct rw_semaphore *dpa_lock __free(rwsem_read_release) = - rwsem_read_intr_acquire(&cxl_dpa_rwsem); - if (!dpa_lock) - return -EINTR; + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((ret = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return ret; if (!cxl_sparing_ctx->cap_safe_when_in_use) { /* Memory to repair must be offline */ @@ -1787,16 +1784,15 @@ static int cxl_mem_perform_ppr(struct cxl_ppr_context *cxl_ppr_ctx) struct cxl_memdev_ppr_maintenance_attrbs maintenance_attrbs; struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd; struct cxl_mem_repair_attrbs attrbs = { 0 }; + int ret; - struct rw_semaphore *region_lock __free(rwsem_read_release) = - rwsem_read_intr_acquire(&cxl_region_rwsem); - if (!region_lock) - return -EINTR; + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((ret = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem))) + return ret; - struct rw_semaphore *dpa_lock __free(rwsem_read_release) = - rwsem_read_intr_acquire(&cxl_dpa_rwsem); - if (!dpa_lock) - return -EINTR; + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((ret = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return ret; if (!cxl_ppr_ctx->media_accessible || !cxl_ppr_ctx->data_retained) { /* Memory to repair must be offline */ diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index e9cb34e30248..865a71bce251 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -16,7 +16,10 @@ * for enumerating these registers and capabilities. 
*/ -DECLARE_RWSEM(cxl_dpa_rwsem); +struct cxl_rwsem cxl_rwsem = { + .region = __RWSEM_INITIALIZER(cxl_rwsem.region), + .dpa = __RWSEM_INITIALIZER(cxl_rwsem.dpa), +}; static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, int *target_map) @@ -214,7 +217,7 @@ void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds) { struct resource *p1, *p2; - guard(rwsem_read)(&cxl_dpa_rwsem); + guard(rwsem_read)(&cxl_rwsem.dpa); for (p1 = cxlds->dpa_res.child; p1; p1 = p1->sibling) { __cxl_dpa_debug(file, p1, 0); for (p2 = p1->child; p2; p2 = p2->sibling) @@ -266,7 +269,7 @@ static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled) struct resource *res = cxled->dpa_res; resource_size_t skip_start; - lockdep_assert_held_write(&cxl_dpa_rwsem); + lockdep_assert_held_write(&cxl_rwsem.dpa); /* save @skip_start, before @res is released */ skip_start = res->start - cxled->skip; @@ -281,7 +284,7 @@ static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled) static void cxl_dpa_release(void *cxled) { - guard(rwsem_write)(&cxl_dpa_rwsem); + guard(rwsem_write)(&cxl_rwsem.dpa); __cxl_dpa_release(cxled); } @@ -293,7 +296,7 @@ static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled) { struct cxl_port *port = cxled_to_port(cxled); - lockdep_assert_held_write(&cxl_dpa_rwsem); + lockdep_assert_held_write(&cxl_rwsem.dpa); devm_remove_action(&port->dev, cxl_dpa_release, cxled); __cxl_dpa_release(cxled); } @@ -361,7 +364,7 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, struct resource *res; int rc; - lockdep_assert_held_write(&cxl_dpa_rwsem); + lockdep_assert_held_write(&cxl_rwsem.dpa); if (!len) { dev_warn(dev, "decoder%d.%d: empty reservation attempted\n", @@ -470,7 +473,7 @@ int cxl_dpa_setup(struct cxl_dev_state *cxlds, const struct cxl_dpa_info *info) { struct device *dev = cxlds->dev; - guard(rwsem_write)(&cxl_dpa_rwsem); + guard(rwsem_write)(&cxl_rwsem.dpa); if (cxlds->nr_partitions) return -EBUSY; @@ -516,9 
+519,8 @@ int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, struct cxl_port *port = cxled_to_port(cxled); int rc; - down_write(&cxl_dpa_rwsem); - rc = __cxl_dpa_reserve(cxled, base, len, skipped); - up_write(&cxl_dpa_rwsem); + scoped_guard(rwsem_write, &cxl_rwsem.dpa) + rc = __cxl_dpa_reserve(cxled, base, len, skipped); if (rc) return rc; @@ -529,7 +531,7 @@ EXPORT_SYMBOL_NS_GPL(devm_cxl_dpa_reserve, "CXL"); resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled) { - guard(rwsem_read)(&cxl_dpa_rwsem); + guard(rwsem_read)(&cxl_rwsem.dpa); if (cxled->dpa_res) return resource_size(cxled->dpa_res); @@ -540,7 +542,7 @@ resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled) { resource_size_t base = -1; - lockdep_assert_held(&cxl_dpa_rwsem); + lockdep_assert_held(&cxl_rwsem.dpa); if (cxled->dpa_res) base = cxled->dpa_res->start; @@ -552,7 +554,7 @@ int cxl_dpa_free(struct cxl_endpoint_decoder *cxled) struct cxl_port *port = cxled_to_port(cxled); struct device *dev = &cxled->cxld.dev; - guard(rwsem_write)(&cxl_dpa_rwsem); + guard(rwsem_write)(&cxl_rwsem.dpa); if (!cxled->dpa_res) return 0; if (cxled->cxld.region) { @@ -582,7 +584,7 @@ int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, struct device *dev = &cxled->cxld.dev; int part; - guard(rwsem_write)(&cxl_dpa_rwsem); + guard(rwsem_write)(&cxl_rwsem.dpa); if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) return -EBUSY; @@ -614,7 +616,7 @@ static int __cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size) struct resource *p, *last; int part; - guard(rwsem_write)(&cxl_dpa_rwsem); + guard(rwsem_write)(&cxl_rwsem.dpa); if (cxled->cxld.region) { dev_dbg(dev, "decoder attached to %s\n", dev_name(&cxled->cxld.region->dev)); @@ -842,9 +844,8 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld) } } - down_read(&cxl_dpa_rwsem); - setup_hw_decoder(cxld, hdm); - up_read(&cxl_dpa_rwsem); + scoped_guard(rwsem_read, &cxl_rwsem.dpa) + setup_hw_decoder(cxld, hdm); port->commit_end++; 
rc = cxld_await_commit(hdm, cxld->id); @@ -882,7 +883,7 @@ void cxl_port_commit_reap(struct cxl_decoder *cxld) { struct cxl_port *port = to_cxl_port(cxld->dev.parent); - lockdep_assert_held_write(&cxl_region_rwsem); + lockdep_assert_held_write(&cxl_rwsem.region); /* * Once the highest committed decoder is disabled, free any other @@ -1030,7 +1031,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, else cxld->target_type = CXL_DECODER_DEVMEM; - guard(rwsem_write)(&cxl_region_rwsem); + guard(rwsem_write)(&cxl_rwsem.region); if (cxld->id != cxl_num_decoders_committed(port)) { dev_warn(&port->dev, "decoder%d.%d: Committed out of order\n", diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 81b21effe8cf..92cd3cbdd8ec 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -909,8 +909,8 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, * translations. Take topology mutation locks and lookup * { HPA, REGION } from { DPA, MEMDEV } in the event record. */ - guard(rwsem_read)(&cxl_region_rwsem); - guard(rwsem_read)(&cxl_dpa_rwsem); + guard(rwsem_read)(&cxl_rwsem.region); + guard(rwsem_read)(&cxl_rwsem.dpa); dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK; cxlr = cxl_dpa_to_region(cxlmd, dpa); @@ -1265,7 +1265,7 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd) /* synchronize with cxl_mem_probe() and decoder write operations */ guard(device)(&cxlmd->dev); endpoint = cxlmd->endpoint; - guard(rwsem_read)(&cxl_region_rwsem); + guard(rwsem_read)(&cxl_rwsem.region); /* * Require an endpoint to be safe otherwise the driver can not * be sure that the device is unmapped. 
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index f88a13adf7fa..f5fbd34310fd 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -232,15 +232,13 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd) if (!port || !is_cxl_endpoint(port)) return -EINVAL; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem))) return rc; - rc = down_read_interruptible(&cxl_dpa_rwsem); - if (rc) { - up_read(&cxl_region_rwsem); + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) return rc; - } if (cxl_num_decoders_committed(port) == 0) { /* No regions mapped to this memdev */ @@ -249,8 +247,6 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd) /* Regions mapped, collect poison by endpoint */ rc = cxl_get_poison_by_endpoint(port); } - up_read(&cxl_dpa_rwsem); - up_read(&cxl_region_rwsem); return rc; } @@ -292,19 +288,17 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem))) return rc; - rc = down_read_interruptible(&cxl_dpa_rwsem); - if (rc) { - up_read(&cxl_region_rwsem); + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) return rc; - } rc = cxl_validate_poison_dpa(cxlmd, dpa); if (rc) - goto out; + return rc; inject.address = cpu_to_le64(dpa); mbox_cmd = (struct cxl_mbox_cmd) { @@ -314,7 +308,7 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) }; rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc) - goto out; + return rc; cxlr = cxl_dpa_to_region(cxlmd, dpa); if (cxlr) @@ -327,11 +321,8 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) .length = 
cpu_to_le32(1), }; trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT); -out: - up_read(&cxl_dpa_rwsem); - up_read(&cxl_region_rwsem); - return rc; + return 0; } EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, "CXL"); @@ -347,19 +338,17 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem))) return rc; - rc = down_read_interruptible(&cxl_dpa_rwsem); - if (rc) { - up_read(&cxl_region_rwsem); + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) return rc; - } rc = cxl_validate_poison_dpa(cxlmd, dpa); if (rc) - goto out; + return rc; /* * In CXL 3.0 Spec 8.2.9.8.4.3, the Clear Poison mailbox command @@ -378,7 +367,7 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc) - goto out; + return rc; cxlr = cxl_dpa_to_region(cxlmd, dpa); if (cxlr) @@ -391,11 +380,8 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) .length = cpu_to_le32(1), }; trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR); -out: - up_read(&cxl_dpa_rwsem); - up_read(&cxl_region_rwsem); - return rc; + return 0; } EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, "CXL"); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 087a20a9ee1c..bacf1380dc4d 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -30,18 +30,12 @@ * instantiated by the core. */ -/* - * All changes to the interleave configuration occur with this lock held - * for write. 
- */ -DECLARE_RWSEM(cxl_region_rwsem); - static DEFINE_IDA(cxl_port_ida); static DEFINE_XARRAY(cxl_root_buses); int cxl_num_decoders_committed(struct cxl_port *port) { - lockdep_assert_held(&cxl_region_rwsem); + lockdep_assert_held(&cxl_rwsem.region); return port->commit_end + 1; } @@ -176,7 +170,7 @@ static ssize_t target_list_show(struct device *dev, ssize_t offset; int rc; - guard(rwsem_read)(&cxl_region_rwsem); + guard(rwsem_read)(&cxl_rwsem.region); rc = emit_target_list(cxlsd, buf); if (rc < 0) return rc; @@ -196,7 +190,7 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr, struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev); struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_dev_state *cxlds = cxlmd->cxlds; - /* without @cxl_dpa_rwsem, make sure @part is not reloaded */ + /* without @cxl_rwsem.dpa, make sure @part is not reloaded */ int part = READ_ONCE(cxled->part); const char *desc; @@ -235,7 +229,7 @@ static ssize_t dpa_resource_show(struct device *dev, struct device_attribute *at { struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev); - guard(rwsem_read)(&cxl_dpa_rwsem); + guard(rwsem_read)(&cxl_rwsem.dpa); return sysfs_emit(buf, "%#llx\n", (u64)cxl_dpa_resource_start(cxled)); } static DEVICE_ATTR_RO(dpa_resource); @@ -560,7 +554,7 @@ static ssize_t decoders_committed_show(struct device *dev, { struct cxl_port *port = to_cxl_port(dev); - guard(rwsem_read)(&cxl_region_rwsem); + guard(rwsem_read)(&cxl_rwsem.region); return sysfs_emit(buf, "%d\n", cxl_num_decoders_committed(port)); } @@ -1722,7 +1716,7 @@ static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd, if (xa_empty(&port->dports)) return -EINVAL; - guard(rwsem_write)(&cxl_region_rwsem); + guard(rwsem_write)(&cxl_rwsem.region); for (i = 0; i < cxlsd->cxld.interleave_ways; i++) { struct cxl_dport *dport = find_dport(port, target_map[i]); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 
4314aaed8ad8..ad60c93be803 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -141,16 +141,12 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, struct cxl_region_params *p = &cxlr->params; ssize_t rc; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem))) return rc; if (cxlr->mode != CXL_PARTMODE_PMEM) - rc = sysfs_emit(buf, "\n"); - else - rc = sysfs_emit(buf, "%pUb\n", &p->uuid); - up_read(&cxl_region_rwsem); - - return rc; + return sysfs_emit(buf, "\n"); + return sysfs_emit(buf, "%pUb\n", &p->uuid); } static int is_dup(struct device *match, void *data) @@ -162,7 +158,7 @@ static int is_dup(struct device *match, void *data) if (!is_cxl_region(match)) return 0; - lockdep_assert_held(&cxl_region_rwsem); + lockdep_assert_held(&cxl_rwsem.region); cxlr = to_cxl_region(match); p = &cxlr->params; @@ -192,27 +188,22 @@ static ssize_t uuid_store(struct device *dev, struct device_attribute *attr, if (uuid_is_null(&temp)) return -EINVAL; - rc = down_write_killable(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_write_kill, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &region_rwsem))) return rc; if (uuid_equal(&p->uuid, &temp)) - goto out; + return len; - rc = -EBUSY; if (p->state >= CXL_CONFIG_ACTIVE) - goto out; + return -EBUSY; rc = bus_for_each_dev(&cxl_bus_type, NULL, &temp, is_dup); if (rc < 0) - goto out; + return rc; uuid_copy(&p->uuid, &temp); -out: - up_write(&cxl_region_rwsem); - if (rc) - return rc; return len; } static DEVICE_ATTR_RW(uuid); @@ -354,20 +345,17 @@ static int queue_reset(struct cxl_region *cxlr) struct cxl_region_params *p = &cxlr->params; int rc; - rc = down_write_killable(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) return rc; /* Already in the requested 
state? */ if (p->state < CXL_CONFIG_COMMIT) - goto out; + return 0; p->state = CXL_CONFIG_RESET_PENDING; -out: - up_write(&cxl_region_rwsem); - - return rc; + return 0; } static int __commit(struct cxl_region *cxlr) @@ -375,19 +363,17 @@ static int __commit(struct cxl_region *cxlr) struct cxl_region_params *p = &cxlr->params; int rc; - rc = down_write_killable(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) return rc; /* Already in the requested state? */ if (p->state >= CXL_CONFIG_COMMIT) - goto out; + return 0; /* Not ready to commit? */ - if (p->state < CXL_CONFIG_ACTIVE) { - rc = -ENXIO; - goto out; - } + if (p->state < CXL_CONFIG_ACTIVE) + return -ENXIO; /* * Invalidate caches before region setup to drop any speculative @@ -395,16 +381,15 @@ static int __commit(struct cxl_region *cxlr) */ rc = cxl_region_invalidate_memregion(cxlr); if (rc) - goto out; + return rc; rc = cxl_region_decode_commit(cxlr); - if (rc == 0) - p->state = CXL_CONFIG_COMMIT; + if (rc) + return rc; -out: - up_write(&cxl_region_rwsem); + p->state = CXL_CONFIG_COMMIT; - return rc; + return 0; } static ssize_t commit_store(struct device *dev, struct device_attribute *attr, @@ -437,10 +422,10 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr, device_release_driver(&cxlr->dev); /* - * With the reset pending take cxl_region_rwsem unconditionally + * With the reset pending take cxl_rwsem.region unconditionally * to ensure the reset gets handled before returning. 
*/ - guard(rwsem_write)(&cxl_region_rwsem); + guard(rwsem_write)(&cxl_rwsem.region); /* * Revalidate that the reset is still pending in case another @@ -461,13 +446,10 @@ static ssize_t commit_show(struct device *dev, struct device_attribute *attr, struct cxl_region_params *p = &cxlr->params; ssize_t rc; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) return rc; - rc = sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT); - up_read(&cxl_region_rwsem); - - return rc; + return sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT); } static DEVICE_ATTR_RW(commit); @@ -491,15 +473,12 @@ static ssize_t interleave_ways_show(struct device *dev, { struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; - ssize_t rc; + int rc; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) return rc; - rc = sysfs_emit(buf, "%d\n", p->interleave_ways); - up_read(&cxl_region_rwsem); - - return rc; + return sysfs_emit(buf, "%d\n", p->interleave_ways); } static const struct attribute_group *get_cxl_region_target_group(void); @@ -534,23 +513,21 @@ static ssize_t interleave_ways_store(struct device *dev, return -EINVAL; } - rc = down_write_killable(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) return rc; - if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) { - rc = -EBUSY; - goto out; - } + + if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) + return -EBUSY; save = p->interleave_ways; p->interleave_ways = val; rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group()); - if (rc) + if (rc) { p->interleave_ways = save; -out: - up_write(&cxl_region_rwsem); - if (rc) return rc; + } + return len; } static DEVICE_ATTR_RW(interleave_ways); 
@@ -561,15 +538,12 @@ static ssize_t interleave_granularity_show(struct device *dev, { struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; - ssize_t rc; + int rc; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) return rc; - rc = sysfs_emit(buf, "%d\n", p->interleave_granularity); - up_read(&cxl_region_rwsem); - - return rc; + return sysfs_emit(buf, "%d\n", p->interleave_granularity); } static ssize_t interleave_granularity_store(struct device *dev, @@ -602,19 +576,15 @@ static ssize_t interleave_granularity_store(struct device *dev, if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity) return -EINVAL; - rc = down_write_killable(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) return rc; - if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) { - rc = -EBUSY; - goto out; - } + + if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) + return -EBUSY; p->interleave_granularity = val; -out: - up_write(&cxl_region_rwsem); - if (rc) - return rc; + return len; } static DEVICE_ATTR_RW(interleave_granularity); @@ -625,17 +595,15 @@ static ssize_t resource_show(struct device *dev, struct device_attribute *attr, struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; u64 resource = -1ULL; - ssize_t rc; + int rc; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) return rc; + if (p->res) resource = p->res->start; - rc = sysfs_emit(buf, "%#llx\n", resource); - up_read(&cxl_region_rwsem); - - return rc; + return sysfs_emit(buf, "%#llx\n", resource); } static DEVICE_ATTR_RO(resource); @@ -663,7 +631,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) struct resource *res; 
u64 remainder = 0; - lockdep_assert_held_write(&cxl_region_rwsem); + lockdep_assert_held_write(&cxl_rwsem.region); /* Nothing to do... */ if (p->res && resource_size(p->res) == size) @@ -705,7 +673,7 @@ static void cxl_region_iomem_release(struct cxl_region *cxlr) struct cxl_region_params *p = &cxlr->params; if (device_is_registered(&cxlr->dev)) - lockdep_assert_held_write(&cxl_region_rwsem); + lockdep_assert_held_write(&cxl_rwsem.region); if (p->res) { /* * Autodiscovered regions may not have been able to insert their @@ -722,7 +690,7 @@ static int free_hpa(struct cxl_region *cxlr) { struct cxl_region_params *p = &cxlr->params; - lockdep_assert_held_write(&cxl_region_rwsem); + lockdep_assert_held_write(&cxl_rwsem.region); if (!p->res) return 0; @@ -746,15 +714,14 @@ static ssize_t size_store(struct device *dev, struct device_attribute *attr, if (rc) return rc; - rc = down_write_killable(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) return rc; if (val) rc = alloc_hpa(cxlr, val); else rc = free_hpa(cxlr); - up_write(&cxl_region_rwsem); if (rc) return rc; @@ -770,15 +737,12 @@ static ssize_t size_show(struct device *dev, struct device_attribute *attr, u64 size = 0; ssize_t rc; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) return rc; if (p->res) size = resource_size(p->res); - rc = sysfs_emit(buf, "%#llx\n", size); - up_read(&cxl_region_rwsem); - - return rc; + return sysfs_emit(buf, "%#llx\n", size); } static DEVICE_ATTR_RW(size); @@ -804,26 +768,20 @@ static size_t show_targetN(struct cxl_region *cxlr, char *buf, int pos) struct cxl_endpoint_decoder *cxled; int rc; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) return rc; if (pos >= 
p->interleave_ways) { dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos, p->interleave_ways); - rc = -ENXIO; - goto out; + return -ENXIO; } cxled = p->targets[pos]; if (!cxled) - rc = sysfs_emit(buf, "\n"); - else - rc = sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev)); -out: - up_read(&cxl_region_rwsem); - - return rc; + return sysfs_emit(buf, "\n"); + return sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev)); } static int check_commit_order(struct device *dev, void *data) @@ -938,7 +896,7 @@ cxl_port_pick_region_decoder(struct cxl_port *port, /* * This decoder is pinned registered as long as the endpoint decoder is * registered, and endpoint decoder unregistration holds the - * cxl_region_rwsem over unregister events, so no need to hold on to + * cxl_rwsem.region over unregister events, so no need to hold on to * this extra reference. */ put_device(dev); @@ -1129,7 +1087,7 @@ static int cxl_port_attach_region(struct cxl_port *port, unsigned long index; int rc = -EBUSY; - lockdep_assert_held_write(&cxl_region_rwsem); + lockdep_assert_held_write(&cxl_rwsem.region); cxl_rr = cxl_rr_load(port, cxlr); if (cxl_rr) { @@ -1239,7 +1197,7 @@ static void cxl_port_detach_region(struct cxl_port *port, struct cxl_region_ref *cxl_rr; struct cxl_ep *ep = NULL; - lockdep_assert_held_write(&cxl_region_rwsem); + lockdep_assert_held_write(&cxl_rwsem.region); cxl_rr = cxl_rr_load(port, cxlr); if (!cxl_rr) @@ -2142,7 +2100,7 @@ __cxl_decoder_detach(struct cxl_region *cxlr, { struct cxl_region_params *p; - lockdep_assert_held_write(&cxl_region_rwsem); + lockdep_assert_held_write(&cxl_rwsem.region); if (!cxled) { p = &cxlr->params; @@ -2215,18 +2173,18 @@ int cxl_decoder_detach(struct cxl_region *cxlr, struct cxl_region *detach; /* when the decoder is being destroyed lock unconditionally */ - if (mode == DETACH_INVALIDATE) - down_write(&cxl_region_rwsem); - else { - int rc = down_write_killable(&cxl_region_rwsem); + if (mode == DETACH_INVALIDATE) { + 
guard(rwsem_write)(&cxl_rwsem.region); + detach = __cxl_decoder_detach(cxlr, cxled, pos, mode); + } else { + int rc; - if (rc) + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) return rc; + detach = __cxl_decoder_detach(cxlr, cxled, pos, mode); } - detach = __cxl_decoder_detach(cxlr, cxled, pos, mode); - up_write(&cxl_region_rwsem); - if (detach) { device_release_driver(&detach->dev); put_device(&detach->dev); @@ -2234,29 +2192,35 @@ int cxl_decoder_detach(struct cxl_region *cxlr, return 0; } +static int __attach_target(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos, + unsigned int state) +{ + int rc; + + if (state == TASK_INTERRUPTIBLE) { + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) + return rc; + guard(rwsem_read)(&cxl_rwsem.dpa); + return cxl_region_attach(cxlr, cxled, pos); + } + guard(rwsem_write)(&cxl_rwsem.region); + guard(rwsem_read)(&cxl_rwsem.dpa); + return cxl_region_attach(cxlr, cxled, pos); +} + static int attach_target(struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled, int pos, unsigned int state) { - int rc = 0; + int rc = __attach_target(cxlr, cxled, pos, state); - if (state == TASK_INTERRUPTIBLE) - rc = down_write_killable(&cxl_region_rwsem); - else - down_write(&cxl_region_rwsem); - if (rc) - return rc; - - down_read(&cxl_dpa_rwsem); - rc = cxl_region_attach(cxlr, cxled, pos); - up_read(&cxl_dpa_rwsem); - up_write(&cxl_region_rwsem); - - if (rc) - dev_warn(cxled->cxld.dev.parent, - "failed to attach %s to %s: %d\n", - dev_name(&cxled->cxld.dev), dev_name(&cxlr->dev), rc); + if (rc == 0) + return 0; + dev_warn(cxled->cxld.dev.parent, "failed to attach %s to %s: %d\n", + dev_name(&cxled->cxld.dev), dev_name(&cxlr->dev), rc); return rc; } @@ -2516,7 +2480,7 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb, return NOTIFY_DONE; /* - * No need to hold cxl_region_rwsem; region 
parameters are stable + * No need to hold cxl_rwsem.region; region parameters are stable * within the cxl_region driver. */ region_nid = phys_to_target_node(cxlr->params.res->start); @@ -2539,7 +2503,7 @@ static int cxl_region_calculate_adistance(struct notifier_block *nb, int region_nid; /* - * No need to hold cxl_region_rwsem; region parameters are stable + * No need to hold cxl_rwsem.region; region parameters are stable * within the cxl_region driver. */ region_nid = phys_to_target_node(cxlr->params.res->start); @@ -2688,17 +2652,13 @@ static ssize_t region_show(struct device *dev, struct device_attribute *attr, struct cxl_decoder *cxld = to_cxl_decoder(dev); ssize_t rc; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) return rc; if (cxld->region) - rc = sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev)); - else - rc = sysfs_emit(buf, "\n"); - up_read(&cxl_region_rwsem); - - return rc; + return sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev)); + return sysfs_emit(buf, "\n"); } DEVICE_ATTR_RO(region); @@ -3037,7 +2997,7 @@ static int cxl_pmem_region_alloc(struct cxl_region *cxlr) struct device *dev; int i; - guard(rwsem_read)(&cxl_region_rwsem); + guard(rwsem_read)(&cxl_rwsem.region); if (p->state != CXL_CONFIG_COMMIT) return -ENXIO; @@ -3049,7 +3009,7 @@ static int cxl_pmem_region_alloc(struct cxl_region *cxlr) cxlr_pmem->hpa_range.start = p->res->start; cxlr_pmem->hpa_range.end = p->res->end; - /* Snapshot the region configuration underneath the cxl_region_rwsem */ + /* Snapshot the region configuration underneath the cxl_rwsem.region */ cxlr_pmem->nr_mappings = p->nr_targets; for (i = 0; i < p->nr_targets; i++) { struct cxl_endpoint_decoder *cxled = p->targets[i]; @@ -3126,7 +3086,7 @@ static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr) struct cxl_dax_region *cxlr_dax; struct device *dev; - 
guard(rwsem_read)(&cxl_region_rwsem); + guard(rwsem_read)(&cxl_rwsem.region); if (p->state != CXL_CONFIG_COMMIT) return ERR_PTR(-ENXIO); @@ -3326,7 +3286,7 @@ static int match_region_by_range(struct device *dev, const void *data) cxlr = to_cxl_region(dev); p = &cxlr->params; - guard(rwsem_read)(&cxl_region_rwsem); + guard(rwsem_read)(&cxl_rwsem.region); if (p->res && p->res->start == r->start && p->res->end == r->end) return 1; @@ -3386,7 +3346,7 @@ static int __construct_region(struct cxl_region *cxlr, struct resource *res; int rc; - guard(rwsem_write)(&cxl_region_rwsem); + guard(rwsem_write)(&cxl_rwsem.region); p = &cxlr->params; if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) { dev_err(cxlmd->dev.parent, @@ -3522,10 +3482,10 @@ int cxl_add_to_region(struct cxl_endpoint_decoder *cxled) attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE); - down_read(&cxl_region_rwsem); - p = &cxlr->params; - attach = p->state == CXL_CONFIG_COMMIT; - up_read(&cxl_region_rwsem); + scoped_guard(rwsem_read, &cxl_rwsem.region) { + p = &cxlr->params; + attach = p->state == CXL_CONFIG_COMMIT; + } if (attach) { /* @@ -3550,7 +3510,7 @@ u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa) if (!endpoint) return ~0ULL; - guard(rwsem_write)(&cxl_region_rwsem); + guard(rwsem_write)(&cxl_rwsem.region); xa_for_each(&endpoint->regions, index, iter) { struct cxl_region_params *p = &iter->region->params; @@ -3592,30 +3552,23 @@ static int cxl_region_can_probe(struct cxl_region *cxlr) struct cxl_region_params *p = &cxlr->params; int rc; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) { + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) { dev_dbg(&cxlr->dev, "probe interrupted\n"); return rc; } if (p->state < CXL_CONFIG_COMMIT) { dev_dbg(&cxlr->dev, "config state: %d\n", p->state); - rc = -ENXIO; - goto out; + return -ENXIO; } if (test_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags)) { dev_err(&cxlr->dev, "failed to 
activate, re-commit region and retry\n"); - rc = -ENXIO; - goto out; + return -ENXIO; } -out: - up_read(&cxl_region_rwsem); - - if (rc) - return rc; return 0; } diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 3f1695c96abc..50799a681231 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -469,7 +469,7 @@ enum cxl_config_state { * @nr_targets: number of targets * @cache_size: extended linear cache size if exists, otherwise zero. * - * State transitions are protected by the cxl_region_rwsem + * State transitions are protected by cxl_rwsem.region */ struct cxl_region_params { enum cxl_config_state state; @@ -912,15 +912,4 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port); #endif u16 cxl_gpf_get_dvsec(struct device *dev); - -static inline struct rw_semaphore *rwsem_read_intr_acquire(struct rw_semaphore *rwsem) -{ - if (down_read_interruptible(rwsem)) - return NULL; - - return rwsem; -} - -DEFINE_FREE(rwsem_read_release, struct rw_semaphore *, if (_T) up_read(_T)) - #endif /* __CXL_H__ */ diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index c810deb88d13..cbafdc12e743 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -244,6 +244,7 @@ DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T), _RET == 0) DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) DEFINE_GUARD_COND(rwsem_write, _try, down_write_trylock(_T)) +DEFINE_GUARD_COND(rwsem_write, _kill, down_write_killable(_T), _RET == 0) /* * downgrade write lock to read lock From 23b128bba76776541dc09efaf3acf6242917e1f0 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 10 Jul 2025 18:51:13 -0400 Subject: [PATCH 0725/2411] rust: time: Pass correct timer mode ID to hrtimer_start_range_ns While rebasing rvkms I noticed that timers I was setting seemed to have pretty random timer values that amounted slightly over 2x the time value I set each time. 
After a lot of debugging, I finally managed to figure out why: it seems that since we moved to Instant and Delta, we mistakenly began passing the clocksource ID to hrtimer_start_range_ns, when we should be passing the timer mode instead. Presumably, this works fine for simple relative timers - but immediately breaks on other types of timers. So, fix this by passing the ID for the timer mode instead. Signed-off-by: Lyude Paul Acked-by: Andreas Hindborg Reviewed-by: FUJITA Tomonori Fixes: e0c0ab04f678 ("rust: time: Make HasHrTimer generic over HrTimerMode") Link: https://lore.kernel.org/r/20250710225129.670051-1-lyude@redhat.com [ Removed cast, applied `rustfmt`, fixed `Fixes:` tag. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/time/hrtimer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs index d6830b6bbee7..144e3b57cc78 100644 --- a/rust/kernel/time/hrtimer.rs +++ b/rust/kernel/time/hrtimer.rs @@ -398,7 +398,7 @@ unsafe fn start(this: *const Self, expires: ::Ex Self::c_timer_ptr(this).cast_mut(), expires.as_nanos(), 0, - ::Clock::ID as u32, + ::C_MODE, ); } } From 6a4a2d5cba74e1cd31dabea29be7a79e2a409f3d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 8 Jul 2025 16:33:43 +0300 Subject: [PATCH 0726/2411] watchdog: it87_wdt: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Note that kernel.h is discouraged to be included as it's written at the top of that file. 
Signed-off-by: Andy Shevchenko Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20250708133646.70384-2-andriy.shevchenko@linux.intel.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/it87_wdt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/it87_wdt.c b/drivers/watchdog/it87_wdt.c index a1e23dce8810..3b8488c86a2f 100644 --- a/drivers/watchdog/it87_wdt.c +++ b/drivers/watchdog/it87_wdt.c @@ -22,11 +22,13 @@ #include #include +#include #include #include -#include +#include #include #include +#include #include #include From ddb8172cdf8854a215ce23ad0f20b2578fa512db Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 8 Jul 2025 16:33:44 +0300 Subject: [PATCH 0727/2411] watchdog: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Note that kernel.h is discouraged to be included as it's written at the top of that file. Signed-off-by: Andy Shevchenko Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20250708133646.70384-3-andriy.shevchenko@linux.intel.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/watchdog_core.h | 8 +++++++- drivers/watchdog/watchdog_pretimeout.c | 2 ++ include/linux/watchdog.h | 12 ++++++++---- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/watchdog/watchdog_core.h b/drivers/watchdog/watchdog_core.h index 5b35a8439e26..ab825d9f9248 100644 --- a/drivers/watchdog/watchdog_core.h +++ b/drivers/watchdog/watchdog_core.h @@ -24,8 +24,14 @@ * This material is provided "AS-IS" and at no charge. 
*/ -#include +#include +#include +#include +#include #include +#include +#include +#include #define MAX_DOGS 32 /* Maximum number of watchdog devices */ diff --git a/drivers/watchdog/watchdog_pretimeout.c b/drivers/watchdog/watchdog_pretimeout.c index e5295c990fa1..2526436dc74d 100644 --- a/drivers/watchdog/watchdog_pretimeout.c +++ b/drivers/watchdog/watchdog_pretimeout.c @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include #include "watchdog_core.h" diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 99660197a36c..8c60687a3e55 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -9,14 +9,18 @@ #ifndef _LINUX_WATCHDOG_H #define _LINUX_WATCHDOG_H - #include -#include -#include -#include +#include #include +#include +#include + #include +struct attribute_group; +struct device; +struct module; + struct watchdog_ops; struct watchdog_device; struct watchdog_core_data; From fde41f282590b46e96864ae88da2e2c20a967b3a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 24 Jun 2025 16:19:22 -0700 Subject: [PATCH 0728/2411] MAINTAINERS: Drop Nicolas from maintaining pcie-brcmstb Nicolas indicated a long time back that he would not have the bandwidth and indeed, has not provided any review or feedback since. 
Signed-off-by: Florian Fainelli Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250624231923.990361-2-florian.fainelli@broadcom.com --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..be6adf80c652 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5085,7 +5085,6 @@ F: include/linux/platform_data/brcmnand.h BROADCOM STB PCIE DRIVER M: Jim Quinlan -M: Nicolas Saenz Julienne M: Florian Fainelli R: Broadcom internal kernel review list L: linux-pci@vger.kernel.org From e8e7c1e95d6d4ccdc53654a5966d2183532ab115 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 24 Jun 2025 16:19:23 -0700 Subject: [PATCH 0729/2411] PCI: brcmstb: Replace open coded value with PCIE_T_RRS_READY_MS The delay that we are waiting on in brcm_pcie_start_link() is PCIE_T_RRS_READY_MS, use it. Signed-off-by: Florian Fainelli [mani: Removed the redundant comment] Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250624231923.990361-3-florian.fainelli@broadcom.com --- drivers/pci/controller/pcie-brcmstb.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c index 744df5bd39ae..db7402bb6291 100644 --- a/drivers/pci/controller/pcie-brcmstb.c +++ b/drivers/pci/controller/pcie-brcmstb.c @@ -1358,11 +1358,7 @@ static int brcm_pcie_start_link(struct brcm_pcie *pcie) if (ret) return ret; - /* - * Wait for 100ms after PERST# deassertion; see PCIe CEM specification - * sections 2.2, PCIe r5.0, 6.6.1. 
- */ - msleep(100); + msleep(PCIE_T_RRS_READY_MS); /* * Give the RC/EP even more time to wake up, before trying to From 76720eed7d18baf51c0f31fe8a3784702f50e3fc Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 17 Jul 2025 12:38:04 -0500 Subject: [PATCH 0730/2411] PCI: Add pci_is_display() to check if device is a display controller Several places in the kernel do class shifting to match whether a PCI device is display class. Add pci_is_display() for those places to use. Signed-off-by: Mario Limonciello Signed-off-by: Bjorn Helgaas Reviewed-by: Daniel Dadap Reviewed-by: Simona Vetter Link: https://patch.msgid.link/20250717173812.3633478-2-superm1@kernel.org --- include/linux/pci.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/linux/pci.h b/include/linux/pci.h index 05e68f35f392..4fff6405a830 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -744,6 +744,21 @@ static inline bool pci_is_vga(struct pci_dev *pdev) return false; } +/** + * pci_is_display - check if the PCI device is a display controller + * @pdev: PCI device + * + * Determine whether the given PCI device corresponds to a display + * controller. Display controllers are typically used for graphical output + * and are identified based on their class code. + * + * Return: true if the PCI device is a display controller, false otherwise. + */ +static inline bool pci_is_display(struct pci_dev *pdev) +{ + return (pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY; +} + #define for_each_pci_bridge(dev, bus) \ list_for_each_entry(dev, &bus->devices, bus_list) \ if (!pci_is_bridge(dev)) {} else From a7feca7c88187b83f95dc18ad788015f1bff8939 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 17 Jul 2025 12:38:05 -0500 Subject: [PATCH 0731/2411] vfio/pci: Use pci_is_display() The inline pci_is_display() helper does the same thing. Use it. 
Suggested-by: Bjorn Helgaas Signed-off-by: Mario Limonciello Signed-off-by: Bjorn Helgaas Reviewed-by: Daniel Dadap Reviewed-by: Simona Vetter Acked-by: Alex Williamson Link: https://patch.msgid.link/20250717173812.3633478-3-superm1@kernel.org --- drivers/vfio/pci/vfio_pci_igd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c index ef490a4545f4..988b6919c2c3 100644 --- a/drivers/vfio/pci/vfio_pci_igd.c +++ b/drivers/vfio/pci/vfio_pci_igd.c @@ -437,8 +437,7 @@ static int vfio_pci_igd_cfg_init(struct vfio_pci_core_device *vdev) bool vfio_pci_is_intel_display(struct pci_dev *pdev) { - return (pdev->vendor == PCI_VENDOR_ID_INTEL) && - ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY); + return (pdev->vendor == PCI_VENDOR_ID_INTEL) && pci_is_display(pdev); } int vfio_pci_igd_init(struct vfio_pci_core_device *vdev) From b1060ea44a1f846203ca3ef90389cd0e4f46bc8b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 17 Jul 2025 12:38:06 -0500 Subject: [PATCH 0732/2411] vga_switcheroo: Use pci_is_display() The inline pci_is_display() helper does the same thing. Use it. Suggested-by: Bjorn Helgaas Signed-off-by: Mario Limonciello Signed-off-by: Bjorn Helgaas Reviewed-by: Daniel Dadap Reviewed-by: Simona Vetter Link: https://patch.msgid.link/20250717173812.3633478-4-superm1@kernel.org --- drivers/gpu/vga/vga_switcheroo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c index 18f2c92beff8..68e45a26e85f 100644 --- a/drivers/gpu/vga/vga_switcheroo.c +++ b/drivers/gpu/vga/vga_switcheroo.c @@ -437,7 +437,7 @@ find_active_client(struct list_head *head) */ bool vga_switcheroo_client_probe_defer(struct pci_dev *pdev) { - if ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) { + if (pci_is_display(pdev)) { /* * apple-gmux is needed on pre-retina MacBook Pro * to probe the panel if pdev is the inactive GPU. 
From 75952c497550fd34d60b4e45aee15249d91263fa Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 17 Jul 2025 12:38:07 -0500 Subject: [PATCH 0733/2411] iommu/vt-d: Use pci_is_display() The inline pci_is_display() helper does the same thing. Use it. Suggested-by: Bjorn Helgaas Signed-off-by: Mario Limonciello Signed-off-by: Bjorn Helgaas Reviewed-by: Lu Baolu Reviewed-by: Daniel Dadap Reviewed-by: Simona Vetter Link: https://patch.msgid.link/20250717173812.3633478-5-superm1@kernel.org --- drivers/iommu/intel/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7aa3932251b2..17267cd476ce 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -34,7 +34,7 @@ #define ROOT_SIZE VTD_PAGE_SIZE #define CONTEXT_SIZE VTD_PAGE_SIZE -#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) +#define IS_GFX_DEVICE(pdev) pci_is_display(pdev) #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB) #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) From 6642adf0c1fbe2977597ab277dfd507053a874ac Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 17 Jul 2025 12:38:08 -0500 Subject: [PATCH 0734/2411] ALSA: hda: Use pci_is_display() The inline pci_is_display() helper does the same thing. Use it. 
Suggested-by: Bjorn Helgaas Signed-off-by: Mario Limonciello Signed-off-by: Bjorn Helgaas Reviewed-by: Takashi Iwai Reviewed-by: Daniel Dadap Reviewed-by: Simona Vetter Link: https://patch.msgid.link/20250717173812.3633478-6-superm1@kernel.org --- sound/hda/hdac_i915.c | 2 +- sound/pci/hda/hda_intel.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c index e9425213320e..44438c799f95 100644 --- a/sound/hda/hdac_i915.c +++ b/sound/hda/hdac_i915.c @@ -155,7 +155,7 @@ static int i915_gfx_present(struct pci_dev *hdac_pci) for_each_pci_dev(display_dev) { if (display_dev->vendor != PCI_VENDOR_ID_INTEL || - (display_dev->class >> 16) != PCI_BASE_CLASS_DISPLAY) + !pci_is_display(display_dev)) continue; if (pci_match_id(denylist, display_dev)) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index e5210ed48ddf..a165c44b4394 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1465,7 +1465,7 @@ static struct pci_dev *get_bound_vga(struct pci_dev *pci) * the dGPU is the one who is involved in * vgaswitcheroo. 
*/ - if (((p->class >> 16) == PCI_BASE_CLASS_DISPLAY) && + if (pci_is_display(p) && (atpx_present() || apple_gmux_detect(NULL, NULL))) return p; pci_dev_put(p); @@ -1477,7 +1477,7 @@ static struct pci_dev *get_bound_vga(struct pci_dev *pci) p = pci_get_domain_bus_and_slot(pci_domain_nr(pci->bus), pci->bus->number, 0); if (p) { - if ((p->class >> 16) == PCI_BASE_CLASS_DISPLAY) + if (pci_is_display(p)) return p; pci_dev_put(p); } From 39f473f6d0b24cf375893f2110b1cc9d8a079a42 Mon Sep 17 00:00:00 2001 From: Anubhav Shelat Date: Wed, 16 Jul 2025 16:39:15 -0400 Subject: [PATCH 0735/2411] perf sched timehist: decode process names of processes in zombie state Previously when running perf sched timehist --state, when recording processes in the zombie state the process name would not be decoded properly and appeared with just the PID: 1140057.412177 [0006] Mutter Input Th[3139/3104] 0.956 0.019 0.041 S 1140057.412222 [0012] :1248612[1248612] 0.000 0.000 0.332 Z 1140057.412275 [0004] 0.052 0.052 0.953 I 1140057.412284 [0008] 0.070 0.070 0.932 I 1140057.412333 [0004] KMS thread[3126/3104] 0.953 0.112 0.058 S Now some extra processing has been added to decode the process name: 1140057.412177 [0006] Mutter Input Th[3139/3104] 0.956 0.019 0.041 S 1140057.412222 [0012] sleep[1248612] 0.000 0.000 0.332 Z 1140057.412275 [0004] 0.052 0.052 0.953 I 1140057.412284 [0008] 0.070 0.070 0.932 I 1140057.412333 [0004] KMS thread[3126/3104] 0.953 0.112 0.058 S Signed-off-by: Anubhav Shelat Link: https://lore.kernel.org/r/20250716203914.45772-2-ashelat@redhat.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-sched.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 4bbebd6ef2e4..34051ad23493 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2201,6 +2201,11 @@ static void timehist_print_sample(struct perf_sched *sched, printf(" "); } + if (!thread__comm_set(thread)) { + const char *prev_comm = 
evsel__strval(evsel, sample, "prev_comm"); + thread__set_comm(thread, prev_comm, sample->time); + } + printf(" %-*s ", comm_width, timehist_get_commstr(thread)); if (sched->show_prio) From b2df55a98672f4be076ff69d0f0d0b1fc81f2044 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 17 Jul 2025 09:30:36 -0700 Subject: [PATCH 0736/2411] cleanup: Fix documentation build error for ACQUIRE updates Stephen reports: Documentation/core-api/cleanup:7: include/linux/cleanup.h:73: ERROR: Unexpected indentation. [docutils] Documentation/core-api/cleanup:7: include/linux/cleanup.h:74: WARNING: Block quote ends without a blank line; unexpected unindent. [docutils] Which points out that the ACQUIRE() example in cleanup.h missed the "::" suffix to mark the following text as a code-block. Fixes: 857d18f23ab1 ("cleanup: Introduce ACQUIRE() and ACQUIRE_ERR() for conditional locks") Reported-by: Stephen Rothwell Closes: http://lore.kernel.org/20250717173354.34375751@canb.auug.org.au Signed-off-by: Dan Williams Acked-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://patch.msgid.link/20250717163036.1275791-1-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- include/linux/cleanup.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 4eb83dd71cfe..0fb796db4811 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -64,8 +64,7 @@ * the remainder of "func()". * * The ACQUIRE() macro can be used in all places that guard() can be - * used and additionally support conditional locks - * + * used and additionally support conditional locks:: * * DEFINE_GUARD_COND(pci_dev, _try, pci_dev_trylock(_T)) * ... 
From 3796f2985c267b90052613cf0b379e51c61e9367 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Thu, 17 Jul 2025 11:12:51 +0800 Subject: [PATCH 0737/2411] cxl: Fix -Werror=return-type in cxl_decoder_detach() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix following compiling errors: In file included from ../drivers/cxl/core/pmu.c:10: ../drivers/cxl/core/core.h: In function ‘cxl_decoder_detach’: ../drivers/cxl/core/core.h:65:1: error: no return statement in function returning non-void [-Werror=return-type] } ^ cc1: some warnings being treated as errors CC [M] drivers/nvdimm/claim.o make[6]: *** [../scripts/Makefile.build:287: drivers/cxl/core/pmu.o] Error 1 make[6]: *** Waiting for unfinished jobs.... CC [M] drivers/infiniband/core/verbs.o Fixes: b3a88225519c ("cxl/region: Consolidate cxl_decoder_kill_region() and cxl_region_detach()") Signed-off-by: Li Zhijian Link: https://patch.msgid.link/20250717031251.1043825-1-lizhijian@fujitsu.com Signed-off-by: Dave Jiang --- drivers/cxl/core/core.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 705a5f09aa78..2669f251d677 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -62,6 +62,7 @@ static inline int cxl_decoder_detach(struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled, int pos, enum cxl_detach_mode mode) { + return 0; } static inline int cxl_region_init(void) { From 1f4f8166110f037f15a89c2203ff887b98a8393a Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Thu, 17 Jul 2025 11:18:14 +0100 Subject: [PATCH 0738/2411] cxl/events: Update Common Event Record to CXL spec rev 3.2 CXL spec 3.2 section 8.2.10.2.1 Table 8-55, Common Event Record format defined new fields LD-ID and Head ID. LD-ID: ID of logical device from where the event originated, which is valid only if LD-ID valid flag is set to 1. 
CXL spec 3.2 Section 2.4 describes, a Type 3 Multi-Logical Device (MLD) can partition its resources into up to 16 isolated Logical Devices. Each Logical Device is identified by a Logical Device Identifier (LD-ID) in CXL.mem and CXL.io protocols. LD-ID is a 16-bit Logical Device identifier applicable for CXL.io and CXL.mem requests and responses. CXL.mem supports only the lower 4 bits of LD-ID and therefore can support up to 16 unique LD-ID values over the link. Requests and responses forwarded over an MLD Port are tagged with LD-ID. Head ID: ID of the device head, from where the event originated, which is valid only if head valid flag is set to 1. Add updates for the above spec changes in the CXL events record and CXL common trace event implementation. Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Signed-off-by: Shiju Jose Link: https://patch.msgid.link/20250717101817.2104-2-shiju.jose@huawei.com Signed-off-by: Dave Jiang --- drivers/cxl/core/trace.h | 18 ++++++++++++++---- include/cxl/event.h | 4 +++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index 25ebfbc1616c..a77487a257b3 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -214,12 +214,16 @@ TRACE_EVENT(cxl_overflow, #define CXL_EVENT_RECORD_FLAG_PERF_DEGRADED BIT(4) #define CXL_EVENT_RECORD_FLAG_HW_REPLACE BIT(5) #define CXL_EVENT_RECORD_FLAG_MAINT_OP_SUB_CLASS_VALID BIT(6) +#define CXL_EVENT_RECORD_FLAG_LD_ID_VALID BIT(7) +#define CXL_EVENT_RECORD_FLAG_HEAD_ID_VALID BIT(8) #define show_hdr_flags(flags) __print_flags(flags, " | ", \ { CXL_EVENT_RECORD_FLAG_PERMANENT, "PERMANENT_CONDITION" }, \ { CXL_EVENT_RECORD_FLAG_MAINT_NEEDED, "MAINTENANCE_NEEDED" }, \ { CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, "PERFORMANCE_DEGRADED" }, \ { CXL_EVENT_RECORD_FLAG_HW_REPLACE, "HARDWARE_REPLACEMENT_NEEDED" }, \ - { CXL_EVENT_RECORD_FLAG_MAINT_OP_SUB_CLASS_VALID, "MAINT_OP_SUB_CLASS_VALID" } \ + { 
CXL_EVENT_RECORD_FLAG_MAINT_OP_SUB_CLASS_VALID, "MAINT_OP_SUB_CLASS_VALID" }, \ + { CXL_EVENT_RECORD_FLAG_LD_ID_VALID, "LD_ID_VALID" }, \ + { CXL_EVENT_RECORD_FLAG_HEAD_ID_VALID, "HEAD_ID_VALID" } \ ) /* @@ -247,7 +251,9 @@ TRACE_EVENT(cxl_overflow, __field(u64, hdr_timestamp) \ __field(u8, hdr_length) \ __field(u8, hdr_maint_op_class) \ - __field(u8, hdr_maint_op_sub_class) + __field(u8, hdr_maint_op_sub_class) \ + __field(u16, hdr_ld_id) \ + __field(u8, hdr_head_id) #define CXL_EVT_TP_fast_assign(cxlmd, l, hdr) \ __assign_str(memdev); \ @@ -260,18 +266,22 @@ TRACE_EVENT(cxl_overflow, __entry->hdr_related_handle = le16_to_cpu((hdr).related_handle); \ __entry->hdr_timestamp = le64_to_cpu((hdr).timestamp); \ __entry->hdr_maint_op_class = (hdr).maint_op_class; \ - __entry->hdr_maint_op_sub_class = (hdr).maint_op_sub_class + __entry->hdr_maint_op_sub_class = (hdr).maint_op_sub_class; \ + __entry->hdr_ld_id = le16_to_cpu((hdr).ld_id); \ + __entry->hdr_head_id = (hdr).head_id #define CXL_EVT_TP_printk(fmt, ...) 
\ TP_printk("memdev=%s host=%s serial=%lld log=%s : time=%llu uuid=%pUb " \ "len=%d flags='%s' handle=%x related_handle=%x " \ - "maint_op_class=%u maint_op_sub_class=%u : " fmt, \ + "maint_op_class=%u maint_op_sub_class=%u " \ + "ld_id=%x head_id=%x : " fmt, \ __get_str(memdev), __get_str(host), __entry->serial, \ cxl_event_log_type_str(__entry->log), \ __entry->hdr_timestamp, &__entry->hdr_uuid, __entry->hdr_length,\ show_hdr_flags(__entry->hdr_flags), __entry->hdr_handle, \ __entry->hdr_related_handle, __entry->hdr_maint_op_class, \ __entry->hdr_maint_op_sub_class, \ + __entry->hdr_ld_id, __entry->hdr_head_id, \ ##__VA_ARGS__) TRACE_EVENT(cxl_generic_event, diff --git a/include/cxl/event.h b/include/cxl/event.h index f9ae1796da85..f4cb8568566b 100644 --- a/include/cxl/event.h +++ b/include/cxl/event.h @@ -19,7 +19,9 @@ struct cxl_event_record_hdr { __le64 timestamp; u8 maint_op_class; u8 maint_op_sub_class; - u8 reserved[14]; + __le16 ld_id; + u8 head_id; + u8 reserved[11]; } __packed; struct cxl_event_media_hdr { From cd3b36cfc659306456d3cf3714c8856307693c01 Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Thu, 17 Jul 2025 11:18:15 +0100 Subject: [PATCH 0739/2411] cxl/events: Add extra validity checks for corrected memory error count in General Media Event Record According to the CXL Specification Revision 3.2, Section 8.2.10.2.1.1, Table 8-57 (General Media Event Record), the Corrected Memory Error Count field is valid under the following conditions: 1. The Threshold Event bit is set in the Memory Event Descriptor field, and 2. The Corrected Memory Error Count must be greater than 0 for events where the Advanced Programmable Threshold Counter has expired. Additionally, if the Advanced Programmable Corrected Memory Error Counter Expire bit in the Memory Event Type field is set, then the Threshold Event bit in the Memory Event Descriptor field shall also be set. Add validity checks for the above conditions while reporting the event to the userspace. 
Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Signed-off-by: Shiju Jose Link: https://patch.msgid.link/20250717101817.2104-3-shiju.jose@huawei.com Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 9 +++++++++ drivers/cxl/core/trace.h | 5 ++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 2689e6453c5a..ba4a29afd3aa 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -926,6 +926,15 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, if (cxl_store_rec_gen_media((struct cxl_memdev *)cxlmd, evt)) dev_dbg(&cxlmd->dev, "CXL store rec_gen_media failed\n"); + if (evt->gen_media.media_hdr.descriptor & + CXL_GMER_EVT_DESC_THRESHOLD_EVENT) + WARN_ON_ONCE((evt->gen_media.media_hdr.type & + CXL_GMER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE) && + !get_unaligned_le24(evt->gen_media.cme_count)); + else + WARN_ON_ONCE(evt->gen_media.media_hdr.type & + CXL_GMER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE); + trace_cxl_general_media(cxlmd, type, cxlr, hpa, hpa_alias, &evt->gen_media); } else if (event_type == CXL_CPER_EVENT_DRAM) { diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index a77487a257b3..c38f94ca0ca1 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -506,7 +506,10 @@ TRACE_EVENT(cxl_general_media, uuid_copy(&__entry->region_uuid, &uuid_null); } __entry->cme_threshold_ev_flags = rec->cme_threshold_ev_flags; - __entry->cme_count = get_unaligned_le24(rec->cme_count); + if (rec->media_hdr.descriptor & CXL_GMER_EVT_DESC_THRESHOLD_EVENT) + __entry->cme_count = get_unaligned_le24(rec->cme_count); + else + __entry->cme_count = 0; ), CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' " \ From d8145bb8af5c09d27c4dde4f4030d589771594d1 Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Thu, 17 Jul 2025 11:18:16 +0100 Subject: [PATCH 0740/2411] cxl/events: Add extra validity checks for CVME count in DRAM Event Record According to the CXL Specification Revision 
3.2, Section 8.2.10.2.1.2, Table 8-58 (DRAM Event Record), the CVME (Corrected Volatile Memory Error) Count field is valid under the following conditions: 1. The Threshold Event bit is set in the Memory Event Descriptor field, and 2. The CVME Count must be greater than 0 for events where the Advanced Programmable Threshold Counter has expired. Additionally, if the Advanced Programmable Corrected Memory Error Counter Expire bit in the Memory Event Type field is set, then the Threshold Event bit in the Memory Event Descriptor field shall also be set. Add validity checks for the above conditions while reporting the event to the userspace. Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Signed-off-by: Shiju Jose Link: https://patch.msgid.link/20250717101817.2104-4-shiju.jose@huawei.com Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 9 +++++++++ drivers/cxl/core/trace.h | 5 ++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index ba4a29afd3aa..445889b128cd 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -941,6 +941,15 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, if (cxl_store_rec_dram((struct cxl_memdev *)cxlmd, evt)) dev_dbg(&cxlmd->dev, "CXL store rec_dram failed\n"); + if (evt->dram.media_hdr.descriptor & + CXL_GMER_EVT_DESC_THRESHOLD_EVENT) + WARN_ON_ONCE((evt->dram.media_hdr.type & + CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE) && + !get_unaligned_le24(evt->dram.cvme_count)); + else + WARN_ON_ONCE(evt->dram.media_hdr.type & + CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE); + trace_cxl_dram(cxlmd, type, cxlr, hpa, hpa_alias, &evt->dram); } diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index c38f94ca0ca1..462c2e892ba2 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -661,7 +661,10 @@ TRACE_EVENT(cxl_dram, CXL_EVENT_GEN_MED_COMP_ID_SIZE); __entry->sub_channel = rec->sub_channel; __entry->cme_threshold_ev_flags = 
rec->cme_threshold_ev_flags; - __entry->cvme_count = get_unaligned_le24(rec->cvme_count); + if (rec->media_hdr.descriptor & CXL_GMER_EVT_DESC_THRESHOLD_EVENT) + __entry->cvme_count = get_unaligned_le24(rec->cvme_count); + else + __entry->cvme_count = 0; ), CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' descriptor='%s' type='%s' sub_type='%s' " \ From f10f46a0ee53420f707195fe33b7c235a1c0e48a Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Thu, 17 Jul 2025 11:18:17 +0100 Subject: [PATCH 0741/2411] cxl/events: Trace Memory Sparing Event Record CXL rev 3.2 section 8.2.10.2.1.4 Table 8-60 defines the Memory Sparing Event Record. Determine if the event read is memory sparing record and if so trace the record. Memory device shall produce a memory sparing event record 1. After completion of a PPR maintenance operation if the memory sparing event record enable bit is set (Field: sPPR/hPPR Operation Mode in Table 8-128/Table 8-131). 2. In response to a query request by the host (see section 8.2.10.7.1.4) to determine the availability of sparing resources. The device shall report the resource availability by producing the Memory Sparing Event Record (see Table 8-60) in which the channel, rank, nibble mask, bank group, bank, row, column, sub-channel fields are a copy of the values specified in the request. If the controller does not support reporting whether a resource is available, and a perform maintenance operation for memory sparing is issued with query resources set to 1, the controller shall return invalid input. 
Example trace log for a memory sparing event record produced on completion of a soft PPR operation: cxl_memory_sparing: memdev=mem1 host=0000:0f:00.0 serial=3 log=Informational : time=55045163029 uuid=e71f3a40-2d29-4092-8a39-4d1c966c7c65 len=128 flags='0x1' handle=1 related_handle=0 maint_op_class=2 maint_op_sub_class=1 ld_id=0 head_id=0 : flags='' result=0 validity_flags='CHANNEL|RANK|NIBBLE|BANK GROUP|BANK|ROW|COLUMN' spare resource avail=1 channel=2 rank=5 nibble_mask=a59c bank_group=2 bank=4 row=13 column=23 sub_channel=0 comp_id=00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 comp_id_pldm_valid_flags='' pldm_entity_id=0x00 pldm_resource_id=0x00 Note: For the memory sparing event record, the fields 'maintenance operation class' and 'maintenance operation subclass' are defined twice, first in the common event record (Table 8-55) and second in the memory sparing event record (Table 8-60). Thus those in the sparing event record are coded as reserved, to be removed when the spec is updated. Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Signed-off-by: Shiju Jose Link: https://patch.msgid.link/20250717101817.2104-5-shiju.jose@huawei.com Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 6 +++ drivers/cxl/core/trace.h | 105 +++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxlmem.h | 8 +++ include/cxl/event.h | 33 ++++++++++++ 4 files changed, 152 insertions(+) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 445889b128cd..f7e081c00c49 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -899,6 +899,10 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, trace_cxl_generic_event(cxlmd, type, uuid, &evt->generic); return; } + if (event_type == CXL_CPER_EVENT_MEM_SPARING) { + trace_cxl_memory_sparing(cxlmd, type, &evt->mem_sparing); + return; + } if (trace_cxl_general_media_enabled() || trace_cxl_dram_enabled()) { u64 dpa, hpa = ULLONG_MAX, hpa_alias = ULLONG_MAX; @@ -970,6 +974,8 @@ static void 
__cxl_event_trace_record(const struct cxl_memdev *cxlmd, ev_type = CXL_CPER_EVENT_DRAM; else if (uuid_equal(uuid, &CXL_EVENT_MEM_MODULE_UUID)) ev_type = CXL_CPER_EVENT_MEM_MODULE; + else if (uuid_equal(uuid, &CXL_EVENT_MEM_SPARING_UUID)) + ev_type = CXL_CPER_EVENT_MEM_SPARING; cxl_event_trace_record(cxlmd, type, ev_type, uuid, &record->event); } diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index 462c2e892ba2..a53ec4798b12 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -887,6 +887,111 @@ TRACE_EVENT(cxl_memory_module, ) ); +/* + * Memory Sparing Event Record - MSER + * + * CXL rev 3.2 section 8.2.10.2.1.4; Table 8-60 + */ +#define CXL_MSER_QUERY_RESOURCE_FLAG BIT(0) +#define CXL_MSER_HARD_SPARING_FLAG BIT(1) +#define CXL_MSER_DEV_INITED_FLAG BIT(2) +#define show_mem_sparing_flags(flags) __print_flags(flags, "|", \ + { CXL_MSER_QUERY_RESOURCE_FLAG, "Query Resources" }, \ + { CXL_MSER_HARD_SPARING_FLAG, "Hard Sparing" }, \ + { CXL_MSER_DEV_INITED_FLAG, "Device Initiated Sparing" } \ +) + +#define CXL_MSER_VALID_CHANNEL BIT(0) +#define CXL_MSER_VALID_RANK BIT(1) +#define CXL_MSER_VALID_NIBBLE BIT(2) +#define CXL_MSER_VALID_BANK_GROUP BIT(3) +#define CXL_MSER_VALID_BANK BIT(4) +#define CXL_MSER_VALID_ROW BIT(5) +#define CXL_MSER_VALID_COLUMN BIT(6) +#define CXL_MSER_VALID_COMPONENT_ID BIT(7) +#define CXL_MSER_VALID_COMPONENT_ID_FORMAT BIT(8) +#define CXL_MSER_VALID_SUB_CHANNEL BIT(9) +#define show_mem_sparing_valid_flags(flags) __print_flags(flags, "|", \ + { CXL_MSER_VALID_CHANNEL, "CHANNEL" }, \ + { CXL_MSER_VALID_RANK, "RANK" }, \ + { CXL_MSER_VALID_NIBBLE, "NIBBLE" }, \ + { CXL_MSER_VALID_BANK_GROUP, "BANK GROUP" }, \ + { CXL_MSER_VALID_BANK, "BANK" }, \ + { CXL_MSER_VALID_ROW, "ROW" }, \ + { CXL_MSER_VALID_COLUMN, "COLUMN" }, \ + { CXL_MSER_VALID_COMPONENT_ID, "COMPONENT ID" }, \ + { CXL_MSER_VALID_COMPONENT_ID_FORMAT, "COMPONENT ID PLDM FORMAT" }, \ + { CXL_MSER_VALID_SUB_CHANNEL, "SUB CHANNEL" } \ +) + 
+TRACE_EVENT(cxl_memory_sparing, + + TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, + struct cxl_event_mem_sparing *rec), + + TP_ARGS(cxlmd, log, rec), + + TP_STRUCT__entry( + CXL_EVT_TP_entry + + /* Memory Sparing Event */ + __field(u8, flags) + __field(u8, result) + __field(u16, validity_flags) + __field(u16, res_avail) + __field(u8, channel) + __field(u8, rank) + __field(u32, nibble_mask) + __field(u8, bank_group) + __field(u8, bank) + __field(u32, row) + __field(u16, column) + __field(u8, sub_channel) + __array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE) + ), + + TP_fast_assign( + CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); + __entry->hdr_uuid = CXL_EVENT_MEM_SPARING_UUID; + + /* Memory Sparing Event */ + __entry->flags = rec->flags; + __entry->result = rec->result; + __entry->validity_flags = le16_to_cpu(rec->validity_flags); + __entry->res_avail = le16_to_cpu(rec->res_avail); + __entry->channel = rec->channel; + __entry->rank = rec->rank; + __entry->nibble_mask = get_unaligned_le24(rec->nibble_mask); + __entry->bank_group = rec->bank_group; + __entry->bank = rec->bank; + __entry->row = get_unaligned_le24(rec->row); + __entry->column = le16_to_cpu(rec->column); + __entry->sub_channel = rec->sub_channel; + memcpy(__entry->comp_id, &rec->component_id, + CXL_EVENT_GEN_MED_COMP_ID_SIZE); + ), + + CXL_EVT_TP_printk("flags='%s' result=%u validity_flags='%s' " \ + "spare resource avail=%u channel=%u rank=%u " \ + "nibble_mask=%x bank_group=%u bank=%u " \ + "row=%u column=%u sub_channel=%u " \ + "comp_id=%s comp_id_pldm_valid_flags='%s' " \ + "pldm_entity_id=%s pldm_resource_id=%s", + show_mem_sparing_flags(__entry->flags), + __entry->result, + show_mem_sparing_valid_flags(__entry->validity_flags), + __entry->res_avail, __entry->channel, __entry->rank, + __entry->nibble_mask, __entry->bank_group, __entry->bank, + __entry->row, __entry->column, __entry->sub_channel, + __print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE), + 
show_comp_id_pldm_flags(__entry->comp_id[0]), + show_pldm_entity_id(__entry->validity_flags, CXL_MSER_VALID_COMPONENT_ID, + CXL_MSER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), + show_pldm_resource_id(__entry->validity_flags, CXL_MSER_VALID_COMPONENT_ID, + CXL_MSER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id) + ) +); + #define show_poison_trace_type(type) \ __print_symbolic(type, \ { CXL_POISON_TRACE_LIST, "List" }, \ diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 551b0ba2caa1..f98311f357b7 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -633,6 +633,14 @@ struct cxl_mbox_identify { UUID_INIT(0xfe927475, 0xdd59, 0x4339, 0xa5, 0x86, 0x79, 0xba, 0xb1, \ 0x13, 0xb7, 0x74) +/* + * Memory Sparing Event Record UUID + * CXL rev 3.2 section 8.2.10.2.1.4: Table 8-60 + */ +#define CXL_EVENT_MEM_SPARING_UUID \ + UUID_INIT(0xe71f3a40, 0x2d29, 0x4092, 0x8a, 0x39, 0x4d, 0x1c, 0x96, \ + 0x6c, 0x7c, 0x65) + /* * Get Event Records output payload * CXL rev 3.0 section 8.2.9.2.2; Table 8-50 diff --git a/include/cxl/event.h b/include/cxl/event.h index f4cb8568566b..6fd90f9cc203 100644 --- a/include/cxl/event.h +++ b/include/cxl/event.h @@ -110,11 +110,43 @@ struct cxl_event_mem_module { u8 reserved[0x2a]; } __packed; +/* + * Memory Sparing Event Record - MSER + * CXL rev 3.2 section 8.2.10.2.1.4; Table 8-60 + */ +struct cxl_event_mem_sparing { + struct cxl_event_record_hdr hdr; + /* + * The fields maintenance operation class and maintenance operation + * subclass defined in the Memory Sparing Event Record are the + * duplication of the same in the common event record. Thus defined + * as reserved and to be removed after the spec correction. 
+ */ + u8 rsv1; + u8 rsv2; + u8 flags; + u8 result; + __le16 validity_flags; + u8 reserved1[6]; + __le16 res_avail; + u8 channel; + u8 rank; + u8 nibble_mask[3]; + u8 bank_group; + u8 bank; + u8 row[3]; + __le16 column; + u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; + u8 sub_channel; + u8 reserved2[0x25]; +} __packed; + union cxl_event { struct cxl_event_generic generic; struct cxl_event_gen_media gen_media; struct cxl_event_dram dram; struct cxl_event_mem_module mem_module; + struct cxl_event_mem_sparing mem_sparing; /* dram & gen_media event header */ struct cxl_event_media_hdr media_hdr; } __packed; @@ -133,6 +165,7 @@ enum cxl_event_type { CXL_CPER_EVENT_GEN_MEDIA, CXL_CPER_EVENT_DRAM, CXL_CPER_EVENT_MEM_MODULE, + CXL_CPER_EVENT_MEM_SPARING, }; #define CPER_CXL_DEVICE_ID_VALID BIT(0) From 49d6e658e758e42aaff8ae5ecdd2d06b29abf53e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 18 Jul 2025 16:22:40 -0500 Subject: [PATCH 0742/2411] cxl/region: Fix an ERR_PTR() vs NULL bug The __cxl_decoder_detach() function is expected to return NULL on error but this error path accidentally returns an error pointer. It could potentially lead to an error pointer dereference in the caller. Change it to return NULL. 
Fixes: b3a88225519c ("cxl/region: Consolidate cxl_decoder_kill_region() and cxl_region_detach()") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/7def7da0-326a-410d-8c92-718c8963c0a2@sabinyo.mountain Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index e2e9cce13cd2..e9bf42d91689 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2108,7 +2108,7 @@ __cxl_decoder_detach(struct cxl_region *cxlr, if (pos >= p->interleave_ways) { dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos, p->interleave_ways); - return ERR_PTR(-ENXIO); + return NULL; } if (!p->targets[pos]) From 63149542dcf4468ed15469035740a937cf9ff88a Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 17 Jul 2025 14:57:58 +0200 Subject: [PATCH 0743/2411] pinctrl: ma35: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. 
Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250717125758.53141-1-brgl@bgdev.pl Signed-off-by: Linus Walleij --- drivers/pinctrl/nuvoton/pinctrl-ma35.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/nuvoton/pinctrl-ma35.c b/drivers/pinctrl/nuvoton/pinctrl-ma35.c index 06ae1fe8b8c5..0562d2476b35 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-ma35.c +++ b/drivers/pinctrl/nuvoton/pinctrl-ma35.c @@ -361,7 +361,7 @@ static int ma35_gpio_core_get(struct gpio_chip *gc, unsigned int gpio) return !!(readl(reg_pin) & BIT(gpio)); } -static void ma35_gpio_core_set(struct gpio_chip *gc, unsigned int gpio, int val) +static int ma35_gpio_core_set(struct gpio_chip *gc, unsigned int gpio, int val) { struct ma35_pin_bank *bank = gpiochip_get_data(gc); void __iomem *reg_dout = bank->reg_base + MA35_GP_REG_DOUT; @@ -373,6 +373,8 @@ static void ma35_gpio_core_set(struct gpio_chip *gc, unsigned int gpio, int val) regval = readl(reg_dout) & ~BIT(gpio); writel(regval, reg_dout); + + return 0; } static int ma35_gpio_core_to_request(struct gpio_chip *gc, unsigned int gpio) @@ -524,7 +526,7 @@ static int ma35_gpiolib_register(struct platform_device *pdev, struct ma35_pinct bank->chip.direction_input = ma35_gpio_core_direction_in; bank->chip.direction_output = ma35_gpio_core_direction_out; bank->chip.get = ma35_gpio_core_get; - bank->chip.set = ma35_gpio_core_set; + bank->chip.set_rv = ma35_gpio_core_set; bank->chip.base = -1; bank->chip.ngpio = bank->nr_pins; bank->chip.can_sleep = false; From dd47155a0e6f4ad1fe9ae0a00282f324153bb3a8 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 9 Jul 2025 16:38:57 +0200 Subject: [PATCH 0744/2411] pinctrl: pinmux: open-code PINCTRL_FUNCTION_DESC() This macro is only used in one place and pin function descriptors should only be created by pinmux core so there's no point in exposing it to other pinctrl users. Remove the macro and hand-code its functionality. 
Signed-off-by: Bartosz Golaszewski Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/20250709-pinctrl-gpio-pinfuncs-v2-1-b6135149c0d9@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinmux.c | 3 ++- drivers/pinctrl/pinmux.h | 7 ------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c index 2c31e7f2a27a..a7865997ea14 100644 --- a/drivers/pinctrl/pinmux.c +++ b/drivers/pinctrl/pinmux.c @@ -891,7 +891,8 @@ int pinmux_generic_add_function(struct pinctrl_dev *pctldev, if (!function) return -ENOMEM; - *function = PINCTRL_FUNCTION_DESC(name, groups, ngroups, data); + function->func = PINCTRL_PINFUNCTION(name, groups, ngroups); + function->data = data; error = radix_tree_insert(&pctldev->pin_function_tree, selector, function); if (error) diff --git a/drivers/pinctrl/pinmux.h b/drivers/pinctrl/pinmux.h index 2965ec20b77f..5c039fd09f74 100644 --- a/drivers/pinctrl/pinmux.h +++ b/drivers/pinctrl/pinmux.h @@ -141,13 +141,6 @@ struct function_desc { void *data; }; -/* Convenient macro to define a generic pin function descriptor */ -#define PINCTRL_FUNCTION_DESC(_name, _grps, _num_grps, _data) \ -(struct function_desc) { \ - .func = PINCTRL_PINFUNCTION(_name, _grps, _num_grps), \ - .data = _data, \ -} - int pinmux_generic_get_function_count(struct pinctrl_dev *pctldev); const char * From 431b68ae73566125e498a6b95b44afc3325c2f18 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 9 Jul 2025 16:38:58 +0200 Subject: [PATCH 0745/2411] pinctrl: provide pinmux_generic_add_pinfunction() Several drivers call pinmux_generic_add_function() passing it the contents of struct pinfunction as first three arguments. We can make this shorter by simply providing an interface allowing to pass the address of struct pinfunction directly when adding a new function. 
Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250709-pinctrl-gpio-pinfuncs-v2-2-b6135149c0d9@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinmux.c | 24 ++++++++++++++++++------ drivers/pinctrl/pinmux.h | 3 +++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c index a7865997ea14..79814758a084 100644 --- a/drivers/pinctrl/pinmux.c +++ b/drivers/pinctrl/pinmux.c @@ -874,14 +874,26 @@ int pinmux_generic_add_function(struct pinctrl_dev *pctldev, const char * const *groups, const unsigned int ngroups, void *data) +{ + struct pinfunction func = PINCTRL_PINFUNCTION(name, groups, ngroups); + + return pinmux_generic_add_pinfunction(pctldev, &func, data); +} +EXPORT_SYMBOL_GPL(pinmux_generic_add_function); + +/** + * pinmux_generic_add_pinfunction() - adds a function group + * @pctldev: pin controller device + * @func: pinfunction structure describing the function group + * @data: pin controller driver specific data + */ +int pinmux_generic_add_pinfunction(struct pinctrl_dev *pctldev, + const struct pinfunction *func, void *data) { struct function_desc *function; int selector, error; - if (!name) - return -EINVAL; - - selector = pinmux_func_name_to_selector(pctldev, name); + selector = pinmux_func_name_to_selector(pctldev, func->name); if (selector >= 0) return selector; @@ -891,7 +903,7 @@ int pinmux_generic_add_function(struct pinctrl_dev *pctldev, if (!function) return -ENOMEM; - function->func = PINCTRL_PINFUNCTION(name, groups, ngroups); + function->func = *func; function->data = data; error = radix_tree_insert(&pctldev->pin_function_tree, selector, function); @@ -902,7 +914,7 @@ int pinmux_generic_add_function(struct pinctrl_dev *pctldev, return selector; } -EXPORT_SYMBOL_GPL(pinmux_generic_add_function); +EXPORT_SYMBOL_GPL(pinmux_generic_add_pinfunction); /** * pinmux_generic_remove_function() - removes a numbered function diff --git a/drivers/pinctrl/pinmux.h 
b/drivers/pinctrl/pinmux.h index 5c039fd09f74..bdb5be1a636e 100644 --- a/drivers/pinctrl/pinmux.h +++ b/drivers/pinctrl/pinmux.h @@ -161,6 +161,9 @@ int pinmux_generic_add_function(struct pinctrl_dev *pctldev, unsigned int const ngroups, void *data); +int pinmux_generic_add_pinfunction(struct pinctrl_dev *pctldev, + const struct pinfunction *func, void *data); + int pinmux_generic_remove_function(struct pinctrl_dev *pctldev, unsigned int selector); From cc154c00a61cdddafa8f6053afa09fcc519ddf25 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 9 Jul 2025 16:38:59 +0200 Subject: [PATCH 0746/2411] pinctrl: equilibrium: use pinmux_generic_add_pinfunction() Instead of passing individual fields of struct pinfunction to pinmux_generic_add_function(), use pinmux_generic_add_pinfunction() and pass the entire structure directly. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250709-pinctrl-gpio-pinfuncs-v2-3-b6135149c0d9@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-equilibrium.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/pinctrl-equilibrium.c b/drivers/pinctrl/pinctrl-equilibrium.c index 128b7efb110a..fce804d42e7d 100644 --- a/drivers/pinctrl/pinctrl-equilibrium.c +++ b/drivers/pinctrl/pinctrl-equilibrium.c @@ -687,11 +687,8 @@ static int eqbr_build_functions(struct eqbr_pinctrl_drv_data *drvdata) if (funcs[i].name == NULL) continue; - ret = pinmux_generic_add_function(drvdata->pctl_dev, - funcs[i].name, - funcs[i].groups, - funcs[i].ngroups, - drvdata); + ret = pinmux_generic_add_pinfunction(drvdata->pctl_dev, + &funcs[i], drvdata); if (ret < 0) { dev_err(dev, "Failed to register function %s\n", funcs[i].name); From 7d7883db6efb7c48c8e9f94ed59c910f14256771 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 9 Jul 2025 16:39:00 +0200 Subject: [PATCH 0747/2411] pinctrl: airoha: use pinmux_generic_add_pinfunction() Instead of passing individual fields of struct 
pinfunction to pinmux_generic_add_function(), use pinmux_generic_add_pinfunction() and pass the entire structure directly. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250709-pinctrl-gpio-pinfuncs-v2-4-b6135149c0d9@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-airoha.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-airoha.c b/drivers/pinctrl/mediatek/pinctrl-airoha.c index ccd2b512e836..1737b88530c3 100644 --- a/drivers/pinctrl/mediatek/pinctrl-airoha.c +++ b/drivers/pinctrl/mediatek/pinctrl-airoha.c @@ -2907,11 +2907,9 @@ static int airoha_pinctrl_probe(struct platform_device *pdev) const struct airoha_pinctrl_func *func; func = &airoha_pinctrl_funcs[i]; - err = pinmux_generic_add_function(pinctrl->ctrl, - func->desc.func.name, - func->desc.func.groups, - func->desc.func.ngroups, - (void *)func); + err = pinmux_generic_add_pinfunction(pinctrl->ctrl, + &func->desc.func, + (void *)func); if (err < 0) { dev_err(dev, "Failed to register function %s\n", func->desc.func.name); From 8f8fe52c5a072c1d1e2a8f91f9de95739bd80d52 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 9 Jul 2025 16:39:01 +0200 Subject: [PATCH 0748/2411] pinctrl: mediatek: moore: use pinmux_generic_add_pinfunction() Instead of passing individual fields of struct pinfunction to pinmux_generic_add_function(), use pinmux_generic_add_pinfunction() and pass the entire structure directly. 
Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250709-pinctrl-gpio-pinfuncs-v2-5-b6135149c0d9@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-moore.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-moore.c b/drivers/pinctrl/mediatek/pinctrl-moore.c index 827d0f191031..ba0d6f880c6e 100644 --- a/drivers/pinctrl/mediatek/pinctrl-moore.c +++ b/drivers/pinctrl/mediatek/pinctrl-moore.c @@ -625,9 +625,8 @@ static int mtk_build_functions(struct mtk_pinctrl *hw) const struct function_desc *function = hw->soc->funcs + i; const struct pinfunction *func = &function->func; - err = pinmux_generic_add_function(hw->pctrl, func->name, - func->groups, func->ngroups, - function->data); + err = pinmux_generic_add_pinfunction(hw->pctrl, func, + function->data); if (err < 0) { dev_err(hw->dev, "Failed to register function %s\n", func->name); From adb9e21052c76ef8769b8f6c4c3c26a919bafc5e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 9 Jul 2025 16:39:02 +0200 Subject: [PATCH 0749/2411] pinctrl: keembay: use pinmux_generic_add_pinfunction() Instead of passing individual fields of struct pinfunction to pinmux_generic_add_function(), use pinmux_generic_add_pinfunction() and pass the entire structure directly. 
Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250709-pinctrl-gpio-pinfuncs-v2-6-b6135149c0d9@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-keembay.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/pinctrl/pinctrl-keembay.c b/drivers/pinctrl/pinctrl-keembay.c index 622000139317..30e641571cfe 100644 --- a/drivers/pinctrl/pinctrl-keembay.c +++ b/drivers/pinctrl/pinctrl-keembay.c @@ -1586,13 +1586,9 @@ static int keembay_add_functions(struct keembay_pinctrl *kpc, } /* Add all functions */ - for (i = 0; i < kpc->nfuncs; i++) { - pinmux_generic_add_function(kpc->pctrl, - functions[i].func.name, - functions[i].func.groups, - functions[i].func.ngroups, - functions[i].data); - } + for (i = 0; i < kpc->nfuncs; i++) + pinmux_generic_add_pinfunction(kpc->pctrl, &functions[i].func, + functions[i].data); return 0; } From 0bbd90c2c6b2dc5b1211cc461a144c6c8808605d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 9 Jul 2025 16:39:03 +0200 Subject: [PATCH 0750/2411] pinctrl: ingenic: use pinmux_generic_add_pinfunction() Instead of passing individual fields of struct pinfunction to pinmux_generic_add_function(), use pinmux_generic_add_pinfunction() and pass the entire structure directly. 
Signed-off-by: Bartosz Golaszewski Reviewed-by: Paul Cercueil Link: https://lore.kernel.org/20250709-pinctrl-gpio-pinfuncs-v2-7-b6135149c0d9@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-ingenic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c index 3c660471ec69..79119cf20efc 100644 --- a/drivers/pinctrl/pinctrl-ingenic.c +++ b/drivers/pinctrl/pinctrl-ingenic.c @@ -4574,9 +4574,8 @@ static int __init ingenic_pinctrl_probe(struct platform_device *pdev) const struct function_desc *function = &chip_info->functions[i]; const struct pinfunction *func = &function->func; - err = pinmux_generic_add_function(jzpc->pctl, func->name, - func->groups, func->ngroups, - function->data); + err = pinmux_generic_add_pinfunction(jzpc->pctl, func, + function->data); if (err < 0) { dev_err(dev, "Failed to register function %s\n", func->name); return err; From b0c7d8c9e8c671aaee6d14abd834f7d0d63e3c19 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 19 Jul 2025 20:36:49 +0200 Subject: [PATCH 0751/2411] rust: list: undo unintended replacement of method name When we renamed `Opaque::raw_get` to `cast_into`, there was one replacement that was not supposed to be there. It does not cause an issue so far because it is inside a macro rule (the `ListLinksSelfPtr` one) that is unused so far. However, it will start to be used soon. Thus fix it now. 
Fixes: 64fb810bce03 ("rust: types: rename Opaque::raw_get to cast_into") Reviewed-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250719183649.596051-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/list/impl_list_item_mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index c1edba0a9501..3f6c30e14904 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -252,7 +252,7 @@ unsafe fn view_value(links_field: *mut $crate::list::ListLinks<$num>) -> *const // the pointer stays in bounds of the allocation. let self_ptr = unsafe { (links_field as *const u8).add(spoff) } as *const ::core::cell::UnsafeCell<*const Self>; - let cell_inner = ::core::cell::UnsafeCell::cast_into(self_ptr); + let cell_inner = ::core::cell::UnsafeCell::raw_get(self_ptr); // SAFETY: This is not a data race, because the only function that writes to this // value is `prepare_to_insert`, but by the safety requirements the // `prepare_to_insert` method may not be called in parallel with `view_value` or From e71d7e39be6e2aa3fbba34cad12aa72ab853cfb5 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 9 Jul 2025 15:31:11 -0400 Subject: [PATCH 0752/2411] rust: list: simplify macro capture Avoid manually capturing generics; use `ty` to capture the whole type instead. 
Reviewed-by: Christian Schrefl Tested-by: Alice Ryhl Reviewed-by: Alice Ryhl Signed-off-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250709-list-no-offset-v4-1-a429e75840a9@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/list/impl_list_item_mod.rs | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index 3f6c30e14904..5eacc4009c1c 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -43,7 +43,7 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut ListLinks { macro_rules! impl_has_list_links { ($(impl$(<$($implarg:ident),*>)? HasListLinks$(<$id:tt>)? - for $self:ident $(<$($selfarg:ty),*>)? + for $self:ty { self$(.$field:ident)* } )*) => {$( // SAFETY: The implementation of `raw_get_list_links` only compiles if the field has the @@ -51,9 +51,7 @@ macro_rules! impl_has_list_links { // // The behavior of `raw_get_list_links` is not changed since the `addr_of_mut!` macro is // equivalent to the pointer offset operation in the trait definition. - unsafe impl$(<$($implarg),*>)? $crate::list::HasListLinks$(<$id>)? for - $self $(<$($selfarg),*>)? - { + unsafe impl$(<$($implarg),*>)? $crate::list::HasListLinks$(<$id>)? for $self { const OFFSET: usize = ::core::mem::offset_of!(Self, $($field).*) as usize; #[inline] @@ -85,18 +83,14 @@ pub unsafe trait HasSelfPtr macro_rules! impl_has_list_links_self_ptr { ($(impl$({$($implarg:tt)*})? HasSelfPtr<$item_type:ty $(, $id:tt)?> - for $self:ident $(<$($selfarg:ty),*>)? + for $self:ty { self.$field:ident } )*) => {$( // SAFETY: The implementation of `raw_get_list_links` only compiles if the field has the // right type. - unsafe impl$(<$($implarg)*>)? $crate::list::HasSelfPtr<$item_type $(, $id)?> for - $self $(<$($selfarg),*>)? - {} + unsafe impl$(<$($implarg)*>)? $crate::list::HasSelfPtr<$item_type $(, $id)?> for $self {} - unsafe impl$(<$($implarg)*>)? 
$crate::list::HasListLinks$(<$id>)? for - $self $(<$($selfarg),*>)? - { + unsafe impl$(<$($implarg)*>)? $crate::list::HasListLinks$(<$id>)? for $self { const OFFSET: usize = ::core::mem::offset_of!(Self, $field) as usize; #[inline] From 9cec86e4ae000947b268913ca0ef6ba519b6719a Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 9 Jul 2025 15:31:12 -0400 Subject: [PATCH 0753/2411] rust: list: use consistent type parameter style Refer to the type parameters of `impl_has_list_links{,_self_ptr}!` by the same name used in `impl_list_item!`. Capture type parameters of `impl_list_item!` as `tt` using `{}` to match the style of all other macros that work with generics. Reviewed-by: Christian Schrefl Tested-by: Alice Ryhl Reviewed-by: Alice Ryhl Signed-off-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250709-list-no-offset-v4-2-a429e75840a9@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/list/impl_list_item_mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index 5eacc4009c1c..a19fb8bc6b40 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -41,7 +41,7 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut ListLinks { /// Implements the [`HasListLinks`] trait for the given type. #[macro_export] macro_rules! impl_has_list_links { - ($(impl$(<$($implarg:ident),*>)? + ($(impl$({$($generics:tt)*})? HasListLinks$(<$id:tt>)? for $self:ty { self$(.$field:ident)* } @@ -51,7 +51,7 @@ macro_rules! impl_has_list_links { // // The behavior of `raw_get_list_links` is not changed since the `addr_of_mut!` macro is // equivalent to the pointer offset operation in the trait definition. - unsafe impl$(<$($implarg),*>)? $crate::list::HasListLinks$(<$id>)? for $self { + unsafe impl$(<$($generics)*>)? $crate::list::HasListLinks$(<$id>)? 
for $self { const OFFSET: usize = ::core::mem::offset_of!(Self, $($field).*) as usize; #[inline] @@ -81,16 +81,16 @@ pub unsafe trait HasSelfPtr /// Implements the [`HasListLinks`] and [`HasSelfPtr`] traits for the given type. #[macro_export] macro_rules! impl_has_list_links_self_ptr { - ($(impl$({$($implarg:tt)*})? + ($(impl$({$($generics:tt)*})? HasSelfPtr<$item_type:ty $(, $id:tt)?> for $self:ty { self.$field:ident } )*) => {$( // SAFETY: The implementation of `raw_get_list_links` only compiles if the field has the // right type. - unsafe impl$(<$($implarg)*>)? $crate::list::HasSelfPtr<$item_type $(, $id)?> for $self {} + unsafe impl$(<$($generics)*>)? $crate::list::HasSelfPtr<$item_type $(, $id)?> for $self {} - unsafe impl$(<$($implarg)*>)? $crate::list::HasListLinks$(<$id>)? for $self { + unsafe impl$(<$($generics)*>)? $crate::list::HasListLinks$(<$id>)? for $self { const OFFSET: usize = ::core::mem::offset_of!(Self, $field) as usize; #[inline] From 9e626edd7b1469005625e7c5e7f2aea50d2b6646 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 9 Jul 2025 15:31:13 -0400 Subject: [PATCH 0754/2411] rust: list: use consistent self parameter name Refer to the self parameter of `impl_list_item!` by the same name used in `impl_has_list_links{,_self_ptr}!`. Reviewed-by: Christian Schrefl Tested-by: Alice Ryhl Reviewed-by: Alice Ryhl Signed-off-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250709-list-no-offset-v4-3-a429e75840a9@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/list/impl_list_item_mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index a19fb8bc6b40..a1b22d47ac9d 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -114,12 +114,12 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$ #[macro_export] macro_rules! 
impl_list_item { ( - $(impl$({$($generics:tt)*})? ListItem<$num:tt> for $t:ty { + $(impl$({$($generics:tt)*})? ListItem<$num:tt> for $self:ty { using ListLinks; })* ) => {$( // SAFETY: See GUARANTEES comment on each method. - unsafe impl$(<$($generics)*>)? $crate::list::ListItem<$num> for $t { + unsafe impl$(<$($generics)*>)? $crate::list::ListItem<$num> for $self { // GUARANTEES: // * This returns the same pointer as `prepare_to_insert` because `prepare_to_insert` // is implemented in terms of `view_links`. @@ -178,12 +178,12 @@ unsafe fn post_remove(me: *mut $crate::list::ListLinks<$num>) -> *const Self { )*}; ( - $(impl$({$($generics:tt)*})? ListItem<$num:tt> for $t:ty { + $(impl$({$($generics:tt)*})? ListItem<$num:tt> for $self:ty { using ListLinksSelfPtr; })* ) => {$( // SAFETY: See GUARANTEES comment on each method. - unsafe impl$(<$($generics)*>)? $crate::list::ListItem<$num> for $t { + unsafe impl$(<$($generics)*>)? $crate::list::ListItem<$num> for $self { // GUARANTEES: // This implementation of `ListItem` will not give out exclusive access to the same // `ListLinks` several times because calls to `prepare_to_insert` and `post_remove` From 6a13057d500d48b1786344f4f93b0253adfc4e76 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 9 Jul 2025 15:31:14 -0400 Subject: [PATCH 0755/2411] rust: list: use fully qualified path Use a fully qualified path rooted at `$crate` rather than relying on imports in the invoking scope. Tested-by: Alice Ryhl Reviewed-by: Alice Ryhl Signed-off-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250709-list-no-offset-v4-4-a429e75840a9@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/list/impl_list_item_mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index a1b22d47ac9d..6717b614e896 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -4,8 +4,6 @@ //! 
Helpers for implementing list traits safely. -use crate::list::ListLinks; - /// Declares that this type has a `ListLinks` field at a fixed offset. /// /// This trait is only used to help implement `ListItem` safely. If `ListItem` is implemented @@ -27,11 +25,11 @@ pub unsafe trait HasListLinks { /// /// The provided pointer must point at a valid struct of type `Self`. /// - /// [`ListLinks`]: ListLinks + /// [`ListLinks`]: crate::list::ListLinks // We don't really need this method, but it's necessary for the implementation of // `impl_has_list_links!` to be correct. #[inline] - unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut ListLinks { + unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut crate::list::ListLinks { // SAFETY: The caller promises that the pointer is valid. The implementer promises that the // `OFFSET` constant is correct. unsafe { ptr.cast::<u8>().add(Self::OFFSET).cast() } @@ -222,7 +220,9 @@ unsafe fn prepare_to_insert(me: *const Self) -> *mut $crate::list::ListLinks<$nu // this value is not in a list. unsafe fn view_links(me: *const Self) -> *mut $crate::list::ListLinks<$num> { // SAFETY: The caller promises that `me` points at a valid value of type `Self`. - unsafe { <Self as HasListLinks<$num>>::raw_get_list_links(me.cast_mut()) } + unsafe { + <Self as $crate::list::HasListLinks<$num>>::raw_get_list_links(me.cast_mut()) + } } // This function is also used as the implementation of `post_remove`, so the caller From 5d840b4c4935cd5100be97b6df565b4b159970a5 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 9 Jul 2025 15:31:15 -0400 Subject: [PATCH 0756/2411] rust: list: add `impl_list_item!` examples There's a comprehensive example in `rust/kernel/list.rs` but it doesn't exercise the `using ListLinksSelfPtr` variant nor the generic cases. Add that here. Generalize `impl_has_list_links_self_ptr` to handle nested fields in the same manner as `impl_has_list_links`.
Tested-by: Alice Ryhl Reviewed-by: Alice Ryhl Signed-off-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250709-list-no-offset-v4-5-a429e75840a9@gmail.com [ Fixed Rust < 1.82 build by enabling the `offset_of_nested` feature. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/list/impl_list_item_mod.rs | 96 +++++++++++++++++++++++++- scripts/Makefile.build | 5 +- 2 files changed, 96 insertions(+), 5 deletions(-) diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index 6717b614e896..374b3dd812d0 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -82,20 +82,20 @@ macro_rules! impl_has_list_links_self_ptr { ($(impl$({$($generics:tt)*})? HasSelfPtr<$item_type:ty $(, $id:tt)?> for $self:ty - { self.$field:ident } + { self$(.$field:ident)* } )*) => {$( // SAFETY: The implementation of `raw_get_list_links` only compiles if the field has the // right type. unsafe impl$(<$($generics)*>)? $crate::list::HasSelfPtr<$item_type $(, $id)?> for $self {} unsafe impl$(<$($generics)*>)? $crate::list::HasListLinks$(<$id>)? for $self { - const OFFSET: usize = ::core::mem::offset_of!(Self, $field) as usize; + const OFFSET: usize = ::core::mem::offset_of!(Self, $($field).*) as usize; #[inline] unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$id>)? { // SAFETY: The caller promises that the pointer is not dangling. let ptr: *mut $crate::list::ListLinksSelfPtr<$item_type $(, $id)?> = - unsafe { ::core::ptr::addr_of_mut!((*ptr).$field) }; + unsafe { ::core::ptr::addr_of_mut!((*ptr)$(.$field)*) }; ptr.cast() } } @@ -109,6 +109,96 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$ /// implement that trait. 
/// /// [`ListItem`]: crate::list::ListItem +/// +/// # Examples +/// +/// ``` +/// #[pin_data] +/// struct SimpleListItem { +/// value: u32, +/// #[pin] +/// links: kernel::list::ListLinks, +/// } +/// +/// kernel::list::impl_has_list_links! { +/// impl HasListLinks<0> for SimpleListItem { self.links } +/// } +/// +/// kernel::list::impl_list_arc_safe! { +/// impl ListArcSafe<0> for SimpleListItem { untracked; } +/// } +/// +/// kernel::list::impl_list_item! { +/// impl ListItem<0> for SimpleListItem { using ListLinks; } +/// } +/// +/// struct ListLinksHolder { +/// inner: kernel::list::ListLinks, +/// } +/// +/// #[pin_data] +/// struct ComplexListItem { +/// value: Result, +/// #[pin] +/// links: ListLinksHolder, +/// } +/// +/// kernel::list::impl_has_list_links! { +/// impl{T, U} HasListLinks<0> for ComplexListItem { self.links.inner } +/// } +/// +/// kernel::list::impl_list_arc_safe! { +/// impl{T, U} ListArcSafe<0> for ComplexListItem { untracked; } +/// } +/// +/// kernel::list::impl_list_item! { +/// impl{T, U} ListItem<0> for ComplexListItem { using ListLinks; } +/// } +/// ``` +/// +/// ``` +/// #[pin_data] +/// struct SimpleListItem { +/// value: u32, +/// #[pin] +/// links: kernel::list::ListLinksSelfPtr, +/// } +/// +/// kernel::list::impl_list_arc_safe! { +/// impl ListArcSafe<0> for SimpleListItem { untracked; } +/// } +/// +/// kernel::list::impl_has_list_links_self_ptr! { +/// impl HasSelfPtr for SimpleListItem { self.links } +/// } +/// +/// kernel::list::impl_list_item! { +/// impl ListItem<0> for SimpleListItem { using ListLinksSelfPtr; } +/// } +/// +/// struct ListLinksSelfPtrHolder { +/// inner: kernel::list::ListLinksSelfPtr>, +/// } +/// +/// #[pin_data] +/// struct ComplexListItem { +/// value: Result, +/// #[pin] +/// links: ListLinksSelfPtrHolder, +/// } +/// +/// kernel::list::impl_list_arc_safe! { +/// impl{T, U} ListArcSafe<0> for ComplexListItem { untracked; } +/// } +/// +/// kernel::list::impl_has_list_links_self_ptr! 
{ +/// impl{T, U} HasSelfPtr> for ComplexListItem { self.links.inner } +/// } +/// +/// kernel::list::impl_list_item! { +/// impl{T, U} ListItem<0> for ComplexListItem { using ListLinksSelfPtr; } +/// } +/// ``` #[macro_export] macro_rules! impl_list_item { ( diff --git a/scripts/Makefile.build b/scripts/Makefile.build index a6461ea411f7..79c40af6f399 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -309,13 +309,14 @@ $(obj)/%.lst: $(obj)/%.c FORCE # The features in this list are the ones allowed for non-`rust/` code. # # - Stable since Rust 1.81.0: `feature(lint_reasons)`. -# - Stable since Rust 1.82.0: `feature(asm_const)`, `feature(raw_ref_op)`. +# - Stable since Rust 1.82.0: `feature(asm_const)`, +# `feature(offset_of_nested)`, `feature(raw_ref_op)`. # - Stable since Rust 1.87.0: `feature(asm_goto)`. # - Expected to become stable: `feature(arbitrary_self_types)`. # # Please see https://github.com/Rust-for-Linux/linux/issues/2 for details on # the unstable features in use. -rust_allowed_features := asm_const,asm_goto,arbitrary_self_types,lint_reasons,raw_ref_op +rust_allowed_features := asm_const,asm_goto,arbitrary_self_types,lint_reasons,offset_of_nested,raw_ref_op # `--out-dir` is required to avoid temporaries being created by `rustc` in the # current working directory, which may be not accessible in the out-of-tree From c77f85b347dd506ab6ef047031e75c2d03101187 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 9 Jul 2025 15:31:16 -0400 Subject: [PATCH 0757/2411] rust: list: remove OFFSET constants Replace `ListLinksSelfPtr::LIST_LINKS_SELF_PTR_OFFSET` with `unsafe fn raw_get_self_ptr` which returns a pointer to the field rather than requiring the caller to do pointer arithmetic. Implement `HasListLinks::raw_get_list_links` in `impl_has_list_links!`, narrowing the interface of `HasListLinks` and replacing pointer arithmetic with `container_of!`. 
Modify `impl_list_item` to also invoke `impl_has_list_links!` or `impl_has_list_links_self_ptr!`. This is necessary to allow `impl_list_item` to see more of the tokens used by `impl_has_list_links{,_self_ptr}!`. A similar API change was discussed on the hrtimer series[1]. Link: https://lore.kernel.org/all/20250224-hrtimer-v3-v6-12-rc2-v9-1-5bd3bf0ce6cc@kernel.org/ [1] Tested-by: Alice Ryhl Reviewed-by: Alice Ryhl Signed-off-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250709-list-no-offset-v4-6-a429e75840a9@gmail.com [ Fixed broken intra-doc links. Used the renamed `Opaque::cast_into`. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/list.rs | 23 ++-- rust/kernel/list/impl_list_item_mod.rs | 145 ++++++++++++------------- 2 files changed, 81 insertions(+), 87 deletions(-) diff --git a/rust/kernel/list.rs b/rust/kernel/list.rs index 7ebb81b2a3d4..44e5219cfcbc 100644 --- a/rust/kernel/list.rs +++ b/rust/kernel/list.rs @@ -57,14 +57,11 @@ /// } /// } /// -/// impl_has_list_links! { -/// impl HasListLinks<0> for BasicItem { self.links } -/// } /// impl_list_arc_safe! { /// impl ListArcSafe<0> for BasicItem { untracked; } /// } /// impl_list_item! { -/// impl ListItem<0> for BasicItem { using ListLinks; } +/// impl ListItem<0> for BasicItem { using ListLinks { self.links }; } /// } /// /// // Create a new empty list. @@ -320,9 +317,6 @@ unsafe impl Send for ListLinksSelfPtr {} unsafe impl Sync for ListLinksSelfPtr {} impl ListLinksSelfPtr { - /// The offset from the [`ListLinks`] to the self pointer field. - pub const LIST_LINKS_SELF_PTR_OFFSET: usize = core::mem::offset_of!(Self, self_ptr); - /// Creates a new initializer for this type. pub fn new() -> impl PinInit { // INVARIANT: Pin-init initializers can't be used on an existing `Arc`, so this value will @@ -337,6 +331,16 @@ pub fn new() -> impl PinInit { self_ptr: Opaque::uninit(), } } + + /// Returns a pointer to the self pointer. 
+ /// + /// # Safety + /// + /// The provided pointer must point at a valid struct of type `Self`. + pub unsafe fn raw_get_self_ptr(me: *const Self) -> *const Opaque<*const T> { + // SAFETY: The caller promises that the pointer is valid. + unsafe { ptr::addr_of!((*me).self_ptr) } + } } impl, const ID: u64> List { @@ -711,14 +715,11 @@ fn next(&mut self) -> Option> { /// } /// } /// -/// kernel::list::impl_has_list_links! { -/// impl HasListLinks<0> for ListItem { self.links } -/// } /// kernel::list::impl_list_arc_safe! { /// impl ListArcSafe<0> for ListItem { untracked; } /// } /// kernel::list::impl_list_item! { -/// impl ListItem<0> for ListItem { using ListLinks; } +/// impl ListItem<0> for ListItem { using ListLinks { self.links }; } /// } /// /// // Use a cursor to remove the first element with the given value. diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index 374b3dd812d0..f4c91832a875 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -4,21 +4,19 @@ //! Helpers for implementing list traits safely. -/// Declares that this type has a `ListLinks` field at a fixed offset. +/// Declares that this type has a [`ListLinks`] field. /// -/// This trait is only used to help implement `ListItem` safely. If `ListItem` is implemented +/// This trait is only used to help implement [`ListItem`] safely. If [`ListItem`] is implemented /// manually, then this trait is not needed. Use the [`impl_has_list_links!`] macro to implement /// this trait. /// /// # Safety /// -/// All values of this type must have a `ListLinks` field at the given offset. +/// The methods on this trait must have exactly the behavior that the definitions given below have. /// -/// The behavior of `raw_get_list_links` must not be changed. +/// [`ListLinks`]: crate::list::ListLinks +/// [`ListItem`]: crate::list::ListItem pub unsafe trait HasListLinks { - /// The offset of the `ListLinks` field. 
- const OFFSET: usize; - /// Returns a pointer to the [`ListLinks`] field. /// /// # Safety @@ -26,14 +24,7 @@ pub unsafe trait HasListLinks { /// The provided pointer must point at a valid struct of type `Self`. /// /// [`ListLinks`]: crate::list::ListLinks - // We don't really need this method, but it's necessary for the implementation of - // `impl_has_list_links!` to be correct. - #[inline] - unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut crate::list::ListLinks { - // SAFETY: The caller promises that the pointer is valid. The implementer promises that the - // `OFFSET` constant is correct. - unsafe { ptr.cast::().add(Self::OFFSET).cast() } - } + unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut crate::list::ListLinks; } /// Implements the [`HasListLinks`] trait for the given type. @@ -46,14 +37,15 @@ macro_rules! impl_has_list_links { )*) => {$( // SAFETY: The implementation of `raw_get_list_links` only compiles if the field has the // right type. - // - // The behavior of `raw_get_list_links` is not changed since the `addr_of_mut!` macro is - // equivalent to the pointer offset operation in the trait definition. unsafe impl$(<$($generics)*>)? $crate::list::HasListLinks$(<$id>)? for $self { - const OFFSET: usize = ::core::mem::offset_of!(Self, $($field).*) as usize; - #[inline] unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$id>)? { + // Statically ensure that `$(.field)*` doesn't follow any pointers. + // + // Cannot be `const` because `$self` may contain generics and E0401 says constants + // "can't use {`Self`,generic parameters} from outer item". + if false { let _: usize = ::core::mem::offset_of!(Self, $($field).*); } + // SAFETY: The caller promises that the pointer is not dangling. We know that this // expression doesn't follow any pointers, as the `offset_of!` invocation above // would otherwise not compile. 
@@ -64,12 +56,16 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$ } pub use impl_has_list_links; -/// Declares that the `ListLinks` field in this struct is inside a `ListLinksSelfPtr`. +/// Declares that the [`ListLinks`] field in this struct is inside a +/// [`ListLinksSelfPtr`]. /// /// # Safety /// -/// The `ListLinks` field of this struct at the offset `HasListLinks::OFFSET` must be -/// inside a `ListLinksSelfPtr`. +/// The [`ListLinks`] field of this struct at [`HasListLinks::raw_get_list_links`] must be +/// inside a [`ListLinksSelfPtr`]. +/// +/// [`ListLinks`]: crate::list::ListLinks +/// [`ListLinksSelfPtr`]: crate::list::ListLinksSelfPtr pub unsafe trait HasSelfPtr where Self: HasListLinks, @@ -89,8 +85,6 @@ macro_rules! impl_has_list_links_self_ptr { unsafe impl$(<$($generics)*>)? $crate::list::HasSelfPtr<$item_type $(, $id)?> for $self {} unsafe impl$(<$($generics)*>)? $crate::list::HasListLinks$(<$id>)? for $self { - const OFFSET: usize = ::core::mem::offset_of!(Self, $($field).*) as usize; - #[inline] unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$id>)? { // SAFETY: The caller promises that the pointer is not dangling. @@ -120,16 +114,12 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$ /// links: kernel::list::ListLinks, /// } /// -/// kernel::list::impl_has_list_links! { -/// impl HasListLinks<0> for SimpleListItem { self.links } -/// } -/// /// kernel::list::impl_list_arc_safe! { /// impl ListArcSafe<0> for SimpleListItem { untracked; } /// } /// /// kernel::list::impl_list_item! { -/// impl ListItem<0> for SimpleListItem { using ListLinks; } +/// impl ListItem<0> for SimpleListItem { using ListLinks { self.links }; } /// } /// /// struct ListLinksHolder { @@ -143,16 +133,12 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$ /// links: ListLinksHolder, /// } /// -/// kernel::list::impl_has_list_links! 
{ -/// impl{T, U} HasListLinks<0> for ComplexListItem { self.links.inner } -/// } -/// /// kernel::list::impl_list_arc_safe! { /// impl{T, U} ListArcSafe<0> for ComplexListItem { untracked; } /// } /// /// kernel::list::impl_list_item! { -/// impl{T, U} ListItem<0> for ComplexListItem { using ListLinks; } +/// impl{T, U} ListItem<0> for ComplexListItem { using ListLinks { self.links.inner }; } /// } /// ``` /// @@ -168,12 +154,8 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$ /// impl ListArcSafe<0> for SimpleListItem { untracked; } /// } /// -/// kernel::list::impl_has_list_links_self_ptr! { -/// impl HasSelfPtr for SimpleListItem { self.links } -/// } -/// /// kernel::list::impl_list_item! { -/// impl ListItem<0> for SimpleListItem { using ListLinksSelfPtr; } +/// impl ListItem<0> for SimpleListItem { using ListLinksSelfPtr { self.links }; } /// } /// /// struct ListLinksSelfPtrHolder { @@ -191,21 +173,23 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$ /// impl{T, U} ListArcSafe<0> for ComplexListItem { untracked; } /// } /// -/// kernel::list::impl_has_list_links_self_ptr! { -/// impl{T, U} HasSelfPtr> for ComplexListItem { self.links.inner } -/// } -/// /// kernel::list::impl_list_item! { -/// impl{T, U} ListItem<0> for ComplexListItem { using ListLinksSelfPtr; } +/// impl{T, U} ListItem<0> for ComplexListItem { +/// using ListLinksSelfPtr { self.links.inner }; +/// } /// } /// ``` #[macro_export] macro_rules! impl_list_item { ( $(impl$({$($generics:tt)*})? ListItem<$num:tt> for $self:ty { - using ListLinks; + using ListLinks { self$(.$field:ident)* }; })* ) => {$( + $crate::list::impl_has_list_links! { + impl$({$($generics)*})? HasListLinks<$num> for $self { self$(.$field)* } + } + // SAFETY: See GUARANTEES comment on each method. unsafe impl$(<$($generics)*>)? 
$crate::list::ListItem<$num> for $self { // GUARANTEES: @@ -221,20 +205,19 @@ unsafe fn view_links(me: *const Self) -> *mut $crate::list::ListLinks<$num> { } // GUARANTEES: - // * `me` originates from the most recent call to `prepare_to_insert`, which just added - // `offset` to the pointer passed to `prepare_to_insert`. This method subtracts - // `offset` from `me` so it returns the pointer originally passed to - // `prepare_to_insert`. + // * `me` originates from the most recent call to `prepare_to_insert`, which calls + // `raw_get_list_link`, which is implemented using `addr_of_mut!((*self)$(.$field)*)`. + // This method uses `container_of` to perform the inverse operation, so it returns the + // pointer originally passed to `prepare_to_insert`. // * The pointer remains valid until the next call to `post_remove` because the caller // of the most recent call to `prepare_to_insert` promised to retain ownership of the // `ListArc` containing `Self` until the next call to `post_remove`. The value cannot // be destroyed while a `ListArc` reference exists. unsafe fn view_value(me: *mut $crate::list::ListLinks<$num>) -> *const Self { - let offset = >::OFFSET; // SAFETY: `me` originates from the most recent call to `prepare_to_insert`, so it - // points at the field at offset `offset` in a value of type `Self`. Thus, - // subtracting `offset` from `me` is still in-bounds of the allocation. - unsafe { (me as *const u8).sub(offset) as *const Self } + // points at the field `$field` in a value of type `Self`. Thus, reversing that + // operation is still in-bounds of the allocation. + $crate::container_of!(me, Self, $($field).*) } // GUARANTEES: @@ -251,25 +234,28 @@ unsafe fn prepare_to_insert(me: *const Self) -> *mut $crate::list::ListLinks<$nu } // GUARANTEES: - // * `me` originates from the most recent call to `prepare_to_insert`, which just added - // `offset` to the pointer passed to `prepare_to_insert`. 
This method subtracts - // `offset` from `me` so it returns the pointer originally passed to - // `prepare_to_insert`. + // * `me` originates from the most recent call to `prepare_to_insert`, which calls + // `raw_get_list_link`, which is implemented using `addr_of_mut!((*self)$(.$field)*)`. + // This method uses `container_of` to perform the inverse operation, so it returns the + // pointer originally passed to `prepare_to_insert`. unsafe fn post_remove(me: *mut $crate::list::ListLinks<$num>) -> *const Self { - let offset = >::OFFSET; // SAFETY: `me` originates from the most recent call to `prepare_to_insert`, so it - // points at the field at offset `offset` in a value of type `Self`. Thus, - // subtracting `offset` from `me` is still in-bounds of the allocation. - unsafe { (me as *const u8).sub(offset) as *const Self } + // points at the field `$field` in a value of type `Self`. Thus, reversing that + // operation is still in-bounds of the allocation. + $crate::container_of!(me, Self, $($field).*) } } )*}; ( $(impl$({$($generics:tt)*})? ListItem<$num:tt> for $self:ty { - using ListLinksSelfPtr; + using ListLinksSelfPtr { self$(.$field:ident)* }; })* ) => {$( + $crate::list::impl_has_list_links_self_ptr! { + impl$({$($generics)*})? HasSelfPtr<$self> for $self { self$(.$field)* } + } + // SAFETY: See GUARANTEES comment on each method. unsafe impl$(<$($generics)*>)? $crate::list::ListItem<$num> for $self { // GUARANTEES: @@ -284,13 +270,15 @@ unsafe fn prepare_to_insert(me: *const Self) -> *mut $crate::list::ListLinks<$nu // SAFETY: The caller promises that `me` points at a valid value of type `Self`. let links_field = unsafe { >::view_links(me) }; - let spoff = $crate::list::ListLinksSelfPtr::::LIST_LINKS_SELF_PTR_OFFSET; - // Goes via the offset as the field is private. - // - // SAFETY: The constant is equal to `offset_of!(ListLinksSelfPtr, self_ptr)`, so - // the pointer stays in bounds of the allocation. 
- let self_ptr = unsafe { (links_field as *const u8).add(spoff) } - as *const $crate::types::Opaque<*const Self>; + let container = $crate::container_of!( + links_field, $crate::list::ListLinksSelfPtr, inner + ); + + // SAFETY: By the same reasoning above, `links_field` is a valid pointer. + let self_ptr = unsafe { + $crate::list::ListLinksSelfPtr::raw_get_self_ptr(container) + }; + let cell_inner = $crate::types::Opaque::cast_into(self_ptr); // SAFETY: This value is not accessed in any other places than `prepare_to_insert`, @@ -331,12 +319,17 @@ unsafe fn view_links(me: *const Self) -> *mut $crate::list::ListLinks<$num> { // `ListArc` containing `Self` until the next call to `post_remove`. The value cannot // be destroyed while a `ListArc` reference exists. unsafe fn view_value(links_field: *mut $crate::list::ListLinks<$num>) -> *const Self { - let spoff = $crate::list::ListLinksSelfPtr::::LIST_LINKS_SELF_PTR_OFFSET; - // SAFETY: The constant is equal to `offset_of!(ListLinksSelfPtr, self_ptr)`, so - // the pointer stays in bounds of the allocation. - let self_ptr = unsafe { (links_field as *const u8).add(spoff) } - as *const ::core::cell::UnsafeCell<*const Self>; - let cell_inner = ::core::cell::UnsafeCell::raw_get(self_ptr); + let container = $crate::container_of!( + links_field, $crate::list::ListLinksSelfPtr, inner + ); + + // SAFETY: By the same reasoning above, `links_field` is a valid pointer. 
+ let self_ptr = unsafe { + $crate::list::ListLinksSelfPtr::raw_get_self_ptr(container) + }; + + let cell_inner = $crate::types::Opaque::cast_into(self_ptr); + // SAFETY: This is not a data race, because the only function that writes to this // value is `prepare_to_insert`, but by the safety requirements the // `prepare_to_insert` method may not be called in parallel with `view_value` or From cc84ef3b88f407e8bd5a5f7b6906d1e69851c856 Mon Sep 17 00:00:00 2001 From: Daniel Almeida Date: Mon, 14 Jul 2025 20:29:58 -0300 Subject: [PATCH 0758/2411] rust: bits: add support for bits/genmask macros In light of bindgen being unable to generate bindings for macros, and owing to the widespread use of these macros in drivers, manually define the bit and genmask C macros in Rust. The *_checked version of the functions provide runtime checking while the const version performs compile-time assertions on the arguments via the build_assert!() macro. Reviewed-by: Alice Ryhl Reviewed-by: Alexandre Courbot Signed-off-by: Daniel Almeida Reviewed-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250714-topics-tyr-genmask2-v9-1-9e6422cbadb6@collabora.com [ `expect`ed Clippy warning in doctests, hid single `use`, grouped examples. Reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/bits.rs | 203 ++++++++++++++++++++++++++++++++++++++++++++ rust/kernel/lib.rs | 1 + 2 files changed, 204 insertions(+) create mode 100644 rust/kernel/bits.rs diff --git a/rust/kernel/bits.rs b/rust/kernel/bits.rs new file mode 100644 index 000000000000..553d50265883 --- /dev/null +++ b/rust/kernel/bits.rs @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Bit manipulation macros. +//! +//! C header: [`include/linux/bits.h`](srctree/include/linux/bits.h) + +use crate::prelude::*; +use core::ops::RangeInclusive; +use macros::paste; + +macro_rules! impl_bit_fn { + ( + $ty:ty + ) => { + paste! 
{ + /// Computes `1 << n` if `n` is in bounds, i.e.: if `n` is smaller than + /// the maximum number of bits supported by the type. + /// + /// Returns [`None`] otherwise. + #[inline] + pub fn [](n: u32) -> Option<$ty> { + (1 as $ty).checked_shl(n) + } + + /// Computes `1 << n` by performing a compile-time assertion that `n` is + /// in bounds. + /// + /// This version is the default and should be used if `n` is known at + /// compile time. + #[inline] + pub const fn [](n: u32) -> $ty { + build_assert!(n < <$ty>::BITS); + (1 as $ty) << n + } + } + }; +} + +impl_bit_fn!(u64); +impl_bit_fn!(u32); +impl_bit_fn!(u16); +impl_bit_fn!(u8); + +macro_rules! impl_genmask_fn { + ( + $ty:ty, + $(#[$genmask_checked_ex:meta])*, + $(#[$genmask_ex:meta])* + ) => { + paste! { + /// Creates a contiguous bitmask for the given range by validating + /// the range at runtime. + /// + /// Returns [`None`] if the range is invalid, i.e.: if the start is + /// greater than the end or if the range is outside of the + /// representable range for the type. + $(#[$genmask_checked_ex])* + #[inline] + pub fn [](range: RangeInclusive) -> Option<$ty> { + let start = *range.start(); + let end = *range.end(); + + if start > end { + return None; + } + + let high = [](end)?; + let low = [](start)?; + Some((high | (high - 1)) & !(low - 1)) + } + + /// Creates a compile-time contiguous bitmask for the given range by + /// performing a compile-time assertion that the range is valid. + /// + /// This version is the default and should be used if the range is known + /// at compile time. 
+ $(#[$genmask_ex])* + #[inline] + pub const fn [](range: RangeInclusive) -> $ty { + let start = *range.start(); + let end = *range.end(); + + build_assert!(start <= end); + + let high = [](end); + let low = [](start); + (high | (high - 1)) & !(low - 1) + } + } + }; +} + +impl_genmask_fn!( + u64, + /// # Examples + /// + /// ``` + /// # #![expect(clippy::reversed_empty_ranges)] + /// # use kernel::bits::genmask_checked_u64; + /// assert_eq!(genmask_checked_u64(0..=0), Some(0b1)); + /// assert_eq!(genmask_checked_u64(0..=63), Some(u64::MAX)); + /// assert_eq!(genmask_checked_u64(21..=39), Some(0x0000_00ff_ffe0_0000)); + /// + /// // `80` is out of the supported bit range. + /// assert_eq!(genmask_checked_u64(21..=80), None); + /// + /// // Invalid range where the start is bigger than the end. + /// assert_eq!(genmask_checked_u64(15..=8), None); + /// ``` + , + /// # Examples + /// + /// ``` + /// # use kernel::bits::genmask_u64; + /// assert_eq!(genmask_u64(21..=39), 0x0000_00ff_ffe0_0000); + /// assert_eq!(genmask_u64(0..=0), 0b1); + /// assert_eq!(genmask_u64(0..=63), u64::MAX); + /// ``` +); + +impl_genmask_fn!( + u32, + /// # Examples + /// + /// ``` + /// # #![expect(clippy::reversed_empty_ranges)] + /// # use kernel::bits::genmask_checked_u32; + /// assert_eq!(genmask_checked_u32(0..=0), Some(0b1)); + /// assert_eq!(genmask_checked_u32(0..=31), Some(u32::MAX)); + /// assert_eq!(genmask_checked_u32(21..=31), Some(0xffe0_0000)); + /// + /// // `40` is out of the supported bit range. + /// assert_eq!(genmask_checked_u32(21..=40), None); + /// + /// // Invalid range where the start is bigger than the end. 
+ /// assert_eq!(genmask_checked_u32(15..=8), None); + /// ``` + , + /// # Examples + /// + /// ``` + /// # use kernel::bits::genmask_u32; + /// assert_eq!(genmask_u32(21..=31), 0xffe0_0000); + /// assert_eq!(genmask_u32(0..=0), 0b1); + /// assert_eq!(genmask_u32(0..=31), u32::MAX); + /// ``` +); + +impl_genmask_fn!( + u16, + /// # Examples + /// + /// ``` + /// # #![expect(clippy::reversed_empty_ranges)] + /// # use kernel::bits::genmask_checked_u16; + /// assert_eq!(genmask_checked_u16(0..=0), Some(0b1)); + /// assert_eq!(genmask_checked_u16(0..=15), Some(u16::MAX)); + /// assert_eq!(genmask_checked_u16(6..=15), Some(0xffc0)); + /// + /// // `20` is out of the supported bit range. + /// assert_eq!(genmask_checked_u16(6..=20), None); + /// + /// // Invalid range where the start is bigger than the end. + /// assert_eq!(genmask_checked_u16(10..=5), None); + /// ``` + , + /// # Examples + /// + /// ``` + /// # use kernel::bits::genmask_u16; + /// assert_eq!(genmask_u16(6..=15), 0xffc0); + /// assert_eq!(genmask_u16(0..=0), 0b1); + /// assert_eq!(genmask_u16(0..=15), u16::MAX); + /// ``` +); + +impl_genmask_fn!( + u8, + /// # Examples + /// + /// ``` + /// # #![expect(clippy::reversed_empty_ranges)] + /// # use kernel::bits::genmask_checked_u8; + /// assert_eq!(genmask_checked_u8(0..=0), Some(0b1)); + /// assert_eq!(genmask_checked_u8(0..=7), Some(u8::MAX)); + /// assert_eq!(genmask_checked_u8(6..=7), Some(0xc0)); + /// + /// // `10` is out of the supported bit range. + /// assert_eq!(genmask_checked_u8(6..=10), None); + /// + /// // Invalid range where the start is bigger than the end. 
+ /// assert_eq!(genmask_checked_u8(5..=2), None); + /// ``` + , + /// # Examples + /// + /// ``` + /// # use kernel::bits::genmask_u8; + /// assert_eq!(genmask_u8(6..=7), 0xc0); + /// assert_eq!(genmask_u8(0..=0), 0b1); + /// assert_eq!(genmask_u8(0..=7), u8::MAX); + /// ``` +); diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index f61ac6f81f5d..38c07f35073b 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -54,6 +54,7 @@ pub mod alloc; #[cfg(CONFIG_AUXILIARY_BUS)] pub mod auxiliary; +pub mod bits; #[cfg(CONFIG_BLOCK)] pub mod block; #[doc(hidden)] From 35c18f2933c596b4fd6a98baee36f3137d133a5f Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 12 Jun 2025 12:13:21 +0200 Subject: [PATCH 0759/2411] Add a new optional ",cma" suffix to the crashkernel= command line option Patch series "kdump: crashkernel reservation from CMA", v5. This series implements a way to reserve additional crash kernel memory using CMA. Currently, all the memory for the crash kernel is not usable by the 1st (production) kernel. It is also unmapped so that it can't be corrupted by the fault that will eventually trigger the crash. This makes sense for the memory actually used by the kexec-loaded crash kernel image and initrd and the data prepared during the load (vmcoreinfo, ...). However, the reserved space needs to be much larger than that to provide enough run-time memory for the crash kernel and the kdump userspace. Estimating the amount of memory to reserve is difficult. Being too careful makes kdump likely to end in OOM, being too generous takes even more memory from the production system. Also, the reservation only allows reserving a single contiguous block (or two with the "low" suffix). I've seen systems where this fails because the physical memory is fragmented. By reserving additional crashkernel memory from CMA, the main crashkernel reservation can be just large enough to fit the kernel and initrd image, minimizing the memory taken away from the production system. 
Most of the run-time memory for the crash kernel will be memory previously available to userspace in the production system. As this memory is no longer wasted, the reservation can be done with a generous margin, making kdump more reliable. Kernel memory that we need to preserve for dumping is normally not allocated from CMA, unless it is explicitly allocated as movable. Currently this is only the case for memory ballooning and zswap. Such movable memory will be missing from the vmcore. User data is typically not dumped by makedumpfile. When dumping of user data is intended this new CMA reservation cannot be used. There are five patches in this series: The first adds a new ",cma" suffix to the recently introduced generic crashkernel parsing code. parse_crashkernel() takes one more argument to store the cma reservation size. The second patch implements reserve_crashkernel_cma() which performs the reservation. If the requested size is not available in a single range, multiple smaller ranges will be reserved. The third patch updates Documentation/, explicitly mentioning the potential DMA corruption of the CMA-reserved memory. The fourth patch adds a short delay before booting the kdump kernel, allowing pending DMA transfers to finish. The fifth patch enables the functionality for x86 as a proof of concept. There are just three things every arch needs to do: - call reserve_crashkernel_cma() - include the CMA-reserved ranges in the physical memory map - exclude the CMA-reserved ranges from the memory available through /proc/vmcore by excluding them from the vmcoreinfo PT_LOAD ranges. Adding other architectures is easy and I can do that as soon as this series is merged. With this series applied, specifying crashkernel=100M crashkernel=1G,cma on the command line will make a standard crashkernel reservation of 100M, where kexec will load the kernel and initrd. An additional 1G will be reserved from CMA, still usable by the production system.
The crash kernel will have 1.1G memory available. The 100M can be reliably predicted based on the size of the kernel and initrd. The new cma suffix is completely optional. When no crashkernel=size,cma is specified, everything works as before. This patch (of 5): Add a new cma_size parameter to parse_crashkernel(). When not NULL, call __parse_crashkernel to parse the CMA reservation size from "crashkernel=size,cma" and store it in cma_size. Set cma_size to NULL in all calls to parse_crashkernel(). Link: https://lkml.kernel.org/r/aEqnxxfLZMllMC8I@dwarf.suse.cz Link: https://lkml.kernel.org/r/aEqoQckgoTQNULnh@dwarf.suse.cz Signed-off-by: Jiri Bohac Cc: Baoquan He Cc: Dave Young Cc: Donald Dutile Cc: Michal Hocko Cc: Philipp Rudo Cc: Pingfan Liu Cc: Tao Liu Cc: Vivek Goyal Cc: David Hildenbrand Signed-off-by: Andrew Morton --- arch/arm/kernel/setup.c | 2 +- arch/arm64/mm/init.c | 2 +- arch/loongarch/kernel/setup.c | 2 +- arch/mips/kernel/setup.c | 2 +- arch/powerpc/kernel/fadump.c | 2 +- arch/powerpc/kexec/core.c | 2 +- arch/powerpc/mm/nohash/kaslr_booke.c | 2 +- arch/riscv/mm/init.c | 2 +- arch/s390/kernel/setup.c | 2 +- arch/sh/kernel/machine_kexec.c | 2 +- arch/x86/kernel/setup.c | 2 +- include/linux/crash_reserve.h | 3 ++- kernel/crash_reserve.c | 16 ++++++++++++++-- 13 files changed, 27 insertions(+), 14 deletions(-) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index a41c93988d2c..0bfd66c7ada0 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1004,7 +1004,7 @@ static void __init reserve_crashkernel(void) total_mem = get_total_mem(); ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base, - NULL, NULL); + NULL, NULL, NULL); /* invalid value specified or crashkernel=0 */ if (ret || !crash_size) return; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 0c8c35dd645e..ea84a61ed508 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -106,7 +106,7 @@ static void __init 
arch_reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, - &low_size, &high); + &low_size, NULL, &high); if (ret) return; diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index b99fbb388fe0..22b27cd447a1 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -265,7 +265,7 @@ static void __init arch_reserve_crashkernel(void) return; ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), - &crash_size, &crash_base, &low_size, &high); + &crash_size, &crash_base, &low_size, NULL, &high); if (ret) return; diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index fbfe0771317e..11b9b6b63e19 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -458,7 +458,7 @@ static void __init mips_parse_crashkernel(void) total_mem = memblock_phys_mem_size(); ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base, - NULL, NULL); + NULL, NULL, NULL); if (ret != 0 || crash_size <= 0) return; diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 8ca49e40c473..28cab25d5b33 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -333,7 +333,7 @@ static __init u64 fadump_calculate_reserve_size(void) * memory at a predefined offset. 
*/ ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), - &size, &base, NULL, NULL); + &size, &base, NULL, NULL, NULL); if (ret == 0 && size > 0) { unsigned long max_size; diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 00e9c267b912..d1a2d755381c 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -110,7 +110,7 @@ void __init arch_reserve_crashkernel(void) /* use common parsing */ ret = parse_crashkernel(boot_command_line, total_mem_sz, &crash_size, - &crash_base, NULL, NULL); + &crash_base, NULL, NULL, NULL); if (ret) return; diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index 5c8d1bb98b3e..5e4897daaaea 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -178,7 +178,7 @@ static void __init get_crash_kernel(void *fdt, unsigned long size) int ret; ret = parse_crashkernel(boot_command_line, size, &crash_size, - &crash_base, NULL, NULL); + &crash_base, NULL, NULL, NULL); if (ret != 0 || crash_size == 0) return; if (crash_base == 0) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 8d0374d7ce8e..15683ae13fa5 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1408,7 +1408,7 @@ static void __init arch_reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, - &low_size, &high); + &low_size, NULL, &high); if (ret) return; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f244c5560e7f..b99aeb0db2ee 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -605,7 +605,7 @@ static void __init reserve_crashkernel(void) int rc; rc = parse_crashkernel(boot_command_line, ident_map_size, - &crash_size, &crash_base, NULL, NULL); + &crash_size, &crash_base, NULL, NULL, NULL); crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN); diff --git 
a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c index 8321b31d2e19..37073ca1e0ad 100644 --- a/arch/sh/kernel/machine_kexec.c +++ b/arch/sh/kernel/machine_kexec.c @@ -146,7 +146,7 @@ void __init reserve_crashkernel(void) return; ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), - &crash_size, &crash_base, NULL, NULL); + &crash_size, &crash_base, NULL, NULL, NULL); if (ret == 0 && crash_size > 0) { crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index fb27be697128..c22dc630c297 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -608,7 +608,7 @@ static void __init arch_reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, - &low_size, &high); + &low_size, NULL, &high); if (ret) return; diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h index 1fe7e7d1b214..e784aaff2f5a 100644 --- a/include/linux/crash_reserve.h +++ b/include/linux/crash_reserve.h @@ -16,7 +16,8 @@ extern struct resource crashk_low_res; int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base, - unsigned long long *low_size, bool *high); + unsigned long long *low_size, unsigned long long *cma_size, + bool *high); #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION #ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c index acb6bf42e30d..86ae1365d04e 100644 --- a/kernel/crash_reserve.c +++ b/kernel/crash_reserve.c @@ -172,17 +172,19 @@ static int __init parse_crashkernel_simple(char *cmdline, #define SUFFIX_HIGH 0 #define SUFFIX_LOW 1 -#define SUFFIX_NULL 2 +#define SUFFIX_CMA 2 +#define SUFFIX_NULL 3 static __initdata char *suffix_tbl[] = { [SUFFIX_HIGH] = ",high", [SUFFIX_LOW] = ",low", + [SUFFIX_CMA] = ",cma", [SUFFIX_NULL] = NULL, }; /* * 
That function parses "suffix" crashkernel command lines like * - * crashkernel=size,[high|low] + * crashkernel=size,[high|low|cma] * * It returns 0 on success and -EINVAL on failure. */ @@ -298,9 +300,11 @@ int __init parse_crashkernel(char *cmdline, unsigned long long *crash_size, unsigned long long *crash_base, unsigned long long *low_size, + unsigned long long *cma_size, bool *high) { int ret; + unsigned long long __always_unused cma_base; /* crashkernel=X[@offset] */ ret = __parse_crashkernel(cmdline, system_ram, crash_size, @@ -331,6 +335,14 @@ int __init parse_crashkernel(char *cmdline, *high = true; } + + /* + * optional CMA reservation + * cma_base is ignored + */ + if (cma_size) + __parse_crashkernel(cmdline, 0, cma_size, + &cma_base, suffix_tbl[SUFFIX_CMA]); #endif if (!*crash_size) ret = -EINVAL; From ab475510e0422bb5672d465f9d0f523d72fdb7f1 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 12 Jun 2025 12:16:39 +0200 Subject: [PATCH 0760/2411] kdump: implement reserve_crashkernel_cma reserve_crashkernel_cma() reserves CMA ranges for the crash kernel. If allocating the requested size fails, try to reserve in smaller blocks. Store the reserved ranges in the crashk_cma_ranges array and the number of ranges in crashk_cma_cnt. 
Link: https://lkml.kernel.org/r/aEqpBwOy_ekm0gw9@dwarf.suse.cz Signed-off-by: Jiri Bohac Cc: Baoquan He Cc: Dave Young Cc: David Hildenbrand Cc: Donald Dutile Cc: Michal Hocko Cc: Philipp Rudo Cc: Pingfan Liu Cc: Tao Liu Cc: Vivek Goyal Signed-off-by: Andrew Morton --- include/linux/crash_reserve.h | 12 ++++++++ kernel/crash_reserve.c | 52 +++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h index e784aaff2f5a..7b44b41d0a20 100644 --- a/include/linux/crash_reserve.h +++ b/include/linux/crash_reserve.h @@ -13,12 +13,24 @@ */ extern struct resource crashk_res; extern struct resource crashk_low_res; +extern struct range crashk_cma_ranges[]; +#if defined(CONFIG_CMA) && defined(CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION) +#define CRASHKERNEL_CMA +#define CRASHKERNEL_CMA_RANGES_MAX 4 +extern int crashk_cma_cnt; +#else +#define crashk_cma_cnt 0 +#define CRASHKERNEL_CMA_RANGES_MAX 0 +#endif + int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base, unsigned long long *low_size, unsigned long long *cma_size, bool *high); +void __init reserve_crashkernel_cma(unsigned long long cma_size); + #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION #ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE #define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c index 86ae1365d04e..87bf4d41eabb 100644 --- a/kernel/crash_reserve.c +++ b/kernel/crash_reserve.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include @@ -469,6 +471,56 @@ void __init reserve_crashkernel_generic(unsigned long long crash_size, #endif } +struct range crashk_cma_ranges[CRASHKERNEL_CMA_RANGES_MAX]; +#ifdef CRASHKERNEL_CMA +int crashk_cma_cnt; +void __init reserve_crashkernel_cma(unsigned long long cma_size) +{ + unsigned long long request_size = roundup(cma_size, 
PAGE_SIZE); + unsigned long long reserved_size = 0; + + if (!cma_size) + return; + + while (cma_size > reserved_size && + crashk_cma_cnt < CRASHKERNEL_CMA_RANGES_MAX) { + + struct cma *res; + + if (cma_declare_contiguous(0, request_size, 0, 0, 0, false, + "crashkernel", &res)) { + /* reservation failed, try half-sized blocks */ + if (request_size <= PAGE_SIZE) + break; + + request_size = roundup(request_size / 2, PAGE_SIZE); + continue; + } + + crashk_cma_ranges[crashk_cma_cnt].start = cma_get_base(res); + crashk_cma_ranges[crashk_cma_cnt].end = + crashk_cma_ranges[crashk_cma_cnt].start + + cma_get_size(res) - 1; + ++crashk_cma_cnt; + reserved_size += request_size; + } + + if (cma_size > reserved_size) + pr_warn("crashkernel CMA reservation failed: %lld MB requested, %lld MB reserved in %d ranges\n", + cma_size >> 20, reserved_size >> 20, crashk_cma_cnt); + else + pr_info("crashkernel CMA reserved: %lld MB in %d ranges\n", + reserved_size >> 20, crashk_cma_cnt); +} + +#else /* CRASHKERNEL_CMA */ +void __init reserve_crashkernel_cma(unsigned long long cma_size) +{ + if (cma_size) + pr_warn("crashkernel CMA reservation not supported\n"); +} +#endif + #ifndef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY static __init int insert_crashkernel_resources(void) { From ce1bf19a34dfa1f418037cebe11f5d2c7adf9d1e Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 12 Jun 2025 12:17:39 +0200 Subject: [PATCH 0761/2411] kdump, documentation: describe craskernel CMA reservation Describe the new crashkernel ",cma" suffix in Documentation/ Link: https://lkml.kernel.org/r/aEqpQwUy6gqSiUkV@dwarf.suse.cz Signed-off-by: Jiri Bohac Cc: Baoquan He Cc: Dave Young Cc: David Hildenbrand Cc: Donald Dutile Cc: Michal Hocko Cc: Philipp Rudo Cc: Pingfan Liu Cc: Tao Liu Cc: Vivek Goyal Signed-off-by: Andrew Morton --- Documentation/admin-guide/kdump/kdump.rst | 21 ++++++++++++++++++ .../admin-guide/kernel-parameters.txt | 22 +++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git 
a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index 20fabdf6567e..9c6cd52f69cf 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -311,6 +311,27 @@ crashkernel syntax crashkernel=0,low +4) crashkernel=size,cma + + Reserve additional crash kernel memory from CMA. This reservation is + usable by the first system's userspace memory and kernel movable + allocations (memory balloon, zswap). Pages allocated from this memory + range will not be included in the vmcore so this should not be used if + dumping of userspace memory is intended and it has to be expected that + some movable kernel pages may be missing from the dump. + + A standard crashkernel reservation, as described above, is still needed + to hold the crash kernel and initrd. + + This option increases the risk of a kdump failure: DMA transfers + configured by the first kernel may end up corrupting the second + kernel's memory. + + This reservation method is intended for systems that can't afford to + sacrifice enough memory for standard crashkernel reservation and where + less reliable and possibly incomplete kdump is preferable to no kdump at + all. + Boot into System Kernel ----------------------- 1) Update the boot loader (such as grub, yaboot, or lilo) configuration diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f1f2c0874da9..ac4a239b9388 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -986,6 +986,28 @@ 0: to disable low allocation. It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. + crashkernel=size[KMG],cma + [KNL, X86] Reserve additional crash kernel memory from + CMA. This reservation is usable by the first system's + userspace memory and kernel movable allocations (memory + balloon, zswap). 
Pages allocated from this memory range + will not be included in the vmcore so this should not + be used if dumping of userspace memory is intended and + it has to be expected that some movable kernel pages + may be missing from the dump. + + A standard crashkernel reservation, as described above, + is still needed to hold the crash kernel and initrd. + + This option increases the risk of a kdump failure: DMA + transfers configured by the first kernel may end up + corrupting the second kernel's memory. + + This reservation method is intended for systems that + can't afford to sacrifice enough memory for standard + crashkernel reservation and where less reliable and + possibly incomplete kdump is preferable to no kdump at + all. cryptomgr.notests [KNL] Disable crypto self-tests From e1280f3071f11abc1bacd84937ecf077dce449f3 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 12 Jun 2025 12:18:40 +0200 Subject: [PATCH 0762/2411] kdump: wait for DMA to finish when using CMA When re-using the CMA area for kdump there is a risk of pending DMA into pinned user pages in the CMA area. Pages residing in CMA areas can usually not get long-term pinned and are instead migrated away from the CMA area, so long-term pinning is typically not a concern. (BUGs in the kernel might still lead to long-term pinning of such pages if everything goes wrong.) Pages pinned without FOLL_LONGTERM remain in the CMA and may possibly be the source or destination of a pending DMA transfer. Although there is no clear specification how long a page may be pinned without FOLL_LONGTERM, pinning without the flag shows an intent of the caller to only use the memory for short-lived DMA transfers, not a transfer initiated by a device asynchronously at a random time in the future. Add a delay of CMA_DMA_TIMEOUT_SEC seconds before starting the kdump kernel, giving such short-lived DMA transfers time to finish before the CMA memory is re-used by the kdump kernel. 
Set CMA_DMA_TIMEOUT_SEC to 10 seconds - chosen arbitrarily as both a huge margin for a DMA transfer, yet not increasing the kdump time too significantly. Link: https://lkml.kernel.org/r/aEqpgDIBndZ5LXSo@dwarf.suse.cz Signed-off-by: Jiri Bohac Acked-by: David Hildenbrand Cc: Baoquan He Cc: Dave Young Cc: Donald Dutile Cc: Michal Hocko Cc: Philipp Rudo Cc: Pingfan Liu Cc: Tao Liu Cc: Vivek Goyal Signed-off-by: Andrew Morton --- kernel/crash_core.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 335b8425dd4b..a4ef79591eb2 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -33,6 +34,11 @@ /* Per cpu memory for storing cpu states in case of system crash. */ note_buf_t __percpu *crash_notes; +/* time to wait for possible DMA to finish before starting the kdump kernel + * when a CMA reservation is used + */ +#define CMA_DMA_TIMEOUT_SEC 10 + #ifdef CONFIG_CRASH_DUMP int kimage_crash_copy_vmcoreinfo(struct kimage *image) @@ -97,6 +103,14 @@ int kexec_crash_loaded(void) } EXPORT_SYMBOL_GPL(kexec_crash_loaded); +static void crash_cma_clear_pending_dma(void) +{ + if (!crashk_cma_cnt) + return; + + mdelay(CMA_DMA_TIMEOUT_SEC * 1000); +} + /* * No panic_cpu check version of crash_kexec(). 
This function is called * only when panic_cpu holds the current CPU number; this is the only CPU @@ -119,6 +133,7 @@ void __noclone __crash_kexec(struct pt_regs *regs) crash_setup_regs(&fixed_regs, regs); crash_save_vmcoreinfo(); machine_crash_shutdown(&fixed_regs); + crash_cma_clear_pending_dma(); machine_kexec(kexec_crash_image); } kexec_unlock(); From bf8be1c3610829056e5445282ca92ca7b7a4ba7b Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 12 Jun 2025 12:20:04 +0200 Subject: [PATCH 0763/2411] x86: implement crashkernel cma reservation Implement the crashkernel CMA reservation for x86: - enable parsing of the cma suffix by parse_crashkernel() - reserve memory with reserve_crashkernel_cma() - add the CMA-reserved ranges to the e820 map for the crash kernel - exclude the CMA-reserved ranges from vmcore Link: https://lkml.kernel.org/r/aEqp1LD2og4QeBw9@dwarf.suse.cz Signed-off-by: Jiri Bohac Cc: Baoquan He Cc: Dave Young Cc: David Hildenbrand Cc: Donald Dutile Cc: Michal Hocko Cc: Philipp Rudo Cc: Pingfan Liu Cc: Tao Liu Cc: Vivek Goyal Signed-off-by: Andrew Morton --- arch/x86/kernel/crash.c | 26 ++++++++++++++++++++++---- arch/x86/kernel/setup.c | 5 +++-- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index bcb534688dfe..c6b12bed173d 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -163,10 +163,10 @@ static struct crash_mem *fill_up_crash_elf_data(void) return NULL; /* - * Exclusion of crash region and/or crashk_low_res may cause - * another range split. So add extra two slots here. + * Exclusion of crash region, crashk_low_res and/or crashk_cma_ranges + * may cause range splits. So add extra slots here. 
*/ - nr_ranges += 2; + nr_ranges += 2 + crashk_cma_cnt; cmem = vzalloc(struct_size(cmem, ranges, nr_ranges)); if (!cmem) return NULL; @@ -184,6 +184,7 @@ static struct crash_mem *fill_up_crash_elf_data(void) static int elf_header_exclude_ranges(struct crash_mem *cmem) { int ret = 0; + int i; /* Exclude the low 1M because it is always reserved */ ret = crash_exclude_mem_range(cmem, 0, SZ_1M - 1); @@ -198,8 +199,17 @@ static int elf_header_exclude_ranges(struct crash_mem *cmem) if (crashk_low_res.end) ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); + if (ret) + return ret; - return ret; + for (i = 0; i < crashk_cma_cnt; ++i) { + ret = crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start, + crashk_cma_ranges[i].end); + if (ret) + return ret; + } + + return 0; } static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) @@ -374,6 +384,14 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) add_e820_entry(params, &ei); } + for (i = 0; i < crashk_cma_cnt; ++i) { + ei.addr = crashk_cma_ranges[i].start; + ei.size = crashk_cma_ranges[i].end - + crashk_cma_ranges[i].start + 1; + ei.type = E820_TYPE_RAM; + add_e820_entry(params, &ei); + } + out: vfree(cmem); return ret; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c22dc630c297..680d1b6dfea4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -599,7 +599,7 @@ static void __init memblock_x86_reserve_range_setup_data(void) static void __init arch_reserve_crashkernel(void) { - unsigned long long crash_base, crash_size, low_size = 0; + unsigned long long crash_base, crash_size, low_size = 0, cma_size = 0; bool high = false; int ret; @@ -608,7 +608,7 @@ static void __init arch_reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, - &low_size, NULL, &high); + &low_size, &cma_size, &high); if (ret) return; @@ -618,6 +618,7 @@ static void 
__init arch_reserve_crashkernel(void) } reserve_crashkernel_generic(crash_size, crash_base, low_size, high); + reserve_crashkernel_cma(cma_size); } static struct resource standard_io_resources[] = { From 261743b0135d1d578cab407ba0cf226df30b43d8 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 3 Jul 2025 10:10:00 +0800 Subject: [PATCH 0764/2411] panic: clean up code for console replay Patch series "generalize panic_print's dump function to be used by other kernel parts", v3. When working on kernel stability issues, panic, task-hung and software/hardware lockup are frequently met. And to debug them, user may need lots of system information at that time, like task call stacks, lock info, memory info etc. panic case already has panic_print_sys_info() for this purpose, and has a 'panic_print' bitmask to control what kinds of information is needed, which is also helpful to debug other task-hung and lockup cases. So this patchset extracts the function out to a new file 'lib/sys_info.c', and makes it available for other cases which also need to dump system info for debugging. Also as suggested by Petr Mladek, add 'panic_sys_info=' interface to take human readable string like "tasks,mem,locks,timers,ftrace,....", and eventually obsolete the current 'panic_print' bitmap interface. In RFC and V1 version, hung_task and SW/HW watchdog modules are enabled with the new sys_info dump interface. In v2, they are kept out for better review of current change, and will be posted later. Locally these have been used in our bug chasing for stability issues and was proven helpful. Many thanks to Petr Mladek for great suggestions on both the code and architectures! This patch (of 5): Currently the panic_print_sys_info() was called twice with different parameters to handle console replay case, which is kind of confusing. Add panic_console_replay() explicitly and rename 'PANIC_PRINT_ALL_PRINTK_MSG' to 'PANIC_CONSOLE_REPLAY', to make the code straightforward. 
The related kernel document is also updated. Link: https://lkml.kernel.org/r/20250703021004.42328-1-feng.tang@linux.alibaba.com Link: https://lkml.kernel.org/r/20250703021004.42328-2-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang Suggested-by: Petr Mladek Reviewed-by: Petr Mladek Cc: John Ogness Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . McKenney" Cc: Steven Rostedt Cc: Nathan Chancellor Signed-off-by: Andrew Morton --- .../admin-guide/kernel-parameters.txt | 2 +- Documentation/admin-guide/sysctl/kernel.rst | 2 +- kernel/panic.c | 18 +++++++++--------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ac4a239b9388..3780b7e6bfd5 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4555,7 +4555,7 @@ bit 2: print timer info bit 3: print locks info if CONFIG_LOCKDEP is on bit 4: print ftrace buffer - bit 5: print all printk messages in buffer + bit 5: replay all messages on consoles at the end of panic bit 6: print all CPUs backtrace (if available in the arch) bit 7: print only tasks in uninterruptible (blocked) state *Be aware* that this option may print a _lot_ of lines, diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index dd49a89a62d3..0d08b7a2db2d 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -889,7 +889,7 @@ bit 1 print system memory info bit 2 print timer info bit 3 print locks info if ``CONFIG_LOCKDEP`` is on bit 4 print ftrace buffer -bit 5 print all printk messages in buffer +bit 5 replay all messages on consoles at the end of panic bit 6 print all CPUs backtrace (if available in the arch) bit 7 print only tasks in uninterruptible (blocked) state ===== ============================================ diff --git a/kernel/panic.c b/kernel/panic.c index 
b0b9a8bf4560..9b6c5dc28a65 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -74,7 +74,7 @@ EXPORT_SYMBOL_GPL(panic_timeout); #define PANIC_PRINT_TIMER_INFO 0x00000004 #define PANIC_PRINT_LOCK_INFO 0x00000008 #define PANIC_PRINT_FTRACE_INFO 0x00000010 -#define PANIC_PRINT_ALL_PRINTK_MSG 0x00000020 +#define PANIC_CONSOLE_REPLAY 0x00000020 #define PANIC_PRINT_ALL_CPU_BT 0x00000040 #define PANIC_PRINT_BLOCKED_TASKS 0x00000080 unsigned long panic_print; @@ -238,14 +238,14 @@ void nmi_panic(struct pt_regs *regs, const char *msg) } EXPORT_SYMBOL(nmi_panic); -static void panic_print_sys_info(bool console_flush) +static void panic_console_replay(void) { - if (console_flush) { - if (panic_print & PANIC_PRINT_ALL_PRINTK_MSG) - console_flush_on_panic(CONSOLE_REPLAY_ALL); - return; - } + if (panic_print & PANIC_CONSOLE_REPLAY) + console_flush_on_panic(CONSOLE_REPLAY_ALL); +} +static void panic_print_sys_info(void) +{ if (panic_print & PANIC_PRINT_TASK_INFO) show_state(); @@ -410,7 +410,7 @@ void panic(const char *fmt, ...) */ atomic_notifier_call_chain(&panic_notifier_list, 0, buf); - panic_print_sys_info(false); + panic_print_sys_info(); kmsg_dump_desc(KMSG_DUMP_PANIC, buf); @@ -439,7 +439,7 @@ void panic(const char *fmt, ...) debug_locks_off(); console_flush_on_panic(CONSOLE_FLUSH_PENDING); - panic_print_sys_info(true); + panic_console_replay(); if (!panic_blink) panic_blink = no_blink; From b76e89e50fc3693b7b8a443ed906320d8ccb93fd Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 3 Jul 2025 10:10:01 +0800 Subject: [PATCH 0765/2411] panic: generalize panic_print's function to show sys info 'panic_print' was introduced to help debugging kernel panic by dumping different kinds of system information like tasks' call stack, memory, ftrace buffer, etc. Actually this function could also be used to help debugging other cases like task-hung, soft/hard lockup, etc. where user may need the snapshot of system info at that time. 
Extract system info dump function related code from panic.c to separate file sys_info.[ch], for wider usage by other kernel parts for debugging. Also modify the macro names about singulars/plurals. Link: https://lkml.kernel.org/r/20250703021004.42328-3-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang Suggested-by: Petr Mladek Cc: John Ogness Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . McKenney" Cc: Steven Rostedt Cc: Nathan Chancellor Signed-off-by: Andrew Morton --- include/linux/sys_info.h | 20 ++++++++++++++++++++ kernel/panic.c | 36 ++++-------------------------------- lib/Makefile | 2 +- lib/sys_info.c | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 57 insertions(+), 33 deletions(-) create mode 100644 include/linux/sys_info.h create mode 100644 lib/sys_info.c diff --git a/include/linux/sys_info.h b/include/linux/sys_info.h new file mode 100644 index 000000000000..53b7e27dbf2a --- /dev/null +++ b/include/linux/sys_info.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SYS_INFO_H +#define _LINUX_SYS_INFO_H + +/* + * SYS_INFO_PANIC_CONSOLE_REPLAY is for panic case only, as it needs special + * handling which only fits panic case. 
+ */ +#define SYS_INFO_TASKS 0x00000001 +#define SYS_INFO_MEM 0x00000002 +#define SYS_INFO_TIMERS 0x00000004 +#define SYS_INFO_LOCKS 0x00000008 +#define SYS_INFO_FTRACE 0x00000010 +#define SYS_INFO_PANIC_CONSOLE_REPLAY 0x00000020 +#define SYS_INFO_ALL_CPU_BT 0x00000040 +#define SYS_INFO_BLOCKED_TASKS 0x00000080 + +void sys_info(unsigned long si_mask); + +#endif /* _LINUX_SYS_INFO_H */ diff --git a/kernel/panic.c b/kernel/panic.c index 9b6c5dc28a65..cbb0681177b3 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -69,14 +70,6 @@ bool panic_triggering_all_cpu_backtrace; int panic_timeout = CONFIG_PANIC_TIMEOUT; EXPORT_SYMBOL_GPL(panic_timeout); -#define PANIC_PRINT_TASK_INFO 0x00000001 -#define PANIC_PRINT_MEM_INFO 0x00000002 -#define PANIC_PRINT_TIMER_INFO 0x00000004 -#define PANIC_PRINT_LOCK_INFO 0x00000008 -#define PANIC_PRINT_FTRACE_INFO 0x00000010 -#define PANIC_CONSOLE_REPLAY 0x00000020 -#define PANIC_PRINT_ALL_CPU_BT 0x00000040 -#define PANIC_PRINT_BLOCKED_TASKS 0x00000080 unsigned long panic_print; ATOMIC_NOTIFIER_HEAD(panic_notifier_list); @@ -240,31 +233,10 @@ EXPORT_SYMBOL(nmi_panic); static void panic_console_replay(void) { - if (panic_print & PANIC_CONSOLE_REPLAY) + if (panic_print & SYS_INFO_PANIC_CONSOLE_REPLAY) console_flush_on_panic(CONSOLE_REPLAY_ALL); } -static void panic_print_sys_info(void) -{ - if (panic_print & PANIC_PRINT_TASK_INFO) - show_state(); - - if (panic_print & PANIC_PRINT_MEM_INFO) - show_mem(); - - if (panic_print & PANIC_PRINT_TIMER_INFO) - sysrq_timer_list_show(); - - if (panic_print & PANIC_PRINT_LOCK_INFO) - debug_show_all_locks(); - - if (panic_print & PANIC_PRINT_FTRACE_INFO) - ftrace_dump(DUMP_ALL); - - if (panic_print & PANIC_PRINT_BLOCKED_TASKS) - show_state_filter(TASK_UNINTERRUPTIBLE); -} - void check_panic_on_warn(const char *origin) { unsigned int limit; @@ -285,7 +257,7 @@ void check_panic_on_warn(const char *origin) */ static void 
panic_other_cpus_shutdown(bool crash_kexec) { - if (panic_print & PANIC_PRINT_ALL_CPU_BT) { + if (panic_print & SYS_INFO_ALL_CPU_BT) { /* Temporary allow non-panic CPUs to write their backtraces. */ panic_triggering_all_cpu_backtrace = true; trigger_all_cpu_backtrace(); @@ -410,7 +382,7 @@ void panic(const char *fmt, ...) */ atomic_notifier_call_chain(&panic_notifier_list, 0, buf); - panic_print_sys_info(); + sys_info(panic_print); kmsg_dump_desc(KMSG_DUMP_PANIC, buf); diff --git a/lib/Makefile b/lib/Makefile index c38582f187dd..88d6228089a8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -40,7 +40,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ nmi_backtrace.o win_minmax.o memcat_p.o \ - buildid.o objpool.o iomem_copy.o + buildid.o objpool.o iomem_copy.o sys_info.o lib-$(CONFIG_UNION_FIND) += union_find.o lib-$(CONFIG_PRINTK) += dump_stack.o diff --git a/lib/sys_info.c b/lib/sys_info.c new file mode 100644 index 000000000000..53031e5cb98e --- /dev/null +++ b/lib/sys_info.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include + +#include + +void sys_info(unsigned long si_mask) +{ + if (si_mask & SYS_INFO_TASKS) + show_state(); + + if (si_mask & SYS_INFO_MEM) + show_mem(); + + if (si_mask & SYS_INFO_TIMERS) + sysrq_timer_list_show(); + + if (si_mask & SYS_INFO_LOCKS) + debug_show_all_locks(); + + if (si_mask & SYS_INFO_FTRACE) + ftrace_dump(DUMP_ALL); + + if (si_mask & SYS_INFO_ALL_CPU_BT) + trigger_all_cpu_backtrace(); + + if (si_mask & SYS_INFO_BLOCKED_TASKS) + show_state_filter(TASK_UNINTERRUPTIBLE); +} From d747755917bf8ae08f490c3fe7d8e321afab8127 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 3 Jul 2025 10:10:02 +0800 Subject: [PATCH 0766/2411] panic: add 'panic_sys_info' sysctl to take human readable string parameter Bitmap definition for 'panic_print' is hard to remember and decode. 
Add 'panic_sys_info=' sysctl to take a human readable string like "tasks,mem,timers,locks,ftrace,..." and translate it into a bitmap. The detailed mapping is: SYS_INFO_TASKS "tasks" SYS_INFO_MEM "mem" SYS_INFO_TIMERS "timers" SYS_INFO_LOCKS "locks" SYS_INFO_FTRACE "ftrace" SYS_INFO_ALL_CPU_BT "all_bt" SYS_INFO_BLOCKED_TASKS "blocked_tasks" [nathan@kernel.org: add __maybe_unused to sys_info_avail] Link: https://lkml.kernel.org/r/20250708-fix-clang-sys_info_avail-warning-v1-1-60d239eacd64@kernel.org Link: https://lkml.kernel.org/r/20250703021004.42328-4-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang Suggested-by: Petr Mladek Cc: John Ogness Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . McKenney" Cc: Steven Rostedt Cc: Nathan Chancellor Cc: Andy Shevchenko Signed-off-by: Andrew Morton --- Documentation/admin-guide/sysctl/kernel.rst | 18 +++++ include/linux/sys_info.h | 8 ++ kernel/panic.c | 7 ++ lib/sys_info.c | 90 +++++++++++++++++++++ 4 files changed, 123 insertions(+) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 0d08b7a2db2d..cccb06d1a6bf 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -899,6 +899,24 @@ So for example to print tasks and memory info on panic, user can:: echo 3 > /proc/sys/kernel/panic_print +panic_sys_info +============== + +A comma separated list of extra information to be dumped on panic, +for example, "tasks,mem,timers,...". It is a human readable alternative +to 'panic_print'.
Possible values are: + +============= =================================================== +tasks print all tasks info +mem print system memory info +timers print timers info +locks print locks info if CONFIG_LOCKDEP is on +ftrace print ftrace buffer +all_bt print all CPUs backtrace (if available in the arch) +blocked_tasks print only tasks in uninterruptible (blocked) state +============= =================================================== + + panic_on_rcu_stall ================== diff --git a/include/linux/sys_info.h b/include/linux/sys_info.h index 53b7e27dbf2a..89d77dc4f2ed 100644 --- a/include/linux/sys_info.h +++ b/include/linux/sys_info.h @@ -2,6 +2,8 @@ #ifndef _LINUX_SYS_INFO_H #define _LINUX_SYS_INFO_H +#include + /* * SYS_INFO_PANIC_CONSOLE_REPLAY is for panic case only, as it needs special * handling which only fits panic case. @@ -16,5 +18,11 @@ #define SYS_INFO_BLOCKED_TASKS 0x00000080 void sys_info(unsigned long si_mask); +unsigned long sys_info_parse_param(char *str); +#ifdef CONFIG_SYSCTL +int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, + void *buffer, size_t *lenp, + loff_t *ppos); +#endif #endif /* _LINUX_SYS_INFO_H */ diff --git a/kernel/panic.c b/kernel/panic.c index cbb0681177b3..d7aa427dc23c 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -126,6 +126,13 @@ static const struct ctl_table kern_panic_table[] = { .mode = 0644, .proc_handler = proc_douintvec, }, + { + .procname = "panic_sys_info", + .data = &panic_print, + .maxlen = sizeof(panic_print), + .mode = 0644, + .proc_handler = sysctl_sys_info_handler, + }, }; static __init int kernel_panic_sysctls_init(void) diff --git a/lib/sys_info.c b/lib/sys_info.c index 53031e5cb98e..5bf503fd7ec1 100644 --- a/lib/sys_info.c +++ b/lib/sys_info.c @@ -3,10 +3,100 @@ #include #include #include +#include #include #include +struct sys_info_name { + unsigned long bit; + const char *name; +}; + +/* + * When 'si_names' gets updated, please make sure the 'sys_info_avail' + * below is
updated accordingly. + */ +static const struct sys_info_name si_names[] = { + { SYS_INFO_TASKS, "tasks" }, + { SYS_INFO_MEM, "mem" }, + { SYS_INFO_TIMERS, "timers" }, + { SYS_INFO_LOCKS, "locks" }, + { SYS_INFO_FTRACE, "ftrace" }, + { SYS_INFO_ALL_CPU_BT, "all_bt" }, + { SYS_INFO_BLOCKED_TASKS, "blocked_tasks" }, +}; + +/* Expecting string like "xxx_sys_info=tasks,mem,timers,locks,ftrace,..." */ +unsigned long sys_info_parse_param(char *str) +{ + unsigned long si_bits = 0; + char *s, *name; + int i; + + s = str; + while ((name = strsep(&s, ",")) && *name) { + for (i = 0; i < ARRAY_SIZE(si_names); i++) { + if (!strcmp(name, si_names[i].name)) { + si_bits |= si_names[i].bit; + break; + } + } + } + + return si_bits; +} + +#ifdef CONFIG_SYSCTL + +static const char sys_info_avail[] __maybe_unused = "tasks,mem,timers,locks,ftrace,all_bt,blocked_tasks"; + +int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, + void *buffer, size_t *lenp, + loff_t *ppos) +{ + char names[sizeof(sys_info_avail) + 1]; + struct ctl_table table; + unsigned long *si_bits_global; + + si_bits_global = ro_table->data; + + if (write) { + unsigned long si_bits; + int ret; + + table = *ro_table; + table.data = names; + table.maxlen = sizeof(names); + ret = proc_dostring(&table, write, buffer, lenp, ppos); + if (ret) + return ret; + + si_bits = sys_info_parse_param(names); + /* The access to the global value is not synchronized. 
*/ + WRITE_ONCE(*si_bits_global, si_bits); + return 0; + } else { + /* for 'read' operation */ + char *delim = ""; + int i, len = 0; + + for (i = 0; i < ARRAY_SIZE(si_names); i++) { + if (*si_bits_global & si_names[i].bit) { + len += scnprintf(names + len, sizeof(names) - len, + "%s%s", delim, si_names[i].name); + delim = ","; + } + } + + table = *ro_table; + table.data = names; + table.maxlen = sizeof(names); + return proc_dostring(&table, write, buffer, lenp, ppos); + } +} +#endif + void sys_info(unsigned long si_mask) { if (si_mask & SYS_INFO_TASKS) From 9743d12d0c63968320ece31e2e48723f3235be6d Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 3 Jul 2025 10:10:03 +0800 Subject: [PATCH 0767/2411] panic: add 'panic_sys_info=' setup option for kernel cmdline 'panic_sys_info=' sysctl interface is already added for runtime setting. Add counterpart kernel cmdline option for boottime setting. Link: https://lkml.kernel.org/r/20250703021004.42328-5-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang Suggested-by: Petr Mladek Cc: John Ogness Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . McKenney" Cc: Steven Rostedt Cc: Nathan Chancellor Signed-off-by: Andrew Morton --- Documentation/admin-guide/kernel-parameters.txt | 15 +++++++++++++++ kernel/panic.c | 9 +++++++++ 2 files changed, 24 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3780b7e6bfd5..55a887d6309c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4563,6 +4563,21 @@ Use this option carefully, maybe worth to setup a bigger log buffer with "log_buf_len" along with this. + panic_sys_info= A comma separated list of extra information to be dumped + on panic. + Format: val[,val...] 
+ Where @val can be any of the following: + + tasks: print all tasks info + mem: print system memory info + timers: print timers info + locks: print locks info if CONFIG_LOCKDEP is on + ftrace: print ftrace buffer + all_bt: print all CPUs backtrace (if available in the arch) + blocked_tasks: print only tasks in uninterruptible (blocked) state + + This is a human readable alternative to the 'panic_print' option. + parkbd.port= [HW] Parallel port number the keyboard adapter is connected to, default is 0. Format: diff --git a/kernel/panic.c b/kernel/panic.c index d7aa427dc23c..d9d4fcd5e318 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -143,6 +143,15 @@ static __init int kernel_panic_sysctls_init(void) late_initcall(kernel_panic_sysctls_init); #endif +/* The format is "panic_sys_info=tasks,mem,locks,ftrace,..." */ +static int __init setup_panic_sys_info(char *buf) +{ + /* There is no risk of race in kernel boot phase */ + panic_print = sys_info_parse_param(buf); + return 1; +} +__setup("panic_sys_info=", setup_panic_sys_info); + static atomic_t warn_count = ATOMIC_INIT(0); #ifdef CONFIG_SYSFS From ee13240cd78b68430eb50af4721b3f18dd08af29 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 3 Jul 2025 10:10:04 +0800 Subject: [PATCH 0768/2411] panic: add note that panic_print sysctl interface is deprecated Add a dedicated core parameter 'panic_console_replay' for controlling console replay, and add note that 'panic_print' sysctl interface will be obsoleted by 'panic_sys_info' and 'panic_console_replay'. When it happens, the SYS_INFO_PANIC_CONSOLE_REPLAY can be removed as well. Link: https://lkml.kernel.org/r/20250703021004.42328-6-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang Suggested-by: Petr Mladek Cc: John Ogness Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . 
McKenney" Cc: Steven Rostedt Cc: Nathan Chancellor Signed-off-by: Andrew Morton --- .../admin-guide/kernel-parameters.txt | 4 ++++ kernel/panic.c | 21 ++++++++++++------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 55a887d6309c..3d1e55ed4382 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4578,6 +4578,10 @@ This is a human readable alternative to the 'panic_print' option. + panic_console_replay + When panic happens, replay all kernel messages on + consoles at the end of panic. + parkbd.port= [HW] Parallel port number the keyboard adapter is connected to, default is 0. Format: diff --git a/kernel/panic.c b/kernel/panic.c index d9d4fcd5e318..bb16f254cd02 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -64,6 +64,7 @@ int panic_on_warn __read_mostly; unsigned long panic_on_taint; bool panic_on_taint_nousertaint = false; static unsigned int warn_limit __read_mostly; +static bool panic_console_replay; bool panic_triggering_all_cpu_backtrace; @@ -77,6 +78,13 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list); EXPORT_SYMBOL(panic_notifier_list); #ifdef CONFIG_SYSCTL +static int sysctl_panic_print_handler(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + pr_info_once("Kernel: 'panic_print' sysctl interface will be obsoleted by both 'panic_sys_info' and 'panic_console_replay'\n"); + return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); +} + static const struct ctl_table kern_panic_table[] = { #ifdef CONFIG_SMP { @@ -108,7 +116,7 @@ static const struct ctl_table kern_panic_table[] = { .data = &panic_print, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_doulongvec_minmax, + .proc_handler = sysctl_panic_print_handler, }, { .procname = "panic_on_warn", @@ -247,12 +255,6 @@ void nmi_panic(struct pt_regs *regs, const 
char *msg) } EXPORT_SYMBOL(nmi_panic); -static void panic_console_replay(void) -{ - if (panic_print & SYS_INFO_PANIC_CONSOLE_REPLAY) - console_flush_on_panic(CONSOLE_REPLAY_ALL); -} - void check_panic_on_warn(const char *origin) { unsigned int limit; @@ -427,7 +429,9 @@ void panic(const char *fmt, ...) debug_locks_off(); console_flush_on_panic(CONSOLE_FLUSH_PENDING); - panic_console_replay(); + if ((panic_print & SYS_INFO_PANIC_CONSOLE_REPLAY) || + panic_console_replay) + console_flush_on_panic(CONSOLE_REPLAY_ALL); if (!panic_blink) panic_blink = no_blink; @@ -869,6 +873,7 @@ core_param(panic_print, panic_print, ulong, 0644); core_param(pause_on_oops, pause_on_oops, int, 0644); core_param(panic_on_warn, panic_on_warn, int, 0644); core_param(crash_kexec_post_notifiers, crash_kexec_post_notifiers, bool, 0644); +core_param(panic_console_replay, panic_console_replay, bool, 0644); static int __init oops_setup(char *s) { From 249e7ced7271d8a7e733b1fe7ea7a221eb46698f Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Thu, 3 Jul 2025 15:51:32 -0700 Subject: [PATCH 0769/2411] coccinelle: misc: secs_to_jiffies: implement context and report modes As requested by Ricardo and Jakub, implement report and context modes for the secs_to_jiffies Coccinelle script. While here, add the option to look for opportunities to use secs_to_jiffies() in headers. 
Link: https://lkml.kernel.org/r/20250703225145.152288-1-eahariha@linux.microsoft.com Signed-off-by: Easwar Hariharan Closes: https://lore.kernel.org/all/20250129-secs_to_jiffles-v1-1-35a5e16b9f03@chromium.org/ Closes: https://lore.kernel.org/all/20250221162107.409ae333@kernel.org/ Tested-by: Ricardo Ribalda Cc: Julia Lawall Cc: Nicolas Palix Cc: Jakub Kicinski Cc: Ricardo Ribalda Signed-off-by: Andrew Morton --- scripts/coccinelle/misc/secs_to_jiffies.cocci | 49 +++++++++++++++++-- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/scripts/coccinelle/misc/secs_to_jiffies.cocci b/scripts/coccinelle/misc/secs_to_jiffies.cocci index 416f348174ca..f3241ce75a7b 100644 --- a/scripts/coccinelle/misc/secs_to_jiffies.cocci +++ b/scripts/coccinelle/misc/secs_to_jiffies.cocci @@ -7,26 +7,65 @@ // Confidence: High // Copyright: (C) 2024 Easwar Hariharan, Microsoft // Keywords: secs, seconds, jiffies -// +// Options: --include-headers virtual patch +virtual report +virtual context -@depends on patch@ constant C; @@ +@pconst depends on patch@ constant C; @@ - msecs_to_jiffies(C * 1000) + secs_to_jiffies(C) -@depends on patch@ constant C; @@ +@pconstms depends on patch@ constant C; @@ - msecs_to_jiffies(C * MSEC_PER_SEC) + secs_to_jiffies(C) -@depends on patch@ expression E; @@ +@pexpr depends on patch@ expression E; @@ - msecs_to_jiffies(E * 1000) + secs_to_jiffies(E) -@depends on patch@ expression E; @@ +@pexprms depends on patch@ expression E; @@ - msecs_to_jiffies(E * MSEC_PER_SEC) + secs_to_jiffies(E) + +@r depends on report && !patch@ +constant C; +expression E; +position p; +@@ + +( + msecs_to_jiffies(C@p * 1000) +| + msecs_to_jiffies(C@p * MSEC_PER_SEC) +| + msecs_to_jiffies(E@p * 1000) +| + msecs_to_jiffies(E@p * MSEC_PER_SEC) +) + +@c depends on context && !patch@ +constant C; +expression E; +@@ + +( +* msecs_to_jiffies(C * 1000) +| +* msecs_to_jiffies(C * MSEC_PER_SEC) +| +* msecs_to_jiffies(E * 1000) +| +* msecs_to_jiffies(E * MSEC_PER_SEC) +) + 
+@script:python depends on report@ +p << r.p; +@@ + +coccilib.report.print_report(p[0], "WARNING opportunity for secs_to_jiffies()") From ae2da51def76020fa16f53cd3446c00cafe41008 Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Fri, 27 Jun 2025 15:29:22 +0800 Subject: [PATCH 0770/2411] locking/rwsem: make owner helpers globally available Patch series "extend hung task blocker tracking to rwsems". Inspired by mutex blocker tracking[1], and having already extended it to semaphores, let's now add support for reader-writer semaphores (rwsems). The approach is simple: when a task enters TASK_UNINTERRUPTIBLE while waiting for an rwsem, we just call hung_task_set_blocker(). The hung task detector can then query the rwsem's owner to identify the lock holder. Tracking works reliably for writers, as there can only be a single writer holding the lock, and its task struct is stored in the owner field. The main challenge lies with readers. The owner field points to only one of many concurrent readers, so we might lose track of the blocker if that specific reader unlocks, even while others remain. This is not a significant issue, however. In practice, long-lasting lock contention is almost always caused by a writer. Therefore, reliably tracking the writer is the primary goal of this patch series ;) With this change, the hung task detector can now show blocker task's info like below: [Fri Jun 27 15:21:34 2025] INFO: task cat:28631 blocked for more than 122 seconds. [Fri Jun 27 15:21:34 2025] Tainted: G S 6.16.0-rc3 #8 [Fri Jun 27 15:21:34 2025] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [Fri Jun 27 15:21:34 2025] task:cat state:D stack:0 pid:28631 tgid:28631 ppid:28501 task_flags:0x400000 flags:0x00004000 [Fri Jun 27 15:21:34 2025] Call Trace: [Fri Jun 27 15:21:34 2025] <TASK> [Fri Jun 27 15:21:34 2025] __schedule+0x7c7/0x1930 [Fri Jun 27 15:21:34 2025] ? __pfx___schedule+0x10/0x10 [Fri Jun 27 15:21:34 2025] ?
policy_nodemask+0x215/0x340 [Fri Jun 27 15:21:34 2025] ? _raw_spin_lock_irq+0x8a/0xe0 [Fri Jun 27 15:21:34 2025] ? __pfx__raw_spin_lock_irq+0x10/0x10 [Fri Jun 27 15:21:34 2025] schedule+0x6a/0x180 [Fri Jun 27 15:21:34 2025] schedule_preempt_disabled+0x15/0x30 [Fri Jun 27 15:21:34 2025] rwsem_down_read_slowpath+0x55e/0xe10 [Fri Jun 27 15:21:34 2025] ? __pfx_rwsem_down_read_slowpath+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __pfx___might_resched+0x10/0x10 [Fri Jun 27 15:21:34 2025] down_read+0xc9/0x230 [Fri Jun 27 15:21:34 2025] ? __pfx_down_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __debugfs_file_get+0x14d/0x700 [Fri Jun 27 15:21:34 2025] ? __pfx___debugfs_file_get+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? handle_pte_fault+0x52a/0x710 [Fri Jun 27 15:21:34 2025] ? selinux_file_permission+0x3a9/0x590 [Fri Jun 27 15:21:34 2025] read_dummy_rwsem_read+0x4a/0x90 [Fri Jun 27 15:21:34 2025] full_proxy_read+0xff/0x1c0 [Fri Jun 27 15:21:34 2025] ? rw_verify_area+0x6d/0x410 [Fri Jun 27 15:21:34 2025] vfs_read+0x177/0xa50 [Fri Jun 27 15:21:34 2025] ? __pfx_vfs_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? fdget_pos+0x1cf/0x4c0 [Fri Jun 27 15:21:34 2025] ksys_read+0xfc/0x1d0 [Fri Jun 27 15:21:34 2025] ? 
__pfx_ksys_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] do_syscall_64+0x66/0x2d0 [Fri Jun 27 15:21:34 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Fri Jun 27 15:21:34 2025] RIP: 0033:0x7f3f8faefb40 [Fri Jun 27 15:21:34 2025] RSP: 002b:00007ffdeda5ab98 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Fri Jun 27 15:21:34 2025] RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007f3f8faefb40 [Fri Jun 27 15:21:34 2025] RDX: 0000000000010000 RSI: 00000000010fa000 RDI: 0000000000000003 [Fri Jun 27 15:21:34 2025] RBP: 00000000010fa000 R08: 0000000000000000 R09: 0000000000010fff [Fri Jun 27 15:21:34 2025] R10: 00007ffdeda59fe0 R11: 0000000000000246 R12: 00000000010fa000 [Fri Jun 27 15:21:34 2025] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000fff [Fri Jun 27 15:21:34 2025] </TASK> [Fri Jun 27 15:21:34 2025] INFO: task cat:28631 <reader> blocked on an rw-semaphore likely owned by task cat:28630 <writer> [Fri Jun 27 15:21:34 2025] task:cat state:S stack:0 pid:28630 tgid:28630 ppid:28501 task_flags:0x400000 flags:0x00004000 [Fri Jun 27 15:21:34 2025] Call Trace: [Fri Jun 27 15:21:34 2025] <TASK> [Fri Jun 27 15:21:34 2025] __schedule+0x7c7/0x1930 [Fri Jun 27 15:21:34 2025] ? __pfx___schedule+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __mod_timer+0x304/0xa80 [Fri Jun 27 15:21:34 2025] schedule+0x6a/0x180 [Fri Jun 27 15:21:34 2025] schedule_timeout+0xfb/0x230 [Fri Jun 27 15:21:34 2025] ? __pfx_schedule_timeout+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __pfx_process_timeout+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? down_write+0xc4/0x140 [Fri Jun 27 15:21:34 2025] msleep_interruptible+0xbe/0x150 [Fri Jun 27 15:21:34 2025] read_dummy_rwsem_write+0x54/0x90 [Fri Jun 27 15:21:34 2025] full_proxy_read+0xff/0x1c0 [Fri Jun 27 15:21:34 2025] ? rw_verify_area+0x6d/0x410 [Fri Jun 27 15:21:34 2025] vfs_read+0x177/0xa50 [Fri Jun 27 15:21:34 2025] ? __pfx_vfs_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? fdget_pos+0x1cf/0x4c0 [Fri Jun 27 15:21:34 2025] ksys_read+0xfc/0x1d0 [Fri Jun 27 15:21:34 2025] ?
__pfx_ksys_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] do_syscall_64+0x66/0x2d0 [Fri Jun 27 15:21:34 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Fri Jun 27 15:21:34 2025] RIP: 0033:0x7f8f288efb40 [Fri Jun 27 15:21:34 2025] RSP: 002b:00007ffffb631038 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Fri Jun 27 15:21:34 2025] RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007f8f288efb40 [Fri Jun 27 15:21:34 2025] RDX: 0000000000010000 RSI: 000000002a4b5000 RDI: 0000000000000003 [Fri Jun 27 15:21:34 2025] RBP: 000000002a4b5000 R08: 0000000000000000 R09: 0000000000010fff [Fri Jun 27 15:21:34 2025] R10: 00007ffffb630460 R11: 0000000000000246 R12: 000000002a4b5000 [Fri Jun 27 15:21:34 2025] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000fff [Fri Jun 27 15:21:34 2025] </TASK> This patch (of 3): In preparation for extending blocker tracking to support rwsems, make the rwsem_owner() and is_rwsem_reader_owned() helpers globally available for determining if the blocker is a writer or one of the readers. Additionally, a stale owner pointer in a reader-owned rwsem can lead to false positives in blocker tracking when CONFIG_DETECT_HUNG_TASK_BLOCKER is enabled. To mitigate this, clear the owner field on the reader unlock path, similar to what CONFIG_DEBUG_RWSEMS does. A NULL owner is better than a stale one for diagnostics.
Link: https://lkml.kernel.org/r/20250627072924.36567-1-lance.yang@linux.dev Link: https://lkml.kernel.org/r/20250627072924.36567-2-lance.yang@linux.dev Link: https://lore.kernel.org/all/174046694331.2194069.15472952050240807469.stgit@mhiramat.tok.corp.google.com/ [1] Signed-off-by: Lance Yang Reviewed-by: Masami Hiramatsu (Google) Cc: Anna Schumaker Cc: Boqun Feng Cc: Ingo Molnar Cc: Joel Granados Cc: John Stultz Cc: Kent Overstreet Cc: Mingzhe Yang Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Tomasz Figa Cc: Waiman Long Cc: Will Deacon Cc: Yongliang Gao Cc: Zi Li Signed-off-by: Andrew Morton --- include/linux/rwsem.h | 12 ++++++++++++ kernel/locking/rwsem.c | 14 +++++++------- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index c8b543d428b0..544853bed5b9 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -132,6 +132,18 @@ static inline int rwsem_is_contended(struct rw_semaphore *sem) return !list_empty(&sem->wait_list); } +#if defined(CONFIG_DEBUG_RWSEMS) || defined(CONFIG_DETECT_HUNG_TASK_BLOCKER) +/* + * Return just the real task structure pointer of the owner + */ +extern struct task_struct *rwsem_owner(struct rw_semaphore *sem); + +/* + * Return true if the rwsem is owned by a reader. 
+ */ +extern bool is_rwsem_reader_owned(struct rw_semaphore *sem); +#endif + #else /* !CONFIG_PREEMPT_RT */ #include diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 2ddb827e3bea..a310eb9896de 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -181,11 +181,11 @@ static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) __rwsem_set_reader_owned(sem, current); } -#ifdef CONFIG_DEBUG_RWSEMS +#if defined(CONFIG_DEBUG_RWSEMS) || defined(CONFIG_DETECT_HUNG_TASK_BLOCKER) /* * Return just the real task structure pointer of the owner */ -static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem) +struct task_struct *rwsem_owner(struct rw_semaphore *sem) { return (struct task_struct *) (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK); @@ -194,7 +194,7 @@ static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem) /* * Return true if the rwsem is owned by a reader. */ -static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem) +bool is_rwsem_reader_owned(struct rw_semaphore *sem) { /* * Check the count to see if it is write-locked. @@ -207,10 +207,10 @@ static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem) } /* - * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there - * is a task pointer in owner of a reader-owned rwsem, it will be the - * real owner or one of the real owners. The only exception is when the - * unlock is done by up_read_non_owner(). + * With CONFIG_DEBUG_RWSEMS or CONFIG_DETECT_HUNG_TASK_BLOCKER configured, + * it will make sure that the owner field of a reader-owned rwsem either + * points to a real reader-owner(s) or gets cleared. The only exception is + * when the unlock is done by up_read_non_owner(). 
*/ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) { From 77da18de55ac6417e48905bec8b3c66f023b15a9 Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Fri, 27 Jun 2025 15:29:23 +0800 Subject: [PATCH 0771/2411] hung_task: extend hung task blocker tracking to rwsems Inspired by mutex blocker tracking[1], and having already extended it to semaphores, let's now add support for reader-writer semaphores (rwsems). The approach is simple: when a task enters TASK_UNINTERRUPTIBLE while waiting for an rwsem, we just call hung_task_set_blocker(). The hung task detector can then query the rwsem's owner to identify the lock holder. Tracking works reliably for writers, as there can only be a single writer holding the lock, and its task struct is stored in the owner field. The main challenge lies with readers. The owner field points to only one of many concurrent readers, so we might lose track of the blocker if that specific reader unlocks, even while others remain. This is not a significant issue, however. In practice, long-lasting lock contention is almost always caused by a writer. Therefore, reliably tracking the writer is the primary goal of this patch series ;) With this change, the hung task detector can now show blocker task's info like below: [Fri Jun 27 15:21:34 2025] INFO: task cat:28631 blocked for more than 122 seconds. [Fri Jun 27 15:21:34 2025] Tainted: G S 6.16.0-rc3 #8 [Fri Jun 27 15:21:34 2025] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [Fri Jun 27 15:21:34 2025] task:cat state:D stack:0 pid:28631 tgid:28631 ppid:28501 task_flags:0x400000 flags:0x00004000 [Fri Jun 27 15:21:34 2025] Call Trace: [Fri Jun 27 15:21:34 2025] <TASK> [Fri Jun 27 15:21:34 2025] __schedule+0x7c7/0x1930 [Fri Jun 27 15:21:34 2025] ? __pfx___schedule+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? policy_nodemask+0x215/0x340 [Fri Jun 27 15:21:34 2025] ? _raw_spin_lock_irq+0x8a/0xe0 [Fri Jun 27 15:21:34 2025] ?
__pfx__raw_spin_lock_irq+0x10/0x10 [Fri Jun 27 15:21:34 2025] schedule+0x6a/0x180 [Fri Jun 27 15:21:34 2025] schedule_preempt_disabled+0x15/0x30 [Fri Jun 27 15:21:34 2025] rwsem_down_read_slowpath+0x55e/0xe10 [Fri Jun 27 15:21:34 2025] ? __pfx_rwsem_down_read_slowpath+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __pfx___might_resched+0x10/0x10 [Fri Jun 27 15:21:34 2025] down_read+0xc9/0x230 [Fri Jun 27 15:21:34 2025] ? __pfx_down_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __debugfs_file_get+0x14d/0x700 [Fri Jun 27 15:21:34 2025] ? __pfx___debugfs_file_get+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? handle_pte_fault+0x52a/0x710 [Fri Jun 27 15:21:34 2025] ? selinux_file_permission+0x3a9/0x590 [Fri Jun 27 15:21:34 2025] read_dummy_rwsem_read+0x4a/0x90 [Fri Jun 27 15:21:34 2025] full_proxy_read+0xff/0x1c0 [Fri Jun 27 15:21:34 2025] ? rw_verify_area+0x6d/0x410 [Fri Jun 27 15:21:34 2025] vfs_read+0x177/0xa50 [Fri Jun 27 15:21:34 2025] ? __pfx_vfs_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? fdget_pos+0x1cf/0x4c0 [Fri Jun 27 15:21:34 2025] ksys_read+0xfc/0x1d0 [Fri Jun 27 15:21:34 2025] ? 
__pfx_ksys_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] do_syscall_64+0x66/0x2d0 [Fri Jun 27 15:21:34 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Fri Jun 27 15:21:34 2025] RIP: 0033:0x7f3f8faefb40 [Fri Jun 27 15:21:34 2025] RSP: 002b:00007ffdeda5ab98 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Fri Jun 27 15:21:34 2025] RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007f3f8faefb40 [Fri Jun 27 15:21:34 2025] RDX: 0000000000010000 RSI: 00000000010fa000 RDI: 0000000000000003 [Fri Jun 27 15:21:34 2025] RBP: 00000000010fa000 R08: 0000000000000000 R09: 0000000000010fff [Fri Jun 27 15:21:34 2025] R10: 00007ffdeda59fe0 R11: 0000000000000246 R12: 00000000010fa000 [Fri Jun 27 15:21:34 2025] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000fff [Fri Jun 27 15:21:34 2025] </TASK> [Fri Jun 27 15:21:34 2025] INFO: task cat:28631 <reader> blocked on an rw-semaphore likely owned by task cat:28630 <writer> [Fri Jun 27 15:21:34 2025] task:cat state:S stack:0 pid:28630 tgid:28630 ppid:28501 task_flags:0x400000 flags:0x00004000 [Fri Jun 27 15:21:34 2025] Call Trace: [Fri Jun 27 15:21:34 2025] <TASK> [Fri Jun 27 15:21:34 2025] __schedule+0x7c7/0x1930 [Fri Jun 27 15:21:34 2025] ? __pfx___schedule+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __mod_timer+0x304/0xa80 [Fri Jun 27 15:21:34 2025] schedule+0x6a/0x180 [Fri Jun 27 15:21:34 2025] schedule_timeout+0xfb/0x230 [Fri Jun 27 15:21:34 2025] ? __pfx_schedule_timeout+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __pfx_process_timeout+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? down_write+0xc4/0x140 [Fri Jun 27 15:21:34 2025] msleep_interruptible+0xbe/0x150 [Fri Jun 27 15:21:34 2025] read_dummy_rwsem_write+0x54/0x90 [Fri Jun 27 15:21:34 2025] full_proxy_read+0xff/0x1c0 [Fri Jun 27 15:21:34 2025] ? rw_verify_area+0x6d/0x410 [Fri Jun 27 15:21:34 2025] vfs_read+0x177/0xa50 [Fri Jun 27 15:21:34 2025] ? __pfx_vfs_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? fdget_pos+0x1cf/0x4c0 [Fri Jun 27 15:21:34 2025] ksys_read+0xfc/0x1d0 [Fri Jun 27 15:21:34 2025] ?
__pfx_ksys_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] do_syscall_64+0x66/0x2d0 [Fri Jun 27 15:21:34 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Fri Jun 27 15:21:34 2025] RIP: 0033:0x7f8f288efb40 [Fri Jun 27 15:21:34 2025] RSP: 002b:00007ffffb631038 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Fri Jun 27 15:21:34 2025] RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007f8f288efb40 [Fri Jun 27 15:21:34 2025] RDX: 0000000000010000 RSI: 000000002a4b5000 RDI: 0000000000000003 [Fri Jun 27 15:21:34 2025] RBP: 000000002a4b5000 R08: 0000000000000000 R09: 0000000000010fff [Fri Jun 27 15:21:34 2025] R10: 00007ffffb630460 R11: 0000000000000246 R12: 000000002a4b5000 [Fri Jun 27 15:21:34 2025] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000fff [Fri Jun 27 15:21:34 2025] </TASK> [1] https://lore.kernel.org/all/174046694331.2194069.15472952050240807469.stgit@mhiramat.tok.corp.google.com/ Link: https://lkml.kernel.org/r/20250627072924.36567-3-lance.yang@linux.dev Signed-off-by: Lance Yang Suggested-by: Masami Hiramatsu (Google) Reviewed-by: Masami Hiramatsu (Google) Cc: Anna Schumaker Cc: Boqun Feng Cc: Ingo Molnar Cc: Joel Granados Cc: John Stultz Cc: Kent Overstreet Cc: Mingzhe Yang Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Tomasz Figa Cc: Waiman Long Cc: Will Deacon Cc: Yongliang Gao Cc: Zi Li Signed-off-by: Andrew Morton --- include/linux/hung_task.h | 18 +++++++++--------- kernel/hung_task.c | 29 +++++++++++++++++++++++++---- kernel/locking/rwsem.c | 17 ++++++++++++++++- 3 files changed, 50 insertions(+), 14 deletions(-) diff --git a/include/linux/hung_task.h b/include/linux/hung_task.h index 1bc2b3244613..34e615c76ca5 100644 --- a/include/linux/hung_task.h +++ b/include/linux/hung_task.h @@ -21,17 +21,17 @@ * type.
* * Type encoding: - * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) - * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) - * 10 - Blocked on rt-mutex (BLOCKER_TYPE_RTMUTEX) - * 11 - Blocked on rw-semaphore (BLOCKER_TYPE_RWSEM) + * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) + * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) + * 10 - Blocked on rw-semaphore as READER (BLOCKER_TYPE_RWSEM_READER) + * 11 - Blocked on rw-semaphore as WRITER (BLOCKER_TYPE_RWSEM_WRITER) */ -#define BLOCKER_TYPE_MUTEX 0x00UL -#define BLOCKER_TYPE_SEM 0x01UL -#define BLOCKER_TYPE_RTMUTEX 0x02UL -#define BLOCKER_TYPE_RWSEM 0x03UL +#define BLOCKER_TYPE_MUTEX 0x00UL +#define BLOCKER_TYPE_SEM 0x01UL +#define BLOCKER_TYPE_RWSEM_READER 0x02UL +#define BLOCKER_TYPE_RWSEM_WRITER 0x03UL -#define BLOCKER_TYPE_MASK 0x03UL +#define BLOCKER_TYPE_MASK 0x03UL #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER static inline void hung_task_set_blocker(void *lock, unsigned long type) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index d2432df2b905..8708a1205f82 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -100,6 +101,7 @@ static void debug_show_blocker(struct task_struct *task) { struct task_struct *g, *t; unsigned long owner, blocker, blocker_type; + const char *rwsem_blocked_by, *rwsem_blocked_as; RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "No rcu lock held"); @@ -111,12 +113,20 @@ static void debug_show_blocker(struct task_struct *task) switch (blocker_type) { case BLOCKER_TYPE_MUTEX: - owner = mutex_get_owner( - (struct mutex *)hung_task_blocker_to_lock(blocker)); + owner = mutex_get_owner(hung_task_blocker_to_lock(blocker)); break; case BLOCKER_TYPE_SEM: - owner = sem_last_holder( - (struct semaphore *)hung_task_blocker_to_lock(blocker)); + owner = sem_last_holder(hung_task_blocker_to_lock(blocker)); + break; + case BLOCKER_TYPE_RWSEM_READER: + case BLOCKER_TYPE_RWSEM_WRITER: + owner = (unsigned long)rwsem_owner( + 
hung_task_blocker_to_lock(blocker)); + rwsem_blocked_as = (blocker_type == BLOCKER_TYPE_RWSEM_READER) ? + "reader" : "writer"; + rwsem_blocked_by = is_rwsem_reader_owned( + hung_task_blocker_to_lock(blocker)) ? + "reader" : "writer"; break; default: WARN_ON_ONCE(1); @@ -134,6 +144,11 @@ static void debug_show_blocker(struct task_struct *task) pr_err("INFO: task %s:%d is blocked on a semaphore, but the last holder is not found.\n", task->comm, task->pid); break; + case BLOCKER_TYPE_RWSEM_READER: + case BLOCKER_TYPE_RWSEM_WRITER: + pr_err("INFO: task %s:%d is blocked on an rw-semaphore, but the owner is not found.\n", + task->comm, task->pid); + break; } return; } @@ -152,6 +167,12 @@ static void debug_show_blocker(struct task_struct *task) pr_err("INFO: task %s:%d blocked on a semaphore likely last held by task %s:%d\n", task->comm, task->pid, t->comm, t->pid); break; + case BLOCKER_TYPE_RWSEM_READER: + case BLOCKER_TYPE_RWSEM_WRITER: + pr_err("INFO: task %s:%d <%s> blocked on an rw-semaphore likely owned by task %s:%d <%s>\n", + task->comm, task->pid, rwsem_blocked_as, t->comm, + t->pid, rwsem_blocked_by); + break; } sched_show_task(t); return; diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index a310eb9896de..92c6332da401 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #ifndef CONFIG_PREEMPT_RT @@ -1065,10 +1066,13 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat wake_up_q(&wake_q); trace_contention_begin(sem, LCB_F_READ); + set_current_state(state); + + if (state == TASK_UNINTERRUPTIBLE) + hung_task_set_blocker(sem, BLOCKER_TYPE_RWSEM_READER); /* wait to be given the lock */ for (;;) { - set_current_state(state); if (!smp_load_acquire(&waiter.task)) { /* Matches rwsem_mark_wake()'s smp_store_release(). 
*/ break; @@ -1083,8 +1087,12 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat } schedule_preempt_disabled(); lockevent_inc(rwsem_sleep_reader); + set_current_state(state); } + if (state == TASK_UNINTERRUPTIBLE) + hung_task_clear_blocker(); + __set_current_state(TASK_RUNNING); lockevent_inc(rwsem_rlock); trace_contention_end(sem, 0); @@ -1146,6 +1154,9 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) set_current_state(state); trace_contention_begin(sem, LCB_F_WRITE); + if (state == TASK_UNINTERRUPTIBLE) + hung_task_set_blocker(sem, BLOCKER_TYPE_RWSEM_WRITER); + for (;;) { if (rwsem_try_write_lock(sem, &waiter)) { /* rwsem_try_write_lock() implies ACQUIRE on success */ @@ -1179,6 +1190,10 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) trylock_again: raw_spin_lock_irq(&sem->wait_lock); } + + if (state == TASK_UNINTERRUPTIBLE) + hung_task_clear_blocker(); + __set_current_state(TASK_RUNNING); raw_spin_unlock_irq(&sem->wait_lock); lockevent_inc(rwsem_wlock); From 4efec6c0919d9081a52be50d5b5bfa32bf489c75 Mon Sep 17 00:00:00 2001 From: Zi Li Date: Fri, 27 Jun 2025 15:29:24 +0800 Subject: [PATCH 0772/2411] samples: enhance hung_task detector test with read-write semaphore support Extend the hung_task detector test module to include read-write semaphore support alongside existing mutex and semaphore tests. This module now creates additional debugfs files under <debugfs>/hung_task, namely 'rw_semaphore_read' and 'rw_semaphore_write', in addition to 'mutex' and 'semaphore'. Reading these files with multiple processes triggers a prolonged sleep (256 seconds) while holding the respective lock, enabling hung_task detector testing for various locking mechanisms. This change builds on the extensible hung_task_tests module, adding read-write semaphore functionality to improve test coverage for kernel locking primitives. The implementation ensures proper lock handling and includes checks to prevent redundant data reads.
Usage is: > cd /sys/kernel/debug/hung_task > cat mutex & cat mutex # Test mutex blocking > cat semaphore & cat semaphore # Test semaphore blocking > cat rw_semaphore_write \ & cat rw_semaphore_read # Test rwsem blocking > cat rw_semaphore_write \ & cat rw_semaphore_write # Test rwsem blocking Update the Kconfig description to reflect the addition of read-write semaphore debugfs files. Link: https://lkml.kernel.org/r/20250627072924.36567-4-lance.yang@linux.dev Signed-off-by: Zi Li Suggested-by: Masami Hiramatsu (Google) Cc: Anna Schumaker Cc: Boqun Feng Cc: Ingo Molnar Cc: Joel Granados Cc: John Stultz Cc: Kent Overstreet Cc: Mingzhe Yang Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Tomasz Figa Cc: Waiman Long Cc: Will Deacon Cc: Yongliang Gao Signed-off-by: Andrew Morton --- samples/Kconfig | 7 ++- samples/hung_task/hung_task_tests.c | 81 ++++++++++++++++++++++++++--- 2 files changed, 77 insertions(+), 11 deletions(-) diff --git a/samples/Kconfig b/samples/Kconfig index ffef99950206..a8880c62d4c8 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -316,10 +316,9 @@ config SAMPLE_HUNG_TASK depends on DETECT_HUNG_TASK && DEBUG_FS help Build a module that provides debugfs files (e.g., mutex, semaphore, - etc.) under <debugfs>/hung_task. If user reads one of these files, - it will sleep long time (256 seconds) with holding a lock. Thus, - if 2 or more processes read the same file concurrently, it will - be detected by the hung_task watchdog. + rw_semaphore_read, rw_semaphore_write) under <debugfs>/hung_task. + Reading these files with multiple processes triggers hung task + detection by holding locks for a long time (256 seconds). source "samples/rust/Kconfig" diff --git a/samples/hung_task/hung_task_tests.c b/samples/hung_task/hung_task_tests.c index a5c09bd3a47d..0360ec916890 100644 --- a/samples/hung_task/hung_task_tests.c +++ b/samples/hung_task/hung_task_tests.c @@ -4,11 +4,12 @@ * semaphore, etc.
* * Usage: Load this module and read `<debugfs>/hung_task/mutex`, - * `<debugfs>/hung_task/semaphore`, etc., with 2 or more processes. + * `<debugfs>/hung_task/semaphore`, `<debugfs>/hung_task/rw_semaphore_read`, + * `<debugfs>/hung_task/rw_semaphore_write`, etc., with 2 or more processes. * * This is for testing kernel hung_task error messages with various locking - * mechanisms (e.g., mutex, semaphore, etc.). Note that this may freeze - * your system or cause a panic. Use only for testing purposes. + * mechanisms (e.g., mutex, semaphore, rw_semaphore_read, rw_semaphore_write, etc.). + * Note that this may freeze your system or cause a panic. Use only for testing purposes. */ #include @@ -17,21 +18,29 @@ #include #include #include +#include -#define HUNG_TASK_DIR "hung_task" -#define HUNG_TASK_MUTEX_FILE "mutex" -#define HUNG_TASK_SEM_FILE "semaphore" -#define SLEEP_SECOND 256 +#define HUNG_TASK_DIR "hung_task" +#define HUNG_TASK_MUTEX_FILE "mutex" +#define HUNG_TASK_SEM_FILE "semaphore" +#define HUNG_TASK_RWSEM_READ_FILE "rw_semaphore_read" +#define HUNG_TASK_RWSEM_WRITE_FILE "rw_semaphore_write" +#define SLEEP_SECOND 256 static const char dummy_string[] = "This is a dummy string."; static DEFINE_MUTEX(dummy_mutex); static DEFINE_SEMAPHORE(dummy_sem, 1); +static DECLARE_RWSEM(dummy_rwsem); static struct dentry *hung_task_dir; /* Mutex-based read function */ static ssize_t read_dummy_mutex(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { + /* Check if data is already read */ + if (*ppos >= sizeof(dummy_string)) + return 0; + /* Second task waits on mutex, entering uninterruptible sleep */ guard(mutex)(&dummy_mutex); @@ -46,6 +55,10 @@ static ssize_t read_dummy_mutex(struct file *file, char __user *user_buf, static ssize_t read_dummy_semaphore(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { + /* Check if data is already read */ + if (*ppos >= sizeof(dummy_string)) + return 0; + /* Second task waits on semaphore, entering uninterruptible sleep */ down(&dummy_sem);
@@ -58,6 +71,46 @@ static ssize_t read_dummy_semaphore(struct file *file, char __user *user_buf, sizeof(dummy_string)); } +/* Read-write semaphore read function */ +static ssize_t read_dummy_rwsem_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + /* Check if data is already read */ + if (*ppos >= sizeof(dummy_string)) + return 0; + + /* Acquires read lock, allowing concurrent readers but blocks if write lock is held */ + down_read(&dummy_rwsem); + + /* Sleeps here, potentially triggering hung task detection if lock is held too long */ + msleep_interruptible(SLEEP_SECOND * 1000); + + up_read(&dummy_rwsem); + + return simple_read_from_buffer(user_buf, count, ppos, dummy_string, + sizeof(dummy_string)); +} + +/* Read-write semaphore write function */ +static ssize_t read_dummy_rwsem_write(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + /* Check if data is already read */ + if (*ppos >= sizeof(dummy_string)) + return 0; + + /* Acquires exclusive write lock, blocking all other readers and writers */ + down_write(&dummy_rwsem); + + /* Sleeps here, potentially triggering hung task detection if lock is held too long */ + msleep_interruptible(SLEEP_SECOND * 1000); + + up_write(&dummy_rwsem); + + return simple_read_from_buffer(user_buf, count, ppos, dummy_string, + sizeof(dummy_string)); +} + /* File operations for mutex */ static const struct file_operations hung_task_mutex_fops = { .read = read_dummy_mutex, @@ -68,6 +121,16 @@ static const struct file_operations hung_task_sem_fops = { .read = read_dummy_semaphore, }; +/* File operations for rw_semaphore read */ +static const struct file_operations hung_task_rwsem_read_fops = { + .read = read_dummy_rwsem_read, +}; + +/* File operations for rw_semaphore write */ +static const struct file_operations hung_task_rwsem_write_fops = { + .read = read_dummy_rwsem_write, +}; + static int __init hung_task_tests_init(void) { hung_task_dir = debugfs_create_dir(HUNG_TASK_DIR, 
NULL); @@ -79,6 +142,10 @@ static int __init hung_task_tests_init(void) &hung_task_mutex_fops); debugfs_create_file(HUNG_TASK_SEM_FILE, 0400, hung_task_dir, NULL, &hung_task_sem_fops); + debugfs_create_file(HUNG_TASK_RWSEM_READ_FILE, 0400, hung_task_dir, NULL, + &hung_task_rwsem_read_fops); + debugfs_create_file(HUNG_TASK_RWSEM_WRITE_FILE, 0400, hung_task_dir, NULL, + &hung_task_rwsem_write_fops); return 0; } From 98aa4d5d242d3a73388271e00e11ce91d8c3c3e1 Mon Sep 17 00:00:00 2001 From: Lillian Berry Date: Mon, 7 Jul 2025 09:14:11 +0000 Subject: [PATCH 0773/2411] init/main.c: add warning when file specified in rdinit is inaccessible Avoid silently ignoring the initramfs when the file specified in rdinit is not usable. This prints an error that clearly explains the issue (file was not found, vs initramfs was not found). Link: https://lkml.kernel.org/r/20250707091411.1412681-1-lillian@star-ark.net Signed-off-by: Lillian Berry Cc: Al Viro Signed-off-by: Andrew Morton --- init/main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/init/main.c b/init/main.c index 225a58279acd..e47984871775 100644 --- a/init/main.c +++ b/init/main.c @@ -1592,7 +1592,11 @@ static noinline void __init kernel_init_freeable(void) * check if there is an early userspace init. 
If yes, let it do all * the work */ - if (init_eaccess(ramdisk_execute_command) != 0) { + int ramdisk_command_access; + ramdisk_command_access = init_eaccess(ramdisk_execute_command); + if (ramdisk_command_access != 0) { + pr_warn("check access for rdinit=%s failed: %i, ignoring\n", + ramdisk_execute_command, ramdisk_command_access); ramdisk_execute_command = NULL; prepare_namespace(); } From 988f451ecb17c359cb34e360fce82039942de7bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahelenia=20Ziemia=C5=84ska?= Date: Thu, 3 Jul 2025 20:21:27 +0200 Subject: [PATCH 0774/2411] ocfs2/dlm: fix "take a while" typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ahelenia Ziemiańska Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/dlm/dlmrecovery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 67fc62a49a76..00f52812dbb0 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2632,7 +2632,7 @@ static int dlm_pick_recovery_master(struct dlm_ctxt *dlm) dlm_reco_master_ready(dlm), msecs_to_jiffies(1000)); if (!dlm_reco_master_ready(dlm)) { - mlog(0, "%s: reco master taking awhile\n", + mlog(0, "%s: reco master taking a while\n", dlm->name); goto again; } From 44acc46d182ff36d40cea69db3875440fab72ba5 Mon Sep 17 00:00:00 2001 From: Ivan Pravdin Date: Mon, 7 Jul 2025 20:10:09 -0400 Subject: [PATCH 0775/2411] ocfs2: avoid NULL pointer dereference in dx_dir_lookup_rec() When a directory entry is not found, ocfs2_dx_dir_lookup_rec() prints an error message that unconditionally dereferences the 'rec' pointer. However, if 'rec' is NULL, this leads to a NULL pointer dereference and a kernel panic. Add an explicit check for an empty extent list to avoid dereferencing NULL 'rec' pointer.
Link: https://lkml.kernel.org/r/20250708001009.372263-1-ipravdin.official@gmail.com Reported-by: syzbot+20282c1b2184a857ac4c@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/67cd7e29.050a0220.e1a89.0007.GAE@google.com/ Signed-off-by: Ivan Pravdin Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/dir.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 7799f4d16ce9..8c9c4825f984 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -798,6 +798,14 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode, } } + if (le16_to_cpu(el->l_next_free_rec) == 0) { + ret = ocfs2_error(inode->i_sb, + "Inode %lu has empty extent list at depth %u\n", + inode->i_ino, + le16_to_cpu(el->l_tree_depth)); + goto out; + } + found = 0; for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { rec = &el->l_recs[i]; From c0f98be69f4b550b19f9517157a30f33877bb14d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 8 Jul 2025 12:49:00 +0100 Subject: [PATCH 0776/2411] squashfs: replace ;; with ; and end of fi declaration There is an extraneous ; after a declaration, remove it. 
Link: https://lkml.kernel.org/r/20250708114900.1883130-1-colin.i.king@gmail.com Signed-off-by: Colin Ian King Reviewed-by: Phillip Lougher Signed-off-by: Andrew Morton --- fs/squashfs/block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 296c5a0fcc40..b3ae3b1cc0e5 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -83,7 +83,7 @@ static int squashfs_bio_read_cached(struct bio *fullbio, struct folio *head_to_cache = NULL, *tail_to_cache = NULL; struct block_device *bdev = fullbio->bi_bdev; int start_idx = 0, end_idx = 0; - struct folio_iter fi;; + struct folio_iter fi; struct bio *bio = NULL; int idx = 0; int err = 0; From 97103dcec292b8688de142f7a48bd0d46038d3f6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 8 Jul 2025 15:26:04 +0100 Subject: [PATCH 0777/2411] squashfs: fix incorrect argument to sizeof in kmalloc_array call The sizeof(void *) is the incorrect argument in the kmalloc_array call; it is best to fix this by using sizeof(*cache_folios) instead. Fortunately the sizes of void* and folio* happen to be the same, so this has not shown up as a run time issue.
[akpm@linux-foundation.org: fix build] Link: https://lkml.kernel.org/r/20250708142604.1891156-1-colin.i.king@gmail.com Fixes: 2e227ff5e272 ("squashfs: add optional full compressed block caching") Signed-off-by: Colin Ian King Cc: Phillip Lougher Cc: Chanho Min Signed-off-by: Andrew Morton --- fs/squashfs/block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index b3ae3b1cc0e5..b69c294e3ef0 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -89,7 +89,7 @@ static int squashfs_bio_read_cached(struct bio *fullbio, int err = 0; #ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL struct folio **cache_folios = kmalloc_array(page_count, - sizeof(void *), GFP_KERNEL | __GFP_ZERO); + sizeof(*cache_folios), GFP_KERNEL | __GFP_ZERO); #endif bio_for_each_folio_all(fi, fullbio) { From 08eabe4b9e98d940d2dd6cdb70c7a9187ca54aca Mon Sep 17 00:00:00 2001 From: Ivan Pravdin Date: Mon, 7 Jul 2025 22:06:40 -0400 Subject: [PATCH 0778/2411] ocfs2: avoid potential ABBA deadlock by reordering tl_inode lock In ocfs2_move_extent(), tl_inode is currently locked after the global bitmap inode. However, in ocfs2_flush_truncate_log(), the lock order is reversed: tl_inode is locked first, followed by the global bitmap inode. This creates a classic ABBA deadlock scenario if two threads attempt these operations concurrently and acquire the locks in different orders. To prevent this, move the tl_inode locking earlier in ocfs2_move_extent(), so that it always precedes the global bitmap inode lock. No functional changes beyond lock ordering. 
Link: https://lkml.kernel.org/r/20250708020640.387741-1-ipravdin.official@gmail.com Reported-by: syzbot+6bf948e47f9bac7aacfa@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/67d5645c.050a0220.1dc86f.0004.GAE@google.com/ Signed-off-by: Ivan Pravdin Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/move_extents.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 369c7d27befd..cbe2f8ed8897 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -617,6 +617,8 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, */ credits += OCFS2_INODE_UPDATE_CREDITS + 1; + inode_lock(tl_inode); + /* * ocfs2_move_extent() didn't reserve any clusters in lock_allocators() * logic, while we still need to lock the global_bitmap. @@ -626,7 +628,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, if (!gb_inode) { mlog(ML_ERROR, "unable to get global_bitmap inode\n"); ret = -EIO; - goto out; + goto out_unlock_tl_inode; } inode_lock(gb_inode); @@ -634,16 +636,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1); if (ret) { mlog_errno(ret); - goto out_unlock_gb_mutex; + goto out_unlock_gb_inode; } - inode_lock(tl_inode); - handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); - goto out_unlock_tl_inode; + goto out_unlock; } new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos); @@ -703,15 +703,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, out_commit: ocfs2_commit_trans(osb, handle); brelse(gd_bh); - -out_unlock_tl_inode: - inode_unlock(tl_inode); - +out_unlock: ocfs2_inode_unlock(gb_inode, 1); -out_unlock_gb_mutex: +out_unlock_gb_inode: inode_unlock(gb_inode); 
brelse(gb_bh); iput(gb_inode); +out_unlock_tl_inode: + inode_unlock(tl_inode); out: if (context->meta_ac) { From b3d5fd6f82dde8c906dc2a587003a44252ae5eae Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 6 Jun 2025 21:47:56 +0800 Subject: [PATCH 0779/2411] lib/math/gcd: use static key to select implementation at runtime Patch series "Optimize GCD performance on RISC-V by selecting implementation at runtime", v3. The current implementation of gcd() selects between the binary GCD and the odd-even GCD algorithm at compile time, depending on whether CONFIG_CPU_NO_EFFICIENT_FFS is set. On platforms like RISC-V, however, this compile-time decision can be misleading: even when the compiler emits ctz instructions based on the assumption that they are efficient (as is the case when CONFIG_RISCV_ISA_ZBB is enabled), the actual hardware may lack support for the Zbb extension. In such cases, ffs() falls back to a software implementation at runtime, making the binary GCD algorithm significantly slower than the odd-even variant. To address this, we introduce a static key to allow runtime selection between the binary and odd-even GCD implementations. On RISC-V, the kernel now checks for Zbb support during boot. If Zbb is unavailable, the static key is disabled so that gcd() consistently uses the more efficient odd-even algorithm in that scenario. Additionally, to further reduce code size, we select CONFIG_CPU_NO_EFFICIENT_FFS automatically when CONFIG_RISCV_ISA_ZBB is not enabled, avoiding compilation of the unused binary GCD implementation entirely on systems where it would never be executed. This series ensures that the most efficient GCD algorithm is used in practice and avoids compiling unnecessary code based on hardware capabilities and kernel configuration. This patch (of 3): On platforms like RISC-V, the compiler may generate hardware FFS instructions even if the underlying CPU does not actually support them. 
Currently, the GCD implementation is chosen at compile time based on CONFIG_CPU_NO_EFFICIENT_FFS, which can result in suboptimal behavior on such systems. Introduce a static key, efficient_ffs_key, to enable runtime selection between the binary GCD (using ffs) and the odd-even GCD implementation. This allows the kernel to default to the faster binary GCD when FFS is efficient, while retaining the ability to fall back when needed. Link: https://lkml.kernel.org/r/20250606134758.1308400-1-visitorckw@gmail.com Link: https://lkml.kernel.org/r/20250606134758.1308400-2-visitorckw@gmail.com Co-developed-by: Yu-Chun Lin Signed-off-by: Yu-Chun Lin Signed-off-by: Kuan-Wei Chiu Cc: Albert Ou Cc: Ching-Chun (Jim) Huang Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Alexandre Ghiti Signed-off-by: Andrew Morton --- include/linux/gcd.h | 3 +++ lib/math/gcd.c | 27 +++++++++++++++------------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/include/linux/gcd.h b/include/linux/gcd.h index cb572677fd7f..616e81a7f7e3 100644 --- a/include/linux/gcd.h +++ b/include/linux/gcd.h @@ -3,6 +3,9 @@ #define _GCD_H #include +#include + +DECLARE_STATIC_KEY_TRUE(efficient_ffs_key); unsigned long gcd(unsigned long a, unsigned long b) __attribute_const__; diff --git a/lib/math/gcd.c b/lib/math/gcd.c index e3b042214d1b..62efca6787ae 100644 --- a/lib/math/gcd.c +++ b/lib/math/gcd.c @@ -11,22 +11,16 @@ * has decent hardware division. */ +DEFINE_STATIC_KEY_TRUE(efficient_ffs_key); + #if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) /* If __ffs is available, the even/odd algorithm benchmarks slower. 
*/ -/** - * gcd - calculate and return the greatest common divisor of 2 unsigned longs - * @a: first value - * @b: second value - */ -unsigned long gcd(unsigned long a, unsigned long b) +static unsigned long binary_gcd(unsigned long a, unsigned long b) { unsigned long r = a | b; - if (!a || !b) - return r; - b >>= __ffs(b); if (b == 1) return r & -r; @@ -44,9 +38,15 @@ unsigned long gcd(unsigned long a, unsigned long b) } } -#else +#endif /* If normalization is done by loops, the even/odd algorithm is a win. */ + +/** + * gcd - calculate and return the greatest common divisor of 2 unsigned longs + * @a: first value + * @b: second value + */ unsigned long gcd(unsigned long a, unsigned long b) { unsigned long r = a | b; @@ -54,6 +54,11 @@ unsigned long gcd(unsigned long a, unsigned long b) if (!a || !b) return r; +#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) + if (static_branch_likely(&efficient_ffs_key)) + return binary_gcd(a, b); +#endif + /* Isolate lsbit of r */ r &= -r; @@ -80,6 +85,4 @@ unsigned long gcd(unsigned long a, unsigned long b) } } -#endif - EXPORT_SYMBOL_GPL(gcd); From 26b537edc533058c48f6351569d676703d7d1af3 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 6 Jun 2025 21:47:57 +0800 Subject: [PATCH 0780/2411] riscv: optimize gcd() code size when CONFIG_RISCV_ISA_ZBB is disabled The binary GCD implementation depends on efficient ffs(), which on RISC-V requires hardware support for the Zbb extension. When CONFIG_RISCV_ISA_ZBB is not enabled, the kernel will never use binary GCD, as runtime logic will always fall back to the odd-even implementation. To avoid compiling unused code and reduce code size, select CONFIG_CPU_NO_EFFICIENT_FFS when CONFIG_RISCV_ISA_ZBB is not set. 
$ ./scripts/bloat-o-meter ./lib/math/gcd.o.old ./lib/math/gcd.o.new add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-274 (-274) Function old new delta gcd 360 86 -274 Total: Before=384, After=110, chg -71.35% Link: https://lkml.kernel.org/r/20250606134758.1308400-3-visitorckw@gmail.com Co-developed-by: Yu-Chun Lin Signed-off-by: Yu-Chun Lin Signed-off-by: Kuan-Wei Chiu Acked-by: Alexandre Ghiti Cc: Albert Ou Cc: Ching-Chun (Jim) Huang Cc: Palmer Dabbelt Cc: Paul Walmsley Signed-off-by: Andrew Morton --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index d71ea0f4466f..1736768426ec 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -97,6 +97,7 @@ config RISCV select CLINT_TIMER if RISCV_M_MODE select CLONE_BACKWARDS select COMMON_CLK + select CPU_NO_EFFICIENT_FFS if !RISCV_ISA_ZBB select CPU_PM if CPU_IDLE || HIBERNATION || SUSPEND select EDAC_SUPPORT select FRAME_POINTER if PERF_EVENTS || (FUNCTION_TRACER && !DYNAMIC_FTRACE) From 36e22416872114cae812cdcdd84a5b99ef30b3de Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 6 Jun 2025 21:47:58 +0800 Subject: [PATCH 0781/2411] riscv: optimize gcd() performance on RISC-V without Zbb extension The binary GCD implementation uses FFS (find first set), which benefits from hardware support for the ctz instruction, provided by the Zbb extension on RISC-V. Without Zbb, this results in slower software-emulated behavior. Previously, RISC-V always used the binary GCD, regardless of actual hardware support. This patch improves runtime efficiency by disabling the efficient_ffs_key static branch when Zbb is either not enabled in the kernel (config) or not supported on the executing CPU. This selects the odd-even GCD implementation, which is faster in the absence of efficient FFS. This change ensures the most suitable GCD algorithm is chosen dynamically based on actual hardware capabilities. 
Link: https://lkml.kernel.org/r/20250606134758.1308400-4-visitorckw@gmail.com Co-developed-by: Yu-Chun Lin Signed-off-by: Yu-Chun Lin Signed-off-by: Kuan-Wei Chiu Acked-by: Alexandre Ghiti Cc: Albert Ou Cc: Ching-Chun (Jim) Huang Cc: Palmer Dabbelt Cc: Paul Walmsley Signed-off-by: Andrew Morton --- arch/riscv/kernel/setup.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 14888e5ea19a..f90cce7a3ace 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include @@ -362,6 +364,9 @@ void __init setup_arch(char **cmdline_p) riscv_user_isa_enable(); riscv_spinlock_init(); + + if (!IS_ENABLED(CONFIG_RISCV_ISA_ZBB) || !riscv_isa_extension_available(NULL, ZBB)) + static_branch_disable(&efficient_ffs_key); } bool arch_cpu_is_hotpluggable(int cpu) From 813b46808822db6838c43e92ba21ce013d23fcdc Mon Sep 17 00:00:00 2001 From: WangYuli Date: Thu, 10 Jul 2025 21:04:12 +0800 Subject: [PATCH 0782/2411] selftests/thermal: remove duplicate sprintf() call in workload_hint_test Remove redundant sprintf() call that was duplicating the same operation of formatting delay_str with argv[1]. 
Link: https://lkml.kernel.org/r/6338CD0E839B770B+20250710130412.284531-1-wangyuli@uniontech.com Signed-off-by: WangYuli Cc: Guan Wentao Cc: Shuah Khan Signed-off-by: Andrew Morton --- .../selftests/thermal/intel/workload_hint/workload_hint_test.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c index a40097232967..bda006af8b1b 100644 --- a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c +++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c @@ -67,8 +67,6 @@ int main(int argc, char **argv) if (delay < 0) exit(1); - sprintf(delay_str, "%s\n", argv[1]); - sprintf(delay_str, "%s\n", argv[1]); fd = open(WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE, O_RDWR); if (fd < 0) { From 599579e857ab8d8f97409f631090e08018f8343b Mon Sep 17 00:00:00 2001 From: WangYuli Date: Thu, 10 Jul 2025 21:47:51 +0800 Subject: [PATCH 0783/2411] selftests/thermal: remove duplicate newlines in perror calls perror() automatically appends a newline character, so the explicit '\n' in the format strings is redundant and results in duplicate newlines in the output. Remove the redundant '\n' characters from perror() calls in workload_hint_test.c to fix the formatting. 
Link: https://lkml.kernel.org/r/F482FB1EC020000C+20250710134751.306096-1-wangyuli@uniontech.com Signed-off-by: WangYuli Cc: Guan Wentao Cc: Shuah Khan Signed-off-by: Andrew Morton --- .../intel/workload_hint/workload_hint_test.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c index bda006af8b1b..ba58589a1145 100644 --- a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c +++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c @@ -32,12 +32,12 @@ void workload_hint_exit(int signum) fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR); if (fd < 0) { - perror("Unable to open workload type feature enable file\n"); + perror("Unable to open workload type feature enable file"); exit(1); } if (write(fd, "0\n", 2) < 0) { - perror("Can't disable workload hints\n"); + perror("Can't disable workload hints"); exit(1); } @@ -70,12 +70,12 @@ int main(int argc, char **argv) sprintf(delay_str, "%s\n", argv[1]); fd = open(WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE, O_RDWR); if (fd < 0) { - perror("Unable to open workload notification delay\n"); + perror("Unable to open workload notification delay"); exit(1); } if (write(fd, delay_str, strlen(delay_str)) < 0) { - perror("Can't set delay\n"); + perror("Can't set delay"); exit(1); } @@ -92,12 +92,12 @@ int main(int argc, char **argv) /* Enable feature via sysfs knob */ fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR); if (fd < 0) { - perror("Unable to open workload type feature enable file\n"); + perror("Unable to open workload type feature enable file"); exit(1); } if (write(fd, "1\n", 2) < 0) { - perror("Can't enable workload hints\n"); + perror("Can't enable workload hints"); exit(1); } @@ -108,7 +108,7 @@ int main(int argc, char **argv) while (1) { fd = open(WORKLOAD_TYPE_INDEX_ATTRIBUTE, O_RDONLY); if (fd < 0) { - 
perror("Unable to open workload type file\n"); + perror("Unable to open workload type file"); exit(1); } From 6b47c9f8ee3960d4b46bda4de63429fdfe468989 Mon Sep 17 00:00:00 2001 From: Wang Yaxin Date: Thu, 10 Jul 2025 13:54:51 +0800 Subject: [PATCH 0784/2411] delaytop: add psi info to show system delay Support showing whole delay of system by reading PSI, just like the first few lines of information output by the top command. the output of delaytop includes both system-wide delay and delay of individual tasks, providing a more comprehensive reflection of system latency status. Use case ======== bash# ./delaytop System Pressure Information: (avg10/avg60/avg300/total) CPU: full: 0.0%/ 0.0%/ 0.0%/0 some: 0.1%/ 0.0%/ 0.0%/14216596 Memory: full: 0.0%/ 0.0%/ 0.0%/34010659 some: 0.0%/ 0.0%/ 0.0%/35406492 IO: full: 0.1%/ 0.0%/ 0.0%/51029453 some: 0.1%/ 0.0%/ 0.0%/55330465 IRQ: full: 0.0%/ 0.0%/ 0.0%/0 Top 20 processes (sorted by CPU delay): PID TGID COMMAND CPU(ms) IO(ms) SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms) --------------------------------------------------------------------------------------------- 32 32 kworker/2:0H-sy 23.65 0.00 0.00 0.00 0.00 0.00 0.00 0.00 497 497 kworker/R-scsi_ 1.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 495 495 kworker/R-scsi_ 1.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00 494 494 scsi_eh_0 1.12 0.00 0.00 0.00 0.00 0.00 0.00 0.00 485 485 kworker/R-ata_s 0.90 0.00 0.00 0.00 0.00 0.00 0.00 0.00 574 574 kworker/R-kdmfl 0.36 0.00 0.00 0.00 0.00 0.00 0.00 0.00 34 34 idle_inject/3 0.33 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1123 1123 nde-netfilter 0.28 0.00 0.00 0.00 0.00 0.00 0.00 0.00 60 60 ksoftirqd/7 0.25 0.00 0.00 0.00 0.00 0.00 0.00 0.00 114 114 kworker/0:2-cgr 0.25 0.00 0.00 0.00 0.00 0.00 0.00 0.00 496 496 scsi_eh_1 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 51 51 cpuhp/6 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1667 1667 atd 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 45 45 cpuhp/5 0.23 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1102 1102 nde-backupservi 0.22 0.00 
0.00 0.00 0.00 0.00 0.00 0.00 1098 1098 systemsettings 0.21 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1100 1100 audit-monitor 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 53 53 migration/6 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1482 1482 sshd 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 39 39 cpuhp/4 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 Link: https://lkml.kernel.org/r/20250710135451340_5pOgpIFi0M5AE7H44W1D@zte.com.cn Co-developed-by: Fan Yu Signed-off-by: Fan Yu Signed-off-by: Wang Yaxin Signed-off-by: Jiang Kun Cc: Balbir Singh Cc: David Hildenbrand Cc: Peilin He Cc: Qiang Tu Cc: wangyong Cc: xu xin Cc: Yang Yang Cc: Yunkai Zhang Signed-off-by: Andrew Morton --- tools/accounting/delaytop.c | 161 +++++++++++++++++++++++++++++++++--- 1 file changed, 148 insertions(+), 13 deletions(-) diff --git a/tools/accounting/delaytop.c b/tools/accounting/delaytop.c index 23e38f39e97d..cd848af9a856 100644 --- a/tools/accounting/delaytop.c +++ b/tools/accounting/delaytop.c @@ -10,9 +10,9 @@ * individual tasks (PIDs). * * Key features: - * - Collects per-task delay accounting statistics via taskstats. - * - Supports sorting, filtering. - * - Supports both interactive (screen refresh). + * - Collects per-task delay accounting statistics via taskstats. + * - Supports sorting, filtering. + * - Supports both interactive (screen refresh). * * Copyright (C) Fan Yu, ZTE Corp. 2025 * Copyright (C) Wang Yaxin, ZTE Corp. 
2025 @@ -43,6 +43,14 @@ #include #include +#define PSI_CPU_SOME "/proc/pressure/cpu" +#define PSI_CPU_FULL "/proc/pressure/cpu" +#define PSI_MEMORY_SOME "/proc/pressure/memory" +#define PSI_MEMORY_FULL "/proc/pressure/memory" +#define PSI_IO_SOME "/proc/pressure/io" +#define PSI_IO_FULL "/proc/pressure/io" +#define PSI_IRQ_FULL "/proc/pressure/irq" + #define NLA_NEXT(na) ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len))) #define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN)) #define NLA_PAYLOAD(len) (len - NLA_HDRLEN) @@ -66,6 +74,24 @@ struct config { char *container_path; /* Path to container cgroup */ }; +/* PSI statistics structure */ +struct psi_stats { + double cpu_some_avg10, cpu_some_avg60, cpu_some_avg300; + unsigned long long cpu_some_total; + double cpu_full_avg10, cpu_full_avg60, cpu_full_avg300; + unsigned long long cpu_full_total; + double memory_some_avg10, memory_some_avg60, memory_some_avg300; + unsigned long long memory_some_total; + double memory_full_avg10, memory_full_avg60, memory_full_avg300; + unsigned long long memory_full_total; + double io_some_avg10, io_some_avg60, io_some_avg300; + unsigned long long io_some_total; + double io_full_avg10, io_full_avg60, io_full_avg300; + unsigned long long io_full_total; + double irq_full_avg10, irq_full_avg60, irq_full_avg300; + unsigned long long irq_full_total; +}; + /* Task delay information structure */ struct task_info { int pid; @@ -100,6 +126,7 @@ struct container_stats { /* Global variables */ static struct config cfg; +static struct psi_stats psi; static struct task_info tasks[MAX_TASKS]; static int task_count; static int running = 1; @@ -130,13 +157,13 @@ static void usage(void) { printf("Usage: delaytop [Options]\n" "Options:\n" - " -h, --help Show this help message and exit\n" - " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n" - " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n" - " -P, --processes=NUMBER Set maximum number of 
processes to show (default: 20, max: 1000)\n" - " -o, --once Display once and exit\n" - " -p, --pid=PID Monitor only the specified PID\n" - " -C, --container=PATH Monitor the container at specified cgroup path\n"); + " -h, --help Show this help message and exit\n" + " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n" + " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n" + " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n" + " -o, --once Display once and exit\n" + " -p, --pid=PID Monitor only the specified PID\n" + " -C, --container=PATH Monitor the container at specified cgroup path\n"); exit(0); } @@ -276,7 +303,7 @@ static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, memset(&nladdr, 0, sizeof(nladdr)); nladdr.nl_family = AF_NETLINK; while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr, - sizeof(nladdr))) < buflen) { + sizeof(nladdr))) < buflen) { if (r > 0) { buf += r; buflen -= r; @@ -320,6 +347,89 @@ static int get_family_id(int sd) return id; } +static void read_psi_stats(void) +{ + FILE *fp; + char line[256]; + int ret = 0; + /* Zero all fields */ + memset(&psi, 0, sizeof(psi)); + /* CPU pressure */ + fp = fopen(PSI_CPU_SOME, "r"); + if (fp) { + while (fgets(line, sizeof(line), fp)) { + if (strncmp(line, "some", 4) == 0) { + ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.cpu_some_avg10, &psi.cpu_some_avg60, + &psi.cpu_some_avg300, &psi.cpu_some_total); + if (ret != 4) + fprintf(stderr, "Failed to parse CPU some PSI data\n"); + } else if (strncmp(line, "full", 4) == 0) { + ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.cpu_full_avg10, &psi.cpu_full_avg60, + &psi.cpu_full_avg300, &psi.cpu_full_total); + if (ret != 4) + fprintf(stderr, "Failed to parse CPU full PSI data\n"); + } + } + fclose(fp); + } + /* Memory pressure */ + fp = fopen(PSI_MEMORY_SOME, "r"); + if (fp) { + while (fgets(line, 
sizeof(line), fp)) { + if (strncmp(line, "some", 4) == 0) { + ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.memory_some_avg10, &psi.memory_some_avg60, + &psi.memory_some_avg300, &psi.memory_some_total); + if (ret != 4) + fprintf(stderr, "Failed to parse Memory some PSI data\n"); + } else if (strncmp(line, "full", 4) == 0) { + ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.memory_full_avg10, &psi.memory_full_avg60, + &psi.memory_full_avg300, &psi.memory_full_total); + } + if (ret != 4) + fprintf(stderr, "Failed to parse Memory full PSI data\n"); + } + fclose(fp); + } + /* IO pressure */ + fp = fopen(PSI_IO_SOME, "r"); + if (fp) { + while (fgets(line, sizeof(line), fp)) { + if (strncmp(line, "some", 4) == 0) { + ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.io_some_avg10, &psi.io_some_avg60, + &psi.io_some_avg300, &psi.io_some_total); + if (ret != 4) + fprintf(stderr, "Failed to parse IO some PSI data\n"); + } else if (strncmp(line, "full", 4) == 0) { + ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.io_full_avg10, &psi.io_full_avg60, + &psi.io_full_avg300, &psi.io_full_total); + if (ret != 4) + fprintf(stderr, "Failed to parse IO full PSI data\n"); + } + } + fclose(fp); + } + /* IRQ pressure (only full) */ + fp = fopen(PSI_IRQ_FULL, "r"); + if (fp) { + while (fgets(line, sizeof(line), fp)) { + if (strncmp(line, "full", 4) == 0) { + ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.irq_full_avg10, &psi.irq_full_avg60, + &psi.irq_full_avg300, &psi.irq_full_total); + if (ret != 4) + fprintf(stderr, "Failed to parse IRQ full PSI data\n"); + } + } + fclose(fp); + } +} + static int read_comm(int pid, char *comm_buf, size_t buf_size) { char path[64]; @@ -549,7 +659,29 @@ static void display_results(void) FILE *out = stdout; fprintf(out, "\033[H\033[J"); + /* PSI output (one-line, no cat style) */ + fprintf(out, "System Pressure 
Information: "); + fprintf(out, "(avg10/avg60/avg300/total)\n"); + fprintf(out, "CPU:"); + fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.cpu_full_avg10, + psi.cpu_full_avg60, psi.cpu_full_avg300, psi.cpu_full_total); + fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.cpu_some_avg10, + psi.cpu_some_avg60, psi.cpu_some_avg300, psi.cpu_some_total); + fprintf(out, "Memory:"); + fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.memory_full_avg10, + psi.memory_full_avg60, psi.memory_full_avg300, psi.memory_full_total); + fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.memory_some_avg10, + psi.memory_some_avg60, psi.memory_some_avg300, psi.memory_some_total); + + fprintf(out, "IO:"); + fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.io_full_avg10, + psi.io_full_avg60, psi.io_full_avg300, psi.io_full_total); + fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.io_some_avg10, + psi.io_some_avg60, psi.io_some_avg300, psi.io_some_total); + fprintf(out, "IRQ:"); + fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n\n", psi.irq_full_avg10, + psi.irq_full_avg60, psi.irq_full_avg300, psi.irq_full_total); if (cfg.container_path) { fprintf(out, "Container Information (%s):\n", cfg.container_path); fprintf(out, "Processes: running=%d, sleeping=%d, ", @@ -559,8 +691,8 @@ static void display_results(void) container_stats.nr_io_wait); } fprintf(out, "Top %d processes (sorted by CPU delay):\n\n", - cfg.max_processes); - fprintf(out, " PID TGID COMMAND CPU(ms) IO(ms) "); + cfg.max_processes); + fprintf(out, " PID TGID COMMAND CPU(ms) IO(ms) "); fprintf(out, "SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms)\n"); fprintf(out, "-----------------------------------------------"); fprintf(out, "----------------------------------------------\n"); @@ -616,6 +748,9 @@ int main(int argc, char **argv) /* Main loop */ while (running) { + /* Read PSI statistics */ + read_psi_stats(); + /* Get container stats if container path 
provided */ if (cfg.container_path) get_container_stats(); From 320bf1709a473403840eba60c432a9f9b7d824d5 Mon Sep 17 00:00:00 2001 From: Wang Yaxin Date: Thu, 10 Jul 2025 13:51:41 +0800 Subject: [PATCH 0785/2411] docs: update docs after introducing delaytop The "getdelays" can only display the latency of a single task by specifying a PID, we introduce the "delaytop" with the following capabilities: 1. system view: monitors latency metrics (CPU, I/O, memory, IRQ, etc.) for all system processes 2. supports field-based sorting (e.g., default sort by CPU latency in descending order) 3. dynamic interactive interface: focus on specific processes with --pid; limit displayed entries with --processes 20; control monitoring duration with --iterations; Link: https://lkml.kernel.org/r/2025071013514177028RdjISjqeIOnTCRvGAwy@zte.com.cn Signed-off-by: Fan Yu Signed-off-by: Wang Yaxin Signed-off-by: Jiang Kun Cc: Balbir Singh Cc: David Hildenbrand Cc: Peilin He Cc: Qiang Tu Cc: wangyong Cc: xu xin Cc: Yang Yang Cc: Yunkai Zhang Signed-off-by: Andrew Morton --- Documentation/accounting/delay-accounting.rst | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst index 210c194d4a7b..664950328fb7 100644 --- a/Documentation/accounting/delay-accounting.rst +++ b/Documentation/accounting/delay-accounting.rst @@ -131,3 +131,50 @@ Get IO accounting for pid 1, it works only with -p:: linuxrc: read=65536, write=0, cancelled_write=0 The above command can be used with -v to get more debug information. + +After the system starts, use `delaytop` to get the Top-N high-latency tasks. +This tool supports sorting by CPU latency in descending order by default, +displays the top 20 high-latency tasks by default, and refreshes the latency +data every 2 seconds by default.
+ +Get Top-N tasks delay, since system boot:: + + bash# ./delaytop + Top 20 processes (sorted by CPU delay): + + PID TGID COMMAND CPU(ms) IO(ms) SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms) + --------------------------------------------------------------------------------------------- + 32 32 kworker/2:0H-sy 23.65 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 497 497 kworker/R-scsi_ 1.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 495 495 kworker/R-scsi_ 1.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 494 494 scsi_eh_0 1.12 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 485 485 kworker/R-ata_s 0.90 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 574 574 kworker/R-kdmfl 0.36 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 34 34 idle_inject/3 0.33 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1123 1123 nde-netfilter 0.28 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 60 60 ksoftirqd/7 0.25 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 114 114 kworker/0:2-cgr 0.25 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 496 496 scsi_eh_1 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 51 51 cpuhp/6 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1667 1667 atd 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 45 45 cpuhp/5 0.23 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1102 1102 nde-backupservi 0.22 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1098 1098 systemsettings 0.21 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1100 1100 audit-monitor 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 53 53 migration/6 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1482 1482 sshd 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 39 39 cpuhp/4 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + +Dynamic interactive interface of delaytop:: + + # ./delaytop -p pid + Print delayacct stats + + # ./delaytop -P num + Display the top N tasks + + # ./delaytop -n num + Set the number of delaytop refreshes before exiting (num times) + + # ./delaytop -d secs + Specify refresh interval as secs From a9ed4422adacce848fd368c3a7076368ead7fc18 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 23 Jun 2025 16:47:25 +0800 Subject: [PATCH 0786/2411] lib/raid6: update recov_rvv.c zero
page usage Update lib/raid6/recov_rvv.c, for 1857fcc84744 ("lib/raid6: replace custom zero page with ZERO_PAGE"), per Klara. Link: https://lkml.kernel.org/r/aFkUnXWtxcgOTVkw@gondor.apana.org.au Fixes: 1857fcc84744 ("lib/raid6: replace custom zero page with ZERO_PAGE") Signed-off-by: Herbert Xu Cc: Song Liu Cc: Yu Kuai Cc: Klara Modin Signed-off-by: Andrew Morton --- lib/raid6/recov_rvv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/raid6/recov_rvv.c b/lib/raid6/recov_rvv.c index f29303795ccf..5d54c4b437df 100644 --- a/lib/raid6/recov_rvv.c +++ b/lib/raid6/recov_rvv.c @@ -165,10 +165,10 @@ static void raid6_2data_recov_rvv(int disks, size_t bytes, int faila, * delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -203,7 +203,7 @@ static void raid6_datap_recov_rvv(int disks, size_t bytes, int faila, * Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); From 88fec3526e84123997ecebd6bb6778eb4ce779b7 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Thu, 19 Jun 2025 22:11:52 -0700 Subject: [PATCH 0787/2411] apparmor: make sure unix socket labeling is correctly updated. When a unix socket is passed into a different confinement domain make sure its cached mediation labeling is updated to correctly reflect which domains are using the socket. 
Fixes: c05e705812d1 ("apparmor: add fine grained af_unix mediation") Signed-off-by: John Johansen --- security/apparmor/af_unix.c | 79 +++++++++++++- security/apparmor/file.c | 35 +++++-- security/apparmor/include/label.h | 7 ++ security/apparmor/include/net.h | 5 +- security/apparmor/lsm.c | 165 +++++++++++++++++++++--------- security/apparmor/net.c | 2 +- 6 files changed, 231 insertions(+), 62 deletions(-) diff --git a/security/apparmor/af_unix.c b/security/apparmor/af_unix.c index dc25f1afe819..257648a13bf8 100644 --- a/security/apparmor/af_unix.c +++ b/security/apparmor/af_unix.c @@ -646,6 +646,67 @@ int aa_unix_peer_perm(const struct cred *subj_cred, peer_label); } +/* sk_plabel for comparison only */ +static void update_sk_ctx(struct sock *sk, struct aa_label *label, + struct aa_label *plabel) +{ + struct aa_label *l, *old; + struct aa_sk_ctx *ctx = aa_sock(sk); + bool update_sk; + + rcu_read_lock(); + update_sk = (plabel && + (plabel != rcu_access_pointer(ctx->peer_lastupdate) || + !aa_label_is_subset(plabel, rcu_dereference(ctx->peer)))) || + !__aa_subj_label_is_cached(label, rcu_dereference(ctx->label)); + rcu_read_unlock(); + if (!update_sk) + return; + + spin_lock(&unix_sk(sk)->lock); + old = rcu_dereference_protected(ctx->label, + lockdep_is_held(&unix_sk(sk)->lock)); + l = aa_label_merge(old, label, GFP_ATOMIC); + if (l) { + if (l != old) { + rcu_assign_pointer(ctx->label, l); + aa_put_label(old); + } else + aa_put_label(l); + } + if (plabel && rcu_access_pointer(ctx->peer_lastupdate) != plabel) { + old = rcu_dereference_protected(ctx->peer, lockdep_is_held(&unix_sk(sk)->lock)); + + if (old == plabel) { + rcu_assign_pointer(ctx->peer_lastupdate, plabel); + } else if (aa_label_is_subset(plabel, old)) { + rcu_assign_pointer(ctx->peer_lastupdate, plabel); + rcu_assign_pointer(ctx->peer, aa_get_label(plabel)); + aa_put_label(old); + } /* else race or a subset - don't update */ + } + spin_unlock(&unix_sk(sk)->lock); +} + +static void update_peer_ctx(struct 
sock *sk, struct aa_sk_ctx *ctx, + struct aa_label *label) +{ + struct aa_label *l, *old; + + spin_lock(&unix_sk(sk)->lock); + old = rcu_dereference_protected(ctx->peer, + lockdep_is_held(&unix_sk(sk)->lock)); + l = aa_label_merge(old, label, GFP_ATOMIC); + if (l) { + if (l != old) { + rcu_assign_pointer(ctx->peer, l); + aa_put_label(old); + } else + aa_put_label(l); + } + spin_unlock(&unix_sk(sk)->lock); +} + /* This fn is only checked if something has changed in the security * boundaries. Otherwise cached info off file is sufficient */ @@ -655,6 +716,7 @@ int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, struct socket *sock = (struct socket *) file->private_data; struct sockaddr_un *addr, *peer_addr; int addrlen, peer_addrlen; + struct aa_label *plabel = NULL; struct sock *peer_sk = NULL; u32 sk_req = request & ~NET_PEER_MASK; struct path path; @@ -666,7 +728,6 @@ int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, AA_BUG(!sock->sk); AA_BUG(sock->sk->sk_family != PF_UNIX); - /* TODO: update sock label with new task label */ /* investigate only using lock via unix_peer_get() * addr only needs the memory barrier, but need to investigate * path @@ -701,8 +762,12 @@ int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, unix_fs_perm(op, request, subj_cred, label, is_unix_fs(peer_sk) ? &peer_path : NULL)); } else if (!is_sk_fs) { + struct aa_label *plabel; struct aa_sk_ctx *pctx = aa_sock(peer_sk); + rcu_read_lock(); + plabel = aa_get_label_rcu(&pctx->label); + rcu_read_unlock(); /* no fs check of aa_unix_peer_perm because conditions above * ensure they will never be done */ @@ -713,18 +778,26 @@ int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, peer_addr, peer_addrlen, is_unix_fs(peer_sk) ? 
&peer_path : NULL, - pctx->label), - unix_peer_perm(file->f_cred, pctx->label, op, + plabel), + unix_peer_perm(file->f_cred, plabel, op, MAY_READ | MAY_WRITE, peer_sk, is_unix_fs(peer_sk) ? &peer_path : NULL, addr, addrlen, is_sk_fs ? &path : NULL, label))); + if (!error && !__aa_subj_label_is_cached(plabel, label)) + update_peer_ctx(peer_sk, pctx, label); } sock_put(peer_sk); out: + /* update peer cache to latest successful perm check */ + if (error == 0) + update_sk_ctx(sock->sk, label, plabel); + aa_put_label(plabel); + return error; } + diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 5c984792cbf0..65e1d29af792 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -561,19 +561,35 @@ static int __file_sock_perm(const char *op, const struct cred *subj_cred, return error; } -/* wrapper fn to indicate semantics of the check */ -static bool __subj_label_is_cached(struct aa_label *subj_label, - struct aa_label *obj_label) -{ - return aa_label_is_subset(obj_label, subj_label); -} - /* for now separate fn to indicate semantics of the check */ static bool __file_is_delegated(struct aa_label *obj_label) { return unconfined(obj_label); } +static bool __unix_needs_revalidation(struct file *file, struct aa_label *label, + u32 request) +{ + struct socket *sock = (struct socket *) file->private_data; + + lockdep_assert_in_rcu_read_lock(); + + if (!S_ISSOCK(file_inode(file)->i_mode)) + return false; + if (request & NET_PEER_MASK) + return false; + if (sock->sk->sk_family == PF_UNIX) { + struct aa_sk_ctx *ctx = aa_sock(sock->sk); + + if (rcu_access_pointer(ctx->peer) != + rcu_access_pointer(ctx->peer_lastupdate)) + return true; + return !__aa_subj_label_is_cached(rcu_dereference(ctx->label), + label); + } + return false; +} + /** * aa_file_perm - do permission revalidation check & audit for @file * @op: operation being checked @@ -612,14 +628,15 @@ int aa_file_perm(const char *op, const struct cred *subj_cred, */ denied = request & 
~fctx->allow; if (unconfined(label) || __file_is_delegated(flabel) || - (!denied && __subj_label_is_cached(label, flabel))) { + __unix_needs_revalidation(file, label, request) || + (!denied && __aa_subj_label_is_cached(label, flabel))) { rcu_read_unlock(); goto done; } + /* slow path - revalidate access */ flabel = aa_get_newest_label(flabel); rcu_read_unlock(); - /* TODO: label cross check */ if (file->f_path.mnt && path_mediated_fs(file->f_path.dentry)) error = __file_path_perm(op, subj_cred, label, flabel, file, diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h index 5e7d199c15e2..9aa2e364cca9 100644 --- a/security/apparmor/include/label.h +++ b/security/apparmor/include/label.h @@ -415,6 +415,13 @@ static inline void aa_put_label(struct aa_label *l) kref_put(&l->count, aa_label_kref); } +/* wrapper fn to indicate semantics of the check */ +static inline bool __aa_subj_label_is_cached(struct aa_label *subj_label, + struct aa_label *obj_label) +{ + return aa_label_is_subset(obj_label, subj_label); +} + struct aa_proxy *aa_alloc_proxy(struct aa_label *l, gfp_t gfp); void aa_proxy_kref(struct kref *kref); diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h index 5089e937d550..0d0b0ce42723 100644 --- a/security/apparmor/include/net.h +++ b/security/apparmor/include/net.h @@ -47,8 +47,9 @@ #define NET_PEER_MASK (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CONNECT | \ AA_MAY_ACCEPT) struct aa_sk_ctx { - struct aa_label *label; - struct aa_label *peer; + struct aa_label __rcu *label; + struct aa_label __rcu *peer; + struct aa_label __rcu *peer_lastupdate; /* ptr cmp only, no deref */ }; static inline struct aa_sk_ctx *aa_sock(const struct sock *sk) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 0b53ac1c2d70..0640a379a518 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -508,7 +508,6 @@ static int apparmor_file_alloc_security(struct file *file) struct aa_file_ctx *ctx = 
file_ctx(file); struct aa_label *label = begin_current_label_crit_section(); - spin_lock_init(&ctx->lock); rcu_assign_pointer(ctx->label, aa_get_label(label)); end_current_label_crit_section(label); return 0; @@ -1076,12 +1075,29 @@ static int apparmor_userns_create(const struct cred *cred) return error; } +static int apparmor_sk_alloc_security(struct sock *sk, int family, gfp_t gfp) +{ + struct aa_sk_ctx *ctx = aa_sock(sk); + struct aa_label *label; + bool needput; + + label = __begin_current_label_crit_section(&needput); + //spin_lock_init(&ctx->lock); + rcu_assign_pointer(ctx->label, aa_get_label(label)); + rcu_assign_pointer(ctx->peer, NULL); + rcu_assign_pointer(ctx->peer_lastupdate, NULL); + __end_current_label_crit_section(label, needput); + return 0; +} + static void apparmor_sk_free_security(struct sock *sk) { struct aa_sk_ctx *ctx = aa_sock(sk); - aa_put_label(ctx->label); - aa_put_label(ctx->peer); + /* dead these won't be updated any more */ + aa_put_label(rcu_dereference_protected(ctx->label, true)); + aa_put_label(rcu_dereference_protected(ctx->peer, true)); + aa_put_label(rcu_dereference_protected(ctx->peer_lastupdate, true)); } /** @@ -1095,13 +1111,22 @@ static void apparmor_sk_clone_security(const struct sock *sk, struct aa_sk_ctx *ctx = aa_sock(sk); struct aa_sk_ctx *new = aa_sock(newsk); - if (new->label) - aa_put_label(new->label); - new->label = aa_get_label(ctx->label); + /* not actually in use yet */ + if (rcu_access_pointer(ctx->label) != rcu_access_pointer(new->label)) { + aa_put_label(rcu_dereference_protected(new->label, true)); + rcu_assign_pointer(new->label, aa_get_label_rcu(&ctx->label)); + } - if (new->peer) - aa_put_label(new->peer); - new->peer = aa_get_label(ctx->peer); + if (rcu_access_pointer(ctx->peer) != rcu_access_pointer(new->peer)) { + aa_put_label(rcu_dereference_protected(new->peer, true)); + rcu_assign_pointer(new->peer, aa_get_label_rcu(&ctx->peer)); + } + + if (rcu_access_pointer(ctx->peer_lastupdate) != 
rcu_access_pointer(new->peer_lastupdate)) { + aa_put_label(rcu_dereference_protected(new->peer_lastupdate, true)); + rcu_assign_pointer(new->peer_lastupdate, + aa_get_label_rcu(&ctx->peer_lastupdate)); + } } static int unix_connect_perm(const struct cred *cred, struct aa_label *label, @@ -1112,27 +1137,47 @@ static int unix_connect_perm(const struct cred *cred, struct aa_label *label, error = aa_unix_peer_perm(cred, label, OP_CONNECT, (AA_MAY_CONNECT | AA_MAY_SEND | AA_MAY_RECEIVE), - sk, peer_sk, peer_ctx->label); + sk, peer_sk, + rcu_dereference_protected(peer_ctx->label, + lockdep_is_held(&unix_sk(peer_sk)->lock))); if (!is_unix_fs(peer_sk)) { last_error(error, aa_unix_peer_perm(cred, - peer_ctx->label, OP_CONNECT, + rcu_dereference_protected(peer_ctx->label, + lockdep_is_held(&unix_sk(peer_sk)->lock)), + OP_CONNECT, (AA_MAY_ACCEPT | AA_MAY_SEND | AA_MAY_RECEIVE), - peer_sk, sk, label)); + peer_sk, sk, label)); } return error; } +/* lockdep check in unix_connect_perm - push sks here to check */ static void unix_connect_peers(struct aa_sk_ctx *sk_ctx, struct aa_sk_ctx *peer_ctx) { /* Cross reference the peer labels for SO_PEERSEC */ - aa_put_label(peer_ctx->peer); - aa_put_label(sk_ctx->peer); + struct aa_label *label = rcu_dereference_protected(sk_ctx->label, true); - peer_ctx->peer = aa_get_label(sk_ctx->label); - sk_ctx->peer = aa_get_label(peer_ctx->label); + aa_get_label(label); + aa_put_label(rcu_dereference_protected(peer_ctx->peer, + true)); + rcu_assign_pointer(peer_ctx->peer, label); /* transfer cnt */ + + label = aa_get_label(rcu_dereference_protected(peer_ctx->label, + true)); + //spin_unlock(&peer_ctx->lock); + + //spin_lock(&sk_ctx->lock); + aa_put_label(rcu_dereference_protected(sk_ctx->peer, + true)); + aa_put_label(rcu_dereference_protected(sk_ctx->peer_lastupdate, + true)); + + rcu_assign_pointer(sk_ctx->peer, aa_get_label(label)); + rcu_assign_pointer(sk_ctx->peer_lastupdate, label); /* transfer cnt */ + //spin_unlock(&sk_ctx->lock); } /** @@ 
-1158,8 +1203,10 @@ static int apparmor_unix_stream_connect(struct sock *sk, struct sock *peer_sk, return error; /* newsk doesn't go through post_create */ - AA_BUG(new_ctx->label); - new_ctx->label = aa_get_label(peer_ctx->label); + AA_BUG(rcu_access_pointer(new_ctx->label)); + rcu_assign_pointer(new_ctx->label, + aa_get_label(rcu_dereference_protected(peer_ctx->label, + true))); /* Cross reference the peer labels for SO_PEERSEC */ unix_connect_peers(sk_ctx, new_ctx); @@ -1183,12 +1230,15 @@ static int apparmor_unix_may_send(struct socket *sock, struct socket *peer) label = __begin_current_label_crit_section(&needput); error = xcheck(aa_unix_peer_perm(current_cred(), - label, OP_SENDMSG, AA_MAY_SEND, - sock->sk, peer->sk, peer_ctx->label), + label, OP_SENDMSG, AA_MAY_SEND, + sock->sk, peer->sk, + rcu_dereference_protected(peer_ctx->label, + true)), aa_unix_peer_perm(peer->file ? peer->file->f_cred : NULL, - peer_ctx->label, OP_SENDMSG, - AA_MAY_RECEIVE, - peer->sk, sock->sk, label)); + rcu_dereference_protected(peer_ctx->label, + true), + OP_SENDMSG, AA_MAY_RECEIVE, peer->sk, + sock->sk, label)); __end_current_label_crit_section(label, needput); return error; @@ -1246,8 +1296,9 @@ static int apparmor_socket_post_create(struct socket *sock, int family, if (sock->sk) { struct aa_sk_ctx *ctx = aa_sock(sock->sk); - aa_put_label(ctx->label); - ctx->label = aa_get_label(label); + /* still not live */ + aa_put_label(rcu_dereference_protected(ctx->label, true)); + rcu_assign_pointer(ctx->label, aa_get_label(label)); } aa_put_label(label); @@ -1260,23 +1311,27 @@ static int apparmor_socket_socketpair(struct socket *socka, struct aa_sk_ctx *a_ctx = aa_sock(socka->sk); struct aa_sk_ctx *b_ctx = aa_sock(sockb->sk); struct aa_label *label; - int error = 0; - - aa_put_label(a_ctx->label); - aa_put_label(b_ctx->label); + /* socks not live yet - initial values set in sk_alloc */ label = begin_current_label_crit_section(); - a_ctx->label = aa_get_label(label); - b_ctx->label = 
aa_get_label(label); + if (rcu_access_pointer(a_ctx->label) != label) { + AA_BUG("a_ctx != label"); + aa_put_label(rcu_dereference_protected(a_ctx->label, true)); + rcu_assign_pointer(a_ctx->label, aa_get_label(label)); + } + if (rcu_access_pointer(b_ctx->label) != label) { + AA_BUG("b_ctx != label"); + aa_put_label(rcu_dereference_protected(b_ctx->label, true)); + rcu_assign_pointer(b_ctx->label, aa_get_label(label)); + } if (socka->sk->sk_family == PF_UNIX) { /* unix socket pairs by-pass unix_stream_connect */ - if (!error) - unix_connect_peers(a_ctx, b_ctx); + unix_connect_peers(a_ctx, b_ctx); } end_current_label_crit_section(label); - return error; + return 0; } /** @@ -1430,6 +1485,7 @@ static int apparmor_socket_shutdown(struct socket *sock, int how) static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct aa_sk_ctx *ctx = aa_sock(sk); + int error; if (!skb->secmark) return 0; @@ -1438,11 +1494,15 @@ static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) * If reach here before socket_post_create hook is called, in which * case label is null, drop the packet. 
*/ - if (!ctx->label) + if (!rcu_access_pointer(ctx->label)) return -EACCES; - return apparmor_secmark_check(ctx->label, OP_RECVMSG, AA_MAY_RECEIVE, - skb->secmark, sk); + rcu_read_lock(); + error = apparmor_secmark_check(rcu_dereference(ctx->label), OP_RECVMSG, + AA_MAY_RECEIVE, skb->secmark, sk); + rcu_read_unlock(); + + return error; } #endif @@ -1452,8 +1512,8 @@ static struct aa_label *sk_peer_get_label(struct sock *sk) struct aa_sk_ctx *ctx = aa_sock(sk); struct aa_label *label = ERR_PTR(-ENOPROTOOPT); - if (ctx->peer) - return aa_get_label(ctx->peer); + if (rcu_access_pointer(ctx->peer)) + return aa_get_label_rcu(&ctx->peer); if (sk->sk_family != PF_UNIX) return ERR_PTR(-ENOPROTOOPT); @@ -1480,12 +1540,12 @@ static int apparmor_socket_getpeersec_stream(struct socket *sock, struct aa_label *label; struct aa_label *peer; - label = begin_current_label_crit_section(); peer = sk_peer_get_label(sock->sk); if (IS_ERR(peer)) { error = PTR_ERR(peer); goto done; } + label = begin_current_label_crit_section(); slen = aa_label_asxprint(&name, labels_ns(label), peer, FLAG_SHOW_MODE | FLAG_VIEW_SUBNS | FLAG_HIDDEN_UNCONFINED, GFP_KERNEL); @@ -1506,9 +1566,9 @@ static int apparmor_socket_getpeersec_stream(struct socket *sock, error = -EFAULT; done_put: + end_current_label_crit_section(label); aa_put_label(peer); done: - end_current_label_crit_section(label); kfree(name); return error; } @@ -1544,8 +1604,9 @@ static void apparmor_sock_graft(struct sock *sk, struct socket *parent) { struct aa_sk_ctx *ctx = aa_sock(sk); - if (!ctx->label) - ctx->label = aa_get_current_label(); + /* setup - not live */ + if (!rcu_access_pointer(ctx->label)) + rcu_assign_pointer(ctx->label, aa_get_current_label()); } #ifdef CONFIG_NETWORK_SECMARK @@ -1553,12 +1614,17 @@ static int apparmor_inet_conn_request(const struct sock *sk, struct sk_buff *skb struct request_sock *req) { struct aa_sk_ctx *ctx = aa_sock(sk); + int error; if (!skb->secmark) return 0; - return 
apparmor_secmark_check(ctx->label, OP_CONNECT, AA_MAY_CONNECT, - skb->secmark, sk); + rcu_read_lock(); + error = apparmor_secmark_check(rcu_dereference(ctx->label), OP_CONNECT, + AA_MAY_CONNECT, skb->secmark, sk); + rcu_read_unlock(); + + return error; } #endif @@ -1615,6 +1681,7 @@ static struct security_hook_list apparmor_hooks[] __ro_after_init = { LSM_HOOK_INIT(getprocattr, apparmor_getprocattr), LSM_HOOK_INIT(setprocattr, apparmor_setprocattr), + LSM_HOOK_INIT(sk_alloc_security, apparmor_sk_alloc_security), LSM_HOOK_INIT(sk_free_security, apparmor_sk_free_security), LSM_HOOK_INIT(sk_clone_security, apparmor_sk_clone_security), @@ -2266,6 +2333,7 @@ static unsigned int apparmor_ip_postroute(void *priv, { struct aa_sk_ctx *ctx; struct sock *sk; + int error; if (!skb->secmark) return NF_ACCEPT; @@ -2275,8 +2343,11 @@ static unsigned int apparmor_ip_postroute(void *priv, return NF_ACCEPT; ctx = aa_sock(sk); - if (!apparmor_secmark_check(ctx->label, OP_SENDMSG, AA_MAY_SEND, - skb->secmark, sk)) + rcu_read_lock(); + error = apparmor_secmark_check(rcu_dereference(ctx->label), OP_SENDMSG, + AA_MAY_SEND, skb->secmark, sk); + rcu_read_unlock(); + if (!error) return NF_ACCEPT; return NF_DROP_ERR(-ECONNREFUSED); diff --git a/security/apparmor/net.c b/security/apparmor/net.c index 2da554cc3a35..7382069efd7d 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -292,7 +292,7 @@ static int aa_label_sk_perm(const struct cred *subj_cred, AA_BUG(!label); AA_BUG(!sk); - if (ctx->label != kernel_t && !unconfined(label)) { + if (rcu_access_pointer(ctx->label) != kernel_t && !unconfined(label)) { struct aa_profile *profile; DEFINE_AUDIT_SK(ad, op, subj_cred, sk); From c5bf96d20fd787e4909b755de4705d52f3458836 Mon Sep 17 00:00:00 2001 From: Gabriel Totev Date: Wed, 16 Apr 2025 18:42:08 -0400 Subject: [PATCH 0788/2411] apparmor: shift ouid when mediating hard links in userns When using AppArmor profiles inside an unprivileged container, the link operation observes an 
unshifted ouid. (tested with LXD and Incus) For example, root inside container and uid 1000000 outside, with `owner /root/link l,` profile entry for ln: /root$ touch chain && ln chain link ==> dmesg apparmor="DENIED" operation="link" class="file" namespace="root//lxd-feet_" profile="linkit" name="/root/link" pid=1655 comm="ln" requested_mask="l" denied_mask="l" fsuid=1000000 ouid=0 [<== should be 1000000] target="/root/chain" Fix by mapping inode uid of old_dentry in aa_path_link() rather than using it directly, similarly to how it's mapped in __file_path_perm() later in the file. Signed-off-by: Gabriel Totev Signed-off-by: John Johansen --- security/apparmor/file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 65e1d29af792..5504059d6101 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -430,9 +430,11 @@ int aa_path_link(const struct cred *subj_cred, { struct path link = { .mnt = new_dir->mnt, .dentry = new_dentry }; struct path target = { .mnt = new_dir->mnt, .dentry = old_dentry }; + struct inode *inode = d_backing_inode(old_dentry); + vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_idmap(target.mnt), inode); struct path_cond cond = { - d_backing_inode(old_dentry)->i_uid, - d_backing_inode(old_dentry)->i_mode + .uid = vfsuid_into_kuid(vfsuid), + .mode = inode->i_mode, }; char *buffer = NULL, *buffer2 = NULL; struct aa_profile *profile; From 3fa0af4cc8a31d4139ee85a7b0e3d9b4f37b3093 Mon Sep 17 00:00:00 2001 From: Gabriel Totev Date: Wed, 16 Apr 2025 18:42:09 -0400 Subject: [PATCH 0789/2411] apparmor: shift uid when mediating af_unix in userns Avoid unshifted ouids for socket file operations as observed when using AppArmor profiles in unprivileged containers with LXD or Incus. 
For example, root inside container and uid 1000000 outside, with `owner /root/sock rw,` profile entry for nc: /root$ nc -lkU sock & nc -U sock ==> dmesg apparmor="DENIED" operation="connect" class="file" namespace="root//lxd-podia_" profile="sockit" name="/root/sock" pid=3924 comm="nc" requested_mask="wr" denied_mask="wr" fsuid=1000000 ouid=0 [<== should be 1000000] Fix by performing uid mapping as per common_perm_cond() in lsm.c Signed-off-by: Gabriel Totev Fixes: c05e705812d1 ("apparmor: add fine grained af_unix mediation") Signed-off-by: John Johansen --- security/apparmor/af_unix.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/security/apparmor/af_unix.c b/security/apparmor/af_unix.c index 257648a13bf8..c4e722605fcd 100644 --- a/security/apparmor/af_unix.c +++ b/security/apparmor/af_unix.c @@ -12,6 +12,7 @@ * License. */ +#include #include #include "include/audit.h" @@ -44,8 +45,11 @@ static int unix_fs_perm(const char *op, u32 mask, const struct cred *subj_cred, */ if (path->dentry) { /* the sunpath may not be valid for this ns so use the path */ - struct path_cond cond = { path->dentry->d_inode->i_uid, - path->dentry->d_inode->i_mode + struct inode *inode = path->dentry->d_inode; + vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_idmap(path->mnt), inode); + struct path_cond cond = { + .uid = vfsuid_into_kuid(vfsuid), + .mode = inode->i_mode, }; return aa_path_perm(op, subj_cred, label, path, From c567de2c4f5fe6e079672e074e1bc6122bf7e444 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 31 May 2025 17:08:21 +0200 Subject: [PATCH 0790/2411] apparmor: Fix 8-byte alignment for initial dfa blob streams The dfa blob stream for the aa_dfa_unpack() function is expected to be aligned on a 8 byte boundary. The static nulldfa_src[] and stacksplitdfa_src[] arrays store the initial apparmor dfa blob streams, but since they are declared as an array-of-chars the compiler and linker will only ensure a "char" (1-byte) alignment. 
Add an __aligned(8) annotation to the arrays to tell the linker to always align them on a 8-byte boundary. This avoids runtime warnings at startup on alignment-sensitive platforms like parisc such as: Kernel: unaligned access to 0x7f2a584a in aa_dfa_unpack+0x124/0x788 (iir 0xca0109f) Kernel: unaligned access to 0x7f2a584e in aa_dfa_unpack+0x210/0x788 (iir 0xca8109c) Kernel: unaligned access to 0x7f2a586a in aa_dfa_unpack+0x278/0x788 (iir 0xcb01090) Signed-off-by: Helge Deller Cc: stable@vger.kernel.org Fixes: 98b824ff8984 ("apparmor: refcount the pdb") Signed-off-by: John Johansen --- security/apparmor/lsm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 0640a379a518..d3da9db244b0 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -2404,12 +2404,12 @@ static int __init apparmor_nf_ip_init(void) __initcall(apparmor_nf_ip_init); #endif -static char nulldfa_src[] = { +static char nulldfa_src[] __aligned(8) = { #include "nulldfa.in" }; static struct aa_dfa *nulldfa; -static char stacksplitdfa_src[] = { +static char stacksplitdfa_src[] __aligned(8) = { #include "stacksplitdfa.in" }; struct aa_dfa *stacksplitdfa; From c68804199dd9d63868497a27b5da3c3cd15356db Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 31 May 2025 17:08:22 +0200 Subject: [PATCH 0791/2411] apparmor: Fix unaligned memory accesses in KUnit test The testcase triggers some unnecessary unaligned memory accesses on the parisc architecture: Kernel: unaligned access to 0x12f28e27 in policy_unpack_test_init+0x180/0x374 (iir 0x0cdc1280) Kernel: unaligned access to 0x12f28e67 in policy_unpack_test_init+0x270/0x374 (iir 0x64dc00ce) Use the existing helper functions put_unaligned_le32() and put_unaligned_le16() to avoid such warnings on architectures which prefer aligned memory accesses. 
Signed-off-by: Helge Deller Fixes: 98c0cc48e27e ("apparmor: fix policy_unpack_test on big endian systems") Signed-off-by: John Johansen --- security/apparmor/policy_unpack_test.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/security/apparmor/policy_unpack_test.c b/security/apparmor/policy_unpack_test.c index f070902da8fc..a7ac0ccc6cfe 100644 --- a/security/apparmor/policy_unpack_test.c +++ b/security/apparmor/policy_unpack_test.c @@ -9,6 +9,8 @@ #include "include/policy.h" #include "include/policy_unpack.h" +#include + #define TEST_STRING_NAME "TEST_STRING" #define TEST_STRING_DATA "testing" #define TEST_STRING_BUF_OFFSET \ @@ -80,7 +82,7 @@ static struct aa_ext *build_aa_ext_struct(struct policy_unpack_fixture *puf, *(buf + 1) = strlen(TEST_U32_NAME) + 1; strscpy(buf + 3, TEST_U32_NAME, e->end - (void *)(buf + 3)); *(buf + 3 + strlen(TEST_U32_NAME) + 1) = AA_U32; - *((__le32 *)(buf + 3 + strlen(TEST_U32_NAME) + 2)) = cpu_to_le32(TEST_U32_DATA); + put_unaligned_le32(TEST_U32_DATA, buf + 3 + strlen(TEST_U32_NAME) + 2); buf = e->start + TEST_NAMED_U64_BUF_OFFSET; *buf = AA_NAME; @@ -103,7 +105,7 @@ static struct aa_ext *build_aa_ext_struct(struct policy_unpack_fixture *puf, *(buf + 1) = strlen(TEST_ARRAY_NAME) + 1; strscpy(buf + 3, TEST_ARRAY_NAME, e->end - (void *)(buf + 3)); *(buf + 3 + strlen(TEST_ARRAY_NAME) + 1) = AA_ARRAY; - *((__le16 *)(buf + 3 + strlen(TEST_ARRAY_NAME) + 2)) = cpu_to_le16(TEST_ARRAY_SIZE); + put_unaligned_le16(TEST_ARRAY_SIZE, buf + 3 + strlen(TEST_ARRAY_NAME) + 2); return e; } From da0edababafa444e638a0be6dd2feef0a9e529e2 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 20 Jun 2025 15:05:01 -0700 Subject: [PATCH 0792/2411] apparmor: fix kernel doc warnings for kernel test robot Fix kernel doc warnings for the functions - apparmor_socket_bind - apparmor_unix_may_send - apparmor_unix_stream_connect - val_mask_to_str Reported-by: kernel test robot Closes: 
https://lore.kernel.org/oe-kbuild-all/202506070127.B1bc3da4-lkp@intel.com/ Signed-off-by: John Johansen --- security/apparmor/lib.c | 4 ++-- security/apparmor/lsm.c | 10 ++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index f51e79cc36d4..7d43f6a62404 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -84,8 +84,8 @@ int aa_parse_debug_params(const char *str) /** * val_mask_to_str - convert a perm mask to its short string * @str: character buffer to store string in (at least 10 characters) - * @str_size: size of the @str buffer - * @chrs: NUL-terminated character buffer of permission characters + * @size: size of the @str buffer + * @table: NUL-terminated character buffer of permission characters * @mask: permission mask to convert */ static int val_mask_to_str(char *str, size_t size, diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index d3da9db244b0..09fe237e5324 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1182,7 +1182,9 @@ static void unix_connect_peers(struct aa_sk_ctx *sk_ctx, /** * apparmor_unix_stream_connect - check perms before making unix domain conn - * + * @sk: sk attempting to connect + * @peer_sk: sk that is accepting the connection + * @newsk: new sk created for this connection * peer is locked when this hook is called */ static int apparmor_unix_stream_connect(struct sock *sk, struct sock *peer_sk, @@ -1216,9 +1218,10 @@ static int apparmor_unix_stream_connect(struct sock *sk, struct sock *peer_sk, /** * apparmor_unix_may_send - check perms before conn or sending unix dgrams + * @sock: socket sending the message + * @peer: socket message is being send to * * sock and peer are locked when this hook is called - * * called by: dgram_connect peer setup but path not copied to newsk */ static int apparmor_unix_may_send(struct socket *sock, struct socket *peer) @@ -1336,6 +1339,9 @@ static int 
apparmor_socket_socketpair(struct socket *socka, /** * apparmor_socket_bind - check perms before bind addr to socket + * @sock: socket to bind the address to + * @address: address that is being bound + * @addrlen: length of @address */ static int apparmor_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) From 4ce7d3cf5ad846a8843f8afc78de2a8309f74f12 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Mon, 23 Jun 2025 14:58:00 -0700 Subject: [PATCH 0793/2411] apparmor: remove redundant perms.allow MAY_EXEC bitflag set This section of profile_transition that occurs after x_to_label only happens if perms.allow already has the MAY_EXEC bit set, so we don't need to set it again. Fixes: 16916b17b4f8 ("apparmor: force auditing of conflicting attachment execs from confined") Signed-off-by: Ryan Lee Signed-off-by: John Johansen --- security/apparmor/domain.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index f9370a63a83c..d689597f253b 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -734,10 +734,8 @@ static struct aa_label *profile_transition(const struct cred *subj_cred, * we don't need to care about clobbering it */ if (info == CONFLICTING_ATTACH_STR_IX - || info == CONFLICTING_ATTACH_STR_UX) { + || info == CONFLICTING_ATTACH_STR_UX) perms.audit |= MAY_EXEC; - perms.allow |= MAY_EXEC; - } /* hack ix fallback - improve how this is detected */ goto audit; } else if (!new) { From f9c9dce01e9640d94a37304bddc97b738ee4ac35 Mon Sep 17 00:00:00 2001 From: Peng Jiang Date: Mon, 23 Jun 2025 14:41:11 +0800 Subject: [PATCH 0794/2411] apparmor: fix documentation mismatches in val_mask_to_str and socket functions This patch fixes kernel-doc warnings: 1. val_mask_to_str: - Added missing descriptions for `size` and `table` parameters. - Removed outdated str_size and chrs references. 2. Socket Functions: - Makes non-null requirements clear for socket/address args. 
- Standardizes return values per kernel conventions. - Adds Unix domain socket protocol details. These changes silence doc validation warnings and improve accuracy for AppArmor LSM docs. Signed-off-by: Peng Jiang Signed-off-by: John Johansen --- security/apparmor/lib.c | 2 +- security/apparmor/lsm.c | 24 ++++++++++++++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 7d43f6a62404..82dbb97ad406 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -85,7 +85,7 @@ int aa_parse_debug_params(const char *str) * val_mask_to_str - convert a perm mask to its short string * @str: character buffer to store string in (at least 10 characters) * @size: size of the @str buffer - * @table: NUL-terminated character buffer of permission characters + * @table: NUL-terminated character buffer of permission characters (NOT NULL) * @mask: permission mask to convert */ static int val_mask_to_str(char *str, size_t size, diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 09fe237e5324..97f0f25a3cfa 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1186,6 +1186,10 @@ static void unix_connect_peers(struct aa_sk_ctx *sk_ctx, * @peer_sk: sk that is accepting the connection * @newsk: new sk created for this connection * peer is locked when this hook is called + * + * Return: + * 0 if connection is permitted + * error code on denial or failure */ static int apparmor_unix_stream_connect(struct sock *sk, struct sock *peer_sk, struct sock *newsk) @@ -1221,8 +1225,16 @@ static int apparmor_unix_stream_connect(struct sock *sk, struct sock *peer_sk, * @sock: socket sending the message * @peer: socket message is being send to * + * Performs bidirectional permission checks for Unix domain socket communication: + * 1. Verifies sender has AA_MAY_SEND to target socket + * 2. 
Verifies receiver has AA_MAY_RECEIVE from source socket + * * sock and peer are locked when this hook is called * called by: dgram_connect peer setup but path not copied to newsk + * + * Return: + * 0 if transmission is permitted + * error code on denial or failure */ static int apparmor_unix_may_send(struct socket *sock, struct socket *peer) { @@ -1339,9 +1351,17 @@ static int apparmor_socket_socketpair(struct socket *socka, /** * apparmor_socket_bind - check perms before bind addr to socket - * @sock: socket to bind the address to - * @address: address that is being bound + * @sock: socket to bind the address to (must be non-NULL) + * @address: address that is being bound (must be non-NULL) * @addrlen: length of @address + * + * Performs security checks before allowing a socket to bind to an address. + * Handles Unix domain sockets specially through aa_unix_bind_perm(). + * For other socket families, uses generic permission check via aa_sk_perm(). + * + * Return: + * 0 if binding is permitted + * error code on denial or invalid parameters */ static int apparmor_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) From 9afdc6abb007d5a86f54e9f10870ac1468155ca5 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 17 Feb 2025 01:46:37 -0800 Subject: [PATCH 0795/2411] apparmor: transition from a list of rules to a vector of rules The set of rules on a profile is not dynamically extended, instead if a new ruleset is needed a new version of the profile is created. This allows us to use a vector of rules instead of a list, slightly reducing memory usage and simplifying the code. 
Signed-off-by: John Johansen --- security/apparmor/af_unix.c | 22 +++++---------- security/apparmor/apparmorfs.c | 3 +- security/apparmor/capability.c | 9 ++---- security/apparmor/domain.c | 23 +++++---------- security/apparmor/file.c | 6 ++-- security/apparmor/include/label.h | 20 +++++++++++-- security/apparmor/include/policy.h | 16 ++--------- security/apparmor/ipc.c | 3 +- security/apparmor/lsm.c | 5 ++-- security/apparmor/mount.c | 12 +++----- security/apparmor/net.c | 6 ++-- security/apparmor/policy.c | 45 +++++++++++++++++------------- security/apparmor/policy_unpack.c | 6 ++-- security/apparmor/resource.c | 11 ++------ security/apparmor/task.c | 11 +++----- 15 files changed, 85 insertions(+), 113 deletions(-) diff --git a/security/apparmor/af_unix.c b/security/apparmor/af_unix.c index c4e722605fcd..9129766d1e9c 100644 --- a/security/apparmor/af_unix.c +++ b/security/apparmor/af_unix.c @@ -202,8 +202,7 @@ static int profile_create_perm(struct aa_profile *profile, int family, int type, int protocol, struct apparmor_audit_data *ad) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; aa_state_t state; AA_BUG(!profile); @@ -227,9 +226,7 @@ static int profile_sk_perm(struct aa_profile *profile, struct apparmor_audit_data *ad, u32 request, struct sock *sk, struct path *path) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), - list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms *p = NULL; aa_state_t state; @@ -257,8 +254,7 @@ static int profile_sk_perm(struct aa_profile *profile, static int profile_bind_perm(struct aa_profile *profile, struct sock *sk, struct apparmor_audit_data *ad) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms *p = NULL; aa_state_t state; @@ -289,8 +285,7 @@ static int 
profile_bind_perm(struct aa_profile *profile, struct sock *sk, static int profile_listen_perm(struct aa_profile *profile, struct sock *sk, int backlog, struct apparmor_audit_data *ad) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms *p = NULL; aa_state_t state; @@ -327,8 +322,7 @@ static int profile_accept_perm(struct aa_profile *profile, struct sock *sk, struct apparmor_audit_data *ad) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms *p = NULL; aa_state_t state; @@ -358,8 +352,7 @@ static int profile_opt_perm(struct aa_profile *profile, u32 request, struct sock *sk, int optname, struct apparmor_audit_data *ad) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms *p = NULL; aa_state_t state; @@ -399,8 +392,7 @@ static int profile_peer_perm(struct aa_profile *profile, u32 request, struct aa_label *peer_label, struct apparmor_audit_data *ad) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms *p = NULL; aa_state_t state; diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index ecf22251c228..5ae0089554a7 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -612,8 +612,7 @@ static const struct file_operations aa_fs_ns_revision_fops = { static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms, const char *match_str, size_t match_len) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms tmp = { }; aa_state_t state = DFA_NOMATCH; diff --git 
a/security/apparmor/capability.c b/security/apparmor/capability.c index 25b6219cdeb6..b9ea6bc45c1a 100644 --- a/security/apparmor/capability.c +++ b/security/apparmor/capability.c @@ -69,8 +69,7 @@ static int audit_caps(struct apparmor_audit_data *ad, struct aa_profile *profile { const u64 AUDIT_CACHE_TIMEOUT_NS = 1000*1000*1000; /* 1 second */ - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct audit_cache *ent; int type = AUDIT_APPARMOR_AUTO; @@ -122,8 +121,7 @@ static int audit_caps(struct apparmor_audit_data *ad, struct aa_profile *profile static int profile_capable(struct aa_profile *profile, int cap, unsigned int opts, struct apparmor_audit_data *ad) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; aa_state_t state; int error; @@ -195,8 +193,7 @@ int aa_capable(const struct cred *subj_cred, struct aa_label *label, kernel_cap_t aa_profile_capget(struct aa_profile *profile) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; aa_state_t state; state = RULE_MEDIATES(rules, AA_CLASS_CAP); diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index d689597f253b..267da82afb14 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -93,8 +93,7 @@ static inline aa_state_t match_component(struct aa_profile *profile, struct aa_profile *tp, bool stack, aa_state_t state) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; const char *ns_name; if (stack) @@ -131,8 +130,7 @@ static int label_compound_match(struct aa_profile *profile, aa_state_t state, bool subns, u32 request, struct aa_perms *perms) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - 
typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_profile *tp; struct label_it i; struct path_cond cond = { }; @@ -194,8 +192,7 @@ static int label_components_match(struct aa_profile *profile, aa_state_t start, bool subns, u32 request, struct aa_perms *perms) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_profile *tp; struct label_it i; struct aa_perms tmp; @@ -520,8 +517,7 @@ static const char *next_name(int xtype, const char *name) struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex, const char **name) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_label *label = NULL; u32 xtype = xindex & AA_X_TYPE_MASK; int index = xindex & AA_X_INDEX_MASK; @@ -575,8 +571,6 @@ static struct aa_label *x_to_label(struct aa_profile *profile, const char **lookupname, const char **info) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); struct aa_label *new = NULL; struct aa_label *stack = NULL; struct aa_ns *ns = profile->ns; @@ -668,8 +662,7 @@ static struct aa_label *profile_transition(const struct cred *subj_cred, char *buffer, struct path_cond *cond, bool *secure_exec) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_label *new = NULL; struct aa_profile *new_profile = NULL; const char *info = NULL, *name = NULL, *target = NULL; @@ -802,8 +795,7 @@ static int profile_onexec(const struct cred *subj_cred, char *buffer, struct path_cond *cond, bool *secure_exec) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; aa_state_t state = rules->file->start[AA_CLASS_FILE]; struct aa_perms 
perms = {}; const char *xname = NULL, *info = "change_profile onexec"; @@ -1361,8 +1353,7 @@ static int change_profile_perms_wrapper(const char *op, const char *name, struct aa_label *target, bool stack, u32 request, struct aa_perms *perms) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; const char *info = NULL; int error = 0; diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 5504059d6101..deffd278d6fd 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -223,8 +223,7 @@ int __aa_path_perm(const char *op, const struct cred *subj_cred, u32 request, struct path_cond *cond, int flags, struct aa_perms *perms) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; int e = 0; if (profile_unconfined(profile) || @@ -323,8 +322,7 @@ static int profile_path_link(const struct cred *subj_cred, const struct path *target, char *buffer2, struct path_cond *cond) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; const char *lname, *tname = NULL; struct aa_perms lperms = {}, perms; const char *info = NULL; diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h index 9aa2e364cca9..c0812dbc1b5b 100644 --- a/security/apparmor/include/label.h +++ b/security/apparmor/include/label.h @@ -19,6 +19,7 @@ #include "lib.h" struct aa_ns; +struct aa_ruleset; #define LOCAL_VEC_ENTRIES 8 #define DEFINE_VEC(T, V) \ @@ -109,7 +110,7 @@ struct label_it { int i, j; }; -/* struct aa_label - lazy labeling struct +/* struct aa_label_base - base info of label * @count: ref count of active users * @node: rbtree position * @rcu: rcu callback struct @@ -118,7 +119,10 @@ struct label_it { * @flags: stale and other flags - values may change under label set lock * 
@secid: secid that references this label * @size: number of entries in @ent[] - * @ent: set of profiles for label, actual size determined by @size + * @mediates: bitmask for label_mediates + * profile: label vec when embedded in a profile FLAG_PROFILE is set + * rules: variable length rules in a profile FLAG_PROFILE is set + * vec: vector of profiles comprising the compound label */ struct aa_label { struct kref count; @@ -130,7 +134,17 @@ struct aa_label { u32 secid; int size; u64 mediates; - struct aa_profile *vec[]; + union { + struct { + /* only used is the label is a profile, size of + * rules[] is determined by the profile + * profile[1] is poison or null as guard + */ + struct aa_profile *profile[2]; + DECLARE_FLEX_ARRAY(struct aa_ruleset *, rules); + }; + DECLARE_FLEX_ARRAY(struct aa_profile *, vec); + }; }; #define last_error(E, FN) \ diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index a4c0f76fd03d..4c50875c9d13 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -165,8 +165,6 @@ struct aa_data { * @secmark: secmark label match info */ struct aa_ruleset { - struct list_head list; - int size; /* TODO: merge policy and file */ @@ -180,6 +178,7 @@ struct aa_ruleset { struct aa_secmark *secmark; }; + /* struct aa_attachment - data and rules for a profiles attachment * @list: * @xmatch_str: human readable attachment string @@ -218,6 +217,7 @@ struct aa_attachment { * @dents: set of dentries associated with the profile * @data: hashtable for free-form policy aa_data * @label - label this profile is an extension of + * @rules - label with the rule vec on its end * * The AppArmor profile contains the basic confinement data. Each profile * has a name, and exists in a namespace. 
The @name and @exec_match are @@ -245,7 +245,6 @@ struct aa_profile { const char *disconnected; struct aa_attachment attach; - struct list_head rules; struct aa_loaddata *rawdata; unsigned char *hash; @@ -253,6 +252,7 @@ struct aa_profile { struct dentry *dents[AAFS_PROF_SIZEOF]; struct rhashtable *data; + int n_rules; /* special - variable length must be last entry in profile */ struct aa_label label; }; @@ -332,16 +332,6 @@ static inline aa_state_t RULE_MEDIATES_NET(struct aa_ruleset *rules) } -static inline aa_state_t ANY_RULE_MEDIATES(struct list_head *head, - unsigned char class) -{ - struct aa_ruleset *rule; - - /* TODO: change to list walk */ - rule = list_first_entry(head, typeof(*rule), list); - return RULE_MEDIATES(rule, class); -} - void aa_compute_profile_mediates(struct aa_profile *profile); static inline bool profile_mediates(struct aa_profile *profile, unsigned char class) diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index 3566d875645e..df5712cea685 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -80,8 +80,7 @@ static int profile_signal_perm(const struct cred *cred, struct aa_label *peer, u32 request, struct apparmor_audit_data *ad) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms perms; aa_state_t state; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 97f0f25a3cfa..cecbb985928f 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -182,8 +182,7 @@ static int apparmor_capget(const struct task_struct *target, kernel_cap_t *effec struct aa_ruleset *rules; kernel_cap_t allowed; - rules = list_first_entry(&profile->rules, - typeof(*rules), list); + rules = profile->label.rules[0]; allowed = aa_profile_capget(profile); *effective = cap_intersect(*effective, allowed); *permitted = cap_intersect(*permitted, allowed); @@ -636,7 +635,7 @@ static int profile_uring(struct 
aa_profile *profile, u32 request, AA_BUG(!profile); - rules = list_first_entry(&profile->rules, typeof(*rules), list); + rules = profile->label.rules[0]; state = RULE_MEDIATES(rules, AA_CLASS_IO_URING); if (state) { struct aa_perms perms = { }; diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c index bf8863253e07..523570aa1a5a 100644 --- a/security/apparmor/mount.c +++ b/security/apparmor/mount.c @@ -311,8 +311,7 @@ static int match_mnt_path_str(const struct cred *subj_cred, { struct aa_perms perms = { }; const char *mntpnt = NULL, *info = NULL; - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; int pos, error; AA_BUG(!profile); @@ -371,8 +370,7 @@ static int match_mnt(const struct cred *subj_cred, bool binary) { const char *devname = NULL, *info = NULL; - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; int error = -EACCES; AA_BUG(!profile); @@ -604,8 +602,7 @@ static int profile_umount(const struct cred *subj_cred, struct aa_profile *profile, const struct path *path, char *buffer) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms perms = { }; const char *name = NULL, *info = NULL; aa_state_t state; @@ -668,8 +665,7 @@ static struct aa_label *build_pivotroot(const struct cred *subj_cred, const struct path *old_path, char *old_buffer) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; const char *old_name, *new_name = NULL, *info = NULL; const char *trans_name = NULL; struct aa_perms perms = { }; diff --git a/security/apparmor/net.c b/security/apparmor/net.c index 7382069efd7d..45cf25605c34 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -251,8 
+251,7 @@ int aa_profile_af_perm(struct aa_profile *profile, struct apparmor_audit_data *ad, u32 request, u16 family, int type, int protocol) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms *p = NULL; aa_state_t state; @@ -362,8 +361,7 @@ static int aa_secmark_perm(struct aa_profile *profile, u32 request, u32 secid, { int i, ret; struct aa_perms perms = { }; - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; if (rules->secmark_count == 0) return 0; diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index a60bb7d9b583..261a9d3a0afe 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -259,8 +259,6 @@ struct aa_ruleset *aa_alloc_ruleset(gfp_t gfp) struct aa_ruleset *rules; rules = kzalloc(sizeof(*rules), gfp); - if (rules) - INIT_LIST_HEAD(&rules->list); return rules; } @@ -277,7 +275,6 @@ struct aa_ruleset *aa_alloc_ruleset(gfp_t gfp) */ void aa_free_profile(struct aa_profile *profile) { - struct aa_ruleset *rule, *tmp; struct rhashtable *rht; AA_DEBUG(DEBUG_POLICY, "%s(%p)\n", __func__, profile); @@ -299,10 +296,9 @@ void aa_free_profile(struct aa_profile *profile) * at this point there are no tasks that can have a reference * to rules */ - list_for_each_entry_safe(rule, tmp, &profile->rules, list) { - list_del_init(&rule->list); - free_ruleset(rule); - } + for (int i = 0; i < profile->n_rules; i++) + free_ruleset(profile->label.rules[i]); + kfree_sensitive(profile->dirname); if (profile->data) { @@ -331,25 +327,25 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy, gfp_t gfp) { struct aa_profile *profile; - struct aa_ruleset *rules; - /* freed by free_profile - usually through aa_put_profile */ - profile = kzalloc(struct_size(profile, label.vec, 2), gfp); + /* freed by free_profile - usually 
through aa_put_profile + * this adds space for a single ruleset in the rules section of the + * label + */ + profile = kzalloc(struct_size(profile, label.rules, 1), gfp); if (!profile) return NULL; + profile->n_rules = 1; if (!aa_policy_init(&profile->base, NULL, hname, gfp)) goto fail; if (!aa_label_init(&profile->label, 1, gfp)) goto fail; - INIT_LIST_HEAD(&profile->rules); - /* allocate the first ruleset, but leave it empty */ - rules = aa_alloc_ruleset(gfp); - if (!rules) + profile->label.rules[0] = aa_alloc_ruleset(gfp); + if (!profile->label.rules[0]) goto fail; - list_add(&rules->list, &profile->rules); /* update being set needed by fs interface */ if (!proxy) { @@ -374,6 +370,18 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy, return NULL; } +static inline bool ANY_RULE_MEDIATES(struct aa_profile *profile, + unsigned char class) +{ + int i; + + for (i = 0; i < profile->n_rules; i++) { + if (RULE_MEDIATES(profile->label.rules[i], class)) + return true; + } + return false; +} + /* set of rules that are mediated by unconfined */ static int unconfined_mediates[] = { AA_CLASS_NS, AA_CLASS_IO_URING, 0 }; @@ -386,14 +394,13 @@ void aa_compute_profile_mediates(struct aa_profile *profile) int *pos; for (pos = unconfined_mediates; *pos; pos++) { - if (ANY_RULE_MEDIATES(&profile->rules, AA_CLASS_NS) != - DFA_NOMATCH) + if (ANY_RULE_MEDIATES(profile, *pos)) profile->label.mediates |= ((u64) 1) << AA_CLASS_NS; } return; } for (c = 0; c <= AA_CLASS_LAST; c++) { - if (ANY_RULE_MEDIATES(&profile->rules, c) != DFA_NOMATCH) + if (ANY_RULE_MEDIATES(profile, c)) profile->label.mediates |= ((u64) 1) << c; } } @@ -646,7 +653,7 @@ struct aa_profile *aa_alloc_null(struct aa_profile *parent, const char *name, /* TODO: ideally we should inherit abi from parent */ profile->label.flags |= FLAG_NULL; profile->attach.xmatch = aa_get_pdb(nullpdb); - rules = list_first_entry(&profile->rules, typeof(*rules), list); + rules = profile->label.rules[0]; 
rules->file = aa_get_pdb(nullpdb); rules->policy = aa_get_pdb(nullpdb); aa_compute_profile_mediates(profile); diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 58c106b63727..553e52df3aa9 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -885,7 +885,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) error = -ENOMEM; goto fail; } - rules = list_first_entry(&profile->rules, typeof(*rules), list); + rules = profile->label.rules[0]; /* profile renaming is optional */ (void) aa_unpack_str(e, &profile->rename, "rename"); @@ -1285,8 +1285,8 @@ static bool verify_perms(struct aa_policydb *pdb) */ static int verify_profile(struct aa_profile *profile) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; + if (!rules) return 0; diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c index dcc94c3153d5..8e80db3ae21c 100644 --- a/security/apparmor/resource.c +++ b/security/apparmor/resource.c @@ -89,8 +89,7 @@ static int profile_setrlimit(const struct cred *subj_cred, struct aa_profile *profile, unsigned int resource, struct rlimit *new_rlim) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; int e = 0; if (rules->rlimits.mask & (1 << resource) && new_rlim->rlim_max > @@ -165,9 +164,7 @@ void __aa_transition_rlimits(struct aa_label *old_l, struct aa_label *new_l) * to the lesser of the tasks hard limit and the init tasks soft limit */ label_for_each_confined(i, old_l, old) { - struct aa_ruleset *rules = list_first_entry(&old->rules, - typeof(*rules), - list); + struct aa_ruleset *rules = old->label.rules[0]; if (rules->rlimits.mask) { int j; @@ -185,9 +182,7 @@ void __aa_transition_rlimits(struct aa_label *old_l, struct aa_label *new_l) /* set any new hard limits as dictated 
by the new profile */ label_for_each_confined(i, new_l, new) { - struct aa_ruleset *rules = list_first_entry(&new->rules, - typeof(*rules), - list); + struct aa_ruleset *rules = new->label.rules[0]; int j; if (!rules->rlimits.mask) diff --git a/security/apparmor/task.c b/security/apparmor/task.c index c87fb9f4ac18..c9bc9cc69475 100644 --- a/security/apparmor/task.c +++ b/security/apparmor/task.c @@ -228,8 +228,7 @@ static int profile_ptrace_perm(const struct cred *cred, struct aa_label *peer, u32 request, struct apparmor_audit_data *ad) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms perms = { }; ad->subj_cred = cred; @@ -246,7 +245,7 @@ static int profile_tracee_perm(const struct cred *cred, struct apparmor_audit_data *ad) { if (profile_unconfined(tracee) || unconfined(tracer) || - !ANY_RULE_MEDIATES(&tracee->rules, AA_CLASS_PTRACE)) + !label_mediates(&tracee->label, AA_CLASS_PTRACE)) return 0; return profile_ptrace_perm(cred, tracee, tracer, request, ad); @@ -260,7 +259,7 @@ static int profile_tracer_perm(const struct cred *cred, if (profile_unconfined(tracer)) return 0; - if (ANY_RULE_MEDIATES(&tracer->rules, AA_CLASS_PTRACE)) + if (label_mediates(&tracer->label, AA_CLASS_PTRACE)) return profile_ptrace_perm(cred, tracer, tracee, request, ad); /* profile uses the old style capability check for ptrace */ @@ -324,9 +323,7 @@ int aa_profile_ns_perm(struct aa_profile *profile, ad->request = request; if (!profile_unconfined(profile)) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), - list); + struct aa_ruleset *rules = profile->label.rules[0]; aa_state_t state; state = RULE_MEDIATES(rules, ad->class); From 4d9d1a08b796efd54f1e29b42bd95879109fe448 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Thu, 22 May 2025 13:54:05 -0700 Subject: [PATCH 0796/2411] apparmor: fix: accept2 being specified even when permission table is
present The transition to the perms32 permission table dropped the need for the accept2 table as permissions. However accept2 can be used for flags and may be present even when the perms32 table is present. So instead of checking on version, check whether the table is present. Fixes: 2e12c5f06017 ("apparmor: add additional flags to extended permission.") Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 553e52df3aa9..7523971e37d9 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -775,7 +775,8 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy, } } - if (pdb->perms && version <= 2) { + /* accept2 is in some cases being allocated, even with perms */ + if (pdb->perms && !pdb->dfa->tables[YYTD_ID_ACCEPT2]) { /* add dfa flags table missing in v2 */ u32 noents = pdb->dfa->tables[YYTD_ID_ACCEPT]->td_lolen; u16 tdflags = pdb->dfa->tables[YYTD_ID_ACCEPT]->td_flags; From 275ad5e7931160aca2ea7657f7be7ef3493dea79 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 20 Jul 2025 01:25:00 +0200 Subject: [PATCH 0797/2411] rust: list: remove nonexistent generic parameter in link `ListLinks` does not take a `T` generic parameter, unlike `ListLinksSelfPtr`. Thus fix it, which makes it also consistent with the rest of the links in the file.
Fixes: 40c53294596b ("rust: list: add macro for implementing ListItem") Reviewed-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250719232500.822313-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/list/impl_list_item_mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index f4c91832a875..202bc6f97c13 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -17,13 +17,13 @@ /// [`ListLinks`]: crate::list::ListLinks /// [`ListItem`]: crate::list::ListItem pub unsafe trait HasListLinks { - /// Returns a pointer to the [`ListLinks`] field. + /// Returns a pointer to the [`ListLinks`] field. /// /// # Safety /// /// The provided pointer must point at a valid struct of type `Self`. /// - /// [`ListLinks`]: crate::list::ListLinks + /// [`ListLinks`]: crate::list::ListLinks unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut crate::list::ListLinks; } From 8b097b5ac68b0fd2a7a251e101a84175b2ed585d Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Thu, 29 May 2025 09:14:58 -0400 Subject: [PATCH 0798/2411] scripts: rust: replace length checks with match Use a match expression with slice patterns instead of length checks and indexing. The result is more idiomatic, which is a better example for future Rust code authors. Reviewed-by: Alice Ryhl Signed-off-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250529-idiomatic-match-slice-v2-1-4925ca2f1550@gmail.com [ Reworded title. 
- Miguel ] Signed-off-by: Miguel Ojeda --- scripts/rustdoc_test_gen.rs | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/scripts/rustdoc_test_gen.rs b/scripts/rustdoc_test_gen.rs index 1ca253594d38..d796481f4359 100644 --- a/scripts/rustdoc_test_gen.rs +++ b/scripts/rustdoc_test_gen.rs @@ -85,24 +85,23 @@ fn find_candidates( } } - assert!( - valid_paths.len() > 0, - "No path candidates found for `{file}`. This is likely a bug in the build system, or some \ - files went away while compiling." - ); - - if valid_paths.len() > 1 { - eprintln!("Several path candidates found:"); - for path in valid_paths { - eprintln!(" {path:?}"); + match valid_paths.as_slice() { + [] => panic!( + "No path candidates found for `{file}`. This is likely a bug in the build system, or \ + some files went away while compiling." + ), + [valid_path] => valid_path.to_str().unwrap(), + valid_paths => { + eprintln!("Several path candidates found:"); + for path in valid_paths { + eprintln!(" {path:?}"); + } + panic!( + "Several path candidates found for `{file}`, please resolve the ambiguity by \ + renaming a file or folder." + ); } - panic!( - "Several path candidates found for `{file}`, please resolve the ambiguity by renaming \ - a file or folder." - ); } - - valid_paths[0].to_str().unwrap() } fn main() { From 2254991d5b573662f841998c1d263118a15f067a Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Thu, 29 May 2025 09:14:59 -0400 Subject: [PATCH 0799/2411] scripts: rust: emit path candidates in panic message Include all information in the panic message rather than emit fragments to stderr to avoid possible interleaving with other output. Signed-off-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250529-idiomatic-match-slice-v2-2-4925ca2f1550@gmail.com [ Kept newlines using `writeln!`. Used new message from Tamir. Reworded title. 
- Miguel ] Signed-off-by: Miguel Ojeda --- scripts/rustdoc_test_gen.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/rustdoc_test_gen.rs b/scripts/rustdoc_test_gen.rs index d796481f4359..abb34ada2508 100644 --- a/scripts/rustdoc_test_gen.rs +++ b/scripts/rustdoc_test_gen.rs @@ -92,13 +92,15 @@ fn find_candidates( ), [valid_path] => valid_path.to_str().unwrap(), valid_paths => { - eprintln!("Several path candidates found:"); + use std::fmt::Write; + + let mut candidates = String::new(); for path in valid_paths { - eprintln!(" {path:?}"); + writeln!(&mut candidates, " {path:?}").unwrap(); } panic!( "Several path candidates found for `{file}`, please resolve the ambiguity by \ - renaming a file or folder." + renaming a file or folder. Candidates:\n{candidates}", ); } } From f411b7eddde8b780a61dadea0916480f5c9edf5a Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 4 Jul 2025 16:14:52 -0400 Subject: [PATCH 0800/2411] rust: kernel: remove `fmt!`, fix clippy::uninlined-format-args Rather than export a macro that delegates to `core::format_args`, simply re-export `core::format_args` as `fmt` from the prelude. This exposes clippy warnings which were previously obscured by this macro, such as: warning: variables can be used directly in the `format!` string --> ../drivers/cpufreq/rcpufreq_dt.rs:21:43 | 21 | let prop_name = CString::try_from_fmt(fmt!("{}-supply", name)).ok()?; | ^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#uninlined_format_args = note: `-W clippy::uninlined-format-args` implied by `-W clippy::all` = help: to override `-W clippy::all` add `#[allow(clippy::uninlined_format_args)]` help: change this to | 21 - let prop_name = CString::try_from_fmt(fmt!("{}-supply", name)).ok()?; 21 + let prop_name = CString::try_from_fmt(fmt!("{name}-supply")).ok()?; | Thus fix them in the same commit. 
This could possibly be fixed in two stages, but the diff is small enough (outside of kernel/str.rs) that I hope it can be taken in a single commit. Signed-off-by: Tamir Duberstein Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250704-core-cstr-prepare-v1-1-a91524037783@gmail.com Signed-off-by: Miguel Ojeda --- drivers/cpufreq/rcpufreq_dt.rs | 3 +-- drivers/gpu/nova-core/firmware.rs | 5 +++-- rust/kernel/opp.rs | 2 +- rust/kernel/prelude.rs | 2 +- rust/kernel/str.rs | 34 +++++++++++++------------------ 5 files changed, 20 insertions(+), 26 deletions(-) diff --git a/drivers/cpufreq/rcpufreq_dt.rs b/drivers/cpufreq/rcpufreq_dt.rs index 30a170570c0e..4608d2286fa1 100644 --- a/drivers/cpufreq/rcpufreq_dt.rs +++ b/drivers/cpufreq/rcpufreq_dt.rs @@ -9,7 +9,6 @@ cpumask::CpumaskVar, device::{Core, Device}, error::code::*, - fmt, macros::vtable, module_platform_driver, of, opp, platform, prelude::*, @@ -19,7 +18,7 @@ /// Finds exact supply name from the OF node. 
fn find_supply_name_exact(dev: &Device, name: &str) -> Option { - let prop_name = CString::try_from_fmt(fmt!("{}-supply", name)).ok()?; + let prop_name = CString::try_from_fmt(fmt!("{name}-supply")).ok()?; dev.property_present(&prop_name) .then(|| CString::try_from_fmt(fmt!("{name}")).ok()) .flatten() diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 4b8a38358a4f..e503a4fddae0 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -24,11 +24,12 @@ pub(crate) struct Firmware { impl Firmware { pub(crate) fn new(dev: &device::Device, chipset: Chipset, ver: &str) -> Result { - let mut chip_name = CString::try_from_fmt(fmt!("{}", chipset))?; + let mut chip_name = CString::try_from_fmt(fmt!("{chipset}"))?; chip_name.make_ascii_lowercase(); + let chip_name = &*chip_name; let request = |name_| { - CString::try_from_fmt(fmt!("nvidia/{}/gsp/{}-{}.bin", &*chip_name, name_, ver)) + CString::try_from_fmt(fmt!("nvidia/{chip_name}/gsp/{name_}-{ver}.bin")) .and_then(|path| firmware::Firmware::request(&path, dev)) }; diff --git a/rust/kernel/opp.rs b/rust/kernel/opp.rs index 0e94cb2703ec..5a161ad12bf7 100644 --- a/rust/kernel/opp.rs +++ b/rust/kernel/opp.rs @@ -345,7 +345,7 @@ fn drop(&mut self) { /// impl ConfigOps for Driver {} /// /// fn configure(dev: &ARef) -> Result { -/// let name = CString::try_from_fmt(fmt!("{}", "slow"))?; +/// let name = CString::try_from_fmt(fmt!("slow"))?; /// /// // The OPP configuration is cleared once the [`ConfigToken`] goes out of scope. /// Config::::new() diff --git a/rust/kernel/prelude.rs b/rust/kernel/prelude.rs index 9a1a830f605c..25fe97aafd02 100644 --- a/rust/kernel/prelude.rs +++ b/rust/kernel/prelude.rs @@ -31,9 +31,9 @@ // `super::std_vendor` is hidden, which makes the macro inline for some reason. 
#[doc(no_inline)] pub use super::dbg; -pub use super::fmt; pub use super::{dev_alert, dev_crit, dev_dbg, dev_emerg, dev_err, dev_info, dev_notice, dev_warn}; pub use super::{pr_alert, pr_crit, pr_debug, pr_emerg, pr_err, pr_info, pr_notice, pr_warn}; +pub use core::format_args as fmt; pub use super::{try_init, try_pin_init}; diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index cbc8b459ed41..10399fb7af45 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -54,13 +54,13 @@ impl fmt::Display for BStr { /// Formats printable ASCII characters, escaping the rest. /// /// ``` - /// # use kernel::{fmt, b_str, str::{BStr, CString}}; + /// # use kernel::{prelude::fmt, b_str, str::{BStr, CString}}; /// let ascii = b_str!("Hello, BStr!"); - /// let s = CString::try_from_fmt(fmt!("{}", ascii))?; + /// let s = CString::try_from_fmt(fmt!("{ascii}"))?; /// assert_eq!(s.as_bytes(), "Hello, BStr!".as_bytes()); /// /// let non_ascii = b_str!("🦀"); - /// let s = CString::try_from_fmt(fmt!("{}", non_ascii))?; + /// let s = CString::try_from_fmt(fmt!("{non_ascii}"))?; /// assert_eq!(s.as_bytes(), "\\xf0\\x9f\\xa6\\x80".as_bytes()); /// # Ok::<(), kernel::error::Error>(()) /// ``` @@ -85,14 +85,14 @@ impl fmt::Debug for BStr { /// escaping the rest. /// /// ``` - /// # use kernel::{fmt, b_str, str::{BStr, CString}}; + /// # use kernel::{prelude::fmt, b_str, str::{BStr, CString}}; /// // Embedded double quotes are escaped. 
/// let ascii = b_str!("Hello, \"BStr\"!"); - /// let s = CString::try_from_fmt(fmt!("{:?}", ascii))?; + /// let s = CString::try_from_fmt(fmt!("{ascii:?}"))?; /// assert_eq!(s.as_bytes(), "\"Hello, \\\"BStr\\\"!\"".as_bytes()); /// /// let non_ascii = b_str!("😺"); - /// let s = CString::try_from_fmt(fmt!("{:?}", non_ascii))?; + /// let s = CString::try_from_fmt(fmt!("{non_ascii:?}"))?; /// assert_eq!(s.as_bytes(), "\"\\xf0\\x9f\\x98\\xba\"".as_bytes()); /// # Ok::<(), kernel::error::Error>(()) /// ``` @@ -429,15 +429,15 @@ impl fmt::Display for CStr { /// /// ``` /// # use kernel::c_str; - /// # use kernel::fmt; + /// # use kernel::prelude::fmt; /// # use kernel::str::CStr; /// # use kernel::str::CString; /// let penguin = c_str!("🐧"); - /// let s = CString::try_from_fmt(fmt!("{}", penguin))?; + /// let s = CString::try_from_fmt(fmt!("{penguin}"))?; /// assert_eq!(s.as_bytes_with_nul(), "\\xf0\\x9f\\x90\\xa7\0".as_bytes()); /// /// let ascii = c_str!("so \"cool\""); - /// let s = CString::try_from_fmt(fmt!("{}", ascii))?; + /// let s = CString::try_from_fmt(fmt!("{ascii}"))?; /// assert_eq!(s.as_bytes_with_nul(), "so \"cool\"\0".as_bytes()); /// # Ok::<(), kernel::error::Error>(()) /// ``` @@ -459,16 +459,16 @@ impl fmt::Debug for CStr { /// /// ``` /// # use kernel::c_str; - /// # use kernel::fmt; + /// # use kernel::prelude::fmt; /// # use kernel::str::CStr; /// # use kernel::str::CString; /// let penguin = c_str!("🐧"); - /// let s = CString::try_from_fmt(fmt!("{:?}", penguin))?; + /// let s = CString::try_from_fmt(fmt!("{penguin:?}"))?; /// assert_eq!(s.as_bytes_with_nul(), "\"\\xf0\\x9f\\x90\\xa7\"\0".as_bytes()); /// /// // Embedded double quotes are escaped. 
/// let ascii = c_str!("so \"cool\""); - /// let s = CString::try_from_fmt(fmt!("{:?}", ascii))?; + /// let s = CString::try_from_fmt(fmt!("{ascii:?}"))?; /// assert_eq!(s.as_bytes_with_nul(), "\"so \\\"cool\\\"\"\0".as_bytes()); /// # Ok::<(), kernel::error::Error>(()) /// ``` @@ -578,7 +578,7 @@ mod tests { macro_rules! format { ($($f:tt)*) => ({ - CString::try_from_fmt(::kernel::fmt!($($f)*))?.to_str()? + CString::try_from_fmt(fmt!($($f)*))?.to_str()? }) } @@ -840,7 +840,7 @@ fn write_str(&mut self, s: &str) -> fmt::Result { /// # Examples /// /// ``` -/// use kernel::{str::CString, fmt}; +/// use kernel::{str::CString, prelude::fmt}; /// /// let s = CString::try_from_fmt(fmt!("{}{}{}", "abc", 10, 20))?; /// assert_eq!(s.as_bytes_with_nul(), "abc1020\0".as_bytes()); @@ -930,9 +930,3 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Debug::fmt(&**self, f) } } - -/// A convenience alias for [`core::format_args`]. -#[macro_export] -macro_rules! fmt { - ($($f:tt)*) => ( ::core::format_args!($($f)*) ) -} From bda947d613f1882c73ebb3ddda388459bab5902c Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 4 Jul 2025 16:14:53 -0400 Subject: [PATCH 0801/2411] rust: kernel: add `fmt` module `kernel::fmt` is a facade over `core::fmt` that can be used downstream, allowing future changes to the formatting machinery to be contained within the kernel crate without downstream code needing to be modified. Signed-off-by: Tamir Duberstein Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250704-core-cstr-prepare-v1-2-a91524037783@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/fmt.rs | 7 +++++++ rust/kernel/lib.rs | 1 + 2 files changed, 8 insertions(+) create mode 100644 rust/kernel/fmt.rs diff --git a/rust/kernel/fmt.rs b/rust/kernel/fmt.rs new file mode 100644 index 000000000000..0306e8388968 --- /dev/null +++ b/rust/kernel/fmt.rs @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Formatting utilities. +//! 
+//! This module is intended to be used in place of `core::fmt` in kernel code. + +pub use core::fmt::{Arguments, Debug, Display, Error, Formatter, Result, Write}; diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 38c07f35073b..e88bc4b27d6e 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -78,6 +78,7 @@ pub mod faux; #[cfg(CONFIG_RUST_FW_LOADER_ABSTRACTIONS)] pub mod firmware; +pub mod fmt; pub mod fs; pub mod init; pub mod io; From 386f285d885ae40b64ccf8328d59694055af3187 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 4 Jul 2025 16:14:54 -0400 Subject: [PATCH 0802/2411] rust: use `kernel::{fmt,prelude::fmt!}` Reduce coupling to implementation details of the formatting machinery by avoiding direct use for `core`'s formatting traits and macros. Signed-off-by: Tamir Duberstein Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250704-core-cstr-prepare-v1-3-a91524037783@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/error.rs | 6 +++--- rust/kernel/print.rs | 6 +++--- rust/kernel/str.rs | 2 +- samples/rust/rust_print_main.rs | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index 6277af1c1baa..ffa8efd2d547 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -6,10 +6,10 @@ use crate::{ alloc::{layout::LayoutError, AllocError}, + fmt, str::CStr, }; -use core::fmt; use core::num::NonZeroI32; use core::num::TryFromIntError; use core::str::Utf8Error; @@ -219,8 +219,8 @@ fn from(_: LayoutError) -> Error { } } -impl From for Error { - fn from(_: core::fmt::Error) -> Error { +impl From for Error { + fn from(_: fmt::Error) -> Error { code::EINVAL } } diff --git a/rust/kernel/print.rs b/rust/kernel/print.rs index ecdcee43e5a5..2d743d78d220 100644 --- a/rust/kernel/print.rs +++ b/rust/kernel/print.rs @@ -8,10 +8,10 @@ use crate::{ ffi::{c_char, c_void}, + fmt, prelude::*, str::RawFormatter, }; -use core::fmt; // Called from `vsprintf` 
with format specifier `%pA`. #[expect(clippy::missing_safety_doc)] @@ -149,7 +149,7 @@ macro_rules! print_macro ( // takes borrows on the arguments, but does not extend the scope of temporaries. // Therefore, a `match` expression is used to keep them around, since // the scrutinee is kept until the end of the `match`. - match format_args!($($arg)+) { + match $crate::prelude::fmt!($($arg)+) { // SAFETY: This hidden macro should only be called by the documented // printing macros which ensure the format string is one of the fixed // ones. All `__LOG_PREFIX`s are null-terminated as they are generated @@ -168,7 +168,7 @@ macro_rules! print_macro ( // The `CONT` case. ($format_string:path, true, $($arg:tt)+) => ( $crate::print::call_printk_cont( - format_args!($($arg)+), + $crate::prelude::fmt!($($arg)+), ); ); ); diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index 10399fb7af45..48d9a518db96 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -3,7 +3,7 @@ //! String representations. use crate::alloc::{flags::*, AllocError, KVec}; -use core::fmt::{self, Write}; +use crate::fmt::{self, Write}; use core::ops::{self, Deref, DerefMut, Index}; use crate::prelude::*; diff --git a/samples/rust/rust_print_main.rs b/samples/rust/rust_print_main.rs index 8ea95e8c2f36..4095c72afeab 100644 --- a/samples/rust/rust_print_main.rs +++ b/samples/rust/rust_print_main.rs @@ -40,7 +40,7 @@ fn arc_print() -> Result { // behaviour, contract or protocol on both `i32` and `&str` into a single `Arc` of // type `Arc`. - use core::fmt::Display; + use kernel::fmt::Display; fn arc_dyn_print(arc: &Arc) { pr_info!("Arc says {arc}"); } From 0f6d225671e05bd84b426823152b77a8db580f92 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 4 Jul 2025 16:14:55 -0400 Subject: [PATCH 0803/2411] rust: str: remove unnecessary qualification `core::ffi::*` is in the prelude, which is imported here. 
Signed-off-by: Tamir Duberstein Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250704-core-cstr-prepare-v1-4-a91524037783@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/str.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index 48d9a518db96..f326f0c40ab0 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -232,7 +232,7 @@ pub const fn is_empty(&self) -> bool { /// last at least `'a`. When `CStr` is alive, the memory pointed by `ptr` /// must not be mutated. #[inline] - pub unsafe fn from_char_ptr<'a>(ptr: *const crate::ffi::c_char) -> &'a Self { + pub unsafe fn from_char_ptr<'a>(ptr: *const c_char) -> &'a Self { // SAFETY: The safety precondition guarantees `ptr` is a valid pointer // to a `NUL`-terminated C string. let len = unsafe { bindings::strlen(ptr) } + 1; @@ -295,7 +295,7 @@ pub unsafe fn from_bytes_with_nul_unchecked_mut(bytes: &mut [u8]) -> &mut CStr { /// Returns a C pointer to the string. #[inline] - pub const fn as_char_ptr(&self) -> *const crate::ffi::c_char { + pub const fn as_char_ptr(&self) -> *const c_char { self.0.as_ptr() } From 10a7108d4bd411166a7b4484bda8894dc3f0f04b Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 4 Jul 2025 16:14:56 -0400 Subject: [PATCH 0804/2411] rust: str: add `CStr` methods matching `core::ffi::CStr` Prepare for replacing `CStr` with `core::ffi::CStr` by soft-deprecating methods which don't exist on `core::ffi::CStr`. We could keep `as_bytes{,_with_nul}` through an extension trait but seeing as we have to introduce `as_char_ptr_in_const_context` as a free function, we may as well introduce `to_bytes{,_with_nul}` here to allow downstream code to migrate in one cycle rather than two. 
Link: https://github.com/Rust-for-Linux/linux/issues/1075 Signed-off-by: Tamir Duberstein Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250704-core-cstr-prepare-v1-5-a91524037783@gmail.com [ Reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/str.rs | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index f326f0c40ab0..cbb357fc0111 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -175,6 +175,15 @@ macro_rules! b_str { }}; } +/// Returns a C pointer to the string. +// It is a free function rather than a method on an extension trait because: +// +// - error[E0379]: functions in trait impls cannot be declared const +#[inline] +pub const fn as_char_ptr_in_const_context(c_str: &CStr) -> *const c_char { + c_str.0.as_ptr() +} + /// Possible errors when using conversion functions in [`CStr`]. #[derive(Debug, Clone, Copy)] pub enum CStrConvertError { @@ -294,23 +303,45 @@ pub unsafe fn from_bytes_with_nul_unchecked_mut(bytes: &mut [u8]) -> &mut CStr { } /// Returns a C pointer to the string. + /// + /// Using this function in a const context is deprecated in favor of + /// [`as_char_ptr_in_const_context`] in preparation for replacing `CStr` with `core::ffi::CStr` + /// which does not have this method. #[inline] pub const fn as_char_ptr(&self) -> *const c_char { - self.0.as_ptr() + as_char_ptr_in_const_context(self) } /// Convert the string to a byte slice without the trailing `NUL` byte. #[inline] - pub fn as_bytes(&self) -> &[u8] { + pub fn to_bytes(&self) -> &[u8] { &self.0[..self.len()] } + /// Convert the string to a byte slice without the trailing `NUL` byte. + /// + /// This function is deprecated in favor of [`Self::to_bytes`] in preparation for replacing + /// `CStr` with `core::ffi::CStr` which does not have this method. 
+ #[inline] + pub fn as_bytes(&self) -> &[u8] { + self.to_bytes() + } + /// Convert the string to a byte slice containing the trailing `NUL` byte. #[inline] - pub const fn as_bytes_with_nul(&self) -> &[u8] { + pub const fn to_bytes_with_nul(&self) -> &[u8] { &self.0 } + /// Convert the string to a byte slice containing the trailing `NUL` byte. + /// + /// This function is deprecated in favor of [`Self::to_bytes_with_nul`] in preparation for + /// replacing `CStr` with `core::ffi::CStr` which does not have this method. + #[inline] + pub const fn as_bytes_with_nul(&self) -> &[u8] { + self.to_bytes_with_nul() + } + /// Yields a [`&str`] slice if the [`CStr`] contains valid UTF-8. /// /// If the contents of the [`CStr`] are valid UTF-8 data, this From 1523590203786bf4e1d29b7d08a7100c783f20ba Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 4 Jul 2025 16:14:57 -0400 Subject: [PATCH 0805/2411] rust: kernel: use `core::ffi::CStr` method names Prepare for `core::ffi::CStr` taking the place of `kernel::str::CStr` by avoiding methods that only exist on the latter. Also avoid `Deref for CStr` as that impl doesn't exist on `core::ffi::CStr`. Link: https://github.com/Rust-for-Linux/linux/issues/1075 Signed-off-by: Tamir Duberstein Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250704-core-cstr-prepare-v1-6-a91524037783@gmail.com [ Reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/error.rs | 2 +- rust/kernel/str.rs | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index ffa8efd2d547..e29a5d76300e 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -188,7 +188,7 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { Some(name) => f .debug_tuple( // SAFETY: These strings are ASCII-only. 
- unsafe { core::str::from_utf8_unchecked(name) }, + unsafe { core::str::from_utf8_unchecked(name.to_bytes()) }, ) .finish(), } diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index cbb357fc0111..6c892550c0ba 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -57,11 +57,11 @@ impl fmt::Display for BStr { /// # use kernel::{prelude::fmt, b_str, str::{BStr, CString}}; /// let ascii = b_str!("Hello, BStr!"); /// let s = CString::try_from_fmt(fmt!("{ascii}"))?; - /// assert_eq!(s.as_bytes(), "Hello, BStr!".as_bytes()); + /// assert_eq!(s.to_bytes(), "Hello, BStr!".as_bytes()); /// /// let non_ascii = b_str!("🦀"); /// let s = CString::try_from_fmt(fmt!("{non_ascii}"))?; - /// assert_eq!(s.as_bytes(), "\\xf0\\x9f\\xa6\\x80".as_bytes()); + /// assert_eq!(s.to_bytes(), "\\xf0\\x9f\\xa6\\x80".as_bytes()); /// # Ok::<(), kernel::error::Error>(()) /// ``` fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -89,11 +89,11 @@ impl fmt::Debug for BStr { /// // Embedded double quotes are escaped. 
/// let ascii = b_str!("Hello, \"BStr\"!"); /// let s = CString::try_from_fmt(fmt!("{ascii:?}"))?; - /// assert_eq!(s.as_bytes(), "\"Hello, \\\"BStr\\\"!\"".as_bytes()); + /// assert_eq!(s.to_bytes(), "\"Hello, \\\"BStr\\\"!\"".as_bytes()); /// /// let non_ascii = b_str!("😺"); /// let s = CString::try_from_fmt(fmt!("{non_ascii:?}"))?; - /// assert_eq!(s.as_bytes(), "\"\\xf0\\x9f\\x98\\xba\"".as_bytes()); + /// assert_eq!(s.to_bytes(), "\"\\xf0\\x9f\\x98\\xba\"".as_bytes()); /// # Ok::<(), kernel::error::Error>(()) /// ``` fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -465,15 +465,15 @@ impl fmt::Display for CStr { /// # use kernel::str::CString; /// let penguin = c_str!("🐧"); /// let s = CString::try_from_fmt(fmt!("{penguin}"))?; - /// assert_eq!(s.as_bytes_with_nul(), "\\xf0\\x9f\\x90\\xa7\0".as_bytes()); + /// assert_eq!(s.to_bytes_with_nul(), "\\xf0\\x9f\\x90\\xa7\0".as_bytes()); /// /// let ascii = c_str!("so \"cool\""); /// let s = CString::try_from_fmt(fmt!("{ascii}"))?; - /// assert_eq!(s.as_bytes_with_nul(), "so \"cool\"\0".as_bytes()); + /// assert_eq!(s.to_bytes_with_nul(), "so \"cool\"\0".as_bytes()); /// # Ok::<(), kernel::error::Error>(()) /// ``` fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - for &c in self.as_bytes() { + for &c in self.to_bytes() { if (0x20..0x7f).contains(&c) { // Printable character. f.write_char(c as char)?; @@ -874,11 +874,11 @@ fn write_str(&mut self, s: &str) -> fmt::Result { /// use kernel::{str::CString, prelude::fmt}; /// /// let s = CString::try_from_fmt(fmt!("{}{}{}", "abc", 10, 20))?; -/// assert_eq!(s.as_bytes_with_nul(), "abc1020\0".as_bytes()); +/// assert_eq!(s.to_bytes_with_nul(), "abc1020\0".as_bytes()); /// /// let tmp = "testing"; /// let s = CString::try_from_fmt(fmt!("{tmp}{}", 123))?; -/// assert_eq!(s.as_bytes_with_nul(), "testing123\0".as_bytes()); +/// assert_eq!(s.to_bytes_with_nul(), "testing123\0".as_bytes()); /// /// // This fails because it has an embedded `NUL` byte. 
/// let s = CString::try_from_fmt(fmt!("a\0b{}", 123)); @@ -948,7 +948,7 @@ impl<'a> TryFrom<&'a CStr> for CString { fn try_from(cstr: &'a CStr) -> Result { let mut buf = KVec::new(); - buf.extend_from_slice(cstr.as_bytes_with_nul(), GFP_KERNEL)?; + buf.extend_from_slice(cstr.to_bytes_with_nul(), GFP_KERNEL)?; // INVARIANT: The `CStr` and `CString` types have the same invariants for // the string data, and we copied it over without changes. From e9fdf0d2ecc095ce9f078588c7ce06967e0138b2 Mon Sep 17 00:00:00 2001 From: Federico Pellegrin Date: Fri, 18 Jul 2025 06:12:24 +0200 Subject: [PATCH 0806/2411] perf build: Always disable stack protection for BPF skeleton objects When the clang toolchain has stack protection enabled, the bpf skeletons build fails with: error: A call to built-in function '__stack_chk_fail' is not supported. Since stack-protector makes no sense for the BPF bits, just unconditionally disable it. See also similar case at 878625e1c7a10dfbb1fdaaaae2c4d2a58fbce627 Signed-off-by: Federico Pellegrin Link: https://lore.kernel.org/r/20250718041224.12389-1-fede@evolware.org [ rearrange long lines ] Signed-off-by: Namhyung Kim --- tools/perf/Makefile.perf | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 9b51593628c1..e2150acc2c13 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1249,8 +1249,10 @@ else $(Q)cp "$(VMLINUX_H)" $@ endif -$(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(OUTPUT)PERF-VERSION-FILE util/bpf_skel/perf_version.h $(LIBBPF) $(SKEL_OUT)/vmlinux.h | $(SKEL_TMP_OUT) - $(QUIET_CLANG)$(CLANG) -g -O2 --target=bpf $(CLANG_OPTIONS) $(BPF_INCLUDE) $(TOOLS_UAPI_INCLUDE) \ +$(SKEL_TMP_OUT)/%.bpf.o: $(OUTPUT)PERF-VERSION-FILE util/bpf_skel/perf_version.h | $(SKEL_TMP_OUT) +$(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) $(SKEL_OUT)/vmlinux.h + $(QUIET_CLANG)$(CLANG) -g -O2 -fno-stack-protector --target=bpf \ + $(CLANG_OPTIONS) 
$(BPF_INCLUDE) $(TOOLS_UAPI_INCLUDE) \ -include $(OUTPUT)PERF-VERSION-FILE -include util/bpf_skel/perf_version.h \ -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ From dba7d9dbfdc4389361ff3a910e767d3cfca22587 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Thu, 26 Jun 2025 14:09:52 +0800 Subject: [PATCH 0807/2411] soundwire: stream: restore params when prepare ports fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bus->params should be restored if the stream is failed to prepare. The issue exists since beginning. The Fixes tag just indicates the first commit that the commit can be applied to. Fixes: 17ed5bef49f4 ("soundwire: add missing newlines in dynamic debug logs") Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Ranjani Sridharan Link: https://lore.kernel.org/r/20250626060952.405996-1-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index a4bea742b5d9..38c9dbd35606 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -1510,7 +1510,7 @@ static int _sdw_prepare_stream(struct sdw_stream_runtime *stream, if (ret < 0) { dev_err(bus->dev, "Prepare port(s) failed ret = %d\n", ret); - return ret; + goto restore_params; } } From 34b1cb4ec286603127aa8c4191ea527eb8dd3567 Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Tue, 15 Jul 2025 17:40:41 +0530 Subject: [PATCH 0808/2411] soundwire: amd: Add support for acp7.2 platform Add soundwire support for acp7.2 platform. 
Signed-off-by: Venkata Prasad Potturu Link: https://lore.kernel.org/r/20250715121048.1795607-1-venkataprasad.potturu@amd.com Signed-off-by: Vinod Koul --- drivers/soundwire/amd_manager.c | 4 ++++ include/linux/soundwire/sdw_amd.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index d4e62c383b12..3632838f3ed9 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -499,6 +499,7 @@ static int amd_sdw_port_params(struct sdw_bus *bus, struct sdw_port_params *p_pa break; case ACP70_PCI_REV_ID: case ACP71_PCI_REV_ID: + case ACP72_PCI_REV_ID: frame_fmt_reg = acp70_sdw_dp_reg[p_params->num].frame_fmt_reg; break; default: @@ -551,6 +552,7 @@ static int amd_sdw_transport_params(struct sdw_bus *bus, break; case ACP70_PCI_REV_ID: case ACP71_PCI_REV_ID: + case ACP72_PCI_REV_ID: frame_fmt_reg = acp70_sdw_dp_reg[params->port_num].frame_fmt_reg; sample_int_reg = acp70_sdw_dp_reg[params->port_num].sample_int_reg; hctrl_dp0_reg = acp70_sdw_dp_reg[params->port_num].hctrl_dp0_reg; @@ -614,6 +616,7 @@ static int amd_sdw_port_enable(struct sdw_bus *bus, break; case ACP70_PCI_REV_ID: case ACP71_PCI_REV_ID: + case ACP72_PCI_REV_ID: lane_ctrl_ch_en_reg = acp70_sdw_dp_reg[enable_ch->port_num].lane_ctrl_ch_en_reg; break; default: @@ -1038,6 +1041,7 @@ static int amd_sdw_manager_probe(struct platform_device *pdev) break; case ACP70_PCI_REV_ID: case ACP71_PCI_REV_ID: + case ACP72_PCI_REV_ID: amd_manager->num_dout_ports = AMD_ACP70_SDW_MAX_TX_PORTS; amd_manager->num_din_ports = AMD_ACP70_SDW_MAX_RX_PORTS; break; diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h index 6b839987f14c..fe31773d5210 100644 --- a/include/linux/soundwire/sdw_amd.h +++ b/include/linux/soundwire/sdw_amd.h @@ -30,6 +30,7 @@ #define ACP63_PCI_REV_ID 0x63 #define ACP70_PCI_REV_ID 0x70 #define ACP71_PCI_REV_ID 0x71 +#define ACP72_PCI_REV_ID 0x72 struct acp_sdw_pdata { u16 instance; From 
61ae7f8694fb4b57a8c02a1a8d2b601806afc999 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 9 Jul 2025 18:20:22 +0530 Subject: [PATCH 0809/2411] PCI: endpoint: pci-epf-vntb: Fix the incorrect usage of __iomem attribute __iomem attribute is supposed to be used only with variables holding the MMIO pointer. But here, 'mw_addr' variable is just holding a 'void *' returned by pci_epf_alloc_space(). So annotating it with __iomem is clearly wrong. Hence, drop the attribute. This also fixes the below sparse warning: drivers/pci/endpoint/functions/pci-epf-vntb.c:524:17: warning: incorrect type in assignment (different address spaces) drivers/pci/endpoint/functions/pci-epf-vntb.c:524:17: expected void [noderef] __iomem *mw_addr drivers/pci/endpoint/functions/pci-epf-vntb.c:524:17: got void * drivers/pci/endpoint/functions/pci-epf-vntb.c:530:21: warning: incorrect type in assignment (different address spaces) drivers/pci/endpoint/functions/pci-epf-vntb.c:530:21: expected unsigned int [usertype] *epf_db drivers/pci/endpoint/functions/pci-epf-vntb.c:530:21: got void [noderef] __iomem *mw_addr drivers/pci/endpoint/functions/pci-epf-vntb.c:542:38: warning: incorrect type in argument 2 (different address spaces) drivers/pci/endpoint/functions/pci-epf-vntb.c:542:38: expected void *addr drivers/pci/endpoint/functions/pci-epf-vntb.c:542:38: got void [noderef] __iomem *mw_addr Fixes: e35f56bb0330 ("PCI: endpoint: Support NTB transfer between RC and EP") Signed-off-by: Manivannan Sadhasivam Reviewed-by: Frank Li Link: https://patch.msgid.link/20250709125022.22524-1-mani@kernel.org --- drivers/pci/endpoint/functions/pci-epf-vntb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c index ac83a6dc6116..83e9ab10f9c4 100644 --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c @@ -512,7 +512,7 @@ static int 
epf_ntb_db_bar_init(struct epf_ntb *ntb) struct device *dev = &ntb->epf->dev; int ret; struct pci_epf_bar *epf_bar; - void __iomem *mw_addr; + void *mw_addr; enum pci_barno barno; size_t size = sizeof(u32) * ntb->db_count; From ac3dbb91e0167d017f44701dd51c1efe30d0c256 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 17 Jul 2025 18:55:02 +0200 Subject: [PATCH 0810/2411] watchdog: dw_wdt: Fix default timeout The Synopsys Watchdog driver sets the default timeout to 30 seconds, but on some devices this is not a valid timeout. E.g. on RK3588 the actual timeout being used is 44 seconds instead. Once the watchdog is started the value is updated accordingly, but it would be better to expose a sensible timeout to userspace without the need to first start the watchdog. Signed-off-by: Sebastian Reichel Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20250717-dw-wdt-fix-initial-timeout-v1-1-86dc864d48dd@kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/dw_wdt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c index 26efca9ae0e7..c3fbb6068c52 100644 --- a/drivers/watchdog/dw_wdt.c +++ b/drivers/watchdog/dw_wdt.c @@ -644,6 +644,8 @@ static int dw_wdt_drv_probe(struct platform_device *pdev) } else { wdd->timeout = DW_WDT_DEFAULT_SECONDS; watchdog_init_timeout(wdd, 0, dev); + /* Limit timeout value to hardware constraints. */ + dw_wdt_set_timeout(wdd, wdd->timeout); } platform_set_drvdata(pdev, dw_wdt); From 9d0ca8df2451eb66a0c13a9932f348d417d9603b Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 10 Jul 2025 15:13:47 -0400 Subject: [PATCH 0811/2411] PCI: imx6: Add helper function imx_pcie_add_lut_by_rid() Add helper function imx_pcie_add_lut_by_rid(), which will be used by the upcoming LUT configuration for MSI/IOMMU in the Endpoint mode. No functional change. 
Signed-off-by: Frank Li [mani: reworded commit message and dropped tested-by tag] Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250710-ep-msi-v21-1-57683fc7fb25@nxp.com --- drivers/pci/controller/dwc/pci-imx6.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 9754cc6e09b9..1f479da88fc8 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -1096,18 +1096,14 @@ static void imx_pcie_remove_lut(struct imx_pcie *imx_pcie, u16 rid) } } -static int imx_pcie_enable_device(struct pci_host_bridge *bridge, - struct pci_dev *pdev) +static int imx_pcie_add_lut_by_rid(struct imx_pcie *imx_pcie, u32 rid) { - struct imx_pcie *imx_pcie = to_imx_pcie(to_dw_pcie_from_pp(bridge->sysdata)); - u32 sid_i, sid_m, rid = pci_dev_id(pdev); + struct device *dev = imx_pcie->pci->dev; struct device_node *target; - struct device *dev; + u32 sid_i, sid_m; int err_i, err_m; u32 sid = 0; - dev = imx_pcie->pci->dev; - target = NULL; err_i = of_map_id(dev->of_node, rid, "iommu-map", "iommu-map-mask", &target, &sid_i); @@ -1182,6 +1178,13 @@ static int imx_pcie_enable_device(struct pci_host_bridge *bridge, return imx_pcie_add_lut(imx_pcie, rid, sid); } +static int imx_pcie_enable_device(struct pci_host_bridge *bridge, struct pci_dev *pdev) +{ + struct imx_pcie *imx_pcie = to_imx_pcie(to_dw_pcie_from_pp(bridge->sysdata)); + + return imx_pcie_add_lut_by_rid(imx_pcie, pci_dev_id(pdev)); +} + static void imx_pcie_disable_device(struct pci_host_bridge *bridge, struct pci_dev *pdev) { From 234b9258c6907cabbb2594ee366286d35ff056f3 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 10 Jul 2025 15:13:48 -0400 Subject: [PATCH 0812/2411] PCI: imx6: Add LUT configuration for MSI/IOMMU in Endpoint mode Add LUT entry for MSI/IOMMU in Endpoint mode by calling imx_pcie_add_lut_by_rid() helper function. 
Since only one physical function is supported in the Endpoint mode for now, '0' is passed as the Device ID. This sets up a single LUT entry required for MSI/IOMMU. The Endpoint function can operate without LUT configuration if neither IOMMU nor MSI is used by the platform. This LUT configuration is used for the EP doorbell feature by allowing the Root Complex to trigger the doorbell on the Endpoint with the help of the Endpoint MSI controller. Signed-off-by: Frank Li [mani: reworded the comments & commit message and dropped tested-by tag] Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250710-ep-msi-v21-2-57683fc7fb25@nxp.com --- drivers/pci/controller/dwc/pci-imx6.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 1f479da88fc8..eefe922d533b 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -1063,7 +1063,10 @@ static int imx_pcie_add_lut(struct imx_pcie *imx_pcie, u16 rid, u8 sid) data1 |= IMX95_PE0_LUT_VLD; regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, data1); - data2 = IMX95_PE0_LUT_MASK; /* Match all bits of RID */ + if (imx_pcie->drvdata->mode == DW_PCIE_EP_TYPE) + data2 = 0x7; /* In the EP mode, only 'Device ID' is required */ + else + data2 = IMX95_PE0_LUT_MASK; /* Match all bits of RID */ data2 |= FIELD_PREP(IMX95_PE0_LUT_REQID, rid); regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, data2); @@ -1769,6 +1772,12 @@ static int imx_pcie_probe(struct platform_device *pdev) ret = imx_add_pcie_ep(imx_pcie, pdev); if (ret < 0) return ret; + + /* + * FIXME: Only single Device (EPF) is supported due to the + * Endpoint framework limitation. 
+ */ + imx_pcie_add_lut_by_rid(imx_pcie, 0); } else { pci->pp.use_atu_msg = true; ret = dw_pcie_host_init(&pci->pp); From 28753212e0f9c61afd859acf1d678f5de7faa4b8 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Mon, 19 May 2025 14:43:02 +0200 Subject: [PATCH 0813/2411] rust: types: remove `Either` This enum is not used. Additionally, using it would result in poor ergonomics, because in order to do any operation on a value it has to be matched first. Our version of `Either` also doesn't provide any helper methods making it even more difficult to use. The alternative of creating a custom enum for the concrete use-case also is much better for ergonomics. As one can provide functions on the type directly and users don't need to match the value manually. Signed-off-by: Benno Lossin Reviewed-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250519124304.79237-1-lossin@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/types.rs | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index 49a0e8e9326b..82b9cfeb4739 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -569,24 +569,6 @@ fn drop(&mut self) { } } -/// A sum type that always holds either a value of type `L` or `R`. -/// -/// # Examples -/// -/// ``` -/// use kernel::types::Either; -/// -/// let left_value: Either = Either::Left(7); -/// let right_value: Either = Either::Right("right value"); -/// ``` -pub enum Either { - /// Constructs an instance of [`Either`] containing a value of type `L`. - Left(L), - - /// Constructs an instance of [`Either`] containing a value of type `R`. - Right(R), -} - /// Zero-sized type to mark types not [`Send`]. /// /// Add this type as a field to your struct if your type should not be sent to a different task. 
From 0c6f0d77ab62ff557fd0cace8284bc67ef7ab79c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 1 Jul 2025 09:44:39 +0200 Subject: [PATCH 0814/2411] rtc: Rename lib_test to test_rtc_lib When compiling the RTC library functions test as a module, the module has the non-descriptive name "lib_test.ko". Fix this by renaming it to "test_rtc_lib.ko". Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/47019d7f8ced12107b54a372fdf34b1b8f7b6183.1751355848.git.geert@linux-m68k.org Signed-off-by: Alexandre Belloni --- drivers/rtc/Makefile | 2 +- drivers/rtc/{lib_test.c => test_rtc_lib.c} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename drivers/rtc/{lib_test.c => test_rtc_lib.c} (100%) diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 4619aa2ac469..789bddfea99d 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -15,7 +15,7 @@ rtc-core-$(CONFIG_RTC_INTF_DEV) += dev.o rtc-core-$(CONFIG_RTC_INTF_PROC) += proc.o rtc-core-$(CONFIG_RTC_INTF_SYSFS) += sysfs.o -obj-$(CONFIG_RTC_LIB_KUNIT_TEST) += lib_test.o +obj-$(CONFIG_RTC_LIB_KUNIT_TEST) += test_rtc_lib.o # Keep the list ordered. diff --git a/drivers/rtc/lib_test.c b/drivers/rtc/test_rtc_lib.c similarity index 100% rename from drivers/rtc/lib_test.c rename to drivers/rtc/test_rtc_lib.c From e92eda97f8c534be63ab0ef322ad2fdfeb759e16 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 9 Jul 2025 20:47:52 +0200 Subject: [PATCH 0815/2411] rtc: sh: Convert to DEFINE_SIMPLE_DEV_PM_OPS() Convert the Renesas SuperH On-Chip RTC driver from SIMPLE_DEV_PM_OPS() to DEFINE_SIMPLE_DEV_PM_OPS() and pm_sleep_ptr(). This lets us drop the __maybe_unused annotations from its suspend and resume callbacks, and reduces kernel size in case CONFIG_PM or CONFIG_PM_SLEEP is disabled. 
Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/396d4a769b8d3c6fec43c65022cdfd8a6854524a.1752086758.git.geert+renesas@glider.be Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-sh.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index f15ef3aa82a0..619800a00479 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -455,7 +455,7 @@ static void __exit sh_rtc_remove(struct platform_device *pdev) clk_disable(rtc->clk); } -static int __maybe_unused sh_rtc_suspend(struct device *dev) +static int sh_rtc_suspend(struct device *dev) { struct sh_rtc *rtc = dev_get_drvdata(dev); @@ -465,7 +465,7 @@ static int __maybe_unused sh_rtc_suspend(struct device *dev) return 0; } -static int __maybe_unused sh_rtc_resume(struct device *dev) +static int sh_rtc_resume(struct device *dev) { struct sh_rtc *rtc = dev_get_drvdata(dev); @@ -475,7 +475,7 @@ static int __maybe_unused sh_rtc_resume(struct device *dev) return 0; } -static SIMPLE_DEV_PM_OPS(sh_rtc_pm_ops, sh_rtc_suspend, sh_rtc_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(sh_rtc_pm_ops, sh_rtc_suspend, sh_rtc_resume); static const struct of_device_id sh_rtc_of_match[] = { { .compatible = "renesas,sh-rtc", }, @@ -492,7 +492,7 @@ MODULE_DEVICE_TABLE(of, sh_rtc_of_match); static struct platform_driver sh_rtc_platform_driver __refdata = { .driver = { .name = DRV_NAME, - .pm = &sh_rtc_pm_ops, + .pm = pm_sleep_ptr(&sh_rtc_pm_ops), .of_match_table = sh_rtc_of_match, }, .remove = __exit_p(sh_rtc_remove), From 4dda8df717b7e5ad89de79844e5491aaf78b44da Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 2 Jul 2025 09:15:34 +0300 Subject: [PATCH 0816/2411] rtc: sysfs: Use sysfs_emit() to instead of s*printf() Follow the advice of the Documentation/filesystems/sysfs.rst that show() should only use sysfs_emit() or sysfs_emit_at() when formatting the value to be returned to user space. 
Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250702061534.2670729-1-andriy.shevchenko@linux.intel.com Signed-off-by: Alexandre Belloni --- drivers/rtc/sysfs.c | 46 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/drivers/rtc/sysfs.c b/drivers/rtc/sysfs.c index e3062c4d3f2c..86d1140b4f39 100644 --- a/drivers/rtc/sysfs.c +++ b/drivers/rtc/sysfs.c @@ -24,8 +24,8 @@ static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%s %s\n", dev_driver_string(dev->parent), - dev_name(dev->parent)); + return sysfs_emit(buf, "%s %s\n", dev_driver_string(dev->parent), + dev_name(dev->parent)); } static DEVICE_ATTR_RO(name); @@ -39,7 +39,7 @@ date_show(struct device *dev, struct device_attribute *attr, char *buf) if (retval) return retval; - return sprintf(buf, "%ptRd\n", &tm); + return sysfs_emit(buf, "%ptRd\n", &tm); } static DEVICE_ATTR_RO(date); @@ -53,7 +53,7 @@ time_show(struct device *dev, struct device_attribute *attr, char *buf) if (retval) return retval; - return sprintf(buf, "%ptRt\n", &tm); + return sysfs_emit(buf, "%ptRt\n", &tm); } static DEVICE_ATTR_RO(time); @@ -64,21 +64,17 @@ since_epoch_show(struct device *dev, struct device_attribute *attr, char *buf) struct rtc_time tm; retval = rtc_read_time(to_rtc_device(dev), &tm); - if (retval == 0) { - time64_t time; + if (retval) + return retval; - time = rtc_tm_to_time64(&tm); - retval = sprintf(buf, "%lld\n", time); - } - - return retval; + return sysfs_emit(buf, "%lld\n", rtc_tm_to_time64(&tm)); } static DEVICE_ATTR_RO(since_epoch); static ssize_t max_user_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", to_rtc_device(dev)->max_user_freq); + return sysfs_emit(buf, "%d\n", to_rtc_device(dev)->max_user_freq); } static ssize_t @@ -118,9 +114,9 @@ hctosys_show(struct device *dev, struct device_attribute *attr, char *buf) if 
(rtc_hctosys_ret == 0 && strcmp(dev_name(&to_rtc_device(dev)->dev), CONFIG_RTC_HCTOSYS_DEVICE) == 0) - return sprintf(buf, "1\n"); + return sysfs_emit(buf, "1\n"); #endif - return sprintf(buf, "0\n"); + return sysfs_emit(buf, "0\n"); } static DEVICE_ATTR_RO(hctosys); @@ -128,7 +124,6 @@ static ssize_t wakealarm_show(struct device *dev, struct device_attribute *attr, char *buf) { ssize_t retval; - time64_t alarm; struct rtc_wkalrm alm; /* Don't show disabled alarms. For uniformity, RTC alarms are @@ -140,12 +135,13 @@ wakealarm_show(struct device *dev, struct device_attribute *attr, char *buf) * alarms after they trigger, to ensure one-shot semantics. */ retval = rtc_read_alarm(to_rtc_device(dev), &alm); - if (retval == 0 && alm.enabled) { - alarm = rtc_tm_to_time64(&alm.time); - retval = sprintf(buf, "%lld\n", alarm); - } + if (retval) + return retval; - return retval; + if (alm.enabled) + return sysfs_emit(buf, "%lld\n", rtc_tm_to_time64(&alm.time)); + + return 0; } static ssize_t @@ -222,10 +218,10 @@ offset_show(struct device *dev, struct device_attribute *attr, char *buf) long offset; retval = rtc_read_offset(to_rtc_device(dev), &offset); - if (retval == 0) - retval = sprintf(buf, "%ld\n", offset); + if (retval) + return retval; - return retval; + return sysfs_emit(buf, "%ld\n", offset); } static ssize_t @@ -246,8 +242,8 @@ static DEVICE_ATTR_RW(offset); static ssize_t range_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "[%lld,%llu]\n", to_rtc_device(dev)->range_min, - to_rtc_device(dev)->range_max); + return sysfs_emit(buf, "[%lld,%llu]\n", to_rtc_device(dev)->range_min, + to_rtc_device(dev)->range_max); } static DEVICE_ATTR_RO(range); From bbe8d4fef308cddd11b2e766c8710d318334b88b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 2 Jul 2025 10:32:24 +0300 Subject: [PATCH 0817/2411] rtc: sysfs: Bail out earlier if no new groups provided When there is no new groups provided, no need to reallocate memory, copy 
the old ones and free them in order to do nothing. Do nothing instead. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250702073224.2684097-1-andriy.shevchenko@linux.intel.com Signed-off-by: Alexandre Belloni --- drivers/rtc/sysfs.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/sysfs.c b/drivers/rtc/sysfs.c index 86d1140b4f39..bf98a89d0e4e 100644 --- a/drivers/rtc/sysfs.c +++ b/drivers/rtc/sysfs.c @@ -314,17 +314,21 @@ int rtc_add_groups(struct rtc_device *rtc, const struct attribute_group **grps) size_t old_cnt = 0, add_cnt = 0, new_cnt; const struct attribute_group **groups, **old; - if (!grps) + if (grps) { + for (groups = grps; *groups; groups++) + add_cnt++; + /* No need to modify current groups if nothing new is provided */ + if (add_cnt == 0) + return 0; + } else { return -EINVAL; + } groups = rtc->dev.groups; if (groups) for (; *groups; groups++) old_cnt++; - for (groups = grps; *groups; groups++) - add_cnt++; - new_cnt = old_cnt + add_cnt + 1; groups = devm_kcalloc(&rtc->dev, new_cnt, sizeof(*groups), GFP_KERNEL); if (!groups) From fed5aaeb4e94e0f071bf467f2bafd5ea6f093722 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 2 Jul 2025 11:01:08 +0300 Subject: [PATCH 0818/2411] rtc: sysfs: use __ATTRIBUTE_GROUPS() Embrace __ATTRIBUTE_GROUPS() to avoid boiler plate code. This should not introduce any functional changes. 
Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250702080108.2722905-1-andriy.shevchenko@linux.intel.com Signed-off-by: Alexandre Belloni --- drivers/rtc/sysfs.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/rtc/sysfs.c b/drivers/rtc/sysfs.c index bf98a89d0e4e..4ab05e105a76 100644 --- a/drivers/rtc/sysfs.c +++ b/drivers/rtc/sysfs.c @@ -298,11 +298,7 @@ static struct attribute_group rtc_attr_group = { .is_visible = rtc_attr_is_visible, .attrs = rtc_attrs, }; - -static const struct attribute_group *rtc_attr_groups[] = { - &rtc_attr_group, - NULL -}; +__ATTRIBUTE_GROUPS(rtc_attr); const struct attribute_group **rtc_get_dev_attribute_groups(void) { From 4e6b5b8ab3e28148d04a63defadc29cfc771b102 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Wed, 21 May 2025 01:17:13 +0200 Subject: [PATCH 0819/2411] rust: sync: fix safety comment for `static_lock_class` The safety comment mentions lockdep -- which from a Rust perspective isn't important -- and doesn't mention the real reason for why it's sound to create `LockClassKey` as uninitialized memory. Signed-off-by: Benno Lossin Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250520231714.323931-1-lossin@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/sync.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs index 63c99e015ad6..096cb78c8ec3 100644 --- a/rust/kernel/sync.rs +++ b/rust/kernel/sync.rs @@ -95,8 +95,11 @@ fn drop(self: Pin<&mut Self>) { macro_rules! static_lock_class { () => {{ static CLASS: $crate::sync::LockClassKey = - // SAFETY: lockdep expects uninitialized memory when it's handed a statically allocated - // lock_class_key + // Lockdep expects uninitialized memory when it's handed a statically allocated `struct + // lock_class_key`. + // + // SAFETY: `LockClassKey` transparently wraps `Opaque` which permits uninitialized + // memory. 
unsafe { ::core::mem::MaybeUninit::uninit().assume_init() }; $crate::prelude::Pin::static_ref(&CLASS) }}; From 07dad44aa9a93b16af19e8609a10b241c352b440 Mon Sep 17 00:00:00 2001 From: Shankari Anand Date: Tue, 15 Jul 2025 16:34:23 +0530 Subject: [PATCH 0820/2411] rust: kernel: move ARef and AlwaysRefCounted to sync::aref Move the definitions of `ARef` and `AlwaysRefCounted` from `types.rs` to a new file `sync/aref.rs`. Define the corresponding `aref` module under `rust/kernel/sync.rs`. These types are better grouped in `sync`. To avoid breaking existing imports, they are re-exported from `types.rs`. Drop unused imports `mem::ManuallyDrop`, `ptr::NonNull` from `types.rs`, they are now only used in `sync/aref.rs`, where they are already imported. Suggested-by: Benno Lossin Link: https://github.com/Rust-for-Linux/linux/issues/1173 Signed-off-by: Shankari Anand Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250715110423.334744-1-shankari.ak0208@gmail.com [ Added missing `///`. Changed module title. Reworded slightly. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/sync.rs | 1 + rust/kernel/sync/aref.rs | 154 +++++++++++++++++++++++++++++++++++++++ rust/kernel/types.rs | 154 +-------------------------------------- 3 files changed, 158 insertions(+), 151 deletions(-) create mode 100644 rust/kernel/sync/aref.rs diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs index 096cb78c8ec3..00f9b558a3ad 100644 --- a/rust/kernel/sync.rs +++ b/rust/kernel/sync.rs @@ -10,6 +10,7 @@ use pin_init; mod arc; +pub mod aref; pub mod completion; mod condvar; pub mod lock; diff --git a/rust/kernel/sync/aref.rs b/rust/kernel/sync/aref.rs new file mode 100644 index 000000000000..dbd77bb68617 --- /dev/null +++ b/rust/kernel/sync/aref.rs @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Internal reference counting support. + +use core::{marker::PhantomData, mem::ManuallyDrop, ops::Deref, ptr::NonNull}; + +/// Types that are _always_ reference counted. 
+/// +/// It allows such types to define their own custom ref increment and decrement functions. +/// Additionally, it allows users to convert from a shared reference `&T` to an owned reference +/// [`ARef`]. +/// +/// This is usually implemented by wrappers to existing structures on the C side of the code. For +/// Rust code, the recommendation is to use [`Arc`](crate::sync::Arc) to create reference-counted +/// instances of a type. +/// +/// # Safety +/// +/// Implementers must ensure that increments to the reference count keep the object alive in memory +/// at least until matching decrements are performed. +/// +/// Implementers must also ensure that all instances are reference-counted. (Otherwise they +/// won't be able to honour the requirement that [`AlwaysRefCounted::inc_ref`] keep the object +/// alive.) +pub unsafe trait AlwaysRefCounted { + /// Increments the reference count on the object. + fn inc_ref(&self); + + /// Decrements the reference count on the object. + /// + /// Frees the object when the count reaches zero. + /// + /// # Safety + /// + /// Callers must ensure that there was a previous matching increment to the reference count, + /// and that the object is no longer used after its reference count is decremented (as it may + /// result in the object being freed), unless the caller owns another increment on the refcount + /// (e.g., it calls [`AlwaysRefCounted::inc_ref`] twice, then calls + /// [`AlwaysRefCounted::dec_ref`] once). + unsafe fn dec_ref(obj: NonNull); +} + +/// An owned reference to an always-reference-counted object. +/// +/// The object's reference count is automatically decremented when an instance of [`ARef`] is +/// dropped. It is also automatically incremented when a new instance is created via +/// [`ARef::clone`]. +/// +/// # Invariants +/// +/// The pointer stored in `ptr` is non-null and valid for the lifetime of the [`ARef`] instance. 
In +/// particular, the [`ARef`] instance owns an increment on the underlying object's reference count. +pub struct ARef { + ptr: NonNull, + _p: PhantomData, +} + +// SAFETY: It is safe to send `ARef` to another thread when the underlying `T` is `Sync` because +// it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally, it needs +// `T` to be `Send` because any thread that has an `ARef` may ultimately access `T` using a +// mutable reference, for example, when the reference count reaches zero and `T` is dropped. +unsafe impl Send for ARef {} + +// SAFETY: It is safe to send `&ARef` to another thread when the underlying `T` is `Sync` +// because it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally, +// it needs `T` to be `Send` because any thread that has a `&ARef` may clone it and get an +// `ARef` on that thread, so the thread may ultimately access `T` using a mutable reference, for +// example, when the reference count reaches zero and `T` is dropped. +unsafe impl Sync for ARef {} + +impl ARef { + /// Creates a new instance of [`ARef`]. + /// + /// It takes over an increment of the reference count on the underlying object. + /// + /// # Safety + /// + /// Callers must ensure that the reference count was incremented at least once, and that they + /// are properly relinquishing one increment. That is, if there is only one increment, callers + /// must not use the underlying object anymore -- it is only safe to do so via the newly + /// created [`ARef`]. + pub unsafe fn from_raw(ptr: NonNull) -> Self { + // INVARIANT: The safety requirements guarantee that the new instance now owns the + // increment on the refcount. + Self { + ptr, + _p: PhantomData, + } + } + + /// Consumes the `ARef`, returning a raw pointer. + /// + /// This function does not change the refcount. After calling this function, the caller is + /// responsible for the refcount previously managed by the `ARef`. 
+ /// + /// # Examples + /// + /// ``` + /// use core::ptr::NonNull; + /// use kernel::types::{ARef, AlwaysRefCounted}; + /// + /// struct Empty {} + /// + /// # // SAFETY: TODO. + /// unsafe impl AlwaysRefCounted for Empty { + /// fn inc_ref(&self) {} + /// unsafe fn dec_ref(_obj: NonNull) {} + /// } + /// + /// let mut data = Empty {}; + /// let ptr = NonNull::::new(&mut data).unwrap(); + /// # // SAFETY: TODO. + /// let data_ref: ARef = unsafe { ARef::from_raw(ptr) }; + /// let raw_ptr: NonNull = ARef::into_raw(data_ref); + /// + /// assert_eq!(ptr, raw_ptr); + /// ``` + pub fn into_raw(me: Self) -> NonNull { + ManuallyDrop::new(me).ptr + } +} + +impl Clone for ARef { + fn clone(&self) -> Self { + self.inc_ref(); + // SAFETY: We just incremented the refcount above. + unsafe { Self::from_raw(self.ptr) } + } +} + +impl Deref for ARef { + type Target = T; + + fn deref(&self) -> &Self::Target { + // SAFETY: The type invariants guarantee that the object is valid. + unsafe { self.ptr.as_ref() } + } +} + +impl From<&T> for ARef { + fn from(b: &T) -> Self { + b.inc_ref(); + // SAFETY: We just incremented the refcount above. + unsafe { Self::from_raw(NonNull::from(b)) } + } +} + +impl Drop for ARef { + fn drop(&mut self) { + // SAFETY: The type invariants guarantee that the `ARef` owns the reference we're about to + // decrement. + unsafe { T::dec_ref(self.ptr) }; + } +} diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index 82b9cfeb4739..ec82a163cb0e 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -6,12 +6,13 @@ use core::{ cell::UnsafeCell, marker::{PhantomData, PhantomPinned}, - mem::{ManuallyDrop, MaybeUninit}, + mem::MaybeUninit, ops::{Deref, DerefMut}, - ptr::NonNull, }; use pin_init::{PinInit, Zeroable}; +pub use crate::sync::aref::{ARef, AlwaysRefCounted}; + /// Used to transfer ownership to and from foreign (non-Rust) languages. 
/// /// Ownership is transferred from Rust to a foreign language by calling [`Self::into_foreign`] and @@ -420,155 +421,6 @@ pub const fn cast_from(this: *const T) -> *const Self { } } -/// Types that are _always_ reference counted. -/// -/// It allows such types to define their own custom ref increment and decrement functions. -/// Additionally, it allows users to convert from a shared reference `&T` to an owned reference -/// [`ARef`]. -/// -/// This is usually implemented by wrappers to existing structures on the C side of the code. For -/// Rust code, the recommendation is to use [`Arc`](crate::sync::Arc) to create reference-counted -/// instances of a type. -/// -/// # Safety -/// -/// Implementers must ensure that increments to the reference count keep the object alive in memory -/// at least until matching decrements are performed. -/// -/// Implementers must also ensure that all instances are reference-counted. (Otherwise they -/// won't be able to honour the requirement that [`AlwaysRefCounted::inc_ref`] keep the object -/// alive.) -pub unsafe trait AlwaysRefCounted { - /// Increments the reference count on the object. - fn inc_ref(&self); - - /// Decrements the reference count on the object. - /// - /// Frees the object when the count reaches zero. - /// - /// # Safety - /// - /// Callers must ensure that there was a previous matching increment to the reference count, - /// and that the object is no longer used after its reference count is decremented (as it may - /// result in the object being freed), unless the caller owns another increment on the refcount - /// (e.g., it calls [`AlwaysRefCounted::inc_ref`] twice, then calls - /// [`AlwaysRefCounted::dec_ref`] once). - unsafe fn dec_ref(obj: NonNull); -} - -/// An owned reference to an always-reference-counted object. -/// -/// The object's reference count is automatically decremented when an instance of [`ARef`] is -/// dropped. 
It is also automatically incremented when a new instance is created via -/// [`ARef::clone`]. -/// -/// # Invariants -/// -/// The pointer stored in `ptr` is non-null and valid for the lifetime of the [`ARef`] instance. In -/// particular, the [`ARef`] instance owns an increment on the underlying object's reference count. -pub struct ARef { - ptr: NonNull, - _p: PhantomData, -} - -// SAFETY: It is safe to send `ARef` to another thread when the underlying `T` is `Sync` because -// it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally, it needs -// `T` to be `Send` because any thread that has an `ARef` may ultimately access `T` using a -// mutable reference, for example, when the reference count reaches zero and `T` is dropped. -unsafe impl Send for ARef {} - -// SAFETY: It is safe to send `&ARef` to another thread when the underlying `T` is `Sync` -// because it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally, -// it needs `T` to be `Send` because any thread that has a `&ARef` may clone it and get an -// `ARef` on that thread, so the thread may ultimately access `T` using a mutable reference, for -// example, when the reference count reaches zero and `T` is dropped. -unsafe impl Sync for ARef {} - -impl ARef { - /// Creates a new instance of [`ARef`]. - /// - /// It takes over an increment of the reference count on the underlying object. - /// - /// # Safety - /// - /// Callers must ensure that the reference count was incremented at least once, and that they - /// are properly relinquishing one increment. That is, if there is only one increment, callers - /// must not use the underlying object anymore -- it is only safe to do so via the newly - /// created [`ARef`]. - pub unsafe fn from_raw(ptr: NonNull) -> Self { - // INVARIANT: The safety requirements guarantee that the new instance now owns the - // increment on the refcount. 
- Self { - ptr, - _p: PhantomData, - } - } - - /// Consumes the `ARef`, returning a raw pointer. - /// - /// This function does not change the refcount. After calling this function, the caller is - /// responsible for the refcount previously managed by the `ARef`. - /// - /// # Examples - /// - /// ``` - /// use core::ptr::NonNull; - /// use kernel::types::{ARef, AlwaysRefCounted}; - /// - /// struct Empty {} - /// - /// # // SAFETY: TODO. - /// unsafe impl AlwaysRefCounted for Empty { - /// fn inc_ref(&self) {} - /// unsafe fn dec_ref(_obj: NonNull) {} - /// } - /// - /// let mut data = Empty {}; - /// let ptr = NonNull::::new(&mut data).unwrap(); - /// # // SAFETY: TODO. - /// let data_ref: ARef = unsafe { ARef::from_raw(ptr) }; - /// let raw_ptr: NonNull = ARef::into_raw(data_ref); - /// - /// assert_eq!(ptr, raw_ptr); - /// ``` - pub fn into_raw(me: Self) -> NonNull { - ManuallyDrop::new(me).ptr - } -} - -impl Clone for ARef { - fn clone(&self) -> Self { - self.inc_ref(); - // SAFETY: We just incremented the refcount above. - unsafe { Self::from_raw(self.ptr) } - } -} - -impl Deref for ARef { - type Target = T; - - fn deref(&self) -> &Self::Target { - // SAFETY: The type invariants guarantee that the object is valid. - unsafe { self.ptr.as_ref() } - } -} - -impl From<&T> for ARef { - fn from(b: &T) -> Self { - b.inc_ref(); - // SAFETY: We just incremented the refcount above. - unsafe { Self::from_raw(NonNull::from(b)) } - } -} - -impl Drop for ARef { - fn drop(&mut self) { - // SAFETY: The type invariants guarantee that the `ARef` owns the reference we're about to - // decrement. - unsafe { T::dec_ref(self.ptr) }; - } -} - /// Zero-sized type to mark types not [`Send`]. /// /// Add this type as a field to your struct if your type should not be sent to a different task. 
From f3fc8f06492693d4fcb32c9821fb465d4c7f5a97 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Jul 2025 08:30:45 +0200 Subject: [PATCH 0821/2411] NFS: pass struct nfs_client_initdata to nfs4_set_client Passed the partially filled out structure to nfs4_set_client instead of 11 arguments that then get stashed into the structure. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 151 ++++++++++++++++++++------------------------ 1 file changed, 68 insertions(+), 83 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 162c85a83a14..2e623da1a787 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -895,55 +895,40 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, * Set up an NFS4 client */ static int nfs4_set_client(struct nfs_server *server, - const char *hostname, - const struct sockaddr_storage *addr, - const size_t addrlen, - const char *ip_addr, - int proto, const struct rpc_timeout *timeparms, - u32 minorversion, unsigned int nconnect, - unsigned int max_connect, - struct net *net, - struct xprtsec_parms *xprtsec) + struct nfs_client_initdata *cl_init) { - struct nfs_client_initdata cl_init = { - .hostname = hostname, - .addr = addr, - .addrlen = addrlen, - .ip_addr = ip_addr, - .nfs_mod = &nfs_v4, - .proto = proto, - .minorversion = minorversion, - .net = net, - .timeparms = timeparms, - .cred = server->cred, - .xprtsec = *xprtsec, - }; struct nfs_client *clp; - if (minorversion == 0) - __set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags); - else - cl_init.max_connect = max_connect; - switch (proto) { + cl_init->nfs_mod = &nfs_v4; + cl_init->cred = server->cred; + + if (cl_init->minorversion == 0) { + __set_bit(NFS_CS_REUSEPORT, &cl_init->init_flags); + cl_init->max_connect = 0; + } + + switch (cl_init->proto) { case XPRT_TRANSPORT_RDMA: case XPRT_TRANSPORT_TCP: case XPRT_TRANSPORT_TCP_TLS: - cl_init.nconnect = nconnect; + break; + default: + cl_init->nconnect = 
0; } if (server->flags & NFS_MOUNT_NORESVPORT) - __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + __set_bit(NFS_CS_NORESVPORT, &cl_init->init_flags); if (server->options & NFS_OPTION_MIGRATION) - __set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); + __set_bit(NFS_CS_MIGRATION, &cl_init->init_flags); if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status)) - __set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags); - server->port = rpc_get_port((struct sockaddr *)addr); + __set_bit(NFS_CS_TSM_POSSIBLE, &cl_init->init_flags); + server->port = rpc_get_port((struct sockaddr *)cl_init->addr); if (server->flags & NFS_MOUNT_NETUNREACH_FATAL) - __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags); + __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init->init_flags); /* Allocate or find a client reference we can use */ - clp = nfs_get_client(&cl_init); + clp = nfs_get_client(cl_init); if (IS_ERR(clp)) return PTR_ERR(clp); @@ -1156,6 +1141,19 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); struct rpc_timeout timeparms; + struct nfs_client_initdata cl_init = { + .hostname = ctx->nfs_server.hostname, + .addr = &ctx->nfs_server._address, + .addrlen = ctx->nfs_server.addrlen, + .ip_addr = ctx->client_address, + .proto = ctx->nfs_server.protocol, + .minorversion = ctx->minorversion, + .net = fc->net_ns, + .timeparms = &timeparms, + .xprtsec = ctx->xprtsec, + .nconnect = ctx->nfs_server.nconnect, + .max_connect = ctx->nfs_server.max_connect, + }; int error; nfs_init_timeout_values(&timeparms, ctx->nfs_server.protocol, @@ -1175,18 +1173,7 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc) ctx->selected_flavor = RPC_AUTH_UNIX; /* Get a client record */ - error = nfs4_set_client(server, - ctx->nfs_server.hostname, - &ctx->nfs_server._address, - ctx->nfs_server.addrlen, - ctx->client_address, - ctx->nfs_server.protocol, - &timeparms, - ctx->minorversion, - ctx->nfs_server.nconnect, 
- ctx->nfs_server.max_connect, - fc->net_ns, - &ctx->xprtsec); + error = nfs4_set_client(server, &cl_init); if (error < 0) return error; @@ -1246,18 +1233,28 @@ struct nfs_server *nfs4_create_server(struct fs_context *fc) struct nfs_server *nfs4_create_referral_server(struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); - struct nfs_client *parent_client; - struct nfs_server *server, *parent_server; - int proto, error; + struct nfs_server *parent_server = NFS_SB(ctx->clone_data.sb); + struct nfs_client *parent_client = parent_server->nfs_client; + struct nfs_client_initdata cl_init = { + .hostname = ctx->nfs_server.hostname, + .addr = &ctx->nfs_server._address, + .addrlen = ctx->nfs_server.addrlen, + .ip_addr = parent_client->cl_ipaddr, + .minorversion = parent_client->cl_mvops->minor_version, + .net = parent_client->cl_net, + .timeparms = parent_server->client->cl_timeout, + .xprtsec = parent_client->cl_xprtsec, + .nconnect = parent_client->cl_nconnect, + .max_connect = parent_client->cl_max_connect, + }; + struct nfs_server *server; bool auth_probe; + int error; server = nfs_alloc_server(); if (!server) return ERR_PTR(-ENOMEM); - parent_server = NFS_SB(ctx->clone_data.sb); - parent_client = parent_server->nfs_client; - server->cred = get_cred(parent_server->cred); /* Initialise the client representation from the parent server */ @@ -1266,38 +1263,17 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc) /* Get a client representation */ #if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) rpc_set_port(&ctx->nfs_server.address, NFS_RDMA_PORT); - error = nfs4_set_client(server, - ctx->nfs_server.hostname, - &ctx->nfs_server._address, - ctx->nfs_server.addrlen, - parent_client->cl_ipaddr, - XPRT_TRANSPORT_RDMA, - parent_server->client->cl_timeout, - parent_client->cl_mvops->minor_version, - parent_client->cl_nconnect, - parent_client->cl_max_connect, - parent_client->cl_net, - &parent_client->cl_xprtsec); + cl_init.proto = 
XPRT_TRANSPORT_RDMA; + error = nfs4_set_client(server, &cl_init); if (!error) goto init_server; #endif /* IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) */ - proto = XPRT_TRANSPORT_TCP; + cl_init.proto = XPRT_TRANSPORT_TCP; if (parent_client->cl_xprtsec.policy != RPC_XPRTSEC_NONE) - proto = XPRT_TRANSPORT_TCP_TLS; + cl_init.proto = XPRT_TRANSPORT_TCP_TLS; rpc_set_port(&ctx->nfs_server.address, NFS_PORT); - error = nfs4_set_client(server, - ctx->nfs_server.hostname, - &ctx->nfs_server._address, - ctx->nfs_server.addrlen, - parent_client->cl_ipaddr, - proto, - parent_server->client->cl_timeout, - parent_client->cl_mvops->minor_version, - parent_client->cl_nconnect, - parent_client->cl_max_connect, - parent_client->cl_net, - &parent_client->cl_xprtsec); + error = nfs4_set_client(server, &cl_init); if (error < 0) goto error; @@ -1353,6 +1329,19 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, char buf[INET6_ADDRSTRLEN + 1]; struct sockaddr_storage address; struct sockaddr *localaddr = (struct sockaddr *)&address; + struct nfs_client_initdata cl_init = { + .hostname = hostname, + .addr = sap, + .addrlen = salen, + .ip_addr = buf, + .proto = clp->cl_proto, + .minorversion = clp->cl_minorversion, + .net = net, + .timeparms = clnt->cl_timeout, + .xprtsec = clp->cl_xprtsec, + .nconnect = clp->cl_nconnect, + .max_connect = clp->cl_max_connect, + }; int error; error = rpc_switch_client_transport(clnt, &xargs, clnt->cl_timeout); @@ -1368,11 +1357,7 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, nfs_server_remove_lists(server); set_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status); - error = nfs4_set_client(server, hostname, sap, salen, buf, - clp->cl_proto, clnt->cl_timeout, - clp->cl_minorversion, - clp->cl_nconnect, clp->cl_max_connect, - net, &clp->cl_xprtsec); + error = nfs4_set_client(server, &cl_init); clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status); if (error != 0) { nfs_server_insert_lists(server); From 
c262b444bd0d6bfbcda65130e6137952bef422f6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Jul 2025 08:24:50 +0200 Subject: [PATCH 0822/2411] NFS: drop __exit from nfs_exit_keyring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise built-in NFS can lead to section mismatches. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250714062450.1468117-1-hch@lst.de Fixes: 87268f7a4f1f ("nfs: create a kernel keyring") Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 60fa0c8ff04e..338ef77ae423 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2663,7 +2663,7 @@ static int __init nfs_init_keyring(void) return PTR_ERR_OR_ZERO(nfs_keyring); } -static void __exit nfs_exit_keyring(void) +static void nfs_exit_keyring(void) { key_put(nfs_keyring); } From f06bedfa62d57f7b67d44aacd6badad2e13a803f Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Fri, 27 Jun 2025 09:17:51 +0200 Subject: [PATCH 0823/2411] pNFS/flexfiles: don't attempt pnfs on fatal DS errors When an application gets killed (SIGTERM/SIGINT) while the pNFS client is connecting to a DS, the client ends up in an infinite loop of connect-disconnect. The source of the issue is that flexfilelayoutdev#nfs4_ff_layout_prepare_ds gets an error on nfs4_pnfs_ds_connect with status ERESTARTSYS, which is set by rpc_signal_task, but the error is treated as transient, thus retried. The issue is reproducible by pressing Ctrl+C while running the following script (there should be ~1000 files in a directory, and the client must not have any connections to DSes): ``` echo 3 > /proc/sys/vm/drop_caches for i in * do head -1 $i done ``` The change aims to propagate the nfs4_ff_layout_prepare_ds error state to the caller, which can decide whether this is a retryable error or not.
Signed-off-by: Tigran Mkrtchyan Link: https://lore.kernel.org/r/20250627071751.189663-1-tigran.mkrtchyan@desy.de Fixes: 260f32adb88d ("pNFS/flexfiles: Check the result of nfs4_pnfs_ds_connect") Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 26 ++++++++++++++--------- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 6 +++--- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 4bea008dbebd..8dc921d83538 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -762,14 +762,14 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, { struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); struct nfs4_ff_layout_mirror *mirror; - struct nfs4_pnfs_ds *ds; + struct nfs4_pnfs_ds *ds = ERR_PTR(-EAGAIN); u32 idx; /* mirrors are initially sorted by efficiency */ for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { mirror = FF_LAYOUT_COMP(lseg, idx); ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false); - if (!ds) + if (IS_ERR(ds)) continue; if (check_device && @@ -777,10 +777,10 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, continue; *best_idx = idx; - return ds; + break; } - return NULL; + return ds; } static struct nfs4_pnfs_ds * @@ -942,7 +942,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, for (i = 0; i < pgio->pg_mirror_count; i++) { mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, true); - if (!ds) { + if (IS_ERR(ds)) { if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) goto out_mds; pnfs_generic_pg_cleanup(pgio); @@ -1867,6 +1867,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) u32 idx = hdr->pgio_mirror_idx; int vers; struct nfs_fh *fh; + bool ds_fatal_error = false; dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n", __func__, hdr->inode->i_ino, @@ -1874,8 +1875,10 @@ 
ff_layout_read_pagelist(struct nfs_pgio_header *hdr) mirror = FF_LAYOUT_COMP(lseg, idx); ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false); - if (!ds) + if (IS_ERR(ds)) { + ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds)); goto out_failed; + } ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp, hdr->inode); @@ -1923,7 +1926,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) return PNFS_ATTEMPTED; out_failed: - if (ff_layout_avoid_mds_available_ds(lseg)) + if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error) return PNFS_TRY_AGAIN; trace_pnfs_mds_fallback_read_pagelist(hdr->inode, hdr->args.offset, hdr->args.count, @@ -1945,11 +1948,14 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) int vers; struct nfs_fh *fh; u32 idx = hdr->pgio_mirror_idx; + bool ds_fatal_error = false; mirror = FF_LAYOUT_COMP(lseg, idx); ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true); - if (!ds) + if (IS_ERR(ds)) { + ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds)); goto out_failed; + } ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp, hdr->inode); @@ -2000,7 +2006,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) return PNFS_ATTEMPTED; out_failed: - if (ff_layout_avoid_mds_available_ds(lseg)) + if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error) return PNFS_TRY_AGAIN; trace_pnfs_mds_fallback_write_pagelist(hdr->inode, hdr->args.offset, hdr->args.count, @@ -2043,7 +2049,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); mirror = FF_LAYOUT_COMP(lseg, idx); ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true); - if (!ds) + if (IS_ERR(ds)) goto out_err; ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp, diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 656d5c50bbce..30365ec782bb 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ 
b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -370,11 +370,11 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, struct nfs4_ff_layout_mirror *mirror, bool fail_return) { - struct nfs4_pnfs_ds *ds = NULL; + struct nfs4_pnfs_ds *ds; struct inode *ino = lseg->pls_layout->plh_inode; struct nfs_server *s = NFS_SERVER(ino); unsigned int max_payload; - int status; + int status = -EAGAIN; if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror)) goto noconnect; @@ -418,7 +418,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, ff_layout_send_layouterror(lseg); if (fail_return || !ff_layout_has_available_ds(lseg)) pnfs_error_mark_layout_for_return(ino, lseg); - ds = NULL; + ds = ERR_PTR(status); out: return ds; } From 67173860a763b99317184bfaa821abd3578a4ce3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Jul 2025 10:14:46 +0200 Subject: [PATCH 0824/2411] NFS: cleanup error handling in nfs4_server_common_setup Return error directly instead of using a goto label for it. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250718081509.2607553-2-hch@lst.de Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 2e623da1a787..5943a192f36b 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1103,14 +1103,14 @@ static int nfs4_server_common_setup(struct nfs_server *server, /* We must ensure the session is initialised first */ error = nfs4_init_session(server->nfs_client); if (error < 0) - goto out; + return error; nfs4_server_set_init_caps(server); /* Probe the root fh to retrieve its FSID and filehandle */ error = nfs4_get_rootfh(server, mntfh, auth_probe); if (error < 0) - goto out; + return error; dprintk("Server FSID: %llx:%llx\n", (unsigned long long) server->fsid.major, @@ -1119,7 +1119,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, error = nfs_probe_server(server, mntfh); if (error < 0) - goto out; + return error; nfs4_session_limit_rwsize(server); nfs4_session_limit_xasize(server); @@ -1130,8 +1130,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, nfs_server_insert_lists(server); server->mount_time = jiffies; server->destroy = nfs4_destroy_server; -out: - return error; + return 0; } /* From 7375bbad467e9b1b101d591a458b49a0f3896641 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Jul 2025 10:14:47 +0200 Subject: [PATCH 0825/2411] NFS: cleanup nfs_inode_reclaim_delegation Reduce a level of indentation for most of the code in this function. 
Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/20250718081509.2607553-3-hch@lst.de Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 48 ++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 6f136c47eed7..568d2e6d65fa 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -237,34 +237,34 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); - if (delegation != NULL) { - spin_lock(&delegation->lock); - nfs4_stateid_copy(&delegation->stateid, stateid); - delegation->type = type; - delegation->pagemod_limit = pagemod_limit; - oldcred = delegation->cred; - delegation->cred = get_cred(cred); - switch (deleg_type) { - case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: - case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: - set_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags); - break; - default: - clear_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags); - } - clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); - if (test_and_clear_bit(NFS_DELEGATION_REVOKED, - &delegation->flags)) - atomic_long_inc(&nfs_active_delegations); - spin_unlock(&delegation->lock); - rcu_read_unlock(); - put_cred(oldcred); - trace_nfs4_reclaim_delegation(inode, type); - } else { + if (!delegation) { rcu_read_unlock(); nfs_inode_set_delegation(inode, cred, type, stateid, pagemod_limit, deleg_type); + return; } + + spin_lock(&delegation->lock); + nfs4_stateid_copy(&delegation->stateid, stateid); + delegation->type = type; + delegation->pagemod_limit = pagemod_limit; + oldcred = delegation->cred; + delegation->cred = get_cred(cred); + switch (deleg_type) { + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: + set_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags); + break; + default: + 
clear_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags); + } + clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); + if (test_and_clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) + atomic_long_inc(&nfs_active_delegations); + spin_unlock(&delegation->lock); + rcu_read_unlock(); + put_cred(oldcred); + trace_nfs4_reclaim_delegation(inode, type); } static int nfs_do_return_delegation(struct inode *inode, From aee077d8edc8b9772b205f4104686d676171e61f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Jul 2025 10:14:48 +0200 Subject: [PATCH 0826/2411] NFS: move the delegation_watermark module parameter Keep the module_param_named next to the variable declaration instead of somewhere unrelated, following the best practice in the rest of the kernel. Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/20250718081509.2607553-4-hch@lst.de Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 568d2e6d65fa..5f85966d7709 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -29,6 +29,7 @@ static atomic_long_t nfs_active_delegations; static unsigned nfs_delegation_watermark = NFS_DEFAULT_DELEGATION_WATERMARK; +module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644); static void __nfs_free_delegation(struct nfs_delegation *delegation) { @@ -1573,5 +1574,3 @@ bool nfs4_delegation_flush_on_close(const struct inode *inode) rcu_read_unlock(); return ret; } - -module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644); From 2fb4af5ea3c735a205d97de10f044f809b20af51 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Jul 2025 10:14:49 +0200 Subject: [PATCH 0827/2411] NFS: track active delegations per-server The active delegation watermark was added to avoid overloading servers. 
Track the active delegation per-server instead of globally so that clients talking to multiple servers aren't limited by the global limit. Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/20250718081509.2607553-5-hch@lst.de Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 + fs/nfs/delegation.c | 35 +++++++++++++++++++---------------- include/linux/nfs_fs_sb.h | 1 + 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 47258dc3af70..e13eb429b8b5 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1005,6 +1005,7 @@ struct nfs_server *nfs_alloc_server(void) INIT_LIST_HEAD(&server->ss_src_copies); atomic_set(&server->active, 0); + atomic_long_set(&server->nr_active_delegations, 0); server->io_stats = nfs_alloc_iostats(); if (!server->io_stats) { diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 5f85966d7709..ea96f77e38c2 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -27,7 +27,6 @@ #define NFS_DEFAULT_DELEGATION_WATERMARK (5000U) -static atomic_long_t nfs_active_delegations; static unsigned nfs_delegation_watermark = NFS_DEFAULT_DELEGATION_WATERMARK; module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644); @@ -38,11 +37,12 @@ static void __nfs_free_delegation(struct nfs_delegation *delegation) kfree_rcu(delegation, rcu); } -static void nfs_mark_delegation_revoked(struct nfs_delegation *delegation) +static void nfs_mark_delegation_revoked(struct nfs_server *server, + struct nfs_delegation *delegation) { if (!test_and_set_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { delegation->stateid.type = NFS4_INVALID_STATEID_TYPE; - atomic_long_dec(&nfs_active_delegations); + atomic_long_dec(&server->nr_active_delegations); if (!test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) nfs_clear_verifier_delegated(delegation->inode); } @@ -60,9 +60,10 @@ static void nfs_put_delegation(struct nfs_delegation *delegation) 
__nfs_free_delegation(delegation); } -static void nfs_free_delegation(struct nfs_delegation *delegation) +static void nfs_free_delegation(struct nfs_server *server, + struct nfs_delegation *delegation) { - nfs_mark_delegation_revoked(delegation); + nfs_mark_delegation_revoked(server, delegation); nfs_put_delegation(delegation); } @@ -261,7 +262,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, } clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); if (test_and_clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) - atomic_long_inc(&nfs_active_delegations); + atomic_long_inc(&NFS_SERVER(inode)->nr_active_delegations); spin_unlock(&delegation->lock); rcu_read_unlock(); put_cred(oldcred); @@ -413,7 +414,8 @@ nfs_update_delegation_cred(struct nfs_delegation *delegation, } static void -nfs_update_inplace_delegation(struct nfs_delegation *delegation, +nfs_update_inplace_delegation(struct nfs_server *server, + struct nfs_delegation *delegation, const struct nfs_delegation *update) { if (nfs4_stateid_is_newer(&update->stateid, &delegation->stateid)) { @@ -426,7 +428,7 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation, nfs_update_delegation_cred(delegation, update->cred); /* smp_mb__before_atomic() is implicit due to xchg() */ clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags); - atomic_long_inc(&nfs_active_delegations); + atomic_long_inc(&server->nr_active_delegations); } } } @@ -481,7 +483,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, if (nfs4_stateid_match_other(&old_delegation->stateid, &delegation->stateid)) { spin_lock(&old_delegation->lock); - nfs_update_inplace_delegation(old_delegation, + nfs_update_inplace_delegation(server, old_delegation, delegation); spin_unlock(&old_delegation->lock); goto out; @@ -530,7 +532,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, rcu_assign_pointer(nfsi->delegation, delegation); delegation = NULL; - 
atomic_long_inc(&nfs_active_delegations); + atomic_long_inc(&server->nr_active_delegations); trace_nfs4_set_delegation(inode, type); @@ -544,7 +546,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, __nfs_free_delegation(delegation); if (freeme != NULL) { nfs_do_return_delegation(inode, freeme, 0); - nfs_free_delegation(freeme); + nfs_free_delegation(server, freeme); } return status; } @@ -756,7 +758,7 @@ void nfs_inode_evict_delegation(struct inode *inode) set_bit(NFS_DELEGATION_RETURNING, &delegation->flags); set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags); nfs_do_return_delegation(inode, delegation, 1); - nfs_free_delegation(delegation); + nfs_free_delegation(NFS_SERVER(inode), delegation); } } @@ -842,7 +844,8 @@ void nfs4_inode_return_delegation_on_close(struct inode *inode) if (!delegation) goto out; if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) || - atomic_long_read(&nfs_active_delegations) >= nfs_delegation_watermark) { + atomic_long_read(&NFS_SERVER(inode)->nr_active_delegations) >= + nfs_delegation_watermark) { spin_lock(&delegation->lock); if (delegation->inode && list_empty(&NFS_I(inode)->open_files) && @@ -1018,7 +1021,7 @@ static void nfs_revoke_delegation(struct inode *inode, } spin_unlock(&delegation->lock); } - nfs_mark_delegation_revoked(delegation); + nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation); ret = true; out: rcu_read_unlock(); @@ -1050,7 +1053,7 @@ void nfs_delegation_mark_returned(struct inode *inode, delegation->stateid.seqid = stateid->seqid; } - nfs_mark_delegation_revoked(delegation); + nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation); clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags); spin_unlock(&delegation->lock); if (nfs_detach_delegation(NFS_I(inode), delegation, NFS_SERVER(inode))) @@ -1270,7 +1273,7 @@ static int nfs_server_reap_unclaimed_delegations(struct nfs_server *server, if (delegation != NULL) { if (nfs_detach_delegation(NFS_I(inode), 
delegation, server) != NULL) - nfs_free_delegation(delegation); + nfs_free_delegation(server, delegation); /* Match nfs_start_delegation_return_locked */ nfs_put_delegation(delegation); } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d2d36711a119..a9b44f12623f 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -254,6 +254,7 @@ struct nfs_server { struct list_head state_owners_lru; struct list_head layouts; struct list_head delegations; + atomic_long_t nr_active_delegations; struct list_head ss_copies; struct list_head ss_src_copies; From f5b3108e6a14418b120a3c38ca589b8d6cf87627 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Jul 2025 10:14:50 +0200 Subject: [PATCH 0828/2411] NFS: use a hash table for delegation lookup nfs_delegation_find_inode currently has to walk the entire list of delegations per inode, which can become pretty large, and can become even larger when increasing the delegation watermark. Add a hash table to speed up the delegation lookup, sized as a fraction of the delegation watermark. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250718081509.2607553-6-hch@lst.de Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 28 +++++++++++++++++++++++++++- fs/nfs/delegation.h | 3 +++ fs/nfs/nfs4client.c | 5 +++++ fs/nfs/nfs4proc.c | 22 +++++++++++++++++++++- include/linux/nfs_fs_sb.h | 2 ++ 5 files changed, 58 insertions(+), 2 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index ea96f77e38c2..9d3a5f29f17f 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -30,6 +30,13 @@ static unsigned nfs_delegation_watermark = NFS_DEFAULT_DELEGATION_WATERMARK; module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644); +static struct hlist_head *nfs_delegation_hash(struct nfs_server *server, + const struct nfs_fh *fhandle) +{ + return server->delegation_hash_table + + (nfs_fhandle_hash(fhandle) & server->delegation_hash_mask); +} + static void __nfs_free_delegation(struct nfs_delegation *delegation) { put_cred(delegation->cred); @@ -367,6 +374,7 @@ nfs_detach_delegation_locked(struct nfs_inode *nfsi, spin_unlock(&delegation->lock); return NULL; } + hlist_del_init_rcu(&delegation->hash); list_del_rcu(&delegation->super_list); delegation->inode = NULL; rcu_assign_pointer(nfsi->delegation, NULL); @@ -529,6 +537,8 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, spin_unlock(&inode->i_lock); list_add_tail_rcu(&delegation->super_list, &server->delegations); + hlist_add_head_rcu(&delegation->hash, + nfs_delegation_hash(server, &NFS_I(inode)->fh)); rcu_assign_pointer(nfsi->delegation, delegation); delegation = NULL; @@ -1166,11 +1176,12 @@ static struct inode * nfs_delegation_find_inode_server(struct nfs_server *server, const struct nfs_fh *fhandle) { + struct hlist_head *head = nfs_delegation_hash(server, fhandle); struct nfs_delegation *delegation; struct super_block *freeme = NULL; struct inode *res = NULL; - list_for_each_entry_rcu(delegation, &server->delegations, 
super_list) { + hlist_for_each_entry_rcu(delegation, head, hash) { spin_lock(&delegation->lock); if (delegation->inode != NULL && !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) && @@ -1577,3 +1588,18 @@ bool nfs4_delegation_flush_on_close(const struct inode *inode) rcu_read_unlock(); return ret; } + +int nfs4_delegation_hash_alloc(struct nfs_server *server) +{ + int delegation_buckets, i; + + delegation_buckets = roundup_pow_of_two(nfs_delegation_watermark / 16); + server->delegation_hash_mask = delegation_buckets - 1; + server->delegation_hash_table = kmalloc_array(delegation_buckets, + sizeof(*server->delegation_hash_table), GFP_KERNEL); + if (!server->delegation_hash_table) + return -ENOMEM; + for (i = 0; i < delegation_buckets; i++) + INIT_HLIST_HEAD(&server->delegation_hash_table[i]); + return 0; +} diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 8ff5ab9c5c25..08ec2e9c68a4 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -14,6 +14,7 @@ * NFSv4 delegation */ struct nfs_delegation { + struct hlist_node hash; struct list_head super_list; const struct cred *cred; struct inode *inode; @@ -123,4 +124,6 @@ static inline int nfs_have_delegated_mtime(struct inode *inode) NFS_DELEGATION_FLAG_TIME); } +int nfs4_delegation_hash_alloc(struct nfs_server *server); + #endif diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 5943a192f36b..2ea98f1f116f 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -802,6 +802,7 @@ static void nfs4_destroy_server(struct nfs_server *server) unset_pnfs_layoutdriver(server); nfs4_purge_state_owners(server, &freeme); nfs4_free_state_owners(&freeme); + kfree(server->delegation_hash_table); } /* @@ -1096,6 +1097,10 @@ static int nfs4_server_common_setup(struct nfs_server *server, { int error; + error = nfs4_delegation_hash_alloc(server); + if (error) + return error; + /* data servers support only a subset of NFSv4.1 */ if (is_ds_only_client(server->nfs_client)) return -EPROTONOSUPPORT; diff 
--git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ef2077e185b6..d8bebd757af3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10967,6 +10967,26 @@ static const struct inode_operations nfs4_file_inode_operations = { .listxattr = nfs4_listxattr, }; +static struct nfs_server *nfs4_clone_server(struct nfs_server *source, + struct nfs_fh *fh, struct nfs_fattr *fattr, + rpc_authflavor_t flavor) +{ + struct nfs_server *server; + int error; + + server = nfs_clone_server(source, fh, fattr, flavor); + if (IS_ERR(server)) + return server; + + error = nfs4_delegation_hash_alloc(server); + if (error) { + nfs_free_server(server); + return ERR_PTR(error); + } + + return server; +} + const struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ .dentry_ops = &nfs4_dentry_operations, @@ -11019,7 +11039,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .init_client = nfs4_init_client, .free_client = nfs4_free_client, .create_server = nfs4_create_server, - .clone_server = nfs_clone_server, + .clone_server = nfs4_clone_server, .discover_trunking = nfs4_discover_trunking, .enable_swap = nfs4_enable_swap, .disable_swap = nfs4_disable_swap, diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a9b44f12623f..d30c0245031c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -255,6 +255,8 @@ struct nfs_server { struct list_head layouts; struct list_head delegations; atomic_long_t nr_active_delegations; + unsigned int delegation_hash_mask; + struct hlist_head *delegation_hash_table; struct list_head ss_copies; struct list_head ss_src_copies; From 1db3a48e83bb64a70bf27263b7002585574a9c2d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Jul 2025 16:15:27 -0700 Subject: [PATCH 0829/2411] NFS: Fix wakeup of __nfs_lookup_revalidate() in unblock_revalidate() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use store_release_wake_up() to add the appropriate memory barrier 
before calling wake_up_var(&dentry->d_fsdata). Reported-by: Lukáš Hejtmánek Suggested-by: Santosh Pradhan Link: https://lore.kernel.org/all/18945D18-3EDB-4771-B019-0335CE671077@ics.muni.cz/ Fixes: 99bc9f2eb3f7 ("NFS: add barriers when testing for NFS_FSDATA_BLOCKED") Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index d0e0b435a843..d81217923936 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1828,9 +1828,7 @@ static void block_revalidate(struct dentry *dentry) static void unblock_revalidate(struct dentry *dentry) { - /* store_release ensures wait_var_event() sees the update */ - smp_store_release(&dentry->d_fsdata, NULL); - wake_up_var(&dentry->d_fsdata); + store_release_wake_up(&dentry->d_fsdata, NULL); } /* From ec0abdda891f082dcb95bfbe7fcc82b12342e506 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Jul 2025 16:44:03 -0700 Subject: [PATCH 0830/2411] NFS: Clean up pnfs_put_layout_hdr()/pnfs_destroy_layout_final() Use the wake_up_var_locked() and wait_var_event_spinlock() helpers. 
Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3fd0971bf16f..a3135b5af7ee 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -306,7 +306,6 @@ void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) { struct inode *inode; - unsigned long i_state; if (!lo) return; @@ -317,12 +316,11 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) if (!list_empty(&lo->plh_segs)) WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); pnfs_detach_layout_hdr(lo); - i_state = inode->i_state; + /* Notify pnfs_destroy_layout_final() that we're done */ + if (inode->i_state & (I_FREEING | I_CLEAR)) + wake_up_var_locked(lo, &inode->i_lock); spin_unlock(&inode->i_lock); pnfs_free_layout_hdr(lo); - /* Notify pnfs_destroy_layout_final() that we're done */ - if (i_state & (I_FREEING | I_CLEAR)) - wake_up_var(lo); } } @@ -809,23 +807,17 @@ void pnfs_destroy_layout(struct nfs_inode *nfsi) } EXPORT_SYMBOL_GPL(pnfs_destroy_layout); -static bool pnfs_layout_removed(struct nfs_inode *nfsi, - struct pnfs_layout_hdr *lo) -{ - bool ret; - - spin_lock(&nfsi->vfs_inode.i_lock); - ret = nfsi->layout != lo; - spin_unlock(&nfsi->vfs_inode.i_lock); - return ret; -} - void pnfs_destroy_layout_final(struct nfs_inode *nfsi) { struct pnfs_layout_hdr *lo = __pnfs_destroy_layout(nfsi); + struct inode *inode = &nfsi->vfs_inode; - if (lo) - wait_var_event(lo, pnfs_layout_removed(nfsi, lo)); + if (lo) { + spin_lock(&inode->i_lock); + wait_var_event_spinlock(lo, nfsi->layout != lo, + &inode->i_lock); + spin_unlock(&inode->i_lock); + } } static bool From f66e6bffc531bafaeb067e6f6af56f52d5cd4ac2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Jul 2025 19:13:55 -0700 Subject: [PATCH 0831/2411] SUNRPC: Silence warnings about parameters not being described Warning: net/sunrpc/auth_gss/gss_krb5_crypto.c:902 function parameter 'len' not described in 'krb5_etm_decrypt' Warning: 
net/sunrpc/auth_gss/gss_krb5_crypto.c:902 function parameter 'buf' not described in 'krb5_etm_decrypt' Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 8f2d65c1e831..16dcf115de1e 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -875,8 +875,8 @@ krb5_etm_encrypt(struct krb5_ctx *kctx, u32 offset, * krb5_etm_decrypt - Decrypt using the RFC 8009 rules * @kctx: Kerberos context * @offset: starting offset of the ciphertext, in bytes - * @len: - * @buf: + * @len: size of ciphertext to unwrap + * @buf: ciphertext to unwrap * @headskip: OUT: the enctype's confounder length, in octets * @tailskip: OUT: the enctype's HMAC length, in octets * From dfef90f29811b5b8bc6353e259cac6134a88671f Mon Sep 17 00:00:00 2001 From: Ziyue Zhang Date: Thu, 3 Jul 2025 02:56:28 -0700 Subject: [PATCH 0832/2411] dt-bindings: phy: qcom,sc8280xp-qmp-pcie-phy: Update pcie phy bindings for QCS615 The QCS615 PCIe PHY only uses 5 clocks: aux, cfg_ahb, ref, ref_gen and pipe. So move the "qcom,qcs615-qmp-gen3x1-pcie-phy" compatible from the 6 clocks' list to the 5 clocks' list.
Fixes: 1e889f2bd837 ("dt-bindings: phy: qcom,sc8280xp-qmp-pcie-phy: Document the QCS615 QMP PCIe PHY Gen3 x1") Signed-off-by: Ziyue Zhang Acked-by: Krzysztof Kozlowski Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20250703095630.669044-2-ziyue.zhang@oss.qualcomm.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml index 2c6c9296e4c0..a1ae8c7988c8 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml @@ -145,6 +145,7 @@ allOf: compatible: contains: enum: + - qcom,qcs615-qmp-gen3x1-pcie-phy - qcom,sar2130p-qmp-gen3x2-pcie-phy - qcom,sc8180x-qmp-pcie-phy - qcom,sdm845-qhp-pcie-phy @@ -175,7 +176,6 @@ allOf: compatible: contains: enum: - - qcom,qcs615-qmp-gen3x1-pcie-phy - qcom,sc8280xp-qmp-gen3x1-pcie-phy - qcom,sc8280xp-qmp-gen3x2-pcie-phy - qcom,sc8280xp-qmp-gen3x4-pcie-phy From c3fe7071e196e25789ecf90dbc9e8491a98884d7 Mon Sep 17 00:00:00 2001 From: Valmantas Paliksa Date: Mon, 30 Jun 2025 19:25:14 -0300 Subject: [PATCH 0833/2411] phy: rockchip-pcie: Enable all four lanes if required Current code enables only Lane 0 because pwr_cnt will be incremented on first call to the function. Let's reorder the enablement code to enable all 4 lanes through GRF. 
Reviewed-by: Neil Armstrong Reviewed-by: Robin Murphy Signed-off-by: Valmantas Paliksa Signed-off-by: Geraldo Nascimento Reviewed-by: Robin Murphy Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/16b610aab34e069fd31d9f57260c10df2a968f80.1751322015.git.geraldogabriel@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/rockchip/phy-rockchip-pcie.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-pcie.c b/drivers/phy/rockchip/phy-rockchip-pcie.c index bd44af36c67a..f22ffb41cdc2 100644 --- a/drivers/phy/rockchip/phy-rockchip-pcie.c +++ b/drivers/phy/rockchip/phy-rockchip-pcie.c @@ -160,6 +160,12 @@ static int rockchip_pcie_phy_power_on(struct phy *phy) guard(mutex)(&rk_phy->pcie_mutex); + regmap_write(rk_phy->reg_base, + rk_phy->phy_data->pcie_laneoff, + HIWORD_UPDATE(!PHY_LANE_IDLE_OFF, + PHY_LANE_IDLE_MASK, + PHY_LANE_IDLE_A_SHIFT + inst->index)); + if (rk_phy->pwr_cnt++) { return 0; } @@ -176,12 +182,6 @@ static int rockchip_pcie_phy_power_on(struct phy *phy) PHY_CFG_ADDR_MASK, PHY_CFG_ADDR_SHIFT)); - regmap_write(rk_phy->reg_base, - rk_phy->phy_data->pcie_laneoff, - HIWORD_UPDATE(!PHY_LANE_IDLE_OFF, - PHY_LANE_IDLE_MASK, - PHY_LANE_IDLE_A_SHIFT + inst->index)); - /* * No documented timeout value for phy operation below, * so we make it large enough here. And we use loop-break From 25facbabc3fc33c794ad09d73f73268c0f8cbc7d Mon Sep 17 00:00:00 2001 From: Geraldo Nascimento Date: Mon, 30 Jun 2025 19:25:28 -0300 Subject: [PATCH 0834/2411] phy: rockchip-pcie: Properly disable TEST_WRITE strobe signal pcie_conf is used to touch TEST_WRITE strobe signal. This signal should be enabled, a little time waited, and then disabled. Current code clearly was copy-pasted and never disables the strobe signal. Adjust the define. While at it, remove PHY_CFG_RD_MASK which has been unused since 64cdc0360811 ("phy: rockchip-pcie: remove unused phy_rd_cfg function"). 
Reviewed-by: Neil Armstrong Signed-off-by: Geraldo Nascimento Link: https://lore.kernel.org/r/d514d5d5627680caafa8b7548cbdfee4307f5440.1751322015.git.geraldogabriel@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/rockchip/phy-rockchip-pcie.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-pcie.c b/drivers/phy/rockchip/phy-rockchip-pcie.c index f22ffb41cdc2..4e2dfd01adf2 100644 --- a/drivers/phy/rockchip/phy-rockchip-pcie.c +++ b/drivers/phy/rockchip/phy-rockchip-pcie.c @@ -30,9 +30,8 @@ #define PHY_CFG_ADDR_SHIFT 1 #define PHY_CFG_DATA_MASK 0xf #define PHY_CFG_ADDR_MASK 0x3f -#define PHY_CFG_RD_MASK 0x3ff #define PHY_CFG_WR_ENABLE 1 -#define PHY_CFG_WR_DISABLE 1 +#define PHY_CFG_WR_DISABLE 0 #define PHY_CFG_WR_SHIFT 0 #define PHY_CFG_WR_MASK 1 #define PHY_CFG_PLL_LOCK 0x10 From 9e891b0d21bc889898e726783f20bd81f5fd4056 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Tue, 15 Jul 2025 09:29:34 +0200 Subject: [PATCH 0835/2411] dt-bindings: usb: qcom,snps-dwc3: Add Milos compatible Document the Milos dwc3 compatible. 
Acked-by: Krzysztof Kozlowski Signed-off-by: Luca Weiss Link: https://lore.kernel.org/r/20250715-sm7635-eusb-phy-v3-1-6c3224085eb6@fairphone.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml b/Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml index 8dac5eba61b4..dfd084ed9024 100644 --- a/Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml @@ -32,6 +32,7 @@ properties: - qcom,ipq8064-dwc3 - qcom,ipq8074-dwc3 - qcom,ipq9574-dwc3 + - qcom,milos-dwc3 - qcom,msm8953-dwc3 - qcom,msm8994-dwc3 - qcom,msm8996-dwc3 @@ -338,6 +339,7 @@ allOf: compatible: contains: enum: + - qcom,milos-dwc3 - qcom,qcm2290-dwc3 - qcom,qcs615-dwc3 - qcom,sar2130p-dwc3 @@ -453,6 +455,7 @@ allOf: compatible: contains: enum: + - qcom,milos-dwc3 - qcom,x1e80100-dwc3 then: properties: From bb39f49a433312ba7558b7cc44cfd9131b46bce1 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Tue, 15 Jul 2025 09:29:35 +0200 Subject: [PATCH 0836/2411] dt-bindings: phy: qcom,snps-eusb2: document the Milos Synopsys eUSB2 PHY Document the Synopsys eUSB2 PHY on the Milos SoC by using the SM8550 as fallback. 
Acked-by: Krzysztof Kozlowski Signed-off-by: Luca Weiss Link: https://lore.kernel.org/r/20250715-sm7635-eusb-phy-v3-2-6c3224085eb6@fairphone.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/phy/qcom,snps-eusb2-phy.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-phy.yaml index 142b3c8839d6..854f70af0a6c 100644 --- a/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-phy.yaml @@ -17,6 +17,7 @@ properties: oneOf: - items: - enum: + - qcom,milos-snps-eusb2-phy - qcom,sar2130p-snps-eusb2-phy - qcom,sdx75-snps-eusb2-phy - qcom,sm8650-snps-eusb2-phy From 7f5f703210109366c1e1b685086c9b0a4897ea54 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Tue, 15 Jul 2025 09:29:36 +0200 Subject: [PATCH 0837/2411] phy: qcom: phy-qcom-snps-eusb2: Add missing write from init sequence As per a commit from Qualcomm's downstream 6.1 kernel[0], the init sequence is missing setting the CMN_CTRL_OVERRIDE_EN bit back to 0 at the end, as per the 'latest' HPG revision (as of November 2023). 
[0] https://git.codelinaro.org/clo/la/kernel/qcom/-/commit/b77774a89e3fda3246e09dd39e16e2ab43cd1329 Fixes: 80090810f5d3 ("phy: qcom: Add QCOM SNPS eUSB2 driver") Reviewed-by: Konrad Dybcio Reviewed-by: Neil Armstrong Signed-off-by: Luca Weiss Link: https://lore.kernel.org/r/20250715-sm7635-eusb-phy-v3-3-6c3224085eb6@fairphone.com Signed-off-by: Vinod Koul --- drivers/phy/phy-snps-eusb2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/phy/phy-snps-eusb2.c b/drivers/phy/phy-snps-eusb2.c index 87f323e758d6..cd41d8a9f290 100644 --- a/drivers/phy/phy-snps-eusb2.c +++ b/drivers/phy/phy-snps-eusb2.c @@ -437,6 +437,9 @@ static int qcom_snps_eusb2_hsphy_init(struct phy *p) snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_HS_PHY_CTRL2, USB2_SUSPEND_N_SEL, 0); + snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_CFG0, + CMN_CTRL_OVERRIDE_EN, 0); + return 0; } From 828c3e9dce25a9551e52fd076136f4d9936c0498 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Tue, 15 Jul 2025 09:29:37 +0200 Subject: [PATCH 0838/2411] phy: qcom: phy-qcom-snps-eusb2: Update init sequence per HPG 1.0.2 The eUSB2 HPG version 1.0.2 asks to clear bits [7:1] on all targets. Implement that change in the driver to follow. 
See also https://lore.kernel.org/linux-arm-msm/7d073433-f254-4d75-a68b-d184f900294a@oss.qualcomm.com/ Signed-off-by: Luca Weiss Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250715-sm7635-eusb-phy-v3-4-6c3224085eb6@fairphone.com Signed-off-by: Vinod Koul --- drivers/phy/phy-snps-eusb2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/phy-snps-eusb2.c b/drivers/phy/phy-snps-eusb2.c index cd41d8a9f290..749cbcc18fab 100644 --- a/drivers/phy/phy-snps-eusb2.c +++ b/drivers/phy/phy-snps-eusb2.c @@ -392,7 +392,7 @@ static int qcom_snps_eusb2_hsphy_init(struct phy *p) snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_CFG_CTRL_1, PHY_CFG_PLL_CPBIAS_CNTRL_MASK, - FIELD_PREP(PHY_CFG_PLL_CPBIAS_CNTRL_MASK, 0x1)); + FIELD_PREP(PHY_CFG_PLL_CPBIAS_CNTRL_MASK, 0x0)); snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_CFG_CTRL_4, PHY_CFG_PLL_INT_CNTRL_MASK, From f31ac39c037a77a87e210b0f6d86fdefe8fc7258 Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Thu, 10 Jul 2025 16:42:48 +0530 Subject: [PATCH 0839/2411] phy: exynos-mipi-video: correct cam0 sysreg property name for exynos7870 Fix the cam0 sysreg property name (samsung,cam0-sysreg), which has been erroneously declared as samsung,cam-sysreg. This follows the same name used in Exynos5433 PHY. 
Fixes: 543f5e314282 ("phy: exynos-mipi-video: introduce support for exynos7870") Reviewed-by: Krzysztof Kozlowski Signed-off-by: Kaustabh Chakraborty Link: https://lore.kernel.org/r/20250710-exynos7870-mipi-phy-fix-v2-1-5cf50d69c9d7@disroot.org Signed-off-by: Vinod Koul --- drivers/phy/samsung/phy-exynos-mipi-video.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/samsung/phy-exynos-mipi-video.c b/drivers/phy/samsung/phy-exynos-mipi-video.c index b184923b9b40..be925508ed97 100644 --- a/drivers/phy/samsung/phy-exynos-mipi-video.c +++ b/drivers/phy/samsung/phy-exynos-mipi-video.c @@ -218,7 +218,7 @@ static const struct mipi_phy_device_desc exynos7870_mipi_phy = { .regmap_names = { "samsung,pmu-syscon", "samsung,disp-sysreg", - "samsung,cam-sysreg" + "samsung,cam0-sysreg" }, .num_phys = 4, .phys = { From 429efeb1900d4a3164e1233b392ee5f489b6c3f8 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Mon, 7 Jul 2025 10:47:00 +0800 Subject: [PATCH 0840/2411] dt-bindings: phy: mixel, mipi-dsi-phy: Allow assigned-clock* properties assigned-clock* properties can be used by default now, so allow them. 
Signed-off-by: Liu Ying Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250707-dt-bindings-phy-mixel-mipi-dsi-phy-allow-assign-clock-properties-v1-1-5e34b257e1ef@nxp.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/mixel,mipi-dsi-phy.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/mixel,mipi-dsi-phy.yaml b/Documentation/devicetree/bindings/phy/mixel,mipi-dsi-phy.yaml index 3c28ec50f097..286a4fcc977d 100644 --- a/Documentation/devicetree/bindings/phy/mixel,mipi-dsi-phy.yaml +++ b/Documentation/devicetree/bindings/phy/mixel,mipi-dsi-phy.yaml @@ -72,11 +72,6 @@ allOf: contains: const: fsl,imx8qxp-mipi-dphy then: - properties: - assigned-clocks: false - assigned-clock-parents: false - assigned-clock-rates: false - required: - fsl,syscon From a91ec5efde530747c23f3182cc5b53ba99b57051 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 1 Jul 2025 08:36:37 +0200 Subject: [PATCH 0841/2411] dt-bindings: phy: marvell,mmp2-usb-phy: Drop status from the example Examples should not have the 'status' property, and 'okay' is the default anyway.
Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250701063636.23872-2-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/phy/marvell,mmp2-usb-phy.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/phy/marvell,mmp2-usb-phy.yaml b/Documentation/devicetree/bindings/phy/marvell,mmp2-usb-phy.yaml index 2441c5fae550..af1ae2406f65 100644 --- a/Documentation/devicetree/bindings/phy/marvell,mmp2-usb-phy.yaml +++ b/Documentation/devicetree/bindings/phy/marvell,mmp2-usb-phy.yaml @@ -34,5 +34,4 @@ examples: compatible = "marvell,mmp2-usb-phy"; reg = <0xd4207000 0x40>; #phy-cells = <0>; - status = "okay"; }; From 99dd7faeb7a4d973f049e1bad234888777e03646 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Fri, 27 Jun 2025 17:01:06 -0500 Subject: [PATCH 0842/2411] dt-bindings: phy: Convert ti,da830-usb-phy to DT schema Convert the TI DA830 USB PHY binding to DT schema format. Add "clocks" and "clock-names" which are already in use. As they are always present, make them required as well. 
Signed-off-by: Rob Herring (Arm) Reviewed-by: David Lechner Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250627220107.214162-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/phy-da8xx-usb.txt | 40 -------------- .../bindings/phy/ti,da830-usb-phy.yaml | 53 +++++++++++++++++++ 2 files changed, 53 insertions(+), 40 deletions(-) delete mode 100644 Documentation/devicetree/bindings/phy/phy-da8xx-usb.txt create mode 100644 Documentation/devicetree/bindings/phy/ti,da830-usb-phy.yaml diff --git a/Documentation/devicetree/bindings/phy/phy-da8xx-usb.txt b/Documentation/devicetree/bindings/phy/phy-da8xx-usb.txt deleted file mode 100644 index c26478be391b..000000000000 --- a/Documentation/devicetree/bindings/phy/phy-da8xx-usb.txt +++ /dev/null @@ -1,40 +0,0 @@ -TI DA8xx/OMAP-L1xx/AM18xx USB PHY - -Required properties: - - compatible: must be "ti,da830-usb-phy". - - #phy-cells: must be 1. - -This device controls the PHY for both the USB 1.1 OHCI and USB 2.0 OTG -controllers on DA8xx SoCs. Consumers of this device should use index 0 for -the USB 2.0 phy device and index 1 for the USB 1.1 phy device. - -It also requires a "syscon" node with compatible = "ti,da830-cfgchip", "syscon" -to access the CFGCHIP2 register. 
- -Example: - - cfgchip: cfgchip@1417c { - compatible = "ti,da830-cfgchip", "syscon"; - reg = <0x1417c 0x14>; - }; - - usb_phy: usb-phy { - compatible = "ti,da830-usb-phy"; - #phy-cells = <1>; - }; - - usb20: usb@200000 { - compatible = "ti,da830-musb"; - reg = <0x200000 0x1000>; - interrupts = <58>; - phys = <&usb_phy 0>; - phy-names = "usb-phy"; - }; - - usb11: usb@225000 { - compatible = "ti,da830-ohci"; - reg = <0x225000 0x1000>; - interrupts = <59>; - phys = <&usb_phy 1>; - phy-names = "usb-phy"; - }; diff --git a/Documentation/devicetree/bindings/phy/ti,da830-usb-phy.yaml b/Documentation/devicetree/bindings/phy/ti,da830-usb-phy.yaml new file mode 100644 index 000000000000..e168cbce8fd1 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/ti,da830-usb-phy.yaml @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/ti,da830-usb-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: TI DA8xx/OMAP-L1xx/AM18xx USB PHY + +maintainers: + - David Lechner + +description: > + This device controls the PHY for both the USB 1.1 OHCI and USB 2.0 OTG + controllers on DA8xx SoCs. + + It also requires a "syscon" node with compatible = "ti,da830-cfgchip", "syscon" + to access the CFGCHIP2 register. + +properties: + compatible: + items: + - const: ti,da830-usb-phy + + '#phy-cells': + const: 1 + description: + Consumers of this device should use index 0 for the USB 2.0 phy device and + index 1 for the USB 1.1 phy device. 
+ + clocks: + maxItems: 2 + + clock-names: + items: + - const: usb0_clk48 + - const: usb1_clk48 + +required: + - compatible + - '#phy-cells' + - clocks + - clock-names + +additionalProperties: false + +examples: + - | + usb-phy { + compatible = "ti,da830-usb-phy"; + #phy-cells = <1>; + clocks = <&usb_phy_clk 0>, <&usb_phy_clk 1>; + clock-names = "usb0_clk48", "usb1_clk48"; + }; From 4c3d05da59eb75bdb7869f8668778dae87229168 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Fri, 27 Jun 2025 17:01:25 -0500 Subject: [PATCH 0843/2411] dt-bindings: phy: Convert brcm,sr-usb-combo-phy to DT schema Convert the Broadcom Stingray USB PHY binding to DT schema format. It's a straight forward conversion. Signed-off-by: Rob Herring (Arm) Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20250627220126.214577-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../bindings/phy/brcm,sr-usb-combo-phy.yaml | 65 +++++++++++++++++++ .../bindings/phy/brcm,stingray-usb-phy.txt | 32 --------- 2 files changed, 65 insertions(+), 32 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/brcm,sr-usb-combo-phy.yaml delete mode 100644 Documentation/devicetree/bindings/phy/brcm,stingray-usb-phy.txt diff --git a/Documentation/devicetree/bindings/phy/brcm,sr-usb-combo-phy.yaml b/Documentation/devicetree/bindings/phy/brcm,sr-usb-combo-phy.yaml new file mode 100644 index 000000000000..6224ba0f2990 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/brcm,sr-usb-combo-phy.yaml @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/brcm,sr-usb-combo-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Broadcom Stingray USB PHY + +maintainers: + - Ray Jui + - Scott Branden + +properties: + compatible: + enum: + - brcm,sr-usb-combo-phy + - brcm,sr-usb-hs-phy + + reg: + maxItems: 1 + + '#phy-cells': + description: PHY cell count indicating PHY type + enum: [ 0, 1 ] + 
+required: + - compatible + - reg + - '#phy-cells' + +allOf: + - if: + properties: + compatible: + contains: + const: brcm,sr-usb-combo-phy + then: + properties: + '#phy-cells': + const: 1 + - if: + properties: + compatible: + contains: + const: brcm,sr-usb-hs-phy + then: + properties: + '#phy-cells': + const: 0 + +additionalProperties: false + +examples: + - | + usb-phy@0 { + compatible = "brcm,sr-usb-combo-phy"; + reg = <0x00000000 0x100>; + #phy-cells = <1>; + }; + - | + usb-phy@20000 { + compatible = "brcm,sr-usb-hs-phy"; + reg = <0x00020000 0x100>; + #phy-cells = <0>; + }; diff --git a/Documentation/devicetree/bindings/phy/brcm,stingray-usb-phy.txt b/Documentation/devicetree/bindings/phy/brcm,stingray-usb-phy.txt deleted file mode 100644 index 4ba298966af9..000000000000 --- a/Documentation/devicetree/bindings/phy/brcm,stingray-usb-phy.txt +++ /dev/null @@ -1,32 +0,0 @@ -Broadcom Stingray USB PHY - -Required properties: - - compatible : should be one of the listed compatibles - - "brcm,sr-usb-combo-phy" is combo PHY has two PHYs, one SS and one HS. - - "brcm,sr-usb-hs-phy" is a single HS PHY. - - reg: offset and length of the PHY blocks registers - - #phy-cells: - - Must be 1 for brcm,sr-usb-combo-phy as it expects one argument to indicate - the PHY number of two PHYs. 0 for HS PHY and 1 for SS PHY. - - Must be 0 for brcm,sr-usb-hs-phy. 
- -Refer to phy/phy-bindings.txt for the generic PHY binding properties - -Example: - usbphy0: usb-phy@0 { - compatible = "brcm,sr-usb-combo-phy"; - reg = <0x00000000 0x100>; - #phy-cells = <1>; - }; - - usbphy1: usb-phy@10000 { - compatible = "brcm,sr-usb-combo-phy"; - reg = <0x00010000 0x100>, - #phy-cells = <1>; - }; - - usbphy2: usb-phy@20000 { - compatible = "brcm,sr-usb-hs-phy"; - reg = <0x00020000 0x100>, - #phy-cells = <0>; - }; From 4a3556b81b99f0c8c0358f7cc6801a62b4538fe2 Mon Sep 17 00:00:00 2001 From: Kathiravan Thirumoorthy Date: Mon, 30 Jun 2025 13:48:13 +0530 Subject: [PATCH 0844/2411] phy: qcom: phy-qcom-m31: Update IPQ5332 M31 USB phy initialization sequence The current configuration used for the IPQ5332 M31 USB PHY fails the Near End High Speed Signal Quality compliance test. To resolve this, update the initialization sequence as specified in the Hardware Design Document. Fixes: 08e49af50701 ("phy: qcom: Introduce M31 USB PHY driver") Cc: stable@kernel.org Signed-off-by: Kathiravan Thirumoorthy Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250630-ipq5332_hsphy_complaince-v2-1-63621439ebdb@oss.qualcomm.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-m31.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-m31.c b/drivers/phy/qualcomm/phy-qcom-m31.c index 7caeea1b109e..168ea980fda0 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31.c +++ b/drivers/phy/qualcomm/phy-qcom-m31.c @@ -58,14 +58,16 @@ #define USB2_0_TX_ENABLE BIT(2) #define USB2PHY_USB_PHY_M31_XCFGI_4 0xc8 - #define HSTX_SLEW_RATE_565PS GENMASK(1, 0) + #define HSTX_SLEW_RATE_400PS GENMASK(2, 0) #define PLL_CHARGING_PUMP_CURRENT_35UA GENMASK(4, 3) #define ODT_VALUE_38_02_OHM GENMASK(7, 6) #define USB2PHY_USB_PHY_M31_XCFGI_5 0xcc - #define ODT_VALUE_45_02_OHM BIT(2) #define HSTX_PRE_EMPHASIS_LEVEL_0_55MA BIT(0) +#define USB2PHY_USB_PHY_M31_XCFGI_9 0xdc + #define HSTX_CURRENT_17_1MA_385MV BIT(1) + #define 
USB2PHY_USB_PHY_M31_XCFGI_11 0xe4 #define XCFG_COARSE_TUNE_NUM BIT(1) #define XCFG_FINE_TUNE_NUM BIT(3) @@ -164,7 +166,7 @@ static struct m31_phy_regs m31_ipq5332_regs[] = { }, { USB2PHY_USB_PHY_M31_XCFGI_4, - HSTX_SLEW_RATE_565PS | PLL_CHARGING_PUMP_CURRENT_35UA | ODT_VALUE_38_02_OHM, + HSTX_SLEW_RATE_400PS | PLL_CHARGING_PUMP_CURRENT_35UA | ODT_VALUE_38_02_OHM, 0 }, { @@ -174,9 +176,13 @@ static struct m31_phy_regs m31_ipq5332_regs[] = { }, { USB2PHY_USB_PHY_M31_XCFGI_5, - ODT_VALUE_45_02_OHM | HSTX_PRE_EMPHASIS_LEVEL_0_55MA, + HSTX_PRE_EMPHASIS_LEVEL_0_55MA, 4 }, + { + USB2PHY_USB_PHY_M31_XCFGI_9, + HSTX_CURRENT_17_1MA_385MV, + }, { USB_PHY_UTMI_CTRL5, 0x0, From ef93a685e01a281b5e2a25ce4e3428cf9371a205 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Jul 2025 09:24:58 -0400 Subject: [PATCH 0845/2411] NFS: Fix filehandle bounds checking in nfs_fh_to_dentry() The function needs to check the minimal filehandle length before it can access the embedded filehandle. Reported-by: zhangjian Fixes: 20fa19027286 ("nfs: add export operations") Signed-off-by: Trond Myklebust --- fs/nfs/export.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/nfs/export.c b/fs/nfs/export.c index e9c233b6fd20..a10dd5f9d078 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -66,14 +66,21 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid, { struct nfs_fattr *fattr = NULL; struct nfs_fh *server_fh = nfs_exp_embedfh(fid->raw); - size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size; + size_t fh_size = offsetof(struct nfs_fh, data); const struct nfs_rpc_ops *rpc_ops; struct dentry *dentry; struct inode *inode; - int len = EMBED_FH_OFF + XDR_QUADLEN(fh_size); + int len = EMBED_FH_OFF; u32 *p = fid->raw; int ret; + /* Initial check of bounds */ + if (fh_len < len + XDR_QUADLEN(fh_size) || + fh_len > XDR_QUADLEN(NFS_MAXFHSIZE)) + return NULL; + /* Calculate embedded filehandle size */ + fh_size += server_fh->size; + len += 
XDR_QUADLEN(fh_size); /* NULL translates to ESTALE */ if (fh_len < len || fh_type != len) return NULL; From 699cdd706290208d47bd858a188b030df2e90357 Mon Sep 17 00:00:00 2001 From: Tanmay Shah Date: Wed, 16 Jul 2025 14:30:47 -0700 Subject: [PATCH 0846/2411] remoteproc: xlnx: Disable unsupported features AMD-Xilinx platform driver does not support iommu or recovery mechanism yet. Disable both features in the platform driver. Signed-off-by: Tanmay Shah Link: https://lore.kernel.org/r/20250716213048.2316424-2-tanmay.shah@amd.com Fixes: 6b291e8020a8 ("drivers: remoteproc: Add Xilinx r5 remoteproc driver") Signed-off-by: Mathieu Poirier --- drivers/remoteproc/xlnx_r5_remoteproc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/remoteproc/xlnx_r5_remoteproc.c b/drivers/remoteproc/xlnx_r5_remoteproc.c index a51523456c6e..0ffd26a47685 100644 --- a/drivers/remoteproc/xlnx_r5_remoteproc.c +++ b/drivers/remoteproc/xlnx_r5_remoteproc.c @@ -938,6 +938,8 @@ static struct zynqmp_r5_core *zynqmp_r5_add_rproc_core(struct device *cdev) rproc_coredump_set_elf_info(r5_rproc, ELFCLASS32, EM_ARM); + r5_rproc->recovery_disabled = true; + r5_rproc->has_iommu = false; r5_rproc->auto_boot = false; r5_core = r5_rproc->priv; r5_core->dev = cdev; From 1c20224123f41e4f7da44ae020832bdac3f30ec1 Mon Sep 17 00:00:00 2001 From: Tanmay Shah Date: Wed, 16 Jul 2025 14:30:48 -0700 Subject: [PATCH 0847/2411] remoteproc: xlnx: Fix kernel-doc warnings Fix kernel-doc warnings generated by the following command: `scripts/kernel-doc -Werror -Wshort-desc -Wall \ drivers/remoteproc/xlnx_r5_remoteproc.c > /dev/null` warning: missing initial short description on line: * struct mbox_info ...
Total 8 warnings fixed Signed-off-by: Tanmay Shah Link: https://lore.kernel.org/r/20250716213048.2316424-3-tanmay.shah@amd.com Signed-off-by: Mathieu Poirier --- drivers/remoteproc/xlnx_r5_remoteproc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/remoteproc/xlnx_r5_remoteproc.c b/drivers/remoteproc/xlnx_r5_remoteproc.c index 0ffd26a47685..0b7b173d0d26 100644 --- a/drivers/remoteproc/xlnx_r5_remoteproc.c +++ b/drivers/remoteproc/xlnx_r5_remoteproc.c @@ -68,7 +68,7 @@ struct zynqmp_sram_bank { }; /** - * struct mbox_info + * struct mbox_info - mailbox channel data * * @rx_mc_buf: to copy data from mailbox rx channel * @tx_mc_buf: to copy data to mailbox tx channel @@ -89,7 +89,7 @@ struct mbox_info { }; /** - * struct rsc_tbl_data + * struct rsc_tbl_data - resource table metadata * * Platform specific data structure used to sync resource table address. * It's important to maintain order and size of each field on remote side. @@ -128,7 +128,7 @@ static const struct mem_bank_data zynqmp_tcm_banks_lockstep[] = { }; /** - * struct zynqmp_r5_core + * struct zynqmp_r5_core - remoteproc core's internal data * * @rsc_tbl_va: resource table virtual address * @sram: Array of sram memories assigned to this core @@ -157,7 +157,7 @@ struct zynqmp_r5_core { }; /** - * struct zynqmp_r5_cluster + * struct zynqmp_r5_cluster - remoteproc cluster's internal data * * @dev: r5f subsystem cluster device node * @mode: cluster mode of type zynqmp_r5_cluster_mode @@ -732,7 +732,7 @@ static int zynqmp_r5_parse_fw(struct rproc *rproc, const struct firmware *fw) } /** - * zynqmp_r5_rproc_prepare() + * zynqmp_r5_rproc_prepare() - prepare core to boot/attach * adds carveouts for TCM bank and reserved memory regions * * @rproc: Device node of each rproc @@ -765,7 +765,7 @@ static int zynqmp_r5_rproc_prepare(struct rproc *rproc) } /** - * zynqmp_r5_rproc_unprepare() + * zynqmp_r5_rproc_unprepare() - programming sequence after stop/detach. 
* Turns off TCM banks using power-domain id * * @rproc: Device node of each rproc @@ -908,7 +908,7 @@ static const struct rproc_ops zynqmp_r5_rproc_ops = { }; /** - * zynqmp_r5_add_rproc_core() + * zynqmp_r5_add_rproc_core() - Add core data to framework. * Allocate and add struct rproc object for each r5f core * This is called for each individual r5f core * @@ -1144,7 +1144,7 @@ static int zynqmp_r5_get_tcm_node_from_dt(struct zynqmp_r5_cluster *cluster) } /** - * zynqmp_r5_get_tcm_node() + * zynqmp_r5_get_tcm_node() - Get TCM info * Ideally this function should parse tcm node and store information * in r5_core instance. For now, Hardcoded TCM information is used. * This approach is used as TCM bindings for system-dt is being developed From 9050cabbe1addc5bd2d080b04d51e1f05802a7c4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:02:58 +0100 Subject: [PATCH 0848/2411] f2fs: Pass a folio to recover_dentry() The only caller has a folio, so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index f7d2fc86aeb1..ab1a877ca3a9 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -157,10 +157,10 @@ static int init_recovered_filename(const struct inode *dir, return 0; } -static int recover_dentry(struct inode *inode, struct page *ipage, +static int recover_dentry(struct inode *inode, struct folio *ifolio, struct list_head *dir_list) { - struct f2fs_inode *raw_inode = F2FS_INODE(ipage); + struct f2fs_inode *raw_inode = F2FS_INODE(&ifolio->page); nid_t pino = le32_to_cpu(raw_inode->i_pino); struct f2fs_dir_entry *de; struct f2fs_filename fname; @@ -233,7 +233,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage, else name = raw_inode->i_name; f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d", - __func__, 
ino_of_node(ipage), name, + __func__, ino_of_node(&ifolio->page), name, IS_ERR(dir) ? 0 : dir->i_ino, err); return err; } @@ -830,7 +830,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, recovered_inode++; } if (entry->last_dentry == blkaddr) { - err = recover_dentry(entry->inode, &folio->page, dir_list); + err = recover_dentry(entry->inode, folio, dir_list); if (err) { f2fs_folio_put(folio, true); break; From 7872c71e646b9682ce22be8f3616b5a3f5a5df62 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:02:59 +0100 Subject: [PATCH 0849/2411] f2fs: Pass a folio to recover_inode() The only caller has a folio, so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index ab1a877ca3a9..1695d9581301 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -277,16 +277,16 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) clear_inode_flag(inode, FI_DATA_EXIST); } -static int recover_inode(struct inode *inode, struct page *page) +static int recover_inode(struct inode *inode, struct folio *folio) { - struct f2fs_inode *raw = F2FS_INODE(page); + struct f2fs_inode *raw = F2FS_INODE(&folio->page); struct f2fs_inode_info *fi = F2FS_I(inode); char *name; int err; inode->i_mode = le16_to_cpu(raw->i_mode); - err = recover_quota_data(inode, page); + err = recover_quota_data(inode, &folio->page); if (err) return err; @@ -333,10 +333,10 @@ static int recover_inode(struct inode *inode, struct page *page) if (file_enc_name(inode)) name = ""; else - name = F2FS_INODE(page)->i_name; + name = F2FS_INODE(&folio->page)->i_name; f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x", - ino_of_node(page), name, raw->i_inline); + ino_of_node(&folio->page), name, raw->i_inline); return 0; } 
@@ -822,7 +822,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, * So, call recover_inode for the inode update. */ if (IS_INODE(&folio->page)) { - err = recover_inode(entry->inode, &folio->page); + err = recover_inode(entry->inode, folio); if (err) { f2fs_folio_put(folio, true); break; From 71e5066738e91890ec2dd98b7148c61c417013ee Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:00 +0100 Subject: [PATCH 0850/2411] f2fs: Pass a folio to recover_quota_data() The only caller has a folio, so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 1695d9581301..cb6217e6475f 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -238,9 +238,9 @@ static int recover_dentry(struct inode *inode, struct folio *ifolio, return err; } -static int recover_quota_data(struct inode *inode, struct page *page) +static int recover_quota_data(struct inode *inode, struct folio *folio) { - struct f2fs_inode *raw = F2FS_INODE(page); + struct f2fs_inode *raw = F2FS_INODE(&folio->page); struct iattr attr; uid_t i_uid = le32_to_cpu(raw->i_uid); gid_t i_gid = le32_to_cpu(raw->i_gid); @@ -286,7 +286,7 @@ static int recover_inode(struct inode *inode, struct folio *folio) inode->i_mode = le16_to_cpu(raw->i_mode); - err = recover_quota_data(inode, &folio->page); + err = recover_quota_data(inode, folio); if (err) return err; From b77dc031a7848066555e7c6da2a2c091b4572e8e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:01 +0100 Subject: [PATCH 0851/2411] f2fs: Pass a folio to f2fs_recover_inode_page() The only caller has a folio, so pass it in. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 6 +++--- fs/f2fs/recovery.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 493f1c5fb2d5..e51ba3585d3c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3790,7 +3790,7 @@ void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio); int f2fs_recover_xattr_data(struct inode *inode, struct page *page); -int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); +int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio); int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum); int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index be3d38d1fdee..a9aade30c4cb 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2801,10 +2801,10 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page) return 0; } -int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) +int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio) { struct f2fs_inode *src, *dst; - nid_t ino = ino_of_node(page); + nid_t ino = ino_of_node(&folio->page); struct node_info old_ni, new_ni; struct folio *ifolio; int err; @@ -2830,7 +2830,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) fill_node_footer(&ifolio->page, ino, ino, 0, true); set_cold_node(&ifolio->page, false); - src = F2FS_INODE(page); + src = F2FS_INODE(&folio->page); dst = F2FS_INODE(&ifolio->page); memcpy(dst, src, offsetof(struct f2fs_inode, i_ext)); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index cb6217e6475f..5120713ffd53 100644 --- a/fs/f2fs/recovery.c +++ 
b/fs/f2fs/recovery.c @@ -439,7 +439,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, if (!check_only && IS_INODE(&folio->page) && is_dent_dnode(&folio->page)) { - err = f2fs_recover_inode_page(sbi, &folio->page); + err = f2fs_recover_inode_page(sbi, folio); if (err) { f2fs_folio_put(folio, true); break; From afd42fa98b9c01596daf0f1e41a6ffd0f7179144 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:02 +0100 Subject: [PATCH 0852/2411] f2fs: Pass a folio to sanity_check_extent_cache() The only caller has a folio, so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 4 ++-- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inode.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 4ce19a310f38..5be503a875dc 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -19,10 +19,10 @@ #include "node.h" #include -bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) +bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; + struct f2fs_extent *i_ext = &F2FS_INODE(&ifolio->page)->i_ext; struct extent_info ei; int devi; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e51ba3585d3c..e075c1b19864 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4350,7 +4350,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi); /* * extent_cache.c */ -bool sanity_check_extent_cache(struct inode *inode, struct page *ipage); +bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio); void f2fs_init_extent_tree(struct inode *inode); void f2fs_drop_extent_tree(struct inode *inode); void f2fs_destroy_extent_node(struct inode *inode); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index fc774de1c752..79f130496387 100644 --- 
a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -531,7 +531,7 @@ static int do_read_inode(struct inode *inode) init_idisk_time(inode); - if (!sanity_check_extent_cache(inode, &node_folio->page)) { + if (!sanity_check_extent_cache(inode, node_folio)) { f2fs_folio_put(node_folio, true); f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); return -EFSCORRUPTED; From ea3f2069ea162a2d85cf5a020c6b0324533b871a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:03 +0100 Subject: [PATCH 0853/2411] f2fs: Pass a folio to sanity_check_inode() The only caller has a folio, so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 79f130496387..cdb6640719fa 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -266,28 +266,28 @@ static bool sanity_check_compress_inode(struct inode *inode, return false; } -static bool sanity_check_inode(struct inode *inode, struct page *node_page) +static bool sanity_check_inode(struct inode *inode, struct folio *node_folio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - struct f2fs_inode *ri = F2FS_INODE(node_page); + struct f2fs_inode *ri = F2FS_INODE(&node_folio->page); unsigned long long iblocks; - iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); + iblocks = le64_to_cpu(F2FS_INODE(&node_folio->page)->i_blocks); if (!iblocks) { f2fs_warn(sbi, "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, run fsck to fix.", __func__, inode->i_ino, iblocks); return false; } - if (ino_of_node(node_page) != nid_of_node(node_page)) { + if (ino_of_node(&node_folio->page) != nid_of_node(&node_folio->page)) { f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: [%u, %u] run fsck to fix.", __func__, inode->i_ino, - ino_of_node(node_page), nid_of_node(node_page)); + 
ino_of_node(&node_folio->page), nid_of_node(&node_folio->page)); return false; } - if (ino_of_node(node_page) == fi->i_xattr_nid) { + if (ino_of_node(&node_folio->page) == fi->i_xattr_nid) { f2fs_warn(sbi, "%s: corrupted inode i_ino=%lx, xnid=%x, run fsck to fix.", __func__, inode->i_ino, fi->i_xattr_nid); return false; @@ -354,7 +354,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) } } - if (f2fs_sanity_check_inline_data(inode, node_page)) { + if (f2fs_sanity_check_inline_data(inode, &node_folio->page)) { f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix", __func__, inode->i_ino, inode->i_mode); return false; @@ -469,7 +469,7 @@ static int do_read_inode(struct inode *inode) fi->i_inline_xattr_size = 0; } - if (!sanity_check_inode(inode, &node_folio->page)) { + if (!sanity_check_inode(inode, node_folio)) { f2fs_folio_put(node_folio, true); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); From 1f6425e33da270f0ebb8b43f686ba5d1d40cbe2f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:04 +0100 Subject: [PATCH 0854/2411] f2fs: Pass a folio to f2fs_sanity_check_inline_data() The only caller has a folio, so pass it in. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inline.c | 4 ++-- fs/f2fs/inode.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e075c1b19864..3efe6a248558 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4308,7 +4308,7 @@ extern struct kmem_cache *f2fs_inode_entry_slab; * inline.c */ bool f2fs_may_inline_data(struct inode *inode); -bool f2fs_sanity_check_inline_data(struct inode *inode, struct page *ipage); +bool f2fs_sanity_check_inline_data(struct inode *inode, struct folio *ifolio); bool f2fs_may_inline_dentry(struct inode *inode); void f2fs_do_read_inline_data(struct folio *folio, struct folio *ifolio); void f2fs_truncate_inline_inode(struct inode *inode, struct folio *ifolio, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 901c630685ce..0d021c638922 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -48,12 +48,12 @@ static bool inode_has_blocks(struct inode *inode, struct page *ipage) return false; } -bool f2fs_sanity_check_inline_data(struct inode *inode, struct page *ipage) +bool f2fs_sanity_check_inline_data(struct inode *inode, struct folio *ifolio) { if (!f2fs_has_inline_data(inode)) return false; - if (inode_has_blocks(inode, ipage)) + if (inode_has_blocks(inode, &ifolio->page)) return false; if (!support_inline_data(inode)) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index cdb6640719fa..3d1ee92a613c 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -354,7 +354,7 @@ static bool sanity_check_inode(struct inode *inode, struct folio *node_folio) } } - if (f2fs_sanity_check_inline_data(inode, &node_folio->page)) { + if (f2fs_sanity_check_inline_data(inode, node_folio)) { f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix", __func__, inode->i_ino, inode->i_mode); return false; From 4a09966a2066cc50f8dfa55e11986e2b5ffeecc0 Mon Sep 17 00:00:00 2001 From: "Matthew 
Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:05 +0100 Subject: [PATCH 0855/2411] f2fs: Pass a folio to inode_has_blocks() The only caller has a folio, so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 0d021c638922..fa072e4a5616 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -33,9 +33,9 @@ bool f2fs_may_inline_data(struct inode *inode) return !f2fs_post_read_required(inode); } -static bool inode_has_blocks(struct inode *inode, struct page *ipage) +static bool inode_has_blocks(struct inode *inode, struct folio *ifolio) { - struct f2fs_inode *ri = F2FS_INODE(ipage); + struct f2fs_inode *ri = F2FS_INODE(&ifolio->page); int i; if (F2FS_HAS_BLOCKS(inode)) @@ -53,7 +53,7 @@ bool f2fs_sanity_check_inline_data(struct inode *inode, struct folio *ifolio) if (!f2fs_has_inline_data(inode)) return false; - if (inode_has_blocks(inode, &ifolio->page)) + if (inode_has_blocks(inode, ifolio)) return false; if (!support_inline_data(inode)) From 9d717807167f82687592742002bd5fbaeb69380a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:06 +0100 Subject: [PATCH 0856/2411] f2fs: Pass a folio to F2FS_INODE() All callers now have a folio, so pass it in. Also make it const as F2FS_INODE() does not modify the struct folio passed in (the data it describes is mutable, but it does not change the contents of the struct). This may improve code generation. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 +- fs/f2fs/extent_cache.c | 4 ++-- fs/f2fs/f2fs.h | 9 +++++---- fs/f2fs/gc.c | 2 +- fs/f2fs/inline.c | 6 +++--- fs/f2fs/inode.c | 10 +++++----- fs/f2fs/node.c | 8 ++++---- fs/f2fs/recovery.c | 8 ++++---- 8 files changed, 25 insertions(+), 24 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c36b3b22bfff..888dca7e82ac 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -454,7 +454,7 @@ static void init_dent_inode(struct inode *dir, struct inode *inode, f2fs_folio_wait_writeback(ifolio, NODE, true, true); /* copy name info. to this inode folio */ - ri = F2FS_INODE(&ifolio->page); + ri = F2FS_INODE(ifolio); ri->i_namelen = cpu_to_le32(fname->disk_name.len); memcpy(ri->i_name, fname->disk_name.name, fname->disk_name.len); if (IS_ENCRYPTED(dir)) { diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 5be503a875dc..a6eb3d73231e 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -22,7 +22,7 @@ bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_extent *i_ext = &F2FS_INODE(&ifolio->page)->i_ext; + struct f2fs_extent *i_ext = &F2FS_INODE(ifolio)->i_ext; struct extent_info ei; int devi; @@ -411,7 +411,7 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct folio *ifolio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree_info *eti = &sbi->extent_tree[EX_READ]; - struct f2fs_extent *i_ext = &F2FS_INODE(&ifolio->page)->i_ext; + struct f2fs_extent *i_ext = &F2FS_INODE(ifolio)->i_ext; struct extent_tree *et; struct extent_node *en; struct extent_info ei = {0}; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3efe6a248558..c33ed614c011 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2053,9 +2053,9 @@ static inline struct f2fs_node *F2FS_NODE(const struct page *page) return (struct f2fs_node *)page_address(page); } -static 
inline struct f2fs_inode *F2FS_INODE(struct page *page) +static inline struct f2fs_inode *F2FS_INODE(const struct folio *folio) { - return &((struct f2fs_node *)page_address(page))->i; + return &((struct f2fs_node *)folio_address(folio))->i; } static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi) @@ -3371,9 +3371,10 @@ static inline unsigned int addrs_per_page(struct inode *inode, return addrs; } -static inline void *inline_xattr_addr(struct inode *inode, struct folio *folio) +static inline +void *inline_xattr_addr(struct inode *inode, const struct folio *folio) { - struct f2fs_inode *ri = F2FS_INODE(&folio->page); + struct f2fs_inode *ri = F2FS_INODE(folio); return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - get_inline_xattr_addrs(inode)]); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 778f9ec40b70..edeae4ee137c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1163,7 +1163,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, } if (IS_INODE(&node_folio->page)) { - base = offset_in_addr(F2FS_INODE(&node_folio->page)); + base = offset_in_addr(F2FS_INODE(node_folio)); max_addrs = DEF_ADDRS_PER_INODE; } else { base = 0; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index fa072e4a5616..4c636a8043f8 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -35,7 +35,7 @@ bool f2fs_may_inline_data(struct inode *inode) static bool inode_has_blocks(struct inode *inode, struct folio *ifolio) { - struct f2fs_inode *ri = F2FS_INODE(&ifolio->page); + struct f2fs_inode *ri = F2FS_INODE(ifolio); int i; if (F2FS_HAS_BLOCKS(inode)) @@ -306,7 +306,7 @@ int f2fs_recover_inline_data(struct inode *inode, struct folio *nfolio) * x x -> recover data blocks */ if (IS_INODE(&nfolio->page)) - ri = F2FS_INODE(&nfolio->page); + ri = F2FS_INODE(nfolio); if (f2fs_has_inline_data(inode) && ri && (ri->i_inline & F2FS_INLINE_DATA)) { @@ -825,7 +825,7 @@ int f2fs_inline_data_fiemap(struct inode *inode, byteaddr = (__u64)ni.blk_addr << 
inode->i_sb->s_blocksize_bits; byteaddr += (char *)inline_data_addr(inode, ifolio) - - (char *)F2FS_INODE(&ifolio->page); + (char *)F2FS_INODE(ifolio); err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags); trace_f2fs_fiemap(inode, start, byteaddr, ilen, flags, err); out: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 3d1ee92a613c..6caf4817e99b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -108,7 +108,7 @@ static void __recover_inline_status(struct inode *inode, struct folio *ifolio) f2fs_folio_wait_writeback(ifolio, NODE, true, true); set_inode_flag(inode, FI_DATA_EXIST); - set_raw_inline(inode, F2FS_INODE(&ifolio->page)); + set_raw_inline(inode, F2FS_INODE(ifolio)); folio_mark_dirty(ifolio); return; } @@ -270,10 +270,10 @@ static bool sanity_check_inode(struct inode *inode, struct folio *node_folio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - struct f2fs_inode *ri = F2FS_INODE(&node_folio->page); + struct f2fs_inode *ri = F2FS_INODE(node_folio); unsigned long long iblocks; - iblocks = le64_to_cpu(F2FS_INODE(&node_folio->page)->i_blocks); + iblocks = le64_to_cpu(F2FS_INODE(node_folio)->i_blocks); if (!iblocks) { f2fs_warn(sbi, "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, run fsck to fix.", __func__, inode->i_ino, iblocks); @@ -419,7 +419,7 @@ static int do_read_inode(struct inode *inode) if (IS_ERR(node_folio)) return PTR_ERR(node_folio); - ri = F2FS_INODE(&node_folio->page); + ri = F2FS_INODE(node_folio); inode->i_mode = le16_to_cpu(ri->i_mode); i_uid_write(inode, le32_to_cpu(ri->i_uid)); @@ -669,7 +669,7 @@ void f2fs_update_inode(struct inode *inode, struct folio *node_folio) f2fs_inode_synced(inode); - ri = F2FS_INODE(&node_folio->page); + ri = F2FS_INODE(node_folio); ri->i_mode = cpu_to_le16(inode->i_mode); ri->i_advise = fi->i_advise; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index a9aade30c4cb..ce9cc32fec3f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1172,7 
+1172,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) set_new_dnode(&dn, inode, folio, NULL, 0); folio_unlock(folio); - ri = F2FS_INODE(&folio->page); + ri = F2FS_INODE(folio); switch (level) { case 0: case 1: @@ -2727,7 +2727,7 @@ int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio) if (IS_ERR(ifolio)) return PTR_ERR(ifolio); - ri = F2FS_INODE(&folio->page); + ri = F2FS_INODE(folio); if (ri->i_inline & F2FS_INLINE_XATTR) { if (!f2fs_has_inline_xattr(inode)) { set_inode_flag(inode, FI_INLINE_XATTR); @@ -2830,8 +2830,8 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio) fill_node_footer(&ifolio->page, ino, ino, 0, true); set_cold_node(&ifolio->page, false); - src = F2FS_INODE(&folio->page); - dst = F2FS_INODE(&ifolio->page); + src = F2FS_INODE(folio); + dst = F2FS_INODE(ifolio); memcpy(dst, src, offsetof(struct f2fs_inode, i_ext)); dst->i_size = 0; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 5120713ffd53..448a2bbc0b2f 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -160,7 +160,7 @@ static int init_recovered_filename(const struct inode *dir, static int recover_dentry(struct inode *inode, struct folio *ifolio, struct list_head *dir_list) { - struct f2fs_inode *raw_inode = F2FS_INODE(&ifolio->page); + struct f2fs_inode *raw_inode = F2FS_INODE(ifolio); nid_t pino = le32_to_cpu(raw_inode->i_pino); struct f2fs_dir_entry *de; struct f2fs_filename fname; @@ -240,7 +240,7 @@ static int recover_dentry(struct inode *inode, struct folio *ifolio, static int recover_quota_data(struct inode *inode, struct folio *folio) { - struct f2fs_inode *raw = F2FS_INODE(&folio->page); + struct f2fs_inode *raw = F2FS_INODE(folio); struct iattr attr; uid_t i_uid = le32_to_cpu(raw->i_uid); gid_t i_gid = le32_to_cpu(raw->i_gid); @@ -279,7 +279,7 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) static int recover_inode(struct inode *inode, struct folio *folio) { - struct 
f2fs_inode *raw = F2FS_INODE(&folio->page); + struct f2fs_inode *raw = F2FS_INODE(folio); struct f2fs_inode_info *fi = F2FS_I(inode); char *name; int err; @@ -333,7 +333,7 @@ static int recover_inode(struct inode *inode, struct folio *folio) if (file_enc_name(inode)) name = ""; else - name = F2FS_INODE(&folio->page)->i_name; + name = F2FS_INODE(folio)->i_name; f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x", ino_of_node(&folio->page), name, raw->i_inline); From 28fde0d7ff293e07f03d8fb9bfa61ede3144b552 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:07 +0100 Subject: [PATCH 0857/2411] f2fs: Pass a folio to ino_of_node() All callers have a folio so pass it in. Also make the argument const as the function does not modify it. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/inode.c | 8 ++++---- fs/f2fs/node.c | 24 ++++++++++++------------ fs/f2fs/node.h | 4 ++-- fs/f2fs/recovery.c | 14 +++++++------- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 40292e4ad341..95db528bcd35 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -574,7 +574,7 @@ static bool __has_merged_page(struct bio *bio, struct inode *inode, return true; if (page && page == &target->page) return true; - if (ino && ino == ino_of_node(&target->page)) + if (ino && ino == ino_of_node(target)) return true; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 6caf4817e99b..249cc37ad35b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -178,7 +178,7 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio) if (provided != calculated) f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. 
%x", - folio->index, ino_of_node(&folio->page), + folio->index, ino_of_node(folio), provided, calculated); return provided == calculated; @@ -280,14 +280,14 @@ static bool sanity_check_inode(struct inode *inode, struct folio *node_folio) return false; } - if (ino_of_node(&node_folio->page) != nid_of_node(&node_folio->page)) { + if (ino_of_node(node_folio) != nid_of_node(&node_folio->page)) { f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: [%u, %u] run fsck to fix.", __func__, inode->i_ino, - ino_of_node(&node_folio->page), nid_of_node(&node_folio->page)); + ino_of_node(node_folio), nid_of_node(&node_folio->page)); return false; } - if (ino_of_node(&node_folio->page) == fi->i_xattr_nid) { + if (ino_of_node(node_folio) == fi->i_xattr_nid) { f2fs_warn(sbi, "%s: corrupted inode i_ino=%lx, xnid=%x, run fsck to fix.", __func__, inode->i_ino, fi->i_xattr_nid); return false; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ce9cc32fec3f..b3956daacbf8 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -974,9 +974,9 @@ static int truncate_dnode(struct dnode_of_data *dn) else if (IS_ERR(folio)) return PTR_ERR(folio); - if (IS_INODE(&folio->page) || ino_of_node(&folio->page) != dn->inode->i_ino) { + if (IS_INODE(&folio->page) || ino_of_node(folio) != dn->inode->i_ino) { f2fs_err(sbi, "incorrect node reference, ino: %lu, nid: %u, ino_of_node: %u", - dn->inode->i_ino, dn->nid, ino_of_node(&folio->page)); + dn->inode->i_ino, dn->nid, ino_of_node(folio)); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_INVALID_NODE_REFERENCE); f2fs_folio_put(folio, true); @@ -1484,7 +1484,7 @@ static int sanity_check_node_footer(struct f2fs_sb_info *sbi, time_to_inject(sbi, FAULT_INCONSISTENT_FOOTER))) { f2fs_warn(sbi, "inconsistent node block, node_type:%d, nid:%lu, " "node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", - ntype, nid, nid_of_node(page), ino_of_node(page), + ntype, nid, nid_of_node(page), ino_of_node(folio), ofs_of_node(page), 
cpver_of_node(page), next_blkaddr_of_node(folio)); set_sbi_flag(sbi, SBI_NEED_FSCK); @@ -1633,7 +1633,7 @@ static struct folio *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) if (!IS_DNODE(&folio->page) || !is_cold_node(&folio->page)) continue; - if (ino_of_node(&folio->page) != ino) + if (ino_of_node(folio) != ino) continue; folio_lock(folio); @@ -1643,7 +1643,7 @@ static struct folio *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) folio_unlock(folio); continue; } - if (ino_of_node(&folio->page) != ino) + if (ino_of_node(folio) != ino) goto continue_unlock; if (!folio_test_dirty(folio)) { @@ -1673,7 +1673,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted struct node_info ni; struct f2fs_io_info fio = { .sbi = sbi, - .ino = ino_of_node(&folio->page), + .ino = ino_of_node(folio), .type = NODE, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), @@ -1842,7 +1842,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, if (!IS_DNODE(&folio->page) || !is_cold_node(&folio->page)) continue; - if (ino_of_node(&folio->page) != ino) + if (ino_of_node(folio) != ino) continue; folio_lock(folio); @@ -1852,7 +1852,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, folio_unlock(folio); continue; } - if (ino_of_node(&folio->page) != ino) + if (ino_of_node(folio) != ino) goto continue_unlock; if (!folio_test_dirty(folio) && folio != last_folio) { @@ -1948,7 +1948,7 @@ static bool flush_dirty_inode(struct folio *folio) { struct f2fs_sb_info *sbi = F2FS_F_SB(folio); struct inode *inode; - nid_t ino = ino_of_node(&folio->page); + nid_t ino = ino_of_node(folio); inode = find_inode_nowait(sbi->sb, ino, f2fs_match_ino, NULL); if (!inode) @@ -1991,7 +1991,7 @@ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi) if (page_private_inline(&folio->page)) { clear_page_private_inline(&folio->page); folio_unlock(folio); - flush_inline_data(sbi, ino_of_node(&folio->page)); + 
flush_inline_data(sbi, ino_of_node(folio)); continue; } unlock: @@ -2073,7 +2073,7 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, if (page_private_inline(&folio->page)) { clear_page_private_inline(&folio->page); folio_unlock(folio); - flush_inline_data(sbi, ino_of_node(&folio->page)); + flush_inline_data(sbi, ino_of_node(folio)); goto lock_node; } @@ -2804,7 +2804,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page) int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio) { struct f2fs_inode *src, *dst; - nid_t ino = ino_of_node(&folio->page); + nid_t ino = ino_of_node(folio); struct node_info old_ni, new_ni; struct folio *ifolio; int err; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index b5218d642545..3bf9d637168c 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -243,9 +243,9 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) #endif } -static inline nid_t ino_of_node(struct page *node_page) +static inline nid_t ino_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(node_page); + struct f2fs_node *rn = F2FS_NODE(&node_folio->page); return le32_to_cpu(rn->footer.ino); } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 448a2bbc0b2f..b81ae66fff4d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -233,7 +233,7 @@ static int recover_dentry(struct inode *inode, struct folio *ifolio, else name = raw_inode->i_name; f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d", - __func__, ino_of_node(&ifolio->page), name, + __func__, ino_of_node(ifolio), name, IS_ERR(dir) ? 
0 : dir->i_ino, err); return err; } @@ -336,7 +336,7 @@ static int recover_inode(struct inode *inode, struct folio *folio) name = F2FS_INODE(folio)->i_name; f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x", - ino_of_node(&folio->page), name, raw->i_inline); + ino_of_node(folio), name, raw->i_inline); return 0; } @@ -432,7 +432,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, if (!is_fsync_dnode(&folio->page)) goto next; - entry = get_fsync_inode(head, ino_of_node(&folio->page)); + entry = get_fsync_inode(head, ino_of_node(folio)); if (!entry) { bool quota_inode = false; @@ -451,7 +451,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, * CP | dnode(F) | inode(DF) * For this case, we should not give up now. */ - entry = add_fsync_inode(sbi, head, ino_of_node(&folio->page), + entry = add_fsync_inode(sbi, head, ino_of_node(folio), quota_inode); if (IS_ERR(entry)) { err = PTR_ERR(entry); @@ -553,7 +553,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, return PTR_ERR(node_folio); offset = ofs_of_node(&node_folio->page); - ino = ino_of_node(&node_folio->page); + ino = ino_of_node(node_folio); f2fs_folio_put(node_folio, true); if (ino != dn->inode->i_ino) { @@ -668,7 +668,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, if (err) goto err; - f2fs_bug_on(sbi, ni.ino != ino_of_node(&folio->page)); + f2fs_bug_on(sbi, ni.ino != ino_of_node(folio)); if (ofs_of_node(&dn.node_folio->page) != ofs_of_node(&folio->page)) { f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u", @@ -812,7 +812,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, } recoverable_dnode++; - entry = get_fsync_inode(inode_list, ino_of_node(&folio->page)); + entry = get_fsync_inode(inode_list, ino_of_node(folio)); if (!entry) goto next; fsynced_dnode++; From a63f2de2dd950aaa7c6008e90f30c43b34f643f5 Mon Sep 17 00:00:00 2001 
From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:08 +0100 Subject: [PATCH 0858/2411] f2fs: Pass a folio to nid_of_node() All callers have a folio so pass it in. Also make the argument const as the function does not modify it. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/inode.c | 4 ++-- fs/f2fs/node.c | 6 +++--- fs/f2fs/node.h | 4 ++-- fs/f2fs/recovery.c | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 95db528bcd35..c4da33de0d1d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -355,7 +355,7 @@ static void f2fs_write_end_io(struct bio *bio) } f2fs_bug_on(sbi, is_node_folio(folio) && - folio->index != nid_of_node(&folio->page)); + folio->index != nid_of_node(folio)); dec_page_count(sbi, type); if (f2fs_in_warm_node_list(sbi, folio)) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 249cc37ad35b..c5dbc9963e4f 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -280,10 +280,10 @@ static bool sanity_check_inode(struct inode *inode, struct folio *node_folio) return false; } - if (ino_of_node(node_folio) != nid_of_node(&node_folio->page)) { + if (ino_of_node(node_folio) != nid_of_node(node_folio)) { f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: [%u, %u] run fsck to fix.", __func__, inode->i_ino, - ino_of_node(node_folio), nid_of_node(&node_folio->page)); + ino_of_node(node_folio), nid_of_node(node_folio)); return false; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b3956daacbf8..750addd11713 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1477,14 +1477,14 @@ static int sanity_check_node_footer(struct f2fs_sb_info *sbi, { struct page *page = &folio->page; - if (unlikely(nid != nid_of_node(page) || + if (unlikely(nid != nid_of_node(folio) || (ntype == NODE_TYPE_INODE && !IS_INODE(page)) || (ntype == NODE_TYPE_XATTR && !f2fs_has_xattr_block(ofs_of_node(page))) || time_to_inject(sbi, 
FAULT_INCONSISTENT_FOOTER))) { f2fs_warn(sbi, "inconsistent node block, node_type:%d, nid:%lu, " "node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", - ntype, nid, nid_of_node(page), ino_of_node(folio), + ntype, nid, nid_of_node(folio), ino_of_node(folio), ofs_of_node(page), cpver_of_node(page), next_blkaddr_of_node(folio)); set_sbi_flag(sbi, SBI_NEED_FSCK); @@ -1706,7 +1706,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted goto redirty_out; /* get old block addr of this node page */ - nid = nid_of_node(&folio->page); + nid = nid_of_node(folio); f2fs_bug_on(sbi, folio->index != nid); if (f2fs_get_node_info(sbi, nid, &ni, !do_balance)) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 3bf9d637168c..a4ffb9460ee9 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -249,9 +249,9 @@ static inline nid_t ino_of_node(const struct folio *node_folio) return le32_to_cpu(rn->footer.ino); } -static inline nid_t nid_of_node(struct page *node_page) +static inline nid_t nid_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(node_page); + struct f2fs_node *rn = F2FS_NODE(&node_folio->page); return le32_to_cpu(rn->footer.nid); } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b81ae66fff4d..e5cd9959c894 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -767,7 +767,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, out: f2fs_notice(sbi, "recover_data: ino = %lx, nid = %x (i_size: %s), " "range (%u, %u), recovered = %d, err = %d", - inode->i_ino, nid_of_node(&folio->page), + inode->i_ino, nid_of_node(folio), file_keep_isize(inode) ? "keep" : "recover", start, end, recovered, err); return err; From bead9a6f1b8d22a2e8185a380a4738cb737d0d70 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:09 +0100 Subject: [PATCH 0859/2411] f2fs: Pass a folio to is_recoverable_dnode() All callers have a folio so pass it in. 
Also make the argument const as the function does not modify it. Removes a call to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.h | 10 +++++----- fs/f2fs/recovery.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c33ed614c011..f6a295d61d2b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2018,7 +2018,7 @@ static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping) return F2FS_I_SB(mapping->host); } -static inline struct f2fs_sb_info *F2FS_F_SB(struct folio *folio) +static inline struct f2fs_sb_info *F2FS_F_SB(const struct folio *folio) { return F2FS_M_SB(folio->mapping); } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index a4ffb9460ee9..76dae4ab57d2 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -262,7 +262,7 @@ static inline unsigned int ofs_of_node(const struct page *node_page) return flag >> OFFSET_BIT_SHIFT; } -static inline __u64 cpver_of_node(struct page *node_page) +static inline __u64 cpver_of_node(const struct page *node_page) { struct f2fs_node *rn = F2FS_NODE(node_page); return le64_to_cpu(rn->footer.cp_ver); @@ -313,19 +313,19 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) rn->footer.next_blkaddr = cpu_to_le32(blkaddr); } -static inline bool is_recoverable_dnode(struct page *page) +static inline bool is_recoverable_dnode(const struct folio *folio) { - struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_F_SB(folio)); __u64 cp_ver = cur_cp_version(ckpt); /* Don't care crc part, if fsck.f2fs sets it. 
*/ if (__is_set_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG)) - return (cp_ver << 32) == (cpver_of_node(page) << 32); + return (cp_ver << 32) == (cpver_of_node(&folio->page) << 32); if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) cp_ver |= (cur_cp_crc(ckpt) << 32); - return cp_ver == cpver_of_node(page); + return cp_ver == cpver_of_node(&folio->page); } /* diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index e5cd9959c894..dac0d7189b2b 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -375,7 +375,7 @@ static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr, if (IS_ERR(folio)) return PTR_ERR(folio); - if (!is_recoverable_dnode(&folio->page)) { + if (!is_recoverable_dnode(folio)) { f2fs_folio_put(folio, true); *is_detecting = false; return 0; @@ -424,7 +424,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, break; } - if (!is_recoverable_dnode(&folio->page)) { + if (!is_recoverable_dnode(folio)) { f2fs_folio_put(folio, true); break; } @@ -806,7 +806,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, break; } - if (!is_recoverable_dnode(&folio->page)) { + if (!is_recoverable_dnode(folio)) { f2fs_folio_put(folio, true); break; } From 4f3466d79b2bfe92879968595dd74efbfa224058 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:10 +0100 Subject: [PATCH 0860/2411] f2fs: Pass a folio to set_dentry_mark() All callers have a folio so pass it in. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 6 +++--- fs/f2fs/node.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 750addd11713..02831323da9c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1863,7 +1863,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, f2fs_folio_wait_writeback(folio, NODE, true, true); set_fsync_mark(&folio->page, 0); - set_dentry_mark(&folio->page, 0); + set_dentry_mark(folio, 0); if (!atomic || folio == last_folio) { set_fsync_mark(&folio->page, 1); @@ -1872,7 +1872,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, if (is_inode_flag_set(inode, FI_DIRTY_INODE)) f2fs_update_inode(inode, folio); - set_dentry_mark(&folio->page, + set_dentry_mark(folio, f2fs_need_dentry_mark(sbi, ino)); } /* may be written by other thread */ @@ -2087,7 +2087,7 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, goto continue_unlock; set_fsync_mark(&folio->page, 0); - set_dentry_mark(&folio->page, 0); + set_dentry_mark(folio, 0); if (!__write_node_folio(folio, false, &submitted, wbc, do_balance, io_type, NULL)) { diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 76dae4ab57d2..8f33134538cf 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -432,5 +432,5 @@ static inline void set_mark(struct page *page, int mark, int type) f2fs_inode_chksum_set(F2FS_P_SB(page), page); #endif } -#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT) +#define set_dentry_mark(folio, mark) set_mark(&folio->page, mark, DENT_BIT_SHIFT) #define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT) From b07bfa70e4b11078b48e0cec3c2c2dd36c34e534 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:11 +0100 Subject: [PATCH 0861/2411] f2fs: Pass a folio to set_fsync_mark() All callers have a folio so pass it in. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 6 +++--- fs/f2fs/node.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 02831323da9c..c9bf269f0fdb 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1862,11 +1862,11 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, f2fs_folio_wait_writeback(folio, NODE, true, true); - set_fsync_mark(&folio->page, 0); + set_fsync_mark(folio, 0); set_dentry_mark(folio, 0); if (!atomic || folio == last_folio) { - set_fsync_mark(&folio->page, 1); + set_fsync_mark(folio, 1); percpu_counter_inc(&sbi->rf_node_block_count); if (IS_INODE(&folio->page)) { if (is_inode_flag_set(inode, @@ -2086,7 +2086,7 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, if (!folio_clear_dirty_for_io(folio)) goto continue_unlock; - set_fsync_mark(&folio->page, 0); + set_fsync_mark(folio, 0); set_dentry_mark(folio, 0); if (!__write_node_folio(folio, false, &submitted, diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 8f33134538cf..825fa3ad6357 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -433,4 +433,4 @@ static inline void set_mark(struct page *page, int mark, int type) #endif } #define set_dentry_mark(folio, mark) set_mark(&folio->page, mark, DENT_BIT_SHIFT) -#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT) +#define set_fsync_mark(folio, mark) set_mark(&folio->page, mark, FSYNC_BIT_SHIFT) From 61fcaf3eb88d389cd0792983cdd0da9e5cad0901 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:12 +0100 Subject: [PATCH 0862/2411] f2fs: Pass a folio to set_mark() All callers have a folio so pass it in. Removes a call to compound_head(). 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 825fa3ad6357..ca0e9361ab68 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -418,9 +418,9 @@ static inline void set_cold_node(struct page *page, bool is_dir) rn->footer.flag = cpu_to_le32(flag); } -static inline void set_mark(struct page *page, int mark, int type) +static inline void set_mark(struct folio *folio, int mark, int type) { - struct f2fs_node *rn = F2FS_NODE(page); + struct f2fs_node *rn = F2FS_NODE(&folio->page); unsigned int flag = le32_to_cpu(rn->footer.flag); if (mark) flag |= BIT(type); @@ -429,8 +429,8 @@ static inline void set_mark(struct page *page, int mark, int type) rn->footer.flag = cpu_to_le32(flag); #ifdef CONFIG_F2FS_CHECK_FS - f2fs_inode_chksum_set(F2FS_P_SB(page), page); + f2fs_inode_chksum_set(F2FS_F_SB(folio), &folio->page); #endif } -#define set_dentry_mark(folio, mark) set_mark(&folio->page, mark, DENT_BIT_SHIFT) -#define set_fsync_mark(folio, mark) set_mark(&folio->page, mark, FSYNC_BIT_SHIFT) +#define set_dentry_mark(folio, mark) set_mark(folio, mark, DENT_BIT_SHIFT) +#define set_fsync_mark(folio, mark) set_mark(folio, mark, FSYNC_BIT_SHIFT) From c3c06275e4e2131111d4d6b2ead0221e67bf70b8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:13 +0100 Subject: [PATCH 0863/2411] f2fs: Pass a folio to f2fs_allocate_data_block() Most callers pass NULL, and the one which passes a page already has a folio, so we can pass it in. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/segment.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f6a295d61d2b..008d92dcbbce 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3858,7 +3858,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, bool recover_newaddr); enum temp_type f2fs_get_segment_temp(struct f2fs_sb_info *sbi, enum log_type seg_type); -int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, +int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct folio *folio, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index df5a1e226aa9..ca154a80778f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3747,7 +3747,7 @@ static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi, get_random_u32_inclusive(1, sbi->max_fragment_hole); } -int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, +int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct folio *folio, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio) @@ -3851,10 +3851,10 @@ int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, up_write(&sit_i->sentry_lock); - if (page && IS_NODESEG(curseg->seg_type)) { - fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); + if (folio && IS_NODESEG(curseg->seg_type)) { + fill_node_footer_blkaddr(&folio->page, NEXT_FREE_BLKADDR(sbi, curseg)); - f2fs_inode_chksum_set(sbi, page); + f2fs_inode_chksum_set(sbi, &folio->page); } if (fio) { @@ -3941,7 +3941,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) if (keep_order) f2fs_down_read(&fio->sbi->io_order_lock); - if (f2fs_allocate_data_block(fio->sbi, fio->page, 
fio->old_blkaddr, + if (f2fs_allocate_data_block(fio->sbi, folio, fio->old_blkaddr, &fio->new_blkaddr, sum, type, fio)) { if (fscrypt_inode_uses_fs_layer_crypto(folio->mapping->host)) fscrypt_finalize_bounce_page(&fio->encrypted_page); From e3f1b76d877c14897b4776fc5dd08af3c7751976 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:14 +0100 Subject: [PATCH 0864/2411] f2fs: Pass a folio to f2fs_inode_chksum_set() All callers have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inode.c | 10 +++++----- fs/f2fs/node.c | 2 +- fs/f2fs/node.h | 2 +- fs/f2fs/segment.c | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 008d92dcbbce..cb02452d7fc0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3634,7 +3634,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc); */ void f2fs_set_inode_flags(struct inode *inode); bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio); -void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page); +void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct folio *folio); struct inode *f2fs_iget(struct super_block *sb, unsigned long ino); struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino); int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index c5dbc9963e4f..cbee1ce33db7 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -184,14 +184,14 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio) return provided == calculated; } -void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page) +void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct folio *folio) { - struct f2fs_inode *ri = &F2FS_NODE(page)->i; + struct f2fs_inode *ri = &F2FS_NODE(&folio->page)->i; - if (!f2fs_enable_inode_chksum(sbi, 
page)) + if (!f2fs_enable_inode_chksum(sbi, &folio->page)) return; - ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page)); + ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, &folio->page)); } static bool sanity_check_compress_inode(struct inode *inode, @@ -752,7 +752,7 @@ void f2fs_update_inode(struct inode *inode, struct folio *node_folio) init_idisk_time(inode); #ifdef CONFIG_F2FS_CHECK_FS - f2fs_inode_chksum_set(F2FS_I_SB(inode), &node_folio->page); + f2fs_inode_chksum_set(F2FS_I_SB(inode), node_folio); #endif } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c9bf269f0fdb..1565f105c75d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2215,7 +2215,7 @@ static bool f2fs_dirty_node_folio(struct address_space *mapping, folio_mark_uptodate(folio); #ifdef CONFIG_F2FS_CHECK_FS if (IS_INODE(&folio->page)) - f2fs_inode_chksum_set(F2FS_M_SB(mapping), &folio->page); + f2fs_inode_chksum_set(F2FS_M_SB(mapping), folio); #endif if (filemap_dirty_folio(mapping, folio)) { inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index ca0e9361ab68..4a9544744e46 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -429,7 +429,7 @@ static inline void set_mark(struct folio *folio, int mark, int type) rn->footer.flag = cpu_to_le32(flag); #ifdef CONFIG_F2FS_CHECK_FS - f2fs_inode_chksum_set(F2FS_F_SB(folio), &folio->page); + f2fs_inode_chksum_set(F2FS_F_SB(folio), folio); #endif } #define set_dentry_mark(folio, mark) set_mark(folio, mark, DENT_BIT_SHIFT) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ca154a80778f..9279e06d75ca 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3854,7 +3854,7 @@ int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct folio *folio, if (folio && IS_NODESEG(curseg->seg_type)) { fill_node_footer_blkaddr(&folio->page, NEXT_FREE_BLKADDR(sbi, curseg)); - f2fs_inode_chksum_set(sbi, &folio->page); + f2fs_inode_chksum_set(sbi, folio); } if (fio) { From 
6ebd7ba499c5c5141ff082dc772ca04ef581490c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:15 +0100 Subject: [PATCH 0865/2411] f2fs: Pass a folio to f2fs_enable_inode_chksum() All callers have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index cbee1ce33db7..cffeddfb7b4b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -116,14 +116,15 @@ static void __recover_inline_status(struct inode *inode, struct folio *ifolio) return; } -static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page) +static +bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio) { - struct f2fs_inode *ri = &F2FS_NODE(page)->i; + struct f2fs_inode *ri = &F2FS_NODE(&folio->page)->i; if (!f2fs_sb_has_inode_chksum(sbi)) return false; - if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR)) + if (!IS_INODE(&folio->page) || !(ri->i_inline & F2FS_EXTRA_ATTR)) return false; if (!F2FS_FITS_IN_INODE(ri, le16_to_cpu(ri->i_extra_isize), @@ -164,9 +165,9 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio) return true; #ifdef CONFIG_F2FS_CHECK_FS - if (!f2fs_enable_inode_chksum(sbi, &folio->page)) + if (!f2fs_enable_inode_chksum(sbi, folio)) #else - if (!f2fs_enable_inode_chksum(sbi, &folio->page) || + if (!f2fs_enable_inode_chksum(sbi, folio) || folio_test_dirty(folio) || folio_test_writeback(folio)) #endif @@ -188,7 +189,7 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct folio *folio) { struct f2fs_inode *ri = &F2FS_NODE(&folio->page)->i; - if (!f2fs_enable_inode_chksum(sbi, &folio->page)) + if (!f2fs_enable_inode_chksum(sbi, folio)) return; ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, &folio->page)); From 5ea99b6d70b3c9b6b8ae7807827a92beee3f8903 Mon Sep 17 00:00:00 2001 
From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:16 +0100 Subject: [PATCH 0866/2411] f2fs: Pass a folio to f2fs_inode_chksum() Both callers have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index cffeddfb7b4b..05850cb00299 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -134,9 +134,9 @@ bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio) return true; } -static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct page *page) +static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio) { - struct f2fs_node *node = F2FS_NODE(page); + struct f2fs_node *node = F2FS_NODE(&folio->page); struct f2fs_inode *ri = &node->i; __le32 ino = node->footer.ino; __le32 gen = ri->i_generation; @@ -175,7 +175,7 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio) ri = &F2FS_NODE(&folio->page)->i; provided = le32_to_cpu(ri->i_inode_checksum); - calculated = f2fs_inode_chksum(sbi, &folio->page); + calculated = f2fs_inode_chksum(sbi, folio); if (provided != calculated) f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x", @@ -192,7 +192,7 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct folio *folio) if (!f2fs_enable_inode_chksum(sbi, folio)) return; - ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, &folio->page)); + ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, folio)); } static bool sanity_check_compress_inode(struct inode *inode, From 889293ea1148857fcf3879073d223dd7c47a61fd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:17 +0100 Subject: [PATCH 0867/2411] f2fs: Pass a folio to fill_node_footer_blkaddr() The only caller has a folio so pass it in. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.h | 6 +++--- fs/f2fs/segment.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 4a9544744e46..539dc7b704c0 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -300,10 +300,10 @@ static inline void copy_node_footer(struct page *dst, struct page *src) memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer)); } -static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) +static inline void fill_node_footer_blkaddr(struct folio *folio, block_t blkaddr) { - struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); - struct f2fs_node *rn = F2FS_NODE(page); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_F_SB(folio)); + struct f2fs_node *rn = F2FS_NODE(&folio->page); __u64 cp_ver = cur_cp_version(ckpt); if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9279e06d75ca..04b7dfa51d6d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3852,7 +3852,7 @@ int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct folio *folio, up_write(&sit_i->sentry_lock); if (folio && IS_NODESEG(curseg->seg_type)) { - fill_node_footer_blkaddr(&folio->page, NEXT_FREE_BLKADDR(sbi, curseg)); + fill_node_footer_blkaddr(folio, NEXT_FREE_BLKADDR(sbi, curseg)); f2fs_inode_chksum_set(sbi, folio); } From fddd722e73afadff41f570affea351b970ea23e4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:18 +0100 Subject: [PATCH 0868/2411] f2fs: Pass a folio to get_nid() All callers have a folio so pass it in. Also mark it as const to help the compiler. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 18 +++++++++--------- fs/f2fs/node.h | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1565f105c75d..9657b9f2ecdf 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -649,7 +649,7 @@ static void f2fs_ra_node_pages(struct folio *parent, int start, int n) end = start + n; end = min(end, (int)NIDS_PER_BLOCK); for (i = start; i < end; i++) { - nid = get_nid(&parent->page, i, false); + nid = get_nid(parent, i, false); f2fs_ra_node_page(sbi, nid); } @@ -808,7 +808,7 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) parent = nfolio[0]; if (level != 0) - nids[1] = get_nid(&parent->page, offset[0], true); + nids[1] = get_nid(parent, offset[0], true); dn->inode_folio = nfolio[0]; dn->inode_folio_locked = true; @@ -859,7 +859,7 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) } if (i < level) { parent = nfolio[i]; - nids[i + 1] = get_nid(&parent->page, offset[i], false); + nids[i + 1] = get_nid(parent, offset[i], false); } } dn->nid = nids[level]; @@ -1083,7 +1083,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, int i; int idx = depth - 2; - nid[0] = get_nid(&dn->inode_folio->page, offset[0], true); + nid[0] = get_nid(dn->inode_folio, offset[0], true); if (!nid[0]) return 0; @@ -1096,14 +1096,14 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, idx = i - 1; goto fail; } - nid[i + 1] = get_nid(&folios[i]->page, offset[i + 1], false); + nid[i + 1] = get_nid(folios[i], offset[i + 1], false); } f2fs_ra_node_pages(folios[idx], offset[idx + 1], NIDS_PER_BLOCK); /* free direct nodes linked to a partial indirect node */ for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) { - child_nid = get_nid(&folios[idx]->page, i, false); + child_nid = get_nid(folios[idx], i, false); if (!child_nid) continue; dn->nid = child_nid; @@ -1201,7 
+1201,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) skip_partial: while (cont) { - dn.nid = get_nid(&folio->page, offset[0], true); + dn.nid = get_nid(folio, offset[0], true); switch (offset[0]) { case NODE_DIR1_BLOCK: case NODE_DIR2_BLOCK: @@ -1233,7 +1233,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) } if (err < 0) goto fail; - if (offset[1] == 0 && get_nid(&folio->page, offset[0], true)) { + if (offset[1] == 0 && get_nid(folio, offset[0], true)) { folio_lock(folio); BUG_ON(!is_node_folio(folio)); set_nid(folio, offset[0], 0, true); @@ -1566,7 +1566,7 @@ struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid) static struct folio *f2fs_get_node_folio_ra(struct folio *parent, int start) { struct f2fs_sb_info *sbi = F2FS_F_SB(parent); - nid_t nid = get_nid(&parent->page, start, false); + nid_t nid = get_nid(parent, start, false); return __get_node_folio(sbi, nid, parent, start, NODE_TYPE_REGULAR); } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 539dc7b704c0..5bcda63fa748 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -380,9 +380,9 @@ static inline int set_nid(struct folio *folio, int off, nid_t nid, bool i) return folio_mark_dirty(folio); } -static inline nid_t get_nid(struct page *p, int off, bool i) +static inline nid_t get_nid(const struct folio *folio, int off, bool i) { - struct f2fs_node *rn = F2FS_NODE(p); + struct f2fs_node *rn = F2FS_NODE(&folio->page); if (i) return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]); From 53987453349bdd64f4897a83a5e7ee89aa9b907b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:19 +0100 Subject: [PATCH 0869/2411] f2fs: Pass a folio to set_cold_node() All callers have a folio so pass it in. Also mark it as const to help the compiler. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 4 ++-- fs/f2fs/node.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 05850cb00299..251526c73930 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -484,7 +484,7 @@ static int do_read_inode(struct inode *inode) /* try to recover cold bit for non-dir inode */ if (!S_ISDIR(inode->i_mode) && !is_cold_node(&node_folio->page)) { f2fs_folio_wait_writeback(node_folio, NODE, true, true); - set_cold_node(&node_folio->page, false); + set_cold_node(node_folio, false); folio_mark_dirty(node_folio); } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9657b9f2ecdf..b410f2d125f3 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1381,7 +1381,7 @@ struct folio *f2fs_new_node_folio(struct dnode_of_data *dn, unsigned int ofs) f2fs_folio_wait_writeback(folio, NODE, true, true); fill_node_footer(&folio->page, dn->nid, dn->inode->i_ino, ofs, true); - set_cold_node(&folio->page, S_ISDIR(dn->inode->i_mode)); + set_cold_node(folio, S_ISDIR(dn->inode->i_mode)); if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); if (folio_mark_dirty(folio)) @@ -2828,7 +2828,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio) if (!folio_test_uptodate(ifolio)) folio_mark_uptodate(ifolio); fill_node_footer(&ifolio->page, ino, ino, 0, true); - set_cold_node(&ifolio->page, false); + set_cold_node(ifolio, false); src = F2FS_INODE(folio); dst = F2FS_INODE(ifolio); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 5bcda63fa748..43137b5fcbf4 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -406,9 +406,9 @@ static inline int is_node(const struct page *page, int type) #define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT) #define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) -static inline void set_cold_node(struct page *page, bool is_dir) +static inline void 
set_cold_node(const struct folio *folio, bool is_dir) { - struct f2fs_node *rn = F2FS_NODE(page); + struct f2fs_node *rn = F2FS_NODE(&folio->page); unsigned int flag = le32_to_cpu(rn->footer.flag); if (is_dir) From 171a3aebbd48dd1d43cbf3fc8b1be3b6ab7d5836 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:20 +0100 Subject: [PATCH 0870/2411] f2fs: Pass folios to copy_node_footer() The only caller has folios so pass them in. Also mark them as const to help the compiler. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.h | 7 ++++--- fs/f2fs/recovery.c | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 43137b5fcbf4..412ee80afa69 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -293,10 +293,11 @@ static inline void fill_node_footer(struct page *page, nid_t nid, (old_flag & OFFSET_BIT_MASK)); } -static inline void copy_node_footer(struct page *dst, struct page *src) +static inline void copy_node_footer(const struct folio *dst, + const struct folio *src) { - struct f2fs_node *src_rn = F2FS_NODE(src); - struct f2fs_node *dst_rn = F2FS_NODE(dst); + struct f2fs_node *src_rn = F2FS_NODE(&src->page); + struct f2fs_node *dst_rn = F2FS_NODE(&dst->page); memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer)); } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index dac0d7189b2b..ddfb105ad2bd 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -758,7 +758,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } } - copy_node_footer(&dn.node_folio->page, &folio->page); + copy_node_footer(dn.node_folio, folio); fill_node_footer(&dn.node_folio->page, dn.nid, ni.ino, ofs_of_node(&folio->page), false); folio_mark_dirty(dn.node_folio); From 06bf11829b495211c3c654332fcd126e1cc59227 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:21 +0100 Subject: [PATCH 
0871/2411] f2fs: Pass a folio to fill_node_footer() All callers have a folio so pass it in. Also mark it as const to help the compiler. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++-- fs/f2fs/node.h | 4 ++-- fs/f2fs/recovery.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b410f2d125f3..6ac1540d6aab 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1380,7 +1380,7 @@ struct folio *f2fs_new_node_folio(struct dnode_of_data *dn, unsigned int ofs) set_node_addr(sbi, &new_ni, NEW_ADDR, false); f2fs_folio_wait_writeback(folio, NODE, true, true); - fill_node_footer(&folio->page, dn->nid, dn->inode->i_ino, ofs, true); + fill_node_footer(folio, dn->nid, dn->inode->i_ino, ofs, true); set_cold_node(folio, S_ISDIR(dn->inode->i_mode)); if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); @@ -2827,7 +2827,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio) if (!folio_test_uptodate(ifolio)) folio_mark_uptodate(ifolio); - fill_node_footer(&ifolio->page, ino, ino, 0, true); + fill_node_footer(ifolio, ino, ino, 0, true); set_cold_node(ifolio, false); src = F2FS_INODE(folio); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 412ee80afa69..a80be47cd2e5 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -274,10 +274,10 @@ static inline block_t next_blkaddr_of_node(struct folio *node_folio) return le32_to_cpu(rn->footer.next_blkaddr); } -static inline void fill_node_footer(struct page *page, nid_t nid, +static inline void fill_node_footer(const struct folio *folio, nid_t nid, nid_t ino, unsigned int ofs, bool reset) { - struct f2fs_node *rn = F2FS_NODE(page); + struct f2fs_node *rn = F2FS_NODE(&folio->page); unsigned int old_flag = 0; if (reset) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index ddfb105ad2bd..1e038bc98173 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -759,7 +759,7 @@ static int 
do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } copy_node_footer(dn.node_folio, folio); - fill_node_footer(&dn.node_folio->page, dn.nid, ni.ino, + fill_node_footer(dn.node_folio, dn.nid, ni.ino, ofs_of_node(&folio->page), false); folio_mark_dirty(dn.node_folio); err: From eca35d6d5a0245ffeb0e80a7d07aab8801b6572a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:22 +0100 Subject: [PATCH 0872/2411] f2fs: Pass a folio to cpver_of_node() All callers have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 +- fs/f2fs/node.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6ac1540d6aab..1e7bec223dbe 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1485,7 +1485,7 @@ static int sanity_check_node_footer(struct f2fs_sb_info *sbi, f2fs_warn(sbi, "inconsistent node block, node_type:%d, nid:%lu, " "node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", ntype, nid, nid_of_node(folio), ino_of_node(folio), - ofs_of_node(page), cpver_of_node(page), + ofs_of_node(page), cpver_of_node(folio), next_blkaddr_of_node(folio)); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index a80be47cd2e5..78a9a411fe77 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -262,9 +262,9 @@ static inline unsigned int ofs_of_node(const struct page *node_page) return flag >> OFFSET_BIT_SHIFT; } -static inline __u64 cpver_of_node(const struct page *node_page) +static inline __u64 cpver_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(node_page); + struct f2fs_node *rn = F2FS_NODE(&node_folio->page); return le64_to_cpu(rn->footer.cp_ver); } @@ -321,12 +321,12 @@ static inline bool is_recoverable_dnode(const struct folio *folio) /* Don't care crc part, if fsck.f2fs sets it. 
*/ if (__is_set_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG)) - return (cp_ver << 32) == (cpver_of_node(&folio->page) << 32); + return (cp_ver << 32) == (cpver_of_node(folio) << 32); if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) cp_ver |= (cur_cp_crc(ckpt) << 32); - return cp_ver == cpver_of_node(&folio->page); + return cp_ver == cpver_of_node(folio); } /* From 447e4fb5e8800648c6c7b8edaa90ad3f8919ce0b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:23 +0100 Subject: [PATCH 0873/2411] f2fs: Pass a folio to f2fs_recover_xattr_data() One caller passes NULL and the other caller already has a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 6 +++--- fs/f2fs/recovery.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cb02452d7fc0..69b766723960 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3790,7 +3790,7 @@ void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio); -int f2fs_recover_xattr_data(struct inode *inode, struct page *page); +int f2fs_recover_xattr_data(struct inode *inode, struct folio *folio); int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio); int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1e7bec223dbe..9c4052282c8c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2753,7 +2753,7 @@ int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio) return 0; } -int f2fs_recover_xattr_data(struct inode *inode, struct page *page) +int f2fs_recover_xattr_data(struct inode *inode, struct folio *folio) { struct 
f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; @@ -2791,8 +2791,8 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page) f2fs_update_inode_page(inode); /* 3: update and set xattr node page dirty */ - if (page) { - memcpy(F2FS_NODE(&xfolio->page), F2FS_NODE(page), + if (folio) { + memcpy(F2FS_NODE(&xfolio->page), F2FS_NODE(&folio->page), VALID_XATTR_BLOCK_SIZE); folio_mark_dirty(xfolio); } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 1e038bc98173..7d63a74e9ca6 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -633,7 +633,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, if (err) goto out; } else if (f2fs_has_xattr_block(ofs_of_node(&folio->page))) { - err = f2fs_recover_xattr_data(inode, &folio->page); + err = f2fs_recover_xattr_data(inode, folio); if (!err) recovered++; goto out; From ac576da7c950984c7bbb71c1b557187c58758d16 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:24 +0100 Subject: [PATCH 0874/2411] f2fs: Pass a folio to is_fsync_dnode() Both callers have a folio so pass it in. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 +- fs/f2fs/node.h | 2 +- fs/f2fs/recovery.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9c4052282c8c..cc4bf8525cb5 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1744,7 +1744,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted fio.old_blkaddr = ni.blk_addr; f2fs_do_write_node_page(nid, &fio); - set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(&folio->page)); + set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(folio)); dec_page_count(sbi, F2FS_DIRTY_NODES); f2fs_up_read(&sbi->node_write); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 78a9a411fe77..b8ec837f423c 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -404,7 +404,7 @@ static inline int is_node(const struct page *page, int type) } #define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) -#define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT) +#define is_fsync_dnode(folio) is_node(&folio->page, FSYNC_BIT_SHIFT) #define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) static inline void set_cold_node(const struct folio *folio, bool is_dir) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 7d63a74e9ca6..ac8906bdcf07 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -429,7 +429,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, break; } - if (!is_fsync_dnode(&folio->page)) + if (!is_fsync_dnode(folio)) goto next; entry = get_fsync_inode(head, ino_of_node(folio)); From 4aecdc80b3a6207a9e477857bf9a0f2095addc09 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:25 +0100 Subject: [PATCH 0875/2411] f2fs: Pass a folio to is_dent_dnode() Both callers have a folio so pass it in. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.h | 2 +- fs/f2fs/recovery.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index b8ec837f423c..b54c2d520d1e 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -405,7 +405,7 @@ static inline int is_node(const struct page *page, int type) #define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) #define is_fsync_dnode(folio) is_node(&folio->page, FSYNC_BIT_SHIFT) -#define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) +#define is_dent_dnode(folio) is_node(&folio->page, DENT_BIT_SHIFT) static inline void set_cold_node(const struct folio *folio, bool is_dir) { diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index ac8906bdcf07..da4b733aeaf2 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -438,7 +438,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, if (!check_only && IS_INODE(&folio->page) && - is_dent_dnode(&folio->page)) { + is_dent_dnode(folio)) { err = f2fs_recover_inode_page(sbi, folio); if (err) { f2fs_folio_put(folio, true); @@ -463,7 +463,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, } entry->blkaddr = blkaddr; - if (IS_INODE(&folio->page) && is_dent_dnode(&folio->page)) + if (IS_INODE(&folio->page) && is_dent_dnode(folio)) entry->last_dentry = blkaddr; next: /* check next segment */ From d342b7adad71e5a4a609fedd673e964cfad91822 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:26 +0100 Subject: [PATCH 0876/2411] f2fs: Add fio->folio Put fio->page into a union with fio->folio. This lets us remove a lot of folio->page and page->folio conversions.
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ++-- fs/f2fs/data.c | 17 ++++++++--------- fs/f2fs/f2fs.h | 7 +++++-- fs/f2fs/gc.c | 6 +++--- fs/f2fs/inline.c | 2 +- fs/f2fs/node.c | 4 ++-- fs/f2fs/segment.c | 7 +++---- 7 files changed, 24 insertions(+), 23 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f149ec28aefd..07ca10c66649 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -82,7 +82,7 @@ static struct folio *__get_meta_folio(struct f2fs_sb_info *sbi, pgoff_t index, if (folio_test_uptodate(folio)) goto out; - fio.page = &folio->page; + fio.folio = folio; err = f2fs_submit_page_bio(&fio); if (err) { @@ -309,7 +309,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, continue; } - fio.page = &folio->page; + fio.folio = folio; err = f2fs_submit_page_bio(&fio); f2fs_folio_put(folio, err ? true : false); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c4da33de0d1d..0e261caf2f91 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -419,7 +419,6 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr) static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio) { unsigned int temp_mask = GENMASK(NR_TEMP_TYPE - 1, 0); - struct folio *fio_folio = page_folio(fio->page); unsigned int fua_flag, meta_flag, io_flag; blk_opf_t op_flags = 0; @@ -447,7 +446,7 @@ static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio) op_flags |= REQ_FUA; if (fio->type == DATA && - F2FS_I(fio_folio->mapping->host)->ioprio_hint == F2FS_IOPRIO_WRITE) + F2FS_I(fio->folio->mapping->host)->ioprio_hint == F2FS_IOPRIO_WRITE) op_flags |= REQ_PRIO; return op_flags; @@ -691,7 +690,7 @@ void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) int f2fs_submit_page_bio(struct f2fs_io_info *fio) { struct bio *bio; - struct folio *fio_folio = page_folio(fio->page); + struct folio *fio_folio = fio->folio; struct folio *data_folio = fio->encrypted_page ? 
page_folio(fio->encrypted_page) : fio_folio; @@ -779,7 +778,7 @@ static void del_bio_entry(struct bio_entry *be) static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, struct page *page) { - struct folio *fio_folio = page_folio(fio->page); + struct folio *fio_folio = fio->folio; struct f2fs_sb_info *sbi = fio->sbi; enum temp_type temp; bool found = false; @@ -888,7 +887,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) struct bio *bio = *fio->bio; struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; - struct folio *folio = page_folio(fio->page); + struct folio *folio = fio->folio; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) @@ -1012,12 +1011,12 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) } if (fio->io_wbc) - wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page), - PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, fio->folio, + folio_size(fio->folio)); io->last_block_in_bio = fio->new_blkaddr; - trace_f2fs_submit_folio_write(page_folio(fio->page), fio); + trace_f2fs_submit_folio_write(fio->folio, fio); #ifdef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi) && btype < META && is_end_zone_blkaddr(sbi, fio->new_blkaddr)) { @@ -2650,7 +2649,7 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio) int f2fs_do_write_data_page(struct f2fs_io_info *fio) { - struct folio *folio = page_folio(fio->page); + struct folio *folio = fio->folio; struct inode *inode = folio->mapping->host; struct dnode_of_data dn; struct node_info ni; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 69b766723960..0987f868cdd9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1240,7 +1240,10 @@ struct f2fs_io_info { blk_opf_t op_flags; /* req_flag_bits */ block_t new_blkaddr; /* new block address to be written */ block_t old_blkaddr; /* old block address before Cow */ - struct page *page; /* page to be written */ + union { + struct page *page; /* page to be 
written */ + struct folio *folio; + }; struct page *encrypted_page; /* encrypted page */ struct page *compressed_page; /* compressed page */ struct list_head list; /* serialize IOs */ @@ -3892,7 +3895,7 @@ unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi, static inline struct inode *fio_inode(struct f2fs_io_info *fio) { - return page_folio(fio->page)->mapping->host; + return fio->folio->mapping->host; } #define DEF_FRAGMENT_SIZE 4 diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index edeae4ee137c..f82430759cf7 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1249,7 +1249,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) } got_it: /* read folio */ - fio.page = &folio->page; + fio.folio = folio; fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; /* @@ -1353,7 +1353,7 @@ static int move_data_block(struct inode *inode, block_t bidx, goto put_out; /* read page */ - fio.page = &folio->page; + fio.folio = folio; fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; if (lfs_mode) @@ -1483,7 +1483,7 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC, .old_blkaddr = NULL_ADDR, - .page = &folio->page, + .folio = folio, .encrypted_page = NULL, .need_lock = LOCK_REQ, .io_type = FS_GC_DATA_IO, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 4c636a8043f8..9851310cdb87 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -150,7 +150,7 @@ int f2fs_convert_inline_folio(struct dnode_of_data *dn, struct folio *folio) .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_PRIO, - .page = &folio->page, + .folio = folio, .encrypted_page = NULL, .io_type = FS_DATA_IO, }; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index cc4bf8525cb5..0574f0456305 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1413,7 +1413,7 @@ static int read_node_folio(struct folio *folio, blk_opf_t op_flags) .type = NODE, .op = REQ_OP_READ, .op_flags = op_flags, - .page = &folio->page, + .folio = folio, 
.encrypted_page = NULL, }; int err; @@ -1677,7 +1677,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted .type = NODE, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), - .page = &folio->page, + .folio = folio, .encrypted_page = NULL, .submitted = 0, .io_type = io_type, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 04b7dfa51d6d..965c9f55559c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3666,8 +3666,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (file_is_cold(inode) || f2fs_need_compress_data(inode)) return CURSEG_COLD_DATA; - type = __get_age_segment_type(inode, - page_folio(fio->page)->index); + type = __get_age_segment_type(inode, fio->folio->index); if (type != NO_CHECK_TYPE) return type; @@ -3932,7 +3931,7 @@ static int log_type_to_seg_type(enum log_type type) static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { - struct folio *folio = page_folio(fio->page); + struct folio *folio = fio->folio; enum log_type type = __get_segment_type(fio); int seg_type = log_type_to_seg_type(type); bool keep_order = (f2fs_lfs_mode(fio->sbi) && @@ -3979,7 +3978,7 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio, .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, .old_blkaddr = folio->index, .new_blkaddr = folio->index, - .page = folio_page(folio, 0), + .folio = folio, .encrypted_page = NULL, .in_list = 0, }; From 79d976a2e73b103762942fcf46a9bbe3ecc9d699 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:27 +0100 Subject: [PATCH 0877/2411] f2fs: Use folio_unlock() in f2fs_write_compressed_pages() Remove a call to compound_head() by replacing a call to unlock_page() with a call to folio_unlock(). 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 8cbb8038bc72..5be1a4396f80 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1419,7 +1419,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, (*submitted)++; unlock_continue: inode_dec_dirty_pages(cc->inode); - unlock_page(fio.page); + folio_unlock(fio.folio); } if (fio.compr_blocks) From 1fd0dffdb446c780a555d9b792408560a5c693d6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:28 +0100 Subject: [PATCH 0878/2411] f2fs: Pass a folio to is_cold_node() All callers now have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 12 ++++++------ fs/f2fs/node.h | 2 +- fs/f2fs/segment.c | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 251526c73930..a8f64d206d19 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -482,7 +482,7 @@ static int do_read_inode(struct inode *inode) __recover_inline_status(inode, node_folio); /* try to recover cold bit for non-dir inode */ - if (!S_ISDIR(inode->i_mode) && !is_cold_node(&node_folio->page)) { + if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_folio)) { f2fs_folio_wait_writeback(node_folio, NODE, true, true); set_cold_node(node_folio, false); folio_mark_dirty(node_folio); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0574f0456305..6fc0a8de7158 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -313,7 +313,7 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio) { return is_node_folio(folio) && IS_DNODE(&folio->page) && - is_cold_node(&folio->page); + is_cold_node(folio); } void 
f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi) @@ -1631,7 +1631,7 @@ static struct folio *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) return ERR_PTR(-EIO); } - if (!IS_DNODE(&folio->page) || !is_cold_node(&folio->page)) + if (!IS_DNODE(&folio->page) || !is_cold_node(folio)) continue; if (ino_of_node(folio) != ino) continue; @@ -1702,7 +1702,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) && wbc->sync_mode == WB_SYNC_NONE && - IS_DNODE(&folio->page) && is_cold_node(&folio->page)) + IS_DNODE(&folio->page) && is_cold_node(folio)) goto redirty_out; /* get old block addr of this node page */ @@ -1840,7 +1840,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, goto out; } - if (!IS_DNODE(&folio->page) || !is_cold_node(&folio->page)) + if (!IS_DNODE(&folio->page) || !is_cold_node(folio)) continue; if (ino_of_node(folio) != ino) continue; @@ -2043,10 +2043,10 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, if (step == 0 && IS_DNODE(&folio->page)) continue; if (step == 1 && (!IS_DNODE(&folio->page) || - is_cold_node(&folio->page))) + is_cold_node(folio))) continue; if (step == 2 && (!IS_DNODE(&folio->page) || - !is_cold_node(&folio->page))) + !is_cold_node(folio))) continue; lock_node: if (wbc->sync_mode == WB_SYNC_ALL) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index b54c2d520d1e..6daacadb0ee0 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -403,7 +403,7 @@ static inline int is_node(const struct page *page, int type) return le32_to_cpu(rn->footer.flag) & BIT(type); } -#define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) +#define is_cold_node(folio) is_node(&folio->page, COLD_BIT_SHIFT) #define is_fsync_dnode(folio) is_node(&folio->page, FSYNC_BIT_SHIFT) #define is_dent_dnode(folio) is_node(&folio->page, DENT_BIT_SHIFT) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 965c9f55559c..72b3448e88ce 100644 --- a/fs/f2fs/segment.c +++ 
b/fs/f2fs/segment.c @@ -3620,7 +3620,7 @@ static int __get_segment_type_4(struct f2fs_io_info *fio) else return CURSEG_COLD_DATA; } else { - if (IS_DNODE(fio->page) && is_cold_node(fio->page)) + if (IS_DNODE(fio->page) && is_cold_node(fio->folio)) return CURSEG_WARM_NODE; else return CURSEG_COLD_NODE; @@ -3678,7 +3678,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) inode->i_write_hint); } else { if (IS_DNODE(fio->page)) - return is_cold_node(fio->page) ? CURSEG_WARM_NODE : + return is_cold_node(fio->folio) ? CURSEG_WARM_NODE : CURSEG_HOT_NODE; return CURSEG_COLD_NODE; } From 5bba2a22494cf47a1e0021457ca2d9a6722d90fe Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:29 +0100 Subject: [PATCH 0879/2411] f2fs: Pass a folio to is_node() All three callers now have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 6daacadb0ee0..92e73cff0d21 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -397,15 +397,15 @@ static inline nid_t get_nid(const struct folio *folio, int off, bool i) * - Mark cold data pages in page cache */ -static inline int is_node(const struct page *page, int type) +static inline int is_node(const struct folio *folio, int type) { - struct f2fs_node *rn = F2FS_NODE(page); + struct f2fs_node *rn = F2FS_NODE(&folio->page); return le32_to_cpu(rn->footer.flag) & BIT(type); } -#define is_cold_node(folio) is_node(&folio->page, COLD_BIT_SHIFT) -#define is_fsync_dnode(folio) is_node(&folio->page, FSYNC_BIT_SHIFT) -#define is_dent_dnode(folio) is_node(&folio->page, DENT_BIT_SHIFT) +#define is_cold_node(folio) is_node(folio, COLD_BIT_SHIFT) +#define is_fsync_dnode(folio) is_node(folio, FSYNC_BIT_SHIFT) +#define is_dent_dnode(folio) is_node(folio, DENT_BIT_SHIFT) static inline void set_cold_node(const struct folio *folio, bool 
is_dir) { From fb92a5c9f89a4e5337768c7d2f374669e0ab454b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:30 +0100 Subject: [PATCH 0880/2411] f2fs: Pass a folio to IS_DNODE() All callers now have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 15 +++++++-------- fs/f2fs/node.h | 4 ++-- fs/f2fs/segment.c | 4 ++-- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6fc0a8de7158..dbc45b856ffa 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -312,8 +312,7 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio) { - return is_node_folio(folio) && IS_DNODE(&folio->page) && - is_cold_node(folio); + return is_node_folio(folio) && IS_DNODE(folio) && is_cold_node(folio); } void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi) @@ -1631,7 +1630,7 @@ static struct folio *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) return ERR_PTR(-EIO); } - if (!IS_DNODE(&folio->page) || !is_cold_node(folio)) + if (!IS_DNODE(folio) || !is_cold_node(folio)) continue; if (ino_of_node(folio) != ino) continue; @@ -1702,7 +1701,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) && wbc->sync_mode == WB_SYNC_NONE && - IS_DNODE(&folio->page) && is_cold_node(folio)) + IS_DNODE(folio) && is_cold_node(folio)) goto redirty_out; /* get old block addr of this node page */ @@ -1840,7 +1839,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, goto out; } - if (!IS_DNODE(&folio->page) || !is_cold_node(folio)) + if (!IS_DNODE(folio) || !is_cold_node(folio)) continue; if (ino_of_node(folio) != ino) continue; @@ -2040,12 +2039,12 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, * 1. dentry dnodes * 2. 
file dnodes */ - if (step == 0 && IS_DNODE(&folio->page)) + if (step == 0 && IS_DNODE(folio)) continue; - if (step == 1 && (!IS_DNODE(&folio->page) || + if (step == 1 && (!IS_DNODE(folio) || is_cold_node(folio))) continue; - if (step == 2 && (!IS_DNODE(&folio->page) || + if (step == 2 && (!IS_DNODE(folio) || !is_cold_node(folio))) continue; lock_node: diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 92e73cff0d21..1b57b61f911b 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -350,9 +350,9 @@ static inline bool is_recoverable_dnode(const struct folio *folio) * `- indirect node ((6 + 2N) + (N - 1)(N + 1)) * `- direct node */ -static inline bool IS_DNODE(const struct page *node_page) +static inline bool IS_DNODE(const struct folio *node_folio) { - unsigned int ofs = ofs_of_node(node_page); + unsigned int ofs = ofs_of_node(&node_folio->page); if (f2fs_has_xattr_block(ofs)) return true; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 72b3448e88ce..2a6dcfba911f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3620,7 +3620,7 @@ static int __get_segment_type_4(struct f2fs_io_info *fio) else return CURSEG_COLD_DATA; } else { - if (IS_DNODE(fio->page) && is_cold_node(fio->folio)) + if (IS_DNODE(fio->folio) && is_cold_node(fio->folio)) return CURSEG_WARM_NODE; else return CURSEG_COLD_NODE; @@ -3677,7 +3677,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode), inode->i_write_hint); } else { - if (IS_DNODE(fio->page)) + if (IS_DNODE(fio->folio)) return is_cold_node(fio->folio) ? CURSEG_WARM_NODE : CURSEG_HOT_NODE; return CURSEG_COLD_NODE; From 6d3a7f6589fec21addb4bdff6289283dfdf55af9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:31 +0100 Subject: [PATCH 0881/2411] f2fs: Pass a folio to ofs_of_node() All callers now have a folio so pass it in. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 2 +- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 2 +- fs/f2fs/node.c | 4 ++-- fs/f2fs/node.h | 6 +++--- fs/f2fs/recovery.c | 14 +++++++------- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index a6eb3d73231e..199c1e7a83ef 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -934,7 +934,7 @@ static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type typ if (!__may_extent_tree(dn->inode, type)) return; - ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(&dn->node_folio->page), dn->inode) + + ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio), dn->inode) + dn->ofs_in_node; ei.len = 1; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index bd835c4f874a..04a5a1089320 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -707,7 +707,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) * once we invalidate valid blkaddr in range [ofs, ofs + count], * we will invalidate all blkaddr in the whole range. 
*/ - fofs = f2fs_start_bidx_of_node(ofs_of_node(&dn->node_folio->page), + fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio), dn->inode) + ofs; f2fs_update_read_extent_cache_range(dn, fofs, 0, len); f2fs_update_age_extent_cache_range(dn, fofs, len); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index f82430759cf7..781b955cbb77 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1177,7 +1177,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return false; } - *nofs = ofs_of_node(&node_folio->page); + *nofs = ofs_of_node(node_folio); source_blkaddr = data_blkaddr(NULL, node_folio, ofs_in_node); f2fs_folio_put(node_folio, true); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index dbc45b856ffa..b56e627e0b56 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1479,12 +1479,12 @@ static int sanity_check_node_footer(struct f2fs_sb_info *sbi, if (unlikely(nid != nid_of_node(folio) || (ntype == NODE_TYPE_INODE && !IS_INODE(page)) || (ntype == NODE_TYPE_XATTR && - !f2fs_has_xattr_block(ofs_of_node(page))) || + !f2fs_has_xattr_block(ofs_of_node(folio))) || time_to_inject(sbi, FAULT_INCONSISTENT_FOOTER))) { f2fs_warn(sbi, "inconsistent node block, node_type:%d, nid:%lu, " "node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", ntype, nid, nid_of_node(folio), ino_of_node(folio), - ofs_of_node(page), cpver_of_node(folio), + ofs_of_node(folio), cpver_of_node(folio), next_blkaddr_of_node(folio)); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 1b57b61f911b..70a58c4052fe 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -255,9 +255,9 @@ static inline nid_t nid_of_node(const struct folio *node_folio) return le32_to_cpu(rn->footer.nid); } -static inline unsigned int ofs_of_node(const struct page *node_page) +static inline unsigned int ofs_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(node_page); + struct f2fs_node *rn = 
F2FS_NODE(&node_folio->page); unsigned flag = le32_to_cpu(rn->footer.flag); return flag >> OFFSET_BIT_SHIFT; } @@ -352,7 +352,7 @@ static inline bool is_recoverable_dnode(const struct folio *folio) */ static inline bool IS_DNODE(const struct folio *node_folio) { - unsigned int ofs = ofs_of_node(&node_folio->page); + unsigned int ofs = ofs_of_node(node_folio); if (f2fs_has_xattr_block(ofs)) return true; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index da4b733aeaf2..5a45d0d1f05c 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -552,7 +552,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, if (IS_ERR(node_folio)) return PTR_ERR(node_folio); - offset = ofs_of_node(&node_folio->page); + offset = ofs_of_node(node_folio); ino = ino_of_node(node_folio); f2fs_folio_put(node_folio, true); @@ -632,7 +632,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, err = f2fs_recover_inline_xattr(inode, folio); if (err) goto out; - } else if (f2fs_has_xattr_block(ofs_of_node(&folio->page))) { + } else if (f2fs_has_xattr_block(ofs_of_node(folio))) { err = f2fs_recover_xattr_data(inode, folio); if (!err) recovered++; @@ -648,7 +648,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } /* step 3: recover data indices */ - start = f2fs_start_bidx_of_node(ofs_of_node(&folio->page), inode); + start = f2fs_start_bidx_of_node(ofs_of_node(folio), inode); end = start + ADDRS_PER_PAGE(&folio->page, inode); set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -670,10 +670,10 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, f2fs_bug_on(sbi, ni.ino != ino_of_node(folio)); - if (ofs_of_node(&dn.node_folio->page) != ofs_of_node(&folio->page)) { + if (ofs_of_node(dn.node_folio) != ofs_of_node(folio)) { f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u", - inode->i_ino, ofs_of_node(&dn.node_folio->page), - ofs_of_node(&folio->page)); + inode->i_ino, ofs_of_node(dn.node_folio), + 
ofs_of_node(folio)); err = -EFSCORRUPTED; f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER); goto err; @@ -760,7 +760,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, copy_node_footer(dn.node_folio, folio); fill_node_footer(dn.node_folio, dn.nid, ni.ino, - ofs_of_node(&folio->page), false); + ofs_of_node(folio), false); folio_mark_dirty(dn.node_folio); err: f2fs_put_dnode(&dn); From e8f46b2c3aef32a4efdf5459b26c1f7d96d81826 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:32 +0100 Subject: [PATCH 0882/2411] f2fs: Pass a folio to get_dnode_base() The only caller already has a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0987f868cdd9..3933327d8cc3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3039,20 +3039,20 @@ static inline __le32 *blkaddr_in_node(struct f2fs_node *node) static inline int f2fs_has_extra_attr(struct inode *inode); static inline unsigned int get_dnode_base(struct inode *inode, - struct page *node_page) + struct folio *node_folio) { - if (!IS_INODE(node_page)) + if (!IS_INODE(&node_folio->page)) return 0; return inode ? get_extra_isize(inode) : - offset_in_addr(&F2FS_NODE(node_page)->i); + offset_in_addr(&F2FS_NODE(&node_folio->page)->i); } static inline __le32 *get_dnode_addr(struct inode *inode, struct folio *node_folio) { return blkaddr_in_node(F2FS_NODE(&node_folio->page)) + - get_dnode_base(inode, &node_folio->page); + get_dnode_base(inode, node_folio); } static inline block_t data_blkaddr(struct inode *inode, From ad38574a8e8223361e265973fbd87013ea058c5d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:33 +0100 Subject: [PATCH 0883/2411] f2fs: Pass a folio to ADDRS_PER_PAGE() All callers now have a folio so pass it in. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/file.c | 18 +++++++++--------- fs/f2fs/recovery.c | 4 ++-- fs/f2fs/segment.c | 2 +- include/linux/f2fs_fs.h | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0e261caf2f91..8a2414ce39ff 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1588,7 +1588,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) start_pgofs = pgofs; prealloc = 0; last_ofs_in_node = ofs_in_node = dn.ofs_in_node; - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); next_block: blkaddr = f2fs_data_blkaddr(&dn); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 04a5a1089320..60618c52ba50 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -489,7 +489,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) } } - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); /* find data/hole in dnode block */ for (; dn.ofs_in_node < end_offset; @@ -814,7 +814,7 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) goto out; } - count = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + count = ADDRS_PER_PAGE(dn.node_folio, inode); count -= dn.ofs_in_node; f2fs_bug_on(sbi, count < 0); @@ -1233,7 +1233,7 @@ int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) return err; } - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); count = min(end_offset - dn.ofs_in_node, pg_end - pg_start); f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset); @@ -1332,7 +1332,7 @@ static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr, goto next; } - done = min((pgoff_t)ADDRS_PER_PAGE(&dn.node_folio->page, inode) - + done = 
min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, inode) - dn.ofs_in_node, len); for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { *blkaddr = f2fs_data_blkaddr(&dn); @@ -1421,7 +1421,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, } ilen = min((pgoff_t) - ADDRS_PER_PAGE(&dn.node_folio->page, dst_inode) - + ADDRS_PER_PAGE(dn.node_folio, dst_inode) - dn.ofs_in_node, len - i); do { dn.data_blkaddr = f2fs_data_blkaddr(&dn); @@ -1717,7 +1717,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, goto out; } - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); end = min(pg_end, end_offset - dn.ofs_in_node + index); ret = f2fs_do_zero_range(&dn, index, end); @@ -3885,7 +3885,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) break; } - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); count = round_up(count, fi->i_cluster_size); @@ -4063,7 +4063,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) break; } - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); count = round_up(count, fi->i_cluster_size); @@ -4227,7 +4227,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) goto out; } - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); count = min(end_offset - dn.ofs_in_node, pg_end - index); for (i = 0; i < count; i++, index++, dn.ofs_in_node++) { struct block_device *cur_bdev; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 5a45d0d1f05c..894b27b0329d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -527,7 +527,7 @@ static int 
check_index_in_prev_nodes(struct f2fs_sb_info *sbi, nid = le32_to_cpu(sum.nid); ofs_in_node = le16_to_cpu(sum.ofs_in_node); - max_addrs = ADDRS_PER_PAGE(&dn->node_folio->page, dn->inode); + max_addrs = ADDRS_PER_PAGE(dn->node_folio, dn->inode); if (ofs_in_node >= max_addrs) { f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%lu, nid:%u, max:%u", ofs_in_node, dn->inode->i_ino, nid, max_addrs); @@ -649,7 +649,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, /* step 3: recover data indices */ start = f2fs_start_bidx_of_node(ofs_of_node(folio), inode); - end = start + ADDRS_PER_PAGE(&folio->page, inode); + end = start + ADDRS_PER_PAGE(folio, inode); set_new_dnode(&dn, inode, NULL, NULL, 0); retry_dn: diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2a6dcfba911f..909637873ff7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -334,7 +334,7 @@ static int __f2fs_commit_atomic_write(struct inode *inode) goto next; } - blen = min((pgoff_t)ADDRS_PER_PAGE(&dn.node_folio->page, cow_inode), + blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, cow_inode), len); index = off; for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) { diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 5206d63b3386..25857877eaec 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -268,7 +268,7 @@ struct node_footer { /* Node IDs in an Indirect Block */ #define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) -#define ADDRS_PER_PAGE(page, inode) (addrs_per_page(inode, IS_INODE(page))) +#define ADDRS_PER_PAGE(folio, inode) (addrs_per_page(inode, IS_INODE(&folio->page))) #define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) #define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) From a5f3be6e652a7beaaf6c482bc013b64129a5d239 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:34 +0100 Subject: [PATCH 0884/2411] f2fs: Pass a folio to IS_INODE() All callers now have a 
folio so pass it in. Also make it const to help the compiler. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 +++--- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 2 +- fs/f2fs/inline.c | 2 +- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 14 ++++++-------- fs/f2fs/recovery.c | 8 ++++---- include/linux/f2fs_fs.h | 2 +- 8 files changed, 18 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3933327d8cc3..09ddc0626dfe 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3019,9 +3019,9 @@ static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, #define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) -static inline bool IS_INODE(struct page *page) +static inline bool IS_INODE(const struct folio *folio) { - struct f2fs_node *p = F2FS_NODE(page); + struct f2fs_node *p = F2FS_NODE(&folio->page); return RAW_IS_INODE(p); } @@ -3041,7 +3041,7 @@ static inline int f2fs_has_extra_attr(struct inode *inode); static inline unsigned int get_dnode_base(struct inode *inode, struct folio *node_folio) { - if (!IS_INODE(&node_folio->page)) + if (!IS_INODE(node_folio)) return 0; return inode ? 
get_extra_isize(inode) : diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 60618c52ba50..36b32757d5b9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -819,7 +819,7 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) count -= dn.ofs_in_node; f2fs_bug_on(sbi, count < 0); - if (dn.ofs_in_node || IS_INODE(&dn.node_folio->page)) { + if (dn.ofs_in_node || IS_INODE(dn.node_folio)) { f2fs_truncate_data_blocks_range(&dn, count); free_from += count; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 781b955cbb77..c1d4ecbd2505 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1162,7 +1162,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return false; } - if (IS_INODE(&node_folio->page)) { + if (IS_INODE(node_folio)) { base = offset_in_addr(F2FS_INODE(node_folio)); max_addrs = DEF_ADDRS_PER_INODE; } else { diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 9851310cdb87..51adc43d5a5c 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -305,7 +305,7 @@ int f2fs_recover_inline_data(struct inode *inode, struct folio *nfolio) * x o -> remove data blocks, and then recover inline_data * x x -> recover data blocks */ - if (IS_INODE(&nfolio->page)) + if (IS_INODE(nfolio)) ri = F2FS_INODE(nfolio); if (f2fs_has_inline_data(inode) && diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index a8f64d206d19..dd3b43c24831 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -124,7 +124,7 @@ bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio) if (!f2fs_sb_has_inode_chksum(sbi)) return false; - if (!IS_INODE(&folio->page) || !(ri->i_inline & F2FS_EXTRA_ATTR)) + if (!IS_INODE(folio) || !(ri->i_inline & F2FS_EXTRA_ATTR)) return false; if (!F2FS_FITS_IN_INODE(ri, le16_to_cpu(ri->i_extra_isize), diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b56e627e0b56..908a1eb9c415 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -973,7 +973,7 @@ static int truncate_dnode(struct dnode_of_data *dn) else if (IS_ERR(folio)) 
return PTR_ERR(folio); - if (IS_INODE(&folio->page) || ino_of_node(folio) != dn->inode->i_ino) { + if (IS_INODE(folio) || ino_of_node(folio) != dn->inode->i_ino) { f2fs_err(sbi, "incorrect node reference, ino: %lu, nid: %u, ino_of_node: %u", dn->inode->i_ino, dn->nid, ino_of_node(folio)); set_sbi_flag(sbi, SBI_NEED_FSCK); @@ -1474,10 +1474,8 @@ static int sanity_check_node_footer(struct f2fs_sb_info *sbi, struct folio *folio, pgoff_t nid, enum node_type ntype) { - struct page *page = &folio->page; - if (unlikely(nid != nid_of_node(folio) || - (ntype == NODE_TYPE_INODE && !IS_INODE(page)) || + (ntype == NODE_TYPE_INODE && !IS_INODE(folio)) || (ntype == NODE_TYPE_XATTR && !f2fs_has_xattr_block(ofs_of_node(folio))) || time_to_inject(sbi, FAULT_INCONSISTENT_FOOTER))) { @@ -1867,7 +1865,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, if (!atomic || folio == last_folio) { set_fsync_mark(folio, 1); percpu_counter_inc(&sbi->rf_node_block_count); - if (IS_INODE(&folio->page)) { + if (IS_INODE(folio)) { if (is_inode_flag_set(inode, FI_DIRTY_INODE)) f2fs_update_inode(inode, folio); @@ -1976,7 +1974,7 @@ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi) for (i = 0; i < nr_folios; i++) { struct folio *folio = fbatch.folios[i]; - if (!IS_INODE(&folio->page)) + if (!IS_INODE(folio)) continue; folio_lock(folio); @@ -2077,7 +2075,7 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, } /* flush dirty inode */ - if (IS_INODE(&folio->page) && flush_dirty_inode(folio)) + if (IS_INODE(folio) && flush_dirty_inode(folio)) goto lock_node; write_node: f2fs_folio_wait_writeback(folio, NODE, true, true); @@ -2213,7 +2211,7 @@ static bool f2fs_dirty_node_folio(struct address_space *mapping, if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); #ifdef CONFIG_F2FS_CHECK_FS - if (IS_INODE(&folio->page)) + if (IS_INODE(folio)) f2fs_inode_chksum_set(F2FS_M_SB(mapping), folio); #endif if (filemap_dirty_folio(mapping, folio)) { diff --git 
a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 894b27b0329d..4cb3a91801b4 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -437,7 +437,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, bool quota_inode = false; if (!check_only && - IS_INODE(&folio->page) && + IS_INODE(folio) && is_dent_dnode(folio)) { err = f2fs_recover_inode_page(sbi, folio); if (err) { @@ -463,7 +463,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, } entry->blkaddr = blkaddr; - if (IS_INODE(&folio->page) && is_dent_dnode(folio)) + if (IS_INODE(folio) && is_dent_dnode(folio)) entry->last_dentry = blkaddr; next: /* check next segment */ @@ -628,7 +628,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, int err = 0, recovered = 0; /* step 1: recover xattr */ - if (IS_INODE(&folio->page)) { + if (IS_INODE(folio)) { err = f2fs_recover_inline_xattr(inode, folio); if (err) goto out; @@ -821,7 +821,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, * In this case, we can lose the latest inode(x). * So, call recover_inode for the inode update. 
*/ - if (IS_INODE(&folio->page)) { + if (IS_INODE(folio)) { err = recover_inode(entry->inode, folio); if (err) { f2fs_folio_put(folio, true); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 25857877eaec..2f8b8bfc0e73 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -268,7 +268,7 @@ struct node_footer { /* Node IDs in an Indirect Block */ #define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) -#define ADDRS_PER_PAGE(folio, inode) (addrs_per_page(inode, IS_INODE(&folio->page))) +#define ADDRS_PER_PAGE(folio, inode) (addrs_per_page(inode, IS_INODE(folio))) #define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) #define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) From 4ecaf580ee3520265350d0433755dc080f118afa Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:35 +0100 Subject: [PATCH 0885/2411] f2fs: Add folio counterparts to page_private_flags functions Name these new functions folio_test_f2fs_*(), folio_set_f2fs_*() and folio_clear_f2fs_*(). Convert all callers which currently have a folio and cast back to a page. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ++-- fs/f2fs/data.c | 12 ++++++------ fs/f2fs/f2fs.h | 28 ++++++++++++++++++++++++++++ fs/f2fs/file.c | 6 +++--- fs/f2fs/gc.c | 6 +++--- fs/f2fs/inline.c | 4 ++-- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 10 +++++----- 8 files changed, 50 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 07ca10c66649..db3831f7f2f5 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -485,7 +485,7 @@ static bool f2fs_dirty_meta_folio(struct address_space *mapping, folio_mark_uptodate(folio); if (filemap_dirty_folio(mapping, folio)) { inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_META); - set_page_private_reference(&folio->page); + folio_set_f2fs_reference(folio); return true; } return false; @@ -1045,7 +1045,7 @@ void f2fs_update_dirty_folio(struct inode *inode, struct folio *folio) inode_inc_dirty_pages(inode); spin_unlock(&sbi->inode_lock[type]); - set_page_private_reference(&folio->page); + folio_set_f2fs_reference(folio); } void f2fs_remove_dirty_inode(struct inode *inode) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8a2414ce39ff..f3e11f5672ec 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -360,7 +360,7 @@ static void f2fs_write_end_io(struct bio *bio) dec_page_count(sbi, type); if (f2fs_in_warm_node_list(sbi, folio)) f2fs_del_fsync_node_entry(sbi, folio); - clear_page_private_gcing(&folio->page); + folio_clear_f2fs_gcing(folio); folio_end_writeback(folio); } if (!get_pages(sbi, F2FS_WB_CP_DATA) && @@ -2659,7 +2659,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) /* Use COW inode to make dnode_of_data for atomic write */ atomic_commit = f2fs_is_atomic_file(inode) && - page_private_atomic(folio_page(folio, 0)); + folio_test_f2fs_atomic(folio); if (atomic_commit) set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0); else @@ -2690,7 +2690,7 @@ int f2fs_do_write_data_page(struct 
f2fs_io_info *fio) /* This page is already truncated */ if (fio->old_blkaddr == NULL_ADDR) { folio_clear_uptodate(folio); - clear_page_private_gcing(folio_page(folio, 0)); + folio_clear_f2fs_gcing(folio); goto out_writepage; } got_it: @@ -2760,7 +2760,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) trace_f2fs_do_write_data_page(folio, OPU); set_inode_flag(inode, FI_APPEND_WRITE); if (atomic_commit) - clear_page_private_atomic(folio_page(folio, 0)); + folio_clear_f2fs_atomic(folio); out_writepage: f2fs_put_dnode(&dn); out: @@ -3383,7 +3383,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, f2fs_do_read_inline_data(folio, ifolio); set_inode_flag(inode, FI_DATA_EXIST); if (inode->i_nlink) - set_page_private_inline(&ifolio->page); + folio_set_f2fs_inline(ifolio); goto out; } err = f2fs_convert_inline_folio(&dn, folio); @@ -3703,7 +3703,7 @@ static int f2fs_write_end(struct file *file, folio_mark_dirty(folio); if (f2fs_is_atomic_file(inode)) - set_page_private_atomic(folio_page(folio, 0)); + folio_set_f2fs_atomic(folio); if (pos + copied > i_size_read(inode) && !f2fs_verity_in_progress(inode)) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 09ddc0626dfe..0e607305e308 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2461,6 +2461,13 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, } #define PAGE_PRIVATE_GET_FUNC(name, flagname) \ +static inline bool folio_test_f2fs_##name(const struct folio *folio) \ +{ \ + unsigned long priv = (unsigned long)folio->private; \ + unsigned long v = (1UL << PAGE_PRIVATE_NOT_POINTER) | \ + (1UL << PAGE_PRIVATE_##flagname); \ + return (priv & v) == v; \ +} \ static inline bool page_private_##name(struct page *page) \ { \ return PagePrivate(page) && \ @@ -2469,6 +2476,17 @@ static inline bool page_private_##name(struct page *page) \ } #define PAGE_PRIVATE_SET_FUNC(name, flagname) \ +static inline void folio_set_f2fs_##name(struct folio *folio) \ +{ \ + unsigned long v = (1UL << 
PAGE_PRIVATE_NOT_POINTER) | \ + (1UL << PAGE_PRIVATE_##flagname); \ + if (!folio->private) \ + folio_attach_private(folio, (void *)v); \ + else { \ + v |= (unsigned long)folio->private; \ + folio->private = (void *)v; \ + } \ +} \ static inline void set_page_private_##name(struct page *page) \ { \ if (!PagePrivate(page)) \ @@ -2478,6 +2496,16 @@ static inline void set_page_private_##name(struct page *page) \ } #define PAGE_PRIVATE_CLEAR_FUNC(name, flagname) \ +static inline void folio_clear_f2fs_##name(struct folio *folio) \ +{ \ + unsigned long v = (unsigned long)folio->private; \ + \ + v &= ~(1UL << PAGE_PRIVATE_##flagname); \ + if (v == (1UL << PAGE_PRIVATE_NOT_POINTER)) \ + folio_detach_private(folio); \ + else \ + folio->private = (void *)v; \ +} \ static inline void clear_page_private_##name(struct page *page) \ { \ clear_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 36b32757d5b9..4039ccb5022c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1463,7 +1463,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, memcpy_folio(fdst, 0, fsrc, 0, PAGE_SIZE); folio_mark_dirty(fdst); - set_page_private_gcing(&fdst->page); + folio_set_f2fs_gcing(fdst); f2fs_folio_put(fdst, true); f2fs_folio_put(fsrc, true); @@ -2987,7 +2987,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, f2fs_folio_wait_writeback(folio, DATA, true, true); folio_mark_dirty(folio); - set_page_private_gcing(&folio->page); + folio_set_f2fs_gcing(folio); f2fs_folio_put(folio, true); idx++; @@ -4424,7 +4424,7 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) f2fs_folio_wait_writeback(folio, DATA, true, true); folio_mark_dirty(folio); - set_page_private_gcing(&folio->page); + folio_set_f2fs_gcing(folio); redirty_idx = folio_next_index(folio); folio_unlock(folio); folio_put_refs(folio, 2); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c1d4ecbd2505..271c7f90741b 100644 --- 
a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1473,7 +1473,7 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, goto out; } folio_mark_dirty(folio); - set_page_private_gcing(&folio->page); + folio_set_f2fs_gcing(folio); } else { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), @@ -1499,11 +1499,11 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, f2fs_remove_dirty_inode(inode); } - set_page_private_gcing(&folio->page); + folio_set_f2fs_gcing(folio); err = f2fs_do_write_data_page(&fio); if (err) { - clear_page_private_gcing(&folio->page); + folio_clear_f2fs_gcing(folio); if (err == -ENOMEM) { memalloc_retry_wait(GFP_NOFS); goto retry; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 51adc43d5a5c..58ac831ef704 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -206,7 +206,7 @@ int f2fs_convert_inline_folio(struct dnode_of_data *dn, struct folio *folio) /* clear inline data and flag after data writeback */ f2fs_truncate_inline_inode(dn->inode, dn->inode_folio, 0); - clear_page_private_inline(&dn->inode_folio->page); + folio_clear_f2fs_inline(dn->inode_folio); clear_out: stat_dec_inline_inode(dn->inode); clear_inode_flag(dn->inode, FI_INLINE_DATA); @@ -286,7 +286,7 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio) set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); - clear_page_private_inline(&ifolio->page); + folio_clear_f2fs_inline(ifolio); f2fs_folio_put(ifolio, 1); return 0; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index dd3b43c24831..cc9bea5b97f3 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -749,7 +749,7 @@ void f2fs_update_inode(struct inode *inode, struct folio *node_folio) /* deleted inode */ if (inode->i_nlink == 0) - clear_page_private_inline(&node_folio->page); + folio_clear_f2fs_inline(node_folio); init_idisk_time(inode); #ifdef CONFIG_F2FS_CHECK_FS diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 908a1eb9c415..f1c6c0c8ee74 100644 
--- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1985,8 +1985,8 @@ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi) goto unlock; /* flush inline_data, if it's async context. */ - if (page_private_inline(&folio->page)) { - clear_page_private_inline(&folio->page); + if (folio_test_f2fs_inline(folio)) { + folio_clear_f2fs_inline(folio); folio_unlock(folio); flush_inline_data(sbi, ino_of_node(folio)); continue; @@ -2067,8 +2067,8 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, goto write_node; /* flush inline_data */ - if (page_private_inline(&folio->page)) { - clear_page_private_inline(&folio->page); + if (folio_test_f2fs_inline(folio)) { + folio_clear_f2fs_inline(folio); folio_unlock(folio); flush_inline_data(sbi, ino_of_node(folio)); goto lock_node; @@ -2216,7 +2216,7 @@ static bool f2fs_dirty_node_folio(struct address_space *mapping, #endif if (filemap_dirty_folio(mapping, folio)) { inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); - set_page_private_reference(&folio->page); + folio_set_f2fs_reference(folio); return true; } return false; From a824388d911927b2a82bf7dcfd7cef6ee45c8b43 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:36 +0100 Subject: [PATCH 0886/2411] f2fs: Use a folio in f2fs_is_cp_guaranteed() Convert the passed page to a folio and use it throughout. Removes a use of fscrypt_is_bounce_page(), which we're trying to remove. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 11 ++++++----- fs/f2fs/f2fs.h | 2 +- include/linux/fscrypt.h | 10 ++++++---- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f3e11f5672ec..c1fc8c7b1256 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -47,14 +47,15 @@ void f2fs_destroy_bioset(void) bioset_exit(&f2fs_bioset); } -bool f2fs_is_cp_guaranteed(struct page *page) +bool f2fs_is_cp_guaranteed(const struct page *page) { - struct address_space *mapping = page_folio(page)->mapping; + const struct folio *folio = page_folio(page); + struct address_space *mapping = folio->mapping; struct inode *inode; struct f2fs_sb_info *sbi; - if (fscrypt_is_bounce_page(page)) - return page_private_gcing(fscrypt_pagecache_page(page)); + if (fscrypt_is_bounce_folio(folio)) + return folio_test_f2fs_gcing(fscrypt_pagecache_folio(folio)); inode = mapping->host; sbi = F2FS_I_SB(inode); @@ -65,7 +66,7 @@ bool f2fs_is_cp_guaranteed(struct page *page) return true; if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) || - page_private_gcing(page)) + folio_test_f2fs_gcing(folio)) return true; return false; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0e607305e308..be9b7a0120a9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3990,7 +3990,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi); */ int __init f2fs_init_bioset(void); void f2fs_destroy_bioset(void); -bool f2fs_is_cp_guaranteed(struct page *page); +bool f2fs_is_cp_guaranteed(const struct page *page); int f2fs_init_bio_entry_cache(void); void f2fs_destroy_bio_entry_cache(void); void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio, diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 56fad33043d5..8d9127a0fdb3 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -332,12 +332,13 @@ static inline struct page *fscrypt_pagecache_page(struct page 
*bounce_page) return (struct page *)page_private(bounce_page); } -static inline bool fscrypt_is_bounce_folio(struct folio *folio) +static inline bool fscrypt_is_bounce_folio(const struct folio *folio) { return folio->mapping == NULL; } -static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) +static inline +struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio) { return bounce_folio->private; } @@ -518,12 +519,13 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) return ERR_PTR(-EINVAL); } -static inline bool fscrypt_is_bounce_folio(struct folio *folio) +static inline bool fscrypt_is_bounce_folio(const struct folio *folio) { return false; } -static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) +static inline +struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio) { WARN_ON_ONCE(1); return ERR_PTR(-EINVAL); From 161922410d6ec7231740c28557d387dbd79fe132 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:37 +0100 Subject: [PATCH 0887/2411] f2fs: Convert set_page_private_data() to folio_set_f2fs_data() The only caller has a folio, so pass it in and operate on it. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 +- fs/f2fs/f2fs.h | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 5be1a4396f80..d7346c1fcd62 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1953,7 +1953,7 @@ void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, return; } - set_page_private_data(&cfolio->page, ino); + folio_set_f2fs_data(cfolio, ino); memcpy(folio_address(cfolio), page_address(page), PAGE_SIZE); folio_mark_uptodate(cfolio); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index be9b7a0120a9..ffb80b9756a5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2537,12 +2537,14 @@ static inline unsigned long get_page_private_data(struct page *page) return data >> PAGE_PRIVATE_MAX; } -static inline void set_page_private_data(struct page *page, unsigned long data) +static inline void folio_set_f2fs_data(struct folio *folio, unsigned long data) { - if (!PagePrivate(page)) - attach_page_private(page, (void *)0); - set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)); - page_private(page) |= data << PAGE_PRIVATE_MAX; + data = (1UL << PAGE_PRIVATE_NOT_POINTER) | (data << PAGE_PRIVATE_MAX); + + if (!folio_test_private(folio)) + folio_attach_private(folio, (void *)data); + else + folio->private = (void *)((unsigned long)folio->private | data); } static inline void clear_page_private_data(struct page *page) From 3659196c872349b8c4a4f1ef389780b2ab8f0093 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:38 +0100 Subject: [PATCH 0888/2411] f2fs: Convert get_page_private_data() to folio_get_f2fs_data() The only caller already has a folio so convert this function to be folio based. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 +- fs/f2fs/f2fs.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index d7346c1fcd62..4e432df2431f 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -2012,7 +2012,7 @@ void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino) continue; } - if (ino != get_page_private_data(&folio->page)) { + if (ino != folio_get_f2fs_data(folio)) { folio_unlock(folio); continue; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ffb80b9756a5..b2c694949657 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2528,9 +2528,9 @@ PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION); PAGE_PRIVATE_CLEAR_FUNC(atomic, ATOMIC_WRITE); -static inline unsigned long get_page_private_data(struct page *page) +static inline unsigned long folio_get_f2fs_data(struct folio *folio) { - unsigned long data = page_private(page); + unsigned long data = (unsigned long)folio->private; if (!test_bit(PAGE_PRIVATE_NOT_POINTER, &data)) return 0; From ca8049c99f3d297665b8e5c5c3bec08573386691 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:39 +0100 Subject: [PATCH 0889/2411] f2fs: Pass a folio to f2fs_compress_write_end_io() The only caller has a folio so pass it in. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 6 +++--- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 4e432df2431f..c1334e61823c 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1473,11 +1473,11 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, return -EAGAIN; } -void f2fs_compress_write_end_io(struct bio *bio, struct page *page) +void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio) { + struct page *page = &folio->page; struct f2fs_sb_info *sbi = bio->bi_private; - struct compress_io_ctx *cic = - (struct compress_io_ctx *)page_private(page); + struct compress_io_ctx *cic = folio->private; enum count_type type = WB_DATA_TYPE(page, f2fs_is_compressed_page(page)); int i; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c1fc8c7b1256..9d792cc7d00d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -341,7 +341,7 @@ static void f2fs_write_end_io(struct bio *bio) #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_is_compressed_page(&folio->page)) { - f2fs_compress_write_end_io(bio, &folio->page); + f2fs_compress_write_end_io(bio, folio); continue; } #endif diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b2c694949657..ad3694071d8b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4481,7 +4481,7 @@ int f2fs_prepare_compress_overwrite(struct inode *inode, bool f2fs_compress_write_end(struct inode *inode, void *fsdata, pgoff_t index, unsigned copied); int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock); -void f2fs_compress_write_end_io(struct bio *bio, struct page *page); +void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio); bool f2fs_is_compress_backend_ready(struct inode *inode); bool f2fs_is_compress_level_valid(int alg, int lvl); int __init f2fs_init_compress_mempool(void); From 
5e2a00e6e0099fa7f22be90ee87c5019b2e02223 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:40 +0100 Subject: [PATCH 0890/2411] f2fs: Use a folio in f2fs_merge_page_bio() We have two folios to deal with here; one carries the metadata and the other points to the data. They may be the same, but if it's compressed, the data_folio will differ from the metadata folio. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9d792cc7d00d..29dd31fb035a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -886,15 +886,15 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, int f2fs_merge_page_bio(struct f2fs_io_info *fio) { struct bio *bio = *fio->bio; - struct page *page = fio->encrypted_page ? - fio->encrypted_page : fio->page; + struct folio *data_folio = fio->encrypted_page ? + page_folio(fio->encrypted_page) : fio->folio; struct folio *folio = fio->folio; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, __is_meta_io(fio) ? 
META_GENERIC : DATA_GENERIC)) return -EFSCORRUPTED; - trace_f2fs_submit_folio_bio(page_folio(page), fio); + trace_f2fs_submit_folio_bio(data_folio, fio); if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block, fio->new_blkaddr)) @@ -905,16 +905,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) f2fs_set_bio_crypt_ctx(bio, folio->mapping->host, folio->index, fio, GFP_NOIO); - add_bio_entry(fio->sbi, bio, page, fio->temp); + add_bio_entry(fio->sbi, bio, &data_folio->page, fio->temp); } else { - if (add_ipu_page(fio, &bio, page)) + if (add_ipu_page(fio, &bio, &data_folio->page)) goto alloc_new; } if (fio->io_wbc) wbc_account_cgroup_owner(fio->io_wbc, folio, folio_size(folio)); - inc_page_count(fio->sbi, WB_DATA_TYPE(page, false)); + inc_page_count(fio->sbi, WB_DATA_TYPE(&data_folio->page, false)); *fio->last_block = fio->new_blkaddr; *fio->bio = bio; From fec903541713bcb606f7f93cfdad99d2083cfda7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:41 +0100 Subject: [PATCH 0891/2411] f2fs: Use a folio in f2fs_submit_page_write() Convert bio_page to bio_folio and use it throughout. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 29dd31fb035a..c5050d90dcd1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -949,7 +949,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) struct f2fs_sb_info *sbi = fio->sbi; enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp; - struct page *bio_page; + struct folio *bio_folio; enum count_type type; f2fs_bug_on(sbi, is_read_io(fio->op)); @@ -980,33 +980,33 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) verify_fio_blkaddr(fio); if (fio->encrypted_page) - bio_page = fio->encrypted_page; + bio_folio = page_folio(fio->encrypted_page); else if (fio->compressed_page) - bio_page = fio->compressed_page; + bio_folio = page_folio(fio->compressed_page); else - bio_page = fio->page; + bio_folio = fio->folio; /* set submitted = true as a return value */ fio->submitted = 1; - type = WB_DATA_TYPE(bio_page, fio->compressed_page); + type = WB_DATA_TYPE(&bio_folio->page, fio->compressed_page); inc_page_count(sbi, type); if (io->bio && (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio, fio->new_blkaddr) || !f2fs_crypt_mergeable_bio(io->bio, fio_inode(fio), - page_folio(bio_page)->index, fio))) + bio_folio->index, fio))) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { io->bio = __bio_alloc(fio, BIO_MAX_VECS); f2fs_set_bio_crypt_ctx(io->bio, fio_inode(fio), - page_folio(bio_page)->index, fio, GFP_NOIO); + bio_folio->index, fio, GFP_NOIO); io->fio = *fio; } - if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) { + if (!bio_add_folio(io->bio, bio_folio, folio_size(bio_folio), 0)) { __submit_merged_bio(io); goto alloc_new; } From d6966e7ed280caf1f4397c4a0cad14618e5ff5f7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 
18:03:42 +0100 Subject: [PATCH 0892/2411] f2fs: Pass a folio to WB_DATA_TYPE() and f2fs_is_cp_guaranteed() All callers now have a folio so pass it in. Removes a call to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 +- fs/f2fs/data.c | 11 +++++------ fs/f2fs/f2fs.h | 6 +++--- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index c1334e61823c..10b4230607de 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1478,7 +1478,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio) struct page *page = &folio->page; struct f2fs_sb_info *sbi = bio->bi_private; struct compress_io_ctx *cic = folio->private; - enum count_type type = WB_DATA_TYPE(page, + enum count_type type = WB_DATA_TYPE(folio, f2fs_is_compressed_page(page)); int i; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c5050d90dcd1..7993b2f8d711 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -47,9 +47,8 @@ void f2fs_destroy_bioset(void) bioset_exit(&f2fs_bioset); } -bool f2fs_is_cp_guaranteed(const struct page *page) +bool f2fs_is_cp_guaranteed(const struct folio *folio) { - const struct folio *folio = page_folio(page); struct address_space *mapping = folio->mapping; struct inode *inode; struct f2fs_sb_info *sbi; @@ -346,7 +345,7 @@ static void f2fs_write_end_io(struct bio *bio) } #endif - type = WB_DATA_TYPE(&folio->page, false); + type = WB_DATA_TYPE(folio, false); if (unlikely(bio->bi_status != BLK_STS_OK)) { mapping_set_error(folio->mapping, -EIO); @@ -713,7 +712,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) wbc_account_cgroup_owner(fio->io_wbc, fio_folio, PAGE_SIZE); inc_page_count(fio->sbi, is_read_io(fio->op) ? 
- __read_io_type(data_folio) : WB_DATA_TYPE(fio->page, false)); + __read_io_type(data_folio) : WB_DATA_TYPE(fio->folio, false)); if (is_read_io(bio_op(bio))) f2fs_submit_read_bio(fio->sbi, bio, fio->type); @@ -914,7 +913,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) if (fio->io_wbc) wbc_account_cgroup_owner(fio->io_wbc, folio, folio_size(folio)); - inc_page_count(fio->sbi, WB_DATA_TYPE(&data_folio->page, false)); + inc_page_count(fio->sbi, WB_DATA_TYPE(data_folio, false)); *fio->last_block = fio->new_blkaddr; *fio->bio = bio; @@ -989,7 +988,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) /* set submitted = true as a return value */ fio->submitted = 1; - type = WB_DATA_TYPE(&bio_folio->page, fio->compressed_page); + type = WB_DATA_TYPE(bio_folio, fio->compressed_page); inc_page_count(sbi, type); if (io->bio && diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ad3694071d8b..c9e1bf89bbfa 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1123,8 +1123,8 @@ struct f2fs_sm_info { * f2fs monitors the number of several block types such as on-writeback, * dirty dentry blocks, dirty node blocks, and dirty meta blocks. */ -#define WB_DATA_TYPE(p, f) \ - (f || f2fs_is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA) +#define WB_DATA_TYPE(folio, f) \ + (f || f2fs_is_cp_guaranteed(folio) ? 
F2FS_WB_CP_DATA : F2FS_WB_DATA) enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_DATA, @@ -3992,7 +3992,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi); */ int __init f2fs_init_bioset(void); void f2fs_destroy_bioset(void); -bool f2fs_is_cp_guaranteed(const struct page *page); +bool f2fs_is_cp_guaranteed(const struct folio *folio); int f2fs_init_bio_entry_cache(void); void f2fs_destroy_bio_entry_cache(void); void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio, From a9249a2671bca27860b152fc5db32d448f359af3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:43 +0100 Subject: [PATCH 0893/2411] f2fs: Use a folio iterator in f2fs_handle_step_decompress() Change from bio_for_each_segment_all() to bio_for_each_folio_all() to iterate over each folio instead of each page. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7993b2f8d711..47c10cec1540 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -233,16 +233,15 @@ static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task) static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx, bool in_task) { - struct bio_vec *bv; - struct bvec_iter_all iter_all; + struct folio_iter fi; bool all_compressed = true; block_t blkaddr = ctx->fs_blkaddr; - bio_for_each_segment_all(bv, ctx->bio, iter_all) { - struct page *page = bv->bv_page; + bio_for_each_folio_all(fi, ctx->bio) { + struct folio *folio = fi.folio; - if (f2fs_is_compressed_page(page)) - f2fs_end_read_compressed_page(page, false, blkaddr, + if (f2fs_is_compressed_page(&folio->page)) + f2fs_end_read_compressed_page(&folio->page, false, blkaddr, in_task); else all_compressed = false; From 587b2df524f9cd3f799d7196315453b5f2b01813 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:44 +0100 
Subject: [PATCH 0894/2411] f2fs: Pass a folio to f2fs_end_read_compressed_page() Both callers now have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 7 +++---- fs/f2fs/data.c | 4 ++-- fs/f2fs/f2fs.h | 4 ++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 10b4230607de..5847d22a5833 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -801,11 +801,10 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) * page being waited on in the cluster, and if so, it decompresses the cluster * (or in the case of a failure, cleans up without actually decompressing). */ -void f2fs_end_read_compressed_page(struct page *page, bool failed, +void f2fs_end_read_compressed_page(struct folio *folio, bool failed, block_t blkaddr, bool in_task) { - struct decompress_io_ctx *dic = - (struct decompress_io_ctx *)page_private(page); + struct decompress_io_ctx *dic = folio->private; struct f2fs_sb_info *sbi = dic->sbi; dec_page_count(sbi, F2FS_RD_DATA); @@ -813,7 +812,7 @@ void f2fs_end_read_compressed_page(struct page *page, bool failed, if (failed) WRITE_ONCE(dic->failed, true); else if (blkaddr && in_task) - f2fs_cache_compressed_page(sbi, page, + f2fs_cache_compressed_page(sbi, &folio->page, dic->inode->i_ino, blkaddr); if (atomic_dec_and_test(&dic->remaining_pages)) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 47c10cec1540..092c9871acdf 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -144,7 +144,7 @@ static void f2fs_finish_read_bio(struct bio *bio, bool in_task) if (f2fs_is_compressed_page(&folio->page)) { if (ctx && !ctx->decompression_attempted) - f2fs_end_read_compressed_page(&folio->page, true, 0, + f2fs_end_read_compressed_page(folio, true, 0, in_task); f2fs_put_folio_dic(folio, in_task); continue; @@ -241,7 +241,7 @@ static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx, struct 
folio *folio = fi.folio; if (f2fs_is_compressed_page(&folio->page)) - f2fs_end_read_compressed_page(&folio->page, false, blkaddr, + f2fs_end_read_compressed_page(folio, false, blkaddr, in_task); else all_compressed = false; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c9e1bf89bbfa..2c3b6ea5c1d2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4487,7 +4487,7 @@ bool f2fs_is_compress_level_valid(int alg, int lvl); int __init f2fs_init_compress_mempool(void); void f2fs_destroy_compress_mempool(void); void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task); -void f2fs_end_read_compressed_page(struct page *page, bool failed, +void f2fs_end_read_compressed_page(struct folio *folio, bool failed, block_t blkaddr, bool in_task); bool f2fs_cluster_is_empty(struct compress_ctx *cc); bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); @@ -4561,7 +4561,7 @@ static inline int __init f2fs_init_compress_mempool(void) { return 0; } static inline void f2fs_destroy_compress_mempool(void) { } static inline void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) { } -static inline void f2fs_end_read_compressed_page(struct page *page, +static inline void f2fs_end_read_compressed_page(struct folio *folio, bool failed, block_t blkaddr, bool in_task) { WARN_ON_ONCE(1); From cabda16223ed7ac41af27e491a7385e5c5a0c5cd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:45 +0100 Subject: [PATCH 0895/2411] f2fs: Use a folio iterator in f2fs_verify_bio() Change from bio_for_each_segment_all() to bio_for_each_folio_all() to iterate over each folio instead of each page. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 092c9871acdf..5d3e8a4e754e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -181,14 +181,13 @@ static void f2fs_verify_bio(struct work_struct *work) * as those were handled separately by f2fs_end_read_compressed_page(). */ if (may_have_compressed_pages) { - struct bio_vec *bv; - struct bvec_iter_all iter_all; + struct folio_iter fi; - bio_for_each_segment_all(bv, bio, iter_all) { - struct page *page = bv->bv_page; + bio_for_each_folio_all(fi, bio) { + struct folio *folio = fi.folio; - if (!f2fs_is_compressed_page(page) && - !fsverity_verify_page(page)) { + if (!f2fs_is_compressed_page(&folio->page) && + !fsverity_verify_page(&folio->page)) { bio->bi_status = BLK_STS_IOERR; break; } From 9e3d138737f8b7a26d078dc088ed33da87884723 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:46 +0100 Subject: [PATCH 0896/2411] f2fs: Pass a folio to f2fs_is_compressed_page() All callers now have a folio so pass it in. Also remove the test for the private flag; it is redundant with checking folio->private for being NULL. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 14 ++++++-------- fs/f2fs/data.c | 10 +++++----- fs/f2fs/f2fs.h | 4 ++-- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 5847d22a5833..24c7489b7427 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -71,17 +71,15 @@ static pgoff_t start_idx_of_cluster(struct compress_ctx *cc) return cc->cluster_idx << cc->log_cluster_size; } -bool f2fs_is_compressed_page(struct page *page) +bool f2fs_is_compressed_page(struct folio *folio) { - if (!PagePrivate(page)) + if (!folio->private) return false; - if (!page_private(page)) - return false; - if (page_private_nonpointer(page)) + if (folio_test_f2fs_nonpointer(folio)) return false; - f2fs_bug_on(F2FS_P_SB(page), - *((u32 *)page_private(page)) != F2FS_COMPRESSED_PAGE_MAGIC); + f2fs_bug_on(F2FS_F_SB(folio), + *((u32 *)folio->private) != F2FS_COMPRESSED_PAGE_MAGIC); return true; } @@ -1478,7 +1476,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio) struct f2fs_sb_info *sbi = bio->bi_private; struct compress_io_ctx *cic = folio->private; enum count_type type = WB_DATA_TYPE(folio, - f2fs_is_compressed_page(page)); + f2fs_is_compressed_page(folio)); int i; if (unlikely(bio->bi_status != BLK_STS_OK)) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5d3e8a4e754e..1bb4b0c87e36 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -142,7 +142,7 @@ static void f2fs_finish_read_bio(struct bio *bio, bool in_task) bio_for_each_folio_all(fi, bio) { struct folio *folio = fi.folio; - if (f2fs_is_compressed_page(&folio->page)) { + if (f2fs_is_compressed_page(folio)) { if (ctx && !ctx->decompression_attempted) f2fs_end_read_compressed_page(folio, true, 0, in_task); @@ -186,7 +186,7 @@ static void f2fs_verify_bio(struct work_struct *work) bio_for_each_folio_all(fi, bio) { struct folio *folio = fi.folio; - if 
(!f2fs_is_compressed_page(&folio->page) && + if (!f2fs_is_compressed_page(folio) && !fsverity_verify_page(&folio->page)) { bio->bi_status = BLK_STS_IOERR; break; @@ -239,7 +239,7 @@ static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx, bio_for_each_folio_all(fi, ctx->bio) { struct folio *folio = fi.folio; - if (f2fs_is_compressed_page(&folio->page)) + if (f2fs_is_compressed_page(folio)) f2fs_end_read_compressed_page(folio, false, blkaddr, in_task); else @@ -337,7 +337,7 @@ static void f2fs_write_end_io(struct bio *bio) } #ifdef CONFIG_F2FS_FS_COMPRESSION - if (f2fs_is_compressed_page(&folio->page)) { + if (f2fs_is_compressed_page(folio)) { f2fs_compress_write_end_io(bio, folio); continue; } @@ -561,7 +561,7 @@ static bool __has_merged_page(struct bio *bio, struct inode *inode, if (IS_ERR(target)) continue; } - if (f2fs_is_compressed_page(&target->page)) { + if (f2fs_is_compressed_page(target)) { target = f2fs_compress_control_folio(target); if (IS_ERR(target)) continue; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2c3b6ea5c1d2..73f80b2514d1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4474,7 +4474,7 @@ enum cluster_check_type { CLUSTER_COMPR_BLKS, /* return # of compressed blocks in a cluster */ CLUSTER_RAW_BLKS /* return # of raw blocks in a cluster */ }; -bool f2fs_is_compressed_page(struct page *page); +bool f2fs_is_compressed_page(struct folio *folio); struct folio *f2fs_compress_control_folio(struct folio *folio); int f2fs_prepare_compress_overwrite(struct inode *inode, struct page **pagep, pgoff_t index, void **fsdata); @@ -4543,7 +4543,7 @@ void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino); sbi->compr_saved_block += diff; \ } while (0) #else -static inline bool f2fs_is_compressed_page(struct page *page) { return false; } +static inline bool f2fs_is_compressed_page(struct folio *folio) { return false; } static inline bool f2fs_is_compress_backend_ready(struct inode *inode) { if 
(!f2fs_compressed_file(inode)) From 3a19caf12f03a3d731dfae79384a5fe998bc28ca Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:47 +0100 Subject: [PATCH 0897/2411] f2fs: Convert get_next_nat_page() to get_next_nat_folio() Return a folio from this function and convert its one caller. Removes a call to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f1c6c0c8ee74..7c8fb3590136 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -135,7 +135,7 @@ static struct folio *get_current_nat_folio(struct f2fs_sb_info *sbi, nid_t nid) return f2fs_get_meta_folio_retry(sbi, current_nat_addr(sbi, nid)); } -static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) +static struct folio *get_next_nat_folio(struct f2fs_sb_info *sbi, nid_t nid) { struct folio *src_folio; struct folio *dst_folio; @@ -149,7 +149,7 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) /* get current nat block page with lock */ src_folio = get_current_nat_folio(sbi, nid); if (IS_ERR(src_folio)) - return &src_folio->page; + return src_folio; dst_folio = f2fs_grab_meta_folio(sbi, dst_off); f2fs_bug_on(sbi, folio_test_dirty(src_folio)); @@ -161,7 +161,7 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) set_to_next_nat(nm_i, nid); - return &dst_folio->page; + return dst_folio; } static struct nat_entry *__alloc_nat_entry(struct f2fs_sb_info *sbi, @@ -3010,7 +3010,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, bool to_journal = true; struct f2fs_nat_block *nat_blk; struct nat_entry *ne, *cur; - struct page *page = NULL; + struct folio *folio = NULL; /* * there are two steps to flush nat entries: @@ -3024,11 +3024,11 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, if (to_journal) { 
down_write(&curseg->journal_rwsem); } else { - page = get_next_nat_page(sbi, start_nid); - if (IS_ERR(page)) - return PTR_ERR(page); + folio = get_next_nat_folio(sbi, start_nid); + if (IS_ERR(folio)) + return PTR_ERR(folio); - nat_blk = page_address(page); + nat_blk = folio_address(folio); f2fs_bug_on(sbi, !nat_blk); } @@ -3064,8 +3064,8 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, if (to_journal) { up_write(&curseg->journal_rwsem); } else { - __update_nat_bits(sbi, start_nid, page); - f2fs_put_page(page, 1); + __update_nat_bits(sbi, start_nid, &folio->page); + f2fs_folio_put(folio, true); } /* Allow dirty nats by node block allocation in write_begin */ From c07de7557a5647e289287d6cf5063ebfa42afd68 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:48 +0100 Subject: [PATCH 0898/2411] f2fs: Pass the nat_blk to __update_nat_bits() The page argument is only used to look up the address of the nat_blk. Since the caller already has it, pass it in instead. Also mark it const as the nat_blk isn't modified by this function. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 7c8fb3590136..c22ff6203dc2 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2969,11 +2969,10 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes, } static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, - struct page *page) + const struct f2fs_nat_block *nat_blk) { struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK; - struct f2fs_nat_block *nat_blk = page_address(page); int valid = 0; int i = 0; @@ -3064,7 +3063,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, if (to_journal) { up_write(&curseg->journal_rwsem); } else { - __update_nat_bits(sbi, start_nid, &folio->page); + __update_nat_bits(sbi, start_nid, nat_blk); f2fs_folio_put(folio, true); } From 8591db2a6571e2074f0cab835f8ac2cff516529e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:49 +0100 Subject: [PATCH 0899/2411] f2fs: Pass a folio to F2FS_NODE() All callers now have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 10 +++++----- fs/f2fs/inode.c | 8 ++++---- fs/f2fs/node.c | 6 +++--- fs/f2fs/node.h | 30 +++++++++++++++--------------- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 73f80b2514d1..0122da864db4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2051,9 +2051,9 @@ static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) return (struct f2fs_checkpoint *)(sbi->ckpt); } -static inline struct f2fs_node *F2FS_NODE(const struct page *page) +static inline struct f2fs_node *F2FS_NODE(const struct folio *folio) { - return (struct f2fs_node *)page_address(page); + return (struct f2fs_node *)folio_address(folio); } 
static inline struct f2fs_inode *F2FS_INODE(const struct folio *folio) @@ -3051,7 +3051,7 @@ static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, static inline bool IS_INODE(const struct folio *folio) { - struct f2fs_node *p = F2FS_NODE(&folio->page); + struct f2fs_node *p = F2FS_NODE(folio); return RAW_IS_INODE(p); } @@ -3075,13 +3075,13 @@ static inline unsigned int get_dnode_base(struct inode *inode, return 0; return inode ? get_extra_isize(inode) : - offset_in_addr(&F2FS_NODE(&node_folio->page)->i); + offset_in_addr(&F2FS_NODE(node_folio)->i); } static inline __le32 *get_dnode_addr(struct inode *inode, struct folio *node_folio) { - return blkaddr_in_node(F2FS_NODE(&node_folio->page)) + + return blkaddr_in_node(F2FS_NODE(node_folio)) + get_dnode_base(inode, node_folio); } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index cc9bea5b97f3..154106aa350b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -119,7 +119,7 @@ static void __recover_inline_status(struct inode *inode, struct folio *ifolio) static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio) { - struct f2fs_inode *ri = &F2FS_NODE(&folio->page)->i; + struct f2fs_inode *ri = &F2FS_NODE(folio)->i; if (!f2fs_sb_has_inode_chksum(sbi)) return false; @@ -136,7 +136,7 @@ bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio) static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio) { - struct f2fs_node *node = F2FS_NODE(&folio->page); + struct f2fs_node *node = F2FS_NODE(folio); struct f2fs_inode *ri = &node->i; __le32 ino = node->footer.ino; __le32 gen = ri->i_generation; @@ -173,7 +173,7 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio) #endif return true; - ri = &F2FS_NODE(&folio->page)->i; + ri = &F2FS_NODE(folio)->i; provided = le32_to_cpu(ri->i_inode_checksum); calculated = f2fs_inode_chksum(sbi, folio); @@ -187,7 +187,7 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct 
folio *folio) void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct folio *folio) { - struct f2fs_inode *ri = &F2FS_NODE(&folio->page)->i; + struct f2fs_inode *ri = &F2FS_NODE(folio)->i; if (!f2fs_enable_inode_chksum(sbi, folio)) return; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c22ff6203dc2..4b3d9070e299 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1019,7 +1019,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, f2fs_ra_node_pages(folio, ofs, NIDS_PER_BLOCK); - rn = F2FS_NODE(&folio->page); + rn = F2FS_NODE(folio); if (depth < 3) { for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { child_nid = le32_to_cpu(rn->in.nid[i]); @@ -2789,7 +2789,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct folio *folio) /* 3: update and set xattr node page dirty */ if (folio) { - memcpy(F2FS_NODE(&xfolio->page), F2FS_NODE(&folio->page), + memcpy(F2FS_NODE(xfolio), F2FS_NODE(folio), VALID_XATTR_BLOCK_SIZE); folio_mark_dirty(xfolio); } @@ -2894,7 +2894,7 @@ int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, if (IS_ERR(folio)) return PTR_ERR(folio); - rn = F2FS_NODE(&folio->page); + rn = F2FS_NODE(folio); sum_entry->nid = rn->footer.nid; sum_entry->version = 0; sum_entry->ofs_in_node = 0; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 70a58c4052fe..030390543b54 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -245,39 +245,39 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) static inline nid_t ino_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(&node_folio->page); + struct f2fs_node *rn = F2FS_NODE(node_folio); return le32_to_cpu(rn->footer.ino); } static inline nid_t nid_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(&node_folio->page); + struct f2fs_node *rn = F2FS_NODE(node_folio); return le32_to_cpu(rn->footer.nid); } static inline unsigned int ofs_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = 
F2FS_NODE(&node_folio->page); + struct f2fs_node *rn = F2FS_NODE(node_folio); unsigned flag = le32_to_cpu(rn->footer.flag); return flag >> OFFSET_BIT_SHIFT; } static inline __u64 cpver_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(&node_folio->page); + struct f2fs_node *rn = F2FS_NODE(node_folio); return le64_to_cpu(rn->footer.cp_ver); } -static inline block_t next_blkaddr_of_node(struct folio *node_folio) +static inline block_t next_blkaddr_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(&node_folio->page); + struct f2fs_node *rn = F2FS_NODE(node_folio); return le32_to_cpu(rn->footer.next_blkaddr); } static inline void fill_node_footer(const struct folio *folio, nid_t nid, nid_t ino, unsigned int ofs, bool reset) { - struct f2fs_node *rn = F2FS_NODE(&folio->page); + struct f2fs_node *rn = F2FS_NODE(folio); unsigned int old_flag = 0; if (reset) @@ -296,15 +296,15 @@ static inline void fill_node_footer(const struct folio *folio, nid_t nid, static inline void copy_node_footer(const struct folio *dst, const struct folio *src) { - struct f2fs_node *src_rn = F2FS_NODE(&src->page); - struct f2fs_node *dst_rn = F2FS_NODE(&dst->page); + struct f2fs_node *src_rn = F2FS_NODE(src); + struct f2fs_node *dst_rn = F2FS_NODE(dst); memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer)); } static inline void fill_node_footer_blkaddr(struct folio *folio, block_t blkaddr) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_F_SB(folio)); - struct f2fs_node *rn = F2FS_NODE(&folio->page); + struct f2fs_node *rn = F2FS_NODE(folio); __u64 cp_ver = cur_cp_version(ckpt); if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) @@ -370,7 +370,7 @@ static inline bool IS_DNODE(const struct folio *node_folio) static inline int set_nid(struct folio *folio, int off, nid_t nid, bool i) { - struct f2fs_node *rn = F2FS_NODE(&folio->page); + struct f2fs_node *rn = F2FS_NODE(folio); f2fs_folio_wait_writeback(folio, NODE, true, true); @@ 
-383,7 +383,7 @@ static inline int set_nid(struct folio *folio, int off, nid_t nid, bool i) static inline nid_t get_nid(const struct folio *folio, int off, bool i) { - struct f2fs_node *rn = F2FS_NODE(&folio->page); + struct f2fs_node *rn = F2FS_NODE(folio); if (i) return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]); @@ -399,7 +399,7 @@ static inline nid_t get_nid(const struct folio *folio, int off, bool i) static inline int is_node(const struct folio *folio, int type) { - struct f2fs_node *rn = F2FS_NODE(&folio->page); + struct f2fs_node *rn = F2FS_NODE(folio); return le32_to_cpu(rn->footer.flag) & BIT(type); } @@ -409,7 +409,7 @@ static inline int is_node(const struct folio *folio, int type) static inline void set_cold_node(const struct folio *folio, bool is_dir) { - struct f2fs_node *rn = F2FS_NODE(&folio->page); + struct f2fs_node *rn = F2FS_NODE(folio); unsigned int flag = le32_to_cpu(rn->footer.flag); if (is_dir) @@ -421,7 +421,7 @@ static inline void set_cold_node(const struct folio *folio, bool is_dir) static inline void set_mark(struct folio *folio, int mark, int type) { - struct f2fs_node *rn = F2FS_NODE(&folio->page); + struct f2fs_node *rn = F2FS_NODE(folio); unsigned int flag = le32_to_cpu(rn->footer.flag); if (mark) flag |= BIT(type); From 49bb2b894e87bd9542ee6c5d67aeb7e3fcaee6e4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:50 +0100 Subject: [PATCH 0900/2411] f2fs: Pass a folio to f2fs_cache_compressed_page() The only caller already has a folio so pass it in. f2fs_cache_compressed_page() is not used outside compress.c so make it static. This requires a forward declaration (or would require rearranging this file, but I've chosen not to do that for readability of the diff). 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 11 +++++++---- fs/f2fs/f2fs.h | 4 ---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 24c7489b7427..5c1f47e45dab 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -793,6 +793,9 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) f2fs_decompress_end_io(dic, ret, in_task); } +static void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, + struct folio *folio, nid_t ino, block_t blkaddr); + /* * This is called when a page of a compressed cluster has been read from disk * (or failed to be read from disk). It checks whether this page was the last @@ -810,7 +813,7 @@ void f2fs_end_read_compressed_page(struct folio *folio, bool failed, if (failed) WRITE_ONCE(dic->failed, true); else if (blkaddr && in_task) - f2fs_cache_compressed_page(sbi, &folio->page, + f2fs_cache_compressed_page(sbi, folio, dic->inode->i_ino, blkaddr); if (atomic_dec_and_test(&dic->remaining_pages)) @@ -1918,8 +1921,8 @@ void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, invalidate_mapping_pages(COMPRESS_MAPPING(sbi), blkaddr, blkaddr + len - 1); } -void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, - nid_t ino, block_t blkaddr) +static void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, + struct folio *folio, nid_t ino, block_t blkaddr) { struct folio *cfolio; int ret; @@ -1952,7 +1955,7 @@ void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, folio_set_f2fs_data(cfolio, ino); - memcpy(folio_address(cfolio), page_address(page), PAGE_SIZE); + memcpy(folio_address(cfolio), folio_address(folio), PAGE_SIZE); folio_mark_uptodate(cfolio); f2fs_folio_put(cfolio, true); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0122da864db4..48fc1279101f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4525,8 +4525,6 @@ void 
f2fs_destroy_compress_cache(void); struct address_space *COMPRESS_MAPPING(struct f2fs_sb_info *sbi); void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, block_t blkaddr, unsigned int len); -void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, - nid_t ino, block_t blkaddr); bool f2fs_load_compressed_folio(struct f2fs_sb_info *sbi, struct folio *folio, block_t blkaddr); void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino); @@ -4581,8 +4579,6 @@ static inline int __init f2fs_init_compress_cache(void) { return 0; } static inline void f2fs_destroy_compress_cache(void) { } static inline void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, block_t blkaddr, unsigned int len) { } -static inline void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, - struct page *page, nid_t ino, block_t blkaddr) { } static inline bool f2fs_load_compressed_folio(struct f2fs_sb_info *sbi, struct folio *folio, block_t blkaddr) { return false; } static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, From 015622b8c7ed781329284802a690f1517d3599e6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:51 +0100 Subject: [PATCH 0901/2411] f2fs: Use a folio in f2fs_encrypted_get_link() Use a folio instead of a page when dealing with the page cache. Removes a hidden call to compound_head(). 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 07e333ee21b7..b882771e4699 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1298,19 +1298,19 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - struct page *page; + struct folio *folio; const char *target; if (!dentry) return ERR_PTR(-ECHILD); - page = read_mapping_page(inode->i_mapping, 0, NULL); - if (IS_ERR(page)) - return ERR_CAST(page); + folio = read_mapping_folio(inode->i_mapping, 0, NULL); + if (IS_ERR(folio)) + return ERR_CAST(folio); - target = fscrypt_get_symlink(inode, page_address(page), + target = fscrypt_get_symlink(inode, folio_address(folio), inode->i_sb->s_blocksize, done); - put_page(page); + folio_put(folio); return target; } From 0f54eec0cb89887e3ed8ed430f5b9cd513038ca4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:52 +0100 Subject: [PATCH 0902/2411] f2fs: Use F2FS_F_SB() in f2fs_read_end_io() Get the folio from the bio instead of the page. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1bb4b0c87e36..f1cbbea56a17 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -278,7 +278,7 @@ static void f2fs_post_read_work(struct work_struct *work) static void f2fs_read_end_io(struct bio *bio) { - struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio)); + struct f2fs_sb_info *sbi = F2FS_F_SB(bio_first_folio_all(bio)); struct bio_post_read_ctx *ctx; bool intask = in_task(); From 6974b21f7013fe08008f2fea5d61b96c5a4858dd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:53 +0100 Subject: [PATCH 0903/2411] f2fs: Remove clear_page_private_all() All callers can simply call folio_detach_private(). This was the only way that clear_page_private_data() could be called, so remove that too. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- fs/f2fs/dir.c | 2 +- fs/f2fs/f2fs.h | 18 ------------------ 3 files changed, 3 insertions(+), 21 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f1cbbea56a17..a062defcb019 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3736,7 +3736,7 @@ void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length) f2fs_remove_dirty_inode(inode); } } - clear_page_private_all(&folio->page); + folio_detach_private(folio); } bool f2fs_release_folio(struct folio *folio, gfp_t wait) @@ -3745,7 +3745,7 @@ bool f2fs_release_folio(struct folio *folio, gfp_t wait) if (folio_test_dirty(folio)) return false; - clear_page_private_all(&folio->page); + folio_detach_private(folio); return true; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 888dca7e82ac..fffd7749d6d1 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -897,7 +897,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct folio *folio, 
f2fs_clear_page_cache_dirty_tag(folio); folio_clear_dirty_for_io(folio); folio_clear_uptodate(folio); - clear_page_private_all(&folio->page); + folio_detach_private(folio); inode_dec_dirty_pages(dir); f2fs_remove_dirty_inode(dir); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 48fc1279101f..1b3708480c30 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2547,24 +2547,6 @@ static inline void folio_set_f2fs_data(struct folio *folio, unsigned long data) folio->private = (void *)((unsigned long)folio->private | data); } -static inline void clear_page_private_data(struct page *page) -{ - page_private(page) &= GENMASK(PAGE_PRIVATE_MAX - 1, 0); - if (page_private(page) == BIT(PAGE_PRIVATE_NOT_POINTER)) - detach_page_private(page); -} - -static inline void clear_page_private_all(struct page *page) -{ - clear_page_private_data(page); - clear_page_private_reference(page); - clear_page_private_gcing(page); - clear_page_private_inline(page); - clear_page_private_atomic(page); - - f2fs_bug_on(F2FS_P_SB(page), page_private(page)); -} - static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, block_t count) From 7695f8ccf61451305d08051cd1a1d8388f65fd54 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:54 +0100 Subject: [PATCH 0904/2411] f2fs: Remove use of page from f2fs_write_single_data_page() Both remaining uses of page now have a folio equivalent. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a062defcb019..90f7a85fa7b6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2776,7 +2776,6 @@ int f2fs_write_single_data_page(struct folio *folio, int *submitted, bool allow_balance) { struct inode *inode = folio->mapping->host; - struct page *page = folio_page(folio, 0); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); loff_t i_size = i_size_read(inode); const pgoff_t end_index = ((unsigned long long)i_size) @@ -2793,7 +2792,7 @@ int f2fs_write_single_data_page(struct folio *folio, int *submitted, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), .old_blkaddr = NULL_ADDR, - .page = page, + .folio = folio, .encrypted_page = NULL, .submitted = 0, .compr_blocks = compr_blocks, @@ -2895,7 +2894,7 @@ int f2fs_write_single_data_page(struct folio *folio, int *submitted, inode_dec_dirty_pages(inode); if (err) { folio_clear_uptodate(folio); - clear_page_private_gcing(page); + folio_clear_f2fs_gcing(folio); } folio_unlock(folio); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && From 06e42bf4327a410c72b7e689190f4c6b769e1e02 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:55 +0100 Subject: [PATCH 0905/2411] f2fs: Pass a folio to f2fs_submit_merged_write_cond() Most callers pass NULL, and the one that passes a page already has a folio. Also convert __submit_merged_write_cond() to take a folio. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 ++++---- fs/f2fs/f2fs.h | 2 +- fs/f2fs/segment.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 90f7a85fa7b6..7cba071db401 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -638,7 +638,7 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, } static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, + struct inode *inode, struct folio *folio, nid_t ino, enum page_type type, bool force) { enum temp_type temp; @@ -650,7 +650,7 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, struct f2fs_bio_info *io = sbi->write_io[btype] + temp; f2fs_down_read(&io->io_rwsem); - ret = __has_merged_page(io->bio, inode, page, ino); + ret = __has_merged_page(io->bio, inode, &folio->page, ino); f2fs_up_read(&io->io_rwsem); } if (ret) @@ -668,10 +668,10 @@ void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type) } void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, + struct inode *inode, struct folio *folio, nid_t ino, enum page_type type) { - __submit_merged_write_cond(sbi, inode, page, ino, type, false); + __submit_merged_write_cond(sbi, inode, folio, ino, type, false); } void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1b3708480c30..8e092f4fd670 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3982,7 +3982,7 @@ void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio, int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi); void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type); void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, + struct inode *inode, struct folio *folio, nid_t ino, enum page_type type); void 
f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, struct bio **bio, struct folio *folio); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 909637873ff7..cc82d42ef14c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4197,7 +4197,7 @@ void f2fs_folio_wait_writeback(struct folio *folio, enum page_type type, struct f2fs_sb_info *sbi = F2FS_F_SB(folio); /* submit cached LFS IO */ - f2fs_submit_merged_write_cond(sbi, NULL, &folio->page, 0, type); + f2fs_submit_merged_write_cond(sbi, NULL, folio, 0, type); /* submit cached IPU IO */ f2fs_submit_merged_ipu_write(sbi, NULL, folio); if (ordered) { From 5fb60c0365c4dad347e4958f78976cb733d903f2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:56 +0100 Subject: [PATCH 0906/2411] f2fs: Pass a folio to __has_merged_page() All three callers have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7cba071db401..d1a2616d41be 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -543,14 +543,14 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) } static bool __has_merged_page(struct bio *bio, struct inode *inode, - struct page *page, nid_t ino) + struct folio *folio, nid_t ino) { struct folio_iter fi; if (!bio) return false; - if (!inode && !page && !ino) + if (!inode && !folio && !ino) return true; bio_for_each_folio_all(fi, bio) { @@ -569,7 +569,7 @@ static bool __has_merged_page(struct bio *bio, struct inode *inode, if (inode && inode == target->mapping->host) return true; - if (page && page == &target->page) + if (folio && folio == target) return true; if (ino && ino == ino_of_node(target)) return true; @@ -650,7 +650,7 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, struct f2fs_bio_info *io = sbi->write_io[btype] + temp; f2fs_down_read(&io->io_rwsem); - 
ret = __has_merged_page(io->bio, inode, &folio->page, ino); + ret = __has_merged_page(io->bio, inode, folio, ino); f2fs_up_read(&io->io_rwsem); } if (ret) @@ -845,7 +845,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, found = (target == be->bio); else found = __has_merged_page(be->bio, NULL, - &folio->page, 0); + folio, 0); if (found) break; } @@ -862,7 +862,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, found = (target == be->bio); else found = __has_merged_page(be->bio, NULL, - &folio->page, 0); + folio, 0); if (found) { target = be->bio; del_bio_entry(be); From 816aa305cd499c5fd53a1960b6fa3e80b909d922 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:57 +0100 Subject: [PATCH 0907/2411] f2fs: Remove F2FS_P_SB() All callers have been converted to F2FS_F_SB() so delete this wrapper. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8e092f4fd670..b2cc22b29d6a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2026,11 +2026,6 @@ static inline struct f2fs_sb_info *F2FS_F_SB(const struct folio *folio) return F2FS_M_SB(folio->mapping); } -static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page) -{ - return F2FS_F_SB(page_folio(page)); -} - static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) { return (struct f2fs_super_block *)(sbi->raw_super); From 5661998536af52848cc4d52a377e90368196edea Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 11 Jul 2025 15:14:50 +0800 Subject: [PATCH 0908/2411] f2fs: fix to avoid out-of-boundary access in devs.path - touch /mnt/f2fs/012345678901234567890123456789012345678901234567890123 - truncate -s $((1024*1024*1024)) \ /mnt/f2fs/012345678901234567890123456789012345678901234567890123 - touch /mnt/f2fs/file - truncate -s $((1024*1024*1024)) /mnt/f2fs/file - mkfs.f2fs 
/mnt/f2fs/012345678901234567890123456789012345678901234567890123 \ -c /mnt/f2fs/file - mount /mnt/f2fs/012345678901234567890123456789012345678901234567890123 \ /mnt/f2fs/loop [16937.192225] F2FS-fs (loop0): Mount Device [ 0]: /mnt/f2fs/012345678901234567890123456789012345678901234567890123\xff\x01, 511, 0 - 3ffff [16937.192268] F2FS-fs (loop0): Failed to find devices If the device path length equals MAX_PATH_LEN, sbi->devs.path[] may not end with a null character because the path array is completely filled. As a result, fields located after path[] may accidentally be treated as part of the device path, resulting in a wrong device path being parsed. struct f2fs_dev_info { ... char path[MAX_PATH_LEN]; ... }; Let's add one extra byte to sbi->devs.path[] to store the terminating null character of the device path string. Fixes: 3c62be17d4f5 ("f2fs: support multiple devices") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b2cc22b29d6a..dfddb66910b3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1289,7 +1289,7 @@ struct f2fs_bio_info { struct f2fs_dev_info { struct file *bdev_file; struct block_device *bdev; - char path[MAX_PATH_LEN]; + char path[MAX_PATH_LEN + 1]; unsigned int total_segments; block_t start_blk; block_t end_blk; From f2091cc188c60d6f9436b4da5bd75cda46665315 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 10 Jul 2025 12:14:09 +0000 Subject: [PATCH 0909/2411] f2fs: Add fs parameter specifications for mount options Use an array of `fs_parameter_spec` called f2fs_param_specs to hold the mount option specifications for the new mount api. Add constant_table structures for several options to facilitate parsing. 
Signed-off-by: Hongbo Li [sandeen: forward port, minor fixes and updates, more fsparam_enum] Signed-off-by: Eric Sandeen Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 73492270ea93..713dc55f086b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -196,9 +197,130 @@ enum { Opt_age_extent_cache, Opt_errors, Opt_nat_bits, + Opt_jqfmt, + Opt_checkpoint, Opt_err, }; +static const struct constant_table f2fs_param_background_gc[] = { + {"on", BGGC_MODE_ON}, + {"off", BGGC_MODE_OFF}, + {"sync", BGGC_MODE_SYNC}, + {} +}; + +static const struct constant_table f2fs_param_mode[] = { + {"adaptive", FS_MODE_ADAPTIVE}, + {"lfs", FS_MODE_LFS}, + {"fragment:segment", FS_MODE_FRAGMENT_SEG}, + {"fragment:block", FS_MODE_FRAGMENT_BLK}, + {} +}; + +static const struct constant_table f2fs_param_jqfmt[] = { + {"vfsold", QFMT_VFS_OLD}, + {"vfsv0", QFMT_VFS_V0}, + {"vfsv1", QFMT_VFS_V1}, + {} +}; + +static const struct constant_table f2fs_param_alloc_mode[] = { + {"default", ALLOC_MODE_DEFAULT}, + {"reuse", ALLOC_MODE_REUSE}, + {} +}; +static const struct constant_table f2fs_param_fsync_mode[] = { + {"posix", FSYNC_MODE_POSIX}, + {"strict", FSYNC_MODE_STRICT}, + {"nobarrier", FSYNC_MODE_NOBARRIER}, + {} +}; + +static const struct constant_table f2fs_param_compress_mode[] = { + {"fs", COMPR_MODE_FS}, + {"user", COMPR_MODE_USER}, + {} +}; + +static const struct constant_table f2fs_param_discard_unit[] = { + {"block", DISCARD_UNIT_BLOCK}, + {"segment", DISCARD_UNIT_SEGMENT}, + {"section", DISCARD_UNIT_SECTION}, + {} +}; + +static const struct constant_table f2fs_param_memory_mode[] = { + {"normal", MEMORY_MODE_NORMAL}, + {"low", MEMORY_MODE_LOW}, + {} +}; + +static const struct constant_table f2fs_param_errors[] = { + {"remount-ro", 
MOUNT_ERRORS_READONLY}, + {"continue", MOUNT_ERRORS_CONTINUE}, + {"panic", MOUNT_ERRORS_PANIC}, + {} +}; + +static const struct fs_parameter_spec f2fs_param_specs[] = { + fsparam_enum("background_gc", Opt_gc_background, f2fs_param_background_gc), + fsparam_flag("disable_roll_forward", Opt_disable_roll_forward), + fsparam_flag("norecovery", Opt_norecovery), + fsparam_flag_no("discard", Opt_discard), + fsparam_flag("no_heap", Opt_noheap), + fsparam_flag("heap", Opt_heap), + fsparam_flag_no("user_xattr", Opt_user_xattr), + fsparam_flag_no("acl", Opt_acl), + fsparam_s32("active_logs", Opt_active_logs), + fsparam_flag("disable_ext_identify", Opt_disable_ext_identify), + fsparam_flag_no("inline_xattr", Opt_inline_xattr), + fsparam_s32("inline_xattr_size", Opt_inline_xattr_size), + fsparam_flag_no("inline_data", Opt_inline_data), + fsparam_flag_no("inline_dentry", Opt_inline_dentry), + fsparam_flag_no("flush_merge", Opt_flush_merge), + fsparam_flag_no("barrier", Opt_barrier), + fsparam_flag("fastboot", Opt_fastboot), + fsparam_flag_no("extent_cache", Opt_extent_cache), + fsparam_flag("data_flush", Opt_data_flush), + fsparam_u32("reserve_root", Opt_reserve_root), + fsparam_gid("resgid", Opt_resgid), + fsparam_uid("resuid", Opt_resuid), + fsparam_enum("mode", Opt_mode, f2fs_param_mode), + fsparam_s32("fault_injection", Opt_fault_injection), + fsparam_u32("fault_type", Opt_fault_type), + fsparam_flag_no("lazytime", Opt_lazytime), + fsparam_flag_no("quota", Opt_quota), + fsparam_flag("usrquota", Opt_usrquota), + fsparam_flag("grpquota", Opt_grpquota), + fsparam_flag("prjquota", Opt_prjquota), + fsparam_string_empty("usrjquota", Opt_usrjquota), + fsparam_string_empty("grpjquota", Opt_grpjquota), + fsparam_string_empty("prjjquota", Opt_prjjquota), + fsparam_flag("nat_bits", Opt_nat_bits), + fsparam_enum("jqfmt", Opt_jqfmt, f2fs_param_jqfmt), + fsparam_enum("alloc_mode", Opt_alloc, f2fs_param_alloc_mode), + fsparam_enum("fsync_mode", Opt_fsync, f2fs_param_fsync_mode), + 
fsparam_string("test_dummy_encryption", Opt_test_dummy_encryption), + fsparam_flag("test_dummy_encryption", Opt_test_dummy_encryption), + fsparam_flag("inlinecrypt", Opt_inlinecrypt), + fsparam_string("checkpoint", Opt_checkpoint), + fsparam_flag_no("checkpoint_merge", Opt_checkpoint_merge), + fsparam_string("compress_algorithm", Opt_compress_algorithm), + fsparam_u32("compress_log_size", Opt_compress_log_size), + fsparam_string("compress_extension", Opt_compress_extension), + fsparam_string("nocompress_extension", Opt_nocompress_extension), + fsparam_flag("compress_chksum", Opt_compress_chksum), + fsparam_enum("compress_mode", Opt_compress_mode, f2fs_param_compress_mode), + fsparam_flag("compress_cache", Opt_compress_cache), + fsparam_flag("atgc", Opt_atgc), + fsparam_flag_no("gc_merge", Opt_gc_merge), + fsparam_enum("discard_unit", Opt_discard_unit, f2fs_param_discard_unit), + fsparam_enum("memory", Opt_memory_mode, f2fs_param_memory_mode), + fsparam_flag("age_extent_cache", Opt_age_extent_cache), + fsparam_enum("errors", Opt_errors, f2fs_param_errors), + {} +}; + static match_table_t f2fs_tokens = { {Opt_gc_background, "background_gc=%s"}, {Opt_disable_roll_forward, "disable_roll_forward"}, From 02eb5fe42a8c6cfcf063126df7e41ec2036b083c Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 10 Jul 2025 12:14:10 +0000 Subject: [PATCH 0910/2411] f2fs: move the option parser into handle_mount_opt In handle_mount_opt, we use fs_parameter to parse each option. However we're still using the old API to get the options string. Using fsparams parse_options allows us to remove many of the Opt_ enums, so remove them. The checkpoint disable cap (or percent) involves rather complex parsing; we retain the old match_table mechanism for this, which handles it well. There are some changes to option parsing: 1. For `active_logs`, `inline_xattr_size` and `fault_injection`, we use the s32 type, according to the internal structure, to record the option's value. 
Signed-off-by: Hongbo Li [sandeen: forward port, minor fixes and updates] Signed-off-by: Eric Sandeen [hongbo: minor cleanup] Signed-off-by: Hongbo Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1128 +++++++++++++++++++---------------------------- 1 file changed, 443 insertions(+), 685 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 713dc55f086b..fddd33b1118c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "f2fs.h" @@ -126,29 +127,20 @@ enum { Opt_disable_roll_forward, Opt_norecovery, Opt_discard, - Opt_nodiscard, Opt_noheap, Opt_heap, Opt_user_xattr, - Opt_nouser_xattr, Opt_acl, - Opt_noacl, Opt_active_logs, Opt_disable_ext_identify, Opt_inline_xattr, - Opt_noinline_xattr, Opt_inline_xattr_size, Opt_inline_data, Opt_inline_dentry, - Opt_noinline_dentry, Opt_flush_merge, - Opt_noflush_merge, Opt_barrier, - Opt_nobarrier, Opt_fastboot, Opt_extent_cache, - Opt_noextent_cache, - Opt_noinline_data, Opt_data_flush, Opt_reserve_root, Opt_resgid, @@ -157,21 +149,13 @@ enum { Opt_fault_injection, Opt_fault_type, Opt_lazytime, - Opt_nolazytime, Opt_quota, - Opt_noquota, Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_usrjquota, Opt_grpjquota, Opt_prjjquota, - Opt_offusrjquota, - Opt_offgrpjquota, - Opt_offprjjquota, - Opt_jqfmt_vfsold, - Opt_jqfmt_vfsv0, - Opt_jqfmt_vfsv1, Opt_alloc, Opt_fsync, Opt_test_dummy_encryption, @@ -181,17 +165,15 @@ enum { Opt_checkpoint_disable_cap_perc, Opt_checkpoint_enable, Opt_checkpoint_merge, - Opt_nocheckpoint_merge, Opt_compress_algorithm, Opt_compress_log_size, - Opt_compress_extension, Opt_nocompress_extension, + Opt_compress_extension, Opt_compress_chksum, Opt_compress_mode, Opt_compress_cache, Opt_atgc, Opt_gc_merge, - Opt_nogc_merge, Opt_discard_unit, Opt_memory_mode, Opt_age_extent_cache, @@ -321,83 +303,12 @@ static const struct fs_parameter_spec f2fs_param_specs[] = { {} }; -static match_table_t f2fs_tokens = { - 
{Opt_gc_background, "background_gc=%s"}, - {Opt_disable_roll_forward, "disable_roll_forward"}, - {Opt_norecovery, "norecovery"}, - {Opt_discard, "discard"}, - {Opt_nodiscard, "nodiscard"}, - {Opt_noheap, "no_heap"}, - {Opt_heap, "heap"}, - {Opt_user_xattr, "user_xattr"}, - {Opt_nouser_xattr, "nouser_xattr"}, - {Opt_acl, "acl"}, - {Opt_noacl, "noacl"}, - {Opt_active_logs, "active_logs=%u"}, - {Opt_disable_ext_identify, "disable_ext_identify"}, - {Opt_inline_xattr, "inline_xattr"}, - {Opt_noinline_xattr, "noinline_xattr"}, - {Opt_inline_xattr_size, "inline_xattr_size=%u"}, - {Opt_inline_data, "inline_data"}, - {Opt_inline_dentry, "inline_dentry"}, - {Opt_noinline_dentry, "noinline_dentry"}, - {Opt_flush_merge, "flush_merge"}, - {Opt_noflush_merge, "noflush_merge"}, - {Opt_barrier, "barrier"}, - {Opt_nobarrier, "nobarrier"}, - {Opt_fastboot, "fastboot"}, - {Opt_extent_cache, "extent_cache"}, - {Opt_noextent_cache, "noextent_cache"}, - {Opt_noinline_data, "noinline_data"}, - {Opt_data_flush, "data_flush"}, - {Opt_reserve_root, "reserve_root=%u"}, - {Opt_resgid, "resgid=%u"}, - {Opt_resuid, "resuid=%u"}, - {Opt_mode, "mode=%s"}, - {Opt_fault_injection, "fault_injection=%u"}, - {Opt_fault_type, "fault_type=%u"}, - {Opt_lazytime, "lazytime"}, - {Opt_nolazytime, "nolazytime"}, - {Opt_quota, "quota"}, - {Opt_noquota, "noquota"}, - {Opt_usrquota, "usrquota"}, - {Opt_grpquota, "grpquota"}, - {Opt_prjquota, "prjquota"}, - {Opt_usrjquota, "usrjquota=%s"}, - {Opt_grpjquota, "grpjquota=%s"}, - {Opt_prjjquota, "prjjquota=%s"}, - {Opt_offusrjquota, "usrjquota="}, - {Opt_offgrpjquota, "grpjquota="}, - {Opt_offprjjquota, "prjjquota="}, - {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, - {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, - {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, - {Opt_alloc, "alloc_mode=%s"}, - {Opt_fsync, "fsync_mode=%s"}, - {Opt_test_dummy_encryption, "test_dummy_encryption=%s"}, - {Opt_test_dummy_encryption, "test_dummy_encryption"}, - {Opt_inlinecrypt, "inlinecrypt"}, - {Opt_checkpoint_disable, 
"checkpoint=disable"}, - {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"}, - {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"}, - {Opt_checkpoint_enable, "checkpoint=enable"}, - {Opt_checkpoint_merge, "checkpoint_merge"}, - {Opt_nocheckpoint_merge, "nocheckpoint_merge"}, - {Opt_compress_algorithm, "compress_algorithm=%s"}, - {Opt_compress_log_size, "compress_log_size=%u"}, - {Opt_compress_extension, "compress_extension=%s"}, - {Opt_nocompress_extension, "nocompress_extension=%s"}, - {Opt_compress_chksum, "compress_chksum"}, - {Opt_compress_mode, "compress_mode=%s"}, - {Opt_compress_cache, "compress_cache"}, - {Opt_atgc, "atgc"}, - {Opt_gc_merge, "gc_merge"}, - {Opt_nogc_merge, "nogc_merge"}, - {Opt_discard_unit, "discard_unit=%s"}, - {Opt_memory_mode, "memory=%s"}, - {Opt_age_extent_cache, "age_extent_cache"}, - {Opt_errors, "errors=%s"}, - {Opt_nat_bits, "nat_bits"}, +/* Resort to a match_table for this interestingly formatted option */ +static match_table_t f2fs_checkpoint_tokens = { + {Opt_checkpoint_disable, "disable"}, + {Opt_checkpoint_disable_cap, "disable:%u"}, + {Opt_checkpoint_disable_cap_perc, "disable:%u%%"}, + {Opt_checkpoint_enable, "enable"}, {Opt_err, NULL}, }; @@ -513,7 +424,7 @@ static void init_once(void *foo) static const char * const quotatypes[] = INITQFNAMES; #define QTYPE2NAME(t) (quotatypes[t]) static int f2fs_set_qf_name(struct f2fs_sb_info *sbi, int qtype, - substring_t *args) + struct fs_parameter *param) { struct super_block *sb = sbi->sb; char *qname; @@ -528,7 +439,7 @@ static int f2fs_set_qf_name(struct f2fs_sb_info *sbi, int qtype, return 0; } - qname = match_strdup(args); + qname = kmemdup_nul(param->string, param->size, GFP_KERNEL); if (!qname) { f2fs_err(sbi, "Not enough memory for storing quotafile name"); return -ENOMEM; @@ -613,14 +524,9 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) #endif static int f2fs_set_test_dummy_encryption(struct f2fs_sb_info *sbi, - const char *opt, - const 
substring_t *arg, + const struct fs_parameter *param, bool is_remount) { - struct fs_parameter param = { - .type = fs_value_is_string, - .string = arg->from ? arg->from : "", - }; struct fscrypt_dummy_policy *policy = &F2FS_OPTION(sbi).dummy_enc_policy; int err; @@ -646,17 +552,17 @@ static int f2fs_set_test_dummy_encryption(struct f2fs_sb_info *sbi, return -EINVAL; } - err = fscrypt_parse_test_dummy_encryption(&param, policy); + err = fscrypt_parse_test_dummy_encryption(param, policy); if (err) { if (err == -EEXIST) f2fs_warn(sbi, "Can't change test_dummy_encryption on remount"); else if (err == -EINVAL) f2fs_warn(sbi, "Value of option \"%s\" is unrecognized", - opt); + param->key); else f2fs_warn(sbi, "Error processing option \"%s\" [%d]", - opt, err); + param->key, err); return -EINVAL; } f2fs_warn(sbi, "Test dummy encryption mode enabled"); @@ -799,372 +705,269 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) #endif #endif -static int parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remount) +static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) { - substring_t args[MAX_OPT_ARGS]; + struct f2fs_sb_info *sbi = fc->s_fs_info; #ifdef CONFIG_F2FS_FS_COMPRESSION unsigned char (*ext)[F2FS_EXTENSION_LEN]; unsigned char (*noext)[F2FS_EXTENSION_LEN]; int ext_cnt, noext_cnt; + char *name; #endif - char *p, *name; - int arg = 0; - kuid_t uid; - kgid_t gid; - int ret; + substring_t args[MAX_OPT_ARGS]; + struct fs_parse_result result; + bool is_remount; + int token, ret, arg; - if (!options) - return 0; + token = fs_parse(fc, f2fs_param_specs, param, &result); + if (token < 0) + return token; - while ((p = strsep(&options, ",")) != NULL) { - int token; + is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE; - if (!*p) - continue; - /* - * Initialize args struct so we know whether arg was - * found; some options take optional arguments.
- */ - args[0].to = args[0].from = NULL; - token = match_token(p, f2fs_tokens, args); - - switch (token) { - case Opt_gc_background: - name = match_strdup(&args[0]); - - if (!name) - return -ENOMEM; - if (!strcmp(name, "on")) { - F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; - } else if (!strcmp(name, "off")) { - if (f2fs_sb_has_blkzoned(sbi)) { - f2fs_warn(sbi, "zoned devices need bggc"); - kfree(name); - return -EINVAL; - } - F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF; - } else if (!strcmp(name, "sync")) { - F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_disable_roll_forward: - set_opt(sbi, DISABLE_ROLL_FORWARD); - break; - case Opt_norecovery: - /* requires ro mount, checked in f2fs_default_check */ - set_opt(sbi, NORECOVERY); - break; - case Opt_discard: - if (!f2fs_hw_support_discard(sbi)) { - f2fs_warn(sbi, "device does not support discard"); - break; - } - set_opt(sbi, DISCARD); - break; - case Opt_nodiscard: + switch (token) { + case Opt_gc_background: + F2FS_OPTION(sbi).bggc_mode = result.uint_32; + break; + case Opt_disable_roll_forward: + set_opt(sbi, DISABLE_ROLL_FORWARD); + break; + case Opt_norecovery: + /* requires ro mount, checked in f2fs_validate_options */ + set_opt(sbi, NORECOVERY); + break; + case Opt_discard: + if (result.negated) { if (f2fs_hw_should_discard(sbi)) { f2fs_warn(sbi, "discard is required for zoned block devices"); return -EINVAL; } clear_opt(sbi, DISCARD); - break; - case Opt_noheap: - case Opt_heap: - f2fs_warn(sbi, "heap/no_heap options were deprecated"); - break; + } else { + if (!f2fs_hw_support_discard(sbi)) { + f2fs_warn(sbi, "device does not support discard"); + break; + } + set_opt(sbi, DISCARD); + } + break; + case Opt_noheap: + case Opt_heap: + f2fs_warn(sbi, "heap/no_heap options were deprecated"); + break; #ifdef CONFIG_F2FS_FS_XATTR - case Opt_user_xattr: - set_opt(sbi, XATTR_USER); - break; - case Opt_nouser_xattr: + case Opt_user_xattr: 
+ if (result.negated) clear_opt(sbi, XATTR_USER); - break; - case Opt_inline_xattr: - set_opt(sbi, INLINE_XATTR); - break; - case Opt_noinline_xattr: + else + set_opt(sbi, XATTR_USER); + break; + case Opt_inline_xattr: + if (result.negated) clear_opt(sbi, INLINE_XATTR); - break; - case Opt_inline_xattr_size: - if (args->from && match_int(args, &arg)) - return -EINVAL; - set_opt(sbi, INLINE_XATTR_SIZE); - F2FS_OPTION(sbi).inline_xattr_size = arg; - break; + else + set_opt(sbi, INLINE_XATTR); + break; + case Opt_inline_xattr_size: + set_opt(sbi, INLINE_XATTR_SIZE); + F2FS_OPTION(sbi).inline_xattr_size = result.int_32; + break; #else - case Opt_user_xattr: - case Opt_nouser_xattr: - case Opt_inline_xattr: - case Opt_noinline_xattr: - case Opt_inline_xattr_size: - f2fs_info(sbi, "xattr options not supported"); - break; + case Opt_user_xattr: + case Opt_inline_xattr: + case Opt_inline_xattr_size: + f2fs_info(sbi, "%s options not supported", param->key); + break; #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL - case Opt_acl: - set_opt(sbi, POSIX_ACL); - break; - case Opt_noacl: + case Opt_acl: + if (result.negated) clear_opt(sbi, POSIX_ACL); - break; + else + set_opt(sbi, POSIX_ACL); + break; #else - case Opt_acl: - case Opt_noacl: - f2fs_info(sbi, "acl options not supported"); - break; + case Opt_acl: + f2fs_info(sbi, "%s options not supported", param->key); + break; #endif - case Opt_active_logs: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (arg != 2 && arg != 4 && - arg != NR_CURSEG_PERSIST_TYPE) - return -EINVAL; - F2FS_OPTION(sbi).active_logs = arg; - break; - case Opt_disable_ext_identify: - set_opt(sbi, DISABLE_EXT_IDENTIFY); - break; - case Opt_inline_data: + case Opt_active_logs: + if (result.int_32 != 2 && result.int_32 != 4 && + result.int_32 != NR_CURSEG_PERSIST_TYPE) + return -EINVAL; + F2FS_OPTION(sbi).active_logs = result.int_32; + break; + case Opt_disable_ext_identify: + set_opt(sbi, DISABLE_EXT_IDENTIFY); + break; + case Opt_inline_data: + 
if (result.negated) + clear_opt(sbi, INLINE_DATA); + else set_opt(sbi, INLINE_DATA); - break; - case Opt_inline_dentry: - set_opt(sbi, INLINE_DENTRY); - break; - case Opt_noinline_dentry: + break; + case Opt_inline_dentry: + if (result.negated) clear_opt(sbi, INLINE_DENTRY); - break; - case Opt_flush_merge: - set_opt(sbi, FLUSH_MERGE); - break; - case Opt_noflush_merge: + else + set_opt(sbi, INLINE_DENTRY); + break; + case Opt_flush_merge: + if (result.negated) clear_opt(sbi, FLUSH_MERGE); - break; - case Opt_nobarrier: + else + set_opt(sbi, FLUSH_MERGE); + break; + case Opt_barrier: + if (result.negated) set_opt(sbi, NOBARRIER); - break; - case Opt_barrier: + else clear_opt(sbi, NOBARRIER); - break; - case Opt_fastboot: - set_opt(sbi, FASTBOOT); - break; - case Opt_extent_cache: - set_opt(sbi, READ_EXTENT_CACHE); - break; - case Opt_noextent_cache: + break; + case Opt_fastboot: + set_opt(sbi, FASTBOOT); + break; + case Opt_extent_cache: + if (result.negated) { if (f2fs_sb_has_device_alias(sbi)) { f2fs_err(sbi, "device aliasing requires extent cache"); return -EINVAL; } clear_opt(sbi, READ_EXTENT_CACHE); - break; - case Opt_noinline_data: - clear_opt(sbi, INLINE_DATA); - break; - case Opt_data_flush: - set_opt(sbi, DATA_FLUSH); - break; - case Opt_reserve_root: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (test_opt(sbi, RESERVE_ROOT)) { - f2fs_info(sbi, "Preserve previous reserve_root=%u", - F2FS_OPTION(sbi).root_reserved_blocks); - } else { - F2FS_OPTION(sbi).root_reserved_blocks = arg; - set_opt(sbi, RESERVE_ROOT); - } - break; - case Opt_resuid: - if (args->from && match_int(args, &arg)) - return -EINVAL; - uid = make_kuid(current_user_ns(), arg); - if (!uid_valid(uid)) { - f2fs_err(sbi, "Invalid uid value %d", arg); - return -EINVAL; - } - F2FS_OPTION(sbi).s_resuid = uid; - break; - case Opt_resgid: - if (args->from && match_int(args, &arg)) - return -EINVAL; - gid = make_kgid(current_user_ns(), arg); - if (!gid_valid(gid)) { - 
f2fs_err(sbi, "Invalid gid value %d", arg); - return -EINVAL; - } - F2FS_OPTION(sbi).s_resgid = gid; - break; - case Opt_mode: - name = match_strdup(&args[0]); - - if (!name) - return -ENOMEM; - if (!strcmp(name, "adaptive")) { - F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; - } else if (!strcmp(name, "lfs")) { - F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; - } else if (!strcmp(name, "fragment:segment")) { - F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_SEG; - } else if (!strcmp(name, "fragment:block")) { - F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_BLK; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; + } else + set_opt(sbi, READ_EXTENT_CACHE); + break; + case Opt_data_flush: + set_opt(sbi, DATA_FLUSH); + break; + case Opt_reserve_root: + if (test_opt(sbi, RESERVE_ROOT)) { + f2fs_info(sbi, "Preserve previous reserve_root=%u", + F2FS_OPTION(sbi).root_reserved_blocks); + } else { + F2FS_OPTION(sbi).root_reserved_blocks = result.int_32; + set_opt(sbi, RESERVE_ROOT); + } + break; + case Opt_resuid: + F2FS_OPTION(sbi).s_resuid = result.uid; + break; + case Opt_resgid: + F2FS_OPTION(sbi).s_resgid = result.gid; + break; + case Opt_mode: + F2FS_OPTION(sbi).fs_mode = result.uint_32; + break; #ifdef CONFIG_F2FS_FAULT_INJECTION - case Opt_fault_injection: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (f2fs_build_fault_attr(sbi, arg, 0, FAULT_RATE)) - return -EINVAL; - set_opt(sbi, FAULT_INJECTION); - break; + case Opt_fault_injection: + if (f2fs_build_fault_attr(sbi, result.int_32, 0, FAULT_RATE)) + return -EINVAL; + set_opt(sbi, FAULT_INJECTION); + break; - case Opt_fault_type: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (f2fs_build_fault_attr(sbi, 0, arg, FAULT_TYPE)) - return -EINVAL; - set_opt(sbi, FAULT_INJECTION); - break; + case Opt_fault_type: + if (f2fs_build_fault_attr(sbi, 0, result.int_32, FAULT_TYPE)) + return -EINVAL; + set_opt(sbi, FAULT_INJECTION); + break; #else - case Opt_fault_injection: - case 
Opt_fault_type: - f2fs_info(sbi, "fault injection options not supported"); - break; + case Opt_fault_injection: + case Opt_fault_type: + f2fs_info(sbi, "%s options not supported", param->key); + break; #endif - case Opt_lazytime: - set_opt(sbi, LAZYTIME); - break; - case Opt_nolazytime: + case Opt_lazytime: + if (result.negated) clear_opt(sbi, LAZYTIME); - break; + else + set_opt(sbi, LAZYTIME); + break; #ifdef CONFIG_QUOTA - case Opt_quota: - case Opt_usrquota: - set_opt(sbi, USRQUOTA); - break; - case Opt_grpquota: - set_opt(sbi, GRPQUOTA); - break; - case Opt_prjquota: - set_opt(sbi, PRJQUOTA); - break; - case Opt_usrjquota: - ret = f2fs_set_qf_name(sbi, USRQUOTA, &args[0]); - if (ret) - return ret; - break; - case Opt_grpjquota: - ret = f2fs_set_qf_name(sbi, GRPQUOTA, &args[0]); - if (ret) - return ret; - break; - case Opt_prjjquota: - ret = f2fs_set_qf_name(sbi, PRJQUOTA, &args[0]); - if (ret) - return ret; - break; - case Opt_offusrjquota: - ret = f2fs_clear_qf_name(sbi, USRQUOTA); - if (ret) - return ret; - break; - case Opt_offgrpjquota: - ret = f2fs_clear_qf_name(sbi, GRPQUOTA); - if (ret) - return ret; - break; - case Opt_offprjjquota: - ret = f2fs_clear_qf_name(sbi, PRJQUOTA); - if (ret) - return ret; - break; - case Opt_jqfmt_vfsold: - F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_OLD; - break; - case Opt_jqfmt_vfsv0: - F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V0; - break; - case Opt_jqfmt_vfsv1: - F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V1; - break; - case Opt_noquota: + case Opt_quota: + if (result.negated) { clear_opt(sbi, QUOTA); clear_opt(sbi, USRQUOTA); clear_opt(sbi, GRPQUOTA); clear_opt(sbi, PRJQUOTA); - break; + } else + set_opt(sbi, USRQUOTA); + break; + case Opt_usrquota: + set_opt(sbi, USRQUOTA); + break; + case Opt_grpquota: + set_opt(sbi, GRPQUOTA); + break; + case Opt_prjquota: + set_opt(sbi, PRJQUOTA); + break; + case Opt_usrjquota: + if (!*param->string) + ret = f2fs_clear_qf_name(sbi, USRQUOTA); + else + ret = f2fs_set_qf_name(sbi, 
USRQUOTA, param); + if (ret) + return ret; + break; + case Opt_grpjquota: + if (!*param->string) + ret = f2fs_clear_qf_name(sbi, GRPQUOTA); + else + ret = f2fs_set_qf_name(sbi, GRPQUOTA, param); + if (ret) + return ret; + break; + case Opt_prjjquota: + if (!*param->string) + ret = f2fs_clear_qf_name(sbi, PRJQUOTA); + else + ret = f2fs_set_qf_name(sbi, PRJQUOTA, param); + if (ret) + return ret; + break; + case Opt_jqfmt: + F2FS_OPTION(sbi).s_jquota_fmt = result.uint_32; + break; #else - case Opt_quota: - case Opt_usrquota: - case Opt_grpquota: - case Opt_prjquota: - case Opt_usrjquota: - case Opt_grpjquota: - case Opt_prjjquota: - case Opt_offusrjquota: - case Opt_offgrpjquota: - case Opt_offprjjquota: - case Opt_jqfmt_vfsold: - case Opt_jqfmt_vfsv0: - case Opt_jqfmt_vfsv1: - case Opt_noquota: - f2fs_info(sbi, "quota operations not supported"); - break; + case Opt_quota: + case Opt_usrquota: + case Opt_grpquota: + case Opt_prjquota: + case Opt_usrjquota: + case Opt_grpjquota: + case Opt_prjjquota: + f2fs_info(sbi, "quota operations not supported"); + break; #endif - case Opt_alloc: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - - if (!strcmp(name, "default")) { - F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; - } else if (!strcmp(name, "reuse")) { - F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_fsync: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "posix")) { - F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; - } else if (!strcmp(name, "strict")) { - F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT; - } else if (!strcmp(name, "nobarrier")) { - F2FS_OPTION(sbi).fsync_mode = - FSYNC_MODE_NOBARRIER; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_test_dummy_encryption: - ret = f2fs_set_test_dummy_encryption(sbi, p, &args[0], - is_remount); - if (ret) - return ret; - break; - case 
Opt_inlinecrypt: + case Opt_alloc: + F2FS_OPTION(sbi).alloc_mode = result.uint_32; + break; + case Opt_fsync: + F2FS_OPTION(sbi).fsync_mode = result.uint_32; + break; + case Opt_test_dummy_encryption: + ret = f2fs_set_test_dummy_encryption(sbi, param, is_remount); + if (ret) + return ret; + break; + case Opt_inlinecrypt: #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT - set_opt(sbi, INLINECRYPT); + set_opt(sbi, INLINECRYPT); #else - f2fs_info(sbi, "inline encryption not supported"); + f2fs_info(sbi, "inline encryption not supported"); #endif - break; + break; + case Opt_checkpoint: + /* + * Initialize args struct so we know whether arg was + * found; some options take optional arguments. + */ + args[0].from = args[0].to = NULL; + arg = 0; + + /* revert to match_table for checkpoint= options */ + token = match_token(param->string, f2fs_checkpoint_tokens, args); + switch (token) { case Opt_checkpoint_disable_cap_perc: if (args->from && match_int(args, &arg)) return -EINVAL; @@ -1185,270 +988,225 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remoun case Opt_checkpoint_enable: clear_opt(sbi, DISABLE_CHECKPOINT); break; - case Opt_checkpoint_merge: - set_opt(sbi, MERGE_CHECKPOINT); - break; - case Opt_nocheckpoint_merge: - clear_opt(sbi, MERGE_CHECKPOINT); - break; -#ifdef CONFIG_F2FS_FS_COMPRESSION - case Opt_compress_algorithm: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "lzo")) { -#ifdef CONFIG_F2FS_FS_LZO - F2FS_OPTION(sbi).compress_level = 0; - F2FS_OPTION(sbi).compress_algorithm = - COMPRESS_LZO; -#else - f2fs_info(sbi, "kernel doesn't support lzo compression"); -#endif - } else if (!strncmp(name, "lz4", 3)) { -#ifdef CONFIG_F2FS_FS_LZ4 - ret = f2fs_set_lz4hc_level(sbi, name); - if (ret) { - kfree(name); - return -EINVAL; - } - F2FS_OPTION(sbi).compress_algorithm = - COMPRESS_LZ4; -#else - 
f2fs_info(sbi, "kernel doesn't support lz4 compression"); -#endif - } else if (!strncmp(name, "zstd", 4)) { -#ifdef CONFIG_F2FS_FS_ZSTD - ret = f2fs_set_zstd_level(sbi, name); - if (ret) { - kfree(name); - return -EINVAL; - } - F2FS_OPTION(sbi).compress_algorithm = - COMPRESS_ZSTD; -#else - f2fs_info(sbi, "kernel doesn't support zstd compression"); -#endif - } else if (!strcmp(name, "lzo-rle")) { -#ifdef CONFIG_F2FS_FS_LZORLE - F2FS_OPTION(sbi).compress_level = 0; - F2FS_OPTION(sbi).compress_algorithm = - COMPRESS_LZORLE; -#else - f2fs_info(sbi, "kernel doesn't support lzorle compression"); -#endif - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_compress_log_size: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (arg < MIN_COMPRESS_LOG_SIZE || - arg > MAX_COMPRESS_LOG_SIZE) { - f2fs_err(sbi, - "Compress cluster log size is out of range"); - return -EINVAL; - } - F2FS_OPTION(sbi).compress_log_size = arg; - break; - case Opt_compress_extension: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - - ext = F2FS_OPTION(sbi).extensions; - ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; - - if (strlen(name) >= F2FS_EXTENSION_LEN || - ext_cnt >= COMPRESS_EXT_NUM) { - f2fs_err(sbi, - "invalid extension length/number"); - kfree(name); - return -EINVAL; - } - - if (is_compress_extension_exist(sbi, name, true)) { - kfree(name); - break; - } - - ret = strscpy(ext[ext_cnt], name); - if (ret < 0) { - kfree(name); - return ret; - } - F2FS_OPTION(sbi).compress_ext_cnt++; - kfree(name); - break; - case Opt_nocompress_extension: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - - noext = 
F2FS_OPTION(sbi).noextensions; - noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; - - if (strlen(name) >= F2FS_EXTENSION_LEN || - noext_cnt >= COMPRESS_EXT_NUM) { - f2fs_err(sbi, - "invalid extension length/number"); - kfree(name); - return -EINVAL; - } - - if (is_compress_extension_exist(sbi, name, false)) { - kfree(name); - break; - } - - ret = strscpy(noext[noext_cnt], name); - if (ret < 0) { - kfree(name); - return ret; - } - F2FS_OPTION(sbi).nocompress_ext_cnt++; - kfree(name); - break; - case Opt_compress_chksum: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - F2FS_OPTION(sbi).compress_chksum = true; - break; - case Opt_compress_mode: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "fs")) { - F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS; - } else if (!strcmp(name, "user")) { - F2FS_OPTION(sbi).compress_mode = COMPR_MODE_USER; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_compress_cache: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - set_opt(sbi, COMPRESS_CACHE); - break; -#else - case Opt_compress_algorithm: - case Opt_compress_log_size: - case Opt_compress_extension: - case Opt_nocompress_extension: - case Opt_compress_chksum: - case Opt_compress_mode: - case Opt_compress_cache: - f2fs_info(sbi, "compression options not supported"); - break; -#endif - case Opt_atgc: - set_opt(sbi, ATGC); - break; - case Opt_gc_merge: - set_opt(sbi, GC_MERGE); - break; - case Opt_nogc_merge: - clear_opt(sbi, GC_MERGE); - break; - case Opt_discard_unit: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "block")) { - F2FS_OPTION(sbi).discard_unit = - DISCARD_UNIT_BLOCK; - } else if (!strcmp(name, "segment")) { - F2FS_OPTION(sbi).discard_unit = - 
DISCARD_UNIT_SEGMENT; - } else if (!strcmp(name, "section")) { - F2FS_OPTION(sbi).discard_unit = - DISCARD_UNIT_SECTION; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_memory_mode: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "normal")) { - F2FS_OPTION(sbi).memory_mode = - MEMORY_MODE_NORMAL; - } else if (!strcmp(name, "low")) { - F2FS_OPTION(sbi).memory_mode = - MEMORY_MODE_LOW; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_age_extent_cache: - set_opt(sbi, AGE_EXTENT_CACHE); - break; - case Opt_errors: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "remount-ro")) { - F2FS_OPTION(sbi).errors = - MOUNT_ERRORS_READONLY; - } else if (!strcmp(name, "continue")) { - F2FS_OPTION(sbi).errors = - MOUNT_ERRORS_CONTINUE; - } else if (!strcmp(name, "panic")) { - F2FS_OPTION(sbi).errors = - MOUNT_ERRORS_PANIC; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_nat_bits: - set_opt(sbi, NAT_BITS); - break; default: - f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", - p); return -EINVAL; } + break; + case Opt_checkpoint_merge: + if (result.negated) + clear_opt(sbi, MERGE_CHECKPOINT); + else + set_opt(sbi, MERGE_CHECKPOINT); + break; +#ifdef CONFIG_F2FS_FS_COMPRESSION + case Opt_compress_algorithm: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_info(sbi, "Image doesn't support compression"); + break; + } + name = param->string; + if (!strcmp(name, "lzo")) { +#ifdef CONFIG_F2FS_FS_LZO + F2FS_OPTION(sbi).compress_level = 0; + F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZO; +#else + f2fs_info(sbi, "kernel doesn't support lzo compression"); +#endif + } else if (!strncmp(name, "lz4", 3)) { +#ifdef CONFIG_F2FS_FS_LZ4 + ret = f2fs_set_lz4hc_level(sbi, name); + if (ret) + return -EINVAL; + F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; +#else + f2fs_info(sbi, "kernel doesn't support 
lz4 compression"); +#endif + } else if (!strncmp(name, "zstd", 4)) { +#ifdef CONFIG_F2FS_FS_ZSTD + ret = f2fs_set_zstd_level(sbi, name); + if (ret) + return -EINVAL; + F2FS_OPTION(sbi).compress_algorithm = COMPRESS_ZSTD; +#else + f2fs_info(sbi, "kernel doesn't support zstd compression"); +#endif + } else if (!strcmp(name, "lzo-rle")) { +#ifdef CONFIG_F2FS_FS_LZORLE + F2FS_OPTION(sbi).compress_level = 0; + F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZORLE; +#else + f2fs_info(sbi, "kernel doesn't support lzorle compression"); +#endif + } else + return -EINVAL; + break; + case Opt_compress_log_size: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_info(sbi, "Image doesn't support compression"); + break; + } + if (result.uint_32 < MIN_COMPRESS_LOG_SIZE || + result.uint_32 > MAX_COMPRESS_LOG_SIZE) { + f2fs_err(sbi, + "Compress cluster log size is out of range"); + return -EINVAL; + } + F2FS_OPTION(sbi).compress_log_size = result.uint_32; + break; + case Opt_compress_extension: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_info(sbi, "Image doesn't support compression"); + break; + } + name = param->string; + ext = F2FS_OPTION(sbi).extensions; + ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + + if (strlen(name) >= F2FS_EXTENSION_LEN || + ext_cnt >= COMPRESS_EXT_NUM) { + f2fs_err(sbi, "invalid extension length/number"); + return -EINVAL; + } + + if (is_compress_extension_exist(sbi, name, true)) + break; + + ret = strscpy(ext[ext_cnt], name, F2FS_EXTENSION_LEN); + if (ret < 0) + return ret; + F2FS_OPTION(sbi).compress_ext_cnt++; + break; + case Opt_nocompress_extension: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_info(sbi, "Image doesn't support compression"); + break; + } + name = param->string; + noext = F2FS_OPTION(sbi).noextensions; + noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + + if (strlen(name) >= F2FS_EXTENSION_LEN || + noext_cnt >= COMPRESS_EXT_NUM) { + f2fs_err(sbi, "invalid extension length/number"); + return -EINVAL; + } + + if 
(is_compress_extension_exist(sbi, name, false)) + break; + + ret = strscpy(noext[noext_cnt], name, F2FS_EXTENSION_LEN); + if (ret < 0) + return ret; + F2FS_OPTION(sbi).nocompress_ext_cnt++; + break; + case Opt_compress_chksum: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_info(sbi, "Image doesn't support compression"); + break; + } + F2FS_OPTION(sbi).compress_chksum = true; + break; + case Opt_compress_mode: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_info(sbi, "Image doesn't support compression"); + break; + } + F2FS_OPTION(sbi).compress_mode = result.uint_32; + break; + case Opt_compress_cache: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_info(sbi, "Image doesn't support compression"); + break; + } + set_opt(sbi, COMPRESS_CACHE); + break; +#else + case Opt_compress_algorithm: + case Opt_compress_log_size: + case Opt_compress_extension: + case Opt_nocompress_extension: + case Opt_compress_chksum: + case Opt_compress_mode: + case Opt_compress_cache: + f2fs_info(sbi, "compression options not supported"); + break; +#endif + case Opt_atgc: + set_opt(sbi, ATGC); + break; + case Opt_gc_merge: + if (result.negated) + clear_opt(sbi, GC_MERGE); + else + set_opt(sbi, GC_MERGE); + break; + case Opt_discard_unit: + F2FS_OPTION(sbi).discard_unit = result.uint_32; + break; + case Opt_memory_mode: + F2FS_OPTION(sbi).memory_mode = result.uint_32; + break; + case Opt_age_extent_cache: + set_opt(sbi, AGE_EXTENT_CACHE); + break; + case Opt_errors: + F2FS_OPTION(sbi).errors = result.uint_32; + break; + case Opt_nat_bits: + set_opt(sbi, NAT_BITS); + break; + } + return 0; +} + +static int parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remount) +{ + struct fs_parameter param; + struct fs_context fc; + char *key; + int ret; + + if (!options) + return 0; + + memset(&fc, 0, sizeof(fc)); + fc.s_fs_info = sbi; + if (is_remount) + fc.purpose = FS_CONTEXT_FOR_RECONFIGURE; + + while ((key = strsep(&options, ",")) != NULL) { + if (*key) { + size_t v_len = 0; + char *value = 
strchr(key, '='); + + param.type = fs_value_is_flag; + param.string = NULL; + + if (value) { + if (value == key) + continue; + + *value++ = 0; + v_len = strlen(value); + param.string = kmemdup_nul(value, v_len, GFP_KERNEL); + if (!param.string) + return -ENOMEM; + param.type = fs_value_is_string; + } + + param.key = key; + param.size = v_len; + + ret = handle_mount_opt(&fc, &param); + kfree(param.string); + if (ret < 0) + return ret; + } } return 0; } -static int f2fs_default_check(struct f2fs_sb_info *sbi) +static int f2fs_validate_options(struct f2fs_sb_info *sbi) { #ifdef CONFIG_QUOTA if (f2fs_check_quota_options(sbi)) @@ -2527,7 +2285,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) } #endif - err = f2fs_default_check(sbi); + err = f2fs_validate_options(sbi); if (err) goto restore_opts; @@ -4726,7 +4484,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto free_options; - err = f2fs_default_check(sbi); + err = f2fs_validate_options(sbi); if (err) goto free_options; From 19c4b380f23e5a445cfc9e922c996784990d218c Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 10 Jul 2025 12:14:11 +0000 Subject: [PATCH 0911/2411] f2fs: Allow sbi to be NULL in f2fs_printk At the parsing phase of the new mount api, sbi will not be available. So allow sbi to be NULL in the f2fs log helpers and use that in handle_mount_opt().
Signed-off-by: Hongbo Li [sandeen: forward port] Signed-off-by: Eric Sandeen Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 90 +++++++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 41 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index fddd33b1118c..4f0cd790a24e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -325,11 +325,19 @@ void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, vaf.fmt = printk_skip_level(fmt); vaf.va = &args; if (limit_rate) - printk_ratelimited("%c%cF2FS-fs (%s): %pV\n", - KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + if (sbi) + printk_ratelimited("%c%cF2FS-fs (%s): %pV\n", + KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + else + printk_ratelimited("%c%cF2FS-fs: %pV\n", + KERN_SOH_ASCII, level, &vaf); else - printk("%c%cF2FS-fs (%s): %pV\n", - KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + if (sbi) + printk("%c%cF2FS-fs (%s): %pV\n", + KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + else + printk("%c%cF2FS-fs: %pV\n", + KERN_SOH_ASCII, level, &vaf); va_end(args); } @@ -739,13 +747,13 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) case Opt_discard: if (result.negated) { if (f2fs_hw_should_discard(sbi)) { - f2fs_warn(sbi, "discard is required for zoned block devices"); + f2fs_warn(NULL, "discard is required for zoned block devices"); return -EINVAL; } clear_opt(sbi, DISCARD); } else { if (!f2fs_hw_support_discard(sbi)) { - f2fs_warn(sbi, "device does not support discard"); + f2fs_warn(NULL, "device does not support discard"); break; } set_opt(sbi, DISCARD); @@ -753,7 +761,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; case Opt_noheap: case Opt_heap: - f2fs_warn(sbi, "heap/no_heap options were deprecated"); + f2fs_warn(NULL, "heap/no_heap options were deprecated"); break; #ifdef CONFIG_F2FS_FS_XATTR case Opt_user_xattr: @@ -776,7 +784,7 @@ static int handle_mount_opt(struct fs_context *fc, 
struct fs_parameter *param) case Opt_user_xattr: case Opt_inline_xattr: case Opt_inline_xattr_size: - f2fs_info(sbi, "%s options not supported", param->key); + f2fs_info(NULL, "%s options not supported", param->key); break; #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL @@ -788,7 +796,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; #else case Opt_acl: - f2fs_info(sbi, "%s options not supported", param->key); + f2fs_info(NULL, "%s options not supported", param->key); break; #endif case Opt_active_logs: @@ -842,7 +850,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; case Opt_reserve_root: if (test_opt(sbi, RESERVE_ROOT)) { - f2fs_info(sbi, "Preserve previous reserve_root=%u", + f2fs_info(NULL, "Preserve previous reserve_root=%u", F2FS_OPTION(sbi).root_reserved_blocks); } else { F2FS_OPTION(sbi).root_reserved_blocks = result.int_32; @@ -873,7 +881,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) #else case Opt_fault_injection: case Opt_fault_type: - f2fs_info(sbi, "%s options not supported", param->key); + f2fs_info(NULL, "%s options not supported", param->key); break; #endif case Opt_lazytime: @@ -936,7 +944,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) case Opt_usrjquota: case Opt_grpjquota: case Opt_prjjquota: - f2fs_info(sbi, "quota operations not supported"); + f2fs_info(NULL, "quota operations not supported"); break; #endif case Opt_alloc: @@ -954,7 +962,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT set_opt(sbi, INLINECRYPT); #else - f2fs_info(sbi, "inline encryption not supported"); + f2fs_info(NULL, "inline encryption not supported"); #endif break; case Opt_checkpoint: @@ -1001,7 +1009,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) #ifdef CONFIG_F2FS_FS_COMPRESSION case Opt_compress_algorithm: if 
(!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); + f2fs_info(NULL, "Image doesn't support compression"); break; } name = param->string; @@ -1010,7 +1018,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) F2FS_OPTION(sbi).compress_level = 0; F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZO; #else - f2fs_info(sbi, "kernel doesn't support lzo compression"); + f2fs_info(NULL, "kernel doesn't support lzo compression"); #endif } else if (!strncmp(name, "lz4", 3)) { #ifdef CONFIG_F2FS_FS_LZ4 @@ -1019,7 +1027,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) return -EINVAL; F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; #else - f2fs_info(sbi, "kernel doesn't support lz4 compression"); + f2fs_info(NULL, "kernel doesn't support lz4 compression"); #endif } else if (!strncmp(name, "zstd", 4)) { #ifdef CONFIG_F2FS_FS_ZSTD @@ -1028,26 +1036,26 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) return -EINVAL; F2FS_OPTION(sbi).compress_algorithm = COMPRESS_ZSTD; #else - f2fs_info(sbi, "kernel doesn't support zstd compression"); + f2fs_info(NULL, "kernel doesn't support zstd compression"); #endif } else if (!strcmp(name, "lzo-rle")) { #ifdef CONFIG_F2FS_FS_LZORLE F2FS_OPTION(sbi).compress_level = 0; F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZORLE; #else - f2fs_info(sbi, "kernel doesn't support lzorle compression"); + f2fs_info(NULL, "kernel doesn't support lzorle compression"); #endif } else return -EINVAL; break; case Opt_compress_log_size: if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); + f2fs_info(NULL, "Image doesn't support compression"); break; } if (result.uint_32 < MIN_COMPRESS_LOG_SIZE || result.uint_32 > MAX_COMPRESS_LOG_SIZE) { - f2fs_err(sbi, + f2fs_err(NULL, "Compress cluster log size is out of range"); return -EINVAL; } @@ -1055,7 +1063,7 @@ static int handle_mount_opt(struct 
fs_context *fc, struct fs_parameter *param) break; case Opt_compress_extension: if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); + f2fs_info(NULL, "Image doesn't support compression"); break; } name = param->string; @@ -1064,7 +1072,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) if (strlen(name) >= F2FS_EXTENSION_LEN || ext_cnt >= COMPRESS_EXT_NUM) { - f2fs_err(sbi, "invalid extension length/number"); + f2fs_err(NULL, "invalid extension length/number"); return -EINVAL; } @@ -1078,7 +1086,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; case Opt_nocompress_extension: if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); + f2fs_info(NULL, "Image doesn't support compression"); break; } name = param->string; @@ -1087,7 +1095,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) if (strlen(name) >= F2FS_EXTENSION_LEN || noext_cnt >= COMPRESS_EXT_NUM) { - f2fs_err(sbi, "invalid extension length/number"); + f2fs_err(NULL, "invalid extension length/number"); return -EINVAL; } @@ -1101,21 +1109,21 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; case Opt_compress_chksum: if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); + f2fs_info(NULL, "Image doesn't support compression"); break; } F2FS_OPTION(sbi).compress_chksum = true; break; case Opt_compress_mode: if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); + f2fs_info(NULL, "Image doesn't support compression"); break; } F2FS_OPTION(sbi).compress_mode = result.uint_32; break; case Opt_compress_cache: if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); + f2fs_info(NULL, "Image doesn't support compression"); break; } set_opt(sbi, COMPRESS_CACHE); @@ -1128,7 +1136,7 @@ static int 
handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) case Opt_compress_chksum: case Opt_compress_mode: case Opt_compress_cache: - f2fs_info(sbi, "compression options not supported"); + f2fs_info(NULL, "compression options not supported"); break; #endif case Opt_atgc: @@ -1213,17 +1221,17 @@ static int f2fs_validate_options(struct f2fs_sb_info *sbi) return -EINVAL; #else if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + f2fs_info(NULL, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA"); return -EINVAL; } if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + f2fs_err(NULL, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA"); return -EINVAL; } #endif if (!IS_ENABLED(CONFIG_UNICODE) && f2fs_sb_has_casefold(sbi)) { - f2fs_err(sbi, + f2fs_err(NULL, "Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE"); return -EINVAL; } @@ -1237,24 +1245,24 @@ static int f2fs_validate_options(struct f2fs_sb_info *sbi) #ifdef CONFIG_BLK_DEV_ZONED if (F2FS_OPTION(sbi).discard_unit != DISCARD_UNIT_SECTION) { - f2fs_info(sbi, "Zoned block device doesn't need small discard, set discard_unit=section by default"); + f2fs_info(NULL, "Zoned block device doesn't need small discard, set discard_unit=section by default"); F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_SECTION; } if (F2FS_OPTION(sbi).fs_mode != FS_MODE_LFS) { - f2fs_info(sbi, "Only lfs mode is allowed with zoned block device feature"); + f2fs_info(NULL, "Only lfs mode is allowed with zoned block device feature"); return -EINVAL; } #else - f2fs_err(sbi, "Zoned block device support is not enabled"); + f2fs_err(NULL, "Zoned block device support is not enabled"); return -EINVAL; #endif } #ifdef CONFIG_F2FS_FS_COMPRESSION if 
(f2fs_test_compress_extension(sbi)) { - f2fs_err(sbi, "invalid compress or nocompress extension"); + f2fs_err(NULL, "invalid compress or nocompress extension"); return -EINVAL; } #endif @@ -1264,11 +1272,11 @@ static int f2fs_validate_options(struct f2fs_sb_info *sbi) if (!f2fs_sb_has_extra_attr(sbi) || !f2fs_sb_has_flexible_inline_xattr(sbi)) { - f2fs_err(sbi, "extra_attr or flexible_inline_xattr feature is off"); + f2fs_err(NULL, "extra_attr or flexible_inline_xattr feature is off"); return -EINVAL; } if (!test_opt(sbi, INLINE_XATTR)) { - f2fs_err(sbi, "inline_xattr_size option should be set with inline_xattr option"); + f2fs_err(NULL, "inline_xattr_size option should be set with inline_xattr option"); return -EINVAL; } @@ -1277,24 +1285,24 @@ static int f2fs_validate_options(struct f2fs_sb_info *sbi) if (F2FS_OPTION(sbi).inline_xattr_size < min_size || F2FS_OPTION(sbi).inline_xattr_size > max_size) { - f2fs_err(sbi, "inline xattr size is out of range: %d ~ %d", + f2fs_err(NULL, "inline xattr size is out of range: %d ~ %d", min_size, max_size); return -EINVAL; } } if (test_opt(sbi, ATGC) && f2fs_lfs_mode(sbi)) { - f2fs_err(sbi, "LFS is not compatible with ATGC"); + f2fs_err(NULL, "LFS is not compatible with ATGC"); return -EINVAL; } if (f2fs_is_readonly(sbi) && test_opt(sbi, FLUSH_MERGE)) { - f2fs_err(sbi, "FLUSH_MERGE not compatible with readonly mode"); + f2fs_err(NULL, "FLUSH_MERGE not compatible with readonly mode"); return -EINVAL; } if (f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_err(sbi, "Allow to mount readonly mode only"); + f2fs_err(NULL, "Allow to mount readonly mode only"); return -EROFS; } From 1a9094b10cf7339e4aa8d8c004534200968b558c Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 10 Jul 2025 12:14:12 +0000 Subject: [PATCH 0912/2411] f2fs: Add f2fs_fs_context to record the mount options At the parsing phase of mount in the new mount api, option values will be recorded in the context, and then they will be used in fill_super
and other helpers. Note that this is a temporary state: we want to remove the sb and sbi usages in handle_mount_opt. So here the f2fs_fs_context only records the mount options; they will be copied into sb/sbi later in the process. (At this point in the series, mount options are temporarily not set during mount.) Signed-off-by: Hongbo Li [sandeen: forward port, minor fixes and updates] Signed-off-by: Eric Sandeen [hongbo: minor cleanup] Signed-off-by: Hongbo Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 410 +++++++++++++++++++++++++++--------------------- 1 file changed, 235 insertions(+), 175 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4f0cd790a24e..c84425771f0e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -312,8 +312,56 @@ static match_table_t f2fs_checkpoint_tokens = { {Opt_err, NULL}, }; +#define F2FS_SPEC_background_gc (1 << 0) +#define F2FS_SPEC_inline_xattr_size (1 << 1) +#define F2FS_SPEC_active_logs (1 << 2) +#define F2FS_SPEC_reserve_root (1 << 3) +#define F2FS_SPEC_resgid (1 << 4) +#define F2FS_SPEC_resuid (1 << 5) +#define F2FS_SPEC_mode (1 << 6) +#define F2FS_SPEC_fault_injection (1 << 7) +#define F2FS_SPEC_fault_type (1 << 8) +#define F2FS_SPEC_jqfmt (1 << 9) +#define F2FS_SPEC_alloc_mode (1 << 10) +#define F2FS_SPEC_fsync_mode (1 << 11) +#define F2FS_SPEC_checkpoint_disable_cap (1 << 12) +#define F2FS_SPEC_checkpoint_disable_cap_perc (1 << 13) +#define F2FS_SPEC_compress_level (1 << 14) +#define F2FS_SPEC_compress_algorithm (1 << 15) +#define F2FS_SPEC_compress_log_size (1 << 16) +#define F2FS_SPEC_compress_extension (1 << 17) +#define F2FS_SPEC_nocompress_extension (1 << 18) +#define F2FS_SPEC_compress_chksum (1 << 19) +#define F2FS_SPEC_compress_mode (1 << 20) +#define F2FS_SPEC_discard_unit (1 << 21) +#define F2FS_SPEC_memory_mode (1 << 22) +#define F2FS_SPEC_errors (1 << 23) + +struct f2fs_fs_context { + struct f2fs_mount_info info; + unsigned int opt_mask; /* Bits changed */ + unsigned int
spec_mask; + unsigned short qname_mask; +}; + +#define F2FS_CTX_INFO(ctx) ((ctx)->info) + +static inline void ctx_set_opt(struct f2fs_fs_context *ctx, + unsigned int flag) +{ + ctx->info.opt |= flag; + ctx->opt_mask |= flag; +} + +static inline void ctx_clear_opt(struct f2fs_fs_context *ctx, + unsigned int flag) +{ + ctx->info.opt &= ~flag; + ctx->opt_mask |= flag; +} + void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, - const char *fmt, ...) + const char *fmt, ...) { struct va_format vaf; va_list args; @@ -431,57 +479,51 @@ static void init_once(void *foo) #ifdef CONFIG_QUOTA static const char * const quotatypes[] = INITQFNAMES; #define QTYPE2NAME(t) (quotatypes[t]) -static int f2fs_set_qf_name(struct f2fs_sb_info *sbi, int qtype, - struct fs_parameter *param) +/* + * Note the name of the specified quota file. + */ +static int f2fs_note_qf_name(struct fs_context *fc, int qtype, + struct fs_parameter *param) { - struct super_block *sb = sbi->sb; + struct f2fs_fs_context *ctx = fc->fs_private; char *qname; - int ret = -EINVAL; - if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) { - f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); + if (param->size < 1) { + f2fs_err(NULL, "Missing quota name"); return -EINVAL; } - if (f2fs_sb_has_quota_ino(sbi)) { - f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name"); + if (strchr(param->string, '/')) { + f2fs_err(NULL, "quotafile must be on filesystem root"); + return -EINVAL; + } + if (ctx->info.s_qf_names[qtype]) { + if (strcmp(ctx->info.s_qf_names[qtype], param->string) != 0) { + f2fs_err(NULL, "Quota file already specified"); + return -EINVAL; + } return 0; } qname = kmemdup_nul(param->string, param->size, GFP_KERNEL); if (!qname) { - f2fs_err(sbi, "Not enough memory for storing quotafile name"); + f2fs_err(NULL, "Not enough memory for storing quotafile name"); return -ENOMEM; } - if (F2FS_OPTION(sbi).s_qf_names[qtype]) { - if 
(strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0) - ret = 0; - else - f2fs_err(sbi, "%s quota file already specified", - QTYPE2NAME(qtype)); - goto errout; - } - if (strchr(qname, '/')) { - f2fs_err(sbi, "quotafile must be on filesystem root"); - goto errout; - } - F2FS_OPTION(sbi).s_qf_names[qtype] = qname; - set_opt(sbi, QUOTA); + F2FS_CTX_INFO(ctx).s_qf_names[qtype] = qname; + ctx->qname_mask |= 1 << qtype; return 0; -errout: - kfree(qname); - return ret; } -static int f2fs_clear_qf_name(struct f2fs_sb_info *sbi, int qtype) +/* + * Clear the name of the specified quota file. + */ +static int f2fs_unnote_qf_name(struct fs_context *fc, int qtype) { - struct super_block *sb = sbi->sb; + struct f2fs_fs_context *ctx = fc->fs_private; - if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) { - f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); - return -EINVAL; - } - kfree(F2FS_OPTION(sbi).s_qf_names[qtype]); - F2FS_OPTION(sbi).s_qf_names[qtype] = NULL; + kfree(ctx->info.s_qf_names[qtype]); + ctx->info.s_qf_names[qtype] = NULL; + ctx->qname_mask |= 1 << qtype; return 0; } @@ -531,54 +573,33 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) } #endif -static int f2fs_set_test_dummy_encryption(struct f2fs_sb_info *sbi, - const struct fs_parameter *param, - bool is_remount) +static int f2fs_parse_test_dummy_encryption(const struct fs_parameter *param, + struct f2fs_fs_context *ctx) { - struct fscrypt_dummy_policy *policy = - &F2FS_OPTION(sbi).dummy_enc_policy; int err; if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) { - f2fs_warn(sbi, "test_dummy_encryption option not supported"); + f2fs_warn(NULL, "test_dummy_encryption option not supported"); return -EINVAL; } - - if (!f2fs_sb_has_encrypt(sbi)) { - f2fs_err(sbi, "Encrypt feature is off"); - return -EINVAL; - } - - /* - * This mount option is just for testing, and it's not worthwhile to - * implement the extra complexity (e.g. 
RCU protection) that would be - * needed to allow it to be set or changed during remount. We do allow - * it to be specified during remount, but only if there is no change. - */ - if (is_remount && !fscrypt_is_dummy_policy_set(policy)) { - f2fs_warn(sbi, "Can't set test_dummy_encryption on remount"); - return -EINVAL; - } - - err = fscrypt_parse_test_dummy_encryption(param, policy); + err = fscrypt_parse_test_dummy_encryption(param, + &ctx->info.dummy_enc_policy); if (err) { - if (err == -EEXIST) - f2fs_warn(sbi, - "Can't change test_dummy_encryption on remount"); - else if (err == -EINVAL) - f2fs_warn(sbi, "Value of option \"%s\" is unrecognized", + if (err == -EINVAL) + f2fs_warn(NULL, "Value of option \"%s\" is unrecognized", param->key); + else if (err == -EEXIST) + f2fs_warn(NULL, "Conflicting test_dummy_encryption options"); else - f2fs_warn(sbi, "Error processing option \"%s\" [%d]", + f2fs_warn(NULL, "Error processing option \"%s\" [%d]", param->key, err); return -EINVAL; } - f2fs_warn(sbi, "Test dummy encryption mode enabled"); return 0; } #ifdef CONFIG_F2FS_FS_COMPRESSION -static bool is_compress_extension_exist(struct f2fs_sb_info *sbi, +static bool is_compress_extension_exist(struct f2fs_mount_info *info, const char *new_ext, bool is_ext) { unsigned char (*ext)[F2FS_EXTENSION_LEN]; @@ -586,11 +607,11 @@ static bool is_compress_extension_exist(struct f2fs_sb_info *sbi, int i; if (is_ext) { - ext = F2FS_OPTION(sbi).extensions; - ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + ext = info->extensions; + ext_cnt = info->compress_ext_cnt; } else { - ext = F2FS_OPTION(sbi).noextensions; - ext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + ext = info->noextensions; + ext_cnt = info->nocompress_ext_cnt; } for (i = 0; i < ext_cnt; i++) { @@ -639,58 +660,62 @@ static int f2fs_test_compress_extension(struct f2fs_sb_info *sbi) } #ifdef CONFIG_F2FS_FS_LZ4 -static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str) +static int f2fs_set_lz4hc_level(struct 
f2fs_fs_context *ctx, const char *str) { #ifdef CONFIG_F2FS_FS_LZ4HC unsigned int level; if (strlen(str) == 3) { - F2FS_OPTION(sbi).compress_level = 0; + F2FS_CTX_INFO(ctx).compress_level = 0; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; } str += 3; if (str[0] != ':') { - f2fs_info(sbi, "wrong format, e.g. :"); + f2fs_info(NULL, "wrong format, e.g. :"); return -EINVAL; } if (kstrtouint(str + 1, 10, &level)) return -EINVAL; if (!f2fs_is_compress_level_valid(COMPRESS_LZ4, level)) { - f2fs_info(sbi, "invalid lz4hc compress level: %d", level); + f2fs_info(NULL, "invalid lz4hc compress level: %d", level); return -EINVAL; } - F2FS_OPTION(sbi).compress_level = level; + F2FS_CTX_INFO(ctx).compress_level = level; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; #else if (strlen(str) == 3) { - F2FS_OPTION(sbi).compress_level = 0; + F2FS_CTX_INFO(ctx).compress_level = 0; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; } - f2fs_info(sbi, "kernel doesn't support lz4hc compression"); + f2fs_info(NULL, "kernel doesn't support lz4hc compression"); return -EINVAL; #endif } #endif #ifdef CONFIG_F2FS_FS_ZSTD -static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) +static int f2fs_set_zstd_level(struct f2fs_fs_context *ctx, const char *str) { int level; int len = 4; if (strlen(str) == len) { - F2FS_OPTION(sbi).compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; + F2FS_CTX_INFO(ctx).compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; } str += len; if (str[0] != ':') { - f2fs_info(sbi, "wrong format, e.g. :"); + f2fs_info(NULL, "wrong format, e.g. 
:"); return -EINVAL; } if (kstrtoint(str + 1, 10, &level)) @@ -698,16 +723,17 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) /* f2fs does not support negative compress level now */ if (level < 0) { - f2fs_info(sbi, "do not support negative compress level: %d", level); + f2fs_info(NULL, "do not support negative compress level: %d", level); return -ERANGE; } if (!f2fs_is_compress_level_valid(COMPRESS_ZSTD, level)) { - f2fs_info(sbi, "invalid zstd compress level: %d", level); + f2fs_info(NULL, "invalid zstd compress level: %d", level); return -EINVAL; } - F2FS_OPTION(sbi).compress_level = level; + F2FS_CTX_INFO(ctx).compress_level = level; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; } #endif @@ -715,6 +741,7 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) { + struct f2fs_fs_context *ctx = fc->fs_private; struct f2fs_sb_info *sbi = fc->s_fs_info; #ifdef CONFIG_F2FS_FS_COMPRESSION unsigned char (*ext)[F2FS_EXTENSION_LEN]; @@ -735,14 +762,15 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) switch (token) { case Opt_gc_background: - F2FS_OPTION(sbi).bggc_mode = result.uint_32; + F2FS_CTX_INFO(ctx).bggc_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_background_gc; break; case Opt_disable_roll_forward: - set_opt(sbi, DISABLE_ROLL_FORWARD); + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_ROLL_FORWARD); break; case Opt_norecovery: /* requires ro mount, checked in f2fs_validate_options */ - set_opt(sbi, NORECOVERY); + ctx_set_opt(ctx, F2FS_MOUNT_NORECOVERY); break; case Opt_discard: if (result.negated) { @@ -750,13 +778,13 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) f2fs_warn(NULL, "discard is required for zoned block devices"); return -EINVAL; } - clear_opt(sbi, DISCARD); + ctx_clear_opt(ctx, F2FS_MOUNT_DISCARD); } else { if (!f2fs_hw_support_discard(sbi)) { 
f2fs_warn(NULL, "device does not support discard"); break; } - set_opt(sbi, DISCARD); + ctx_set_opt(ctx, F2FS_MOUNT_DISCARD); } break; case Opt_noheap: @@ -766,19 +794,20 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) #ifdef CONFIG_F2FS_FS_XATTR case Opt_user_xattr: if (result.negated) - clear_opt(sbi, XATTR_USER); + ctx_clear_opt(ctx, F2FS_MOUNT_XATTR_USER); else - set_opt(sbi, XATTR_USER); + ctx_set_opt(ctx, F2FS_MOUNT_XATTR_USER); break; case Opt_inline_xattr: if (result.negated) - clear_opt(sbi, INLINE_XATTR); + ctx_clear_opt(ctx, F2FS_MOUNT_INLINE_XATTR); else - set_opt(sbi, INLINE_XATTR); + ctx_set_opt(ctx, F2FS_MOUNT_INLINE_XATTR); break; case Opt_inline_xattr_size: - set_opt(sbi, INLINE_XATTR_SIZE); - F2FS_OPTION(sbi).inline_xattr_size = result.int_32; + ctx_set_opt(ctx, F2FS_MOUNT_INLINE_XATTR_SIZE); + F2FS_CTX_INFO(ctx).inline_xattr_size = result.int_32; + ctx->spec_mask |= F2FS_SPEC_inline_xattr_size; break; #else case Opt_user_xattr: @@ -790,9 +819,9 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) #ifdef CONFIG_F2FS_FS_POSIX_ACL case Opt_acl: if (result.negated) - clear_opt(sbi, POSIX_ACL); + ctx_clear_opt(ctx, F2FS_MOUNT_POSIX_ACL); else - set_opt(sbi, POSIX_ACL); + ctx_set_opt(ctx, F2FS_MOUNT_POSIX_ACL); break; #else case Opt_acl: @@ -803,37 +832,38 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) if (result.int_32 != 2 && result.int_32 != 4 && result.int_32 != NR_CURSEG_PERSIST_TYPE) return -EINVAL; - F2FS_OPTION(sbi).active_logs = result.int_32; + ctx->spec_mask |= F2FS_SPEC_active_logs; + F2FS_CTX_INFO(ctx).active_logs = result.int_32; break; case Opt_disable_ext_identify: - set_opt(sbi, DISABLE_EXT_IDENTIFY); + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_EXT_IDENTIFY); break; case Opt_inline_data: if (result.negated) - clear_opt(sbi, INLINE_DATA); + ctx_clear_opt(ctx, F2FS_MOUNT_INLINE_DATA); else - set_opt(sbi, INLINE_DATA); + ctx_set_opt(ctx, 
F2FS_MOUNT_INLINE_DATA); break; case Opt_inline_dentry: if (result.negated) - clear_opt(sbi, INLINE_DENTRY); + ctx_clear_opt(ctx, F2FS_MOUNT_INLINE_DENTRY); else - set_opt(sbi, INLINE_DENTRY); + ctx_set_opt(ctx, F2FS_MOUNT_INLINE_DENTRY); break; case Opt_flush_merge: if (result.negated) - clear_opt(sbi, FLUSH_MERGE); + ctx_clear_opt(ctx, F2FS_MOUNT_FLUSH_MERGE); else - set_opt(sbi, FLUSH_MERGE); + ctx_set_opt(ctx, F2FS_MOUNT_FLUSH_MERGE); break; case Opt_barrier: if (result.negated) - set_opt(sbi, NOBARRIER); + ctx_set_opt(ctx, F2FS_MOUNT_NOBARRIER); else - clear_opt(sbi, NOBARRIER); + ctx_clear_opt(ctx, F2FS_MOUNT_NOBARRIER); break; case Opt_fastboot: - set_opt(sbi, FASTBOOT); + ctx_set_opt(ctx, F2FS_MOUNT_FASTBOOT); break; case Opt_extent_cache: if (result.negated) { @@ -841,42 +871,50 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) f2fs_err(sbi, "device aliasing requires extent cache"); return -EINVAL; } - clear_opt(sbi, READ_EXTENT_CACHE); + ctx_clear_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE); } else - set_opt(sbi, READ_EXTENT_CACHE); + ctx_set_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE); break; case Opt_data_flush: - set_opt(sbi, DATA_FLUSH); + ctx_set_opt(ctx, F2FS_MOUNT_DATA_FLUSH); break; case Opt_reserve_root: if (test_opt(sbi, RESERVE_ROOT)) { f2fs_info(NULL, "Preserve previous reserve_root=%u", F2FS_OPTION(sbi).root_reserved_blocks); } else { - F2FS_OPTION(sbi).root_reserved_blocks = result.int_32; - set_opt(sbi, RESERVE_ROOT); + ctx_set_opt(ctx, F2FS_MOUNT_RESERVE_ROOT); + F2FS_CTX_INFO(ctx).root_reserved_blocks = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_reserve_root; } break; case Opt_resuid: - F2FS_OPTION(sbi).s_resuid = result.uid; + F2FS_CTX_INFO(ctx).s_resuid = result.uid; + ctx->spec_mask |= F2FS_SPEC_resuid; break; case Opt_resgid: - F2FS_OPTION(sbi).s_resgid = result.gid; + F2FS_CTX_INFO(ctx).s_resgid = result.gid; + ctx->spec_mask |= F2FS_SPEC_resgid; break; case Opt_mode: - F2FS_OPTION(sbi).fs_mode = 
result.uint_32; + F2FS_CTX_INFO(ctx).fs_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_mode; break; #ifdef CONFIG_F2FS_FAULT_INJECTION case Opt_fault_injection: if (f2fs_build_fault_attr(sbi, result.int_32, 0, FAULT_RATE)) return -EINVAL; - set_opt(sbi, FAULT_INJECTION); + F2FS_CTX_INFO(ctx).fault_info.inject_rate = result.int_32; + ctx->spec_mask |= F2FS_SPEC_fault_injection; + ctx_set_opt(ctx, F2FS_MOUNT_FAULT_INJECTION); break; case Opt_fault_type: if (f2fs_build_fault_attr(sbi, 0, result.int_32, FAULT_TYPE)) return -EINVAL; - set_opt(sbi, FAULT_INJECTION); + F2FS_CTX_INFO(ctx).fault_info.inject_type = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_fault_type; + ctx_set_opt(ctx, F2FS_MOUNT_FAULT_INJECTION); break; #else case Opt_fault_injection: @@ -886,55 +924,56 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) #endif case Opt_lazytime: if (result.negated) - clear_opt(sbi, LAZYTIME); + ctx_clear_opt(ctx, F2FS_MOUNT_LAZYTIME); else - set_opt(sbi, LAZYTIME); + ctx_set_opt(ctx, F2FS_MOUNT_LAZYTIME); break; #ifdef CONFIG_QUOTA case Opt_quota: if (result.negated) { - clear_opt(sbi, QUOTA); - clear_opt(sbi, USRQUOTA); - clear_opt(sbi, GRPQUOTA); - clear_opt(sbi, PRJQUOTA); + ctx_clear_opt(ctx, F2FS_MOUNT_QUOTA); + ctx_clear_opt(ctx, F2FS_MOUNT_USRQUOTA); + ctx_clear_opt(ctx, F2FS_MOUNT_GRPQUOTA); + ctx_clear_opt(ctx, F2FS_MOUNT_PRJQUOTA); } else - set_opt(sbi, USRQUOTA); + ctx_set_opt(ctx, F2FS_MOUNT_USRQUOTA); break; case Opt_usrquota: - set_opt(sbi, USRQUOTA); + ctx_set_opt(ctx, F2FS_MOUNT_USRQUOTA); break; case Opt_grpquota: - set_opt(sbi, GRPQUOTA); + ctx_set_opt(ctx, F2FS_MOUNT_GRPQUOTA); break; case Opt_prjquota: - set_opt(sbi, PRJQUOTA); + ctx_set_opt(ctx, F2FS_MOUNT_PRJQUOTA); break; case Opt_usrjquota: if (!*param->string) - ret = f2fs_clear_qf_name(sbi, USRQUOTA); + ret = f2fs_unnote_qf_name(fc, USRQUOTA); else - ret = f2fs_set_qf_name(sbi, USRQUOTA, param); + ret = f2fs_note_qf_name(fc, USRQUOTA, param); if (ret) 
return ret; break; case Opt_grpjquota: if (!*param->string) - ret = f2fs_clear_qf_name(sbi, GRPQUOTA); + ret = f2fs_unnote_qf_name(fc, GRPQUOTA); else - ret = f2fs_set_qf_name(sbi, GRPQUOTA, param); + ret = f2fs_note_qf_name(fc, GRPQUOTA, param); if (ret) return ret; break; case Opt_prjjquota: if (!*param->string) - ret = f2fs_clear_qf_name(sbi, PRJQUOTA); + ret = f2fs_unnote_qf_name(fc, PRJQUOTA); else - ret = f2fs_set_qf_name(sbi, PRJQUOTA, param); + ret = f2fs_note_qf_name(fc, PRJQUOTA, param); if (ret) return ret; break; case Opt_jqfmt: - F2FS_OPTION(sbi).s_jquota_fmt = result.uint_32; + F2FS_CTX_INFO(ctx).s_jquota_fmt = result.int_32; + ctx->spec_mask |= F2FS_SPEC_jqfmt; break; #else case Opt_quota: @@ -948,19 +987,21 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; #endif case Opt_alloc: - F2FS_OPTION(sbi).alloc_mode = result.uint_32; + F2FS_CTX_INFO(ctx).alloc_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_alloc_mode; break; case Opt_fsync: - F2FS_OPTION(sbi).fsync_mode = result.uint_32; + F2FS_CTX_INFO(ctx).fsync_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_fsync_mode; break; case Opt_test_dummy_encryption: - ret = f2fs_set_test_dummy_encryption(sbi, param, is_remount); + ret = f2fs_parse_test_dummy_encryption(param, ctx); if (ret) return ret; break; case Opt_inlinecrypt: #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT - set_opt(sbi, INLINECRYPT); + ctx_set_opt(ctx, F2FS_MOUNT_INLINECRYPT); #else f2fs_info(NULL, "inline encryption not supported"); #endif @@ -981,20 +1022,22 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) return -EINVAL; if (arg < 0 || arg > 100) return -EINVAL; - F2FS_OPTION(sbi).unusable_cap_perc = arg; - set_opt(sbi, DISABLE_CHECKPOINT); + F2FS_CTX_INFO(ctx).unusable_cap_perc = arg; + ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap_perc; + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT); break; case Opt_checkpoint_disable_cap: if (args->from && 
match_int(args, &arg)) return -EINVAL; - F2FS_OPTION(sbi).unusable_cap = arg; - set_opt(sbi, DISABLE_CHECKPOINT); + F2FS_CTX_INFO(ctx).unusable_cap = arg; + ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap; + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT); break; case Opt_checkpoint_disable: - set_opt(sbi, DISABLE_CHECKPOINT); + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT); break; case Opt_checkpoint_enable: - clear_opt(sbi, DISABLE_CHECKPOINT); + ctx_clear_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT); break; default: return -EINVAL; @@ -1002,9 +1045,9 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; case Opt_checkpoint_merge: if (result.negated) - clear_opt(sbi, MERGE_CHECKPOINT); + ctx_clear_opt(ctx, F2FS_MOUNT_MERGE_CHECKPOINT); else - set_opt(sbi, MERGE_CHECKPOINT); + ctx_set_opt(ctx, F2FS_MOUNT_MERGE_CHECKPOINT); break; #ifdef CONFIG_F2FS_FS_COMPRESSION case Opt_compress_algorithm: @@ -1015,33 +1058,39 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) name = param->string; if (!strcmp(name, "lzo")) { #ifdef CONFIG_F2FS_FS_LZO - F2FS_OPTION(sbi).compress_level = 0; - F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZO; + F2FS_CTX_INFO(ctx).compress_level = 0; + F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_LZO; + ctx->spec_mask |= F2FS_SPEC_compress_level; + ctx->spec_mask |= F2FS_SPEC_compress_algorithm; #else f2fs_info(NULL, "kernel doesn't support lzo compression"); #endif } else if (!strncmp(name, "lz4", 3)) { #ifdef CONFIG_F2FS_FS_LZ4 - ret = f2fs_set_lz4hc_level(sbi, name); + ret = f2fs_set_lz4hc_level(ctx, name); if (ret) return -EINVAL; - F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; + F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_LZ4; + ctx->spec_mask |= F2FS_SPEC_compress_algorithm; #else f2fs_info(NULL, "kernel doesn't support lz4 compression"); #endif } else if (!strncmp(name, "zstd", 4)) { #ifdef CONFIG_F2FS_FS_ZSTD - ret = f2fs_set_zstd_level(sbi, name); + ret = 
f2fs_set_zstd_level(ctx, name); if (ret) return -EINVAL; - F2FS_OPTION(sbi).compress_algorithm = COMPRESS_ZSTD; + F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_ZSTD; + ctx->spec_mask |= F2FS_SPEC_compress_algorithm; #else f2fs_info(NULL, "kernel doesn't support zstd compression"); #endif } else if (!strcmp(name, "lzo-rle")) { #ifdef CONFIG_F2FS_FS_LZORLE - F2FS_OPTION(sbi).compress_level = 0; - F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZORLE; + F2FS_CTX_INFO(ctx).compress_level = 0; + F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_LZORLE; + ctx->spec_mask |= F2FS_SPEC_compress_level; + ctx->spec_mask |= F2FS_SPEC_compress_algorithm; #else f2fs_info(NULL, "kernel doesn't support lzorle compression"); #endif @@ -1059,7 +1108,8 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) "Compress cluster log size is out of range"); return -EINVAL; } - F2FS_OPTION(sbi).compress_log_size = result.uint_32; + F2FS_CTX_INFO(ctx).compress_log_size = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_compress_log_size; break; case Opt_compress_extension: if (!f2fs_sb_has_compression(sbi)) { @@ -1067,8 +1117,8 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; } name = param->string; - ext = F2FS_OPTION(sbi).extensions; - ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + ext = F2FS_CTX_INFO(ctx).extensions; + ext_cnt = F2FS_CTX_INFO(ctx).compress_ext_cnt; if (strlen(name) >= F2FS_EXTENSION_LEN || ext_cnt >= COMPRESS_EXT_NUM) { @@ -1076,13 +1126,14 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) return -EINVAL; } - if (is_compress_extension_exist(sbi, name, true)) + if (is_compress_extension_exist(&ctx->info, name, true)) break; ret = strscpy(ext[ext_cnt], name, F2FS_EXTENSION_LEN); if (ret < 0) return ret; - F2FS_OPTION(sbi).compress_ext_cnt++; + F2FS_CTX_INFO(ctx).compress_ext_cnt++; + ctx->spec_mask |= F2FS_SPEC_compress_extension; break; case Opt_nocompress_extension: if 
(!f2fs_sb_has_compression(sbi)) { @@ -1090,8 +1141,8 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; } name = param->string; - noext = F2FS_OPTION(sbi).noextensions; - noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + noext = F2FS_CTX_INFO(ctx).noextensions; + noext_cnt = F2FS_CTX_INFO(ctx).nocompress_ext_cnt; if (strlen(name) >= F2FS_EXTENSION_LEN || noext_cnt >= COMPRESS_EXT_NUM) { @@ -1099,34 +1150,37 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) return -EINVAL; } - if (is_compress_extension_exist(sbi, name, false)) + if (is_compress_extension_exist(&ctx->info, name, false)) break; ret = strscpy(noext[noext_cnt], name, F2FS_EXTENSION_LEN); if (ret < 0) return ret; - F2FS_OPTION(sbi).nocompress_ext_cnt++; + F2FS_CTX_INFO(ctx).nocompress_ext_cnt++; + ctx->spec_mask |= F2FS_SPEC_nocompress_extension; break; case Opt_compress_chksum: if (!f2fs_sb_has_compression(sbi)) { f2fs_info(NULL, "Image doesn't support compression"); break; } - F2FS_OPTION(sbi).compress_chksum = true; + F2FS_CTX_INFO(ctx).compress_chksum = true; + ctx->spec_mask |= F2FS_SPEC_compress_chksum; break; case Opt_compress_mode: if (!f2fs_sb_has_compression(sbi)) { f2fs_info(NULL, "Image doesn't support compression"); break; } - F2FS_OPTION(sbi).compress_mode = result.uint_32; + F2FS_CTX_INFO(ctx).compress_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_compress_mode; break; case Opt_compress_cache: if (!f2fs_sb_has_compression(sbi)) { f2fs_info(NULL, "Image doesn't support compression"); break; } - set_opt(sbi, COMPRESS_CACHE); + ctx_set_opt(ctx, F2FS_MOUNT_COMPRESS_CACHE); break; #else case Opt_compress_algorithm: @@ -1140,28 +1194,31 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; #endif case Opt_atgc: - set_opt(sbi, ATGC); + ctx_set_opt(ctx, F2FS_MOUNT_ATGC); break; case Opt_gc_merge: if (result.negated) - clear_opt(sbi, GC_MERGE); + ctx_clear_opt(ctx, F2FS_MOUNT_GC_MERGE); 
else - set_opt(sbi, GC_MERGE); + ctx_set_opt(ctx, F2FS_MOUNT_GC_MERGE); break; case Opt_discard_unit: - F2FS_OPTION(sbi).discard_unit = result.uint_32; + F2FS_CTX_INFO(ctx).discard_unit = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_discard_unit; break; case Opt_memory_mode: - F2FS_OPTION(sbi).memory_mode = result.uint_32; + F2FS_CTX_INFO(ctx).memory_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_memory_mode; break; case Opt_age_extent_cache: - set_opt(sbi, AGE_EXTENT_CACHE); + ctx_set_opt(ctx, F2FS_MOUNT_AGE_EXTENT_CACHE); break; case Opt_errors: - F2FS_OPTION(sbi).errors = result.uint_32; + F2FS_CTX_INFO(ctx).errors = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_errors; break; case Opt_nat_bits: - set_opt(sbi, NAT_BITS); + ctx_set_opt(ctx, F2FS_MOUNT_NAT_BITS); break; } return 0; @@ -1171,6 +1228,7 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remoun { struct fs_parameter param; struct fs_context fc; + struct f2fs_fs_context ctx; char *key; int ret; @@ -1179,6 +1237,8 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remoun memset(&fc, 0, sizeof(fc)); fc.s_fs_info = sbi; + fc.fs_private = &ctx; + if (is_remount) fc.purpose = FS_CONTEXT_FOR_RECONFIGURE; From d185351325237da688de006a2c579e82ea97bdfe Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 10 Jul 2025 12:14:13 +0000 Subject: [PATCH 0913/2411] f2fs: separate the options parsing and options checking The new mount API separates option parsing and super block setup into two distinct steps, so we need to separate the option parsing out of parse_options(). To achieve this, we handle the mount options in three steps: - Firstly, we move sb/sbi out of handle_mount_opt. Since the former patch introduced f2fs_fs_context, we record the changed mount options in this context. In handle_mount_opt, sb/sbi is NULL, so we should move all related code out of handle_mount_opt (thus, the checks that use sb/sbi should move out).
- Secondly, we introduce some check helpers to keep the options consistent. While the superblock is being filled, sb/sbi are ready, so we check the f2fs_fs_context, which holds the mount options, based on sb/sbi. - Thirdly, we apply the new mount options to sb/sbi. After checking the f2fs_fs_context, all changes to the mount options are valid, so we can apply them to sb/sbi directly. After doing this, option parsing and super block setup are decoupled, and the original execution flow should be retained. Signed-off-by: Hongbo Li [sandeen: forward port, minor fixes and updates] Signed-off-by: Eric Sandeen [hongbo: minor fixes] Signed-off-by: Hongbo Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 742 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 547 insertions(+), 195 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c84425771f0e..e0c64b33d254 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -360,6 +360,12 @@ static inline void ctx_clear_opt(struct f2fs_fs_context *ctx, ctx->opt_mask |= flag; } +static inline bool ctx_test_opt(struct f2fs_fs_context *ctx, + unsigned int flag) +{ + return ctx->info.opt & flag; +} + void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, const char *fmt, ...) { @@ -526,51 +532,6 @@ static int f2fs_unnote_qf_name(struct fs_context *fc, int qtype) ctx->qname_mask |= 1 << qtype; return 0; } - -static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) -{ - /* - * We do the test below only for project quotas. 'usrquota' and - * 'grpquota' mount options are allowed even without quota feature - * to support legacy quotas in quota files. - */ - if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) { - f2fs_err(sbi, "Project quota feature not enabled. 
Cannot enable project quota enforcement."); - return -1; - } - if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || - F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || - F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) { - if (test_opt(sbi, USRQUOTA) && - F2FS_OPTION(sbi).s_qf_names[USRQUOTA]) - clear_opt(sbi, USRQUOTA); - - if (test_opt(sbi, GRPQUOTA) && - F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]) - clear_opt(sbi, GRPQUOTA); - - if (test_opt(sbi, PRJQUOTA) && - F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) - clear_opt(sbi, PRJQUOTA); - - if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) || - test_opt(sbi, PRJQUOTA)) { - f2fs_err(sbi, "old and new quota format mixing"); - return -1; - } - - if (!F2FS_OPTION(sbi).s_jquota_fmt) { - f2fs_err(sbi, "journaled quota format not specified"); - return -1; - } - } - - if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) { - f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt"); - F2FS_OPTION(sbi).s_jquota_fmt = 0; - } - return 0; -} #endif static int f2fs_parse_test_dummy_encryption(const struct fs_parameter *param, @@ -629,28 +590,28 @@ static bool is_compress_extension_exist(struct f2fs_mount_info *info, * extension will be treated as special cases and will not be compressed. * 3. Don't allow the non-compress extension specifies all files. 
*/ -static int f2fs_test_compress_extension(struct f2fs_sb_info *sbi) +static int f2fs_test_compress_extension(unsigned char (*noext)[F2FS_EXTENSION_LEN], + int noext_cnt, + unsigned char (*ext)[F2FS_EXTENSION_LEN], + int ext_cnt) { - unsigned char (*ext)[F2FS_EXTENSION_LEN]; - unsigned char (*noext)[F2FS_EXTENSION_LEN]; - int ext_cnt, noext_cnt, index = 0, no_index = 0; - - ext = F2FS_OPTION(sbi).extensions; - ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; - noext = F2FS_OPTION(sbi).noextensions; - noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + int index = 0, no_index = 0; if (!noext_cnt) return 0; for (no_index = 0; no_index < noext_cnt; no_index++) { + if (strlen(noext[no_index]) == 0) + continue; if (!strcasecmp("*", noext[no_index])) { - f2fs_info(sbi, "Don't allow the nocompress extension specifies all files"); + f2fs_info(NULL, "Don't allow the nocompress extension specifies all files"); return -EINVAL; } for (index = 0; index < ext_cnt; index++) { + if (strlen(ext[index]) == 0) + continue; if (!strcasecmp(ext[index], noext[no_index])) { - f2fs_info(sbi, "Don't allow the same extension %s appear in both compress and nocompress extension", + f2fs_info(NULL, "Don't allow the same extension %s appear in both compress and nocompress extension", ext[index]); return -EINVAL; } @@ -742,7 +703,6 @@ static int f2fs_set_zstd_level(struct f2fs_fs_context *ctx, const char *str) static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) { struct f2fs_fs_context *ctx = fc->fs_private; - struct f2fs_sb_info *sbi = fc->s_fs_info; #ifdef CONFIG_F2FS_FS_COMPRESSION unsigned char (*ext)[F2FS_EXTENSION_LEN]; unsigned char (*noext)[F2FS_EXTENSION_LEN]; @@ -751,15 +711,12 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) #endif substring_t args[MAX_OPT_ARGS]; struct fs_parse_result result; - bool is_remount; int token, ret, arg; token = fs_parse(fc, f2fs_param_specs, param, &result); if (token < 0) return token; - is_remount = 
fc->purpose == FS_CONTEXT_FOR_RECONFIGURE; - switch (token) { case Opt_gc_background: F2FS_CTX_INFO(ctx).bggc_mode = result.uint_32; @@ -773,19 +730,10 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) ctx_set_opt(ctx, F2FS_MOUNT_NORECOVERY); break; case Opt_discard: - if (result.negated) { - if (f2fs_hw_should_discard(sbi)) { - f2fs_warn(NULL, "discard is required for zoned block devices"); - return -EINVAL; - } + if (result.negated) ctx_clear_opt(ctx, F2FS_MOUNT_DISCARD); - } else { - if (!f2fs_hw_support_discard(sbi)) { - f2fs_warn(NULL, "device does not support discard"); - break; - } + else ctx_set_opt(ctx, F2FS_MOUNT_DISCARD); - } break; case Opt_noheap: case Opt_heap: @@ -805,6 +753,12 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) ctx_set_opt(ctx, F2FS_MOUNT_INLINE_XATTR); break; case Opt_inline_xattr_size: + if (result.int_32 < MIN_INLINE_XATTR_SIZE || + result.int_32 > MAX_INLINE_XATTR_SIZE) { + f2fs_err(NULL, "inline xattr size is out of range: %u ~ %u", + (u32)MIN_INLINE_XATTR_SIZE, (u32)MAX_INLINE_XATTR_SIZE); + return -EINVAL; + } ctx_set_opt(ctx, F2FS_MOUNT_INLINE_XATTR_SIZE); F2FS_CTX_INFO(ctx).inline_xattr_size = result.int_32; ctx->spec_mask |= F2FS_SPEC_inline_xattr_size; @@ -866,27 +820,18 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) ctx_set_opt(ctx, F2FS_MOUNT_FASTBOOT); break; case Opt_extent_cache: - if (result.negated) { - if (f2fs_sb_has_device_alias(sbi)) { - f2fs_err(sbi, "device aliasing requires extent cache"); - return -EINVAL; - } + if (result.negated) ctx_clear_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE); - } else + else ctx_set_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE); break; case Opt_data_flush: ctx_set_opt(ctx, F2FS_MOUNT_DATA_FLUSH); break; case Opt_reserve_root: - if (test_opt(sbi, RESERVE_ROOT)) { - f2fs_info(NULL, "Preserve previous reserve_root=%u", - F2FS_OPTION(sbi).root_reserved_blocks); - } else { - ctx_set_opt(ctx, 
F2FS_MOUNT_RESERVE_ROOT); - F2FS_CTX_INFO(ctx).root_reserved_blocks = result.uint_32; - ctx->spec_mask |= F2FS_SPEC_reserve_root; - } + ctx_set_opt(ctx, F2FS_MOUNT_RESERVE_ROOT); + F2FS_CTX_INFO(ctx).root_reserved_blocks = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_reserve_root; break; case Opt_resuid: F2FS_CTX_INFO(ctx).s_resuid = result.uid; @@ -902,15 +847,13 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; #ifdef CONFIG_F2FS_FAULT_INJECTION case Opt_fault_injection: - if (f2fs_build_fault_attr(sbi, result.int_32, 0, FAULT_RATE)) - return -EINVAL; F2FS_CTX_INFO(ctx).fault_info.inject_rate = result.int_32; ctx->spec_mask |= F2FS_SPEC_fault_injection; ctx_set_opt(ctx, F2FS_MOUNT_FAULT_INJECTION); break; case Opt_fault_type: - if (f2fs_build_fault_attr(sbi, 0, result.int_32, FAULT_TYPE)) + if (result.uint_32 > BIT(FAULT_MAX)) return -EINVAL; F2FS_CTX_INFO(ctx).fault_info.inject_type = result.uint_32; ctx->spec_mask |= F2FS_SPEC_fault_type; @@ -1051,10 +994,6 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; #ifdef CONFIG_F2FS_FS_COMPRESSION case Opt_compress_algorithm: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(NULL, "Image doesn't support compression"); - break; - } name = param->string; if (!strcmp(name, "lzo")) { #ifdef CONFIG_F2FS_FS_LZO @@ -1098,10 +1037,6 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) return -EINVAL; break; case Opt_compress_log_size: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(NULL, "Image doesn't support compression"); - break; - } if (result.uint_32 < MIN_COMPRESS_LOG_SIZE || result.uint_32 > MAX_COMPRESS_LOG_SIZE) { f2fs_err(NULL, @@ -1112,10 +1047,6 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) ctx->spec_mask |= F2FS_SPEC_compress_log_size; break; case Opt_compress_extension: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(NULL, "Image doesn't support compression"); - 
break; - } name = param->string; ext = F2FS_CTX_INFO(ctx).extensions; ext_cnt = F2FS_CTX_INFO(ctx).compress_ext_cnt; @@ -1136,10 +1067,6 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) ctx->spec_mask |= F2FS_SPEC_compress_extension; break; case Opt_nocompress_extension: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(NULL, "Image doesn't support compression"); - break; - } name = param->string; noext = F2FS_CTX_INFO(ctx).noextensions; noext_cnt = F2FS_CTX_INFO(ctx).nocompress_ext_cnt; @@ -1160,26 +1087,14 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) ctx->spec_mask |= F2FS_SPEC_nocompress_extension; break; case Opt_compress_chksum: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(NULL, "Image doesn't support compression"); - break; - } F2FS_CTX_INFO(ctx).compress_chksum = true; ctx->spec_mask |= F2FS_SPEC_compress_chksum; break; case Opt_compress_mode: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(NULL, "Image doesn't support compression"); - break; - } F2FS_CTX_INFO(ctx).compress_mode = result.uint_32; ctx->spec_mask |= F2FS_SPEC_compress_mode; break; case Opt_compress_cache: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(NULL, "Image doesn't support compression"); - break; - } ctx_set_opt(ctx, F2FS_MOUNT_COMPRESS_CACHE); break; #else @@ -1224,24 +1139,15 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) return 0; } -static int parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remount) +static int parse_options(struct fs_context *fc, char *options) { struct fs_parameter param; - struct fs_context fc; - struct f2fs_fs_context ctx; char *key; int ret; if (!options) return 0; - memset(&fc, 0, sizeof(fc)); - fc.s_fs_info = sbi; - fc.fs_private = &ctx; - - if (is_remount) - fc.purpose = FS_CONTEXT_FOR_RECONFIGURE; - while ((key = strsep(&options, ",")) != NULL) { if (*key) { size_t v_len = 0; @@ -1265,7 +1171,7 @@ static int 
parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remoun param.key = key; param.size = v_len; - ret = handle_mount_opt(&fc, &param); + ret = handle_mount_opt(fc, &param); kfree(param.string); if (ret < 0) return ret; @@ -1274,24 +1180,298 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remoun return 0; } -static int f2fs_validate_options(struct f2fs_sb_info *sbi) +/* + * Check quota settings consistency. + */ +static int f2fs_check_quota_consistency(struct fs_context *fc, + struct super_block *sb) { -#ifdef CONFIG_QUOTA - if (f2fs_check_quota_options(sbi)) - return -EINVAL; -#else - if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_info(NULL, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + struct f2fs_sb_info *sbi = F2FS_SB(sb); + #ifdef CONFIG_QUOTA + struct f2fs_fs_context *ctx = fc->fs_private; + bool quota_feature = f2fs_sb_has_quota_ino(sbi); + bool quota_turnon = sb_any_quota_loaded(sb); + char *old_qname, *new_qname; + bool usr_qf_name, grp_qf_name, prj_qf_name, usrquota, grpquota, prjquota; + int i; + + /* + * We do the test below only for project quotas. 'usrquota' and + * 'grpquota' mount options are allowed even without quota feature + * to support legacy quotas in quota files. + */ + if (ctx_test_opt(ctx, F2FS_MOUNT_PRJQUOTA) && + !f2fs_sb_has_project_quota(sbi)) { + f2fs_err(sbi, "Project quota feature not enabled. 
Cannot enable project quota enforcement."); return -EINVAL; } - if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_err(NULL, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + + if (ctx->qname_mask) { + for (i = 0; i < MAXQUOTAS; i++) { + if (!(ctx->qname_mask & (1 << i))) + continue; + + old_qname = F2FS_OPTION(sbi).s_qf_names[i]; + new_qname = F2FS_CTX_INFO(ctx).s_qf_names[i]; + if (quota_turnon && + !!old_qname != !!new_qname) + goto err_jquota_change; + + if (old_qname) { + if (strcmp(old_qname, new_qname) == 0) { + ctx->qname_mask &= ~(1 << i); + continue; + } + goto err_jquota_specified; + } + + if (quota_feature) { + f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name"); + ctx->qname_mask &= ~(1 << i); + kfree(F2FS_CTX_INFO(ctx).s_qf_names[i]); + F2FS_CTX_INFO(ctx).s_qf_names[i] = NULL; + } + } + } + + /* Make sure we don't mix old and new quota format */ + usr_qf_name = F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || + F2FS_CTX_INFO(ctx).s_qf_names[USRQUOTA]; + grp_qf_name = F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || + F2FS_CTX_INFO(ctx).s_qf_names[GRPQUOTA]; + prj_qf_name = F2FS_OPTION(sbi).s_qf_names[PRJQUOTA] || + F2FS_CTX_INFO(ctx).s_qf_names[PRJQUOTA]; + usrquota = test_opt(sbi, USRQUOTA) || + ctx_test_opt(ctx, F2FS_MOUNT_USRQUOTA); + grpquota = test_opt(sbi, GRPQUOTA) || + ctx_test_opt(ctx, F2FS_MOUNT_GRPQUOTA); + prjquota = test_opt(sbi, PRJQUOTA) || + ctx_test_opt(ctx, F2FS_MOUNT_PRJQUOTA); + + if (usr_qf_name) { + ctx_clear_opt(ctx, F2FS_MOUNT_USRQUOTA); + usrquota = false; + } + if (grp_qf_name) { + ctx_clear_opt(ctx, F2FS_MOUNT_GRPQUOTA); + grpquota = false; + } + if (prj_qf_name) { + ctx_clear_opt(ctx, F2FS_MOUNT_PRJQUOTA); + prjquota = false; + } + if (usr_qf_name || grp_qf_name || prj_qf_name) { + if (grpquota || usrquota || prjquota) { + f2fs_err(sbi, "old and new quota format mixing"); + return -EINVAL; + } + if (!(ctx->spec_mask & F2FS_SPEC_jqfmt || + F2FS_OPTION(sbi).s_jquota_fmt)) { 
+ f2fs_err(sbi, "journaled quota format not specified"); + return -EINVAL; + } + } + return 0; + +err_jquota_change: + f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); + return -EINVAL; +err_jquota_specified: + f2fs_err(sbi, "%s quota file already specified", + QTYPE2NAME(i)); + return -EINVAL; + +#else + if (f2fs_readonly(sbi->sb)) + return 0; + if (f2fs_sb_has_quota_ino(sbi)) { + f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + return -EINVAL; + } + if (f2fs_sb_has_project_quota(sbi)) { + f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + return -EINVAL; + } + + return 0; +#endif +} + +static int f2fs_check_test_dummy_encryption(struct fs_context *fc, + struct super_block *sb) +{ + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + if (!fscrypt_is_dummy_policy_set(&F2FS_CTX_INFO(ctx).dummy_enc_policy)) + return 0; + + if (!f2fs_sb_has_encrypt(sbi)) { + f2fs_err(sbi, "Encrypt feature is off"); + return -EINVAL; + } + + /* + * This mount option is just for testing, and it's not worthwhile to + * implement the extra complexity (e.g. RCU protection) that would be + * needed to allow it to be set or changed during remount. We do allow + * it to be specified during remount, but only if there is no change. 
+ */ + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { + if (fscrypt_dummy_policies_equal(&F2FS_OPTION(sbi).dummy_enc_policy, + &F2FS_CTX_INFO(ctx).dummy_enc_policy)) + return 0; + f2fs_warn(sbi, "Can't set or change test_dummy_encryption on remount"); + return -EINVAL; + } + return 0; +} + +static inline bool test_compression_spec(unsigned int mask) +{ + return mask & (F2FS_SPEC_compress_algorithm + | F2FS_SPEC_compress_log_size + | F2FS_SPEC_compress_extension + | F2FS_SPEC_nocompress_extension + | F2FS_SPEC_compress_chksum + | F2FS_SPEC_compress_mode); +} + +static inline void clear_compression_spec(struct f2fs_fs_context *ctx) +{ + ctx->spec_mask &= ~(F2FS_SPEC_compress_algorithm + | F2FS_SPEC_compress_log_size + | F2FS_SPEC_compress_extension + | F2FS_SPEC_nocompress_extension + | F2FS_SPEC_compress_chksum + | F2FS_SPEC_compress_mode); +} + +static int f2fs_check_compression(struct fs_context *fc, + struct super_block *sb) +{ +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int i, cnt; + + if (!f2fs_sb_has_compression(sbi)) { + if (test_compression_spec(ctx->spec_mask) || + ctx_test_opt(ctx, F2FS_MOUNT_COMPRESS_CACHE)) + f2fs_info(sbi, "Image doesn't support compression"); + clear_compression_spec(ctx); + ctx->opt_mask &= ~F2FS_MOUNT_COMPRESS_CACHE; + return 0; + } + if (ctx->spec_mask & F2FS_SPEC_compress_extension) { + cnt = F2FS_CTX_INFO(ctx).compress_ext_cnt; + for (i = 0; i < F2FS_CTX_INFO(ctx).compress_ext_cnt; i++) { + if (is_compress_extension_exist(&F2FS_OPTION(sbi), + F2FS_CTX_INFO(ctx).extensions[i], true)) { + F2FS_CTX_INFO(ctx).extensions[i][0] = '\0'; + cnt--; + } + } + if (F2FS_OPTION(sbi).compress_ext_cnt + cnt > COMPRESS_EXT_NUM) { + f2fs_err(sbi, "invalid extension length/number"); + return -EINVAL; + } + } + if (ctx->spec_mask & F2FS_SPEC_nocompress_extension) { + cnt = F2FS_CTX_INFO(ctx).nocompress_ext_cnt; + for (i = 0; i < F2FS_CTX_INFO(ctx).nocompress_ext_cnt; 
i++) { + if (is_compress_extension_exist(&F2FS_OPTION(sbi), + F2FS_CTX_INFO(ctx).noextensions[i], false)) { + F2FS_CTX_INFO(ctx).noextensions[i][0] = '\0'; + cnt--; + } + } + if (F2FS_OPTION(sbi).nocompress_ext_cnt + cnt > COMPRESS_EXT_NUM) { + f2fs_err(sbi, "invalid noextension length/number"); + return -EINVAL; + } + } + + if (f2fs_test_compress_extension(F2FS_CTX_INFO(ctx).noextensions, + F2FS_CTX_INFO(ctx).nocompress_ext_cnt, + F2FS_CTX_INFO(ctx).extensions, + F2FS_CTX_INFO(ctx).compress_ext_cnt)) { + f2fs_err(sbi, "new noextensions conflicts with new extensions"); + return -EINVAL; + } + if (f2fs_test_compress_extension(F2FS_CTX_INFO(ctx).noextensions, + F2FS_CTX_INFO(ctx).nocompress_ext_cnt, + F2FS_OPTION(sbi).extensions, + F2FS_OPTION(sbi).compress_ext_cnt)) { + f2fs_err(sbi, "new noextensions conflicts with old extensions"); + return -EINVAL; + } + if (f2fs_test_compress_extension(F2FS_OPTION(sbi).noextensions, + F2FS_OPTION(sbi).nocompress_ext_cnt, + F2FS_CTX_INFO(ctx).extensions, + F2FS_CTX_INFO(ctx).compress_ext_cnt)) { + f2fs_err(sbi, "new extensions conflicts with old noextensions"); return -EINVAL; } #endif + return 0; +} + +static int f2fs_check_opt_consistency(struct fs_context *fc, + struct super_block *sb) +{ + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int err; + + if (ctx_test_opt(ctx, F2FS_MOUNT_NORECOVERY) && !f2fs_readonly(sb)) + return -EINVAL; + + if (f2fs_hw_should_discard(sbi) && + (ctx->opt_mask & F2FS_MOUNT_DISCARD) && + !ctx_test_opt(ctx, F2FS_MOUNT_DISCARD)) { + f2fs_warn(sbi, "discard is required for zoned block devices"); + return -EINVAL; + } + + if (!f2fs_hw_support_discard(sbi) && + (ctx->opt_mask & F2FS_MOUNT_DISCARD) && + ctx_test_opt(ctx, F2FS_MOUNT_DISCARD)) { + f2fs_warn(sbi, "device does not support discard"); + ctx_clear_opt(ctx, F2FS_MOUNT_DISCARD); + ctx->opt_mask &= ~F2FS_MOUNT_DISCARD; + } + + if (f2fs_sb_has_device_alias(sbi) && + (ctx->opt_mask & 
F2FS_MOUNT_READ_EXTENT_CACHE) && + !ctx_test_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE)) { + f2fs_err(sbi, "device aliasing requires extent cache"); + return -EINVAL; + } + + if (test_opt(sbi, RESERVE_ROOT) && + (ctx->opt_mask & F2FS_MOUNT_RESERVE_ROOT) && + ctx_test_opt(ctx, F2FS_MOUNT_RESERVE_ROOT)) { + f2fs_info(sbi, "Preserve previous reserve_root=%u", + F2FS_OPTION(sbi).root_reserved_blocks); + ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_ROOT); + ctx->opt_mask &= ~F2FS_MOUNT_RESERVE_ROOT; + } + + err = f2fs_check_test_dummy_encryption(fc, sb); + if (err) + return err; + + err = f2fs_check_compression(fc, sb); + if (err) + return err; + + err = f2fs_check_quota_consistency(fc, sb); + if (err) + return err; if (!IS_ENABLED(CONFIG_UNICODE) && f2fs_sb_has_casefold(sbi)) { - f2fs_err(NULL, + f2fs_err(sbi, "Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE"); return -EINVAL; } @@ -1302,75 +1482,239 @@ static int f2fs_validate_options(struct f2fs_sb_info *sbi) * devices, but mandatory for host-managed zoned block devices. 
*/ if (f2fs_sb_has_blkzoned(sbi)) { + if (F2FS_CTX_INFO(ctx).bggc_mode == BGGC_MODE_OFF) { + f2fs_warn(sbi, "zoned devices need bggc"); + return -EINVAL; + } #ifdef CONFIG_BLK_DEV_ZONED - if (F2FS_OPTION(sbi).discard_unit != - DISCARD_UNIT_SECTION) { - f2fs_info(NULL, "Zoned block device doesn't need small discard, set discard_unit=section by default"); - F2FS_OPTION(sbi).discard_unit = - DISCARD_UNIT_SECTION; + if ((ctx->spec_mask & F2FS_SPEC_discard_unit) && + F2FS_CTX_INFO(ctx).discard_unit != DISCARD_UNIT_SECTION) { + f2fs_info(sbi, "Zoned block device doesn't need small discard, set discard_unit=section by default"); + F2FS_CTX_INFO(ctx).discard_unit = DISCARD_UNIT_SECTION; } - if (F2FS_OPTION(sbi).fs_mode != FS_MODE_LFS) { - f2fs_info(NULL, "Only lfs mode is allowed with zoned block device feature"); + if ((ctx->spec_mask & F2FS_SPEC_mode) && + F2FS_CTX_INFO(ctx).fs_mode != FS_MODE_LFS) { + f2fs_info(sbi, "Only lfs mode is allowed with zoned block device feature"); return -EINVAL; } #else - f2fs_err(NULL, "Zoned block device support is not enabled"); + f2fs_err(sbi, "Zoned block device support is not enabled"); return -EINVAL; #endif } -#ifdef CONFIG_F2FS_FS_COMPRESSION - if (f2fs_test_compress_extension(sbi)) { - f2fs_err(NULL, "invalid compress or nocompress extension"); - return -EINVAL; - } -#endif - - if (test_opt(sbi, INLINE_XATTR_SIZE)) { - int min_size, max_size; - + if (ctx_test_opt(ctx, F2FS_MOUNT_INLINE_XATTR_SIZE)) { if (!f2fs_sb_has_extra_attr(sbi) || !f2fs_sb_has_flexible_inline_xattr(sbi)) { - f2fs_err(NULL, "extra_attr or flexible_inline_xattr feature is off"); + f2fs_err(sbi, "extra_attr or flexible_inline_xattr feature is off"); return -EINVAL; } - if (!test_opt(sbi, INLINE_XATTR)) { - f2fs_err(NULL, "inline_xattr_size option should be set with inline_xattr option"); - return -EINVAL; - } - - min_size = MIN_INLINE_XATTR_SIZE; - max_size = MAX_INLINE_XATTR_SIZE; - - if (F2FS_OPTION(sbi).inline_xattr_size < min_size || - 
F2FS_OPTION(sbi).inline_xattr_size > max_size) { - f2fs_err(NULL, "inline xattr size is out of range: %d ~ %d", - min_size, max_size); + if (!ctx_test_opt(ctx, F2FS_MOUNT_INLINE_XATTR) && !test_opt(sbi, INLINE_XATTR)) { + f2fs_err(sbi, "inline_xattr_size option should be set with inline_xattr option"); return -EINVAL; } } - if (test_opt(sbi, ATGC) && f2fs_lfs_mode(sbi)) { - f2fs_err(NULL, "LFS is not compatible with ATGC"); + if (ctx_test_opt(ctx, F2FS_MOUNT_ATGC) && + F2FS_CTX_INFO(ctx).fs_mode == FS_MODE_LFS) { + f2fs_err(sbi, "LFS is not compatible with ATGC"); return -EINVAL; } - if (f2fs_is_readonly(sbi) && test_opt(sbi, FLUSH_MERGE)) { - f2fs_err(NULL, "FLUSH_MERGE not compatible with readonly mode"); + if (f2fs_is_readonly(sbi) && ctx_test_opt(ctx, F2FS_MOUNT_FLUSH_MERGE)) { + f2fs_err(sbi, "FLUSH_MERGE not compatible with readonly mode"); return -EINVAL; } if (f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_err(NULL, "Allow to mount readonly mode only"); + f2fs_err(sbi, "Allow to mount readonly mode only"); return -EROFS; } + return 0; +} - if (test_opt(sbi, NORECOVERY) && !f2fs_readonly(sbi->sb)) { - f2fs_err(sbi, "norecovery requires readonly mount"); +static void f2fs_apply_quota_options(struct fs_context *fc, + struct super_block *sb) +{ +#ifdef CONFIG_QUOTA + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + bool quota_feature = f2fs_sb_has_quota_ino(sbi); + char *qname; + int i; + + if (quota_feature) + return; + + for (i = 0; i < MAXQUOTAS; i++) { + if (!(ctx->qname_mask & (1 << i))) + continue; + + qname = F2FS_CTX_INFO(ctx).s_qf_names[i]; + if (qname) { + qname = kstrdup(F2FS_CTX_INFO(ctx).s_qf_names[i], + GFP_KERNEL | __GFP_NOFAIL); + set_opt(sbi, QUOTA); + } + F2FS_OPTION(sbi).s_qf_names[i] = qname; + } + + if (ctx->spec_mask & F2FS_SPEC_jqfmt) + F2FS_OPTION(sbi).s_jquota_fmt = F2FS_CTX_INFO(ctx).s_jquota_fmt; + + if (quota_feature && F2FS_OPTION(sbi).s_jquota_fmt) { + f2fs_info(sbi, "QUOTA 
feature is enabled, so ignore jquota_fmt"); + F2FS_OPTION(sbi).s_jquota_fmt = 0; + } +#endif +} + +static void f2fs_apply_test_dummy_encryption(struct fs_context *fc, + struct super_block *sb) +{ + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + if (!fscrypt_is_dummy_policy_set(&F2FS_CTX_INFO(ctx).dummy_enc_policy) || + /* if already set, it was already verified to be the same */ + fscrypt_is_dummy_policy_set(&F2FS_OPTION(sbi).dummy_enc_policy)) + return; + swap(F2FS_OPTION(sbi).dummy_enc_policy, F2FS_CTX_INFO(ctx).dummy_enc_policy); + f2fs_warn(sbi, "Test dummy encryption mode enabled"); +} + +static void f2fs_apply_compression(struct fs_context *fc, + struct super_block *sb) +{ +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + unsigned char (*ctx_ext)[F2FS_EXTENSION_LEN]; + unsigned char (*sbi_ext)[F2FS_EXTENSION_LEN]; + int ctx_cnt, sbi_cnt, i; + + if (ctx->spec_mask & F2FS_SPEC_compress_level) + F2FS_OPTION(sbi).compress_level = + F2FS_CTX_INFO(ctx).compress_level; + if (ctx->spec_mask & F2FS_SPEC_compress_algorithm) + F2FS_OPTION(sbi).compress_algorithm = + F2FS_CTX_INFO(ctx).compress_algorithm; + if (ctx->spec_mask & F2FS_SPEC_compress_log_size) + F2FS_OPTION(sbi).compress_log_size = + F2FS_CTX_INFO(ctx).compress_log_size; + if (ctx->spec_mask & F2FS_SPEC_compress_chksum) + F2FS_OPTION(sbi).compress_chksum = + F2FS_CTX_INFO(ctx).compress_chksum; + if (ctx->spec_mask & F2FS_SPEC_compress_mode) + F2FS_OPTION(sbi).compress_mode = + F2FS_CTX_INFO(ctx).compress_mode; + if (ctx->spec_mask & F2FS_SPEC_compress_extension) { + ctx_ext = F2FS_CTX_INFO(ctx).extensions; + ctx_cnt = F2FS_CTX_INFO(ctx).compress_ext_cnt; + sbi_ext = F2FS_OPTION(sbi).extensions; + sbi_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + for (i = 0; i < ctx_cnt; i++) { + if (strlen(ctx_ext[i]) == 0) + continue; + strscpy(sbi_ext[sbi_cnt], ctx_ext[i]); + sbi_cnt++; + } + 
F2FS_OPTION(sbi).compress_ext_cnt = sbi_cnt; + } + if (ctx->spec_mask & F2FS_SPEC_nocompress_extension) { + ctx_ext = F2FS_CTX_INFO(ctx).noextensions; + ctx_cnt = F2FS_CTX_INFO(ctx).nocompress_ext_cnt; + sbi_ext = F2FS_OPTION(sbi).noextensions; + sbi_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + for (i = 0; i < ctx_cnt; i++) { + if (strlen(ctx_ext[i]) == 0) + continue; + strscpy(sbi_ext[sbi_cnt], ctx_ext[i]); + sbi_cnt++; + } + F2FS_OPTION(sbi).nocompress_ext_cnt = sbi_cnt; + } +#endif +} + +static void f2fs_apply_options(struct fs_context *fc, struct super_block *sb) +{ + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + F2FS_OPTION(sbi).opt &= ~ctx->opt_mask; + F2FS_OPTION(sbi).opt |= F2FS_CTX_INFO(ctx).opt; + + if (ctx->spec_mask & F2FS_SPEC_background_gc) + F2FS_OPTION(sbi).bggc_mode = F2FS_CTX_INFO(ctx).bggc_mode; + if (ctx->spec_mask & F2FS_SPEC_inline_xattr_size) + F2FS_OPTION(sbi).inline_xattr_size = + F2FS_CTX_INFO(ctx).inline_xattr_size; + if (ctx->spec_mask & F2FS_SPEC_active_logs) + F2FS_OPTION(sbi).active_logs = F2FS_CTX_INFO(ctx).active_logs; + if (ctx->spec_mask & F2FS_SPEC_reserve_root) + F2FS_OPTION(sbi).root_reserved_blocks = + F2FS_CTX_INFO(ctx).root_reserved_blocks; + if (ctx->spec_mask & F2FS_SPEC_resgid) + F2FS_OPTION(sbi).s_resgid = F2FS_CTX_INFO(ctx).s_resgid; + if (ctx->spec_mask & F2FS_SPEC_resuid) + F2FS_OPTION(sbi).s_resuid = F2FS_CTX_INFO(ctx).s_resuid; + if (ctx->spec_mask & F2FS_SPEC_mode) + F2FS_OPTION(sbi).fs_mode = F2FS_CTX_INFO(ctx).fs_mode; +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (ctx->spec_mask & F2FS_SPEC_fault_injection) + (void)f2fs_build_fault_attr(sbi, + F2FS_CTX_INFO(ctx).fault_info.inject_rate, 0, FAULT_RATE); + if (ctx->spec_mask & F2FS_SPEC_fault_type) + (void)f2fs_build_fault_attr(sbi, 0, + F2FS_CTX_INFO(ctx).fault_info.inject_type, FAULT_TYPE); +#endif + if (ctx->spec_mask & F2FS_SPEC_alloc_mode) + F2FS_OPTION(sbi).alloc_mode = F2FS_CTX_INFO(ctx).alloc_mode; + if 
(ctx->spec_mask & F2FS_SPEC_fsync_mode) + F2FS_OPTION(sbi).fsync_mode = F2FS_CTX_INFO(ctx).fsync_mode; + if (ctx->spec_mask & F2FS_SPEC_checkpoint_disable_cap) + F2FS_OPTION(sbi).unusable_cap = F2FS_CTX_INFO(ctx).unusable_cap; + if (ctx->spec_mask & F2FS_SPEC_checkpoint_disable_cap_perc) + F2FS_OPTION(sbi).unusable_cap_perc = + F2FS_CTX_INFO(ctx).unusable_cap_perc; + if (ctx->spec_mask & F2FS_SPEC_discard_unit) + F2FS_OPTION(sbi).discard_unit = F2FS_CTX_INFO(ctx).discard_unit; + if (ctx->spec_mask & F2FS_SPEC_memory_mode) + F2FS_OPTION(sbi).memory_mode = F2FS_CTX_INFO(ctx).memory_mode; + if (ctx->spec_mask & F2FS_SPEC_errors) + F2FS_OPTION(sbi).errors = F2FS_CTX_INFO(ctx).errors; + + f2fs_apply_compression(fc, sb); + f2fs_apply_test_dummy_encryption(fc, sb); + f2fs_apply_quota_options(fc, sb); +} + +static int f2fs_sanity_check_options(struct f2fs_sb_info *sbi, bool remount) +{ + if (f2fs_sb_has_device_alias(sbi) && + !test_opt(sbi, READ_EXTENT_CACHE)) { + f2fs_err(sbi, "device aliasing requires extent cache"); return -EINVAL; } + if (!remount) + return 0; + +#ifdef CONFIG_BLK_DEV_ZONED + if (f2fs_sb_has_blkzoned(sbi) && + sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) { + f2fs_err(sbi, + "zoned: max open zones %u is too small, need at least %u open zones", + sbi->max_open_zones, F2FS_OPTION(sbi).active_logs); + return -EINVAL; + } +#endif + if (f2fs_lfs_mode(sbi) && !IS_F2FS_IPU_DISABLE(sbi)) { + f2fs_warn(sbi, "LFS is not compatible with IPU"); + return -EINVAL; + } return 0; } @@ -2281,6 +2625,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) { struct f2fs_sb_info *sbi = F2FS_SB(sb); struct f2fs_mount_info org_mount_opt; + struct f2fs_fs_context ctx; + struct fs_context fc; unsigned long old_sb_flags; int err; bool need_restart_gc = false, need_stop_gc = false; @@ -2337,23 +2683,23 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) default_options(sbi, true); + memset(&fc, 0, sizeof(fc)); + memset(&ctx, 0, 
sizeof(ctx)); + fc.fs_private = &ctx; + fc.purpose = FS_CONTEXT_FOR_RECONFIGURE; + /* parse mount options */ - err = parse_options(sbi, data, true); + err = parse_options(&fc, data); if (err) goto restore_opts; -#ifdef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_has_blkzoned(sbi) && - sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) { - f2fs_err(sbi, - "zoned: max open zones %u is too small, need at least %u open zones", - sbi->max_open_zones, F2FS_OPTION(sbi).active_logs); - err = -EINVAL; + err = f2fs_check_opt_consistency(&fc, sb); + if (err) goto restore_opts; - } -#endif - err = f2fs_validate_options(sbi); + f2fs_apply_options(&fc, sb); + + err = f2fs_sanity_check_options(sbi, true); if (err) goto restore_opts; @@ -2389,12 +2735,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) } } #endif - if (f2fs_lfs_mode(sbi) && !IS_F2FS_IPU_DISABLE(sbi)) { - err = -EINVAL; - f2fs_warn(sbi, "LFS is not compatible with IPU"); - goto restore_opts; - } - /* disallow enable atgc dynamically */ if (no_atgc == !!test_opt(sbi, ATGC)) { err = -EINVAL; @@ -4475,6 +4815,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; struct f2fs_super_block *raw_super; + struct f2fs_fs_context ctx; + struct fs_context fc; struct inode *root; int err; bool skip_recovery = false, need_fsck = false; @@ -4491,6 +4833,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) raw_super = NULL; valid_super_block = -1; recovery = 0; + memset(&fc, 0, sizeof(fc)); + memset(&ctx, 0, sizeof(ctx)); + fc.fs_private = &ctx; /* allocate memory for f2fs-specific super block info */ sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); @@ -4548,11 +4893,17 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_sb_buf; } - err = parse_options(sbi, options, false); + err = parse_options(&fc, options); if (err) goto free_options; - err = f2fs_validate_options(sbi); + err = 
f2fs_check_opt_consistency(&fc, sb); + if (err) + goto free_options; + + f2fs_apply_options(&fc, sb); + + err = f2fs_sanity_check_options(sbi, false); if (err) goto free_options; @@ -4977,7 +5328,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) for (i = 0; i < MAXQUOTAS; i++) kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif - fscrypt_free_dummy_policy(&F2FS_OPTION(sbi).dummy_enc_policy); + /* no need to free dummy_enc_policy, we just keep it in ctx when failed */ + swap(F2FS_CTX_INFO(&ctx).dummy_enc_policy, F2FS_OPTION(sbi).dummy_enc_policy); kfree(options); free_sb_buf: kfree(raw_super); From bb463a75ab2fc5b7322d342808d1dacf34abe79e Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 10 Jul 2025 12:14:14 +0000 Subject: [PATCH 0914/2411] f2fs: introduce fs_context_operation structure The handle_mount_opt() helper is used to parse mount parameters, and so we can rename this function to f2fs_parse_param() and set it as .parse_param in fs_context_operations. Signed-off-by: Hongbo Li [sandeen: forward port] Signed-off-by: Eric Sandeen Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e0c64b33d254..17786d79cedd 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -700,7 +700,7 @@ static int f2fs_set_zstd_level(struct f2fs_fs_context *ctx, const char *str) #endif #endif -static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) +static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct f2fs_fs_context *ctx = fc->fs_private; #ifdef CONFIG_F2FS_FS_COMPRESSION @@ -1171,7 +1171,7 @@ static int parse_options(struct fs_context *fc, char *options) param.key = key; param.size = v_len; - ret = handle_mount_opt(fc, &param); + ret = f2fs_parse_param(fc, &param); kfree(param.string); if (ret < 0) return ret; @@ -5352,6 +5352,10 @@ static struct dentry *f2fs_mount(struct
file_system_type *fs_type, int flags, return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super); } +static const struct fs_context_operations f2fs_context_ops = { + .parse_param = f2fs_parse_param, +}; + static void kill_f2fs_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); From 94b3ce7f1509d91fbe3f84f367b622cbb2c1af7e Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 10 Jul 2025 12:14:15 +0000 Subject: [PATCH 0915/2411] f2fs: switch to the new mount api The new mount api will execute .parse_param, .init_fs_context, .get_tree and will call .remount if remount happened. So we add the necessary functions for the fs_context_operations. If .init_fs_context is added, the old .mount should be removed. See Documentation/filesystems/mount_api.rst for more information. Signed-off-by: Hongbo Li [sandeen: forward port] Signed-off-by: Eric Sandeen [hongbo: context modified] Signed-off-by: Hongbo Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 167 ++++++++++++++++++++---------------------------- 1 file changed, 71 insertions(+), 96 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 17786d79cedd..30c038413040 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -532,6 +532,14 @@ static int f2fs_unnote_qf_name(struct fs_context *fc, int qtype) ctx->qname_mask |= 1 << qtype; return 0; } + +static void f2fs_unnote_qf_name_all(struct fs_context *fc) +{ + int i; + + for (i = 0; i < MAXQUOTAS; i++) + f2fs_unnote_qf_name(fc, i); +} #endif static int f2fs_parse_test_dummy_encryption(const struct fs_parameter *param, @@ -1139,47 +1147,6 @@ static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param) return 0; } -static int parse_options(struct fs_context *fc, char *options) -{ - struct fs_parameter param; - char *key; - int ret; - - if (!options) - return 0; - - while ((key = strsep(&options, ",")) != NULL) { - if (*key) { - size_t v_len = 0; - char *value = strchr(key, '='); - - param.type =
fs_value_is_flag; - param.string = NULL; - - if (value) { - if (value == key) - continue; - - *value++ = 0; - v_len = strlen(value); - param.string = kmemdup_nul(value, v_len, GFP_KERNEL); - if (!param.string) - return -ENOMEM; - param.type = fs_value_is_string; - } - - param.key = key; - param.size = v_len; - - ret = f2fs_parse_param(fc, &param); - kfree(param.string); - if (ret < 0) - return ret; - } - } - return 0; -} - /* * Check quota settings consistency. */ @@ -2621,13 +2588,12 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) f2fs_flush_ckpt_thread(sbi); } -static int f2fs_remount(struct super_block *sb, int *flags, char *data) +static int __f2fs_remount(struct fs_context *fc, struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); struct f2fs_mount_info org_mount_opt; - struct f2fs_fs_context ctx; - struct fs_context fc; unsigned long old_sb_flags; + unsigned int flags = fc->sb_flags; int err; bool need_restart_gc = false, need_stop_gc = false; bool need_restart_flush = false, need_stop_flush = false; @@ -2673,7 +2639,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) #endif /* recover superblocks we couldn't write due to previous RO mount */ - if (!(*flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { + if (!(flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { err = f2fs_commit_super(sbi, false); f2fs_info(sbi, "Try to recover all the superblocks, ret: %d", err); @@ -2683,21 +2649,11 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) default_options(sbi, true); - memset(&fc, 0, sizeof(fc)); - memset(&ctx, 0, sizeof(ctx)); - fc.fs_private = &ctx; - fc.purpose = FS_CONTEXT_FOR_RECONFIGURE; - - /* parse mount options */ - err = parse_options(&fc, data); + err = f2fs_check_opt_consistency(fc, sb); if (err) goto restore_opts; - err = f2fs_check_opt_consistency(&fc, sb); - if (err) - goto restore_opts; - - f2fs_apply_options(&fc, sb); + f2fs_apply_options(fc, sb);
err = f2fs_sanity_check_options(sbi, true); if (err) @@ -2710,20 +2666,20 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * Previous and new state of filesystem is RO, * so skip checking GC and FLUSH_MERGE conditions. */ - if (f2fs_readonly(sb) && (*flags & SB_RDONLY)) + if (f2fs_readonly(sb) && (flags & SB_RDONLY)) goto skip; - if (f2fs_dev_is_readonly(sbi) && !(*flags & SB_RDONLY)) { + if (f2fs_dev_is_readonly(sbi) && !(flags & SB_RDONLY)) { err = -EROFS; goto restore_opts; } #ifdef CONFIG_QUOTA - if (!f2fs_readonly(sb) && (*flags & SB_RDONLY)) { + if (!f2fs_readonly(sb) && (flags & SB_RDONLY)) { err = dquot_suspend(sb, -1); if (err < 0) goto restore_opts; - } else if (f2fs_readonly(sb) && !(*flags & SB_RDONLY)) { + } else if (f2fs_readonly(sb) && !(flags & SB_RDONLY)) { /* dquot_resume needs RW */ sb->s_flags &= ~SB_RDONLY; if (sb_any_quota_suspended(sb)) { @@ -2773,7 +2729,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } - if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { + if ((flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { err = -EINVAL; f2fs_warn(sbi, "disabling checkpoint not compatible with read-only"); goto restore_opts; @@ -2784,7 +2740,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * or if background_gc = off is passed in mount * option. Also sync the filesystem. 
*/ - if ((*flags & SB_RDONLY) || + if ((flags & SB_RDONLY) || (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF && !test_opt(sbi, GC_MERGE))) { if (sbi->gc_thread) { @@ -2798,7 +2754,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) need_stop_gc = true; } - if (*flags & SB_RDONLY) { + if (flags & SB_RDONLY) { sync_inodes_sb(sb); set_sbi_flag(sbi, SBI_IS_DIRTY); @@ -2811,7 +2767,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * We stop issue flush thread if FS is mounted as RO * or if flush_merge is not passed in mount option. */ - if ((*flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { + if ((flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { clear_opt(sbi, FLUSH_MERGE); f2fs_destroy_flush_cmd_control(sbi, false); need_restart_flush = true; @@ -2853,7 +2809,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * triggered while remount and we need to take care of it before * returning from remount. */ - if ((*flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) || + if ((flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) || !test_opt(sbi, MERGE_CHECKPOINT)) { f2fs_stop_ckpt_thread(sbi); } else { @@ -2880,7 +2836,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) (test_opt(sbi, POSIX_ACL) ? 
SB_POSIXACL : 0); limit_reserve_root(sbi); - *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); + fc->sb_flags = (flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); sbi->umount_lock_holder = NULL; return 0; @@ -3551,7 +3507,6 @@ static const struct super_operations f2fs_sops = { .freeze_fs = f2fs_freeze, .unfreeze_fs = f2fs_unfreeze, .statfs = f2fs_statfs, - .remount_fs = f2fs_remount, .shutdown = f2fs_shutdown, }; @@ -4811,16 +4766,14 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) sbi->readdir_ra = true; } -static int f2fs_fill_super(struct super_block *sb, void *data, int silent) +static int f2fs_fill_super(struct super_block *sb, struct fs_context *fc) { + struct f2fs_fs_context *ctx = fc->fs_private; struct f2fs_sb_info *sbi; struct f2fs_super_block *raw_super; - struct f2fs_fs_context ctx; - struct fs_context fc; struct inode *root; int err; bool skip_recovery = false, need_fsck = false; - char *options = NULL; int recovery, i, valid_super_block; struct curseg_info *seg_i; int retry_cnt = 1; @@ -4833,9 +4786,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) raw_super = NULL; valid_super_block = -1; recovery = 0; - memset(&fc, 0, sizeof(fc)); - memset(&ctx, 0, sizeof(ctx)); - fc.fs_private = &ctx; /* allocate memory for f2fs-specific super block info */ sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); @@ -4886,22 +4836,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sizeof(raw_super->uuid)); default_options(sbi, false); - /* parse mount options */ - options = kstrdup((const char *)data, GFP_KERNEL); - if (data && !options) { - err = -ENOMEM; + + err = f2fs_check_opt_consistency(fc, sb); + if (err) goto free_sb_buf; - } - err = parse_options(&fc, options); - if (err) - goto free_options; - - err = f2fs_check_opt_consistency(&fc, sb); - if (err) - goto free_options; - - f2fs_apply_options(&fc, sb); + f2fs_apply_options(fc, sb); err = 
f2fs_sanity_check_options(sbi, false); if (err) @@ -5234,7 +5174,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto sync_free_meta; } - kfree(options); /* recover broken superblock */ if (recovery) { @@ -5329,8 +5268,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif /* no need to free dummy_enc_policy, we just keep it in ctx when failed */ - swap(F2FS_CTX_INFO(&ctx).dummy_enc_policy, F2FS_OPTION(sbi).dummy_enc_policy); - kfree(options); + swap(F2FS_CTX_INFO(ctx).dummy_enc_policy, F2FS_OPTION(sbi).dummy_enc_policy); free_sb_buf: kfree(raw_super); free_sbi: @@ -5346,14 +5284,37 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) return err; } -static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) +static int f2fs_get_tree(struct fs_context *fc) { - return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super); + return get_tree_bdev(fc, f2fs_fill_super); +} + +static int f2fs_reconfigure(struct fs_context *fc) +{ + struct super_block *sb = fc->root->d_sb; + + return __f2fs_remount(fc, sb); +} + +static void f2fs_fc_free(struct fs_context *fc) +{ + struct f2fs_fs_context *ctx = fc->fs_private; + + if (!ctx) + return; + +#ifdef CONFIG_QUOTA + f2fs_unnote_qf_name_all(fc); +#endif + fscrypt_free_dummy_policy(&F2FS_CTX_INFO(ctx).dummy_enc_policy); + kfree(ctx); } static const struct fs_context_operations f2fs_context_ops = { .parse_param = f2fs_parse_param, + .get_tree = f2fs_get_tree, + .reconfigure = f2fs_reconfigure, + .free = f2fs_fc_free, }; static void kill_f2fs_super(struct super_block *sb) @@ -5397,10 +5358,24 @@ static void kill_f2fs_super(struct super_block *sb) } } +static int f2fs_init_fs_context(struct fs_context *fc) +{ + struct f2fs_fs_context *ctx; + + ctx = kzalloc(sizeof(struct f2fs_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + 
fc->fs_private = ctx; + fc->ops = &f2fs_context_ops; + + return 0; +} + static struct file_system_type f2fs_fs_type = { .owner = THIS_MODULE, .name = "f2fs", - .mount = f2fs_mount, + .init_fs_context = f2fs_init_fs_context, .kill_sb = kill_f2fs_super, .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; From 77de19b6867f2740cdcb6c9c7e50d522b47847a4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 17 Jul 2025 21:26:33 +0800 Subject: [PATCH 0916/2411] f2fs: fix to avoid out-of-boundary access in dnode page As Jiaming Zhang reported: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x1c1/0x2a0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0x17e/0x800 mm/kasan/report.c:480 kasan_report+0x147/0x180 mm/kasan/report.c:593 data_blkaddr fs/f2fs/f2fs.h:3053 [inline] f2fs_data_blkaddr fs/f2fs/f2fs.h:3058 [inline] f2fs_get_dnode_of_data+0x1a09/0x1c40 fs/f2fs/node.c:855 f2fs_reserve_block+0x53/0x310 fs/f2fs/data.c:1195 prepare_write_begin fs/f2fs/data.c:3395 [inline] f2fs_write_begin+0xf39/0x2190 fs/f2fs/data.c:3594 generic_perform_write+0x2c7/0x910 mm/filemap.c:4112 f2fs_buffered_write_iter fs/f2fs/file.c:4988 [inline] f2fs_file_write_iter+0x1ec8/0x2410 fs/f2fs/file.c:5216 new_sync_write fs/read_write.c:593 [inline] vfs_write+0x546/0xa90 fs/read_write.c:686 ksys_write+0x149/0x250 fs/read_write.c:738 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xf3/0x3d0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f The root cause is in the corrupted image, there is a dnode has the same node id w/ its inode, so during f2fs_get_dnode_of_data(), it tries to access block address in dnode at offset 934, however it parses the dnode as inode node, so that get_dnode_addr() returns 360, then it tries to access page address from 360 + 934 * 4 = 4096 w/ 4 bytes. To fix this issue, let's add sanity check for node id of all direct nodes during f2fs_get_dnode_of_data(). 
Cc: stable@kernel.org Reported-by: Jiaming Zhang Closes: https://groups.google.com/g/syzkaller/c/-ZnaaOOfO3M Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4b3d9070e299..76aba1961b54 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -815,6 +815,16 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) for (i = 1; i <= level; i++) { bool done = false; + if (nids[i] && nids[i] == dn->inode->i_ino) { + err = -EFSCORRUPTED; + f2fs_err_ratelimited(sbi, + "inode mapping table is corrupted, run fsck to fix it, " + "ino:%lu, nid:%u, level:%d, offset:%d", + dn->inode->i_ino, nids[i], level, offset[level]); + set_sbi_flag(sbi, SBI_NEED_FSCK); + goto release_pages; + } + if (!nids[i] && mode == ALLOC_NODE) { /* alloc new node */ if (!f2fs_alloc_nid(sbi, &(nids[i]))) { From 2c9e7f857400ffecf16c49bc6d98ac43d4129fef Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2025 18:33:52 +0100 Subject: [PATCH 0917/2411] genirq: Teach handle_simple_irq() to resend an in-progress interrupt It appears that the defect outlined in 9c15eeb5362c4 ("genirq: Allow fasteoi handler to resend interrupts on concurrent handling") also affects some other less stellar MSI controllers, this time using the handle_simple_irq() flow. Teach this flow about irqd_needs_resend_when_in_progress(). Given the invasive nature of this workaround, only this flow is updated. 
Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20250708173404.1278635-2-maz@kernel.org --- kernel/irq/chip.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index b0e0a7332993..e3948e31e654 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -551,7 +551,13 @@ void handle_simple_irq(struct irq_desc *desc) { guard(raw_spinlock)(&desc->lock); - if (!irq_can_handle(desc)) + if (!irq_can_handle_pm(desc)) { + if (irqd_needs_resend_when_in_progress(&desc->irq_data)) + desc->istate |= IRQS_PENDING; + return; + } + + if (!irq_can_handle_actions(desc)) return; kstat_incr_irqs_this_cpu(desc); From 0d402bd41a075178a9a30d5716abbfda3f123240 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2025 18:33:53 +0100 Subject: [PATCH 0918/2411] PCI: xgene: Defer probing if the MSI widget driver hasn't probed yet As a preparatory work to make the XGene MSI driver probe less of a sorry hack, make the PCI driver check for the availability of the MSI parent domain, and defer the probing otherwise. 
Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20250708173404.1278635-3-maz@kernel.org --- drivers/pci/controller/pci-xgene.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/pci/controller/pci-xgene.c b/drivers/pci/controller/pci-xgene.c index 1e2ebbfa36d1..f26cb58f814e 100644 --- a/drivers/pci/controller/pci-xgene.c +++ b/drivers/pci/controller/pci-xgene.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -594,6 +595,24 @@ static struct pci_ops xgene_pcie_ops = { .write = pci_generic_config_write32, }; +static bool xgene_check_pcie_msi_ready(void) +{ + struct device_node *np; + struct irq_domain *d; + + if (!IS_ENABLED(CONFIG_PCI_XGENE_MSI)) + return true; + + np = of_find_compatible_node(NULL, NULL, "apm,xgene1-msi"); + if (!np) + return true; + + d = irq_find_matching_host(np, DOMAIN_BUS_PCI_MSI); + of_node_put(np); + + return d && irq_domain_is_msi_parent(d); +} + static int xgene_pcie_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -602,6 +621,10 @@ static int xgene_pcie_probe(struct platform_device *pdev) struct pci_host_bridge *bridge; int ret; + if (!xgene_check_pcie_msi_ready()) + return dev_err_probe(&pdev->dev, -EPROBE_DEFER, + "MSI driver not ready\n"); + bridge = devm_pci_alloc_host_bridge(dev, sizeof(*port)); if (!bridge) return -ENOMEM; From e3ac25cc95b814723678c3611591f2b85c731c27 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2025 18:33:54 +0100 Subject: [PATCH 0919/2411] PCI: xgene: Drop useless conditional compilation pci-xgene.c only gets compiled if CONFIG_PCI_XGENE is selected. It is therefore pointless to check for CONFIG_PCI_XGENE inside the driver. 
Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20250708173404.1278635-4-maz@kernel.org --- drivers/pci/controller/pci-xgene.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/pci/controller/pci-xgene.c b/drivers/pci/controller/pci-xgene.c index f26cb58f814e..a848f98203ae 100644 --- a/drivers/pci/controller/pci-xgene.c +++ b/drivers/pci/controller/pci-xgene.c @@ -58,7 +58,6 @@ #define XGENE_PCIE_IP_VER_1 1 #define XGENE_PCIE_IP_VER_2 2 -#if defined(CONFIG_PCI_XGENE) || (defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)) struct xgene_pcie { struct device_node *node; struct device *dev; @@ -189,7 +188,6 @@ static int xgene_pcie_config_read32(struct pci_bus *bus, unsigned int devfn, return PCIBIOS_SUCCESSFUL; } -#endif #if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS) static int xgene_get_csr_resource(struct acpi_device *adev, @@ -280,7 +278,6 @@ const struct pci_ecam_ops xgene_v2_pcie_ecam_ops = { }; #endif -#if defined(CONFIG_PCI_XGENE) static u64 xgene_pcie_set_ib_mask(struct xgene_pcie *port, u32 addr, u32 flags, u64 size) { @@ -670,4 +667,3 @@ static struct platform_driver xgene_pcie_driver = { .probe = xgene_pcie_probe, }; builtin_platform_driver(xgene_pcie_driver); -#endif From fddf72ed7b52c91da37fe5f1d4faed11251b714f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2025 18:33:55 +0100 Subject: [PATCH 0920/2411] PCI: xgene: Drop XGENE_PCIE_IP_VER_UNKN XGENE_PCIE_IP_VER_UNKN is only referred to when probing for the original XGene PCIe implementation, and gets immediately overridden if the device has the "apm,xgene-pcie" compatible string. Given that the only way to get there is by finding this very string in the DT, it is obvious that we will always overwrite the version with XGENE_PCIE_IP_VER_1. Drop the whole thing.
Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20250708173404.1278635-5-maz@kernel.org --- drivers/pci/controller/pci-xgene.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/pci/controller/pci-xgene.c b/drivers/pci/controller/pci-xgene.c index a848f98203ae..b95afa35201d 100644 --- a/drivers/pci/controller/pci-xgene.c +++ b/drivers/pci/controller/pci-xgene.c @@ -54,7 +54,6 @@ #define XGENE_V1_PCI_EXP_CAP 0x40 /* PCIe IP version */ -#define XGENE_PCIE_IP_VER_UNKN 0 #define XGENE_PCIE_IP_VER_1 1 #define XGENE_PCIE_IP_VER_2 2 @@ -630,10 +629,7 @@ static int xgene_pcie_probe(struct platform_device *pdev) port->node = of_node_get(dn); port->dev = dev; - - port->version = XGENE_PCIE_IP_VER_UNKN; - if (of_device_is_compatible(port->node, "apm,xgene-pcie")) - port->version = XGENE_PCIE_IP_VER_1; + port->version = XGENE_PCIE_IP_VER_1; ret = xgene_pcie_map_reg(port, pdev); if (ret) From d17e3f8a933f1e467e2cfbe144ebefc2943a019f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2025 18:33:56 +0100 Subject: [PATCH 0921/2411] PCI: xgene-msi: Make per-CPU interrupt setup robust The way the per-CPU interrupts are dealt with in the XGene MSI driver isn't great: - the affinity is set after the interrupt is enabled - nothing prevents userspace from moving the interrupt around - the affinity setting code pointlessly allocates memory - the driver checks for conditions that cannot possibly happen Address all of this in one go, resulting in slightly simpler setup code. 
Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20250708173404.1278635-6-maz@kernel.org --- drivers/pci/controller/pci-xgene-msi.c | 29 ++++++-------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c index b05ec8b0bb93..5b6928668917 100644 --- a/drivers/pci/controller/pci-xgene-msi.c +++ b/drivers/pci/controller/pci-xgene-msi.c @@ -355,40 +355,26 @@ static int xgene_msi_hwirq_alloc(unsigned int cpu) { struct xgene_msi *msi = &xgene_msi_ctrl; struct xgene_msi_group *msi_group; - cpumask_var_t mask; int i; int err; for (i = cpu; i < NR_HW_IRQS; i += msi->num_cpus) { msi_group = &msi->msi_groups[i]; - if (!msi_group->gic_irq) - continue; - - irq_set_chained_handler_and_data(msi_group->gic_irq, - xgene_msi_isr, msi_group); /* * Statically allocate MSI GIC IRQs to each CPU core. * With 8-core X-Gene v1, 2 MSI GIC IRQs are allocated * to each core. 
*/ - if (alloc_cpumask_var(&mask, GFP_KERNEL)) { - cpumask_clear(mask); - cpumask_set_cpu(cpu, mask); - err = irq_set_affinity(msi_group->gic_irq, mask); - if (err) - pr_err("failed to set affinity for GIC IRQ"); - free_cpumask_var(mask); - } else { - pr_err("failed to alloc CPU mask for affinity\n"); - err = -EINVAL; - } - + irq_set_status_flags(msi_group->gic_irq, IRQ_NO_BALANCING); + err = irq_set_affinity(msi_group->gic_irq, cpumask_of(cpu)); if (err) { - irq_set_chained_handler_and_data(msi_group->gic_irq, - NULL, NULL); + pr_err("failed to set affinity for GIC IRQ"); return err; } + + irq_set_chained_handler_and_data(msi_group->gic_irq, + xgene_msi_isr, msi_group); } return 0; @@ -402,9 +388,6 @@ static int xgene_msi_hwirq_free(unsigned int cpu) for (i = cpu; i < NR_HW_IRQS; i += msi->num_cpus) { msi_group = &msi->msi_groups[i]; - if (!msi_group->gic_irq) - continue; - irq_set_chained_handler_and_data(msi_group->gic_irq, NULL, NULL); } From 0756244d4cbcd9b1403a39e1e719b9b9bcae3aff Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2025 18:33:57 +0100 Subject: [PATCH 0922/2411] PCI: xgene-msi: Drop superfluous fields from xgene_msi structure The xgene_msi structure remembers both the of_node of the device and the number of CPUs. All of which are perfectly useless. 
Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20250708173404.1278635-7-maz@kernel.org --- drivers/pci/controller/pci-xgene-msi.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c index 5b6928668917..50a817920cfd 100644 --- a/drivers/pci/controller/pci-xgene-msi.c +++ b/drivers/pci/controller/pci-xgene-msi.c @@ -31,14 +31,12 @@ struct xgene_msi_group { }; struct xgene_msi { - struct device_node *node; struct irq_domain *inner_domain; u64 msi_addr; void __iomem *msi_regs; unsigned long *bitmap; struct mutex bitmap_lock; struct xgene_msi_group *msi_groups; - int num_cpus; }; /* Global data */ @@ -147,7 +145,7 @@ static void xgene_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) */ static int hwirq_to_cpu(unsigned long hwirq) { - return (hwirq % xgene_msi_ctrl.num_cpus); + return (hwirq % num_possible_cpus()); } static unsigned long hwirq_to_canonical_hwirq(unsigned long hwirq) @@ -186,9 +184,9 @@ static int xgene_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, mutex_lock(&msi->bitmap_lock); msi_irq = bitmap_find_next_zero_area(msi->bitmap, NR_MSI_VEC, 0, - msi->num_cpus, 0); + num_possible_cpus(), 0); if (msi_irq < NR_MSI_VEC) - bitmap_set(msi->bitmap, msi_irq, msi->num_cpus); + bitmap_set(msi->bitmap, msi_irq, num_possible_cpus()); else msi_irq = -ENOSPC; @@ -214,7 +212,7 @@ static void xgene_irq_domain_free(struct irq_domain *domain, mutex_lock(&msi->bitmap_lock); hwirq = hwirq_to_canonical_hwirq(d->hwirq); - bitmap_clear(msi->bitmap, hwirq, msi->num_cpus); + bitmap_clear(msi->bitmap, hwirq, num_possible_cpus()); mutex_unlock(&msi->bitmap_lock); @@ -235,10 +233,11 @@ static const struct msi_parent_ops xgene_msi_parent_ops = { .init_dev_msi_info = msi_lib_init_dev_msi_info, }; -static int xgene_allocate_domains(struct xgene_msi *msi) +static int 
xgene_allocate_domains(struct device_node *node, + struct xgene_msi *msi) { struct irq_domain_info info = { - .fwnode = of_fwnode_handle(msi->node), + .fwnode = of_fwnode_handle(node), .ops = &xgene_msi_domain_ops, .size = NR_MSI_VEC, .host_data = msi, @@ -358,7 +357,7 @@ static int xgene_msi_hwirq_alloc(unsigned int cpu) int i; int err; - for (i = cpu; i < NR_HW_IRQS; i += msi->num_cpus) { + for (i = cpu; i < NR_HW_IRQS; i += num_possible_cpus()) { msi_group = &msi->msi_groups[i]; /* @@ -386,7 +385,7 @@ static int xgene_msi_hwirq_free(unsigned int cpu) struct xgene_msi_group *msi_group; int i; - for (i = cpu; i < NR_HW_IRQS; i += msi->num_cpus) { + for (i = cpu; i < NR_HW_IRQS; i += num_possible_cpus()) { msi_group = &msi->msi_groups[i]; irq_set_chained_handler_and_data(msi_group->gic_irq, NULL, NULL); @@ -417,8 +416,6 @@ static int xgene_msi_probe(struct platform_device *pdev) goto error; } xgene_msi->msi_addr = res->start; - xgene_msi->node = pdev->dev.of_node; - xgene_msi->num_cpus = num_possible_cpus(); rc = xgene_msi_init_allocator(xgene_msi); if (rc) { @@ -426,7 +423,7 @@ static int xgene_msi_probe(struct platform_device *pdev) goto error; } - rc = xgene_allocate_domains(xgene_msi); + rc = xgene_allocate_domains(dev_of_node(&pdev->dev), xgene_msi); if (rc) { dev_err(&pdev->dev, "Failed to allocate MSI domain\n"); goto error; From c9c1578f11af7ebfb62ff683be638ba6f7a9cb44 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2025 18:33:58 +0100 Subject: [PATCH 0923/2411] PCI: xgene-msi: Use device-managed memory allocations Since the MSI driver is probed as a platform device, there is no reason to not use device-managed allocations. That's including the top-level bookkeeping structure, which is better dynamically allocated than being static. 
Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20250708173404.1278635-8-maz@kernel.org --- drivers/pci/controller/pci-xgene-msi.c | 37 +++++++++++++------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c index 50a817920cfd..8b6724fe8d71 100644 --- a/drivers/pci/controller/pci-xgene-msi.c +++ b/drivers/pci/controller/pci-xgene-msi.c @@ -40,7 +40,7 @@ struct xgene_msi { }; /* Global data */ -static struct xgene_msi xgene_msi_ctrl; +static struct xgene_msi *xgene_msi_ctrl; /* * X-Gene v1 has 16 groups of MSI termination registers MSInIRx, where @@ -253,18 +253,18 @@ static void xgene_free_domains(struct xgene_msi *msi) irq_domain_remove(msi->inner_domain); } -static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi) +static int xgene_msi_init_allocator(struct device *dev) { - xgene_msi->bitmap = bitmap_zalloc(NR_MSI_VEC, GFP_KERNEL); - if (!xgene_msi->bitmap) + xgene_msi_ctrl->bitmap = devm_bitmap_zalloc(dev, NR_MSI_VEC, GFP_KERNEL); + if (!xgene_msi_ctrl->bitmap) return -ENOMEM; - mutex_init(&xgene_msi->bitmap_lock); + mutex_init(&xgene_msi_ctrl->bitmap_lock); - xgene_msi->msi_groups = kcalloc(NR_HW_IRQS, - sizeof(struct xgene_msi_group), - GFP_KERNEL); - if (!xgene_msi->msi_groups) + xgene_msi_ctrl->msi_groups = devm_kcalloc(dev, NR_HW_IRQS, + sizeof(struct xgene_msi_group), + GFP_KERNEL); + if (!xgene_msi_ctrl->msi_groups) return -ENOMEM; return 0; @@ -273,15 +273,14 @@ static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi) static void xgene_msi_isr(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); + struct xgene_msi *xgene_msi = xgene_msi_ctrl; struct xgene_msi_group *msi_groups; - struct xgene_msi *xgene_msi; int msir_index, msir_val, hw_irq, ret; u32 intr_index, grp_select, msi_grp; chained_irq_enter(chip, desc); msi_groups = 
irq_desc_get_handler_data(desc); - xgene_msi = msi_groups->msi; msi_grp = msi_groups->msi_grp; /* @@ -344,15 +343,12 @@ static void xgene_msi_remove(struct platform_device *pdev) kfree(msi->msi_groups); - bitmap_free(msi->bitmap); - msi->bitmap = NULL; - xgene_free_domains(msi); } static int xgene_msi_hwirq_alloc(unsigned int cpu) { - struct xgene_msi *msi = &xgene_msi_ctrl; + struct xgene_msi *msi = xgene_msi_ctrl; struct xgene_msi_group *msi_group; int i; int err; @@ -381,7 +377,7 @@ static int xgene_msi_hwirq_alloc(unsigned int cpu) static int xgene_msi_hwirq_free(unsigned int cpu) { - struct xgene_msi *msi = &xgene_msi_ctrl; + struct xgene_msi *msi = xgene_msi_ctrl; struct xgene_msi_group *msi_group; int i; @@ -406,7 +402,12 @@ static int xgene_msi_probe(struct platform_device *pdev) int virt_msir; u32 msi_val, msi_idx; - xgene_msi = &xgene_msi_ctrl; + xgene_msi_ctrl = devm_kzalloc(&pdev->dev, sizeof(*xgene_msi_ctrl), + GFP_KERNEL); + if (!xgene_msi_ctrl) + return -ENOMEM; + + xgene_msi = xgene_msi_ctrl; platform_set_drvdata(pdev, xgene_msi); @@ -417,7 +418,7 @@ static int xgene_msi_probe(struct platform_device *pdev) } xgene_msi->msi_addr = res->start; - rc = xgene_msi_init_allocator(xgene_msi); + rc = xgene_msi_init_allocator(&pdev->dev); if (rc) { dev_err(&pdev->dev, "Error allocating MSI bitmap\n"); goto error; From 011f4fc1e8debaf9e749c20bfabc08a180870722 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2025 18:33:59 +0100 Subject: [PATCH 0924/2411] PCI: xgene-msi: Get rid of intermediate tracking structure The xgene-msi driver uses an odd construct in the form of an intermediate tracking structure, evidently designed to deal with multiple instances of the MSI widget. However, the existing HW only has one set, and it is obvious that there won't be new HW coming down that particular line. Simplify the driver by using a bit of pointer arithmetic instead, directly tracking the interrupt and avoiding extra memory allocation. 
Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20250708173404.1278635-9-maz@kernel.org --- drivers/pci/controller/pci-xgene-msi.c | 60 ++++++++------------------ 1 file changed, 18 insertions(+), 42 deletions(-) diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c index 8b6724fe8d71..cef0488749e1 100644 --- a/drivers/pci/controller/pci-xgene-msi.c +++ b/drivers/pci/controller/pci-xgene-msi.c @@ -24,19 +24,13 @@ #define NR_HW_IRQS 16 #define NR_MSI_VEC (IDX_PER_GROUP * IRQS_PER_IDX * NR_HW_IRQS) -struct xgene_msi_group { - struct xgene_msi *msi; - int gic_irq; - u32 msi_grp; -}; - struct xgene_msi { struct irq_domain *inner_domain; u64 msi_addr; void __iomem *msi_regs; unsigned long *bitmap; struct mutex bitmap_lock; - struct xgene_msi_group *msi_groups; + unsigned int gic_irq[NR_HW_IRQS]; }; /* Global data */ @@ -261,27 +255,20 @@ static int xgene_msi_init_allocator(struct device *dev) mutex_init(&xgene_msi_ctrl->bitmap_lock); - xgene_msi_ctrl->msi_groups = devm_kcalloc(dev, NR_HW_IRQS, - sizeof(struct xgene_msi_group), - GFP_KERNEL); - if (!xgene_msi_ctrl->msi_groups) - return -ENOMEM; - return 0; } static void xgene_msi_isr(struct irq_desc *desc) { + unsigned int *irqp = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); struct xgene_msi *xgene_msi = xgene_msi_ctrl; - struct xgene_msi_group *msi_groups; int msir_index, msir_val, hw_irq, ret; u32 intr_index, grp_select, msi_grp; chained_irq_enter(chip, desc); - msi_groups = irq_desc_get_handler_data(desc); - msi_grp = msi_groups->msi_grp; + msi_grp = irqp - xgene_msi->gic_irq; /* * MSIINTn (n is 0..F) indicates if there is a pending MSI interrupt @@ -341,35 +328,31 @@ static void xgene_msi_remove(struct platform_device *pdev) cpuhp_remove_state(pci_xgene_online); cpuhp_remove_state(CPUHP_PCI_XGENE_DEAD); - kfree(msi->msi_groups); - xgene_free_domains(msi); } static int 
xgene_msi_hwirq_alloc(unsigned int cpu) { - struct xgene_msi *msi = xgene_msi_ctrl; - struct xgene_msi_group *msi_group; int i; int err; for (i = cpu; i < NR_HW_IRQS; i += num_possible_cpus()) { - msi_group = &msi->msi_groups[i]; + unsigned int irq = xgene_msi_ctrl->gic_irq[i]; /* * Statically allocate MSI GIC IRQs to each CPU core. * With 8-core X-Gene v1, 2 MSI GIC IRQs are allocated * to each core. */ - irq_set_status_flags(msi_group->gic_irq, IRQ_NO_BALANCING); - err = irq_set_affinity(msi_group->gic_irq, cpumask_of(cpu)); + irq_set_status_flags(irq, IRQ_NO_BALANCING); + err = irq_set_affinity(irq, cpumask_of(cpu)); if (err) { pr_err("failed to set affinity for GIC IRQ"); return err; } - irq_set_chained_handler_and_data(msi_group->gic_irq, - xgene_msi_isr, msi_group); + irq_set_chained_handler_and_data(irq, xgene_msi_isr, + &xgene_msi_ctrl->gic_irq[i]); } return 0; @@ -378,14 +361,11 @@ static int xgene_msi_hwirq_alloc(unsigned int cpu) static int xgene_msi_hwirq_free(unsigned int cpu) { struct xgene_msi *msi = xgene_msi_ctrl; - struct xgene_msi_group *msi_group; int i; - for (i = cpu; i < NR_HW_IRQS; i += num_possible_cpus()) { - msi_group = &msi->msi_groups[i]; - irq_set_chained_handler_and_data(msi_group->gic_irq, NULL, - NULL); - } + for (i = cpu; i < NR_HW_IRQS; i += num_possible_cpus()) + irq_set_chained_handler_and_data(msi->gic_irq[i], NULL, NULL); + return 0; } @@ -397,10 +377,9 @@ static const struct of_device_id xgene_msi_match_table[] = { static int xgene_msi_probe(struct platform_device *pdev) { struct resource *res; - int rc, irq_index; struct xgene_msi *xgene_msi; - int virt_msir; u32 msi_val, msi_idx; + int rc; xgene_msi_ctrl = devm_kzalloc(&pdev->dev, sizeof(*xgene_msi_ctrl), GFP_KERNEL); @@ -430,15 +409,12 @@ static int xgene_msi_probe(struct platform_device *pdev) goto error; } - for (irq_index = 0; irq_index < NR_HW_IRQS; irq_index++) { - virt_msir = platform_get_irq(pdev, irq_index); - if (virt_msir < 0) { - rc = virt_msir; + for (int 
irq_index = 0; irq_index < NR_HW_IRQS; irq_index++) { + rc = platform_get_irq(pdev, irq_index); + if (rc < 0) goto error; - } - xgene_msi->msi_groups[irq_index].gic_irq = virt_msir; - xgene_msi->msi_groups[irq_index].msi_grp = irq_index; - xgene_msi->msi_groups[irq_index].msi = xgene_msi; + + xgene_msi->gic_irq[irq_index] = rc; } /* @@ -446,7 +422,7 @@ static int xgene_msi_probe(struct platform_device *pdev) * interrupt handlers, read all of them to clear spurious * interrupts that may occur before the driver is probed. */ - for (irq_index = 0; irq_index < NR_HW_IRQS; irq_index++) { + for (int irq_index = 0; irq_index < NR_HW_IRQS; irq_index++) { for (msi_idx = 0; msi_idx < IDX_PER_GROUP; msi_idx++) xgene_msi_ir_read(xgene_msi, irq_index, msi_idx); From 17c1f960cbf0b93ba22e2d619718343fbdf819ab Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2025 18:34:00 +0100 Subject: [PATCH 0925/2411] PCI: xgene-msi: Sanitise MSI allocation and affinity setting Plugging a device that doesn't use managed affinity on an XGene-1 machine results in messages such as: genirq: irq_chip PCI-MSIX-0000:01:00.0 did not update eff. affinity mask of irq 39 As it turns out, the driver was never updated to populate the effective affinity on irq_set_affinity() call, and the core code is prickly about that. But upon further investigation, it appears that the driver keeps repainting the hwirq field of the irq_data structure as a way to track the affinity of the MSI, something that is very much frowned upon as it breaks the fundamentals of an IRQ domain (an array indexed by hwirq). 
Fixing this results more or less in a rewrite of the driver: - Define how a hwirq and a CPU affinity map onto the MSI termination registers - Allocate a single entry in the bitmap per MSI instead of *8* - Correctly track CPU affinity - Fix the documentation so that it actually means something (to me) - Use standard bitmap iterators - and plenty of other cleanups With this, the driver behaves correctly on my vintage Mustang board. Signed-off-by: Marc Zyngier [lpieralisi: replaced open coded GENMASK(6, 4) with MSInRx_HWIRQ_MASK] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20250708173404.1278635-10-maz@kernel.org --- drivers/pci/controller/pci-xgene-msi.c | 222 +++++++++++-------------- 1 file changed, 93 insertions(+), 129 deletions(-) diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c index cef0488749e1..954bc5513164 100644 --- a/drivers/pci/controller/pci-xgene-msi.c +++ b/drivers/pci/controller/pci-xgene-msi.c @@ -6,6 +6,7 @@ * Author: Tanmay Inamdar * Duc Dang */ +#include #include #include #include @@ -22,7 +23,15 @@ #define IDX_PER_GROUP 8 #define IRQS_PER_IDX 16 #define NR_HW_IRQS 16 -#define NR_MSI_VEC (IDX_PER_GROUP * IRQS_PER_IDX * NR_HW_IRQS) +#define NR_MSI_BITS (IDX_PER_GROUP * IRQS_PER_IDX * NR_HW_IRQS) +#define NR_MSI_VEC (NR_MSI_BITS / num_possible_cpus()) + +#define MSI_GROUP_MASK GENMASK(22, 19) +#define MSI_INDEX_MASK GENMASK(18, 16) +#define MSI_INTR_MASK GENMASK(19, 16) + +#define MSInRx_HWIRQ_MASK GENMASK(6, 4) +#define DATA_HWIRQ_MASK GENMASK(3, 0) struct xgene_msi { struct irq_domain *inner_domain; @@ -37,8 +46,26 @@ struct xgene_msi { static struct xgene_msi *xgene_msi_ctrl; /* - * X-Gene v1 has 16 groups of MSI termination registers MSInIRx, where - * n is group number (0..F), x is index of registers in each group (0..7) + * X-Gene v1 has 16 frames of MSI termination registers MSInIRx, where n is + * frame number (0..15), x is index of registers in 
each frame (0..7). Each + * 32b register is at the beginning of a 64kB region, each frame occupying + * 512kB (and the whole thing 8MB of PA space). + * + * Each register supports 16 MSI vectors (0..15) to generate interrupts. A + * write to the MSInIRx from the PCI side generates an interrupt. A read + * from the MSInIRx on the CPU side returns a bitmap of the pending MSIs in + * the lower 16 bits. A side effect of this read is that all pending + * interrupts are acknowledged and cleared. + * + * Additionally, each MSI termination frame has 1 MSIINTn register (n is + * 0..15) to indicate the MSI pending status caused by any of its 8 + * termination registers, reported as a bitmap in the lower 8 bits. Each 32b + * register is at the beginning of a 64kB region (and overall occupying an + * extra 1MB). + * + * There is one GIC IRQ assigned for each MSI termination frame, 16 in + * total. + * * The register layout is as follows: * MSI0IR0 base_addr * MSI0IR1 base_addr + 0x10000 @@ -59,107 +86,74 @@ static struct xgene_msi *xgene_msi_ctrl; * MSIINT1 base_addr + 0x810000 * ... ... * MSIINTF base_addr + 0x8F0000 - * - * Each index register supports 16 MSI vectors (0..15) to generate interrupt. - * There are total 16 GIC IRQs assigned for these 16 groups of MSI termination - * registers. - * - * Each MSI termination group has 1 MSIINTn register (n is 0..15) to indicate - * the MSI pending status caused by 1 of its 8 index registers.
*/ /* MSInIRx read helper */ -static u32 xgene_msi_ir_read(struct xgene_msi *msi, - u32 msi_grp, u32 msir_idx) +static u32 xgene_msi_ir_read(struct xgene_msi *msi, u32 msi_grp, u32 msir_idx) { return readl_relaxed(msi->msi_regs + MSI_IR0 + - (msi_grp << 19) + (msir_idx << 16)); + (FIELD_PREP(MSI_GROUP_MASK, msi_grp) | + FIELD_PREP(MSI_INDEX_MASK, msir_idx))); } /* MSIINTn read helper */ static u32 xgene_msi_int_read(struct xgene_msi *msi, u32 msi_grp) { - return readl_relaxed(msi->msi_regs + MSI_INT0 + (msi_grp << 16)); + return readl_relaxed(msi->msi_regs + MSI_INT0 + + FIELD_PREP(MSI_INTR_MASK, msi_grp)); } /* - * With 2048 MSI vectors supported, the MSI message can be constructed using - * following scheme: - * - Divide into 8 256-vector groups - * Group 0: 0-255 - * Group 1: 256-511 - * Group 2: 512-767 - * ... - * Group 7: 1792-2047 - * - Each 256-vector group is divided into 16 16-vector groups - * As an example: 16 16-vector groups for 256-vector group 0-255 is - * Group 0: 0-15 - * Group 1: 16-32 - * ... - * Group 15: 240-255 - * - The termination address of MSI vector in 256-vector group n and 16-vector - * group x is the address of MSIxIRn - * - The data for MSI vector in 16-vector group x is x + * In order to allow an MSI to be moved from one CPU to another without + * having to repaint both the address and the data (which cannot be done + * atomically), we statically partition the MSI frames between CPUs. Given + * that XGene-1 has 8 CPUs, each CPU gets two frames assigned to it. + * + * We adopt the convention that when an MSI is moved, it is configured to + * target the same register number in the congruent frame assigned to the + * new target CPU. This reserves a given MSI across all CPUs, and reduces + * the MSI capacity from 2048 to 256.
+ * + * Effectively, this amounts to: + * - hwirq[7]::cpu[2:0] is the target frame number (n in MSInIRx) + * - hwirq[6:4] is the register index in any given frame (x in MSInIRx) + * - hwirq[3:0] is the MSI data */ -static u32 hwirq_to_reg_set(unsigned long hwirq) +static irq_hw_number_t compute_hwirq(u8 frame, u8 index, u8 data) { - return (hwirq / (NR_HW_IRQS * IRQS_PER_IDX)); -} - -static u32 hwirq_to_group(unsigned long hwirq) -{ - return (hwirq % NR_HW_IRQS); -} - -static u32 hwirq_to_msi_data(unsigned long hwirq) -{ - return ((hwirq / NR_HW_IRQS) % IRQS_PER_IDX); + return (FIELD_PREP(BIT(7), FIELD_GET(BIT(3), frame)) | + FIELD_PREP(MSInRx_HWIRQ_MASK, index) | + FIELD_PREP(DATA_HWIRQ_MASK, data)); } static void xgene_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) { struct xgene_msi *msi = irq_data_get_irq_chip_data(data); - u32 reg_set = hwirq_to_reg_set(data->hwirq); - u32 group = hwirq_to_group(data->hwirq); - u64 target_addr = msi->msi_addr + (((8 * group) + reg_set) << 16); + u64 target_addr; + u32 frame, msir; + int cpu; + + cpu = cpumask_first(irq_data_get_effective_affinity_mask(data)); + msir = FIELD_GET(MSInRx_HWIRQ_MASK, data->hwirq); + frame = FIELD_PREP(BIT(3), FIELD_GET(BIT(7), data->hwirq)) | cpu; + + target_addr = msi->msi_addr; + target_addr += (FIELD_PREP(MSI_GROUP_MASK, frame) | + FIELD_PREP(MSI_INTR_MASK, msir)); msg->address_hi = upper_32_bits(target_addr); msg->address_lo = lower_32_bits(target_addr); - msg->data = hwirq_to_msi_data(data->hwirq); -} - -/* - * X-Gene v1 only has 16 MSI GIC IRQs for 2048 MSI vectors. To maintain - * the expected behaviour of .set_affinity for each MSI interrupt, the 16 - * MSI GIC IRQs are statically allocated to 8 X-Gene v1 cores (2 GIC IRQs - * for each core). The MSI vector is moved from 1 MSI GIC IRQ to another - * MSI GIC IRQ to steer its MSI interrupt to correct X-Gene v1 core. As a - * consequence, the total MSI vectors that X-Gene v1 supports will be - * reduced to 256 (2048/8) vectors. 
- */ -static int hwirq_to_cpu(unsigned long hwirq) -{ - return (hwirq % num_possible_cpus()); -} - -static unsigned long hwirq_to_canonical_hwirq(unsigned long hwirq) -{ - return (hwirq - hwirq_to_cpu(hwirq)); + msg->data = FIELD_GET(DATA_HWIRQ_MASK, data->hwirq); } static int xgene_msi_set_affinity(struct irq_data *irqdata, const struct cpumask *mask, bool force) { int target_cpu = cpumask_first(mask); - int curr_cpu; - curr_cpu = hwirq_to_cpu(irqdata->hwirq); - if (curr_cpu == target_cpu) - return IRQ_SET_MASK_OK_DONE; - - /* Update MSI number to target the new CPU */ - irqdata->hwirq = hwirq_to_canonical_hwirq(irqdata->hwirq) + target_cpu; + irq_data_update_effective_affinity(irqdata, cpumask_of(target_cpu)); + /* Force the core code to regenerate the message */ return IRQ_SET_MASK_OK; } @@ -173,23 +167,20 @@ static int xgene_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *args) { struct xgene_msi *msi = domain->host_data; - int msi_irq; + irq_hw_number_t hwirq; mutex_lock(&msi->bitmap_lock); - msi_irq = bitmap_find_next_zero_area(msi->bitmap, NR_MSI_VEC, 0, - num_possible_cpus(), 0); - if (msi_irq < NR_MSI_VEC) - bitmap_set(msi->bitmap, msi_irq, num_possible_cpus()); - else - msi_irq = -ENOSPC; + hwirq = find_first_zero_bit(msi->bitmap, NR_MSI_VEC); + if (hwirq < NR_MSI_VEC) + set_bit(hwirq, msi->bitmap); mutex_unlock(&msi->bitmap_lock); - if (msi_irq < 0) - return msi_irq; + if (hwirq >= NR_MSI_VEC) + return -ENOSPC; - irq_domain_set_info(domain, virq, msi_irq, + irq_domain_set_info(domain, virq, hwirq, &xgene_msi_bottom_irq_chip, domain->host_data, handle_simple_irq, NULL, NULL); @@ -201,12 +192,10 @@ static void xgene_irq_domain_free(struct irq_domain *domain, { struct irq_data *d = irq_domain_get_irq_data(domain, virq); struct xgene_msi *msi = irq_data_get_irq_chip_data(d); - u32 hwirq; mutex_lock(&msi->bitmap_lock); - hwirq = hwirq_to_canonical_hwirq(d->hwirq); - bitmap_clear(msi->bitmap, hwirq, 
num_possible_cpus()); + clear_bit(d->hwirq, msi->bitmap); mutex_unlock(&msi->bitmap_lock); @@ -263,55 +252,30 @@ static void xgene_msi_isr(struct irq_desc *desc) unsigned int *irqp = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); struct xgene_msi *xgene_msi = xgene_msi_ctrl; - int msir_index, msir_val, hw_irq, ret; - u32 intr_index, grp_select, msi_grp; + unsigned long grp_pending; + int msir_idx; + u32 msi_grp; chained_irq_enter(chip, desc); msi_grp = irqp - xgene_msi->gic_irq; - /* - * MSIINTn (n is 0..F) indicates if there is a pending MSI interrupt - * If bit x of this register is set (x is 0..7), one or more interrupts - * corresponding to MSInIRx is set. - */ - grp_select = xgene_msi_int_read(xgene_msi, msi_grp); - while (grp_select) { - msir_index = ffs(grp_select) - 1; - /* - * Calculate MSInIRx address to read to check for interrupts - * (refer to termination address and data assignment - * described in xgene_compose_msi_msg() ) - */ - msir_val = xgene_msi_ir_read(xgene_msi, msi_grp, msir_index); - while (msir_val) { - intr_index = ffs(msir_val) - 1; - /* - * Calculate MSI vector number (refer to the termination - * address and data assignment described in - * xgene_compose_msi_msg function) - */ - hw_irq = (((msir_index * IRQS_PER_IDX) + intr_index) * - NR_HW_IRQS) + msi_grp; - /* - * As we have multiple hw_irq that maps to single MSI, - * always look up the virq using the hw_irq as seen from - * CPU0 - */ - hw_irq = hwirq_to_canonical_hwirq(hw_irq); - ret = generic_handle_domain_irq(xgene_msi->inner_domain, hw_irq); - WARN_ON_ONCE(ret); - msir_val &= ~(1 << intr_index); - } - grp_select &= ~(1 << msir_index); + grp_pending = xgene_msi_int_read(xgene_msi, msi_grp); - if (!grp_select) { - /* - * We handled all interrupts happened in this group, - * resample this group MSI_INTx register in case - * something else has been made pending in the meantime - */ - grp_select = xgene_msi_int_read(xgene_msi, msi_grp); + 
for_each_set_bit(msir_idx, &grp_pending, IDX_PER_GROUP) { + unsigned long msir; + int intr_idx; + + msir = xgene_msi_ir_read(xgene_msi, msi_grp, msir_idx); + + for_each_set_bit(intr_idx, &msir, IRQS_PER_IDX) { + irq_hw_number_t hwirq; + int ret; + + hwirq = compute_hwirq(msi_grp, msir_idx, intr_idx); + ret = generic_handle_domain_irq(xgene_msi->inner_domain, + hwirq); + WARN_ON_ONCE(ret); } } From 3cc8f625e4c6a0e9f936da6b94166e62e387fe1d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2025 18:34:01 +0100 Subject: [PATCH 0926/2411] PCI: xgene-msi: Resend an MSI racing with itself on a different CPU Since changing the affinity of an MSI really is about changing the target address and that it isn't possible to mask an individual MSI, it is completely possible for an interrupt to race with itself, usually resulting in a lost interrupt. Paper over the design blunder by informing the core code of this sad state of affairs. Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20250708173404.1278635-11-maz@kernel.org --- drivers/pci/controller/pci-xgene-msi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c index 954bc5513164..0ae8f29025bf 100644 --- a/drivers/pci/controller/pci-xgene-msi.c +++ b/drivers/pci/controller/pci-xgene-msi.c @@ -183,6 +183,7 @@ static int xgene_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, irq_domain_set_info(domain, virq, hwirq, &xgene_msi_bottom_irq_chip, domain->host_data, handle_simple_irq, NULL, NULL); + irqd_set_resend_when_in_progress(irq_get_irq_data(virq)); return 0; } From cd5ffaf2b1a85f507e668b773575baf77aa6a6d3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2025 18:34:02 +0100 Subject: [PATCH 0927/2411] PCI: xgene-msi: Probe as a standard platform driver Now that we have made the dependency between the PCI driver and the MSI driver explicit, 
there is no need to use subsys_initcall() as a probing hook, and we can rely on builtin_platform_driver() instead. Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20250708173404.1278635-12-maz@kernel.org --- drivers/pci/controller/pci-xgene-msi.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c index 0ae8f29025bf..988e2f1f2425 100644 --- a/drivers/pci/controller/pci-xgene-msi.c +++ b/drivers/pci/controller/pci-xgene-msi.c @@ -429,9 +429,4 @@ static struct platform_driver xgene_msi_driver = { .probe = xgene_msi_probe, .remove = xgene_msi_remove, }; - -static int __init xgene_pcie_msi_init(void) -{ - return platform_driver_register(&xgene_msi_driver); -} -subsys_initcall(xgene_pcie_msi_init); +builtin_platform_driver(xgene_msi_driver); From 6aceb36f17abf801000835763df7c64a4f11f46d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2025 18:34:03 +0100 Subject: [PATCH 0928/2411] PCI: xgene-msi: Restructure handler setup/teardown Another utterly pointless aspect of the xgene-msi driver is that it is built around CPU hotplug. Which is quite amusing since this is one of the few arm64 platforms that, by construction, cannot do CPU hotplug in a supported way (no EL3, no PSCI, no luck). Drop the CPU hotplug nonsense and just setup the IRQs and handlers in a less overdesigned way, grouping things more logically in the process. 
Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20250708173404.1278635-13-maz@kernel.org --- drivers/pci/controller/pci-xgene-msi.c | 107 +++++++++---------------- 1 file changed, 37 insertions(+), 70 deletions(-) diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c index 988e2f1f2425..0a37a3f1809c 100644 --- a/drivers/pci/controller/pci-xgene-msi.c +++ b/drivers/pci/controller/pci-xgene-msi.c @@ -231,12 +231,6 @@ static int xgene_allocate_domains(struct device_node *node, return msi->inner_domain ? 0 : -ENOMEM; } -static void xgene_free_domains(struct xgene_msi *msi) -{ - if (msi->inner_domain) - irq_domain_remove(msi->inner_domain); -} - static int xgene_msi_init_allocator(struct device *dev) { xgene_msi_ctrl->bitmap = devm_bitmap_zalloc(dev, NR_MSI_VEC, GFP_KERNEL); @@ -283,26 +277,48 @@ static void xgene_msi_isr(struct irq_desc *desc) chained_irq_exit(chip, desc); } -static enum cpuhp_state pci_xgene_online; - static void xgene_msi_remove(struct platform_device *pdev) { - struct xgene_msi *msi = platform_get_drvdata(pdev); + for (int i = 0; i < NR_HW_IRQS; i++) { + unsigned int irq = xgene_msi_ctrl->gic_irq[i]; + if (!irq) + continue; + irq_set_chained_handler_and_data(irq, NULL, NULL); + } - if (pci_xgene_online) - cpuhp_remove_state(pci_xgene_online); - cpuhp_remove_state(CPUHP_PCI_XGENE_DEAD); - - xgene_free_domains(msi); + if (xgene_msi_ctrl->inner_domain) + irq_domain_remove(xgene_msi_ctrl->inner_domain); } -static int xgene_msi_hwirq_alloc(unsigned int cpu) +static int xgene_msi_handler_setup(struct platform_device *pdev) { + struct xgene_msi *xgene_msi = xgene_msi_ctrl; int i; - int err; - for (i = cpu; i < NR_HW_IRQS; i += num_possible_cpus()) { - unsigned int irq = xgene_msi_ctrl->gic_irq[i]; + for (i = 0; i < NR_HW_IRQS; i++) { + u32 msi_val; + int irq, err; + + /* + * MSInIRx registers are read-to-clear; before registering + * interrupt 
handlers, read all of them to clear spurious + * interrupts that may occur before the driver is probed. + */ + for (int msi_idx = 0; msi_idx < IDX_PER_GROUP; msi_idx++) + xgene_msi_ir_read(xgene_msi, i, msi_idx); + + /* Read MSIINTn to confirm */ + msi_val = xgene_msi_int_read(xgene_msi, i); + if (msi_val) { + dev_err(&pdev->dev, "Failed to clear spurious IRQ\n"); + return -EINVAL; + } + + irq = platform_get_irq(pdev, i); + if (irq < 0) + return irq; + + xgene_msi->gic_irq[i] = irq; /* * Statically allocate MSI GIC IRQs to each CPU core. @@ -310,7 +326,7 @@ static int xgene_msi_hwirq_alloc(unsigned int cpu) * to each core. */ irq_set_status_flags(irq, IRQ_NO_BALANCING); - err = irq_set_affinity(irq, cpumask_of(cpu)); + err = irq_set_affinity(irq, cpumask_of(i % num_possible_cpus())); if (err) { pr_err("failed to set affinity for GIC IRQ"); return err; @@ -323,17 +339,6 @@ static int xgene_msi_hwirq_alloc(unsigned int cpu) return 0; } -static int xgene_msi_hwirq_free(unsigned int cpu) -{ - struct xgene_msi *msi = xgene_msi_ctrl; - int i; - - for (i = cpu; i < NR_HW_IRQS; i += num_possible_cpus()) - irq_set_chained_handler_and_data(msi->gic_irq[i], NULL, NULL); - - return 0; -} - static const struct of_device_id xgene_msi_match_table[] = { {.compatible = "apm,xgene1-msi"}, {}, @@ -343,7 +348,6 @@ static int xgene_msi_probe(struct platform_device *pdev) { struct resource *res; struct xgene_msi *xgene_msi; - u32 msi_val, msi_idx; int rc; xgene_msi_ctrl = devm_kzalloc(&pdev->dev, sizeof(*xgene_msi_ctrl), @@ -353,8 +357,6 @@ static int xgene_msi_probe(struct platform_device *pdev) xgene_msi = xgene_msi_ctrl; - platform_set_drvdata(pdev, xgene_msi); - xgene_msi->msi_regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(xgene_msi->msi_regs)) { rc = PTR_ERR(xgene_msi->msi_regs); @@ -374,48 +376,13 @@ static int xgene_msi_probe(struct platform_device *pdev) goto error; } - for (int irq_index = 0; irq_index < NR_HW_IRQS; irq_index++) { - rc =
platform_get_irq(pdev, irq_index); - if (rc < 0) - goto error; - - xgene_msi->gic_irq[irq_index] = rc; - } - - /* - * MSInIRx registers are read-to-clear; before registering - * interrupt handlers, read all of them to clear spurious - * interrupts that may occur before the driver is probed. - */ - for (int irq_index = 0; irq_index < NR_HW_IRQS; irq_index++) { - for (msi_idx = 0; msi_idx < IDX_PER_GROUP; msi_idx++) - xgene_msi_ir_read(xgene_msi, irq_index, msi_idx); - - /* Read MSIINTn to confirm */ - msi_val = xgene_msi_int_read(xgene_msi, irq_index); - if (msi_val) { - dev_err(&pdev->dev, "Failed to clear spurious IRQ\n"); - rc = -EINVAL; - goto error; - } - } - - rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "pci/xgene:online", - xgene_msi_hwirq_alloc, NULL); - if (rc < 0) - goto err_cpuhp; - pci_xgene_online = rc; - rc = cpuhp_setup_state(CPUHP_PCI_XGENE_DEAD, "pci/xgene:dead", NULL, - xgene_msi_hwirq_free); + rc = xgene_msi_handler_setup(pdev); if (rc) - goto err_cpuhp; + goto error; dev_info(&pdev->dev, "APM X-Gene PCIe MSI driver loaded\n"); return 0; - -err_cpuhp: - dev_err(&pdev->dev, "failed to add CPU MSI notifier\n"); error: xgene_msi_remove(pdev); return rc; From e612423be33465d2b9822bf09e03d4e6c165e384 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2025 18:34:04 +0100 Subject: [PATCH 0929/2411] cpu/hotplug: Remove unused cpuhp_state CPUHP_PCI_XGENE_DEAD Now that the XGene MSI driver has been mostly rewritten and doesn't use the CPU hotplug infrastructure, CPUHP_PCI_XGENE_DEAD is unused. Remove it to reduce the size of cpuhp_hp_states[]. 
Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20250708173404.1278635-14-maz@kernel.org --- include/linux/cpuhotplug.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index df366ee15456..eaca70eb6136 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -90,7 +90,6 @@ enum cpuhp_state { CPUHP_RADIX_DEAD, CPUHP_PAGE_ALLOC, CPUHP_NET_DEV_DEAD, - CPUHP_PCI_XGENE_DEAD, CPUHP_IOMMU_IOVA_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_PADATA_DEAD, From 8c8efa93db68bb9fbdb46b93d5b66ff18bdf3d18 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 2 May 2025 18:45:33 +0900 Subject: [PATCH 0930/2411] x86/bug: Add ARCH_WARN_ASM macro for BUG/WARN asm code sharing with Rust Add new ARCH_WARN_ASM macro for BUG/WARN assembly code sharing with Rust to avoid the duplication. No functional changes. Acked-by: Peter Zijlstra (Intel) Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250502094537.231725-2-fujita.tomonori@gmail.com [ Fixed typo in macro parameter name. - Miguel ] Signed-off-by: Miguel Ojeda --- arch/x86/include/asm/bug.h | 56 +++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index f0e9acf72547..20fcb8507ad1 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -32,45 +32,42 @@ #ifdef CONFIG_GENERIC_BUG #ifdef CONFIG_X86_32 -# define __BUG_REL(val) ".long " __stringify(val) +# define __BUG_REL(val) ".long " val #else -# define __BUG_REL(val) ".long " __stringify(val) " - ." +# define __BUG_REL(val) ".long " val " - ." 
#endif #ifdef CONFIG_DEBUG_BUGVERBOSE +#define __BUG_ENTRY(file, line, flags) \ + "2:\t" __BUG_REL("1b") "\t# bug_entry::bug_addr\n" \ + "\t" __BUG_REL(file) "\t# bug_entry::file\n" \ + "\t.word " line "\t# bug_entry::line\n" \ + "\t.word " flags "\t# bug_entry::flags\n" +#else +#define __BUG_ENTRY(file, line, flags) \ + "2:\t" __BUG_REL("1b") "\t# bug_entry::bug_addr\n" \ + "\t.word " flags "\t# bug_entry::flags\n" +#endif + +#define _BUG_FLAGS_ASM(ins, file, line, flags, size, extra) \ + "1:\t" ins "\n" \ + ".pushsection __bug_table,\"aw\"\n" \ + __BUG_ENTRY(file, line, flags) \ + "\t.org 2b + " size "\n" \ + ".popsection\n" \ + extra #define _BUG_FLAGS(ins, flags, extra) \ do { \ - asm_inline volatile("1:\t" ins "\n" \ - ".pushsection __bug_table,\"aw\"\n" \ - "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \ - "\t" __BUG_REL(%c0) "\t# bug_entry::file\n" \ - "\t.word %c1" "\t# bug_entry::line\n" \ - "\t.word %c2" "\t# bug_entry::flags\n" \ - "\t.org 2b+%c3\n" \ - ".popsection\n" \ - extra \ + asm_inline volatile(_BUG_FLAGS_ASM(ins, "%c0", \ + "%c1", "%c2", "%c3", extra) \ : : "i" (__FILE__), "i" (__LINE__), \ "i" (flags), \ "i" (sizeof(struct bug_entry))); \ } while (0) -#else /* !CONFIG_DEBUG_BUGVERBOSE */ - -#define _BUG_FLAGS(ins, flags, extra) \ -do { \ - asm_inline volatile("1:\t" ins "\n" \ - ".pushsection __bug_table,\"aw\"\n" \ - "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \ - "\t.word %c0" "\t# bug_entry::flags\n" \ - "\t.org 2b+%c1\n" \ - ".popsection\n" \ - extra \ - : : "i" (flags), \ - "i" (sizeof(struct bug_entry))); \ -} while (0) - -#endif /* CONFIG_DEBUG_BUGVERBOSE */ +#define ARCH_WARN_ASM(file, line, flags, size) \ + _BUG_FLAGS_ASM(ASM_UD2, file, line, flags, size, "") #else @@ -92,11 +89,14 @@ do { \ * were to trigger, we'd rather wreck the machine in an attempt to get the * message out than not know about it. 
*/ + +#define ARCH_WARN_REACHABLE ANNOTATE_REACHABLE(1b) + #define __WARN_FLAGS(flags) \ do { \ __auto_type __flags = BUGFLAG_WARNING|(flags); \ instrumentation_begin(); \ - _BUG_FLAGS(ASM_UD2, __flags, ANNOTATE_REACHABLE(1b)); \ + _BUG_FLAGS(ASM_UD2, __flags, ARCH_WARN_REACHABLE); \ instrumentation_end(); \ } while (0) From 8ad470d4e3dcd3db95d8bda6d35909a2ce897ca7 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 2 May 2025 18:45:34 +0900 Subject: [PATCH 0931/2411] riscv/bug: Add ARCH_WARN_ASM macro for BUG/WARN asm code sharing with Rust Add new ARCH_WARN_ASM macro for BUG/WARN assembly code sharing with Rust to avoid the duplication. No functional changes. Acked-by: Alexandre Ghiti Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250502094537.231725-3-fujita.tomonori@gmail.com [ Remove ending newline in `ARCH_WARN_ASM` content to be closer to the original. - Miguel ] Signed-off-by: Miguel Ojeda --- arch/riscv/include/asm/bug.h | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h index 1aaea81fb141..4c03e20ad11f 100644 --- a/arch/riscv/include/asm/bug.h +++ b/arch/riscv/include/asm/bug.h @@ -31,40 +31,45 @@ typedef u32 bug_insn_t; #ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS #define __BUG_ENTRY_ADDR RISCV_INT " 1b - ." -#define __BUG_ENTRY_FILE RISCV_INT " %0 - ." +#define __BUG_ENTRY_FILE(file) RISCV_INT " " file " - ." 
#else #define __BUG_ENTRY_ADDR RISCV_PTR " 1b" -#define __BUG_ENTRY_FILE RISCV_PTR " %0" +#define __BUG_ENTRY_FILE(file) RISCV_PTR " " file #endif #ifdef CONFIG_DEBUG_BUGVERBOSE -#define __BUG_ENTRY \ +#define __BUG_ENTRY(file, line, flags) \ __BUG_ENTRY_ADDR "\n\t" \ - __BUG_ENTRY_FILE "\n\t" \ - RISCV_SHORT " %1\n\t" \ - RISCV_SHORT " %2" + __BUG_ENTRY_FILE(file) "\n\t" \ + RISCV_SHORT " " line "\n\t" \ + RISCV_SHORT " " flags #else -#define __BUG_ENTRY \ - __BUG_ENTRY_ADDR "\n\t" \ - RISCV_SHORT " %2" +#define __BUG_ENTRY(file, line, flags) \ + __BUG_ENTRY_ADDR "\n\t" \ + RISCV_SHORT " " flags #endif #ifdef CONFIG_GENERIC_BUG -#define __BUG_FLAGS(flags) \ -do { \ - __asm__ __volatile__ ( \ + +#define ARCH_WARN_ASM(file, line, flags, size) \ "1:\n\t" \ "ebreak\n" \ ".pushsection __bug_table,\"aw\"\n\t" \ "2:\n\t" \ - __BUG_ENTRY "\n\t" \ - ".org 2b + %3\n\t" \ + __BUG_ENTRY(file, line, flags) "\n\t" \ + ".org 2b + " size "\n\t" \ ".popsection" \ + +#define __BUG_FLAGS(flags) \ +do { \ + __asm__ __volatile__ ( \ + ARCH_WARN_ASM("%0", "%1", "%2", "%3") \ : \ : "i" (__FILE__), "i" (__LINE__), \ "i" (flags), \ "i" (sizeof(struct bug_entry))); \ } while (0) + #else /* CONFIG_GENERIC_BUG */ #define __BUG_FLAGS(flags) do { \ __asm__ __volatile__ ("ebreak\n"); \ @@ -78,6 +83,8 @@ do { \ #define __WARN_FLAGS(flags) __BUG_FLAGS(BUGFLAG_WARNING|(flags)) +#define ARCH_WARN_REACHABLE + #define HAVE_ARCH_BUG #include From 826230970a44a50227d4884835ea8a0f8825fe03 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 2 May 2025 18:45:35 +0900 Subject: [PATCH 0932/2411] arm64/bug: Add ARCH_WARN_ASM macro for BUG/WARN asm code sharing with Rust Add new ARCH_WARN_ASM macro for BUG/WARN assembly code sharing with Rust to avoid the duplication. No functional changes. 
Acked-by: Catalin Marinas Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250502094537.231725-4-fujita.tomonori@gmail.com Signed-off-by: Miguel Ojeda --- arch/arm64/include/asm/asm-bug.h | 33 ++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/asm-bug.h b/arch/arm64/include/asm/asm-bug.h index 6e73809f6492..a5f13801b784 100644 --- a/arch/arm64/include/asm/asm-bug.h +++ b/arch/arm64/include/asm/asm-bug.h @@ -21,16 +21,21 @@ #endif #ifdef CONFIG_GENERIC_BUG - -#define __BUG_ENTRY(flags) \ +#define __BUG_ENTRY_START \ .pushsection __bug_table,"aw"; \ .align 2; \ 14470: .long 14471f - .; \ -_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ - .short flags; \ + +#define __BUG_ENTRY_END \ .align 2; \ .popsection; \ 14471: + +#define __BUG_ENTRY(flags) \ + __BUG_ENTRY_START \ +_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ + .short flags; \ + __BUG_ENTRY_END #else #define __BUG_ENTRY(flags) #endif @@ -41,4 +46,24 @@ _BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ #define ASM_BUG() ASM_BUG_FLAGS(0) +#ifdef CONFIG_DEBUG_BUGVERBOSE +#define __BUG_LOCATION_STRING(file, line) \ + ".long " file "- .;" \ + ".short " line ";" +#else +#define __BUG_LOCATION_STRING(file, line) +#endif + +#define __BUG_ENTRY_STRING(file, line, flags) \ + __stringify(__BUG_ENTRY_START) \ + __BUG_LOCATION_STRING(file, line) \ + ".short " flags ";" \ + __stringify(__BUG_ENTRY_END) + +#define ARCH_WARN_ASM(file, line, flags, size) \ + __BUG_ENTRY_STRING(file, line, flags) \ + __stringify(brk BUG_BRK_IMM) + +#define ARCH_WARN_REACHABLE + #endif /* __ASM_ASM_BUG_H */ From 289642767c2e12df58213f7b34f78d19466d9c28 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Fri, 4 Jul 2025 12:11:44 +0300 Subject: [PATCH 0933/2411] rtc: m41t80: remove HT feature for m41t65 The M41T65 device does not support the "Halt Update Bit" (HT) feature as per its datasheet. This aligns the driver with the actual hardware capabilities. 
Signed-off-by: Alexander Shiyan Link: https://lore.kernel.org/r/20250704091144.45389-1-eagle.alexander923@gmail.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-m41t80.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index c568639d2151..869358e9305b 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -72,7 +72,7 @@ static const struct i2c_device_id m41t80_id[] = { { "m41t62", M41T80_FEATURE_SQ | M41T80_FEATURE_SQ_ALT }, - { "m41t65", M41T80_FEATURE_HT | M41T80_FEATURE_WD }, + { "m41t65", M41T80_FEATURE_WD }, { "m41t80", M41T80_FEATURE_SQ }, { "m41t81", M41T80_FEATURE_HT | M41T80_FEATURE_SQ}, { "m41t81s", M41T80_FEATURE_HT | M41T80_FEATURE_BL | M41T80_FEATURE_SQ }, @@ -93,7 +93,7 @@ static const __maybe_unused struct of_device_id m41t80_of_match[] = { }, { .compatible = "st,m41t65", - .data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_WD) + .data = (void *)(M41T80_FEATURE_WD) }, { .compatible = "st,m41t80", From 8b52144f0e08e7640bdbaf7b6a2527b3e100a769 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 7 Jul 2025 11:22:01 +0200 Subject: [PATCH 0934/2411] rtc: s3c: Put 'const' just after 'static' keyword for data Convention is to define static data as 'static const ...', not 'static ... const' because of readability, even if the code is functionally equal. 
Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250707092200.48862-2-krzysztof.kozlowski@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-s3c.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 5dd575865adf..79b2a16f15ad 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -549,25 +549,25 @@ static void s3c6410_rtc_irq(struct s3c_rtc *info, int mask) writeb(mask, info->base + S3C2410_INTP); } -static struct s3c_rtc_data const s3c2410_rtc_data = { +static const struct s3c_rtc_data s3c2410_rtc_data = { .irq_handler = s3c24xx_rtc_irq, .enable = s3c24xx_rtc_enable, .disable = s3c24xx_rtc_disable, }; -static struct s3c_rtc_data const s3c2416_rtc_data = { +static const struct s3c_rtc_data s3c2416_rtc_data = { .irq_handler = s3c24xx_rtc_irq, .enable = s3c24xx_rtc_enable, .disable = s3c24xx_rtc_disable, }; -static struct s3c_rtc_data const s3c2443_rtc_data = { +static const struct s3c_rtc_data s3c2443_rtc_data = { .irq_handler = s3c24xx_rtc_irq, .enable = s3c24xx_rtc_enable, .disable = s3c24xx_rtc_disable, }; -static struct s3c_rtc_data const s3c6410_rtc_data = { +static const struct s3c_rtc_data s3c6410_rtc_data = { .needs_src_clk = true, .irq_handler = s3c6410_rtc_irq, .enable = s3c24xx_rtc_enable, From 5c0d0ee36f168f6962a710205436533be31c9a42 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 22 Jul 2025 08:59:26 -0700 Subject: [PATCH 0935/2411] PCI: Support Immediate Readiness on devices without PM capabilities Query support for Immediate Readiness irrespective of whether or not the device supports PM capabilities, as nothing in the PCIe spec suggests that Immediate Readiness is in any way dependent on PM functionality. 
Fixes: d6112f8def51 ("PCI: Add support for Immediate Readiness") Signed-off-by: Sean Christopherson Signed-off-by: Bjorn Helgaas Cc: David Matlack Cc: Vipin Sharma Cc: Aaron Lewis Link: https://patch.msgid.link/20250722155926.352248-1-seanjc@google.com --- drivers/pci/pci.c | 4 ---- drivers/pci/probe.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e9448d55113b..d3b059067ba0 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3205,7 +3205,6 @@ void pci_pm_power_up_and_verify_state(struct pci_dev *pci_dev) void pci_pm_init(struct pci_dev *dev) { int pm; - u16 status; u16 pmc; device_enable_async_suspend(&dev->dev); @@ -3266,9 +3265,6 @@ void pci_pm_init(struct pci_dev *dev) pci_pme_active(dev, false); } - pci_read_config_word(dev, PCI_STATUS, &status); - if (status & PCI_STATUS_IMM_READY) - dev->imm_ready = 1; pci_pm_power_up_and_verify_state(dev); pm_runtime_forbid(&dev->dev); pm_runtime_set_active(&dev->dev); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4b8693ec9e4c..1571d4b392a6 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2595,6 +2595,15 @@ void pcie_report_downtraining(struct pci_dev *dev) __pcie_print_link_status(dev, false); } +static void pci_imm_ready_init(struct pci_dev *dev) +{ + u16 status; + + pci_read_config_word(dev, PCI_STATUS, &status); + if (status & PCI_STATUS_IMM_READY) + dev->imm_ready = 1; +} + static void pci_init_capabilities(struct pci_dev *dev) { pci_ea_init(dev); /* Enhanced Allocation */ @@ -2604,6 +2613,7 @@ static void pci_init_capabilities(struct pci_dev *dev) /* Buffers for saving PCIe and PCI-X capabilities */ pci_allocate_cap_save_buffers(dev); + pci_imm_ready_init(dev); /* Immediate Readiness */ pci_pm_init(dev); /* Power Management */ pci_vpd_init(dev); /* Vital Product Data */ pci_configure_ari(dev); /* Alternative Routing-ID Forwarding */ From dff64b072708ffef23c117fa1ee1ea59eb417807 Mon Sep 17 00:00:00 2001 From: 
FUJITA Tomonori Date: Fri, 2 May 2025 18:45:36 +0900 Subject: [PATCH 0936/2411] rust: Add warn_on macro Add the warn_on macro, which uses the BUG/WARN feature (lib/bug.c) via assembly for x86_64/arm64/riscv. The current Rust code simply wraps the BUG() macro but doesn't provide the proper debug information. The BUG/WARN feature can only be used from assembly. This uses the assembly code exported by the C side via the ARCH_WARN_ASM macro. To avoid duplicating the assembly code, this approach follows the same strategy as the static branch code: it generates the assembly code for Rust using the C preprocessor at compile time. Similarly, ARCH_WARN_REACHABLE is also used at compile time to generate the assembly code for objtool's reachable annotation. It is used only on architectures that use objtool. For now, Loongarch and arm just use a wrapper for the WARN macro. UML doesn't use the assembly BUG/WARN feature; just wrapping generic BUG/WARN functions implemented in C works. Signed-off-by: FUJITA Tomonori Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250502094537.231725-5-fujita.tomonori@gmail.com [ Avoid evaluating the condition twice (a good idea in general, but it also matches the C side). Simplify with `as_char_ptr()` to avoid a cast. Cast to `ffi` integer types for `warn_slowpath_fmt`. Avoid cast for `null()`.
- Miguel ] Signed-off-by: Miguel Ojeda --- rust/Makefile | 8 ++ rust/helpers/bug.c | 5 + rust/kernel/.gitignore | 2 + rust/kernel/bug.rs | 126 ++++++++++++++++++ rust/kernel/generated_arch_reachable_asm.rs.S | 7 + rust/kernel/generated_arch_warn_asm.rs.S | 7 + rust/kernel/lib.rs | 1 + 7 files changed, 156 insertions(+) create mode 100644 rust/kernel/bug.rs create mode 100644 rust/kernel/generated_arch_reachable_asm.rs.S create mode 100644 rust/kernel/generated_arch_warn_asm.rs.S diff --git a/rust/Makefile b/rust/Makefile index 27dec7904c3a..4e675d210dd8 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -34,6 +34,9 @@ obj-$(CONFIG_RUST_KERNEL_DOCTESTS) += doctests_kernel_generated.o obj-$(CONFIG_RUST_KERNEL_DOCTESTS) += doctests_kernel_generated_kunit.o always-$(subst y,$(CONFIG_RUST),$(CONFIG_JUMP_LABEL)) += kernel/generated_arch_static_branch_asm.rs +ifndef CONFIG_UML +always-$(subst y,$(CONFIG_RUST),$(CONFIG_BUG)) += kernel/generated_arch_warn_asm.rs kernel/generated_arch_reachable_asm.rs +endif # Avoids running `$(RUSTC)` when it may not be available. 
ifdef CONFIG_RUST @@ -540,5 +543,10 @@ $(obj)/kernel.o: $(src)/kernel/lib.rs $(obj)/build_error.o $(obj)/pin_init.o \ ifdef CONFIG_JUMP_LABEL $(obj)/kernel.o: $(obj)/kernel/generated_arch_static_branch_asm.rs endif +ifndef CONFIG_UML +ifdef CONFIG_BUG +$(obj)/kernel.o: $(obj)/kernel/generated_arch_warn_asm.rs $(obj)/kernel/generated_arch_reachable_asm.rs +endif +endif endif # CONFIG_RUST diff --git a/rust/helpers/bug.c b/rust/helpers/bug.c index e2d13babc737..a62c96f507d1 100644 --- a/rust/helpers/bug.c +++ b/rust/helpers/bug.c @@ -6,3 +6,8 @@ __noreturn void rust_helper_BUG(void) { BUG(); } + +bool rust_helper_WARN_ON(bool cond) +{ + return WARN_ON(cond); +} diff --git a/rust/kernel/.gitignore b/rust/kernel/.gitignore index 6ba39a178f30..f636ad95aaf3 100644 --- a/rust/kernel/.gitignore +++ b/rust/kernel/.gitignore @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 /generated_arch_static_branch_asm.rs +/generated_arch_warn_asm.rs +/generated_arch_reachable_asm.rs diff --git a/rust/kernel/bug.rs b/rust/kernel/bug.rs new file mode 100644 index 000000000000..36aef43e5ebe --- /dev/null +++ b/rust/kernel/bug.rs @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024, 2025 FUJITA Tomonori + +//! Support for BUG and WARN functionality. +//! +//! C header: [`include/asm-generic/bug.h`](srctree/include/asm-generic/bug.h) + +#[macro_export] +#[doc(hidden)] +#[cfg(all(CONFIG_BUG, not(CONFIG_UML), not(CONFIG_LOONGARCH), not(CONFIG_ARM)))] +#[cfg(CONFIG_DEBUG_BUGVERBOSE)] +macro_rules! warn_flags { + ($flags:expr) => { + const FLAGS: u32 = $crate::bindings::BUGFLAG_WARNING | $flags; + const _FILE: &[u8] = file!().as_bytes(); + // Plus one for null-terminator. 
+ static FILE: [u8; _FILE.len() + 1] = { + let mut bytes = [0; _FILE.len() + 1]; + let mut i = 0; + while i < _FILE.len() { + bytes[i] = _FILE[i]; + i += 1; + } + bytes + }; + + // SAFETY: + // - `file`, `line`, `flags`, and `size` are all compile-time constants or + // symbols, preventing any invalid memory access. + // - The asm block has no side effects and does not modify any registers + // or memory. It is purely for embedding metadata into the ELF section. + unsafe { + $crate::asm!( + concat!( + "/* {size} */", + include!(concat!(env!("OBJTREE"), "/rust/kernel/generated_arch_warn_asm.rs")), + include!(concat!(env!("OBJTREE"), "/rust/kernel/generated_arch_reachable_asm.rs"))); + file = sym FILE, + line = const line!(), + flags = const FLAGS, + size = const ::core::mem::size_of::<$crate::bindings::bug_entry>(), + ); + } + } +} + +#[macro_export] +#[doc(hidden)] +#[cfg(all(CONFIG_BUG, not(CONFIG_UML), not(CONFIG_LOONGARCH), not(CONFIG_ARM)))] +#[cfg(not(CONFIG_DEBUG_BUGVERBOSE))] +macro_rules! warn_flags { + ($flags:expr) => { + const FLAGS: u32 = $crate::bindings::BUGFLAG_WARNING | $flags; + + // SAFETY: + // - `flags` and `size` are all compile-time constants, preventing + // any invalid memory access. + // - The asm block has no side effects and does not modify any registers + // or memory. It is purely for embedding metadata into the ELF section. + unsafe { + $crate::asm!( + concat!( + "/* {size} */", + include!(concat!(env!("OBJTREE"), "/rust/kernel/generated_arch_warn_asm.rs")), + include!(concat!(env!("OBJTREE"), "/rust/kernel/generated_arch_reachable_asm.rs"))); + flags = const FLAGS, + size = const ::core::mem::size_of::<$crate::bindings::bug_entry>(), + ); + } + } +} + +#[macro_export] +#[doc(hidden)] +#[cfg(all(CONFIG_BUG, CONFIG_UML))] +macro_rules! warn_flags { + ($flags:expr) => { + // SAFETY: It is always safe to call `warn_slowpath_fmt()` + // with a valid null-terminated string. 
+ unsafe { + $crate::bindings::warn_slowpath_fmt( + $crate::c_str!(::core::file!()).as_char_ptr(), + line!() as $crate::ffi::c_int, + $flags as $crate::ffi::c_uint, + ::core::ptr::null(), + ); + } + }; +} + +#[macro_export] +#[doc(hidden)] +#[cfg(all(CONFIG_BUG, any(CONFIG_LOONGARCH, CONFIG_ARM)))] +macro_rules! warn_flags { + ($flags:expr) => { + // SAFETY: It is always safe to call `WARN_ON()`. + unsafe { $crate::bindings::WARN_ON(true) } + }; +} + +#[macro_export] +#[doc(hidden)] +#[cfg(not(CONFIG_BUG))] +macro_rules! warn_flags { + ($flags:expr) => {}; +} + +#[doc(hidden)] +pub const fn bugflag_taint(value: u32) -> u32 { + value << 8 +} + +/// Report a warning if `cond` is true and return the condition's evaluation result. +#[macro_export] +macro_rules! warn_on { + ($cond:expr) => {{ + let cond = $cond; + if cond { + const WARN_ON_FLAGS: u32 = $crate::bug::bugflag_taint($crate::bindings::TAINT_WARN); + + $crate::warn_flags!(WARN_ON_FLAGS); + } + cond + }}; +} diff --git a/rust/kernel/generated_arch_reachable_asm.rs.S b/rust/kernel/generated_arch_reachable_asm.rs.S new file mode 100644 index 000000000000..3886a9ad3a99 --- /dev/null +++ b/rust/kernel/generated_arch_reachable_asm.rs.S @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include + +// Cut here. + +::kernel::concat_literals!(ARCH_WARN_REACHABLE) diff --git a/rust/kernel/generated_arch_warn_asm.rs.S b/rust/kernel/generated_arch_warn_asm.rs.S new file mode 100644 index 000000000000..409eb4c2d3a1 --- /dev/null +++ b/rust/kernel/generated_arch_warn_asm.rs.S @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include + +// Cut here. 
+ +::kernel::concat_literals!(ARCH_WARN_ASM("{file}", "{line}", "{flags}", "{size}")) diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index e88bc4b27d6e..11a6461e98da 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -57,6 +57,7 @@ pub mod bits; #[cfg(CONFIG_BLOCK)] pub mod block; +pub mod bug; #[doc(hidden)] pub mod build_assert; pub mod clk; From c897c1e5b19dd4fc32e84fa1ab2065c2507be3a7 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 26 Jun 2025 17:19:40 +0200 Subject: [PATCH 0937/2411] tracing: Remove pointless memory barriers Memory barriers are useful to ensure memory accesses from one CPU appear in the original order as seen by other CPUs. Some smp_rmb() and smp_wmb() are used, but they are not ordering multiple memory accesses. Remove them. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Gabriele Monaco Link: https://lore.kernel.org/20250626151940.1756398-1-namcao@linutronix.de Signed-off-by: Nam Cao Signed-off-by: Steven Rostedt (Google) --- kernel/trace/rv/rv.c | 6 ------ kernel/trace/trace.c | 7 ------- 2 files changed, 13 deletions(-) diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c index e4077500a91d..c04a49da4328 100644 --- a/kernel/trace/rv/rv.c +++ b/kernel/trace/rv/rv.c @@ -675,8 +675,6 @@ static bool __read_mostly monitoring_on; */ bool rv_monitoring_on(void) { - /* Ensures that concurrent monitors read consistent monitoring_on */ - smp_rmb(); return READ_ONCE(monitoring_on); } @@ -696,8 +694,6 @@ static ssize_t monitoring_on_read_data(struct file *filp, char __user *user_buf, static void turn_monitoring_off(void) { WRITE_ONCE(monitoring_on, false); - /* Ensures that concurrent monitors read consistent monitoring_on */ - smp_wmb(); } static void reset_all_monitors(void) @@ -713,8 +709,6 @@ static void reset_all_monitors(void) static void turn_monitoring_on(void) { WRITE_ONCE(monitoring_on, true); - /* Ensures that concurrent monitors read consistent monitoring_on */ - smp_wmb(); } static void 
turn_monitoring_on_with_reset(void) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 95ae7c4e5835..0dff4298fc0e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -936,7 +936,6 @@ int tracing_is_enabled(void) * return the mirror variable of the state of the ring buffer. * It's a little racy, but we don't really care. */ - smp_rmb(); return !global_trace.buffer_disabled; } @@ -1107,8 +1106,6 @@ void tracer_tracing_on(struct trace_array *tr) * important to be fast than accurate. */ tr->buffer_disabled = 0; - /* Make the flag seen by readers */ - smp_wmb(); } /** @@ -1640,8 +1637,6 @@ void tracer_tracing_off(struct trace_array *tr) * important to be fast than accurate. */ tr->buffer_disabled = 1; - /* Make the flag seen by readers */ - smp_wmb(); } /** @@ -2710,8 +2705,6 @@ void trace_buffered_event_enable(void) static void enable_trace_buffered_event(void *data) { - /* Probably not needed, but do it anyway */ - smp_rmb(); this_cpu_dec(trace_buffered_event_cnt); } From 07c3f391bcb217b6949b49785ccb5fee02be21fe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 2 Jul 2025 14:36:57 -0400 Subject: [PATCH 0938/2411] tracing: Remove EVENT_FILE_FL_SOFT_MODE flag When soft disabling of trace events was first created, it needed to have a way to know if a file had a user that was using it with soft disabled (for triggers that need to enable or disable events from a context that can not really enable or disable the event, it would set SOFT_DISABLED to state it is disabled). The flag SOFT_MODE was used to denote that an event had a user that would enable or disable it via the SOFT_DISABLED flag. Commit 1cf4c0732db3c ("tracing: Modify soft-mode only if there's no other referrer") fixed a bug where if two users were using the SOFT_DISABLED flag the accounting would get messed up as the SOFT_MODE flag could only handle one user. That commit added the sm_ref counter which kept track of how many users were using the event in "soft mode". 
This made the SOFT_MODE flag redundant as it should only be set if the sm_ref counter is non zero. Remove the SOFT_MODE flag and just use the sm_ref counter to know the event is in soft mode or not. This makes the code a bit simpler. Link: https://lore.kernel.org/all/20250702111908.03759998@batman.local.home/ Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Gabriele Paoloni Link: https://lore.kernel.org/20250702143657.18dd1882@batman.local.home Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 3 --- kernel/trace/trace_events.c | 24 ++++++++++++------------ 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index fa9cf4292dff..04307a19cde3 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -480,7 +480,6 @@ enum { EVENT_FILE_FL_RECORDED_TGID_BIT, EVENT_FILE_FL_FILTERED_BIT, EVENT_FILE_FL_NO_SET_FILTER_BIT, - EVENT_FILE_FL_SOFT_MODE_BIT, EVENT_FILE_FL_SOFT_DISABLED_BIT, EVENT_FILE_FL_TRIGGER_MODE_BIT, EVENT_FILE_FL_TRIGGER_COND_BIT, @@ -618,7 +617,6 @@ extern int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...); * RECORDED_TGID - The tgids should be recorded at sched_switch * FILTERED - The event has a filter attached * NO_SET_FILTER - Set when filter has error and is to be ignored - * SOFT_MODE - The event is enabled/disabled by SOFT_DISABLED * SOFT_DISABLED - When set, do not trace the event (even though its * tracepoint may be enabled) * TRIGGER_MODE - When set, invoke the triggers associated with the event @@ -633,7 +631,6 @@ enum { EVENT_FILE_FL_RECORDED_TGID = (1 << EVENT_FILE_FL_RECORDED_TGID_BIT), EVENT_FILE_FL_FILTERED = (1 << EVENT_FILE_FL_FILTERED_BIT), EVENT_FILE_FL_NO_SET_FILTER = (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT), - EVENT_FILE_FL_SOFT_MODE = (1 << EVENT_FILE_FL_SOFT_MODE_BIT), EVENT_FILE_FL_SOFT_DISABLED = (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT), EVENT_FILE_FL_TRIGGER_MODE = (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT), 
EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT), diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 120531268abf..0980f4def360 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -768,6 +768,7 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, { struct trace_event_call *call = file->event_call; struct trace_array *tr = file->tr; + bool soft_mode = atomic_read(&file->sm_ref) != 0; int ret = 0; int disable; @@ -782,7 +783,7 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, * is set we do not want the event to be enabled before we * clear the bit. * - * When soft_disable is not set but the SOFT_MODE flag is, + * When soft_disable is not set but the soft_mode is, * we do nothing. Do not disable the tracepoint, otherwise * "soft enable"s (clearing the SOFT_DISABLED bit) wont work. */ @@ -790,11 +791,11 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, if (atomic_dec_return(&file->sm_ref) > 0) break; disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED; - clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags); + soft_mode = false; /* Disable use of trace_buffered_event */ trace_buffered_event_disable(); } else - disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE); + disable = !soft_mode; if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) { clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags); @@ -812,8 +813,8 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, WARN_ON_ONCE(ret); } - /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */ - if (file->flags & EVENT_FILE_FL_SOFT_MODE) + /* If in soft mode, just set the SOFT_DISABLE_BIT, else clear it */ + if (soft_mode) set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); else clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); @@ -823,7 +824,7 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, * When soft_disable is 
set and enable is set, we want to * register the tracepoint for the event, but leave the event * as is. That means, if the event was already enabled, we do - * nothing (but set SOFT_MODE). If the event is disabled, we + * nothing (but set soft_mode). If the event is disabled, we * set SOFT_DISABLED before enabling the event tracepoint, so * it still seems to be disabled. */ @@ -832,7 +833,7 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, else { if (atomic_inc_return(&file->sm_ref) > 1) break; - set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags); + soft_mode = true; /* Enable use of trace_buffered_event */ trace_buffered_event_enable(); } @@ -840,7 +841,7 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, if (!(file->flags & EVENT_FILE_FL_ENABLED)) { bool cmd = false, tgid = false; - /* Keep the event disabled, when going to SOFT_MODE. */ + /* Keep the event disabled, when going to soft mode. */ if (soft_disable) set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); @@ -1792,8 +1793,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, !(flags & EVENT_FILE_FL_SOFT_DISABLED)) strcpy(buf, "1"); - if (flags & EVENT_FILE_FL_SOFT_DISABLED || - flags & EVENT_FILE_FL_SOFT_MODE) + if (atomic_read(&file->sm_ref) != 0) strcat(buf, "*"); strcat(buf, "\n"); @@ -3584,7 +3584,7 @@ static int probe_remove_event_call(struct trace_event_call *call) continue; /* * We can't rely on ftrace_event_enable_disable(enable => 0) - * we are going to do, EVENT_FILE_FL_SOFT_MODE can suppress + * we are going to do, soft mode can suppress * TRACE_REG_UNREGISTER. 
*/ if (file->flags & EVENT_FILE_FL_ENABLED) @@ -3997,7 +3997,7 @@ static int free_probe_data(void *data) edata->ref--; if (!edata->ref) { - /* Remove the SOFT_MODE flag */ + /* Remove soft mode */ __ftrace_event_enable_disable(edata->file, 0, 1); trace_event_put_ref(edata->file->event_call); kfree(edata); From 502ffa43994de8f038101e0920e8e87d9756c4d8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 10 Jul 2025 09:56:28 -0400 Subject: [PATCH 0939/2411] tracing: Fix comment in trace_module_remove_events() Fix typo "allocade" -> "allocated". Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250710095628.42ed6b06@batman.local.home Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 0980f4def360..6c0783fc4c2c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3695,7 +3695,7 @@ static void trace_module_remove_events(struct module *mod) if (call->module == mod) __trace_remove_event_call(call); } - /* Check for any strings allocade for this module */ + /* Check for any strings allocated for this module */ list_for_each_entry_safe(modstr, m, &module_strings, next) { if (modstr->module != mod) continue; From 129f70bd6063d701c3ecb63ecdd4b5ee520cfd45 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 13 Jun 2025 19:40:47 +0800 Subject: [PATCH 0940/2411] perf: ftrace: add graph tracer options args/retval/retval-hex/retaddr This change adds support for new funcgraph tracer options funcgraph-args, funcgraph-retval, funcgraph-retval-hex and funcgraph-retaddr. The new added options are: - args : Show function arguments. - retval : Show function return value. - retval-hex : Show function return value in hexadecimal format. - retaddr : Show function return address. 
# ./perf ftrace -G vfs_write --graph-opts retval,retaddr # tracer: function_graph # # CPU DURATION FUNCTION CALLS # | | | | | | | 5) | mutex_unlock() { /* <-rb_simple_write+0xda/0x150 */ 5) 0.188 us | local_clock(); /* <-lock_release+0x2ad/0x440 ret=0x3bf2a3cf90e */ 5) | rt_mutex_slowunlock() { /* <-rb_simple_write+0xda/0x150 */ 5) | _raw_spin_lock_irqsave() { /* <-rt_mutex_slowunlock+0x4f/0x200 */ 5) 0.123 us | preempt_count_add(); /* <-_raw_spin_lock_irqsave+0x23/0x90 ret=0x0 */ 5) 0.128 us | local_clock(); /* <-__lock_acquire.isra.0+0x17a/0x740 ret=0x3bf2a3cfc8b */ 5) 0.086 us | do_raw_spin_trylock(); /* <-_raw_spin_lock_irqsave+0x4a/0x90 ret=0x1 */ 5) 0.845 us | } /* _raw_spin_lock_irqsave ret=0x292 */ 5) | _raw_spin_unlock_irqrestore() { /* <-rt_mutex_slowunlock+0x191/0x200 */ 5) 0.097 us | local_clock(); /* <-lock_release+0x2ad/0x440 ret=0x3bf2a3cff1f */ 5) 0.086 us | do_raw_spin_unlock(); /* <-_raw_spin_unlock_irqrestore+0x23/0x60 ret=0x1 */ 5) 0.104 us | preempt_count_sub(); /* <-_raw_spin_unlock_irqrestore+0x35/0x60 ret=0x0 */ 5) 0.726 us | } /* _raw_spin_unlock_irqrestore ret=0x80000000 */ 5) 1.881 us | } /* rt_mutex_slowunlock ret=0x0 */ 5) 2.931 us | } /* mutex_unlock ret=0x0 */ Signed-off-by: Changbin Du Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250613114048.132336-1-changbin.du@huawei.com Signed-off-by: Namhyung Kim --- tools/perf/Documentation/perf-ftrace.txt | 4 ++ tools/perf/builtin-ftrace.c | 60 +++++++++++++++++++++++- tools/perf/util/ftrace.h | 4 ++ 3 files changed, 67 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 914457853bcf..3f3808e513fe 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -123,6 +123,10 @@ OPTIONS for 'perf ftrace trace' --graph-opts:: List of options allowed to set: + - args - Show function arguments. + - retval - Show function return value. 
+ - retval-hex - Show function return value in hexadecimal format. + - retaddr - Show function return address. - nosleep-time - Measure on-CPU time only for function_graph tracer. - noirqs - Ignore functions that happen inside interrupt. - verbose - Show process names, PIDs, timestamps, etc. diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index e1f2f3fb1b08..6b6eec65f93f 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -301,6 +301,10 @@ static void reset_tracing_options(struct perf_ftrace *ftrace __maybe_unused) write_tracing_option_file("funcgraph-proc", "0"); write_tracing_option_file("funcgraph-abstime", "0"); write_tracing_option_file("funcgraph-tail", "0"); + write_tracing_option_file("funcgraph-args", "0"); + write_tracing_option_file("funcgraph-retval", "0"); + write_tracing_option_file("funcgraph-retval-hex", "0"); + write_tracing_option_file("funcgraph-retaddr", "0"); write_tracing_option_file("latency-format", "0"); write_tracing_option_file("irq-info", "0"); } @@ -542,6 +546,41 @@ static int set_tracing_sleep_time(struct perf_ftrace *ftrace) return 0; } +static int set_tracing_funcgraph_args(struct perf_ftrace *ftrace) +{ + if (ftrace->graph_args) { + if (write_tracing_option_file("funcgraph-args", "1") < 0) + return -1; + } + + return 0; +} + +static int set_tracing_funcgraph_retval(struct perf_ftrace *ftrace) +{ + if (ftrace->graph_retval || ftrace->graph_retval_hex) { + if (write_tracing_option_file("funcgraph-retval", "1") < 0) + return -1; + } + + if (ftrace->graph_retval_hex) { + if (write_tracing_option_file("funcgraph-retval-hex", "1") < 0) + return -1; + } + + return 0; +} + +static int set_tracing_funcgraph_retaddr(struct perf_ftrace *ftrace) +{ + if (ftrace->graph_retaddr) { + if (write_tracing_option_file("funcgraph-retaddr", "1") < 0) + return -1; + } + + return 0; +} + static int set_tracing_funcgraph_irqs(struct perf_ftrace *ftrace) { if (!ftrace->graph_noirqs) @@ -642,6 +681,21 @@ static 
int set_tracing_options(struct perf_ftrace *ftrace) return -1; } + if (set_tracing_funcgraph_args(ftrace) < 0) { + pr_err("failed to set tracing option funcgraph-args\n"); + return -1; + } + + if (set_tracing_funcgraph_retval(ftrace) < 0) { + pr_err("failed to set tracing option funcgraph-retval\n"); + return -1; + } + + if (set_tracing_funcgraph_retaddr(ftrace) < 0) { + pr_err("failed to set tracing option funcgraph-retaddr\n"); + return -1; + } + if (set_tracing_funcgraph_irqs(ftrace) < 0) { pr_err("failed to set tracing option funcgraph-irqs\n"); return -1; @@ -1634,6 +1688,10 @@ static int parse_graph_tracer_opts(const struct option *opt, int ret; struct perf_ftrace *ftrace = (struct perf_ftrace *) opt->value; struct sublevel_option graph_tracer_opts[] = { + { .name = "args", .value_ptr = &ftrace->graph_args }, + { .name = "retval", .value_ptr = &ftrace->graph_retval }, + { .name = "retval-hex", .value_ptr = &ftrace->graph_retval_hex }, + { .name = "retaddr", .value_ptr = &ftrace->graph_retaddr }, { .name = "nosleep-time", .value_ptr = &ftrace->graph_nosleep_time }, { .name = "noirqs", .value_ptr = &ftrace->graph_noirqs }, { .name = "verbose", .value_ptr = &ftrace->graph_verbose }, @@ -1725,7 +1783,7 @@ int cmd_ftrace(int argc, const char **argv) OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func", "Set nograph filter on given functions", parse_filter_func), OPT_CALLBACK(0, "graph-opts", &ftrace, "options", - "Graph tracer options, available options: nosleep-time,noirqs,verbose,thresh=,depth=", + "Graph tracer options, available options: args,retval,retval-hex,retaddr,nosleep-time,noirqs,verbose,thresh=,depth=", parse_graph_tracer_opts), OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size", "Size of per cpu buffer, needs to use a B, K, M or G suffix.", parse_buffer_size), diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h index 3f5094ac5908..950f2efafad2 100644 --- a/tools/perf/util/ftrace.h +++ 
b/tools/perf/util/ftrace.h @@ -30,6 +30,10 @@ struct perf_ftrace { int graph_depth; int func_stack_trace; int func_irq_info; + int graph_args; + int graph_retval; + int graph_retval_hex; + int graph_retaddr; int graph_nosleep_time; int graph_noirqs; int graph_verbose; From 478272d1cdd9959a6d638e9d81f70642f04290c9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 17 Jul 2025 08:08:53 -0700 Subject: [PATCH 0941/2411] tools subcmd: Tighten the filename size in check_if_command_finished FILENAME_MAX is often PATH_MAX (4kb), far more than needed for the /proc path. Make the buffer size sufficient for the maximum integer plus "/proc/" and "/status" with a '\0' terminator. Fixes: 5ce42b5de461 ("tools subcmd: Add non-waitpid check_if_command_finished()") Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250717150855.1032526-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/lib/subcmd/run-command.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tools/lib/subcmd/run-command.c b/tools/lib/subcmd/run-command.c index 0a764c25c384..b7510f83209a 100644 --- a/tools/lib/subcmd/run-command.c +++ b/tools/lib/subcmd/run-command.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -216,10 +217,20 @@ static int wait_or_whine(struct child_process *cmd, bool block) return result; } +/* + * Conservative estimate of the number of characters needed to hold a decoded + * integer: assume each 3 bits needs a character byte, plus a possible sign + * character. + */ +#ifndef is_signed_type +#define is_signed_type(type) (((type)(-1)) < (type)1) +#endif +#define MAX_STRLEN_TYPE(type) (sizeof(type) * 8 / 3 + (is_signed_type(type) ? 
1 : 0)) + int check_if_command_finished(struct child_process *cmd) { #ifdef __linux__ - char filename[FILENAME_MAX + 12]; + char filename[6 + MAX_STRLEN_TYPE(typeof(cmd->pid)) + 7 + 1]; char status_line[256]; FILE *status_file; @@ -227,7 +238,7 @@ int check_if_command_finished(struct child_process *cmd) * Check by reading /proc/<pid>/status as calling waitpid causes * stdout/stderr to be closed and data lost. */ - sprintf(filename, "/proc/%d/status", cmd->pid); + sprintf(filename, "/proc/%u/status", cmd->pid); status_file = fopen(filename, "r"); if (status_file == NULL) { /* Open failed assume finish_command was called. */ From 82aac553372cd201b91a8b064be0cd5a501932b2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 17 Jul 2025 08:08:54 -0700 Subject: [PATCH 0942/2411] perf pmu: Switch FILENAME_MAX to NAME_MAX FILENAME_MAX is the same as PATH_MAX (4kb) in glibc rather than NAME_MAX's 255. Switch to using NAME_MAX and ensure the '\0' is accounted for in the path's buffer size. Fixes: 754baf426e09 ("perf pmu: Change aliases from list to hashmap") Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250717150855.1032526-2-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index b09b2ea2407a..f3da6e27bfcb 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -453,7 +453,7 @@ static struct perf_pmu_alias *perf_pmu__find_alias(struct perf_pmu *pmu, { struct perf_pmu_alias *alias; bool has_sysfs_event; - char event_file_name[FILENAME_MAX + 8]; + char event_file_name[NAME_MAX + 8]; if (hashmap__find(pmu->aliases, name, &alias)) return alias; @@ -752,7 +752,7 @@ static int pmu_aliases_parse(struct perf_pmu *pmu) static int pmu_aliases_parse_eager(struct perf_pmu *pmu, int sysfs_fd) { - char path[FILENAME_MAX + 7]; + char path[NAME_MAX + 8]; int ret, events_dir_fd; scnprintf(path, sizeof(path), "%s/events", pmu->name); 
From 008b75759eb98fa6ee83eae8e9e19722121de633 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 17 Jul 2025 08:08:55 -0700 Subject: [PATCH 0943/2411] perf ui scripts: Switch FILENAME_MAX to NAME_MAX FILENAME_MAX is the same as PATH_MAX (4kb) in glibc rather than NAME_MAX's 255. Switch to using NAME_MAX and ensure the '\0' is accounted for in the path's buffer size. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250717150855.1032526-3-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/ui/browsers/scripts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c index 2d04ece833aa..1e8c2c2f952d 100644 --- a/tools/perf/ui/browsers/scripts.c +++ b/tools/perf/ui/browsers/scripts.c @@ -94,7 +94,7 @@ static int check_ev_match(int dir_fd, const char *scriptname, struct perf_sessio FILE *fp; { - char filename[FILENAME_MAX + 5]; + char filename[NAME_MAX + 5]; int fd; scnprintf(filename, sizeof(filename), "bin/%s-record", scriptname); From b1d4c90bffdeda6c0a304249358608e4ddb80377 Mon Sep 17 00:00:00 2001 From: Jacky Chou Date: Tue, 15 Jul 2025 11:43:17 +0800 Subject: [PATCH 0944/2411] pinctrl: aspeed-g6: Add PCIe RC PERST pin group The PCIe RC PERST uses SSPRST# as PERST#, so enable this pin as an output. 
Signed-off-by: Jacky Chou Acked-by: Linus Walleij Link: https://lore.kernel.org/20250715034320.2553837-8-jacky_chou@aspeedtech.com Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c index 51a63cf92023..b0c7e4f6df9c 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c @@ -17,6 +17,7 @@ #include "../pinctrl-utils.h" #include "pinctrl-aspeed.h" +#define SCU040 0x040 /* Reset Control Set 1 */ #define SCU400 0x400 /* Multi-function Pin Control #1 */ #define SCU404 0x404 /* Multi-function Pin Control #2 */ #define SCU40C 0x40C /* Multi-function Pin Control #3 */ @@ -52,7 +53,7 @@ #define SCU6D0 0x6D0 /* Multi-function Pin Control #29 */ #define SCUC20 0xC20 /* PCIE configuration Setting Control */ -#define ASPEED_G6_NR_PINS 256 +#define ASPEED_G6_NR_PINS 258 #define M24 0 SIG_EXPR_LIST_DECL_SESG(M24, MDC3, MDIO3, SIG_DESC_SET(SCU410, 0)); @@ -1636,6 +1637,12 @@ FUNC_DECL_1(USB11BHID, USBB); FUNC_DECL_1(USB2BD, USBB); FUNC_DECL_1(USB2BH, USBB); +#define D7 257 +SIG_EXPR_LIST_DECL_SESG(D7, RCRST, PCIERC1, SIG_DESC_SET(SCU040, 19), + SIG_DESC_SET(SCU500, 24)); +PIN_DECL_(D7, SIG_EXPR_LIST_PTR(D7, RCRST)); +FUNC_GROUP_DECL(PCIERC1, D7); + /* Pins, groups and functions are sort(1):ed alphabetically for sanity */ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = { @@ -1806,6 +1813,7 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = { ASPEED_PINCTRL_PIN(D4), ASPEED_PINCTRL_PIN(D5), ASPEED_PINCTRL_PIN(D6), + ASPEED_PINCTRL_PIN(D7), ASPEED_PINCTRL_PIN(E1), ASPEED_PINCTRL_PIN(E11), ASPEED_PINCTRL_PIN(E12), @@ -2073,6 +2081,7 @@ static const struct aspeed_pin_group aspeed_g6_groups[] = { ASPEED_PINCTRL_GROUP(SALT9G1), ASPEED_PINCTRL_GROUP(SD1), ASPEED_PINCTRL_GROUP(SD2), + ASPEED_PINCTRL_GROUP(PCIERC1), 
ASPEED_PINCTRL_GROUP(EMMCG1), ASPEED_PINCTRL_GROUP(EMMCG4), ASPEED_PINCTRL_GROUP(EMMCG8), @@ -2314,6 +2323,7 @@ static const struct aspeed_pin_function aspeed_g6_functions[] = { ASPEED_PINCTRL_FUNC(SPI2), ASPEED_PINCTRL_FUNC(SPI2CS1), ASPEED_PINCTRL_FUNC(SPI2CS2), + ASPEED_PINCTRL_FUNC(PCIERC1), ASPEED_PINCTRL_FUNC(TACH0), ASPEED_PINCTRL_FUNC(TACH1), ASPEED_PINCTRL_FUNC(TACH10), From b225010185418d22cb508bd36adc607ac2c28968 Mon Sep 17 00:00:00 2001 From: Cathy Xu Date: Fri, 11 Jul 2025 17:44:57 +0800 Subject: [PATCH 0945/2411] dt-bindings: pinctrl: mediatek: Add support for mt8189 Add the new binding document for pinctrl on MediaTek mt8189. Signed-off-by: Cathy Xu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/20250711094513.17073-2-ot_cathy.xu@mediatek.com Signed-off-by: Linus Walleij --- .../pinctrl/mediatek,mt8189-pinctrl.yaml | 213 ++++++++++++++++++ 1 file changed, 213 insertions(+) create mode 100644 Documentation/devicetree/bindings/pinctrl/mediatek,mt8189-pinctrl.yaml diff --git a/Documentation/devicetree/bindings/pinctrl/mediatek,mt8189-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/mediatek,mt8189-pinctrl.yaml new file mode 100644 index 000000000000..32e4653da5db --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/mediatek,mt8189-pinctrl.yaml @@ -0,0 +1,213 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pinctrl/mediatek,mt8189-pinctrl.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MediaTek MT8189 Pin Controller + +maintainers: + - Lei Xue + - Cathy Xu + +description: + The MediaTek's MT8189 Pin controller is used to control SoC pins. 
+ +properties: + compatible: + const: mediatek,mt8189-pinctrl + + reg: + items: + - description: gpio base + - description: lm group IO + - description: rb0 group IO + - description: rb1 group IO + - description: bm0 group IO + - description: bm1 group IO + - description: bm2 group IO + - description: lt0 group IO + - description: lt1 group IO + - description: rt group IO + - description: eint0 group IO + - description: eint1 group IO + - description: eint2 group IO + - description: eint3 group IO + - description: eint4 group IO + + reg-names: + items: + - const: base + - const: lm + - const: rb0 + - const: rb1 + - const: bm0 + - const: bm1 + - const: bm2 + - const: lt0 + - const: lt1 + - const: rt + - const: eint0 + - const: eint1 + - const: eint2 + - const: eint3 + - const: eint4 + + interrupts: + maxItems: 1 + + interrupt-controller: true + + '#interrupt-cells': + const: 2 + + gpio-controller: true + + '#gpio-cells': + const: 2 + + gpio-ranges: + maxItems: 1 + + gpio-line-names: true + +# PIN CONFIGURATION NODES +patternProperties: + '-pins$': + type: object + additionalProperties: false + + patternProperties: + '^pins': + type: object + $ref: /schemas/pinctrl/pincfg-node.yaml + additionalProperties: false + description: + A pinctrl node should contain at least one subnode representing the + pinctrl groups available on the machine. Each subnode will list the + pins it needs, and how they should be configured, with regard to muxer + configuration, pullups, drive strength, input enable/disable and input + schmitt. + + properties: + pinmux: + description: + Integer array, represents gpio pin number and mux setting. + Supported pin number and mux varies for different SoCs, and are + defined as macros in arch/arm64/boot/dts/mediatek/mt8189-pinfunc.h + directly, for this SoC. 
+ + drive-strength: + enum: [2, 4, 6, 8, 10, 12, 14, 16] + + bias-pull-down: + oneOf: + - type: boolean + - enum: [100, 101, 102, 103] + description: mt8189 pull down PUPD/R0/R1 type define value. + - enum: [75000, 5000] + description: mt8189 pull down RSEL type si unit value(ohm). + description: | + For pull down type is normal, it doesn't need to add R1R0 define + and resistance value. + + For pull down type is PUPD/R0/R1 type, it can add R1R0 define to + set different resistance. It can support "MTK_PUPD_SET_R1R0_00" & + "MTK_PUPD_SET_R1R0_01" & "MTK_PUPD_SET_R1R0_10" & + "MTK_PUPD_SET_R1R0_11" define in mt8189. + + For pull down type is PD/RSEL, it can add resistance value(ohm) + to set different resistance by identifying property + "mediatek,rsel-resistance-in-si-unit". + + bias-pull-up: + oneOf: + - type: boolean + - enum: [100, 101, 102, 103] + description: mt8189 pull up PUPD/R0/R1 type define value. + - enum: [1000, 1500, 2000, 3000, 4000, 5000, 75000] + description: mt8189 pull up RSEL type si unit value(ohm). + description: | + For pull up type is normal, it doesn't need to add R1R0 define + and resistance value. + + For pull up type is PUPD/R0/R1 type, it can add R1R0 define to + set different resistance. It can support "MTK_PUPD_SET_R1R0_00" & + "MTK_PUPD_SET_R1R0_01" & "MTK_PUPD_SET_R1R0_10" & + "MTK_PUPD_SET_R1R0_11" define in mt8189. + + For pull up type is PU/RSEL, it can add resistance value(ohm) + to set different resistance by identifying property + "mediatek,rsel-resistance-in-si-unit". 
+ + bias-disable: true + + output-high: true + + output-low: true + + input-enable: true + + input-disable: true + + input-schmitt-enable: true + + input-schmitt-disable: true + + required: + - pinmux + +required: + - compatible + - reg + - interrupts + - interrupt-controller + - '#interrupt-cells' + - gpio-controller + - '#gpio-cells' + - gpio-ranges + +additionalProperties: false + +examples: + - | + #include + #include + #define PINMUX_GPIO51__FUNC_SCL0 (MTK_PIN_NO(51) | 2) + #define PINMUX_GPIO52__FUNC_SDA0 (MTK_PIN_NO(52) | 2) + + pio: pinctrl@10005000 { + compatible = "mediatek,mt8189-pinctrl"; + reg = <0x10005000 0x1000>, + <0x11b50000 0x1000>, + <0x11c50000 0x1000>, + <0x11c60000 0x1000>, + <0x11d20000 0x1000>, + <0x11d30000 0x1000>, + <0x11d40000 0x1000>, + <0x11e20000 0x1000>, + <0x11e30000 0x1000>, + <0x11f20000 0x1000>, + <0x11ce0000 0x1000>, + <0x11de0000 0x1000>, + <0x11e60000 0x1000>, + <0x1c01e000 0x1000>, + <0x11f00000 0x1000>; + reg-names = "base", "lm", "rb0", "rb1", "bm0" , "bm1", + "bm2", "lt0", "lt1", "rt", "eint0", "eint1", + "eint2", "eint3", "eint4"; + gpio-controller; + #gpio-cells = <2>; + gpio-ranges = <&pio 0 0 182>; + interrupt-controller; + interrupts = ; + #interrupt-cells = <2>; + + i2c0-pins { + pins { + pinmux = <PINMUX_GPIO51__FUNC_SCL0>, + <PINMUX_GPIO52__FUNC_SDA0>; + bias-disable; + }; + }; + }; From a3fe1324c3c5c292ec79bd756497c1c44ff247d2 Mon Sep 17 00:00:00 2001 From: Cathy Xu Date: Fri, 11 Jul 2025 17:44:59 +0800 Subject: [PATCH 0946/2411] pinctrl: mediatek: Add pinctrl driver for mt8189 Add pinctrl driver support for MediaTek Soc mt8189. 
Signed-off-by: Cathy Xu Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/20250711094513.17073-4-ot_cathy.xu@mediatek.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/Kconfig | 12 + drivers/pinctrl/mediatek/Makefile | 1 + drivers/pinctrl/mediatek/pinctrl-mt8189.c | 1700 ++++++++++++ drivers/pinctrl/mediatek/pinctrl-mtk-mt8189.h | 2452 +++++++++++++++++ 4 files changed, 4165 insertions(+) create mode 100644 drivers/pinctrl/mediatek/pinctrl-mt8189.c create mode 100644 drivers/pinctrl/mediatek/pinctrl-mtk-mt8189.h diff --git a/drivers/pinctrl/mediatek/Kconfig b/drivers/pinctrl/mediatek/Kconfig index 2d15af6be276..5b191e12a8aa 100644 --- a/drivers/pinctrl/mediatek/Kconfig +++ b/drivers/pinctrl/mediatek/Kconfig @@ -259,6 +259,18 @@ config PINCTRL_MT8188 In MTK platform, we support virtual gpio and use it to map specific eint which doesn't have real gpio pin. +config PINCTRL_MT8189 + bool "MediaTek MT8189 pin control" + depends on OF + depends on ARM64 || COMPILE_TEST + default ARM64 && ARCH_MEDIATEK + select PINCTRL_MTK_PARIS + help + Say yes here to support pin controller and gpio driver + on MediaTek MT8189 SoC. + In MTK platform, we support virtual gpio and use it to + map specific eint which doesn't have real gpio pin. 
+ config PINCTRL_MT8192 bool "MediaTek MT8192 pin control" depends on OF diff --git a/drivers/pinctrl/mediatek/Makefile b/drivers/pinctrl/mediatek/Makefile index 7518980fba59..5d4646939ba3 100644 --- a/drivers/pinctrl/mediatek/Makefile +++ b/drivers/pinctrl/mediatek/Makefile @@ -35,6 +35,7 @@ obj-$(CONFIG_PINCTRL_MT8173) += pinctrl-mt8173.o obj-$(CONFIG_PINCTRL_MT8183) += pinctrl-mt8183.o obj-$(CONFIG_PINCTRL_MT8186) += pinctrl-mt8186.o obj-$(CONFIG_PINCTRL_MT8188) += pinctrl-mt8188.o +obj-$(CONFIG_PINCTRL_MT8189) += pinctrl-mt8189.o obj-$(CONFIG_PINCTRL_MT8192) += pinctrl-mt8192.o obj-$(CONFIG_PINCTRL_MT8195) += pinctrl-mt8195.o obj-$(CONFIG_PINCTRL_MT8196) += pinctrl-mt8196.o diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8189.c b/drivers/pinctrl/mediatek/pinctrl-mt8189.c new file mode 100644 index 000000000000..7028aff55ae5 --- /dev/null +++ b/drivers/pinctrl/mediatek/pinctrl-mt8189.c @@ -0,0 +1,1700 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2025 MediaTek Inc. + * Author: Lei Xue + * Cathy Xu + */ + +#include "pinctrl-mtk-mt8189.h" +#include "pinctrl-paris.h" + +#define PIN_FIELD_BASE(s_pin, e_pin, i_base, s_addr, x_addrs, s_bit, x_bits) \ + PIN_FIELD_CALC(s_pin, e_pin, i_base, s_addr, x_addrs, s_bit, x_bits, \ + 32, 0) + +#define PINS_FIELD_BASE(s_pin, e_pin, i_base, s_addr, x_addrs, s_bit, x_bits) \ + PIN_FIELD_CALC(s_pin, e_pin, i_base, s_addr, x_addrs, s_bit, x_bits, \ + 32, 1) + +static const struct mtk_pin_field_calc mt8189_pin_mode_range[] = { + PIN_FIELD(0, 182, 0x0300, 0x10, 0, 4), +}; + +static const struct mtk_pin_field_calc mt8189_pin_dir_range[] = { + PIN_FIELD(0, 182, 0x0000, 0x10, 0, 1), +}; + +static const struct mtk_pin_field_calc mt8189_pin_di_range[] = { + PIN_FIELD(0, 182, 0x0200, 0x10, 0, 1), +}; + +static const struct mtk_pin_field_calc mt8189_pin_do_range[] = { + PIN_FIELD(0, 182, 0x0100, 0x10, 0, 1), +}; + +static const struct mtk_pin_field_calc mt8189_pin_smt_range[] = { + PIN_FIELD_BASE(0, 0, 7, 0x00e0, 0x10, 5, 1), 
+ PIN_FIELD_BASE(1, 1, 8, 0x00c0, 0x10, 3, 1), + PIN_FIELD_BASE(2, 2, 8, 0x00c0, 0x10, 4, 1), + PIN_FIELD_BASE(3, 3, 8, 0x00c0, 0x10, 5, 1), + PIN_FIELD_BASE(4, 4, 8, 0x00c0, 0x10, 6, 1), + PIN_FIELD_BASE(5, 5, 8, 0x00c0, 0x10, 7, 1), + PIN_FIELD_BASE(6, 6, 7, 0x00e0, 0x10, 6, 1), + PIN_FIELD_BASE(7, 7, 7, 0x00e0, 0x10, 7, 1), + PIN_FIELD_BASE(8, 8, 7, 0x00e0, 0x10, 8, 1), + PIN_FIELD_BASE(9, 9, 7, 0x00e0, 0x10, 9, 1), + PIN_FIELD_BASE(10, 10, 7, 0x00e0, 0x10, 10, 1), + PIN_FIELD_BASE(11, 11, 7, 0x00e0, 0x10, 11, 1), + PIN_FIELD_BASE(12, 12, 2, 0x00e0, 0x10, 5, 1), + PIN_FIELD_BASE(13, 13, 2, 0x00e0, 0x10, 6, 1), + PIN_FIELD_BASE(14, 14, 3, 0x00f0, 0x10, 0, 1), + PIN_FIELD_BASE(15, 15, 3, 0x00f0, 0x10, 1, 1), + PIN_FIELD_BASE(16, 16, 2, 0x00e0, 0x10, 7, 1), + PIN_FIELD_BASE(17, 17, 2, 0x00e0, 0x10, 8, 1), + PIN_FIELD_BASE(18, 18, 7, 0x00e0, 0x10, 0, 1), + PIN_FIELD_BASE(19, 19, 7, 0x00e0, 0x10, 2, 1), + PIN_FIELD_BASE(20, 20, 7, 0x00e0, 0x10, 1, 1), + PIN_FIELD_BASE(21, 21, 7, 0x00e0, 0x10, 3, 1), + PIN_FIELD_BASE(22, 22, 9, 0x00f0, 0x10, 0, 1), + PIN_FIELD_BASE(23, 23, 9, 0x00f0, 0x10, 1, 1), + PIN_FIELD_BASE(24, 24, 9, 0x00f0, 0x10, 2, 1), + PIN_FIELD_BASE(25, 25, 4, 0x00c0, 0x10, 2, 1), + PIN_FIELD_BASE(26, 26, 4, 0x00c0, 0x10, 1, 1), + PIN_FIELD_BASE(27, 27, 2, 0x00e0, 0x10, 1, 1), + PIN_FIELD_BASE(28, 28, 2, 0x00e0, 0x10, 2, 1), + PIN_FIELD_BASE(29, 29, 4, 0x00c0, 0x10, 0, 1), + PIN_FIELD_BASE(30, 30, 2, 0x00e0, 0x10, 0, 1), + PIN_FIELD_BASE(31, 31, 3, 0x00f0, 0x10, 19, 1), + PIN_FIELD_BASE(32, 32, 1, 0x00c0, 0x10, 30, 1), + PIN_FIELD_BASE(33, 33, 3, 0x00f0, 0x10, 21, 1), + PIN_FIELD_BASE(34, 34, 3, 0x00f0, 0x10, 20, 1), + PIN_FIELD_BASE(35, 35, 3, 0x00f0, 0x10, 23, 1), + PIN_FIELD_BASE(36, 36, 3, 0x00f0, 0x10, 22, 1), + PIN_FIELD_BASE(37, 37, 3, 0x00f0, 0x10, 25, 1), + PIN_FIELD_BASE(38, 38, 3, 0x00f0, 0x10, 24, 1), + PIN_FIELD_BASE(39, 39, 3, 0x00f0, 0x10, 5, 1), + PIN_FIELD_BASE(40, 40, 3, 0x00f0, 0x10, 2, 1), + PIN_FIELD_BASE(41, 41, 3, 0x00f0, 0x10, 3, 
1), + PIN_FIELD_BASE(42, 42, 3, 0x00f0, 0x10, 4, 1), + PIN_FIELD_BASE(43, 43, 3, 0x00f0, 0x10, 6, 1), + PIN_FIELD_BASE(44, 44, 7, 0x00e0, 0x10, 20, 1), + PIN_FIELD_BASE(45, 45, 7, 0x00e0, 0x10, 21, 1), + PIN_FIELD_BASE(46, 46, 7, 0x00e0, 0x10, 22, 1), + PIN_FIELD_BASE(47, 47, 7, 0x00e0, 0x10, 23, 1), + PIN_FIELD_BASE(48, 48, 4, 0x00c0, 0x10, 5, 1), + PIN_FIELD_BASE(49, 49, 4, 0x00c0, 0x10, 4, 1), + PIN_FIELD_BASE(50, 50, 4, 0x00c0, 0x10, 3, 1), + PIN_FIELD_BASE(51, 51, 8, 0x00c0, 0x10, 8, 1), + PIN_FIELD_BASE(52, 52, 8, 0x00c0, 0x10, 10, 1), + PIN_FIELD_BASE(53, 53, 8, 0x00c0, 0x10, 9, 1), + PIN_FIELD_BASE(54, 54, 8, 0x00c0, 0x10, 11, 1), + PIN_FIELD_BASE(55, 55, 4, 0x00c0, 0x10, 6, 1), + PIN_FIELD_BASE(56, 56, 4, 0x00c0, 0x10, 7, 1), + PIN_FIELD_BASE(57, 57, 2, 0x00e0, 0x10, 13, 1), + PIN_FIELD_BASE(58, 58, 2, 0x00e0, 0x10, 17, 1), + PIN_FIELD_BASE(59, 59, 2, 0x00e0, 0x10, 14, 1), + PIN_FIELD_BASE(60, 60, 2, 0x00e0, 0x10, 18, 1), + PIN_FIELD_BASE(61, 61, 2, 0x00e0, 0x10, 15, 1), + PIN_FIELD_BASE(62, 62, 2, 0x00e0, 0x10, 19, 1), + PIN_FIELD_BASE(63, 63, 2, 0x00e0, 0x10, 16, 1), + PIN_FIELD_BASE(64, 64, 2, 0x00e0, 0x10, 20, 1), + PIN_FIELD_BASE(65, 65, 9, 0x00f0, 0x10, 10, 1), + PIN_FIELD_BASE(66, 66, 9, 0x00f0, 0x10, 12, 1), + PIN_FIELD_BASE(67, 67, 9, 0x00f0, 0x10, 11, 1), + PIN_FIELD_BASE(68, 68, 9, 0x00f0, 0x10, 13, 1), + PIN_FIELD_BASE(69, 69, 2, 0x00e0, 0x10, 22, 1), + PIN_FIELD_BASE(70, 70, 2, 0x00e0, 0x10, 21, 1), + PIN_FIELD_BASE(71, 71, 2, 0x00e0, 0x10, 24, 1), + PIN_FIELD_BASE(72, 72, 2, 0x00e0, 0x10, 23, 1), + PIN_FIELD_BASE(73, 73, 2, 0x00e0, 0x10, 26, 1), + PIN_FIELD_BASE(74, 74, 2, 0x00e0, 0x10, 25, 1), + PIN_FIELD_BASE(75, 75, 3, 0x00f0, 0x10, 13, 1), + PIN_FIELD_BASE(76, 76, 2, 0x00e0, 0x10, 27, 1), + PIN_FIELD_BASE(77, 77, 8, 0x00c0, 0x10, 13, 1), + PIN_FIELD_BASE(78, 78, 8, 0x00c0, 0x10, 12, 1), + PIN_FIELD_BASE(79, 79, 8, 0x00c0, 0x10, 15, 1), + PIN_FIELD_BASE(80, 80, 8, 0x00c0, 0x10, 14, 1), + PIN_FIELD_BASE(81, 81, 2, 0x00e0, 0x10, 29, 1), + 
PIN_FIELD_BASE(82, 82, 2, 0x00e0, 0x10, 28, 1), + PIN_FIELD_BASE(83, 83, 2, 0x00e0, 0x10, 30, 1), + PIN_FIELD_BASE(84, 84, 7, 0x00e0, 0x10, 24, 1), + PIN_FIELD_BASE(85, 85, 7, 0x00e0, 0x10, 25, 1), + PIN_FIELD_BASE(86, 86, 7, 0x00e0, 0x10, 26, 1), + PIN_FIELD_BASE(87, 87, 7, 0x00e0, 0x10, 27, 1), + PIN_FIELD_BASE(88, 88, 5, 0x0120, 0x10, 20, 1), + PIN_FIELD_BASE(89, 89, 5, 0x0120, 0x10, 19, 1), + PIN_FIELD_BASE(90, 90, 5, 0x0120, 0x10, 22, 1), + PIN_FIELD_BASE(91, 91, 5, 0x0120, 0x10, 21, 1), + PIN_FIELD_BASE(92, 92, 5, 0x0120, 0x10, 16, 1), + PIN_FIELD_BASE(93, 93, 5, 0x0120, 0x10, 17, 1), + PIN_FIELD_BASE(94, 94, 5, 0x0120, 0x10, 23, 1), + PIN_FIELD_BASE(95, 95, 5, 0x0120, 0x10, 15, 1), + PIN_FIELD_BASE(96, 96, 5, 0x0120, 0x10, 18, 1), + PIN_FIELD_BASE(97, 97, 5, 0x0120, 0x10, 0, 1), + PIN_FIELD_BASE(98, 98, 5, 0x0120, 0x10, 5, 1), + PIN_FIELD_BASE(99, 99, 5, 0x0120, 0x10, 3, 1), + PIN_FIELD_BASE(100, 100, 5, 0x0120, 0x10, 4, 1), + PIN_FIELD_BASE(101, 101, 5, 0x0120, 0x10, 1, 1), + PIN_FIELD_BASE(102, 102, 5, 0x0120, 0x10, 2, 1), + PIN_FIELD_BASE(103, 103, 7, 0x00e0, 0x10, 15, 1), + PIN_FIELD_BASE(104, 104, 7, 0x00e0, 0x10, 12, 1), + PIN_FIELD_BASE(105, 105, 7, 0x00e0, 0x10, 14, 1), + PIN_FIELD_BASE(106, 106, 7, 0x00e0, 0x10, 13, 1), + PIN_FIELD_BASE(107, 107, 7, 0x00e0, 0x10, 19, 1), + PIN_FIELD_BASE(108, 108, 7, 0x00e0, 0x10, 16, 1), + PIN_FIELD_BASE(109, 109, 7, 0x00e0, 0x10, 18, 1), + PIN_FIELD_BASE(110, 110, 7, 0x00e0, 0x10, 17, 1), + PIN_FIELD_BASE(111, 111, 7, 0x00e0, 0x10, 4, 1), + PIN_FIELD_BASE(112, 112, 8, 0x00c0, 0x10, 0, 1), + PIN_FIELD_BASE(113, 113, 8, 0x00c0, 0x10, 1, 1), + PIN_FIELD_BASE(114, 114, 8, 0x00c0, 0x10, 2, 1), + PIN_FIELD_BASE(115, 115, 2, 0x00e0, 0x10, 9, 1), + PIN_FIELD_BASE(116, 116, 2, 0x00e0, 0x10, 12, 1), + PIN_FIELD_BASE(117, 117, 2, 0x00e0, 0x10, 10, 1), + PIN_FIELD_BASE(118, 118, 2, 0x00e0, 0x10, 11, 1), + PIN_FIELD_BASE(119, 119, 1, 0x00c0, 0x10, 26, 1), + PIN_FIELD_BASE(120, 120, 1, 0x00c0, 0x10, 25, 1), + 
PIN_FIELD_BASE(121, 121, 1, 0x00c0, 0x10, 24, 1), + PIN_FIELD_BASE(122, 122, 1, 0x00c0, 0x10, 23, 1), + PIN_FIELD_BASE(123, 123, 1, 0x00c0, 0x10, 19, 1), + PIN_FIELD_BASE(124, 124, 1, 0x00c0, 0x10, 18, 1), + PIN_FIELD_BASE(125, 125, 1, 0x00c0, 0x10, 17, 1), + PIN_FIELD_BASE(126, 126, 1, 0x00c0, 0x10, 16, 1), + PIN_FIELD_BASE(127, 127, 1, 0x00c0, 0x10, 22, 1), + PIN_FIELD_BASE(128, 128, 1, 0x00c0, 0x10, 15, 1), + PIN_FIELD_BASE(129, 129, 1, 0x00c0, 0x10, 20, 1), + PIN_FIELD_BASE(130, 130, 1, 0x00c0, 0x10, 27, 1), + PIN_FIELD_BASE(131, 131, 1, 0x00c0, 0x10, 13, 1), + PIN_FIELD_BASE(132, 132, 1, 0x00c0, 0x10, 14, 1), + PIN_FIELD_BASE(133, 133, 1, 0x00c0, 0x10, 28, 1), + PIN_FIELD_BASE(134, 134, 1, 0x00c0, 0x10, 21, 1), + PIN_FIELD_BASE(135, 135, 1, 0x00c0, 0x10, 11, 1), + PIN_FIELD_BASE(136, 136, 1, 0x00c0, 0x10, 12, 1), + PIN_FIELD_BASE(137, 137, 2, 0x00e0, 0x10, 3, 1), + PIN_FIELD_BASE(138, 138, 2, 0x00e0, 0x10, 4, 1), + PIN_FIELD_BASE(139, 139, 1, 0x00c0, 0x10, 3, 1), + PIN_FIELD_BASE(140, 140, 1, 0x00c0, 0x10, 4, 1), + PIN_FIELD_BASE(141, 141, 1, 0x00c0, 0x10, 0, 1), + PIN_FIELD_BASE(142, 142, 1, 0x00c0, 0x10, 1, 1), + PIN_FIELD_BASE(143, 143, 1, 0x00c0, 0x10, 2, 1), + PIN_FIELD_BASE(144, 144, 1, 0x00c0, 0x10, 5, 1), + PIN_FIELD_BASE(145, 145, 1, 0x00c0, 0x10, 6, 1), + PIN_FIELD_BASE(146, 146, 1, 0x00c0, 0x10, 7, 1), + PIN_FIELD_BASE(147, 147, 1, 0x00c0, 0x10, 8, 1), + PIN_FIELD_BASE(148, 148, 1, 0x00c0, 0x10, 9, 1), + PIN_FIELD_BASE(149, 149, 1, 0x00c0, 0x10, 10, 1), + PIN_FIELD_BASE(150, 150, 3, 0x00f0, 0x10, 14, 1), + PIN_FIELD_BASE(151, 151, 1, 0x00c0, 0x10, 29, 1), + PIN_FIELD_BASE(152, 152, 3, 0x00f0, 0x10, 15, 1), + PIN_FIELD_BASE(153, 153, 3, 0x00f0, 0x10, 16, 1), + PIN_FIELD_BASE(154, 154, 3, 0x00f0, 0x10, 17, 1), + PIN_FIELD_BASE(155, 155, 3, 0x00f0, 0x10, 18, 1), + PIN_FIELD_BASE(156, 156, 5, 0x0120, 0x10, 12, 1), + PIN_FIELD_BASE(157, 157, 5, 0x0120, 0x10, 11, 1), + PIN_FIELD_BASE(158, 158, 5, 0x0120, 0x10, 10, 1), + PIN_FIELD_BASE(159, 159, 6, 0x0090, 
0x10, 2, 1), + PIN_FIELD_BASE(160, 160, 5, 0x0120, 0x10, 14, 1), + PIN_FIELD_BASE(161, 161, 5, 0x0120, 0x10, 7, 1), + PIN_FIELD_BASE(162, 162, 5, 0x0120, 0x10, 6, 1), + PIN_FIELD_BASE(163, 163, 6, 0x0090, 0x10, 1, 1), + PIN_FIELD_BASE(164, 164, 5, 0x0120, 0x10, 9, 1), + PIN_FIELD_BASE(165, 165, 5, 0x0120, 0x10, 8, 1), + PIN_FIELD_BASE(166, 166, 6, 0x0090, 0x10, 0, 1), + PIN_FIELD_BASE(167, 167, 5, 0x0120, 0x10, 13, 1), + PIN_FIELD_BASE(168, 168, 3, 0x00f0, 0x10, 8, 1), + PIN_FIELD_BASE(169, 169, 3, 0x00f0, 0x10, 7, 1), + PIN_FIELD_BASE(170, 170, 3, 0x00f0, 0x10, 9, 1), + PIN_FIELD_BASE(171, 171, 3, 0x00f0, 0x10, 10, 1), + PIN_FIELD_BASE(172, 172, 3, 0x00f0, 0x10, 11, 1), + PIN_FIELD_BASE(173, 173, 3, 0x00f0, 0x10, 12, 1), + PIN_FIELD_BASE(174, 174, 9, 0x00f0, 0x10, 5, 1), + PIN_FIELD_BASE(175, 175, 9, 0x00f0, 0x10, 4, 1), + PIN_FIELD_BASE(176, 176, 9, 0x00f0, 0x10, 6, 1), + PIN_FIELD_BASE(177, 177, 9, 0x00f0, 0x10, 7, 1), + PIN_FIELD_BASE(178, 178, 9, 0x00f0, 0x10, 8, 1), + PIN_FIELD_BASE(179, 179, 9, 0x00f0, 0x10, 9, 1), + PIN_FIELD_BASE(180, 180, 5, 0x0120, 0x10, 24, 1), + PIN_FIELD_BASE(181, 181, 5, 0x0120, 0x10, 25, 1), + PIN_FIELD_BASE(182, 182, 9, 0x00f0, 0x10, 3, 1), +}; + +static const struct mtk_pin_field_calc mt8189_pin_ies_range[] = { + PIN_FIELD_BASE(0, 0, 7, 0x0050, 0x10, 5, 1), + PIN_FIELD_BASE(1, 1, 8, 0x0050, 0x10, 3, 1), + PIN_FIELD_BASE(2, 2, 8, 0x0050, 0x10, 4, 1), + PIN_FIELD_BASE(3, 3, 8, 0x0050, 0x10, 5, 1), + PIN_FIELD_BASE(4, 4, 8, 0x0050, 0x10, 6, 1), + PIN_FIELD_BASE(5, 5, 8, 0x0050, 0x10, 7, 1), + PIN_FIELD_BASE(6, 6, 7, 0x0050, 0x10, 6, 1), + PIN_FIELD_BASE(7, 7, 7, 0x0050, 0x10, 7, 1), + PIN_FIELD_BASE(8, 8, 7, 0x0050, 0x10, 8, 1), + PIN_FIELD_BASE(9, 9, 7, 0x0050, 0x10, 9, 1), + PIN_FIELD_BASE(10, 10, 7, 0x0050, 0x10, 10, 1), + PIN_FIELD_BASE(11, 11, 7, 0x0050, 0x10, 11, 1), + PIN_FIELD_BASE(12, 12, 2, 0x0070, 0x10, 5, 1), + PIN_FIELD_BASE(13, 13, 2, 0x0070, 0x10, 6, 1), + PIN_FIELD_BASE(14, 14, 3, 0x0050, 0x10, 0, 1), + 
PIN_FIELD_BASE(15, 15, 3, 0x0050, 0x10, 1, 1), + PIN_FIELD_BASE(16, 16, 2, 0x0070, 0x10, 7, 1), + PIN_FIELD_BASE(17, 17, 2, 0x0070, 0x10, 8, 1), + PIN_FIELD_BASE(18, 18, 7, 0x0050, 0x10, 0, 1), + PIN_FIELD_BASE(19, 19, 7, 0x0050, 0x10, 2, 1), + PIN_FIELD_BASE(20, 20, 7, 0x0050, 0x10, 1, 1), + PIN_FIELD_BASE(21, 21, 7, 0x0050, 0x10, 3, 1), + PIN_FIELD_BASE(22, 22, 9, 0x0040, 0x10, 0, 1), + PIN_FIELD_BASE(23, 23, 9, 0x0040, 0x10, 1, 1), + PIN_FIELD_BASE(24, 24, 9, 0x0040, 0x10, 2, 1), + PIN_FIELD_BASE(25, 25, 4, 0x0050, 0x10, 2, 1), + PIN_FIELD_BASE(26, 26, 4, 0x0050, 0x10, 1, 1), + PIN_FIELD_BASE(27, 27, 2, 0x0070, 0x10, 1, 1), + PIN_FIELD_BASE(28, 28, 2, 0x0070, 0x10, 2, 1), + PIN_FIELD_BASE(29, 29, 4, 0x0050, 0x10, 0, 1), + PIN_FIELD_BASE(30, 30, 2, 0x0070, 0x10, 0, 1), + PIN_FIELD_BASE(31, 31, 3, 0x0050, 0x10, 19, 1), + PIN_FIELD_BASE(32, 32, 1, 0x0050, 0x10, 30, 1), + PIN_FIELD_BASE(33, 33, 3, 0x0050, 0x10, 21, 1), + PIN_FIELD_BASE(34, 34, 3, 0x0050, 0x10, 20, 1), + PIN_FIELD_BASE(35, 35, 3, 0x0050, 0x10, 23, 1), + PIN_FIELD_BASE(36, 36, 3, 0x0050, 0x10, 22, 1), + PIN_FIELD_BASE(37, 37, 3, 0x0050, 0x10, 25, 1), + PIN_FIELD_BASE(38, 38, 3, 0x0050, 0x10, 24, 1), + PIN_FIELD_BASE(39, 39, 3, 0x0050, 0x10, 5, 1), + PIN_FIELD_BASE(40, 40, 3, 0x0050, 0x10, 2, 1), + PIN_FIELD_BASE(41, 41, 3, 0x0050, 0x10, 3, 1), + PIN_FIELD_BASE(42, 42, 3, 0x0050, 0x10, 4, 1), + PIN_FIELD_BASE(43, 43, 3, 0x0050, 0x10, 6, 1), + PIN_FIELD_BASE(44, 44, 7, 0x0050, 0x10, 20, 1), + PIN_FIELD_BASE(45, 45, 7, 0x0050, 0x10, 21, 1), + PIN_FIELD_BASE(46, 46, 7, 0x0050, 0x10, 22, 1), + PIN_FIELD_BASE(47, 47, 7, 0x0050, 0x10, 23, 1), + PIN_FIELD_BASE(48, 48, 4, 0x0050, 0x10, 5, 1), + PIN_FIELD_BASE(49, 49, 4, 0x0050, 0x10, 4, 1), + PIN_FIELD_BASE(50, 50, 4, 0x0050, 0x10, 3, 1), + PIN_FIELD_BASE(51, 51, 8, 0x0050, 0x10, 8, 1), + PIN_FIELD_BASE(52, 52, 8, 0x0050, 0x10, 10, 1), + PIN_FIELD_BASE(53, 53, 8, 0x0050, 0x10, 9, 1), + PIN_FIELD_BASE(54, 54, 8, 0x0050, 0x10, 11, 1), + PIN_FIELD_BASE(55, 55, 4, 
0x0050, 0x10, 6, 1), + PIN_FIELD_BASE(56, 56, 4, 0x0050, 0x10, 7, 1), + PIN_FIELD_BASE(57, 57, 2, 0x0070, 0x10, 13, 1), + PIN_FIELD_BASE(58, 58, 2, 0x0070, 0x10, 17, 1), + PIN_FIELD_BASE(59, 59, 2, 0x0070, 0x10, 14, 1), + PIN_FIELD_BASE(60, 60, 2, 0x0070, 0x10, 18, 1), + PIN_FIELD_BASE(61, 61, 2, 0x0070, 0x10, 15, 1), + PIN_FIELD_BASE(62, 62, 2, 0x0070, 0x10, 19, 1), + PIN_FIELD_BASE(63, 63, 2, 0x0070, 0x10, 16, 1), + PIN_FIELD_BASE(64, 64, 2, 0x0070, 0x10, 20, 1), + PIN_FIELD_BASE(65, 65, 9, 0x0040, 0x10, 10, 1), + PIN_FIELD_BASE(66, 66, 9, 0x0040, 0x10, 12, 1), + PIN_FIELD_BASE(67, 67, 9, 0x0040, 0x10, 11, 1), + PIN_FIELD_BASE(68, 68, 9, 0x0040, 0x10, 13, 1), + PIN_FIELD_BASE(69, 69, 2, 0x0070, 0x10, 22, 1), + PIN_FIELD_BASE(70, 70, 2, 0x0070, 0x10, 21, 1), + PIN_FIELD_BASE(71, 71, 2, 0x0070, 0x10, 24, 1), + PIN_FIELD_BASE(72, 72, 2, 0x0070, 0x10, 23, 1), + PIN_FIELD_BASE(73, 73, 2, 0x0070, 0x10, 26, 1), + PIN_FIELD_BASE(74, 74, 2, 0x0070, 0x10, 25, 1), + PIN_FIELD_BASE(75, 75, 3, 0x0050, 0x10, 13, 1), + PIN_FIELD_BASE(76, 76, 2, 0x0070, 0x10, 27, 1), + PIN_FIELD_BASE(77, 77, 8, 0x0050, 0x10, 13, 1), + PIN_FIELD_BASE(78, 78, 8, 0x0050, 0x10, 12, 1), + PIN_FIELD_BASE(79, 79, 8, 0x0050, 0x10, 15, 1), + PIN_FIELD_BASE(80, 80, 8, 0x0050, 0x10, 14, 1), + PIN_FIELD_BASE(81, 81, 2, 0x0070, 0x10, 29, 1), + PIN_FIELD_BASE(82, 82, 2, 0x0070, 0x10, 28, 1), + PIN_FIELD_BASE(83, 83, 2, 0x0070, 0x10, 30, 1), + PIN_FIELD_BASE(84, 84, 7, 0x0050, 0x10, 24, 1), + PIN_FIELD_BASE(85, 85, 7, 0x0050, 0x10, 25, 1), + PIN_FIELD_BASE(86, 86, 7, 0x0050, 0x10, 26, 1), + PIN_FIELD_BASE(87, 87, 7, 0x0050, 0x10, 27, 1), + PIN_FIELD_BASE(88, 88, 5, 0x0060, 0x10, 20, 1), + PIN_FIELD_BASE(89, 89, 5, 0x0060, 0x10, 19, 1), + PIN_FIELD_BASE(90, 90, 5, 0x0060, 0x10, 22, 1), + PIN_FIELD_BASE(91, 91, 5, 0x0060, 0x10, 21, 1), + PIN_FIELD_BASE(92, 92, 5, 0x0060, 0x10, 16, 1), + PIN_FIELD_BASE(93, 93, 5, 0x0060, 0x10, 17, 1), + PIN_FIELD_BASE(94, 94, 5, 0x0060, 0x10, 23, 1), + PIN_FIELD_BASE(95, 95, 5, 
0x0060, 0x10, 15, 1), + PIN_FIELD_BASE(96, 96, 5, 0x0060, 0x10, 18, 1), + PIN_FIELD_BASE(97, 97, 5, 0x0060, 0x10, 0, 1), + PIN_FIELD_BASE(98, 98, 5, 0x0060, 0x10, 5, 1), + PIN_FIELD_BASE(99, 99, 5, 0x0060, 0x10, 3, 1), + PIN_FIELD_BASE(100, 100, 5, 0x0060, 0x10, 4, 1), + PIN_FIELD_BASE(101, 101, 5, 0x0060, 0x10, 1, 1), + PIN_FIELD_BASE(102, 102, 5, 0x0060, 0x10, 2, 1), + PIN_FIELD_BASE(103, 103, 7, 0x0050, 0x10, 15, 1), + PIN_FIELD_BASE(104, 104, 7, 0x0050, 0x10, 12, 1), + PIN_FIELD_BASE(105, 105, 7, 0x0050, 0x10, 14, 1), + PIN_FIELD_BASE(106, 106, 7, 0x0050, 0x10, 13, 1), + PIN_FIELD_BASE(107, 107, 7, 0x0050, 0x10, 19, 1), + PIN_FIELD_BASE(108, 108, 7, 0x0050, 0x10, 16, 1), + PIN_FIELD_BASE(109, 109, 7, 0x0050, 0x10, 18, 1), + PIN_FIELD_BASE(110, 110, 7, 0x0050, 0x10, 17, 1), + PIN_FIELD_BASE(111, 111, 7, 0x0050, 0x10, 4, 1), + PIN_FIELD_BASE(112, 112, 8, 0x0050, 0x10, 0, 1), + PIN_FIELD_BASE(113, 113, 8, 0x0050, 0x10, 1, 1), + PIN_FIELD_BASE(114, 114, 8, 0x0050, 0x10, 2, 1), + PIN_FIELD_BASE(115, 115, 2, 0x0070, 0x10, 9, 1), + PIN_FIELD_BASE(116, 116, 2, 0x0070, 0x10, 12, 1), + PIN_FIELD_BASE(117, 117, 2, 0x0070, 0x10, 10, 1), + PIN_FIELD_BASE(118, 118, 2, 0x0070, 0x10, 11, 1), + PIN_FIELD_BASE(119, 119, 1, 0x0050, 0x10, 26, 1), + PIN_FIELD_BASE(120, 120, 1, 0x0050, 0x10, 25, 1), + PIN_FIELD_BASE(121, 121, 1, 0x0050, 0x10, 24, 1), + PIN_FIELD_BASE(122, 122, 1, 0x0050, 0x10, 23, 1), + PIN_FIELD_BASE(123, 123, 1, 0x0050, 0x10, 19, 1), + PIN_FIELD_BASE(124, 124, 1, 0x0050, 0x10, 18, 1), + PIN_FIELD_BASE(125, 125, 1, 0x0050, 0x10, 17, 1), + PIN_FIELD_BASE(126, 126, 1, 0x0050, 0x10, 16, 1), + PIN_FIELD_BASE(127, 127, 1, 0x0050, 0x10, 22, 1), + PIN_FIELD_BASE(128, 128, 1, 0x0050, 0x10, 15, 1), + PIN_FIELD_BASE(129, 129, 1, 0x0050, 0x10, 20, 1), + PIN_FIELD_BASE(130, 130, 1, 0x0050, 0x10, 27, 1), + PIN_FIELD_BASE(131, 131, 1, 0x0050, 0x10, 13, 1), + PIN_FIELD_BASE(132, 132, 1, 0x0050, 0x10, 14, 1), + PIN_FIELD_BASE(133, 133, 1, 0x0050, 0x10, 28, 1), + 
PIN_FIELD_BASE(134, 134, 1, 0x0050, 0x10, 21, 1), + PIN_FIELD_BASE(135, 135, 1, 0x0050, 0x10, 11, 1), + PIN_FIELD_BASE(136, 136, 1, 0x0050, 0x10, 12, 1), + PIN_FIELD_BASE(137, 137, 2, 0x0070, 0x10, 3, 1), + PIN_FIELD_BASE(138, 138, 2, 0x0070, 0x10, 4, 1), + PIN_FIELD_BASE(139, 139, 1, 0x0050, 0x10, 3, 1), + PIN_FIELD_BASE(140, 140, 1, 0x0050, 0x10, 4, 1), + PIN_FIELD_BASE(141, 141, 1, 0x0050, 0x10, 0, 1), + PIN_FIELD_BASE(142, 142, 1, 0x0050, 0x10, 1, 1), + PIN_FIELD_BASE(143, 143, 1, 0x0050, 0x10, 2, 1), + PIN_FIELD_BASE(144, 144, 1, 0x0050, 0x10, 5, 1), + PIN_FIELD_BASE(145, 145, 1, 0x0050, 0x10, 6, 1), + PIN_FIELD_BASE(146, 146, 1, 0x0050, 0x10, 7, 1), + PIN_FIELD_BASE(147, 147, 1, 0x0050, 0x10, 8, 1), + PIN_FIELD_BASE(148, 148, 1, 0x0050, 0x10, 9, 1), + PIN_FIELD_BASE(149, 149, 1, 0x0050, 0x10, 10, 1), + PIN_FIELD_BASE(150, 150, 3, 0x0050, 0x10, 14, 1), + PIN_FIELD_BASE(151, 151, 1, 0x0050, 0x10, 29, 1), + PIN_FIELD_BASE(152, 152, 3, 0x0050, 0x10, 15, 1), + PIN_FIELD_BASE(153, 153, 3, 0x0050, 0x10, 16, 1), + PIN_FIELD_BASE(154, 154, 3, 0x0050, 0x10, 17, 1), + PIN_FIELD_BASE(155, 155, 3, 0x0050, 0x10, 18, 1), + PIN_FIELD_BASE(156, 156, 5, 0x0060, 0x10, 12, 1), + PIN_FIELD_BASE(157, 157, 5, 0x0060, 0x10, 11, 1), + PIN_FIELD_BASE(158, 158, 5, 0x0060, 0x10, 10, 1), + PIN_FIELD_BASE(159, 159, 6, 0x0020, 0x10, 2, 1), + PIN_FIELD_BASE(160, 160, 5, 0x0060, 0x10, 14, 1), + PIN_FIELD_BASE(161, 161, 5, 0x0060, 0x10, 7, 1), + PIN_FIELD_BASE(162, 162, 5, 0x0060, 0x10, 6, 1), + PIN_FIELD_BASE(163, 163, 6, 0x0020, 0x10, 1, 1), + PIN_FIELD_BASE(164, 164, 5, 0x0060, 0x10, 9, 1), + PIN_FIELD_BASE(165, 165, 5, 0x0060, 0x10, 8, 1), + PIN_FIELD_BASE(166, 166, 6, 0x0020, 0x10, 0, 1), + PIN_FIELD_BASE(167, 167, 5, 0x0060, 0x10, 13, 1), + PIN_FIELD_BASE(168, 168, 3, 0x0050, 0x10, 8, 1), + PIN_FIELD_BASE(169, 169, 3, 0x0050, 0x10, 7, 1), + PIN_FIELD_BASE(170, 170, 3, 0x0050, 0x10, 9, 1), + PIN_FIELD_BASE(171, 171, 3, 0x0050, 0x10, 10, 1), + PIN_FIELD_BASE(172, 172, 3, 0x0050, 0x10, 11, 
1), + PIN_FIELD_BASE(173, 173, 3, 0x0050, 0x10, 12, 1), + PIN_FIELD_BASE(174, 174, 9, 0x0040, 0x10, 5, 1), + PIN_FIELD_BASE(175, 175, 9, 0x0040, 0x10, 4, 1), + PIN_FIELD_BASE(176, 176, 9, 0x0040, 0x10, 6, 1), + PIN_FIELD_BASE(177, 177, 9, 0x0040, 0x10, 7, 1), + PIN_FIELD_BASE(178, 178, 9, 0x0040, 0x10, 8, 1), + PIN_FIELD_BASE(179, 179, 9, 0x0040, 0x10, 9, 1), + PIN_FIELD_BASE(180, 180, 5, 0x0060, 0x10, 24, 1), + PIN_FIELD_BASE(181, 181, 5, 0x0060, 0x10, 25, 1), + PIN_FIELD_BASE(182, 182, 9, 0x0040, 0x10, 3, 1), +}; + +static const struct mtk_pin_field_calc mt8189_pin_tdsel_range[] = { + PIN_FIELD_BASE(0, 0, 7, 0x00f0, 0x10, 0, 4), + PIN_FIELD_BASE(1, 1, 8, 0x00d0, 0x10, 0, 4), + PIN_FIELD_BASE(2, 2, 8, 0x00d0, 0x10, 4, 4), + PIN_FIELD_BASE(3, 3, 8, 0x00d0, 0x10, 8, 4), + PIN_FIELD_BASE(4, 4, 8, 0x00d0, 0x10, 12, 4), + PIN_FIELD_BASE(5, 5, 8, 0x00d0, 0x10, 16, 4), + PIN_FIELD_BASE(6, 6, 7, 0x00f0, 0x10, 4, 4), + PIN_FIELD_BASE(7, 7, 7, 0x00f0, 0x10, 8, 4), + PIN_FIELD_BASE(8, 8, 7, 0x00f0, 0x10, 12, 4), + PIN_FIELD_BASE(9, 9, 7, 0x00f0, 0x10, 16, 4), + PIN_FIELD_BASE(10, 10, 7, 0x00f0, 0x10, 20, 4), + PIN_FIELD_BASE(11, 11, 7, 0x00f0, 0x10, 24, 4), + PIN_FIELD_BASE(12, 12, 2, 0x00f0, 0x10, 12, 4), + PIN_FIELD_BASE(13, 13, 2, 0x00f0, 0x10, 16, 4), + PIN_FIELD_BASE(14, 14, 3, 0x0110, 0x10, 0, 4), + PIN_FIELD_BASE(15, 15, 3, 0x0110, 0x10, 4, 4), + PIN_FIELD_BASE(16, 16, 2, 0x00f0, 0x10, 20, 4), + PIN_FIELD_BASE(17, 17, 2, 0x00f0, 0x10, 28, 4), + PIN_FIELD_BASE(18, 18, 7, 0x0100, 0x10, 28, 4), + PIN_FIELD_BASE(19, 19, 7, 0x0110, 0x10, 0, 4), + PIN_FIELD_BASE(20, 20, 7, 0x0110, 0x10, 0, 4), + PIN_FIELD_BASE(21, 21, 7, 0x0110, 0x10, 0, 4), + PIN_FIELD_BASE(22, 22, 9, 0x0110, 0x10, 0, 4), + PIN_FIELD_BASE(23, 23, 9, 0x0110, 0x10, 4, 4), + PIN_FIELD_BASE(24, 24, 9, 0x0110, 0x10, 8, 4), + PIN_FIELD_BASE(25, 25, 4, 0x00d0, 0x10, 12, 4), + PIN_FIELD_BASE(26, 26, 4, 0x00d0, 0x10, 8, 4), + PIN_FIELD_BASE(27, 27, 2, 0x00f0, 0x10, 4, 4), + PIN_FIELD_BASE(28, 28, 2, 0x00f0, 0x10, 
8, 4), + PIN_FIELD_BASE(29, 29, 4, 0x00d0, 0x10, 8, 4), + PIN_FIELD_BASE(30, 30, 2, 0x00f0, 0x10, 0, 4), + PIN_FIELD_BASE(31, 31, 3, 0x0120, 0x10, 8, 4), + PIN_FIELD_BASE(32, 32, 1, 0x00f0, 0x10, 16, 4), + PIN_FIELD_BASE(33, 33, 3, 0x0120, 0x10, 16, 4), + PIN_FIELD_BASE(34, 34, 3, 0x0120, 0x10, 4, 4), + PIN_FIELD_BASE(35, 35, 3, 0x0120, 0x10, 0, 4), + PIN_FIELD_BASE(36, 36, 3, 0x0120, 0x10, 8, 4), + PIN_FIELD_BASE(37, 37, 3, 0x0120, 0x10, 4, 4), + PIN_FIELD_BASE(38, 38, 3, 0x0120, 0x10, 4, 4), + PIN_FIELD_BASE(39, 39, 3, 0x0120, 0x10, 8, 4), + PIN_FIELD_BASE(40, 40, 3, 0x0120, 0x10, 8, 4), + PIN_FIELD_BASE(41, 41, 3, 0x0120, 0x10, 8, 4), + PIN_FIELD_BASE(42, 42, 3, 0x0120, 0x10, 8, 4), + PIN_FIELD_BASE(43, 43, 3, 0x0120, 0x10, 8, 4), + PIN_FIELD_BASE(44, 44, 7, 0x0110, 0x10, 0, 4), + PIN_FIELD_BASE(45, 45, 7, 0x0110, 0x10, 0, 4), + PIN_FIELD_BASE(46, 46, 7, 0x0110, 0x10, 0, 4), + PIN_FIELD_BASE(47, 47, 7, 0x0110, 0x10, 0, 4), + PIN_FIELD_BASE(48, 48, 4, 0x00d0, 0x10, 8, 4), + PIN_FIELD_BASE(49, 49, 4, 0x00d0, 0x10, 4, 4), + PIN_FIELD_BASE(50, 50, 4, 0x00d0, 0x10, 0, 4), + PIN_FIELD_BASE(51, 51, 8, 0x00d0, 0x10, 20, 4), + PIN_FIELD_BASE(52, 52, 8, 0x00d0, 0x10, 20, 4), + PIN_FIELD_BASE(53, 53, 8, 0x00d0, 0x10, 20, 4), + PIN_FIELD_BASE(54, 54, 8, 0x00d0, 0x10, 20, 4), + PIN_FIELD_BASE(55, 55, 4, 0x00d0, 0x10, 12, 4), + PIN_FIELD_BASE(56, 56, 4, 0x00d0, 0x10, 12, 4), + PIN_FIELD_BASE(57, 57, 2, 0x00f0, 0x10, 28, 4), + PIN_FIELD_BASE(58, 58, 2, 0x00f0, 0x10, 28, 4), + PIN_FIELD_BASE(59, 59, 2, 0x00f0, 0x10, 28, 4), + PIN_FIELD_BASE(60, 60, 2, 0x00f0, 0x10, 28, 4), + PIN_FIELD_BASE(61, 61, 2, 0x00f0, 0x10, 28, 4), + PIN_FIELD_BASE(62, 62, 2, 0x00f0, 0x10, 28, 4), + PIN_FIELD_BASE(63, 63, 2, 0x00f0, 0x10, 28, 4), + PIN_FIELD_BASE(64, 64, 2, 0x00f0, 0x10, 28, 4), + PIN_FIELD_BASE(65, 65, 9, 0x0120, 0x10, 4, 4), + PIN_FIELD_BASE(66, 66, 9, 0x0120, 0x10, 4, 4), + PIN_FIELD_BASE(67, 67, 9, 0x0120, 0x10, 4, 4), + PIN_FIELD_BASE(68, 68, 9, 0x0120, 0x10, 4, 4), + 
PIN_FIELD_BASE(69, 69, 2, 0x0100, 0x10, 4, 4), + PIN_FIELD_BASE(70, 70, 2, 0x0100, 0x10, 0, 4), + PIN_FIELD_BASE(71, 71, 2, 0x0100, 0x10, 12, 4), + PIN_FIELD_BASE(72, 72, 2, 0x0100, 0x10, 8, 4), + PIN_FIELD_BASE(73, 73, 2, 0x0100, 0x10, 20, 4), + PIN_FIELD_BASE(74, 74, 2, 0x0100, 0x10, 16, 4), + PIN_FIELD_BASE(75, 75, 3, 0x0120, 0x10, 12, 4), + PIN_FIELD_BASE(76, 76, 2, 0x0100, 0x10, 24, 4), + PIN_FIELD_BASE(77, 77, 8, 0x00d0, 0x10, 28, 4), + PIN_FIELD_BASE(78, 78, 8, 0x00d0, 0x10, 24, 4), + PIN_FIELD_BASE(79, 79, 8, 0x00e0, 0x10, 4, 4), + PIN_FIELD_BASE(80, 80, 8, 0x00e0, 0x10, 0, 4), + PIN_FIELD_BASE(81, 81, 2, 0x00f0, 0x10, 28, 4), + PIN_FIELD_BASE(82, 82, 2, 0x00f0, 0x10, 28, 4), + PIN_FIELD_BASE(83, 83, 2, 0x00f0, 0x10, 28, 4), + PIN_FIELD_BASE(84, 84, 7, 0x0110, 0x10, 0, 4), + PIN_FIELD_BASE(85, 85, 7, 0x0110, 0x10, 0, 4), + PIN_FIELD_BASE(86, 86, 7, 0x0110, 0x10, 0, 4), + PIN_FIELD_BASE(87, 87, 7, 0x0110, 0x10, 0, 4), + PIN_FIELD_BASE(88, 88, 5, 0x0140, 0x10, 4, 4), + PIN_FIELD_BASE(89, 89, 5, 0x0140, 0x10, 4, 4), + PIN_FIELD_BASE(90, 90, 5, 0x0140, 0x10, 4, 4), + PIN_FIELD_BASE(91, 91, 5, 0x0140, 0x10, 4, 4), + PIN_FIELD_BASE(92, 92, 5, 0x0140, 0x10, 8, 4), + PIN_FIELD_BASE(93, 93, 5, 0x0140, 0x10, 8, 4), + PIN_FIELD_BASE(94, 94, 5, 0x0140, 0x10, 8, 4), + PIN_FIELD_BASE(95, 95, 5, 0x0140, 0x10, 8, 4), + PIN_FIELD_BASE(96, 96, 5, 0x0140, 0x10, 12, 4), + PIN_FIELD_BASE(97, 97, 5, 0x0140, 0x10, 8, 4), + PIN_FIELD_BASE(98, 98, 5, 0x0140, 0x10, 8, 4), + PIN_FIELD_BASE(99, 99, 5, 0x0140, 0x10, 8, 4), + PIN_FIELD_BASE(100, 100, 5, 0x0140, 0x10, 8, 4), + PIN_FIELD_BASE(101, 101, 5, 0x0140, 0x10, 8, 4), + PIN_FIELD_BASE(102, 102, 5, 0x0140, 0x10, 8, 4), + PIN_FIELD_BASE(103, 103, 7, 0x0100, 0x10, 8, 4), + PIN_FIELD_BASE(104, 104, 7, 0x00f0, 0x10, 28, 4), + PIN_FIELD_BASE(105, 105, 7, 0x0100, 0x10, 4, 4), + PIN_FIELD_BASE(106, 106, 7, 0x0100, 0x10, 0, 4), + PIN_FIELD_BASE(107, 107, 7, 0x0100, 0x10, 24, 4), + PIN_FIELD_BASE(108, 108, 7, 0x0100, 0x10, 12, 4), + 
PIN_FIELD_BASE(109, 109, 7, 0x0100, 0x10, 20, 4), + PIN_FIELD_BASE(110, 110, 7, 0x0100, 0x10, 16, 4), + PIN_FIELD_BASE(111, 111, 7, 0x0110, 0x10, 0, 4), + PIN_FIELD_BASE(112, 112, 8, 0x00d0, 0x10, 20, 4), + PIN_FIELD_BASE(113, 113, 8, 0x00d0, 0x10, 20, 4), + PIN_FIELD_BASE(114, 114, 8, 0x00d0, 0x10, 20, 4), + PIN_FIELD_BASE(115, 115, 2, 0x00f0, 0x10, 24, 4), + PIN_FIELD_BASE(116, 116, 2, 0x00f0, 0x10, 28, 4), + PIN_FIELD_BASE(117, 117, 2, 0x00f0, 0x10, 28, 4), + PIN_FIELD_BASE(118, 118, 2, 0x00f0, 0x10, 28, 4), + PIN_FIELD_BASE(119, 119, 1, 0x00e0, 0x10, 24, 4), + PIN_FIELD_BASE(120, 120, 1, 0x00e0, 0x10, 20, 4), + PIN_FIELD_BASE(121, 121, 1, 0x00e0, 0x10, 16, 4), + PIN_FIELD_BASE(122, 122, 1, 0x00e0, 0x10, 12, 4), + PIN_FIELD_BASE(123, 123, 1, 0x00d0, 0x10, 28, 4), + PIN_FIELD_BASE(124, 124, 1, 0x00d0, 0x10, 24, 4), + PIN_FIELD_BASE(125, 125, 1, 0x00d0, 0x10, 20, 4), + PIN_FIELD_BASE(126, 126, 1, 0x00d0, 0x10, 16, 4), + PIN_FIELD_BASE(127, 127, 1, 0x00e0, 0x10, 8, 4), + PIN_FIELD_BASE(128, 128, 1, 0x00d0, 0x10, 12, 4), + PIN_FIELD_BASE(129, 129, 1, 0x00e0, 0x10, 0, 4), + PIN_FIELD_BASE(130, 130, 1, 0x00e0, 0x10, 28, 4), + PIN_FIELD_BASE(131, 131, 1, 0x00d0, 0x10, 4, 4), + PIN_FIELD_BASE(132, 132, 1, 0x00d0, 0x10, 8, 4), + PIN_FIELD_BASE(133, 133, 1, 0x00f0, 0x10, 0, 4), + PIN_FIELD_BASE(134, 134, 1, 0x00e0, 0x10, 4, 4), + PIN_FIELD_BASE(135, 135, 1, 0x00d0, 0x10, 0, 4), + PIN_FIELD_BASE(136, 136, 1, 0x00f0, 0x10, 4, 4), + PIN_FIELD_BASE(137, 137, 2, 0x00f0, 0x10, 28, 4), + PIN_FIELD_BASE(138, 138, 2, 0x00f0, 0x10, 28, 4), + PIN_FIELD_BASE(139, 139, 1, 0x00f0, 0x10, 12, 4), + PIN_FIELD_BASE(140, 140, 1, 0x00f0, 0x10, 12, 4), + PIN_FIELD_BASE(141, 141, 1, 0x00f0, 0x10, 12, 4), + PIN_FIELD_BASE(142, 142, 1, 0x00f0, 0x10, 12, 4), + PIN_FIELD_BASE(143, 143, 1, 0x00f0, 0x10, 12, 4), + PIN_FIELD_BASE(144, 144, 1, 0x00f0, 0x10, 12, 4), + PIN_FIELD_BASE(145, 145, 1, 0x00f0, 0x10, 8, 4), + PIN_FIELD_BASE(146, 146, 1, 0x00f0, 0x10, 8, 4), + PIN_FIELD_BASE(147, 147, 1, 
0x00f0, 0x10, 8, 4), + PIN_FIELD_BASE(148, 148, 1, 0x00f0, 0x10, 8, 4), + PIN_FIELD_BASE(149, 149, 1, 0x00f0, 0x10, 8, 4), + PIN_FIELD_BASE(150, 150, 3, 0x0120, 0x10, 8, 4), + PIN_FIELD_BASE(151, 151, 1, 0x00f0, 0x10, 16, 4), + PIN_FIELD_BASE(152, 152, 3, 0x0120, 0x10, 8, 4), + PIN_FIELD_BASE(153, 153, 3, 0x0120, 0x10, 8, 4), + PIN_FIELD_BASE(154, 154, 3, 0x0120, 0x10, 8, 4), + PIN_FIELD_BASE(155, 155, 3, 0x0120, 0x10, 8, 4), + PIN_FIELD_BASE(156, 156, 5, 0x0130, 0x10, 24, 4), + PIN_FIELD_BASE(157, 157, 5, 0x0130, 0x10, 20, 4), + PIN_FIELD_BASE(158, 158, 5, 0x0130, 0x10, 16, 4), + PIN_FIELD_BASE(159, 159, 6, 0x00a0, 0x10, 8, 4), + PIN_FIELD_BASE(160, 160, 5, 0x0140, 0x10, 0, 4), + PIN_FIELD_BASE(161, 161, 5, 0x0130, 0x10, 4, 4), + PIN_FIELD_BASE(162, 162, 5, 0x0130, 0x10, 0, 4), + PIN_FIELD_BASE(163, 163, 6, 0x00a0, 0x10, 4, 4), + PIN_FIELD_BASE(164, 164, 5, 0x0130, 0x10, 12, 4), + PIN_FIELD_BASE(165, 165, 5, 0x0130, 0x10, 8, 4), + PIN_FIELD_BASE(166, 166, 6, 0x00a0, 0x10, 0, 4), + PIN_FIELD_BASE(167, 167, 5, 0x0130, 0x10, 28, 4), + PIN_FIELD_BASE(168, 168, 3, 0x0110, 0x10, 12, 4), + PIN_FIELD_BASE(169, 169, 3, 0x0110, 0x10, 8, 4), + PIN_FIELD_BASE(170, 170, 3, 0x0110, 0x10, 16, 4), + PIN_FIELD_BASE(171, 171, 3, 0x0110, 0x10, 20, 4), + PIN_FIELD_BASE(172, 172, 3, 0x0110, 0x10, 24, 4), + PIN_FIELD_BASE(173, 173, 3, 0x0110, 0x10, 28, 4), + PIN_FIELD_BASE(174, 174, 9, 0x0110, 0x10, 16, 4), + PIN_FIELD_BASE(175, 175, 9, 0x0110, 0x10, 12, 4), + PIN_FIELD_BASE(176, 176, 9, 0x0110, 0x10, 20, 4), + PIN_FIELD_BASE(177, 177, 9, 0x0110, 0x10, 24, 4), + PIN_FIELD_BASE(178, 178, 9, 0x0110, 0x10, 28, 4), + PIN_FIELD_BASE(179, 179, 9, 0x0120, 0x10, 0, 4), + PIN_FIELD_BASE(180, 180, 5, 0x0140, 0x10, 16, 4), + PIN_FIELD_BASE(181, 181, 5, 0x0140, 0x10, 20, 4), + PIN_FIELD_BASE(182, 182, 9, 0x0120, 0x10, 8, 4), +}; + +static const struct mtk_pin_field_calc mt8189_pin_rdsel_range[] = { + PIN_FIELD_BASE(0, 0, 7, 0x00d0, 0x10, 0, 2), + PIN_FIELD_BASE(1, 1, 8, 0x00a0, 0x10, 0, 2), + 
PIN_FIELD_BASE(2, 2, 8, 0x00a0, 0x10, 2, 2), + PIN_FIELD_BASE(3, 3, 8, 0x00a0, 0x10, 4, 2), + PIN_FIELD_BASE(4, 4, 8, 0x00a0, 0x10, 6, 2), + PIN_FIELD_BASE(5, 5, 8, 0x00a0, 0x10, 8, 2), + PIN_FIELD_BASE(6, 6, 7, 0x00d0, 0x10, 2, 2), + PIN_FIELD_BASE(7, 7, 7, 0x00d0, 0x10, 4, 2), + PIN_FIELD_BASE(8, 8, 7, 0x00d0, 0x10, 6, 2), + PIN_FIELD_BASE(9, 9, 7, 0x00d0, 0x10, 8, 2), + PIN_FIELD_BASE(10, 10, 7, 0x00d0, 0x10, 10, 2), + PIN_FIELD_BASE(11, 11, 7, 0x00d0, 0x10, 12, 2), + PIN_FIELD_BASE(12, 12, 2, 0x00c0, 0x10, 6, 2), + PIN_FIELD_BASE(13, 13, 2, 0x00c0, 0x10, 8, 2), + PIN_FIELD_BASE(14, 14, 3, 0x00d0, 0x10, 0, 2), + PIN_FIELD_BASE(15, 15, 3, 0x00d0, 0x10, 2, 2), + PIN_FIELD_BASE(16, 16, 2, 0x00c0, 0x10, 10, 2), + PIN_FIELD_BASE(17, 17, 2, 0x00c0, 0x10, 12, 2), + PIN_FIELD_BASE(18, 18, 7, 0x00d0, 0x10, 30, 2), + PIN_FIELD_BASE(19, 19, 7, 0x00d0, 0x10, 30, 2), + PIN_FIELD_BASE(20, 20, 7, 0x00d0, 0x10, 30, 2), + PIN_FIELD_BASE(21, 21, 7, 0x00d0, 0x10, 30, 2), + PIN_FIELD_BASE(22, 22, 9, 0x00c0, 0x10, 0, 2), + PIN_FIELD_BASE(23, 23, 9, 0x00c0, 0x10, 2, 2), + PIN_FIELD_BASE(24, 24, 9, 0x00c0, 0x10, 4, 2), + PIN_FIELD_BASE(25, 25, 4, 0x00a0, 0x10, 6, 2), + PIN_FIELD_BASE(26, 26, 4, 0x00a0, 0x10, 4, 2), + PIN_FIELD_BASE(27, 27, 2, 0x00c0, 0x10, 2, 2), + PIN_FIELD_BASE(28, 28, 2, 0x00c0, 0x10, 4, 2), + PIN_FIELD_BASE(29, 29, 4, 0x00a0, 0x10, 4, 2), + PIN_FIELD_BASE(30, 30, 2, 0x00c0, 0x10, 0, 2), + PIN_FIELD_BASE(31, 31, 3, 0x00e0, 0x10, 16, 2), + PIN_FIELD_BASE(32, 32, 1, 0x00b0, 0x10, 8, 2), + PIN_FIELD_BASE(33, 33, 3, 0x00e0, 0x10, 20, 2), + PIN_FIELD_BASE(34, 34, 3, 0x00e0, 0x10, 14, 2), + PIN_FIELD_BASE(35, 35, 3, 0x00e0, 0x10, 12, 2), + PIN_FIELD_BASE(36, 36, 3, 0x00e0, 0x10, 16, 2), + PIN_FIELD_BASE(37, 37, 3, 0x00e0, 0x10, 14, 2), + PIN_FIELD_BASE(38, 38, 3, 0x00e0, 0x10, 14, 2), + PIN_FIELD_BASE(39, 39, 3, 0x00e0, 0x10, 16, 2), + PIN_FIELD_BASE(40, 40, 3, 0x00e0, 0x10, 16, 2), + PIN_FIELD_BASE(41, 41, 3, 0x00e0, 0x10, 16, 2), + PIN_FIELD_BASE(42, 42, 3, 0x00e0, 
0x10, 16, 2), + PIN_FIELD_BASE(43, 43, 3, 0x00e0, 0x10, 16, 2), + PIN_FIELD_BASE(44, 44, 7, 0x00d0, 0x10, 30, 2), + PIN_FIELD_BASE(45, 45, 7, 0x00d0, 0x10, 30, 2), + PIN_FIELD_BASE(46, 46, 7, 0x00d0, 0x10, 30, 2), + PIN_FIELD_BASE(47, 47, 7, 0x00d0, 0x10, 30, 2), + PIN_FIELD_BASE(48, 48, 4, 0x00a0, 0x10, 4, 2), + PIN_FIELD_BASE(49, 49, 4, 0x00a0, 0x10, 2, 2), + PIN_FIELD_BASE(50, 50, 4, 0x00a0, 0x10, 0, 2), + PIN_FIELD_BASE(51, 51, 8, 0x00a0, 0x10, 10, 2), + PIN_FIELD_BASE(52, 52, 8, 0x00a0, 0x10, 10, 2), + PIN_FIELD_BASE(53, 53, 8, 0x00a0, 0x10, 10, 2), + PIN_FIELD_BASE(54, 54, 8, 0x00a0, 0x10, 10, 2), + PIN_FIELD_BASE(55, 55, 4, 0x00a0, 0x10, 6, 2), + PIN_FIELD_BASE(56, 56, 4, 0x00a0, 0x10, 6, 2), + PIN_FIELD_BASE(57, 57, 2, 0x00c0, 0x10, 12, 2), + PIN_FIELD_BASE(58, 58, 2, 0x00c0, 0x10, 12, 2), + PIN_FIELD_BASE(59, 59, 2, 0x00c0, 0x10, 12, 2), + PIN_FIELD_BASE(60, 60, 2, 0x00c0, 0x10, 12, 2), + PIN_FIELD_BASE(61, 61, 2, 0x00c0, 0x10, 12, 2), + PIN_FIELD_BASE(62, 62, 2, 0x00c0, 0x10, 12, 2), + PIN_FIELD_BASE(63, 63, 2, 0x00c0, 0x10, 12, 2), + PIN_FIELD_BASE(64, 64, 2, 0x00c0, 0x10, 12, 2), + PIN_FIELD_BASE(65, 65, 9, 0x00d0, 0x10, 12, 2), + PIN_FIELD_BASE(66, 66, 9, 0x00d0, 0x10, 12, 2), + PIN_FIELD_BASE(67, 67, 9, 0x00d0, 0x10, 12, 2), + PIN_FIELD_BASE(68, 68, 9, 0x00d0, 0x10, 12, 2), + PIN_FIELD_BASE(69, 69, 2, 0x00c0, 0x10, 16, 2), + PIN_FIELD_BASE(70, 70, 2, 0x00c0, 0x10, 14, 2), + PIN_FIELD_BASE(71, 71, 2, 0x00c0, 0x10, 20, 2), + PIN_FIELD_BASE(72, 72, 2, 0x00c0, 0x10, 18, 2), + PIN_FIELD_BASE(73, 73, 2, 0x00c0, 0x10, 24, 2), + PIN_FIELD_BASE(74, 74, 2, 0x00c0, 0x10, 22, 2), + PIN_FIELD_BASE(75, 75, 3, 0x00e0, 0x10, 18, 2), + PIN_FIELD_BASE(76, 76, 2, 0x00c0, 0x10, 26, 2), + PIN_FIELD_BASE(77, 77, 8, 0x00a0, 0x10, 14, 2), + PIN_FIELD_BASE(78, 78, 8, 0x00a0, 0x10, 12, 2), + PIN_FIELD_BASE(79, 79, 8, 0x00a0, 0x10, 18, 2), + PIN_FIELD_BASE(80, 80, 8, 0x00a0, 0x10, 16, 2), + PIN_FIELD_BASE(81, 81, 2, 0x00c0, 0x10, 12, 2), + PIN_FIELD_BASE(82, 82, 2, 0x00c0, 
0x10, 12, 2), + PIN_FIELD_BASE(83, 83, 2, 0x00c0, 0x10, 12, 2), + PIN_FIELD_BASE(84, 84, 7, 0x00d0, 0x10, 30, 2), + PIN_FIELD_BASE(85, 85, 7, 0x00d0, 0x10, 30, 2), + PIN_FIELD_BASE(86, 86, 7, 0x00d0, 0x10, 30, 2), + PIN_FIELD_BASE(87, 87, 7, 0x00d0, 0x10, 30, 2), + PIN_FIELD_BASE(88, 88, 5, 0x00f0, 0x10, 24, 2), + PIN_FIELD_BASE(89, 89, 5, 0x00f0, 0x10, 24, 2), + PIN_FIELD_BASE(90, 90, 5, 0x00f0, 0x10, 24, 2), + PIN_FIELD_BASE(91, 91, 5, 0x00f0, 0x10, 24, 2), + PIN_FIELD_BASE(92, 92, 5, 0x00f0, 0x10, 26, 2), + PIN_FIELD_BASE(93, 93, 5, 0x00f0, 0x10, 26, 2), + PIN_FIELD_BASE(94, 94, 5, 0x00f0, 0x10, 26, 2), + PIN_FIELD_BASE(95, 95, 5, 0x00f0, 0x10, 26, 2), + PIN_FIELD_BASE(96, 96, 5, 0x00f0, 0x10, 28, 2), + PIN_FIELD_BASE(97, 97, 5, 0x00f0, 0x10, 26, 2), + PIN_FIELD_BASE(98, 98, 5, 0x00f0, 0x10, 26, 2), + PIN_FIELD_BASE(99, 99, 5, 0x00f0, 0x10, 26, 2), + PIN_FIELD_BASE(100, 100, 5, 0x00f0, 0x10, 26, 2), + PIN_FIELD_BASE(101, 101, 5, 0x00f0, 0x10, 26, 2), + PIN_FIELD_BASE(102, 102, 5, 0x00f0, 0x10, 26, 2), + PIN_FIELD_BASE(103, 103, 7, 0x00d0, 0x10, 20, 2), + PIN_FIELD_BASE(104, 104, 7, 0x00d0, 0x10, 14, 2), + PIN_FIELD_BASE(105, 105, 7, 0x00d0, 0x10, 18, 2), + PIN_FIELD_BASE(106, 106, 7, 0x00d0, 0x10, 16, 2), + PIN_FIELD_BASE(107, 107, 7, 0x00d0, 0x10, 28, 2), + PIN_FIELD_BASE(108, 108, 7, 0x00d0, 0x10, 22, 2), + PIN_FIELD_BASE(109, 109, 7, 0x00d0, 0x10, 26, 2), + PIN_FIELD_BASE(110, 110, 7, 0x00d0, 0x10, 24, 2), + PIN_FIELD_BASE(111, 111, 7, 0x00d0, 0x10, 30, 2), + PIN_FIELD_BASE(112, 112, 8, 0x00a0, 0x10, 10, 2), + PIN_FIELD_BASE(113, 113, 8, 0x00a0, 0x10, 10, 2), + PIN_FIELD_BASE(114, 114, 8, 0x00a0, 0x10, 10, 2), + PIN_FIELD_BASE(115, 115, 2, 0x00c0, 0x10, 12, 2), + PIN_FIELD_BASE(116, 116, 2, 0x00c0, 0x10, 12, 2), + PIN_FIELD_BASE(117, 117, 2, 0x00c0, 0x10, 12, 2), + PIN_FIELD_BASE(118, 118, 2, 0x00c0, 0x10, 12, 2), + PIN_FIELD_BASE(119, 119, 1, 0x00a0, 0x10, 28, 2), + PIN_FIELD_BASE(120, 120, 1, 0x00a0, 0x10, 26, 2), + PIN_FIELD_BASE(121, 121, 1, 0x00a0, 0x10, 
24, 2), + PIN_FIELD_BASE(122, 122, 1, 0x00a0, 0x10, 22, 2), + PIN_FIELD_BASE(123, 123, 1, 0x00a0, 0x10, 14, 2), + PIN_FIELD_BASE(124, 124, 1, 0x00a0, 0x10, 12, 2), + PIN_FIELD_BASE(125, 125, 1, 0x00a0, 0x10, 10, 2), + PIN_FIELD_BASE(126, 126, 1, 0x00a0, 0x10, 8, 2), + PIN_FIELD_BASE(127, 127, 1, 0x00a0, 0x10, 20, 2), + PIN_FIELD_BASE(128, 128, 1, 0x00a0, 0x10, 6, 2), + PIN_FIELD_BASE(129, 129, 1, 0x00a0, 0x10, 16, 2), + PIN_FIELD_BASE(130, 130, 1, 0x00a0, 0x10, 30, 2), + PIN_FIELD_BASE(131, 131, 1, 0x00a0, 0x10, 2, 2), + PIN_FIELD_BASE(132, 132, 1, 0x00a0, 0x10, 4, 2), + PIN_FIELD_BASE(133, 133, 1, 0x00b0, 0x10, 0, 2), + PIN_FIELD_BASE(134, 134, 1, 0x00a0, 0x10, 18, 2), + PIN_FIELD_BASE(135, 135, 1, 0x00a0, 0x10, 0, 2), + PIN_FIELD_BASE(136, 136, 1, 0x00b0, 0x10, 2, 2), + PIN_FIELD_BASE(137, 137, 2, 0x00c0, 0x10, 12, 2), + PIN_FIELD_BASE(138, 138, 2, 0x00c0, 0x10, 12, 2), + PIN_FIELD_BASE(139, 139, 1, 0x00b0, 0x10, 6, 2), + PIN_FIELD_BASE(140, 140, 1, 0x00b0, 0x10, 6, 2), + PIN_FIELD_BASE(141, 141, 1, 0x00b0, 0x10, 6, 2), + PIN_FIELD_BASE(142, 142, 1, 0x00b0, 0x10, 6, 2), + PIN_FIELD_BASE(143, 143, 1, 0x00b0, 0x10, 6, 2), + PIN_FIELD_BASE(144, 144, 1, 0x00b0, 0x10, 6, 2), + PIN_FIELD_BASE(145, 145, 1, 0x00b0, 0x10, 4, 2), + PIN_FIELD_BASE(146, 146, 1, 0x00b0, 0x10, 4, 2), + PIN_FIELD_BASE(147, 147, 1, 0x00b0, 0x10, 4, 2), + PIN_FIELD_BASE(148, 148, 1, 0x00b0, 0x10, 4, 2), + PIN_FIELD_BASE(149, 149, 1, 0x00b0, 0x10, 4, 2), + PIN_FIELD_BASE(150, 150, 3, 0x00e0, 0x10, 16, 2), + PIN_FIELD_BASE(151, 151, 1, 0x00b0, 0x10, 8, 2), + PIN_FIELD_BASE(152, 152, 3, 0x00e0, 0x10, 16, 2), + PIN_FIELD_BASE(153, 153, 3, 0x00e0, 0x10, 16, 2), + PIN_FIELD_BASE(154, 154, 3, 0x00e0, 0x10, 16, 2), + PIN_FIELD_BASE(155, 155, 3, 0x00e0, 0x10, 16, 2), + PIN_FIELD_BASE(156, 156, 5, 0x00f0, 0x10, 6, 6), + PIN_FIELD_BASE(157, 157, 5, 0x00f0, 0x10, 0, 6), + PIN_FIELD_BASE(158, 158, 5, 0x00e0, 0x10, 24, 6), + PIN_FIELD_BASE(159, 159, 6, 0x0080, 0x10, 12, 6), + PIN_FIELD_BASE(160, 160, 5, 
0x00f0, 0x10, 18, 6), + PIN_FIELD_BASE(161, 161, 5, 0x00e0, 0x10, 6, 6), + PIN_FIELD_BASE(162, 162, 5, 0x00e0, 0x10, 0, 6), + PIN_FIELD_BASE(163, 163, 6, 0x0080, 0x10, 6, 6), + PIN_FIELD_BASE(164, 164, 5, 0x00e0, 0x10, 18, 6), + PIN_FIELD_BASE(165, 165, 5, 0x00e0, 0x10, 12, 6), + PIN_FIELD_BASE(166, 166, 6, 0x0080, 0x10, 0, 6), + PIN_FIELD_BASE(167, 167, 5, 0x00f0, 0x10, 12, 6), + PIN_FIELD_BASE(168, 168, 3, 0x00d0, 0x10, 10, 6), + PIN_FIELD_BASE(169, 169, 3, 0x00d0, 0x10, 4, 6), + PIN_FIELD_BASE(170, 170, 3, 0x00d0, 0x10, 16, 6), + PIN_FIELD_BASE(171, 171, 3, 0x00d0, 0x10, 22, 6), + PIN_FIELD_BASE(172, 172, 3, 0x00e0, 0x10, 0, 6), + PIN_FIELD_BASE(173, 173, 3, 0x00e0, 0x10, 6, 6), + PIN_FIELD_BASE(174, 174, 9, 0x00c0, 0x10, 12, 6), + PIN_FIELD_BASE(175, 175, 9, 0x00c0, 0x10, 6, 6), + PIN_FIELD_BASE(176, 176, 9, 0x00c0, 0x10, 18, 6), + PIN_FIELD_BASE(177, 177, 9, 0x00c0, 0x10, 24, 6), + PIN_FIELD_BASE(178, 178, 9, 0x00d0, 0x10, 0, 6), + PIN_FIELD_BASE(179, 179, 9, 0x00d0, 0x10, 6, 6), + PIN_FIELD_BASE(180, 180, 5, 0x00f0, 0x10, 30, 2), + PIN_FIELD_BASE(181, 181, 5, 0x0100, 0x10, 0, 2), + PIN_FIELD_BASE(182, 182, 9, 0x00d0, 0x10, 14, 2), +}; + +static const struct mtk_pin_field_calc mt8189_pin_pupd_range[] = { + PIN_FIELD_BASE(44, 44, 7, 0x0090, 0x10, 0, 1), + PIN_FIELD_BASE(45, 45, 7, 0x0090, 0x10, 1, 1), + PIN_FIELD_BASE(46, 46, 7, 0x0090, 0x10, 2, 1), + PIN_FIELD_BASE(47, 47, 7, 0x0090, 0x10, 3, 1), + PIN_FIELD_BASE(156, 156, 5, 0x00a0, 0x10, 6, 1), + PIN_FIELD_BASE(157, 157, 5, 0x00a0, 0x10, 5, 1), + PIN_FIELD_BASE(158, 158, 5, 0x00a0, 0x10, 4, 1), + PIN_FIELD_BASE(159, 159, 6, 0x0050, 0x10, 2, 1), + PIN_FIELD_BASE(160, 160, 5, 0x00a0, 0x10, 8, 1), + PIN_FIELD_BASE(161, 161, 5, 0x00a0, 0x10, 1, 1), + PIN_FIELD_BASE(162, 162, 5, 0x00a0, 0x10, 0, 1), + PIN_FIELD_BASE(163, 163, 6, 0x0050, 0x10, 1, 1), + PIN_FIELD_BASE(164, 164, 5, 0x00a0, 0x10, 3, 1), + PIN_FIELD_BASE(165, 165, 5, 0x00a0, 0x10, 2, 1), + PIN_FIELD_BASE(166, 166, 6, 0x0050, 0x10, 0, 1), + 
PIN_FIELD_BASE(167, 167, 5, 0x00a0, 0x10, 7, 1), + PIN_FIELD_BASE(168, 168, 3, 0x0090, 0x10, 1, 1), + PIN_FIELD_BASE(169, 169, 3, 0x0090, 0x10, 0, 1), + PIN_FIELD_BASE(170, 170, 3, 0x0090, 0x10, 2, 1), + PIN_FIELD_BASE(171, 171, 3, 0x0090, 0x10, 3, 1), + PIN_FIELD_BASE(172, 172, 3, 0x0090, 0x10, 4, 1), + PIN_FIELD_BASE(173, 173, 3, 0x0090, 0x10, 5, 1), + PIN_FIELD_BASE(174, 174, 9, 0x0080, 0x10, 1, 1), + PIN_FIELD_BASE(175, 175, 9, 0x0080, 0x10, 0, 1), + PIN_FIELD_BASE(176, 176, 9, 0x0080, 0x10, 2, 1), + PIN_FIELD_BASE(177, 177, 9, 0x0080, 0x10, 3, 1), + PIN_FIELD_BASE(178, 178, 9, 0x0080, 0x10, 4, 1), + PIN_FIELD_BASE(179, 179, 9, 0x0080, 0x10, 5, 1), +}; + +static const struct mtk_pin_field_calc mt8189_pin_r0_range[] = { + PIN_FIELD_BASE(44, 44, 7, 0x00b0, 0x10, 0, 1), + PIN_FIELD_BASE(45, 45, 7, 0x00b0, 0x10, 1, 1), + PIN_FIELD_BASE(46, 46, 7, 0x00b0, 0x10, 2, 1), + PIN_FIELD_BASE(47, 47, 7, 0x00b0, 0x10, 3, 1), + PIN_FIELD_BASE(156, 156, 5, 0x00c0, 0x10, 6, 1), + PIN_FIELD_BASE(157, 157, 5, 0x00c0, 0x10, 5, 1), + PIN_FIELD_BASE(158, 158, 5, 0x00c0, 0x10, 4, 1), + PIN_FIELD_BASE(159, 159, 6, 0x0060, 0x10, 2, 1), + PIN_FIELD_BASE(160, 160, 5, 0x00c0, 0x10, 8, 1), + PIN_FIELD_BASE(161, 161, 5, 0x00c0, 0x10, 1, 1), + PIN_FIELD_BASE(162, 162, 5, 0x00c0, 0x10, 0, 1), + PIN_FIELD_BASE(163, 163, 6, 0x0060, 0x10, 1, 1), + PIN_FIELD_BASE(164, 164, 5, 0x00c0, 0x10, 3, 1), + PIN_FIELD_BASE(165, 165, 5, 0x00c0, 0x10, 2, 1), + PIN_FIELD_BASE(166, 166, 6, 0x0060, 0x10, 0, 1), + PIN_FIELD_BASE(167, 167, 5, 0x00c0, 0x10, 7, 1), + PIN_FIELD_BASE(168, 168, 3, 0x00b0, 0x10, 1, 1), + PIN_FIELD_BASE(169, 169, 3, 0x00b0, 0x10, 0, 1), + PIN_FIELD_BASE(170, 170, 3, 0x00b0, 0x10, 2, 1), + PIN_FIELD_BASE(171, 171, 3, 0x00b0, 0x10, 3, 1), + PIN_FIELD_BASE(172, 172, 3, 0x00b0, 0x10, 4, 1), + PIN_FIELD_BASE(173, 173, 3, 0x00b0, 0x10, 5, 1), + PIN_FIELD_BASE(174, 174, 9, 0x00a0, 0x10, 1, 1), + PIN_FIELD_BASE(175, 175, 9, 0x00a0, 0x10, 0, 1), + PIN_FIELD_BASE(176, 176, 9, 0x00a0, 0x10, 2, 1), 
+ PIN_FIELD_BASE(177, 177, 9, 0x00a0, 0x10, 3, 1), + PIN_FIELD_BASE(178, 178, 9, 0x00a0, 0x10, 4, 1), + PIN_FIELD_BASE(179, 179, 9, 0x00a0, 0x10, 5, 1), +}; + +static const struct mtk_pin_field_calc mt8189_pin_r1_range[] = { + PIN_FIELD_BASE(44, 44, 7, 0x00c0, 0x10, 0, 1), + PIN_FIELD_BASE(45, 45, 7, 0x00c0, 0x10, 1, 1), + PIN_FIELD_BASE(46, 46, 7, 0x00c0, 0x10, 2, 1), + PIN_FIELD_BASE(47, 47, 7, 0x00c0, 0x10, 3, 1), + PIN_FIELD_BASE(156, 156, 5, 0x00d0, 0x10, 6, 1), + PIN_FIELD_BASE(157, 157, 5, 0x00d0, 0x10, 5, 1), + PIN_FIELD_BASE(158, 158, 5, 0x00d0, 0x10, 4, 1), + PIN_FIELD_BASE(159, 159, 6, 0x0070, 0x10, 2, 1), + PIN_FIELD_BASE(160, 160, 5, 0x00d0, 0x10, 8, 1), + PIN_FIELD_BASE(161, 161, 5, 0x00d0, 0x10, 1, 1), + PIN_FIELD_BASE(162, 162, 5, 0x00d0, 0x10, 0, 1), + PIN_FIELD_BASE(163, 163, 6, 0x0070, 0x10, 1, 1), + PIN_FIELD_BASE(164, 164, 5, 0x00d0, 0x10, 3, 1), + PIN_FIELD_BASE(165, 165, 5, 0x00d0, 0x10, 2, 1), + PIN_FIELD_BASE(166, 166, 6, 0x0070, 0x10, 0, 1), + PIN_FIELD_BASE(167, 167, 5, 0x00d0, 0x10, 7, 1), + PIN_FIELD_BASE(168, 168, 3, 0x00c0, 0x10, 1, 1), + PIN_FIELD_BASE(169, 169, 3, 0x00c0, 0x10, 0, 1), + PIN_FIELD_BASE(170, 170, 3, 0x00c0, 0x10, 2, 1), + PIN_FIELD_BASE(171, 171, 3, 0x00c0, 0x10, 3, 1), + PIN_FIELD_BASE(172, 172, 3, 0x00c0, 0x10, 4, 1), + PIN_FIELD_BASE(173, 173, 3, 0x00c0, 0x10, 5, 1), + PIN_FIELD_BASE(174, 174, 9, 0x00b0, 0x10, 1, 1), + PIN_FIELD_BASE(175, 175, 9, 0x00b0, 0x10, 0, 1), + PIN_FIELD_BASE(176, 176, 9, 0x00b0, 0x10, 2, 1), + PIN_FIELD_BASE(177, 177, 9, 0x00b0, 0x10, 3, 1), + PIN_FIELD_BASE(178, 178, 9, 0x00b0, 0x10, 4, 1), + PIN_FIELD_BASE(179, 179, 9, 0x00b0, 0x10, 5, 1), +}; + +static const struct mtk_pin_field_calc mt8189_pin_pu_range[] = { + PIN_FIELD_BASE(0, 0, 7, 0x00a0, 0x10, 5, 1), + PIN_FIELD_BASE(1, 1, 8, 0x0090, 0x10, 3, 1), + PIN_FIELD_BASE(2, 2, 8, 0x0090, 0x10, 4, 1), + PIN_FIELD_BASE(3, 3, 8, 0x0090, 0x10, 5, 1), + PIN_FIELD_BASE(4, 4, 8, 0x0090, 0x10, 6, 1), + PIN_FIELD_BASE(5, 5, 8, 0x0090, 0x10, 7, 1), 
+ PIN_FIELD_BASE(6, 6, 7, 0x00a0, 0x10, 6, 1), + PIN_FIELD_BASE(7, 7, 7, 0x00a0, 0x10, 7, 1), + PIN_FIELD_BASE(8, 8, 7, 0x00a0, 0x10, 8, 1), + PIN_FIELD_BASE(9, 9, 7, 0x00a0, 0x10, 9, 1), + PIN_FIELD_BASE(10, 10, 7, 0x00a0, 0x10, 10, 1), + PIN_FIELD_BASE(11, 11, 7, 0x00a0, 0x10, 11, 1), + PIN_FIELD_BASE(12, 12, 2, 0x00b0, 0x10, 5, 1), + PIN_FIELD_BASE(13, 13, 2, 0x00b0, 0x10, 6, 1), + PIN_FIELD_BASE(14, 14, 3, 0x00a0, 0x10, 0, 1), + PIN_FIELD_BASE(15, 15, 3, 0x00a0, 0x10, 1, 1), + PIN_FIELD_BASE(16, 16, 2, 0x00b0, 0x10, 7, 1), + PIN_FIELD_BASE(17, 17, 2, 0x00b0, 0x10, 8, 1), + PIN_FIELD_BASE(18, 18, 7, 0x00a0, 0x10, 0, 1), + PIN_FIELD_BASE(19, 19, 7, 0x00a0, 0x10, 2, 1), + PIN_FIELD_BASE(20, 20, 7, 0x00a0, 0x10, 1, 1), + PIN_FIELD_BASE(21, 21, 7, 0x00a0, 0x10, 3, 1), + PIN_FIELD_BASE(22, 22, 9, 0x0090, 0x10, 0, 1), + PIN_FIELD_BASE(23, 23, 9, 0x0090, 0x10, 1, 1), + PIN_FIELD_BASE(24, 24, 9, 0x0090, 0x10, 2, 1), + PIN_FIELD_BASE(25, 25, 4, 0x0090, 0x10, 2, 1), + PIN_FIELD_BASE(26, 26, 4, 0x0090, 0x10, 1, 1), + PIN_FIELD_BASE(27, 27, 2, 0x00b0, 0x10, 1, 1), + PIN_FIELD_BASE(28, 28, 2, 0x00b0, 0x10, 2, 1), + PIN_FIELD_BASE(29, 29, 4, 0x0090, 0x10, 0, 1), + PIN_FIELD_BASE(30, 30, 2, 0x00b0, 0x10, 0, 1), + PIN_FIELD_BASE(31, 31, 3, 0x00a0, 0x10, 13, 1), + PIN_FIELD_BASE(32, 32, 1, 0x0090, 0x10, 30, 1), + PIN_FIELD_BASE(33, 33, 3, 0x00a0, 0x10, 15, 1), + PIN_FIELD_BASE(34, 34, 3, 0x00a0, 0x10, 14, 1), + PIN_FIELD_BASE(35, 35, 3, 0x00a0, 0x10, 17, 1), + PIN_FIELD_BASE(36, 36, 3, 0x00a0, 0x10, 16, 1), + PIN_FIELD_BASE(37, 37, 3, 0x00a0, 0x10, 19, 1), + PIN_FIELD_BASE(38, 38, 3, 0x00a0, 0x10, 18, 1), + PIN_FIELD_BASE(39, 39, 3, 0x00a0, 0x10, 5, 1), + PIN_FIELD_BASE(40, 40, 3, 0x00a0, 0x10, 2, 1), + PIN_FIELD_BASE(41, 41, 3, 0x00a0, 0x10, 3, 1), + PIN_FIELD_BASE(42, 42, 3, 0x00a0, 0x10, 4, 1), + PIN_FIELD_BASE(43, 43, 3, 0x00a0, 0x10, 6, 1), + PIN_FIELD_BASE(48, 48, 4, 0x0090, 0x10, 5, 1), + PIN_FIELD_BASE(49, 49, 4, 0x0090, 0x10, 4, 1), + PIN_FIELD_BASE(50, 50, 4, 0x0090, 
0x10, 3, 1), + PIN_FIELD_BASE(51, 51, 8, 0x0090, 0x10, 8, 1), + PIN_FIELD_BASE(52, 52, 8, 0x0090, 0x10, 10, 1), + PIN_FIELD_BASE(53, 53, 8, 0x0090, 0x10, 9, 1), + PIN_FIELD_BASE(54, 54, 8, 0x0090, 0x10, 11, 1), + PIN_FIELD_BASE(55, 55, 4, 0x0090, 0x10, 6, 1), + PIN_FIELD_BASE(56, 56, 4, 0x0090, 0x10, 7, 1), + PIN_FIELD_BASE(57, 57, 2, 0x00b0, 0x10, 13, 1), + PIN_FIELD_BASE(58, 58, 2, 0x00b0, 0x10, 17, 1), + PIN_FIELD_BASE(59, 59, 2, 0x00b0, 0x10, 14, 1), + PIN_FIELD_BASE(60, 60, 2, 0x00b0, 0x10, 18, 1), + PIN_FIELD_BASE(61, 61, 2, 0x00b0, 0x10, 15, 1), + PIN_FIELD_BASE(62, 62, 2, 0x00b0, 0x10, 19, 1), + PIN_FIELD_BASE(63, 63, 2, 0x00b0, 0x10, 16, 1), + PIN_FIELD_BASE(64, 64, 2, 0x00b0, 0x10, 20, 1), + PIN_FIELD_BASE(65, 65, 9, 0x0090, 0x10, 4, 1), + PIN_FIELD_BASE(66, 66, 9, 0x0090, 0x10, 6, 1), + PIN_FIELD_BASE(67, 67, 9, 0x0090, 0x10, 5, 1), + PIN_FIELD_BASE(68, 68, 9, 0x0090, 0x10, 7, 1), + PIN_FIELD_BASE(69, 69, 2, 0x00b0, 0x10, 22, 1), + PIN_FIELD_BASE(70, 70, 2, 0x00b0, 0x10, 21, 1), + PIN_FIELD_BASE(71, 71, 2, 0x00b0, 0x10, 24, 1), + PIN_FIELD_BASE(72, 72, 2, 0x00b0, 0x10, 23, 1), + PIN_FIELD_BASE(73, 73, 2, 0x00b0, 0x10, 26, 1), + PIN_FIELD_BASE(74, 74, 2, 0x00b0, 0x10, 25, 1), + PIN_FIELD_BASE(75, 75, 3, 0x00a0, 0x10, 7, 1), + PIN_FIELD_BASE(76, 76, 2, 0x00b0, 0x10, 27, 1), + PIN_FIELD_BASE(77, 77, 8, 0x0090, 0x10, 13, 1), + PIN_FIELD_BASE(78, 78, 8, 0x0090, 0x10, 12, 1), + PIN_FIELD_BASE(79, 79, 8, 0x0090, 0x10, 15, 1), + PIN_FIELD_BASE(80, 80, 8, 0x0090, 0x10, 14, 1), + PIN_FIELD_BASE(81, 81, 2, 0x00b0, 0x10, 29, 1), + PIN_FIELD_BASE(82, 82, 2, 0x00b0, 0x10, 28, 1), + PIN_FIELD_BASE(83, 83, 2, 0x00b0, 0x10, 30, 1), + PIN_FIELD_BASE(84, 84, 7, 0x00a0, 0x10, 22, 1), + PIN_FIELD_BASE(85, 85, 7, 0x00a0, 0x10, 23, 1), + PIN_FIELD_BASE(86, 86, 7, 0x00a0, 0x10, 24, 1), + PIN_FIELD_BASE(87, 87, 7, 0x00a0, 0x10, 25, 1), + PIN_FIELD_BASE(88, 88, 5, 0x00b0, 0x10, 11, 1), + PIN_FIELD_BASE(89, 89, 5, 0x00b0, 0x10, 10, 1), + PIN_FIELD_BASE(90, 90, 5, 0x00b0, 0x10, 13, 
1), + PIN_FIELD_BASE(91, 91, 5, 0x00b0, 0x10, 12, 1), + PIN_FIELD_BASE(92, 92, 5, 0x00b0, 0x10, 7, 1), + PIN_FIELD_BASE(93, 93, 5, 0x00b0, 0x10, 8, 1), + PIN_FIELD_BASE(94, 94, 5, 0x00b0, 0x10, 14, 1), + PIN_FIELD_BASE(95, 95, 5, 0x00b0, 0x10, 6, 1), + PIN_FIELD_BASE(96, 96, 5, 0x00b0, 0x10, 9, 1), + PIN_FIELD_BASE(97, 97, 5, 0x00b0, 0x10, 0, 1), + PIN_FIELD_BASE(98, 98, 5, 0x00b0, 0x10, 5, 1), + PIN_FIELD_BASE(99, 99, 5, 0x00b0, 0x10, 3, 1), + PIN_FIELD_BASE(100, 100, 5, 0x00b0, 0x10, 4, 1), + PIN_FIELD_BASE(101, 101, 5, 0x00b0, 0x10, 1, 1), + PIN_FIELD_BASE(102, 102, 5, 0x00b0, 0x10, 2, 1), + PIN_FIELD_BASE(103, 103, 7, 0x00a0, 0x10, 15, 1), + PIN_FIELD_BASE(104, 104, 7, 0x00a0, 0x10, 12, 1), + PIN_FIELD_BASE(105, 105, 7, 0x00a0, 0x10, 14, 1), + PIN_FIELD_BASE(106, 106, 7, 0x00a0, 0x10, 13, 1), + PIN_FIELD_BASE(107, 107, 7, 0x00a0, 0x10, 19, 1), + PIN_FIELD_BASE(108, 108, 7, 0x00a0, 0x10, 16, 1), + PIN_FIELD_BASE(109, 109, 7, 0x00a0, 0x10, 18, 1), + PIN_FIELD_BASE(110, 110, 7, 0x00a0, 0x10, 17, 1), + PIN_FIELD_BASE(111, 111, 7, 0x00a0, 0x10, 4, 1), + PIN_FIELD_BASE(112, 112, 8, 0x0090, 0x10, 0, 1), + PIN_FIELD_BASE(113, 113, 8, 0x0090, 0x10, 1, 1), + PIN_FIELD_BASE(114, 114, 8, 0x0090, 0x10, 2, 1), + PIN_FIELD_BASE(115, 115, 2, 0x00b0, 0x10, 9, 1), + PIN_FIELD_BASE(116, 116, 2, 0x00b0, 0x10, 12, 1), + PIN_FIELD_BASE(117, 117, 2, 0x00b0, 0x10, 10, 1), + PIN_FIELD_BASE(118, 118, 2, 0x00b0, 0x10, 11, 1), + PIN_FIELD_BASE(119, 119, 1, 0x0090, 0x10, 26, 1), + PIN_FIELD_BASE(120, 120, 1, 0x0090, 0x10, 25, 1), + PIN_FIELD_BASE(121, 121, 1, 0x0090, 0x10, 24, 1), + PIN_FIELD_BASE(122, 122, 1, 0x0090, 0x10, 23, 1), + PIN_FIELD_BASE(123, 123, 1, 0x0090, 0x10, 19, 1), + PIN_FIELD_BASE(124, 124, 1, 0x0090, 0x10, 18, 1), + PIN_FIELD_BASE(125, 125, 1, 0x0090, 0x10, 17, 1), + PIN_FIELD_BASE(126, 126, 1, 0x0090, 0x10, 16, 1), + PIN_FIELD_BASE(127, 127, 1, 0x0090, 0x10, 22, 1), + PIN_FIELD_BASE(128, 128, 1, 0x0090, 0x10, 15, 1), + PIN_FIELD_BASE(129, 129, 1, 0x0090, 0x10, 20, 1), 
+ PIN_FIELD_BASE(130, 130, 1, 0x0090, 0x10, 27, 1), + PIN_FIELD_BASE(131, 131, 1, 0x0090, 0x10, 13, 1), + PIN_FIELD_BASE(132, 132, 1, 0x0090, 0x10, 14, 1), + PIN_FIELD_BASE(133, 133, 1, 0x0090, 0x10, 28, 1), + PIN_FIELD_BASE(134, 134, 1, 0x0090, 0x10, 21, 1), + PIN_FIELD_BASE(135, 135, 1, 0x0090, 0x10, 11, 1), + PIN_FIELD_BASE(136, 136, 1, 0x0090, 0x10, 12, 1), + PIN_FIELD_BASE(137, 137, 2, 0x00b0, 0x10, 3, 1), + PIN_FIELD_BASE(138, 138, 2, 0x00b0, 0x10, 4, 1), + PIN_FIELD_BASE(139, 139, 1, 0x0090, 0x10, 3, 1), + PIN_FIELD_BASE(140, 140, 1, 0x0090, 0x10, 4, 1), + PIN_FIELD_BASE(141, 141, 1, 0x0090, 0x10, 0, 1), + PIN_FIELD_BASE(142, 142, 1, 0x0090, 0x10, 1, 1), + PIN_FIELD_BASE(143, 143, 1, 0x0090, 0x10, 2, 1), + PIN_FIELD_BASE(144, 144, 1, 0x0090, 0x10, 5, 1), + PIN_FIELD_BASE(145, 145, 1, 0x0090, 0x10, 6, 1), + PIN_FIELD_BASE(146, 146, 1, 0x0090, 0x10, 7, 1), + PIN_FIELD_BASE(147, 147, 1, 0x0090, 0x10, 8, 1), + PIN_FIELD_BASE(148, 148, 1, 0x0090, 0x10, 9, 1), + PIN_FIELD_BASE(149, 149, 1, 0x0090, 0x10, 10, 1), + PIN_FIELD_BASE(150, 150, 3, 0x00a0, 0x10, 8, 1), + PIN_FIELD_BASE(151, 151, 1, 0x0090, 0x10, 29, 1), + PIN_FIELD_BASE(152, 152, 3, 0x00a0, 0x10, 9, 1), + PIN_FIELD_BASE(153, 153, 3, 0x00a0, 0x10, 10, 1), + PIN_FIELD_BASE(154, 154, 3, 0x00a0, 0x10, 11, 1), + PIN_FIELD_BASE(155, 155, 3, 0x00a0, 0x10, 12, 1), + PIN_FIELD_BASE(180, 180, 5, 0x00b0, 0x10, 15, 1), + PIN_FIELD_BASE(181, 181, 5, 0x00b0, 0x10, 16, 1), + PIN_FIELD_BASE(182, 182, 9, 0x0090, 0x10, 3, 1), +}; + +static const struct mtk_pin_field_calc mt8189_pin_pd_range[] = { + PIN_FIELD_BASE(0, 0, 7, 0x0080, 0x10, 5, 1), + PIN_FIELD_BASE(1, 1, 8, 0x0080, 0x10, 3, 1), + PIN_FIELD_BASE(2, 2, 8, 0x0080, 0x10, 4, 1), + PIN_FIELD_BASE(3, 3, 8, 0x0080, 0x10, 5, 1), + PIN_FIELD_BASE(4, 4, 8, 0x0080, 0x10, 6, 1), + PIN_FIELD_BASE(5, 5, 8, 0x0080, 0x10, 7, 1), + PIN_FIELD_BASE(6, 6, 7, 0x0080, 0x10, 6, 1), + PIN_FIELD_BASE(7, 7, 7, 0x0080, 0x10, 7, 1), + PIN_FIELD_BASE(8, 8, 7, 0x0080, 0x10, 8, 1), + 
PIN_FIELD_BASE(9, 9, 7, 0x0080, 0x10, 9, 1), + PIN_FIELD_BASE(10, 10, 7, 0x0080, 0x10, 10, 1), + PIN_FIELD_BASE(11, 11, 7, 0x0080, 0x10, 11, 1), + PIN_FIELD_BASE(12, 12, 2, 0x00a0, 0x10, 5, 1), + PIN_FIELD_BASE(13, 13, 2, 0x00a0, 0x10, 6, 1), + PIN_FIELD_BASE(14, 14, 3, 0x0080, 0x10, 0, 1), + PIN_FIELD_BASE(15, 15, 3, 0x0080, 0x10, 1, 1), + PIN_FIELD_BASE(16, 16, 2, 0x00a0, 0x10, 7, 1), + PIN_FIELD_BASE(17, 17, 2, 0x00a0, 0x10, 8, 1), + PIN_FIELD_BASE(18, 18, 7, 0x0080, 0x10, 0, 1), + PIN_FIELD_BASE(19, 19, 7, 0x0080, 0x10, 2, 1), + PIN_FIELD_BASE(20, 20, 7, 0x0080, 0x10, 1, 1), + PIN_FIELD_BASE(21, 21, 7, 0x0080, 0x10, 3, 1), + PIN_FIELD_BASE(22, 22, 9, 0x0070, 0x10, 0, 1), + PIN_FIELD_BASE(23, 23, 9, 0x0070, 0x10, 1, 1), + PIN_FIELD_BASE(24, 24, 9, 0x0070, 0x10, 2, 1), + PIN_FIELD_BASE(25, 25, 4, 0x0080, 0x10, 2, 1), + PIN_FIELD_BASE(26, 26, 4, 0x0080, 0x10, 1, 1), + PIN_FIELD_BASE(27, 27, 2, 0x00a0, 0x10, 1, 1), + PIN_FIELD_BASE(28, 28, 2, 0x00a0, 0x10, 2, 1), + PIN_FIELD_BASE(29, 29, 4, 0x0080, 0x10, 0, 1), + PIN_FIELD_BASE(30, 30, 2, 0x00a0, 0x10, 0, 1), + PIN_FIELD_BASE(31, 31, 3, 0x0080, 0x10, 13, 1), + PIN_FIELD_BASE(32, 32, 1, 0x0080, 0x10, 30, 1), + PIN_FIELD_BASE(33, 33, 3, 0x0080, 0x10, 15, 1), + PIN_FIELD_BASE(34, 34, 3, 0x0080, 0x10, 14, 1), + PIN_FIELD_BASE(35, 35, 3, 0x0080, 0x10, 17, 1), + PIN_FIELD_BASE(36, 36, 3, 0x0080, 0x10, 16, 1), + PIN_FIELD_BASE(37, 37, 3, 0x0080, 0x10, 19, 1), + PIN_FIELD_BASE(38, 38, 3, 0x0080, 0x10, 18, 1), + PIN_FIELD_BASE(39, 39, 3, 0x0080, 0x10, 5, 1), + PIN_FIELD_BASE(40, 40, 3, 0x0080, 0x10, 2, 1), + PIN_FIELD_BASE(41, 41, 3, 0x0080, 0x10, 3, 1), + PIN_FIELD_BASE(42, 42, 3, 0x0080, 0x10, 4, 1), + PIN_FIELD_BASE(43, 43, 3, 0x0080, 0x10, 6, 1), + PIN_FIELD_BASE(48, 48, 4, 0x0080, 0x10, 5, 1), + PIN_FIELD_BASE(49, 49, 4, 0x0080, 0x10, 4, 1), + PIN_FIELD_BASE(50, 50, 4, 0x0080, 0x10, 3, 1), + PIN_FIELD_BASE(51, 51, 8, 0x0080, 0x10, 8, 1), + PIN_FIELD_BASE(52, 52, 8, 0x0080, 0x10, 10, 1), + PIN_FIELD_BASE(53, 53, 8, 
0x0080, 0x10, 9, 1), + PIN_FIELD_BASE(54, 54, 8, 0x0080, 0x10, 11, 1), + PIN_FIELD_BASE(55, 55, 4, 0x0080, 0x10, 6, 1), + PIN_FIELD_BASE(56, 56, 4, 0x0080, 0x10, 7, 1), + PIN_FIELD_BASE(57, 57, 2, 0x00a0, 0x10, 13, 1), + PIN_FIELD_BASE(58, 58, 2, 0x00a0, 0x10, 17, 1), + PIN_FIELD_BASE(59, 59, 2, 0x00a0, 0x10, 14, 1), + PIN_FIELD_BASE(60, 60, 2, 0x00a0, 0x10, 18, 1), + PIN_FIELD_BASE(61, 61, 2, 0x00a0, 0x10, 15, 1), + PIN_FIELD_BASE(62, 62, 2, 0x00a0, 0x10, 19, 1), + PIN_FIELD_BASE(63, 63, 2, 0x00a0, 0x10, 16, 1), + PIN_FIELD_BASE(64, 64, 2, 0x00a0, 0x10, 20, 1), + PIN_FIELD_BASE(65, 65, 9, 0x0070, 0x10, 4, 1), + PIN_FIELD_BASE(66, 66, 9, 0x0070, 0x10, 6, 1), + PIN_FIELD_BASE(67, 67, 9, 0x0070, 0x10, 5, 1), + PIN_FIELD_BASE(68, 68, 9, 0x0070, 0x10, 7, 1), + PIN_FIELD_BASE(69, 69, 2, 0x00a0, 0x10, 22, 1), + PIN_FIELD_BASE(70, 70, 2, 0x00a0, 0x10, 21, 1), + PIN_FIELD_BASE(71, 71, 2, 0x00a0, 0x10, 24, 1), + PIN_FIELD_BASE(72, 72, 2, 0x00a0, 0x10, 23, 1), + PIN_FIELD_BASE(73, 73, 2, 0x00a0, 0x10, 26, 1), + PIN_FIELD_BASE(74, 74, 2, 0x00a0, 0x10, 25, 1), + PIN_FIELD_BASE(75, 75, 3, 0x0080, 0x10, 7, 1), + PIN_FIELD_BASE(76, 76, 2, 0x00a0, 0x10, 27, 1), + PIN_FIELD_BASE(77, 77, 8, 0x0080, 0x10, 13, 1), + PIN_FIELD_BASE(78, 78, 8, 0x0080, 0x10, 12, 1), + PIN_FIELD_BASE(79, 79, 8, 0x0080, 0x10, 15, 1), + PIN_FIELD_BASE(80, 80, 8, 0x0080, 0x10, 14, 1), + PIN_FIELD_BASE(81, 81, 2, 0x00a0, 0x10, 29, 1), + PIN_FIELD_BASE(82, 82, 2, 0x00a0, 0x10, 28, 1), + PIN_FIELD_BASE(83, 83, 2, 0x00a0, 0x10, 30, 1), + PIN_FIELD_BASE(84, 84, 7, 0x0080, 0x10, 22, 1), + PIN_FIELD_BASE(85, 85, 7, 0x0080, 0x10, 23, 1), + PIN_FIELD_BASE(86, 86, 7, 0x0080, 0x10, 24, 1), + PIN_FIELD_BASE(87, 87, 7, 0x0080, 0x10, 25, 1), + PIN_FIELD_BASE(88, 88, 5, 0x0090, 0x10, 11, 1), + PIN_FIELD_BASE(89, 89, 5, 0x0090, 0x10, 10, 1), + PIN_FIELD_BASE(90, 90, 5, 0x0090, 0x10, 13, 1), + PIN_FIELD_BASE(91, 91, 5, 0x0090, 0x10, 12, 1), + PIN_FIELD_BASE(92, 92, 5, 0x0090, 0x10, 7, 1), + PIN_FIELD_BASE(93, 93, 5, 0x0090, 
0x10, 8, 1), + PIN_FIELD_BASE(94, 94, 5, 0x0090, 0x10, 14, 1), + PIN_FIELD_BASE(95, 95, 5, 0x0090, 0x10, 6, 1), + PIN_FIELD_BASE(96, 96, 5, 0x0090, 0x10, 9, 1), + PIN_FIELD_BASE(97, 97, 5, 0x0090, 0x10, 0, 1), + PIN_FIELD_BASE(98, 98, 5, 0x0090, 0x10, 5, 1), + PIN_FIELD_BASE(99, 99, 5, 0x0090, 0x10, 3, 1), + PIN_FIELD_BASE(100, 100, 5, 0x0090, 0x10, 4, 1), + PIN_FIELD_BASE(101, 101, 5, 0x0090, 0x10, 1, 1), + PIN_FIELD_BASE(102, 102, 5, 0x0090, 0x10, 2, 1), + PIN_FIELD_BASE(103, 103, 7, 0x0080, 0x10, 15, 1), + PIN_FIELD_BASE(104, 104, 7, 0x0080, 0x10, 12, 1), + PIN_FIELD_BASE(105, 105, 7, 0x0080, 0x10, 14, 1), + PIN_FIELD_BASE(106, 106, 7, 0x0080, 0x10, 13, 1), + PIN_FIELD_BASE(107, 107, 7, 0x0080, 0x10, 19, 1), + PIN_FIELD_BASE(108, 108, 7, 0x0080, 0x10, 16, 1), + PIN_FIELD_BASE(109, 109, 7, 0x0080, 0x10, 18, 1), + PIN_FIELD_BASE(110, 110, 7, 0x0080, 0x10, 17, 1), + PIN_FIELD_BASE(111, 111, 7, 0x0080, 0x10, 4, 1), + PIN_FIELD_BASE(112, 112, 8, 0x0080, 0x10, 0, 1), + PIN_FIELD_BASE(113, 113, 8, 0x0080, 0x10, 1, 1), + PIN_FIELD_BASE(114, 114, 8, 0x0080, 0x10, 2, 1), + PIN_FIELD_BASE(115, 115, 2, 0x00a0, 0x10, 9, 1), + PIN_FIELD_BASE(116, 116, 2, 0x00a0, 0x10, 12, 1), + PIN_FIELD_BASE(117, 117, 2, 0x00a0, 0x10, 10, 1), + PIN_FIELD_BASE(118, 118, 2, 0x00a0, 0x10, 11, 1), + PIN_FIELD_BASE(119, 119, 1, 0x0080, 0x10, 26, 1), + PIN_FIELD_BASE(120, 120, 1, 0x0080, 0x10, 25, 1), + PIN_FIELD_BASE(121, 121, 1, 0x0080, 0x10, 24, 1), + PIN_FIELD_BASE(122, 122, 1, 0x0080, 0x10, 23, 1), + PIN_FIELD_BASE(123, 123, 1, 0x0080, 0x10, 19, 1), + PIN_FIELD_BASE(124, 124, 1, 0x0080, 0x10, 18, 1), + PIN_FIELD_BASE(125, 125, 1, 0x0080, 0x10, 17, 1), + PIN_FIELD_BASE(126, 126, 1, 0x0080, 0x10, 16, 1), + PIN_FIELD_BASE(127, 127, 1, 0x0080, 0x10, 22, 1), + PIN_FIELD_BASE(128, 128, 1, 0x0080, 0x10, 15, 1), + PIN_FIELD_BASE(129, 129, 1, 0x0080, 0x10, 20, 1), + PIN_FIELD_BASE(130, 130, 1, 0x0080, 0x10, 27, 1), + PIN_FIELD_BASE(131, 131, 1, 0x0080, 0x10, 13, 1), + PIN_FIELD_BASE(132, 132, 1, 
0x0080, 0x10, 14, 1), + PIN_FIELD_BASE(133, 133, 1, 0x0080, 0x10, 28, 1), + PIN_FIELD_BASE(134, 134, 1, 0x0080, 0x10, 21, 1), + PIN_FIELD_BASE(135, 135, 1, 0x0080, 0x10, 11, 1), + PIN_FIELD_BASE(136, 136, 1, 0x0080, 0x10, 12, 1), + PIN_FIELD_BASE(137, 137, 2, 0x00a0, 0x10, 3, 1), + PIN_FIELD_BASE(138, 138, 2, 0x00a0, 0x10, 4, 1), + PIN_FIELD_BASE(139, 139, 1, 0x0080, 0x10, 3, 1), + PIN_FIELD_BASE(140, 140, 1, 0x0080, 0x10, 4, 1), + PIN_FIELD_BASE(141, 141, 1, 0x0080, 0x10, 0, 1), + PIN_FIELD_BASE(142, 142, 1, 0x0080, 0x10, 1, 1), + PIN_FIELD_BASE(143, 143, 1, 0x0080, 0x10, 2, 1), + PIN_FIELD_BASE(144, 144, 1, 0x0080, 0x10, 5, 1), + PIN_FIELD_BASE(145, 145, 1, 0x0080, 0x10, 6, 1), + PIN_FIELD_BASE(146, 146, 1, 0x0080, 0x10, 7, 1), + PIN_FIELD_BASE(147, 147, 1, 0x0080, 0x10, 8, 1), + PIN_FIELD_BASE(148, 148, 1, 0x0080, 0x10, 9, 1), + PIN_FIELD_BASE(149, 149, 1, 0x0080, 0x10, 10, 1), + PIN_FIELD_BASE(150, 150, 3, 0x0080, 0x10, 8, 1), + PIN_FIELD_BASE(151, 151, 1, 0x0080, 0x10, 29, 1), + PIN_FIELD_BASE(152, 152, 3, 0x0080, 0x10, 9, 1), + PIN_FIELD_BASE(153, 153, 3, 0x0080, 0x10, 10, 1), + PIN_FIELD_BASE(154, 154, 3, 0x0080, 0x10, 11, 1), + PIN_FIELD_BASE(155, 155, 3, 0x0080, 0x10, 12, 1), + PIN_FIELD_BASE(180, 180, 5, 0x0090, 0x10, 15, 1), + PIN_FIELD_BASE(181, 181, 5, 0x0090, 0x10, 16, 1), + PIN_FIELD_BASE(182, 182, 9, 0x0070, 0x10, 3, 1), +}; + +static const struct mtk_pin_field_calc mt8189_pin_drv_range[] = { + PIN_FIELD_BASE(0, 0, 7, 0x0000, 0x10, 15, 3), + PIN_FIELD_BASE(1, 1, 8, 0x0000, 0x10, 9, 3), + PIN_FIELD_BASE(2, 2, 8, 0x0000, 0x10, 12, 3), + PIN_FIELD_BASE(3, 3, 8, 0x0000, 0x10, 15, 3), + PIN_FIELD_BASE(4, 4, 8, 0x0000, 0x10, 18, 3), + PIN_FIELD_BASE(5, 5, 8, 0x0000, 0x10, 21, 3), + PIN_FIELD_BASE(6, 6, 7, 0x0000, 0x10, 18, 3), + PIN_FIELD_BASE(7, 7, 7, 0x0000, 0x10, 21, 3), + PIN_FIELD_BASE(8, 8, 7, 0x0000, 0x10, 24, 3), + PIN_FIELD_BASE(9, 9, 7, 0x0000, 0x10, 27, 3), + PIN_FIELD_BASE(10, 10, 7, 0x0010, 0x10, 0, 3), + PIN_FIELD_BASE(11, 11, 7, 0x0010, 
0x10, 3, 3), + PIN_FIELD_BASE(12, 12, 2, 0x0000, 0x10, 15, 3), + PIN_FIELD_BASE(13, 13, 2, 0x0000, 0x10, 18, 3), + PIN_FIELD_BASE(14, 14, 3, 0x0000, 0x10, 0, 3), + PIN_FIELD_BASE(15, 15, 3, 0x0000, 0x10, 3, 3), + PIN_FIELD_BASE(16, 16, 2, 0x0000, 0x10, 21, 3), + PIN_FIELD_BASE(17, 17, 2, 0x0000, 0x10, 24, 3), + PIN_FIELD_BASE(18, 18, 7, 0x0000, 0x10, 0, 3), + PIN_FIELD_BASE(19, 19, 7, 0x0000, 0x10, 6, 3), + PIN_FIELD_BASE(20, 20, 7, 0x0000, 0x10, 3, 3), + PIN_FIELD_BASE(21, 21, 7, 0x0000, 0x10, 9, 3), + PIN_FIELD_BASE(22, 22, 9, 0x0000, 0x10, 0, 3), + PIN_FIELD_BASE(23, 23, 9, 0x0000, 0x10, 3, 3), + PIN_FIELD_BASE(24, 24, 9, 0x0000, 0x10, 6, 3), + PIN_FIELD_BASE(25, 25, 4, 0x0000, 0x10, 6, 3), + PIN_FIELD_BASE(26, 26, 4, 0x0000, 0x10, 3, 3), + PIN_FIELD_BASE(27, 27, 2, 0x0000, 0x10, 3, 3), + PIN_FIELD_BASE(28, 28, 2, 0x0000, 0x10, 6, 3), + PIN_FIELD_BASE(29, 29, 4, 0x0000, 0x10, 0, 3), + PIN_FIELD_BASE(30, 30, 2, 0x0000, 0x10, 0, 3), + PIN_FIELD_BASE(31, 31, 3, 0x0010, 0x10, 27, 3), + PIN_FIELD_BASE(32, 32, 1, 0x0030, 0x10, 0, 3), + PIN_FIELD_BASE(33, 33, 3, 0x0020, 0x10, 3, 3), + PIN_FIELD_BASE(34, 34, 3, 0x0020, 0x10, 0, 3), + PIN_FIELD_BASE(35, 35, 3, 0x0020, 0x10, 9, 3), + PIN_FIELD_BASE(36, 36, 3, 0x0020, 0x10, 6, 3), + PIN_FIELD_BASE(37, 37, 3, 0x0020, 0x10, 15, 3), + PIN_FIELD_BASE(38, 38, 3, 0x0020, 0x10, 12, 3), + PIN_FIELD_BASE(39, 39, 3, 0x0000, 0x10, 15, 3), + PIN_FIELD_BASE(40, 40, 3, 0x0000, 0x10, 6, 3), + PIN_FIELD_BASE(41, 41, 3, 0x0000, 0x10, 9, 3), + PIN_FIELD_BASE(42, 42, 3, 0x0000, 0x10, 12, 3), + PIN_FIELD_BASE(43, 43, 3, 0x0000, 0x10, 18, 3), + PIN_FIELD_BASE(44, 44, 7, 0x0020, 0x10, 0, 3), + PIN_FIELD_BASE(45, 45, 7, 0x0020, 0x10, 3, 3), + PIN_FIELD_BASE(46, 46, 7, 0x0020, 0x10, 6, 3), + PIN_FIELD_BASE(47, 47, 7, 0x0020, 0x10, 9, 3), + PIN_FIELD_BASE(48, 48, 4, 0x0000, 0x10, 15, 3), + PIN_FIELD_BASE(49, 49, 4, 0x0000, 0x10, 12, 3), + PIN_FIELD_BASE(50, 50, 4, 0x0000, 0x10, 9, 3), + PIN_FIELD_BASE(51, 51, 8, 0x0000, 0x10, 24, 3), + 
PIN_FIELD_BASE(52, 52, 8, 0x0010, 0x10, 0, 3), + PIN_FIELD_BASE(53, 53, 8, 0x0000, 0x10, 27, 3), + PIN_FIELD_BASE(54, 54, 8, 0x0010, 0x10, 3, 3), + PIN_FIELD_BASE(55, 55, 4, 0x0000, 0x10, 18, 3), + PIN_FIELD_BASE(56, 56, 4, 0x0000, 0x10, 21, 3), + PIN_FIELD_BASE(57, 57, 2, 0x0010, 0x10, 9, 3), + PIN_FIELD_BASE(58, 58, 2, 0x0010, 0x10, 21, 3), + PIN_FIELD_BASE(59, 59, 2, 0x0010, 0x10, 12, 3), + PIN_FIELD_BASE(60, 60, 2, 0x0010, 0x10, 24, 3), + PIN_FIELD_BASE(61, 61, 2, 0x0010, 0x10, 15, 3), + PIN_FIELD_BASE(62, 62, 2, 0x0010, 0x10, 27, 3), + PIN_FIELD_BASE(63, 63, 2, 0x0010, 0x10, 18, 3), + PIN_FIELD_BASE(64, 64, 2, 0x0020, 0x10, 0, 3), + PIN_FIELD_BASE(65, 65, 9, 0x0010, 0x10, 0, 3), + PIN_FIELD_BASE(66, 66, 9, 0x0010, 0x10, 6, 3), + PIN_FIELD_BASE(67, 67, 9, 0x0010, 0x10, 3, 3), + PIN_FIELD_BASE(68, 68, 9, 0x0010, 0x10, 9, 3), + PIN_FIELD_BASE(69, 69, 2, 0x0020, 0x10, 6, 3), + PIN_FIELD_BASE(70, 70, 2, 0x0020, 0x10, 3, 3), + PIN_FIELD_BASE(71, 71, 2, 0x0020, 0x10, 12, 3), + PIN_FIELD_BASE(72, 72, 2, 0x0020, 0x10, 9, 3), + PIN_FIELD_BASE(73, 73, 2, 0x0020, 0x10, 18, 3), + PIN_FIELD_BASE(74, 74, 2, 0x0020, 0x10, 15, 3), + PIN_FIELD_BASE(75, 75, 3, 0x0010, 0x10, 9, 3), + PIN_FIELD_BASE(76, 76, 2, 0x0020, 0x10, 21, 3), + PIN_FIELD_BASE(77, 77, 8, 0x0010, 0x10, 9, 3), + PIN_FIELD_BASE(78, 78, 8, 0x0010, 0x10, 6, 3), + PIN_FIELD_BASE(79, 79, 8, 0x0010, 0x10, 15, 3), + PIN_FIELD_BASE(80, 80, 8, 0x0010, 0x10, 12, 3), + PIN_FIELD_BASE(81, 81, 2, 0x0020, 0x10, 27, 3), + PIN_FIELD_BASE(82, 82, 2, 0x0020, 0x10, 24, 3), + PIN_FIELD_BASE(83, 83, 2, 0x0030, 0x10, 0, 3), + PIN_FIELD_BASE(84, 84, 7, 0x0020, 0x10, 12, 3), + PIN_FIELD_BASE(85, 85, 7, 0x0020, 0x10, 15, 3), + PIN_FIELD_BASE(86, 86, 7, 0x0020, 0x10, 18, 3), + PIN_FIELD_BASE(87, 87, 7, 0x0020, 0x10, 21, 3), + PIN_FIELD_BASE(88, 88, 5, 0x0020, 0x10, 0, 3), + PIN_FIELD_BASE(89, 89, 5, 0x0010, 0x10, 27, 3), + PIN_FIELD_BASE(90, 90, 5, 0x0020, 0x10, 6, 3), + PIN_FIELD_BASE(91, 91, 5, 0x0020, 0x10, 3, 3), + 
PIN_FIELD_BASE(92, 92, 5, 0x0010, 0x10, 18, 3), + PIN_FIELD_BASE(93, 93, 5, 0x0010, 0x10, 21, 3), + PIN_FIELD_BASE(94, 94, 5, 0x0020, 0x10, 9, 3), + PIN_FIELD_BASE(95, 95, 5, 0x0010, 0x10, 15, 3), + PIN_FIELD_BASE(96, 96, 5, 0x0010, 0x10, 24, 3), + PIN_FIELD_BASE(97, 97, 5, 0x0000, 0x10, 0, 3), + PIN_FIELD_BASE(98, 98, 5, 0x0000, 0x10, 15, 3), + PIN_FIELD_BASE(99, 99, 5, 0x0000, 0x10, 9, 3), + PIN_FIELD_BASE(100, 100, 5, 0x0000, 0x10, 12, 3), + PIN_FIELD_BASE(101, 101, 5, 0x0000, 0x10, 3, 3), + PIN_FIELD_BASE(102, 102, 5, 0x0000, 0x10, 6, 3), + PIN_FIELD_BASE(103, 103, 7, 0x0010, 0x10, 15, 3), + PIN_FIELD_BASE(104, 104, 7, 0x0010, 0x10, 6, 3), + PIN_FIELD_BASE(105, 105, 7, 0x0010, 0x10, 12, 3), + PIN_FIELD_BASE(106, 106, 7, 0x0010, 0x10, 9, 3), + PIN_FIELD_BASE(107, 107, 7, 0x0010, 0x10, 27, 3), + PIN_FIELD_BASE(108, 108, 7, 0x0010, 0x10, 18, 3), + PIN_FIELD_BASE(109, 109, 7, 0x0010, 0x10, 24, 3), + PIN_FIELD_BASE(110, 110, 7, 0x0010, 0x10, 21, 3), + PIN_FIELD_BASE(111, 111, 7, 0x0000, 0x10, 12, 3), + PIN_FIELD_BASE(112, 112, 8, 0x0000, 0x10, 0, 3), + PIN_FIELD_BASE(113, 113, 8, 0x0000, 0x10, 3, 3), + PIN_FIELD_BASE(114, 114, 8, 0x0000, 0x10, 6, 3), + PIN_FIELD_BASE(115, 115, 2, 0x0000, 0x10, 27, 3), + PIN_FIELD_BASE(116, 116, 2, 0x0010, 0x10, 6, 3), + PIN_FIELD_BASE(117, 117, 2, 0x0010, 0x10, 0, 3), + PIN_FIELD_BASE(118, 118, 2, 0x0010, 0x10, 3, 3), + PIN_FIELD_BASE(119, 119, 1, 0x0020, 0x10, 18, 3), + PIN_FIELD_BASE(120, 120, 1, 0x0020, 0x10, 15, 3), + PIN_FIELD_BASE(121, 121, 1, 0x0020, 0x10, 12, 3), + PIN_FIELD_BASE(122, 122, 1, 0x0020, 0x10, 9, 3), + PIN_FIELD_BASE(123, 123, 1, 0x0010, 0x10, 27, 3), + PIN_FIELD_BASE(124, 124, 1, 0x0010, 0x10, 24, 3), + PIN_FIELD_BASE(125, 125, 1, 0x0010, 0x10, 21, 3), + PIN_FIELD_BASE(126, 126, 1, 0x0010, 0x10, 18, 3), + PIN_FIELD_BASE(127, 127, 1, 0x0020, 0x10, 6, 3), + PIN_FIELD_BASE(128, 128, 1, 0x0010, 0x10, 15, 3), + PIN_FIELD_BASE(129, 129, 1, 0x0020, 0x10, 0, 3), + PIN_FIELD_BASE(130, 130, 1, 0x0020, 0x10, 21, 3), + 
PIN_FIELD_BASE(131, 131, 1, 0x0010, 0x10, 9, 3), + PIN_FIELD_BASE(132, 132, 1, 0x0010, 0x10, 12, 3), + PIN_FIELD_BASE(133, 133, 1, 0x0020, 0x10, 24, 3), + PIN_FIELD_BASE(134, 134, 1, 0x0020, 0x10, 3, 3), + PIN_FIELD_BASE(135, 135, 1, 0x0010, 0x10, 3, 3), + PIN_FIELD_BASE(136, 136, 1, 0x0010, 0x10, 6, 3), + PIN_FIELD_BASE(137, 137, 2, 0x0000, 0x10, 9, 3), + PIN_FIELD_BASE(138, 138, 2, 0x0000, 0x10, 12, 3), + PIN_FIELD_BASE(139, 139, 1, 0x0000, 0x10, 9, 3), + PIN_FIELD_BASE(140, 140, 1, 0x0000, 0x10, 12, 3), + PIN_FIELD_BASE(141, 141, 1, 0x0000, 0x10, 0, 3), + PIN_FIELD_BASE(142, 142, 1, 0x0000, 0x10, 3, 3), + PIN_FIELD_BASE(143, 143, 1, 0x0000, 0x10, 6, 3), + PIN_FIELD_BASE(144, 144, 1, 0x0000, 0x10, 15, 3), + PIN_FIELD_BASE(145, 145, 1, 0x0000, 0x10, 18, 3), + PIN_FIELD_BASE(146, 146, 1, 0x0000, 0x10, 21, 3), + PIN_FIELD_BASE(147, 147, 1, 0x0000, 0x10, 24, 3), + PIN_FIELD_BASE(148, 148, 1, 0x0000, 0x10, 27, 3), + PIN_FIELD_BASE(149, 149, 1, 0x0010, 0x10, 0, 3), + PIN_FIELD_BASE(150, 150, 3, 0x0010, 0x10, 12, 3), + PIN_FIELD_BASE(151, 151, 1, 0x0020, 0x10, 27, 3), + PIN_FIELD_BASE(152, 152, 3, 0x0010, 0x10, 15, 3), + PIN_FIELD_BASE(153, 153, 3, 0x0010, 0x10, 18, 3), + PIN_FIELD_BASE(154, 154, 3, 0x0010, 0x10, 21, 3), + PIN_FIELD_BASE(155, 155, 3, 0x0010, 0x10, 24, 3), + PIN_FIELD_BASE(156, 156, 5, 0x0010, 0x10, 6, 3), + PIN_FIELD_BASE(157, 157, 5, 0x0010, 0x10, 3, 3), + PIN_FIELD_BASE(158, 158, 5, 0x0010, 0x10, 0, 3), + PIN_FIELD_BASE(159, 159, 6, 0x0000, 0x10, 6, 3), + PIN_FIELD_BASE(160, 160, 5, 0x0010, 0x10, 12, 3), + PIN_FIELD_BASE(161, 161, 5, 0x0000, 0x10, 21, 3), + PIN_FIELD_BASE(162, 162, 5, 0x0000, 0x10, 18, 3), + PIN_FIELD_BASE(163, 163, 6, 0x0000, 0x10, 3, 3), + PIN_FIELD_BASE(164, 164, 5, 0x0000, 0x10, 27, 3), + PIN_FIELD_BASE(165, 165, 5, 0x0000, 0x10, 24, 3), + PIN_FIELD_BASE(166, 166, 6, 0x0000, 0x10, 0, 3), + PIN_FIELD_BASE(167, 167, 5, 0x0010, 0x10, 9, 3), + PIN_FIELD_BASE(168, 168, 3, 0x0000, 0x10, 24, 3), + PIN_FIELD_BASE(169, 169, 3, 0x0000, 
0x10, 21, 3), + PIN_FIELD_BASE(170, 170, 3, 0x0000, 0x10, 27, 3), + PIN_FIELD_BASE(171, 171, 3, 0x0010, 0x10, 0, 3), + PIN_FIELD_BASE(172, 172, 3, 0x0010, 0x10, 3, 3), + PIN_FIELD_BASE(173, 173, 3, 0x0010, 0x10, 6, 3), + PIN_FIELD_BASE(174, 174, 9, 0x0000, 0x10, 15, 3), + PIN_FIELD_BASE(175, 175, 9, 0x0000, 0x10, 12, 3), + PIN_FIELD_BASE(176, 176, 9, 0x0000, 0x10, 18, 3), + PIN_FIELD_BASE(177, 177, 9, 0x0000, 0x10, 21, 3), + PIN_FIELD_BASE(178, 178, 9, 0x0000, 0x10, 24, 3), + PIN_FIELD_BASE(179, 179, 9, 0x0000, 0x10, 27, 3), + PIN_FIELD_BASE(180, 180, 5, 0x0020, 0x10, 12, 3), + PIN_FIELD_BASE(181, 181, 5, 0x0020, 0x10, 15, 3), + PIN_FIELD_BASE(182, 182, 9, 0x0000, 0x10, 9, 3), +}; + +static const struct mtk_pin_field_calc mt8189_pin_drv_adv_range[] = { + PIN_FIELD_BASE(51, 51, 8, 0x0020, 0x10, 0, 3), + PIN_FIELD_BASE(52, 52, 8, 0x0020, 0x10, 6, 3), + PIN_FIELD_BASE(53, 53, 8, 0x0020, 0x10, 3, 3), + PIN_FIELD_BASE(54, 54, 8, 0x0020, 0x10, 9, 3), + PIN_FIELD_BASE(55, 55, 4, 0x0020, 0x10, 0, 3), + PIN_FIELD_BASE(56, 56, 4, 0x0020, 0x10, 3, 3), + PIN_FIELD_BASE(57, 57, 2, 0x0040, 0x10, 0, 3), + PIN_FIELD_BASE(58, 58, 2, 0x0040, 0x10, 12, 3), + PIN_FIELD_BASE(59, 59, 2, 0x0040, 0x10, 3, 3), + PIN_FIELD_BASE(60, 60, 2, 0x0040, 0x10, 15, 3), + PIN_FIELD_BASE(61, 61, 2, 0x0040, 0x10, 6, 3), + PIN_FIELD_BASE(62, 62, 2, 0x0040, 0x10, 18, 3), + PIN_FIELD_BASE(63, 63, 2, 0x0040, 0x10, 9, 3), + PIN_FIELD_BASE(64, 64, 2, 0x0040, 0x10, 21, 3), + PIN_FIELD_BASE(65, 65, 9, 0x0020, 0x10, 0, 3), + PIN_FIELD_BASE(66, 66, 9, 0x0020, 0x10, 6, 3), + PIN_FIELD_BASE(67, 67, 9, 0x0020, 0x10, 3, 3), + PIN_FIELD_BASE(68, 68, 9, 0x0020, 0x10, 9, 3), + PIN_FIELD_BASE(180, 180, 5, 0x0030, 0x10, 0, 3), + PIN_FIELD_BASE(181, 181, 5, 0x0030, 0x10, 3, 3), +}; + +static const struct mtk_pin_field_calc mt8189_pin_rsel_range[] = { + PIN_FIELD_BASE(51, 51, 8, 0x00b0, 0x10, 0, 3), + PIN_FIELD_BASE(52, 52, 8, 0x00b0, 0x10, 6, 3), + PIN_FIELD_BASE(53, 53, 8, 0x00b0, 0x10, 3, 3), + PIN_FIELD_BASE(54, 54, 8, 
0x00b0, 0x10, 9, 3), + PIN_FIELD_BASE(55, 55, 4, 0x00b0, 0x10, 0, 3), + PIN_FIELD_BASE(56, 56, 4, 0x00b0, 0x10, 3, 3), + PIN_FIELD_BASE(57, 57, 2, 0x00d0, 0x10, 0, 3), + PIN_FIELD_BASE(58, 58, 2, 0x00d0, 0x10, 12, 3), + PIN_FIELD_BASE(59, 59, 2, 0x00d0, 0x10, 3, 3), + PIN_FIELD_BASE(60, 60, 2, 0x00d0, 0x10, 15, 3), + PIN_FIELD_BASE(61, 61, 2, 0x00d0, 0x10, 6, 3), + PIN_FIELD_BASE(62, 62, 2, 0x00d0, 0x10, 18, 3), + PIN_FIELD_BASE(63, 63, 2, 0x00d0, 0x10, 9, 3), + PIN_FIELD_BASE(64, 64, 2, 0x00d0, 0x10, 21, 3), + PIN_FIELD_BASE(65, 65, 9, 0x00e0, 0x10, 0, 3), + PIN_FIELD_BASE(66, 66, 9, 0x00e0, 0x10, 6, 3), + PIN_FIELD_BASE(67, 67, 9, 0x00e0, 0x10, 3, 3), + PIN_FIELD_BASE(68, 68, 9, 0x00e0, 0x10, 9, 3), + PIN_FIELD_BASE(180, 180, 5, 0x0110, 0x10, 0, 3), + PIN_FIELD_BASE(181, 181, 5, 0x0110, 0x10, 3, 3), +}; + +static const struct mtk_pin_rsel mt8189_pin_rsel_val_range[] = { + PIN_RSEL(51, 68, 0x0, 75000, 75000), + PIN_RSEL(51, 68, 0x1, 10000, 5000), + PIN_RSEL(51, 68, 0x2, 5000, 75000), + PIN_RSEL(51, 68, 0x3, 4000, 5000), + PIN_RSEL(51, 68, 0x4, 3000, 75000), + PIN_RSEL(51, 68, 0x5, 2000, 5000), + PIN_RSEL(51, 68, 0x6, 1500, 75000), + PIN_RSEL(51, 68, 0x7, 1000, 5000), + PIN_RSEL(180, 181, 0x0, 75000, 75000), + PIN_RSEL(180, 181, 0x1, 10000, 5000), + PIN_RSEL(180, 181, 0x2, 5000, 75000), + PIN_RSEL(180, 181, 0x3, 4000, 5000), + PIN_RSEL(180, 181, 0x4, 3000, 75000), + PIN_RSEL(180, 181, 0x5, 2000, 5000), + PIN_RSEL(180, 181, 0x6, 1500, 75000), + PIN_RSEL(180, 181, 0x7, 1000, 5000), +}; + +static const unsigned int mt8189_pull_type[] = { + MTK_PULL_PU_PD_TYPE, /*0*/ + MTK_PULL_PU_PD_TYPE, /*1*/ + MTK_PULL_PU_PD_TYPE, /*2*/ + MTK_PULL_PU_PD_TYPE, /*3*/ + MTK_PULL_PU_PD_TYPE, /*4*/ + MTK_PULL_PU_PD_TYPE, /*5*/ + MTK_PULL_PU_PD_TYPE, /*6*/ + MTK_PULL_PU_PD_TYPE, /*7*/ + MTK_PULL_PU_PD_TYPE, /*8*/ + MTK_PULL_PU_PD_TYPE, /*9*/ + MTK_PULL_PU_PD_TYPE, /*10*/ + MTK_PULL_PU_PD_TYPE, /*11*/ + MTK_PULL_PU_PD_TYPE, /*12*/ + MTK_PULL_PU_PD_TYPE, /*13*/ + MTK_PULL_PU_PD_TYPE, 
/*14*/ + MTK_PULL_PU_PD_TYPE, /*15*/ + MTK_PULL_PU_PD_TYPE, /*16*/ + MTK_PULL_PU_PD_TYPE, /*17*/ + MTK_PULL_PU_PD_TYPE, /*18*/ + MTK_PULL_PU_PD_TYPE, /*19*/ + MTK_PULL_PU_PD_TYPE, /*20*/ + MTK_PULL_PU_PD_TYPE, /*21*/ + MTK_PULL_PU_PD_TYPE, /*22*/ + MTK_PULL_PU_PD_TYPE, /*23*/ + MTK_PULL_PU_PD_TYPE, /*24*/ + MTK_PULL_PU_PD_TYPE, /*25*/ + MTK_PULL_PU_PD_TYPE, /*26*/ + MTK_PULL_PU_PD_TYPE, /*27*/ + MTK_PULL_PU_PD_TYPE, /*28*/ + MTK_PULL_PU_PD_TYPE, /*29*/ + MTK_PULL_PU_PD_TYPE, /*30*/ + MTK_PULL_PU_PD_TYPE, /*31*/ + MTK_PULL_PU_PD_TYPE, /*32*/ + MTK_PULL_PU_PD_TYPE, /*33*/ + MTK_PULL_PU_PD_TYPE, /*34*/ + MTK_PULL_PU_PD_TYPE, /*35*/ + MTK_PULL_PU_PD_TYPE, /*36*/ + MTK_PULL_PU_PD_TYPE, /*37*/ + MTK_PULL_PU_PD_TYPE, /*38*/ + MTK_PULL_PU_PD_TYPE, /*39*/ + MTK_PULL_PU_PD_TYPE, /*40*/ + MTK_PULL_PU_PD_TYPE, /*41*/ + MTK_PULL_PU_PD_TYPE, /*42*/ + MTK_PULL_PU_PD_TYPE, /*43*/ + MTK_PULL_PUPD_R1R0_TYPE, /*44*/ + MTK_PULL_PUPD_R1R0_TYPE, /*45*/ + MTK_PULL_PUPD_R1R0_TYPE, /*46*/ + MTK_PULL_PUPD_R1R0_TYPE, /*47*/ + MTK_PULL_PU_PD_TYPE, /*48*/ + MTK_PULL_PU_PD_TYPE, /*49*/ + MTK_PULL_PU_PD_TYPE, /*50*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*51*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*52*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*53*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*54*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*55*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*56*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*57*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*58*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*59*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*60*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*61*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*62*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*63*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*64*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*65*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*66*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*67*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*68*/ + MTK_PULL_PU_PD_TYPE, /*69*/ + MTK_PULL_PU_PD_TYPE, /*70*/ + MTK_PULL_PU_PD_TYPE, /*71*/ + MTK_PULL_PU_PD_TYPE, /*72*/ + MTK_PULL_PU_PD_TYPE, /*73*/ + MTK_PULL_PU_PD_TYPE, /*74*/ + MTK_PULL_PU_PD_TYPE, /*75*/ + MTK_PULL_PU_PD_TYPE, /*76*/ + MTK_PULL_PU_PD_TYPE, 
/*77*/ + MTK_PULL_PU_PD_TYPE, /*78*/ + MTK_PULL_PU_PD_TYPE, /*79*/ + MTK_PULL_PU_PD_TYPE, /*80*/ + MTK_PULL_PU_PD_TYPE, /*81*/ + MTK_PULL_PU_PD_TYPE, /*82*/ + MTK_PULL_PU_PD_TYPE, /*83*/ + MTK_PULL_PU_PD_TYPE, /*84*/ + MTK_PULL_PU_PD_TYPE, /*85*/ + MTK_PULL_PU_PD_TYPE, /*86*/ + MTK_PULL_PU_PD_TYPE, /*87*/ + MTK_PULL_PU_PD_TYPE, /*88*/ + MTK_PULL_PU_PD_TYPE, /*89*/ + MTK_PULL_PU_PD_TYPE, /*90*/ + MTK_PULL_PU_PD_TYPE, /*91*/ + MTK_PULL_PU_PD_TYPE, /*92*/ + MTK_PULL_PU_PD_TYPE, /*93*/ + MTK_PULL_PU_PD_TYPE, /*94*/ + MTK_PULL_PU_PD_TYPE, /*95*/ + MTK_PULL_PU_PD_TYPE, /*96*/ + MTK_PULL_PU_PD_TYPE, /*97*/ + MTK_PULL_PU_PD_TYPE, /*98*/ + MTK_PULL_PU_PD_TYPE, /*99*/ + MTK_PULL_PU_PD_TYPE, /*100*/ + MTK_PULL_PU_PD_TYPE, /*101*/ + MTK_PULL_PU_PD_TYPE, /*102*/ + MTK_PULL_PU_PD_TYPE, /*103*/ + MTK_PULL_PU_PD_TYPE, /*104*/ + MTK_PULL_PU_PD_TYPE, /*105*/ + MTK_PULL_PU_PD_TYPE, /*106*/ + MTK_PULL_PU_PD_TYPE, /*107*/ + MTK_PULL_PU_PD_TYPE, /*108*/ + MTK_PULL_PU_PD_TYPE, /*109*/ + MTK_PULL_PU_PD_TYPE, /*110*/ + MTK_PULL_PU_PD_TYPE, /*111*/ + MTK_PULL_PU_PD_TYPE, /*112*/ + MTK_PULL_PU_PD_TYPE, /*113*/ + MTK_PULL_PU_PD_TYPE, /*114*/ + MTK_PULL_PU_PD_TYPE, /*115*/ + MTK_PULL_PU_PD_TYPE, /*116*/ + MTK_PULL_PU_PD_TYPE, /*117*/ + MTK_PULL_PU_PD_TYPE, /*118*/ + MTK_PULL_PU_PD_TYPE, /*119*/ + MTK_PULL_PU_PD_TYPE, /*120*/ + MTK_PULL_PU_PD_TYPE, /*121*/ + MTK_PULL_PU_PD_TYPE, /*122*/ + MTK_PULL_PU_PD_TYPE, /*123*/ + MTK_PULL_PU_PD_TYPE, /*124*/ + MTK_PULL_PU_PD_TYPE, /*125*/ + MTK_PULL_PU_PD_TYPE, /*126*/ + MTK_PULL_PU_PD_TYPE, /*127*/ + MTK_PULL_PU_PD_TYPE, /*128*/ + MTK_PULL_PU_PD_TYPE, /*129*/ + MTK_PULL_PU_PD_TYPE, /*130*/ + MTK_PULL_PU_PD_TYPE, /*131*/ + MTK_PULL_PU_PD_TYPE, /*132*/ + MTK_PULL_PU_PD_TYPE, /*133*/ + MTK_PULL_PU_PD_TYPE, /*134*/ + MTK_PULL_PU_PD_TYPE, /*135*/ + MTK_PULL_PU_PD_TYPE, /*136*/ + MTK_PULL_PU_PD_TYPE, /*137*/ + MTK_PULL_PU_PD_TYPE, /*138*/ + MTK_PULL_PU_PD_TYPE, /*139*/ + MTK_PULL_PU_PD_TYPE, /*140*/ + MTK_PULL_PU_PD_TYPE, /*141*/ + MTK_PULL_PU_PD_TYPE, /*142*/ 
+ MTK_PULL_PU_PD_TYPE, /*143*/ + MTK_PULL_PU_PD_TYPE, /*144*/ + MTK_PULL_PU_PD_TYPE, /*145*/ + MTK_PULL_PU_PD_TYPE, /*146*/ + MTK_PULL_PU_PD_TYPE, /*147*/ + MTK_PULL_PU_PD_TYPE, /*148*/ + MTK_PULL_PU_PD_TYPE, /*149*/ + MTK_PULL_PU_PD_TYPE, /*150*/ + MTK_PULL_PU_PD_TYPE, /*151*/ + MTK_PULL_PU_PD_TYPE, /*152*/ + MTK_PULL_PU_PD_TYPE, /*153*/ + MTK_PULL_PU_PD_TYPE, /*154*/ + MTK_PULL_PU_PD_TYPE, /*155*/ + MTK_PULL_PUPD_R1R0_TYPE, /*156*/ + MTK_PULL_PUPD_R1R0_TYPE, /*157*/ + MTK_PULL_PUPD_R1R0_TYPE, /*158*/ + MTK_PULL_PUPD_R1R0_TYPE, /*159*/ + MTK_PULL_PUPD_R1R0_TYPE, /*160*/ + MTK_PULL_PUPD_R1R0_TYPE, /*161*/ + MTK_PULL_PUPD_R1R0_TYPE, /*162*/ + MTK_PULL_PUPD_R1R0_TYPE, /*163*/ + MTK_PULL_PUPD_R1R0_TYPE, /*164*/ + MTK_PULL_PUPD_R1R0_TYPE, /*165*/ + MTK_PULL_PUPD_R1R0_TYPE, /*166*/ + MTK_PULL_PUPD_R1R0_TYPE, /*167*/ + MTK_PULL_PUPD_R1R0_TYPE, /*168*/ + MTK_PULL_PUPD_R1R0_TYPE, /*169*/ + MTK_PULL_PUPD_R1R0_TYPE, /*170*/ + MTK_PULL_PUPD_R1R0_TYPE, /*171*/ + MTK_PULL_PUPD_R1R0_TYPE, /*172*/ + MTK_PULL_PUPD_R1R0_TYPE, /*173*/ + MTK_PULL_PUPD_R1R0_TYPE, /*174*/ + MTK_PULL_PUPD_R1R0_TYPE, /*175*/ + MTK_PULL_PUPD_R1R0_TYPE, /*176*/ + MTK_PULL_PUPD_R1R0_TYPE, /*177*/ + MTK_PULL_PUPD_R1R0_TYPE, /*178*/ + MTK_PULL_PUPD_R1R0_TYPE, /*179*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*180*/ + MTK_PULL_PU_PD_RSEL_TYPE, /*181*/ + MTK_PULL_PU_PD_TYPE, /*182*/ +}; + +static const struct mtk_pin_reg_calc mt8189_reg_cals[PINCTRL_PIN_REG_MAX] = { + [PINCTRL_PIN_REG_MODE] = MTK_RANGE(mt8189_pin_mode_range), + [PINCTRL_PIN_REG_DIR] = MTK_RANGE(mt8189_pin_dir_range), + [PINCTRL_PIN_REG_DI] = MTK_RANGE(mt8189_pin_di_range), + [PINCTRL_PIN_REG_DO] = MTK_RANGE(mt8189_pin_do_range), + [PINCTRL_PIN_REG_SMT] = MTK_RANGE(mt8189_pin_smt_range), + [PINCTRL_PIN_REG_IES] = MTK_RANGE(mt8189_pin_ies_range), + [PINCTRL_PIN_REG_TDSEL] = MTK_RANGE(mt8189_pin_tdsel_range), + [PINCTRL_PIN_REG_RDSEL] = MTK_RANGE(mt8189_pin_rdsel_range), + [PINCTRL_PIN_REG_PUPD] = MTK_RANGE(mt8189_pin_pupd_range), + [PINCTRL_PIN_REG_R0] = 
MTK_RANGE(mt8189_pin_r0_range), + [PINCTRL_PIN_REG_R1] = MTK_RANGE(mt8189_pin_r1_range), + [PINCTRL_PIN_REG_PU] = MTK_RANGE(mt8189_pin_pu_range), + [PINCTRL_PIN_REG_PD] = MTK_RANGE(mt8189_pin_pd_range), + [PINCTRL_PIN_REG_DRV] = MTK_RANGE(mt8189_pin_drv_range), + [PINCTRL_PIN_REG_DRV_ADV] = MTK_RANGE(mt8189_pin_drv_adv_range), + [PINCTRL_PIN_REG_RSEL] = MTK_RANGE(mt8189_pin_rsel_range), +}; + +static const char * const mt8189_pinctrl_register_base_names[] = { + "gpio_base", "iocfg_bm0_base", "iocfg_bm1_base", "iocfg_bm2_base", "iocfg_lm_base", + "iocfg_lt0_base", "iocfg_lt1_base", "iocfg_rb0_base", "iocfg_rb1_base", + "iocfg_rt_base" +}; + +static const struct mtk_eint_hw mt8189_eint_hw = { + .port_mask = 0xf, + .ports = 3, + .ap_num = 210, + .db_cnt = 32, + .db_time = debounce_time_mt6765, +}; + +static const struct mtk_pin_soc mt8189_data = { + .reg_cal = mt8189_reg_cals, + .pins = mtk_pins_mt8189, + .npins = ARRAY_SIZE(mtk_pins_mt8189), + .ngrps = ARRAY_SIZE(mtk_pins_mt8189), + .eint_pin = eint_pins_mt8189, + .eint_hw = &mt8189_eint_hw, + .nfuncs = 8, + .gpio_m = 0, + .base_names = mt8189_pinctrl_register_base_names, + .nbase_names = ARRAY_SIZE(mt8189_pinctrl_register_base_names), + .bias_set_combo = mtk_pinconf_bias_set_combo, + .bias_get_combo = mtk_pinconf_bias_get_combo, + .pull_type = mt8189_pull_type, + .pin_rsel = mt8189_pin_rsel_val_range, + .npin_rsel = ARRAY_SIZE(mt8189_pin_rsel_val_range), + .drive_set = mtk_pinconf_drive_set_rev1, + .drive_get = mtk_pinconf_drive_get_rev1, + .adv_drive_set = mtk_pinconf_adv_drive_set_raw, + .adv_drive_get = mtk_pinconf_adv_drive_get_raw, +}; + +static const struct of_device_id mt8189_pinctrl_of_match[] = { + { .compatible = "mediatek,mt8189-pinctrl", .data = &mt8189_data }, + { /* sentinel */ } +}; + +static struct platform_driver mt8189_pinctrl_driver = { + .driver = { + .name = "mt8189-pinctrl", + .of_match_table = mt8189_pinctrl_of_match, + .pm = pm_sleep_ptr(&mtk_paris_pinctrl_pm_ops), + }, + .probe = 
mtk_paris_pinctrl_probe, +}; + +static int __init mt8189_pinctrl_init(void) +{ + return platform_driver_register(&mt8189_pinctrl_driver); +} +arch_initcall(mt8189_pinctrl_init); + +MODULE_DESCRIPTION("MediaTek MT8189 Pinctrl Driver"); diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-mt8189.h b/drivers/pinctrl/mediatek/pinctrl-mtk-mt8189.h new file mode 100644 index 000000000000..771efb3da73f --- /dev/null +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-mt8189.h @@ -0,0 +1,2452 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2025 MediaTek Inc. + * Author: Lei Xue + * Cathy Xu + */ + +#ifndef __PINCTRL_MTK_MT8189_H +#define __PINCTRL_MTK_MT8189_H + +#include "pinctrl-paris.h" + +static const struct mtk_pin_desc mtk_pins_mt8189[] = { + MTK_PIN( + 0, "GPIO0", + MTK_EINT_FUNCTION(0, 0), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO0"), + MTK_FUNCTION(1, "TP_GPIO0_AO"), + MTK_FUNCTION(2, "SPIM3_A_CSB"), + MTK_FUNCTION(3, "I2SOUT0_MCK"), + MTK_FUNCTION(4, "SCP_SPI0_CS"), + MTK_FUNCTION(6, "CONN_BPI_BUS6"), + MTK_FUNCTION(7, "DBG_MON_A0") + ), + + MTK_PIN( + 1, "GPIO1", + MTK_EINT_FUNCTION(0, 1), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO1"), + MTK_FUNCTION(1, "TP_GPIO1_AO"), + MTK_FUNCTION(2, "SPIM3_A_CLK"), + MTK_FUNCTION(3, "I2SOUT0_BCK"), + MTK_FUNCTION(4, "SCP_SPI0_CK"), + MTK_FUNCTION(6, "CONN_BPI_BUS7"), + MTK_FUNCTION(7, "DBG_MON_A1") + ), + + MTK_PIN( + 2, "GPIO2", + MTK_EINT_FUNCTION(0, 2), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO2"), + MTK_FUNCTION(1, "TP_GPIO2_AO"), + MTK_FUNCTION(2, "SPIM3_A_MO"), + MTK_FUNCTION(3, "I2SOUT0_LRCK"), + MTK_FUNCTION(4, "SCP_SPI0_MO"), + MTK_FUNCTION(6, "CONN_BPI_BUS8"), + MTK_FUNCTION(7, "DBG_MON_A2") + ), + + MTK_PIN( + 3, "GPIO3", + MTK_EINT_FUNCTION(0, 3), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO3"), + MTK_FUNCTION(1, "TP_GPIO3_AO"), + MTK_FUNCTION(2, "SPIM3_A_MI"), + MTK_FUNCTION(3, "I2SOUT0_DO"), + MTK_FUNCTION(4, "SCP_SPI0_MI"), + MTK_FUNCTION(6, "CONN_BPI_BUS9"), + MTK_FUNCTION(7, "DBG_MON_A3") + ), + + MTK_PIN( + 4, "GPIO4", + 
MTK_EINT_FUNCTION(0, 4), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO4"), + MTK_FUNCTION(1, "TP_GPIO4_AO"), + MTK_FUNCTION(2, "SPIM4_A_CSB"), + MTK_FUNCTION(3, "I2SIN0_DI"), + MTK_FUNCTION(4, "SCP_SPI1_CS"), + MTK_FUNCTION(6, "CONN_BPI_BUS10"), + MTK_FUNCTION(7, "DBG_MON_A4") + ), + + MTK_PIN( + 5, "GPIO5", + MTK_EINT_FUNCTION(0, 5), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO5"), + MTK_FUNCTION(1, "TP_GPIO5_AO"), + MTK_FUNCTION(2, "SPIM4_A_CLK"), + MTK_FUNCTION(3, "I2SIN0_BCK"), + MTK_FUNCTION(4, "SCP_SPI1_CK"), + MTK_FUNCTION(6, "CONN_BPI_BUS11_OLAT0"), + MTK_FUNCTION(7, "DBG_MON_A5") + ), + + MTK_PIN( + 6, "GPIO6", + MTK_EINT_FUNCTION(0, 6), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO6"), + MTK_FUNCTION(1, "TP_GPIO6_AO"), + MTK_FUNCTION(2, "SPIM4_A_MO"), + MTK_FUNCTION(3, "I2SIN0_LRCK"), + MTK_FUNCTION(4, "SCP_SPI1_MO"), + MTK_FUNCTION(6, "CONN_BPI_BUS12_OLAT1"), + MTK_FUNCTION(7, "DBG_MON_A6") + ), + + MTK_PIN( + 7, "GPIO7", + MTK_EINT_FUNCTION(0, 7), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO7"), + MTK_FUNCTION(1, "TP_GPIO7_AO"), + MTK_FUNCTION(2, "SPIM4_A_MI"), + MTK_FUNCTION(3, "I2SIN0_MCK"), + MTK_FUNCTION(4, "SCP_SPI1_MI"), + MTK_FUNCTION(6, "CONN_BPI_BUS13_OLAT2"), + MTK_FUNCTION(7, "DBG_MON_A7") + ), + + MTK_PIN( + 8, "GPIO8", + MTK_EINT_FUNCTION(0, 8), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO8"), + MTK_FUNCTION(1, "TP_UTXD1_VLP"), + MTK_FUNCTION(2, "SPIM5_A_CSB"), + MTK_FUNCTION(3, "I2SOUT1_MCK"), + MTK_FUNCTION(4, "VADSP_UTXD0"), + MTK_FUNCTION(6, "CONN_BPI_BUS14_OLAT3"), + MTK_FUNCTION(7, "DBG_MON_A8") + ), + + MTK_PIN( + 9, "GPIO9", + MTK_EINT_FUNCTION(0, 9), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO9"), + MTK_FUNCTION(1, "TP_URXD1_VLP"), + MTK_FUNCTION(2, "SPIM5_A_CLK"), + MTK_FUNCTION(3, "I2SOUT1_BCK"), + MTK_FUNCTION(4, "VADSP_URXD0"), + MTK_FUNCTION(6, "CONN_BPI_BUS15_OLAT4"), + MTK_FUNCTION(7, "DBG_MON_A9") + ), + + MTK_PIN( + 10, "GPIO10", + MTK_EINT_FUNCTION(0, 10), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO10"), + MTK_FUNCTION(1, "TP_UCTS1_VLP"), + MTK_FUNCTION(2, "SPIM5_A_MO"), + 
MTK_FUNCTION(3, "I2SOUT1_LRCK"), + MTK_FUNCTION(4, "SRCLKENAI0"), + MTK_FUNCTION(6, "CONN_BPI_BUS16_OLAT5"), + MTK_FUNCTION(7, "DBG_MON_A10") + ), + + MTK_PIN( + 11, "GPIO11", + MTK_EINT_FUNCTION(0, 11), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO11"), + MTK_FUNCTION(1, "TP_URTS1_VLP"), + MTK_FUNCTION(2, "SPIM5_A_MI"), + MTK_FUNCTION(3, "I2SOUT1_DO"), + MTK_FUNCTION(4, "SRCLKENAI1"), + MTK_FUNCTION(5, "PWM_vlp"), + MTK_FUNCTION(7, "DBG_MON_A11") + ), + + MTK_PIN( + 12, "GPIO12", + MTK_EINT_FUNCTION(0, 12), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO12"), + MTK_FUNCTION(1, "TP_UTXD1_VCORE"), + MTK_FUNCTION(2, "UTXD3"), + MTK_FUNCTION(3, "CLKM0"), + MTK_FUNCTION(4, "CMFLASH0"), + MTK_FUNCTION(6, "ANT_SEL0"), + MTK_FUNCTION(7, "DBG_MON_B20") + ), + + MTK_PIN( + 13, "GPIO13", + MTK_EINT_FUNCTION(0, 13), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO13"), + MTK_FUNCTION(1, "TP_URXD1_VCORE"), + MTK_FUNCTION(2, "URXD3"), + MTK_FUNCTION(3, "CLKM1"), + MTK_FUNCTION(4, "CMFLASH1"), + MTK_FUNCTION(6, "ANT_SEL1"), + MTK_FUNCTION(7, "DBG_MON_B21") + ), + + MTK_PIN( + 14, "GPIO14", + MTK_EINT_FUNCTION(0, 14), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO14"), + MTK_FUNCTION(1, "TP_UCTS1_VCORE"), + MTK_FUNCTION(2, "UCTS3"), + MTK_FUNCTION(3, "CLKM2"), + MTK_FUNCTION(4, "CMFLASH2"), + MTK_FUNCTION(6, "ANT_SEL2"), + MTK_FUNCTION(7, "DBG_MON_B22") + ), + + MTK_PIN( + 15, "GPIO15", + MTK_EINT_FUNCTION(0, 15), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO15"), + MTK_FUNCTION(1, "TP_URTS1_VCORE"), + MTK_FUNCTION(2, "URTS3"), + MTK_FUNCTION(3, "CLKM3"), + MTK_FUNCTION(4, "CMVREF0"), + MTK_FUNCTION(6, "ANT_SEL3"), + MTK_FUNCTION(7, "DBG_MON_B23") + ), + + MTK_PIN( + 16, "GPIO16", + MTK_EINT_FUNCTION(0, 16), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO16"), + MTK_FUNCTION(1, "PWM_0"), + MTK_FUNCTION(2, "UCTS2"), + MTK_FUNCTION(3, "DP_TX_HPD"), + MTK_FUNCTION(4, "CMVREF1"), + MTK_FUNCTION(5, "MD32_0_GPIO0"), + MTK_FUNCTION(6, "ANT_SEL4"), + MTK_FUNCTION(7, "DBG_MON_B24") + ), + + MTK_PIN( + 17, "GPIO17", + MTK_EINT_FUNCTION(0, 17), + DRV_GRP4, 
+ MTK_FUNCTION(0, "GPIO17"), + MTK_FUNCTION(1, "PWM_1"), + MTK_FUNCTION(2, "URTS2"), + MTK_FUNCTION(3, "EDP_TX_HPD"), + MTK_FUNCTION(4, "CMVREF2"), + MTK_FUNCTION(5, "MD32_1_GPIO0"), + MTK_FUNCTION(6, "PMSR_SMAP"), + MTK_FUNCTION(7, "DBG_MON_B25") + ), + + MTK_PIN( + 18, "GPIO18", + MTK_EINT_FUNCTION(0, 18), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO18"), + MTK_FUNCTION(1, "CMFLASH0"), + MTK_FUNCTION(2, "CMVREF3"), + MTK_FUNCTION(3, "UTXD2"), + MTK_FUNCTION(4, "DISP_PWM1"), + MTK_FUNCTION(5, "I2SIN1_MCK"), + MTK_FUNCTION(6, "mbistreaden_trigger"), + MTK_FUNCTION(7, "DBG_MON_A12") + ), + + MTK_PIN( + 19, "GPIO19", + MTK_EINT_FUNCTION(0, 19), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO19"), + MTK_FUNCTION(1, "CMFLASH1"), + MTK_FUNCTION(2, "CMVREF2"), + MTK_FUNCTION(3, "URXD2"), + MTK_FUNCTION(4, "USB_DRVVBUS_1P"), + MTK_FUNCTION(5, "I2SIN1_BCK"), + MTK_FUNCTION(6, "mbistwriteen_trigger"), + MTK_FUNCTION(7, "DBG_MON_A13") + ), + + MTK_PIN( + 20, "GPIO20", + MTK_EINT_FUNCTION(0, 20), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO20"), + MTK_FUNCTION(1, "CMFLASH2"), + MTK_FUNCTION(2, "CMVREF1"), + MTK_FUNCTION(3, "UCTS2"), + MTK_FUNCTION(4, "PERSTN"), + MTK_FUNCTION(5, "I2SIN1_LRCK"), + MTK_FUNCTION(6, "DMIC0_DAT1"), + MTK_FUNCTION(7, "DBG_MON_A14") + ), + + MTK_PIN( + 21, "GPIO21", + MTK_EINT_FUNCTION(0, 21), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO21"), + MTK_FUNCTION(1, "CMFLASH3"), + MTK_FUNCTION(2, "CMVREF0"), + MTK_FUNCTION(3, "URTS2"), + MTK_FUNCTION(4, "CLKREQN"), + MTK_FUNCTION(5, "I2SIN1_DI"), + MTK_FUNCTION(6, "DMIC1_DAT1"), + MTK_FUNCTION(7, "DBG_MON_A15") + ), + + MTK_PIN( + 22, "GPIO22", + MTK_EINT_FUNCTION(0, 22), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO22"), + MTK_FUNCTION(1, "CMMCLK0"), + MTK_FUNCTION(2, "TP_GPIO4_AO") + ), + + MTK_PIN( + 23, "GPIO23", + MTK_EINT_FUNCTION(0, 23), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO23"), + MTK_FUNCTION(1, "CMMCLK1"), + MTK_FUNCTION(2, "TP_GPIO5_AO"), + MTK_FUNCTION(3, "SSPM_UTXD_AO_VLP"), + MTK_FUNCTION(4, "PWM_vlp"), + MTK_FUNCTION(6, "SRCLKENAI0") + ), + 
+ MTK_PIN( + 24, "GPIO24", + MTK_EINT_FUNCTION(0, 24), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO24"), + MTK_FUNCTION(1, "CMMCLK2"), + MTK_FUNCTION(2, "TP_GPIO6_AO"), + MTK_FUNCTION(3, "SSPM_URXD_AO_VLP"), + MTK_FUNCTION(4, "WAKEN"), + MTK_FUNCTION(5, "SPMI_P_TRIG_FLAG"), + MTK_FUNCTION(6, "SRCLKENAI1") + ), + + MTK_PIN( + 25, "GPIO25", + MTK_EINT_FUNCTION(0, 25), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO25"), + MTK_FUNCTION(1, "LCM_RST"), + MTK_FUNCTION(2, "DP_TX_HPD"), + MTK_FUNCTION(3, "CMFLASH3"), + MTK_FUNCTION(4, "MD32_0_GPIO0"), + MTK_FUNCTION(5, "USB_DRVVBUS_2P") + ), + + MTK_PIN( + 26, "GPIO26", + MTK_EINT_FUNCTION(0, 26), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO26"), + MTK_FUNCTION(1, "DSI_TE"), + MTK_FUNCTION(2, "EDP_TX_HPD"), + MTK_FUNCTION(3, "CMVREF3"), + MTK_FUNCTION(4, "MD32_1_GPIO0"), + MTK_FUNCTION(5, "USB_DRVVBUS_3P") + ), + + MTK_PIN( + 27, "GPIO27", + MTK_EINT_FUNCTION(0, 27), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO27"), + MTK_FUNCTION(1, "DP_TX_HPD"), + MTK_FUNCTION(2, "mbistreaden_trigger"), + MTK_FUNCTION(3, "MD32_0_GPIO0"), + MTK_FUNCTION(4, "TP_UCTS1_VCORE"), + MTK_FUNCTION(5, "CMVREF4"), + MTK_FUNCTION(6, "EXTIF0_ACT"), + MTK_FUNCTION(7, "ANT_SEL0") + ), + + MTK_PIN( + 28, "GPIO28", + MTK_EINT_FUNCTION(0, 28), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO28"), + MTK_FUNCTION(1, "EDP_TX_HPD"), + MTK_FUNCTION(2, "mbistwriteen_trigger"), + MTK_FUNCTION(3, "MD32_1_GPIO0"), + MTK_FUNCTION(4, "TP_URTS1_VCORE"), + MTK_FUNCTION(6, "EXTIF0_PRI"), + MTK_FUNCTION(7, "ANT_SEL1") + ), + + MTK_PIN( + 29, "GPIO29", + MTK_EINT_FUNCTION(0, 29), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO29"), + MTK_FUNCTION(1, "DISP_PWM0"), + MTK_FUNCTION(2, "MD32_1_TXD"), + MTK_FUNCTION(3, "SSPM_UTXD_AO_VCORE"), + MTK_FUNCTION(5, "USB_DRVVBUS_4P") + ), + + MTK_PIN( + 30, "GPIO30", + MTK_EINT_FUNCTION(0, 30), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO30"), + MTK_FUNCTION(1, "DISP_PWM1"), + MTK_FUNCTION(2, "MD32_1_RXD"), + MTK_FUNCTION(3, "SSPM_URXD_AO_VCORE"), + MTK_FUNCTION(5, "PMSR_SMAP"), + MTK_FUNCTION(6, 
"EXTIF0_GNT_B"), + MTK_FUNCTION(7, "ANT_SEL2") + ), + + MTK_PIN( + 31, "GPIO31", + MTK_EINT_FUNCTION(0, 31), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO31"), + MTK_FUNCTION(1, "UTXD0"), + MTK_FUNCTION(2, "MD32_0_TXD") + ), + + MTK_PIN( + 32, "GPIO32", + MTK_EINT_FUNCTION(0, 32), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO32"), + MTK_FUNCTION(1, "URXD0"), + MTK_FUNCTION(2, "MD32_0_RXD") + ), + + MTK_PIN( + 33, "GPIO33", + MTK_EINT_FUNCTION(0, 33), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO33"), + MTK_FUNCTION(1, "UTXD1"), + MTK_FUNCTION(2, "VADSP_UTXD0"), + MTK_FUNCTION(3, "TP_UTXD1_VLP"), + MTK_FUNCTION(4, "MD32_1_TXD"), + MTK_FUNCTION(5, "CONN_BGF_UART0_TXD"), + MTK_FUNCTION(6, "CONN_WIFI_TXD") + ), + + MTK_PIN( + 34, "GPIO34", + MTK_EINT_FUNCTION(0, 34), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO34"), + MTK_FUNCTION(1, "URXD1"), + MTK_FUNCTION(2, "VADSP_URXD0"), + MTK_FUNCTION(3, "TP_URXD1_VLP"), + MTK_FUNCTION(4, "MD32_1_RXD"), + MTK_FUNCTION(5, "CONN_BGF_UART0_RXD") + ), + + MTK_PIN( + 35, "GPIO35", + MTK_EINT_FUNCTION(0, 35), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO35"), + MTK_FUNCTION(1, "UTXD2"), + MTK_FUNCTION(2, "UCTS1"), + MTK_FUNCTION(3, "TP_UCTS1_VLP"), + MTK_FUNCTION(4, "SSPM_UTXD_AO_VLP"), + MTK_FUNCTION(5, "VADSP_UTXD0"), + MTK_FUNCTION(6, "CONN_BT_TXD") + ), + + MTK_PIN( + 36, "GPIO36", + MTK_EINT_FUNCTION(0, 36), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO36"), + MTK_FUNCTION(1, "URXD2"), + MTK_FUNCTION(2, "URTS1"), + MTK_FUNCTION(3, "TP_URTS1_VLP"), + MTK_FUNCTION(4, "SSPM_URXD_AO_VLP"), + MTK_FUNCTION(5, "VADSP_URXD0") + ), + + MTK_PIN( + 37, "GPIO37", + MTK_EINT_FUNCTION(0, 37), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO37"), + MTK_FUNCTION(1, "UTXD3"), + MTK_FUNCTION(2, "UCTS0"), + MTK_FUNCTION(3, "TP_UTXD1_VCORE"), + MTK_FUNCTION(4, "SSPM_UTXD_AO_VCORE"), + MTK_FUNCTION(6, "MD32_0_TXD"), + MTK_FUNCTION(7, "CONN_BGF_UART0_TXD") + ), + + MTK_PIN( + 38, "GPIO38", + MTK_EINT_FUNCTION(0, 38), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO38"), + MTK_FUNCTION(1, "URXD3"), + MTK_FUNCTION(2, "URTS0"), + 
MTK_FUNCTION(3, "TP_URXD1_VCORE"), + MTK_FUNCTION(4, "SSPM_URXD_AO_VCORE"), + MTK_FUNCTION(6, "MD32_0_RXD"), + MTK_FUNCTION(7, "CONN_BGF_UART0_RXD") + ), + + MTK_PIN( + 39, "GPIO39", + MTK_EINT_FUNCTION(0, 39), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO39"), + MTK_FUNCTION(1, "JTMS_SEL1") + ), + + MTK_PIN( + 40, "GPIO40", + MTK_EINT_FUNCTION(0, 40), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO40"), + MTK_FUNCTION(1, "JTCK_SEL1") + ), + + MTK_PIN( + 41, "GPIO41", + MTK_EINT_FUNCTION(0, 41), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO41"), + MTK_FUNCTION(1, "JTDI_SEL1") + ), + + MTK_PIN( + 42, "GPIO42", + MTK_EINT_FUNCTION(0, 42), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO42"), + MTK_FUNCTION(1, "JTDO_SEL1") + ), + + MTK_PIN( + 43, "GPIO43", + MTK_EINT_FUNCTION(0, 43), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO43"), + MTK_FUNCTION(1, "JTRSTn_SEL1") + ), + + MTK_PIN( + 44, "GPIO44", + MTK_EINT_FUNCTION(0, 44), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO44"), + MTK_FUNCTION(1, "KPCOL0") + ), + + MTK_PIN( + 45, "GPIO45", + MTK_EINT_FUNCTION(0, 45), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO45"), + MTK_FUNCTION(1, "KPCOL1"), + MTK_FUNCTION(2, "TP_GPIO0_AO"), + MTK_FUNCTION(3, "SRCLKENAI1"), + MTK_FUNCTION(7, "DBG_MON_A31") + ), + + MTK_PIN( + 46, "GPIO46", + MTK_EINT_FUNCTION(0, 46), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO46"), + MTK_FUNCTION(1, "KPROW0"), + MTK_FUNCTION(2, "TP_GPIO1_AO") + ), + + MTK_PIN( + 47, "GPIO47", + MTK_EINT_FUNCTION(0, 47), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO47"), + MTK_FUNCTION(1, "KPROW1"), + MTK_FUNCTION(2, "TP_GPIO2_AO"), + MTK_FUNCTION(3, "SRCLKENAI0"), + MTK_FUNCTION(7, "DBG_MON_A32") + ), + + MTK_PIN( + 48, "GPIO48", + MTK_EINT_FUNCTION(0, 48), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO48"), + MTK_FUNCTION(1, "WAKEN"), + MTK_FUNCTION(2, "TP_GPIO3_AO"), + MTK_FUNCTION(3, "SPMI_P_TRIG_FLAG") + ), + + MTK_PIN( + 49, "GPIO49", + MTK_EINT_FUNCTION(0, 49), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO49"), + MTK_FUNCTION(1, "PERSTN"), + MTK_FUNCTION(2, "MD32_0_GPIO0"), + MTK_FUNCTION(3, "UFS_MPHY_SCL"), + 
MTK_FUNCTION(7, "ANT_SEL3") + ), + + MTK_PIN( + 50, "GPIO50", + MTK_EINT_FUNCTION(0, 50), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO50"), + MTK_FUNCTION(1, "CLKREQN"), + MTK_FUNCTION(2, "MD32_1_GPIO0"), + MTK_FUNCTION(3, "UFS_MPHY_SDA"), + MTK_FUNCTION(7, "ANT_SEL4") + ), + + MTK_PIN( + 51, "GPIO51", + MTK_EINT_FUNCTION(0, 51), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO51"), + MTK_FUNCTION(1, "SCP_SCL0"), + MTK_FUNCTION(2, "SCL0") + ), + + MTK_PIN( + 52, "GPIO52", + MTK_EINT_FUNCTION(0, 52), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO52"), + MTK_FUNCTION(1, "SCP_SDA0"), + MTK_FUNCTION(2, "SDA0") + ), + + MTK_PIN( + 53, "GPIO53", + MTK_EINT_FUNCTION(0, 53), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO53"), + MTK_FUNCTION(1, "SCP_SCL1"), + MTK_FUNCTION(2, "SCL1") + ), + + MTK_PIN( + 54, "GPIO54", + MTK_EINT_FUNCTION(0, 54), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO54"), + MTK_FUNCTION(1, "SCP_SDA1"), + MTK_FUNCTION(2, "SDA1") + ), + + MTK_PIN( + 55, "GPIO55", + MTK_EINT_FUNCTION(0, 55), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO55"), + MTK_FUNCTION(1, "SCL2"), + MTK_FUNCTION(2, "UFS_MPHY_SCL"), + MTK_FUNCTION(3, "SSUSB_U2SIF_SCL") + ), + + MTK_PIN( + 56, "GPIO56", + MTK_EINT_FUNCTION(0, 56), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO56"), + MTK_FUNCTION(1, "SDA2"), + MTK_FUNCTION(2, "UFS_MPHY_SDA"), + MTK_FUNCTION(3, "SSUSB_U2SIF_SDA") + ), + + MTK_PIN( + 57, "GPIO57", + MTK_EINT_FUNCTION(0, 57), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO57"), + MTK_FUNCTION(1, "SCL3"), + MTK_FUNCTION(2, "PCIE_PHY_I2C_SCL"), + MTK_FUNCTION(3, "SSUSB_U2SIF_SCL_1P") + ), + + MTK_PIN( + 58, "GPIO58", + MTK_EINT_FUNCTION(0, 58), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO58"), + MTK_FUNCTION(1, "SDA3"), + MTK_FUNCTION(2, "PCIE_PHY_I2C_SDA"), + MTK_FUNCTION(3, "SSUSB_U2SIF_SDA_1P") + ), + + MTK_PIN( + 59, "GPIO59", + MTK_EINT_FUNCTION(0, 59), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO59"), + MTK_FUNCTION(1, "SCL4"), + MTK_FUNCTION(2, "SSUSB_U3PHY_I2C_SCL") + ), + + MTK_PIN( + 60, "GPIO60", + MTK_EINT_FUNCTION(0, 60), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO60"), + 
MTK_FUNCTION(1, "SDA4"), + MTK_FUNCTION(2, "SSUSB_U3PHY_I2C_SDA") + ), + + MTK_PIN( + 61, "GPIO61", + MTK_EINT_FUNCTION(0, 61), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO61"), + MTK_FUNCTION(1, "SCL5"), + MTK_FUNCTION(2, "SSPXTP_U3PHY_I2C_SCL") + ), + + MTK_PIN( + 62, "GPIO62", + MTK_EINT_FUNCTION(0, 62), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO62"), + MTK_FUNCTION(1, "SDA5"), + MTK_FUNCTION(2, "SSPXTP_U3PHY_I2C_SDA") + ), + + MTK_PIN( + 63, "GPIO63", + MTK_EINT_FUNCTION(0, 63), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO63"), + MTK_FUNCTION(1, "SCL6") + ), + + MTK_PIN( + 64, "GPIO64", + MTK_EINT_FUNCTION(0, 64), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO64"), + MTK_FUNCTION(1, "SDA6") + ), + + MTK_PIN( + 65, "GPIO65", + MTK_EINT_FUNCTION(0, 65), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO65"), + MTK_FUNCTION(1, "SCL7") + ), + + MTK_PIN( + 66, "GPIO66", + MTK_EINT_FUNCTION(0, 66), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO66"), + MTK_FUNCTION(1, "SDA7") + ), + + MTK_PIN( + 67, "GPIO67", + MTK_EINT_FUNCTION(0, 67), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO67"), + MTK_FUNCTION(1, "SCL8") + ), + + MTK_PIN( + 68, "GPIO68", + MTK_EINT_FUNCTION(0, 68), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO68"), + MTK_FUNCTION(1, "SDA8") + ), + + MTK_PIN( + 69, "GPIO69", + MTK_EINT_FUNCTION(0, 69), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO69"), + MTK_FUNCTION(1, "SPIM0_CSB"), + MTK_FUNCTION(2, "SCP_SPI0_CS"), + MTK_FUNCTION(3, "SPM_JTAG_TMS_VCORE"), + MTK_FUNCTION(4, "VADSP_JTAG0_TMS"), + MTK_FUNCTION(5, "SPM_JTAG_TMS"), + MTK_FUNCTION(6, "SSPM_JTAG_TMS_VLP"), + MTK_FUNCTION(7, "SCP_JTAG0_TMS_VLP") + ), + + MTK_PIN( + 70, "GPIO70", + MTK_EINT_FUNCTION(0, 70), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO70"), + MTK_FUNCTION(1, "SPIM0_CLK"), + MTK_FUNCTION(2, "SCP_SPI0_CK"), + MTK_FUNCTION(3, "SPM_JTAG_TCK_VCORE"), + MTK_FUNCTION(4, "VADSP_JTAG0_TCK"), + MTK_FUNCTION(5, "SPM_JTAG_TCK"), + MTK_FUNCTION(6, "SSPM_JTAG_TCK_VLP"), + MTK_FUNCTION(7, "SCP_JTAG0_TCK_VLP") + ), + + MTK_PIN( + 71, "GPIO71", + MTK_EINT_FUNCTION(0, 71), + DRV_GRP4, + MTK_FUNCTION(0, 
"GPIO71"), + MTK_FUNCTION(1, "SPIM0_MO"), + MTK_FUNCTION(2, "SCP_SPI0_MO"), + MTK_FUNCTION(3, "SPM_JTAG_TDI_VCORE"), + MTK_FUNCTION(4, "VADSP_JTAG0_TDI"), + MTK_FUNCTION(5, "SPM_JTAG_TDI"), + MTK_FUNCTION(6, "SSPM_JTAG_TDI_VLP"), + MTK_FUNCTION(7, "SCP_JTAG0_TDI_VLP") + ), + + MTK_PIN( + 72, "GPIO72", + MTK_EINT_FUNCTION(0, 72), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO72"), + MTK_FUNCTION(1, "SPIM0_MI"), + MTK_FUNCTION(2, "SCP_SPI0_MI"), + MTK_FUNCTION(3, "SPM_JTAG_TDO_VCORE"), + MTK_FUNCTION(4, "VADSP_JTAG0_TDO"), + MTK_FUNCTION(5, "SPM_JTAG_TDO"), + MTK_FUNCTION(6, "SSPM_JTAG_TDO_VLP"), + MTK_FUNCTION(7, "SCP_JTAG0_TDO_VLP") + ), + + MTK_PIN( + 73, "GPIO73", + MTK_EINT_FUNCTION(0, 73), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO73"), + MTK_FUNCTION(1, "SPIM1_CSB"), + MTK_FUNCTION(2, "SCP_SPI1_CS"), + MTK_FUNCTION(3, "SPM_JTAG_TRSTN_VCORE"), + MTK_FUNCTION(4, "VADSP_JTAG0_TRSTN"), + MTK_FUNCTION(5, "SPM_JTAG_TRSTN"), + MTK_FUNCTION(6, "SSPM_JTAG_TRSTN_VLP"), + MTK_FUNCTION(7, "SCP_JTAG0_TRSTN_VLP") + ), + + MTK_PIN( + 74, "GPIO74", + MTK_EINT_FUNCTION(0, 74), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO74"), + MTK_FUNCTION(1, "SPIM1_CLK"), + MTK_FUNCTION(2, "SCP_SPI1_CK") + ), + + MTK_PIN( + 75, "GPIO75", + MTK_EINT_FUNCTION(0, 75), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO75"), + MTK_FUNCTION(1, "SPIM1_MO"), + MTK_FUNCTION(2, "SCP_SPI1_MO") + ), + + MTK_PIN( + 76, "GPIO76", + MTK_EINT_FUNCTION(0, 76), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO76"), + MTK_FUNCTION(1, "SPIM1_MI"), + MTK_FUNCTION(2, "SCP_SPI1_MI") + ), + + MTK_PIN( + 77, "GPIO77", + MTK_EINT_FUNCTION(0, 77), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO77"), + MTK_FUNCTION(1, "SPIM2_CSB"), + MTK_FUNCTION(2, "PCM0_SYNC"), + MTK_FUNCTION(3, "SSUSB_U2SIF_SCL"), + MTK_FUNCTION(7, "DBG_MON_A27") + ), + + MTK_PIN( + 78, "GPIO78", + MTK_EINT_FUNCTION(0, 78), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO78"), + MTK_FUNCTION(1, "SPIM2_CLK"), + MTK_FUNCTION(2, "PCM0_CLK"), + MTK_FUNCTION(3, "SSUSB_U2SIF_SDA"), + MTK_FUNCTION(7, "DBG_MON_A28") + ), + + MTK_PIN( + 
79, "GPIO79", + MTK_EINT_FUNCTION(0, 79), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO79"), + MTK_FUNCTION(1, "SPIM2_MO"), + MTK_FUNCTION(2, "PCM0_DO"), + MTK_FUNCTION(3, "SSUSB_U2SIF_SCL_1P"), + MTK_FUNCTION(7, "DBG_MON_A29") + ), + + MTK_PIN( + 80, "GPIO80", + MTK_EINT_FUNCTION(0, 80), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO80"), + MTK_FUNCTION(1, "SPIM2_MI"), + MTK_FUNCTION(2, "PCM0_DI"), + MTK_FUNCTION(3, "SSUSB_U2SIF_SDA_1P"), + MTK_FUNCTION(7, "DBG_MON_A30") + ), + + MTK_PIN( + 81, "GPIO81", + MTK_EINT_FUNCTION(0, 81), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO81"), + MTK_FUNCTION(1, "IDDIG"), + MTK_FUNCTION(7, "DBG_MON_B32") + ), + + MTK_PIN( + 82, "GPIO82", + MTK_EINT_FUNCTION(0, 82), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO82"), + MTK_FUNCTION(1, "USB_DRVVBUS") + ), + + MTK_PIN( + 83, "GPIO83", + MTK_EINT_FUNCTION(0, 83), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO83"), + MTK_FUNCTION(1, "VBUSVALID") + ), + + MTK_PIN( + 84, "GPIO84", + MTK_EINT_FUNCTION(0, 84), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO84"), + MTK_FUNCTION(1, "USB_DRVVBUS_1P"), + MTK_FUNCTION(7, "DBG_MON_A16") + ), + + MTK_PIN( + 85, "GPIO85", + MTK_EINT_FUNCTION(0, 85), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO85"), + MTK_FUNCTION(1, "USB_DRVVBUS_2P"), + MTK_FUNCTION(7, "DBG_MON_A17") + ), + + MTK_PIN( + 86, "GPIO86", + MTK_EINT_FUNCTION(0, 86), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO86"), + MTK_FUNCTION(1, "USB_DRVVBUS_3P"), + MTK_FUNCTION(7, "DBG_MON_A18") + ), + + MTK_PIN( + 87, "GPIO87", + MTK_EINT_FUNCTION(0, 87), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO87"), + MTK_FUNCTION(1, "USB_DRVVBUS_4P"), + MTK_FUNCTION(6, "CMVREF4"), + MTK_FUNCTION(7, "DBG_MON_A19") + ), + + MTK_PIN( + 88, "GPIO88", + MTK_EINT_FUNCTION(0, 88), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO88"), + MTK_FUNCTION(1, "PWRAP_SPI0_CSN") + ), + + MTK_PIN( + 89, "GPIO89", + MTK_EINT_FUNCTION(0, 89), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO89"), + MTK_FUNCTION(1, "PWRAP_SPI0_CK") + ), + + MTK_PIN( + 90, "GPIO90", + MTK_EINT_FUNCTION(0, 90), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO90"), + 
MTK_FUNCTION(1, "PWRAP_SPI0_MO") + ), + + MTK_PIN( + 91, "GPIO91", + MTK_EINT_FUNCTION(0, 91), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO91"), + MTK_FUNCTION(1, "PWRAP_SPI0_MI") + ), + + MTK_PIN( + 92, "GPIO92", + MTK_EINT_FUNCTION(0, 92), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO92"), + MTK_FUNCTION(1, "SRCLKENA0") + ), + + MTK_PIN( + 93, "GPIO93", + MTK_EINT_FUNCTION(0, 93), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO93"), + MTK_FUNCTION(1, "SRCLKENA1") + ), + + MTK_PIN( + 94, "GPIO94", + MTK_EINT_FUNCTION(0, 94), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO94"), + MTK_FUNCTION(1, "SCP_VREQ_VAO") + ), + + MTK_PIN( + 95, "GPIO95", + MTK_EINT_FUNCTION(0, 95), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO95"), + MTK_FUNCTION(1, "RTC32K_CK") + ), + + MTK_PIN( + 96, "GPIO96", + MTK_EINT_FUNCTION(0, 96), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO96"), + MTK_FUNCTION(1, "WATCHDOG") + ), + + MTK_PIN( + 97, "GPIO97", + MTK_EINT_FUNCTION(0, 97), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO97"), + MTK_FUNCTION(1, "AUD_CLK_MOSI") + ), + + MTK_PIN( + 98, "GPIO98", + MTK_EINT_FUNCTION(0, 98), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO98"), + MTK_FUNCTION(1, "AUD_SYNC_MOSI") + ), + + MTK_PIN( + 99, "GPIO99", + MTK_EINT_FUNCTION(0, 99), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO99"), + MTK_FUNCTION(1, "AUD_DAT_MOSI0") + ), + + MTK_PIN( + 100, "GPIO100", + MTK_EINT_FUNCTION(0, 100), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO100"), + MTK_FUNCTION(1, "AUD_DAT_MOSI1") + ), + + MTK_PIN( + 101, "GPIO101", + MTK_EINT_FUNCTION(0, 101), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO101"), + MTK_FUNCTION(1, "AUD_DAT_MISO0") + ), + + MTK_PIN( + 102, "GPIO102", + MTK_EINT_FUNCTION(0, 102), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO102"), + MTK_FUNCTION(1, "AUD_DAT_MISO1") + ), + + MTK_PIN( + 103, "GPIO103", + MTK_EINT_FUNCTION(0, 103), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO103"), + MTK_FUNCTION(1, "I2SIN0_MCK"), + MTK_FUNCTION(2, "SPIM3_B_CSB"), + MTK_FUNCTION(3, "APU_JTAG_TMS"), + MTK_FUNCTION(4, "SCP_JTAG0_TMS_VCORE"), + MTK_FUNCTION(5, "CONN_WF_MCU_TMS"), + MTK_FUNCTION(6, 
"SSPM_JTAG_TMS_VCORE"), + MTK_FUNCTION(7, "IPU_JTAG_TMS") + ), + + MTK_PIN( + 104, "GPIO104", + MTK_EINT_FUNCTION(0, 104), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO104"), + MTK_FUNCTION(1, "I2SIN0_BCK"), + MTK_FUNCTION(2, "SPIM3_B_CLK"), + MTK_FUNCTION(3, "APU_JTAG_TCK"), + MTK_FUNCTION(4, "SCP_JTAG0_TCK_VCORE"), + MTK_FUNCTION(5, "CONN_WF_MCU_TCK"), + MTK_FUNCTION(6, "SSPM_JTAG_TCK_VCORE"), + MTK_FUNCTION(7, "IPU_JTAG_TCK") + ), + + MTK_PIN( + 105, "GPIO105", + MTK_EINT_FUNCTION(0, 105), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO105"), + MTK_FUNCTION(1, "I2SIN0_LRCK"), + MTK_FUNCTION(2, "SPIM3_B_MO"), + MTK_FUNCTION(3, "APU_JTAG_TDI"), + MTK_FUNCTION(4, "SCP_JTAG0_TDI_VCORE"), + MTK_FUNCTION(5, "CONN_WF_MCU_TDI"), + MTK_FUNCTION(6, "SSPM_JTAG_TDI_VCORE"), + MTK_FUNCTION(7, "IPU_JTAG_TDI") + ), + + MTK_PIN( + 106, "GPIO106", + MTK_EINT_FUNCTION(0, 106), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO106"), + MTK_FUNCTION(1, "I2SIN0_DI"), + MTK_FUNCTION(2, "SPIM3_B_MI"), + MTK_FUNCTION(3, "APU_JTAG_TDO"), + MTK_FUNCTION(4, "SCP_JTAG0_TDO_VCORE"), + MTK_FUNCTION(5, "CONN_WF_MCU_TDO"), + MTK_FUNCTION(6, "SSPM_JTAG_TDO_VCORE"), + MTK_FUNCTION(7, "IPU_JTAG_TDO") + ), + + MTK_PIN( + 107, "GPIO107", + MTK_EINT_FUNCTION(0, 107), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO107"), + MTK_FUNCTION(1, "I2SOUT0_MCK"), + MTK_FUNCTION(2, "SPIM4_B_CSB"), + MTK_FUNCTION(3, "APU_JTAG_TRST"), + MTK_FUNCTION(4, "SCP_JTAG0_TRSTN_VCORE"), + MTK_FUNCTION(5, "CONN_WF_MCU_TRST_B"), + MTK_FUNCTION(6, "SSPM_JTAG_TRSTN_VCORE"), + MTK_FUNCTION(7, "IPU_JTAG_TRST") + ), + + MTK_PIN( + 108, "GPIO108", + MTK_EINT_FUNCTION(0, 108), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO108"), + MTK_FUNCTION(1, "I2SOUT0_BCK"), + MTK_FUNCTION(2, "SPIM4_B_CLK"), + MTK_FUNCTION(3, "EXTIF0_ACT"), + MTK_FUNCTION(4, "SPM_JTAG_TMS_VCORE"), + MTK_FUNCTION(6, "CLKM2"), + MTK_FUNCTION(7, "DBG_MON_A20") + ), + + MTK_PIN( + 109, "GPIO109", + MTK_EINT_FUNCTION(0, 109), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO109"), + MTK_FUNCTION(1, "I2SOUT0_LRCK"), + MTK_FUNCTION(2, 
"SPIM4_B_MO"), + MTK_FUNCTION(3, "EXTIF0_PRI"), + MTK_FUNCTION(4, "SPM_JTAG_TCK_VCORE"), + MTK_FUNCTION(6, "CLKM3"), + MTK_FUNCTION(7, "DBG_MON_A21") + ), + + MTK_PIN( + 110, "GPIO110", + MTK_EINT_FUNCTION(0, 110), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO110"), + MTK_FUNCTION(1, "I2SOUT0_DO"), + MTK_FUNCTION(2, "SPIM4_B_MI"), + MTK_FUNCTION(3, "EXTIF0_GNT_B"), + MTK_FUNCTION(4, "SPM_JTAG_TDI_VCORE"), + MTK_FUNCTION(7, "DBG_MON_A22") + ), + + MTK_PIN( + 111, "GPIO111", + MTK_EINT_FUNCTION(0, 111), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO111"), + MTK_FUNCTION(1, "DMIC0_CLK"), + MTK_FUNCTION(2, "I2SIN1_MCK"), + MTK_FUNCTION(3, "I2SOUT1_MCK"), + MTK_FUNCTION(4, "SPM_JTAG_TDO_VCORE"), + MTK_FUNCTION(6, "CONN_MIPI0_SDATA"), + MTK_FUNCTION(7, "DBG_MON_A23") + ), + + MTK_PIN( + 112, "GPIO112", + MTK_EINT_FUNCTION(0, 112), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO112"), + MTK_FUNCTION(1, "DMIC0_DAT0"), + MTK_FUNCTION(2, "I2SIN1_BCK"), + MTK_FUNCTION(3, "I2SOUT1_BCK"), + MTK_FUNCTION(4, "SPM_JTAG_TRSTN_VCORE"), + MTK_FUNCTION(6, "CONN_MIPI0_SCLK"), + MTK_FUNCTION(7, "DBG_MON_A24") + ), + + MTK_PIN( + 113, "GPIO113", + MTK_EINT_FUNCTION(0, 113), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO113"), + MTK_FUNCTION(1, "DMIC1_CLK"), + MTK_FUNCTION(2, "I2SIN1_LRCK"), + MTK_FUNCTION(3, "I2SOUT1_LRCK"), + MTK_FUNCTION(4, "PMSR_SMAP"), + MTK_FUNCTION(6, "CONN_MIPI1_SDATA"), + MTK_FUNCTION(7, "DBG_MON_A25") + ), + + MTK_PIN( + 114, "GPIO114", + MTK_EINT_FUNCTION(0, 114), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO114"), + MTK_FUNCTION(1, "DMIC1_DAT0"), + MTK_FUNCTION(2, "I2SIN1_DI"), + MTK_FUNCTION(3, "I2SOUT1_DO"), + MTK_FUNCTION(6, "CONN_MIPI1_SCLK"), + MTK_FUNCTION(7, "DBG_MON_A26") + ), + + MTK_PIN( + 115, "GPIO115", + MTK_EINT_FUNCTION(0, 115), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO115"), + MTK_FUNCTION(1, "PCM0_CLK"), + MTK_FUNCTION(2, "USB_DRVVBUS_1P"), + MTK_FUNCTION(3, "PCIE_PHY_I2C_SCL"), + MTK_FUNCTION(4, "SSUSB_U3PHY_I2C_SCL"), + MTK_FUNCTION(6, "CMFLASH0"), + MTK_FUNCTION(7, "EXTIF0_ACT") + ), + + MTK_PIN( + 
116, "GPIO116", + MTK_EINT_FUNCTION(0, 116), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO116"), + MTK_FUNCTION(1, "PCM0_SYNC"), + MTK_FUNCTION(2, "USB_DRVVBUS_2P"), + MTK_FUNCTION(3, "PCIE_PHY_I2C_SDA"), + MTK_FUNCTION(4, "SSUSB_U3PHY_I2C_SDA"), + MTK_FUNCTION(6, "CMFLASH1"), + MTK_FUNCTION(7, "EXTIF0_PRI") + ), + + MTK_PIN( + 117, "GPIO117", + MTK_EINT_FUNCTION(0, 117), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO117"), + MTK_FUNCTION(1, "PCM0_DI"), + MTK_FUNCTION(2, "USB_DRVVBUS_3P"), + MTK_FUNCTION(3, "DP_TX_HPD"), + MTK_FUNCTION(4, "SSPXTP_U3PHY_I2C_SCL"), + MTK_FUNCTION(6, "CMVREF0"), + MTK_FUNCTION(7, "EXTIF0_GNT_B") + ), + + MTK_PIN( + 118, "GPIO118", + MTK_EINT_FUNCTION(0, 118), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO118"), + MTK_FUNCTION(1, "PCM0_DO"), + MTK_FUNCTION(2, "USB_DRVVBUS_4P"), + MTK_FUNCTION(3, "EDP_TX_HPD"), + MTK_FUNCTION(4, "SSPXTP_U3PHY_I2C_SDA"), + MTK_FUNCTION(6, "CMVREF1") + ), + + MTK_PIN( + 119, "GPIO119", + MTK_EINT_FUNCTION(0, 119), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO119"), + MTK_FUNCTION(1, "GBE_TXD3"), + MTK_FUNCTION(2, "DMIC0_CLK"), + MTK_FUNCTION(3, "LVTS_FOUT"), + MTK_FUNCTION(4, "CONN_BGF_MCU_TMS"), + MTK_FUNCTION(5, "UDI_TMS"), + MTK_FUNCTION(6, "ANT_SEL5"), + MTK_FUNCTION(7, "DBG_MON_B0") + ), + + MTK_PIN( + 120, "GPIO120", + MTK_EINT_FUNCTION(0, 120), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO120"), + MTK_FUNCTION(1, "GBE_TXD2"), + MTK_FUNCTION(2, "DMIC0_DAT0"), + MTK_FUNCTION(3, "LVTS_SDO"), + MTK_FUNCTION(4, "CONN_BGF_MCU_TCK"), + MTK_FUNCTION(5, "UDI_TCK"), + MTK_FUNCTION(6, "ANT_SEL6"), + MTK_FUNCTION(7, "DBG_MON_B1") + ), + + MTK_PIN( + 121, "GPIO121", + MTK_EINT_FUNCTION(0, 121), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO121"), + MTK_FUNCTION(1, "GBE_TXD1"), + MTK_FUNCTION(2, "DMIC0_DAT1"), + MTK_FUNCTION(3, "LVTS_26M"), + MTK_FUNCTION(4, "CONN_BGF_MCU_TDI"), + MTK_FUNCTION(5, "UDI_TDI"), + MTK_FUNCTION(6, "ANT_SEL7"), + MTK_FUNCTION(7, "DBG_MON_B2") + ), + + MTK_PIN( + 122, "GPIO122", + MTK_EINT_FUNCTION(0, 122), + DRV_GRP4, + MTK_FUNCTION(0, 
"GPIO122"), + MTK_FUNCTION(1, "GBE_TXD0"), + MTK_FUNCTION(2, "DMIC1_CLK"), + MTK_FUNCTION(3, "LVTS_SCF"), + MTK_FUNCTION(4, "CONN_BGF_MCU_TDO"), + MTK_FUNCTION(5, "UDI_TDO"), + MTK_FUNCTION(6, "ANT_SEL8"), + MTK_FUNCTION(7, "DBG_MON_B3") + ), + + MTK_PIN( + 123, "GPIO123", + MTK_EINT_FUNCTION(0, 123), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO123"), + MTK_FUNCTION(1, "GBE_RXD3"), + MTK_FUNCTION(2, "DMIC1_DAT0"), + MTK_FUNCTION(3, "LVTS_SCK"), + MTK_FUNCTION(4, "CONN_BGF_MCU_TRST_B"), + MTK_FUNCTION(5, "UDI_NTRST"), + MTK_FUNCTION(6, "ANT_SEL9"), + MTK_FUNCTION(7, "DBG_MON_B4") + ), + + MTK_PIN( + 124, "GPIO124", + MTK_EINT_FUNCTION(0, 124), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO124"), + MTK_FUNCTION(1, "GBE_RXD2"), + MTK_FUNCTION(2, "DMIC1_DAT1"), + MTK_FUNCTION(3, "LVTS_SDI"), + MTK_FUNCTION(4, "CONN_WF_MCU_TMS"), + MTK_FUNCTION(5, "SCP_JTAG0_TMS_VCORE"), + MTK_FUNCTION(6, "ANT_SEL10"), + MTK_FUNCTION(7, "DBG_MON_B5") + ), + + MTK_PIN( + 125, "GPIO125", + MTK_EINT_FUNCTION(0, 125), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO125"), + MTK_FUNCTION(1, "GBE_RXD1"), + MTK_FUNCTION(2, "CLKM2"), + MTK_FUNCTION(4, "CONN_WF_MCU_TCK"), + MTK_FUNCTION(5, "SCP_JTAG0_TCK_VCORE"), + MTK_FUNCTION(6, "ANT_SEL11"), + MTK_FUNCTION(7, "DBG_MON_B6") + ), + + MTK_PIN( + 126, "GPIO126", + MTK_EINT_FUNCTION(0, 126), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO126"), + MTK_FUNCTION(1, "GBE_RXD0"), + MTK_FUNCTION(2, "CLKM3"), + MTK_FUNCTION(4, "CONN_WF_MCU_TDI"), + MTK_FUNCTION(5, "SCP_JTAG0_TDI_VCORE"), + MTK_FUNCTION(6, "ANT_SEL12"), + MTK_FUNCTION(7, "DBG_MON_B7") + ), + + MTK_PIN( + 127, "GPIO127", + MTK_EINT_FUNCTION(0, 127), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO127"), + MTK_FUNCTION(1, "GBE_TXC"), + MTK_FUNCTION(2, "I2SIN1_MCK"), + MTK_FUNCTION(4, "CONN_WF_MCU_TDO"), + MTK_FUNCTION(5, "SCP_JTAG0_TDO_VCORE"), + MTK_FUNCTION(6, "ANT_SEL13"), + MTK_FUNCTION(7, "DBG_MON_B8") + ), + + MTK_PIN( + 128, "GPIO128", + MTK_EINT_FUNCTION(0, 128), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO128"), + MTK_FUNCTION(1, "GBE_RXC"), + 
MTK_FUNCTION(2, "I2SIN1_BCK"), + MTK_FUNCTION(4, "CONN_WF_MCU_TRST_B"), + MTK_FUNCTION(5, "SCP_JTAG0_TRSTN_VCORE"), + MTK_FUNCTION(6, "ANT_SEL14"), + MTK_FUNCTION(7, "DBG_MON_B9") + ), + + MTK_PIN( + 129, "GPIO129", + MTK_EINT_FUNCTION(0, 129), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO129"), + MTK_FUNCTION(1, "GBE_RXDV"), + MTK_FUNCTION(2, "I2SIN1_LRCK"), + MTK_FUNCTION(4, "CONN_BGF_MCU_AICE_TMSC"), + MTK_FUNCTION(5, "IPU_JTAG_TMS"), + MTK_FUNCTION(6, "ANT_SEL15"), + MTK_FUNCTION(7, "DBG_MON_B10") + ), + + MTK_PIN( + 130, "GPIO130", + MTK_EINT_FUNCTION(0, 130), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO130"), + MTK_FUNCTION(1, "GBE_TXEN"), + MTK_FUNCTION(2, "I2SIN1_DI"), + MTK_FUNCTION(4, "CONN_BGF_MCU_AICE_TCKC"), + MTK_FUNCTION(5, "IPU_JTAG_TCK"), + MTK_FUNCTION(6, "ANT_SEL16"), + MTK_FUNCTION(7, "DBG_MON_B11") + ), + + MTK_PIN( + 131, "GPIO131", + MTK_EINT_FUNCTION(0, 131), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO131"), + MTK_FUNCTION(1, "GBE_MDC"), + MTK_FUNCTION(2, "CLKM0"), + MTK_FUNCTION(3, "mbistreaden_trigger"), + MTK_FUNCTION(4, "CONN_BGF_UART0_TXD"), + MTK_FUNCTION(5, "IPU_JTAG_TDI"), + MTK_FUNCTION(6, "ANT_SEL17"), + MTK_FUNCTION(7, "DBG_MON_B12") + ), + + MTK_PIN( + 132, "GPIO132", + MTK_EINT_FUNCTION(0, 132), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO132"), + MTK_FUNCTION(1, "GBE_MDIO"), + MTK_FUNCTION(2, "CLKM1"), + MTK_FUNCTION(3, "mbistwriteen_trigger"), + MTK_FUNCTION(4, "CONN_BGF_UART0_RXD"), + MTK_FUNCTION(5, "IPU_JTAG_TDO"), + MTK_FUNCTION(6, "ANT_SEL18"), + MTK_FUNCTION(7, "DBG_MON_B13") + ), + + MTK_PIN( + 133, "GPIO133", + MTK_EINT_FUNCTION(0, 133), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO133"), + MTK_FUNCTION(1, "GBE_TXER"), + MTK_FUNCTION(2, "GBE_AUX_PPS2"), + MTK_FUNCTION(4, "CONN_BT_TXD"), + MTK_FUNCTION(5, "IPU_JTAG_TRST"), + MTK_FUNCTION(6, "ANT_SEL19"), + MTK_FUNCTION(7, "DBG_MON_B14") + ), + + MTK_PIN( + 134, "GPIO134", + MTK_EINT_FUNCTION(0, 134), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO134"), + MTK_FUNCTION(1, "GBE_RXER"), + MTK_FUNCTION(2, "GBE_AUX_PPS3"), + 
MTK_FUNCTION(3, "MCUPM_JTAG_TMS"), + MTK_FUNCTION(4, "CONN_WF_MCU_AICE_TMSC"), + MTK_FUNCTION(5, "APU_JTAG_TMS"), + MTK_FUNCTION(6, "ANT_SEL20"), + MTK_FUNCTION(7, "DBG_MON_B15") + ), + + MTK_PIN( + 135, "GPIO135", + MTK_EINT_FUNCTION(0, 135), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO135"), + MTK_FUNCTION(1, "GBE_COL"), + MTK_FUNCTION(2, "I2SOUT1_MCK"), + MTK_FUNCTION(3, "MCUPM_JTAG_TCK"), + MTK_FUNCTION(4, "CONN_WF_MCU_AICE_TCKC"), + MTK_FUNCTION(5, "APU_JTAG_TCK"), + MTK_FUNCTION(6, "ANT_SEL21"), + MTK_FUNCTION(7, "DBG_MON_B16") + ), + + MTK_PIN( + 136, "GPIO136", + MTK_EINT_FUNCTION(0, 136), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO136"), + MTK_FUNCTION(1, "GBE_INTR"), + MTK_FUNCTION(2, "I2SOUT1_BCK"), + MTK_FUNCTION(3, "MCUPM_JTAG_TDI"), + MTK_FUNCTION(4, "CONN_WIFI_TXD"), + MTK_FUNCTION(5, "APU_JTAG_TDI"), + MTK_FUNCTION(6, "PWM_0"), + MTK_FUNCTION(7, "DBG_MON_B17") + ), + + MTK_PIN( + 137, "GPIO137", + MTK_EINT_FUNCTION(0, 137), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO137"), + MTK_FUNCTION(1, "GBE_AUX_PPS0"), + MTK_FUNCTION(2, "I2SOUT1_LRCK"), + MTK_FUNCTION(3, "MCUPM_JTAG_TDO"), + MTK_FUNCTION(4, "DP_TX_HPD"), + MTK_FUNCTION(5, "APU_JTAG_TDO"), + MTK_FUNCTION(6, "PWM_1"), + MTK_FUNCTION(7, "DBG_MON_B18") + ), + + MTK_PIN( + 138, "GPIO138", + MTK_EINT_FUNCTION(0, 138), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO138"), + MTK_FUNCTION(1, "GBE_AUX_PPS1"), + MTK_FUNCTION(2, "I2SOUT1_DO"), + MTK_FUNCTION(3, "MCUPM_JTAG_TRSTN"), + MTK_FUNCTION(4, "EDP_TX_HPD"), + MTK_FUNCTION(5, "APU_JTAG_TRST"), + MTK_FUNCTION(6, "PWM_2"), + MTK_FUNCTION(7, "DBG_MON_B19") + ), + + MTK_PIN( + 139, "GPIO139", + MTK_EINT_FUNCTION(0, 139), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO139"), + MTK_FUNCTION(1, "CONN_TOP_CLK") + ), + + MTK_PIN( + 140, "GPIO140", + MTK_EINT_FUNCTION(0, 140), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO140"), + MTK_FUNCTION(1, "CONN_TOP_DATA") + ), + + MTK_PIN( + 141, "GPIO141", + MTK_EINT_FUNCTION(0, 141), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO141"), + MTK_FUNCTION(1, "CONN_BT_CLK") + ), + + MTK_PIN( 
+ 142, "GPIO142", + MTK_EINT_FUNCTION(0, 142), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO142"), + MTK_FUNCTION(1, "CONN_BT_DATA") + ), + + MTK_PIN( + 143, "GPIO143", + MTK_EINT_FUNCTION(0, 143), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO143"), + MTK_FUNCTION(1, "CONN_HRST_B") + ), + + MTK_PIN( + 144, "GPIO144", + MTK_EINT_FUNCTION(0, 144), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO144"), + MTK_FUNCTION(1, "CONN_WB_PTA") + ), + + MTK_PIN( + 145, "GPIO145", + MTK_EINT_FUNCTION(0, 145), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO145"), + MTK_FUNCTION(1, "CONN_WF_CTRL0") + ), + + MTK_PIN( + 146, "GPIO146", + MTK_EINT_FUNCTION(0, 146), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO146"), + MTK_FUNCTION(1, "CONN_WF_CTRL1") + ), + + MTK_PIN( + 147, "GPIO147", + MTK_EINT_FUNCTION(0, 147), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO147"), + MTK_FUNCTION(1, "CONN_WF_CTRL2") + ), + + MTK_PIN( + 148, "GPIO148", + MTK_EINT_FUNCTION(0, 148), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO148"), + MTK_FUNCTION(1, "CONN_WF_CTRL3") + ), + + MTK_PIN( + 149, "GPIO149", + MTK_EINT_FUNCTION(0, 149), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO149"), + MTK_FUNCTION(1, "CONN_WF_CTRL4") + ), + + MTK_PIN( + 150, "GPIO150", + MTK_EINT_FUNCTION(0, 150), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO150"), + MTK_FUNCTION(1, "SPINOR_CK"), + MTK_FUNCTION(2, "DMIC0_CLK"), + MTK_FUNCTION(3, "DP_TX_HPD"), + MTK_FUNCTION(4, "PWM_0"), + MTK_FUNCTION(5, "CONN_BPI_BUS17_ANT0"), + MTK_FUNCTION(6, "LVTS_FOUT"), + MTK_FUNCTION(7, "DBG_MON_B26") + ), + + MTK_PIN( + 151, "GPIO151", + MTK_EINT_FUNCTION(0, 151), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO151"), + MTK_FUNCTION(1, "SPINOR_CS"), + MTK_FUNCTION(2, "DMIC0_DAT0"), + MTK_FUNCTION(3, "EDP_TX_HPD"), + MTK_FUNCTION(4, "PWM_1"), + MTK_FUNCTION(5, "CONN_BPI_BUS18_ANT1"), + MTK_FUNCTION(6, "LVTS_SDO"), + MTK_FUNCTION(7, "DBG_MON_B27") + ), + + MTK_PIN( + 152, "GPIO152", + MTK_EINT_FUNCTION(0, 152), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO152"), + MTK_FUNCTION(1, "SPINOR_IO0"), + MTK_FUNCTION(2, "DMIC0_DAT1"), + MTK_FUNCTION(3, "UTXD2"), + 
MTK_FUNCTION(4, "USB_DRVVBUS_1P"), + MTK_FUNCTION(5, "CONN_BPI_BUS19_ANT2"), + MTK_FUNCTION(6, "LVTS_26M"), + MTK_FUNCTION(7, "DBG_MON_B28") + ), + + MTK_PIN( + 153, "GPIO153", + MTK_EINT_FUNCTION(0, 153), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO153"), + MTK_FUNCTION(1, "SPINOR_IO1"), + MTK_FUNCTION(2, "DMIC1_CLK"), + MTK_FUNCTION(3, "UCTS2"), + MTK_FUNCTION(4, "USB_DRVVBUS_2P"), + MTK_FUNCTION(5, "CONN_BPI_BUS20_ANT3"), + MTK_FUNCTION(6, "LVTS_SCF"), + MTK_FUNCTION(7, "DBG_MON_B29") + ), + + MTK_PIN( + 154, "GPIO154", + MTK_EINT_FUNCTION(0, 154), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO154"), + MTK_FUNCTION(1, "SPINOR_IO2"), + MTK_FUNCTION(2, "DMIC1_DAT0"), + MTK_FUNCTION(3, "URTS2"), + MTK_FUNCTION(4, "USB_DRVVBUS_3P"), + MTK_FUNCTION(5, "CONN_BPI_BUS21_ANT4"), + MTK_FUNCTION(6, "LVTS_SCK"), + MTK_FUNCTION(7, "DBG_MON_B30") + ), + + MTK_PIN( + 155, "GPIO155", + MTK_EINT_FUNCTION(0, 155), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO155"), + MTK_FUNCTION(1, "SPINOR_IO3"), + MTK_FUNCTION(2, "DMIC1_DAT1"), + MTK_FUNCTION(3, "URXD2"), + MTK_FUNCTION(4, "USB_DRVVBUS_4P"), + MTK_FUNCTION(5, "DISP_PWM1"), + MTK_FUNCTION(6, "LVTS_SDI"), + MTK_FUNCTION(7, "DBG_MON_B31") + ), + + MTK_PIN( + 156, "GPIO156", + MTK_EINT_FUNCTION(0, 156), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO156"), + MTK_FUNCTION(1, "MSDC0_DAT7") + ), + + MTK_PIN( + 157, "GPIO157", + MTK_EINT_FUNCTION(0, 157), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO157"), + MTK_FUNCTION(1, "MSDC0_DAT6") + ), + + MTK_PIN( + 158, "GPIO158", + MTK_EINT_FUNCTION(0, 158), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO158"), + MTK_FUNCTION(1, "MSDC0_DAT5") + ), + + MTK_PIN( + 159, "GPIO159", + MTK_EINT_FUNCTION(0, 159), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO159"), + MTK_FUNCTION(1, "MSDC0_DAT4") + ), + + MTK_PIN( + 160, "GPIO160", + MTK_EINT_FUNCTION(0, 160), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO160"), + MTK_FUNCTION(1, "MSDC0_RSTB") + ), + + MTK_PIN( + 161, "GPIO161", + MTK_EINT_FUNCTION(0, 161), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO161"), + MTK_FUNCTION(1, "MSDC0_CMD") + ), 
+ + MTK_PIN( + 162, "GPIO162", + MTK_EINT_FUNCTION(0, 162), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO162"), + MTK_FUNCTION(1, "MSDC0_CLK") + ), + + MTK_PIN( + 163, "GPIO163", + MTK_EINT_FUNCTION(0, 163), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO163"), + MTK_FUNCTION(1, "MSDC0_DAT3") + ), + + MTK_PIN( + 164, "GPIO164", + MTK_EINT_FUNCTION(0, 164), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO164"), + MTK_FUNCTION(1, "MSDC0_DAT2") + ), + + MTK_PIN( + 165, "GPIO165", + MTK_EINT_FUNCTION(0, 165), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO165"), + MTK_FUNCTION(1, "MSDC0_DAT1") + ), + + MTK_PIN( + 166, "GPIO166", + MTK_EINT_FUNCTION(0, 166), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO166"), + MTK_FUNCTION(1, "MSDC0_DAT0") + ), + + MTK_PIN( + 167, "GPIO167", + MTK_EINT_FUNCTION(0, 167), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO167"), + MTK_FUNCTION(1, "MSDC0_DSL") + ), + + MTK_PIN( + 168, "GPIO168", + MTK_EINT_FUNCTION(0, 168), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO168"), + MTK_FUNCTION(1, "MSDC1_CMD"), + MTK_FUNCTION(2, "CONN_WF_MCU_AICE_TMSC"), + MTK_FUNCTION(3, "UCTS1"), + MTK_FUNCTION(4, "UDI_TMS"), + MTK_FUNCTION(5, "SSPM_JTAG_TMS_VCORE"), + MTK_FUNCTION(6, "MCUPM_JTAG_TMS"), + MTK_FUNCTION(7, "CONN_BGF_MCU_TMS") + ), + + MTK_PIN( + 169, "GPIO169", + MTK_EINT_FUNCTION(0, 169), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO169"), + MTK_FUNCTION(1, "MSDC1_CLK"), + MTK_FUNCTION(2, "CONN_WF_MCU_AICE_TCKC"), + MTK_FUNCTION(3, "URTS1"), + MTK_FUNCTION(4, "UDI_TCK"), + MTK_FUNCTION(5, "SSPM_JTAG_TCK_VCORE"), + MTK_FUNCTION(6, "MCUPM_JTAG_TCK"), + MTK_FUNCTION(7, "CONN_BGF_MCU_TCK") + ), + + MTK_PIN( + 170, "GPIO170", + MTK_EINT_FUNCTION(0, 170), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO170"), + MTK_FUNCTION(1, "MSDC1_DAT0"), + MTK_FUNCTION(2, "SPIM5_B_CSB"), + MTK_FUNCTION(3, "UCTS2"), + MTK_FUNCTION(4, "UDI_TDI"), + MTK_FUNCTION(5, "SSPM_JTAG_TDI_VCORE"), + MTK_FUNCTION(6, "MCUPM_JTAG_TDI"), + MTK_FUNCTION(7, "CONN_BGF_MCU_TDI") + ), + + MTK_PIN( + 171, "GPIO171", + MTK_EINT_FUNCTION(0, 171), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO171"), 
+ MTK_FUNCTION(1, "MSDC1_DAT1"), + MTK_FUNCTION(2, "SPIM5_B_CLK"), + MTK_FUNCTION(3, "URTS2"), + MTK_FUNCTION(4, "UDI_TDO"), + MTK_FUNCTION(5, "SSPM_JTAG_TDO_VCORE"), + MTK_FUNCTION(6, "MCUPM_JTAG_TDO"), + MTK_FUNCTION(7, "CONN_BGF_MCU_TDO") + ), + + MTK_PIN( + 172, "GPIO172", + MTK_EINT_FUNCTION(0, 172), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO172"), + MTK_FUNCTION(1, "MSDC1_DAT2"), + MTK_FUNCTION(2, "SPIM5_B_MO"), + MTK_FUNCTION(3, "UCTS3"), + MTK_FUNCTION(4, "UDI_NTRST"), + MTK_FUNCTION(5, "SSPM_JTAG_TRSTN_VCORE"), + MTK_FUNCTION(6, "MCUPM_JTAG_TRSTN"), + MTK_FUNCTION(7, "CONN_BGF_MCU_TRST_B") + ), + + MTK_PIN( + 173, "GPIO173", + MTK_EINT_FUNCTION(0, 173), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO173"), + MTK_FUNCTION(1, "MSDC1_DAT3"), + MTK_FUNCTION(2, "SPIM5_B_MI"), + MTK_FUNCTION(3, "URTS3"), + MTK_FUNCTION(4, "CLKM0"), + MTK_FUNCTION(5, "PWM_2") + ), + + MTK_PIN( + 174, "GPIO174", + MTK_EINT_FUNCTION(0, 174), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO174"), + MTK_FUNCTION(1, "MSDC2_CMD"), + MTK_FUNCTION(2, "CONN_BGF_MCU_AICE_TMSC"), + MTK_FUNCTION(3, "UTXD1"), + MTK_FUNCTION(4, "VADSP_JTAG0_TMS"), + MTK_FUNCTION(5, "SSPM_JTAG_TMS_VLP"), + MTK_FUNCTION(6, "SPM_JTAG_TMS"), + MTK_FUNCTION(7, "SCP_JTAG0_TMS_VLP") + ), + + MTK_PIN( + 175, "GPIO175", + MTK_EINT_FUNCTION(0, 175), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO175"), + MTK_FUNCTION(1, "MSDC2_CLK"), + MTK_FUNCTION(2, "CONN_BGF_MCU_AICE_TCKC"), + MTK_FUNCTION(3, "URXD1"), + MTK_FUNCTION(4, "VADSP_JTAG0_TCK"), + MTK_FUNCTION(5, "SSPM_JTAG_TCK_VLP"), + MTK_FUNCTION(6, "SPM_JTAG_TCK"), + MTK_FUNCTION(7, "SCP_JTAG0_TCK_VLP") + ), + + MTK_PIN( + 176, "GPIO176", + MTK_EINT_FUNCTION(0, 176), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO176"), + MTK_FUNCTION(1, "MSDC2_DAT0"), + MTK_FUNCTION(2, "SRCLKENAI0"), + MTK_FUNCTION(3, "UTXD2"), + MTK_FUNCTION(4, "VADSP_JTAG0_TDI"), + MTK_FUNCTION(5, "SSPM_JTAG_TDI_VLP"), + MTK_FUNCTION(6, "SPM_JTAG_TDI"), + MTK_FUNCTION(7, "SCP_JTAG0_TDI_VLP") + ), + + MTK_PIN( + 177, "GPIO177", + MTK_EINT_FUNCTION(0, 
177), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO177"), + MTK_FUNCTION(1, "MSDC2_DAT1"), + MTK_FUNCTION(2, "SRCLKENAI1"), + MTK_FUNCTION(3, "URXD2"), + MTK_FUNCTION(4, "VADSP_JTAG0_TDO"), + MTK_FUNCTION(5, "SSPM_JTAG_TDO_VLP"), + MTK_FUNCTION(6, "SPM_JTAG_TDO"), + MTK_FUNCTION(7, "SCP_JTAG0_TDO_VLP") + ), + + MTK_PIN( + 178, "GPIO178", + MTK_EINT_FUNCTION(0, 178), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO178"), + MTK_FUNCTION(1, "MSDC2_DAT2"), + MTK_FUNCTION(3, "UTXD3"), + MTK_FUNCTION(4, "VADSP_JTAG0_TRSTN"), + MTK_FUNCTION(5, "SSPM_JTAG_TRSTN_VLP"), + MTK_FUNCTION(6, "SPM_JTAG_TRSTN"), + MTK_FUNCTION(7, "SCP_JTAG0_TRSTN_VLP") + ), + + MTK_PIN( + 179, "GPIO179", + MTK_EINT_FUNCTION(0, 179), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO179"), + MTK_FUNCTION(1, "MSDC2_DAT3"), + MTK_FUNCTION(3, "URXD3"), + MTK_FUNCTION(4, "CLKM1"), + MTK_FUNCTION(5, "PWM_vlp"), + MTK_FUNCTION(7, "TP_GPIO7_AO") + ), + + MTK_PIN( + 180, "GPIO180", + MTK_EINT_FUNCTION(0, 180), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO180"), + MTK_FUNCTION(1, "SPMI_P_SCL") + ), + + MTK_PIN( + 181, "GPIO181", + MTK_EINT_FUNCTION(0, 181), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO181"), + MTK_FUNCTION(1, "SPMI_P_SDA") + ), + + MTK_PIN( + 182, "GPIO182", + MTK_EINT_FUNCTION(0, NO_EINT_SUPPORT), + DRV_GRP4, + MTK_FUNCTION(0, "GPIO182"), + MTK_FUNCTION(1, "DDR_PAD_RRESETB") + ), + + MTK_PIN( + 183, "GPIO183", + MTK_EINT_FUNCTION(0, 182), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 184, "GPIO184", + MTK_EINT_FUNCTION(0, 183), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 185, "GPIO185", + MTK_EINT_FUNCTION(0, 184), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 186, "GPIO186", + MTK_EINT_FUNCTION(0, 185), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 187, "GPIO187", + MTK_EINT_FUNCTION(0, 186), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 188, "GPIO188", + MTK_EINT_FUNCTION(0, 187), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 189, "GPIO189", + MTK_EINT_FUNCTION(0, 188), + 
DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 190, "GPIO190", + MTK_EINT_FUNCTION(0, 189), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 191, "GPIO191", + MTK_EINT_FUNCTION(0, 190), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 192, "GPIO192", + MTK_EINT_FUNCTION(0, 191), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 193, "GPIO193", + MTK_EINT_FUNCTION(0, 192), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 194, "GPIO194", + MTK_EINT_FUNCTION(0, 193), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 195, "GPIO195", + MTK_EINT_FUNCTION(0, 194), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 196, "GPIO196", + MTK_EINT_FUNCTION(0, 195), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 197, "GPIO197", + MTK_EINT_FUNCTION(0, 196), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 198, "GPIO198", + MTK_EINT_FUNCTION(0, 197), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 199, "GPIO199", + MTK_EINT_FUNCTION(0, 198), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 200, "GPIO200", + MTK_EINT_FUNCTION(0, 199), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 201, "GPIO201", + MTK_EINT_FUNCTION(0, 200), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 202, "GPIO202", + MTK_EINT_FUNCTION(0, 201), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 203, "GPIO203", + MTK_EINT_FUNCTION(0, 202), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 204, "GPIO204", + MTK_EINT_FUNCTION(0, 203), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 205, "GPIO205", + MTK_EINT_FUNCTION(0, 204), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 206, "GPIO206", + MTK_EINT_FUNCTION(0, 205), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 207, "GPIO207", + MTK_EINT_FUNCTION(0, 206), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 208, "GPIO208", + MTK_EINT_FUNCTION(0, 207), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 209, 
"GPIO209", + MTK_EINT_FUNCTION(0, 208), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + + MTK_PIN( + 210, "GPIO210", + MTK_EINT_FUNCTION(0, 209), + DRV_FIXED, + MTK_FUNCTION(0, NULL) + ), + +}; + +static struct mtk_eint_pin eint_pins_mt8189[] = { + MTK_EINT_PIN(0, 0, 0, 1), + MTK_EINT_PIN(1, 0, 1, 1), + MTK_EINT_PIN(2, 0, 2, 1), + MTK_EINT_PIN(3, 0, 3, 1), + MTK_EINT_PIN(4, 0, 4, 1), + MTK_EINT_PIN(5, 0, 5, 1), + MTK_EINT_PIN(6, 0, 6, 1), + MTK_EINT_PIN(7, 0, 7, 1), + MTK_EINT_PIN(8, 0, 8, 1), + MTK_EINT_PIN(9, 0, 9, 1), + MTK_EINT_PIN(10, 0, 10, 1), + MTK_EINT_PIN(11, 0, 11, 1), + MTK_EINT_PIN(12, 1, 0, 1), + MTK_EINT_PIN(13, 1, 1, 1), + MTK_EINT_PIN(14, 1, 2, 1), + MTK_EINT_PIN(15, 1, 3, 1), + MTK_EINT_PIN(16, 1, 4, 1), + MTK_EINT_PIN(17, 1, 5, 1), + MTK_EINT_PIN(18, 0, 12, 1), + MTK_EINT_PIN(19, 0, 13, 1), + MTK_EINT_PIN(20, 0, 14, 1), + MTK_EINT_PIN(21, 0, 15, 1), + MTK_EINT_PIN(22, 0, 16, 1), + MTK_EINT_PIN(23, 0, 17, 1), + MTK_EINT_PIN(24, 0, 18, 1), + MTK_EINT_PIN(25, 2, 0, 1), + MTK_EINT_PIN(26, 2, 1, 1), + MTK_EINT_PIN(27, 1, 6, 1), + MTK_EINT_PIN(28, 1, 7, 1), + MTK_EINT_PIN(29, 2, 2, 1), + MTK_EINT_PIN(30, 1, 8, 1), + MTK_EINT_PIN(31, 1, 9, 1), + MTK_EINT_PIN(32, 1, 10, 1), + MTK_EINT_PIN(33, 1, 11, 1), + MTK_EINT_PIN(34, 1, 12, 1), + MTK_EINT_PIN(35, 1, 13, 1), + MTK_EINT_PIN(36, 1, 14, 1), + MTK_EINT_PIN(37, 1, 15, 1), + MTK_EINT_PIN(38, 1, 16, 1), + MTK_EINT_PIN(39, 1, 17, 1), + MTK_EINT_PIN(40, 1, 18, 1), + MTK_EINT_PIN(41, 1, 19, 1), + MTK_EINT_PIN(42, 1, 20, 1), + MTK_EINT_PIN(43, 1, 21, 1), + MTK_EINT_PIN(44, 0, 19, 1), + MTK_EINT_PIN(45, 0, 20, 1), + MTK_EINT_PIN(46, 0, 21, 1), + MTK_EINT_PIN(47, 0, 22, 1), + MTK_EINT_PIN(48, 2, 3, 1), + MTK_EINT_PIN(49, 2, 4, 1), + MTK_EINT_PIN(50, 2, 5, 1), + MTK_EINT_PIN(51, 0, 23, 1), + MTK_EINT_PIN(52, 0, 24, 1), + MTK_EINT_PIN(53, 0, 25, 1), + MTK_EINT_PIN(54, 0, 26, 1), + MTK_EINT_PIN(55, 2, 6, 1), + MTK_EINT_PIN(56, 2, 7, 1), + MTK_EINT_PIN(57, 1, 22, 1), + MTK_EINT_PIN(58, 1, 23, 1), + MTK_EINT_PIN(59, 1, 
24, 1), + MTK_EINT_PIN(60, 1, 25, 1), + MTK_EINT_PIN(61, 1, 26, 1), + MTK_EINT_PIN(62, 1, 27, 1), + MTK_EINT_PIN(63, 1, 28, 1), + MTK_EINT_PIN(64, 1, 29, 1), + MTK_EINT_PIN(65, 0, 27, 1), + MTK_EINT_PIN(66, 0, 28, 1), + MTK_EINT_PIN(67, 0, 29, 1), + MTK_EINT_PIN(68, 0, 30, 1), + MTK_EINT_PIN(69, 1, 30, 1), + MTK_EINT_PIN(70, 1, 31, 1), + MTK_EINT_PIN(71, 1, 32, 1), + MTK_EINT_PIN(72, 1, 33, 1), + MTK_EINT_PIN(73, 1, 34, 1), + MTK_EINT_PIN(74, 1, 35, 1), + MTK_EINT_PIN(75, 1, 36, 1), + MTK_EINT_PIN(76, 1, 37, 1), + MTK_EINT_PIN(77, 0, 31, 1), + MTK_EINT_PIN(78, 0, 32, 1), + MTK_EINT_PIN(79, 0, 33, 1), + MTK_EINT_PIN(80, 0, 34, 1), + MTK_EINT_PIN(81, 1, 38, 1), + MTK_EINT_PIN(82, 1, 39, 1), + MTK_EINT_PIN(83, 1, 40, 1), + MTK_EINT_PIN(84, 0, 35, 1), + MTK_EINT_PIN(85, 0, 36, 1), + MTK_EINT_PIN(86, 0, 37, 1), + MTK_EINT_PIN(87, 0, 38, 1), + MTK_EINT_PIN(88, 2, 8, 1), + MTK_EINT_PIN(89, 2, 9, 1), + MTK_EINT_PIN(90, 2, 10, 1), + MTK_EINT_PIN(91, 2, 11, 1), + MTK_EINT_PIN(92, 2, 12, 1), + MTK_EINT_PIN(93, 2, 13, 1), + MTK_EINT_PIN(94, 2, 14, 1), + MTK_EINT_PIN(95, 2, 15, 1), + MTK_EINT_PIN(96, 2, 16, 1), + MTK_EINT_PIN(97, 2, 17, 1), + MTK_EINT_PIN(98, 2, 18, 1), + MTK_EINT_PIN(99, 2, 19, 1), + MTK_EINT_PIN(100, 2, 20, 1), + MTK_EINT_PIN(101, 2, 21, 1), + MTK_EINT_PIN(102, 2, 22, 1), + MTK_EINT_PIN(103, 0, 39, 1), + MTK_EINT_PIN(104, 0, 40, 1), + MTK_EINT_PIN(105, 0, 41, 1), + MTK_EINT_PIN(106, 0, 42, 1), + MTK_EINT_PIN(107, 0, 43, 1), + MTK_EINT_PIN(108, 0, 44, 1), + MTK_EINT_PIN(109, 0, 45, 1), + MTK_EINT_PIN(110, 0, 46, 1), + MTK_EINT_PIN(111, 0, 47, 1), + MTK_EINT_PIN(112, 0, 48, 0), + MTK_EINT_PIN(113, 0, 49, 1), + MTK_EINT_PIN(114, 0, 50, 0), + MTK_EINT_PIN(115, 1, 41, 1), + MTK_EINT_PIN(116, 1, 42, 1), + MTK_EINT_PIN(117, 1, 43, 1), + MTK_EINT_PIN(118, 1, 44, 1), + MTK_EINT_PIN(119, 1, 45, 1), + MTK_EINT_PIN(120, 1, 46, 1), + MTK_EINT_PIN(121, 1, 47, 1), + MTK_EINT_PIN(122, 1, 48, 1), + MTK_EINT_PIN(123, 1, 49, 1), + MTK_EINT_PIN(124, 1, 50, 1), + 
MTK_EINT_PIN(125, 1, 51, 1), + MTK_EINT_PIN(126, 1, 52, 1), + MTK_EINT_PIN(127, 1, 53, 1), + MTK_EINT_PIN(128, 1, 54, 1), + MTK_EINT_PIN(129, 1, 55, 1), + MTK_EINT_PIN(130, 1, 56, 1), + MTK_EINT_PIN(131, 1, 57, 1), + MTK_EINT_PIN(132, 1, 58, 1), + MTK_EINT_PIN(133, 1, 59, 1), + MTK_EINT_PIN(134, 1, 60, 1), + MTK_EINT_PIN(135, 1, 61, 1), + MTK_EINT_PIN(136, 1, 62, 1), + MTK_EINT_PIN(137, 1, 63, 1), + MTK_EINT_PIN(138, 1, 64, 1), + MTK_EINT_PIN(139, 1, 65, 1), + MTK_EINT_PIN(140, 1, 66, 1), + MTK_EINT_PIN(141, 1, 67, 1), + MTK_EINT_PIN(142, 1, 68, 1), + MTK_EINT_PIN(143, 1, 69, 1), + MTK_EINT_PIN(144, 1, 70, 1), + MTK_EINT_PIN(145, 1, 71, 1), + MTK_EINT_PIN(146, 1, 72, 1), + MTK_EINT_PIN(147, 1, 73, 1), + MTK_EINT_PIN(148, 1, 74, 1), + MTK_EINT_PIN(149, 1, 75, 1), + MTK_EINT_PIN(150, 1, 76, 1), + MTK_EINT_PIN(151, 1, 77, 1), + MTK_EINT_PIN(152, 1, 78, 1), + MTK_EINT_PIN(153, 1, 79, 1), + MTK_EINT_PIN(154, 1, 80, 1), + MTK_EINT_PIN(155, 1, 81, 1), + MTK_EINT_PIN(156, 2, 23, 1), + MTK_EINT_PIN(157, 2, 24, 1), + MTK_EINT_PIN(158, 2, 25, 1), + MTK_EINT_PIN(159, 4, 0, 1), + MTK_EINT_PIN(160, 2, 26, 1), + MTK_EINT_PIN(161, 2, 27, 1), + MTK_EINT_PIN(162, 2, 28, 1), + MTK_EINT_PIN(163, 4, 1, 1), + MTK_EINT_PIN(164, 2, 29, 1), + MTK_EINT_PIN(165, 2, 30, 1), + MTK_EINT_PIN(166, 4, 2, 1), + MTK_EINT_PIN(167, 2, 31, 0), + MTK_EINT_PIN(168, 1, 82, 1), + MTK_EINT_PIN(169, 1, 83, 1), + MTK_EINT_PIN(170, 1, 84, 1), + MTK_EINT_PIN(171, 1, 85, 0), + MTK_EINT_PIN(172, 1, 86, 1), + MTK_EINT_PIN(173, 1, 87, 0), + MTK_EINT_PIN(174, 4, 3, 1), + MTK_EINT_PIN(175, 4, 4, 1), + MTK_EINT_PIN(176, 4, 5, 1), + MTK_EINT_PIN(177, 4, 6, 1), + MTK_EINT_PIN(178, 4, 7, 1), + MTK_EINT_PIN(179, 4, 8, 1), + MTK_EINT_PIN(180, 2, 32, 1), + MTK_EINT_PIN(181, 2, 33, 0), + MTK_EINT_PIN(182, 3, 0, 1), + MTK_EINT_PIN(183, 3, 1, 1), + MTK_EINT_PIN(184, 3, 2, 1), + MTK_EINT_PIN(185, 3, 3, 1), + MTK_EINT_PIN(186, 3, 4, 1), + MTK_EINT_PIN(187, 3, 5, 1), + MTK_EINT_PIN(188, 3, 6, 1), + MTK_EINT_PIN(189, 3, 7, 1), + 
MTK_EINT_PIN(190, 3, 8, 1), + MTK_EINT_PIN(191, 3, 9, 1), + MTK_EINT_PIN(192, 3, 10, 1), + MTK_EINT_PIN(193, 3, 11, 1), + MTK_EINT_PIN(194, 3, 12, 1), + MTK_EINT_PIN(195, 3, 13, 1), + MTK_EINT_PIN(196, 3, 14, 1), + MTK_EINT_PIN(197, 3, 15, 1), + MTK_EINT_PIN(198, 3, 16, 1), + MTK_EINT_PIN(199, 3, 17, 1), + MTK_EINT_PIN(200, 3, 18, 1), + MTK_EINT_PIN(201, 3, 19, 1), + MTK_EINT_PIN(202, 3, 20, 1), + MTK_EINT_PIN(203, 3, 21, 1), + MTK_EINT_PIN(204, 3, 22, 1), + MTK_EINT_PIN(205, 3, 23, 1), + MTK_EINT_PIN(206, 3, 24, 1), + MTK_EINT_PIN(207, 3, 25, 1), + MTK_EINT_PIN(208, 3, 26, 1), + MTK_EINT_PIN(209, 3, 27, 1) +}; + +#endif /* __PINCTRL_MTK_MT8189_H */ From b330d77c5da2cfece98a89cbb51b8ef948691e6f Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Sun, 13 Jul 2025 10:05:31 +0200 Subject: [PATCH 0947/2411] dt-bindings: dma: qcom,gpi: document the Milos GPI DMA Engine Document the GPI DMA Engine on the Milos SoC. Signed-off-by: Luca Weiss Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250713-sm7635-fp6-initial-v2-9-e8f9a789505b@fairphone.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/qcom,gpi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml index 19764452d2cf..bbe4da2a1105 100644 --- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml @@ -24,6 +24,7 @@ properties: - qcom,sm6350-gpi-dma - items: - enum: + - qcom,milos-gpi-dma - qcom,qcm2290-gpi-dma - qcom,qcs8300-gpi-dma - qcom,qdu1000-gpi-dma From 60095aca6b471b7b7a79c80b7395f7e4e414b479 Mon Sep 17 00:00:00 2001 From: Thomas Fourier Date: Tue, 1 Jul 2025 14:37:52 +0200 Subject: [PATCH 0948/2411] dmaengine: mv_xor: Fix missing check after DMA map and missing unmap The DMA map functions can fail and should be tested for errors. In case of error, unmap the already mapped regions. 
Fixes: 22843545b200 ("dma: mv_xor: Add support for DMA_INTERRUPT") Signed-off-by: Thomas Fourier Link: https://lore.kernel.org/r/20250701123753.46935-2-fourier.thomas@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/mv_xor.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index fa6e4646fdc2..1fdcb0f5c9e7 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -1061,8 +1061,16 @@ mv_xor_channel_add(struct mv_xor_device *xordev, */ mv_chan->dummy_src_addr = dma_map_single(dma_dev->dev, mv_chan->dummy_src, MV_XOR_MIN_BYTE_COUNT, DMA_FROM_DEVICE); + if (dma_mapping_error(dma_dev->dev, mv_chan->dummy_src_addr)) + return ERR_PTR(-ENOMEM); + mv_chan->dummy_dst_addr = dma_map_single(dma_dev->dev, mv_chan->dummy_dst, MV_XOR_MIN_BYTE_COUNT, DMA_TO_DEVICE); + if (dma_mapping_error(dma_dev->dev, mv_chan->dummy_dst_addr)) { + ret = -ENOMEM; + goto err_unmap_src; + } + /* allocate coherent memory for hardware descriptors * note: writecombine gives slightly better performance, but @@ -1071,8 +1079,10 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan->dma_desc_pool_virt = dma_alloc_wc(&pdev->dev, MV_XOR_POOL_SIZE, &mv_chan->dma_desc_pool, GFP_KERNEL); - if (!mv_chan->dma_desc_pool_virt) - return ERR_PTR(-ENOMEM); + if (!mv_chan->dma_desc_pool_virt) { + ret = -ENOMEM; + goto err_unmap_dst; + } /* discover transaction capabilities from the platform data */ dma_dev->cap_mask = cap_mask; @@ -1155,6 +1165,13 @@ mv_xor_channel_add(struct mv_xor_device *xordev, err_free_dma: dma_free_coherent(&pdev->dev, MV_XOR_POOL_SIZE, mv_chan->dma_desc_pool_virt, mv_chan->dma_desc_pool); +err_unmap_dst: + dma_unmap_single(dma_dev->dev, mv_chan->dummy_dst_addr, + MV_XOR_MIN_BYTE_COUNT, DMA_TO_DEVICE); +err_unmap_src: + dma_unmap_single(dma_dev->dev, mv_chan->dummy_src_addr, + MV_XOR_MIN_BYTE_COUNT, DMA_FROM_DEVICE); + return ERR_PTR(ret); } From c6ee78fc8f3e653bec427cfd06fec7877ee782bd Mon Sep 17 
00:00:00 2001 From: Thomas Fourier Date: Mon, 7 Jul 2025 09:57:16 +0200 Subject: [PATCH 0949/2411] dmaengine: nbpfaxi: Add missing check after DMA map The DMA map functions can fail and should be tested for errors. If the mapping fails, unmap and return an error. Fixes: b45b262cefd5 ("dmaengine: add a driver for AMBA AXI NBPF DMAC IP cores") Signed-off-by: Thomas Fourier Link: https://lore.kernel.org/r/20250707075752.28674-2-fourier.thomas@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/nbpfaxi.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c index 0d6324c4e2be..0b75bb122898 100644 --- a/drivers/dma/nbpfaxi.c +++ b/drivers/dma/nbpfaxi.c @@ -711,6 +711,9 @@ static int nbpf_desc_page_alloc(struct nbpf_channel *chan) list_add_tail(&ldesc->node, &lhead); ldesc->hwdesc_dma_addr = dma_map_single(dchan->device->dev, hwdesc, sizeof(*hwdesc), DMA_TO_DEVICE); + if (dma_mapping_error(dchan->device->dev, + ldesc->hwdesc_dma_addr)) + goto unmap_error; dev_dbg(dev, "%s(): mapped 0x%p to %pad\n", __func__, hwdesc, &ldesc->hwdesc_dma_addr); @@ -737,6 +740,16 @@ static int nbpf_desc_page_alloc(struct nbpf_channel *chan) spin_unlock_irq(&chan->lock); return ARRAY_SIZE(dpage->desc); + +unmap_error: + while (i--) { + ldesc--; hwdesc--; + + dma_unmap_single(dchan->device->dev, ldesc->hwdesc_dma_addr, + sizeof(*hwdesc), DMA_TO_DEVICE); /* same size as at map time */ + } + + return -ENOMEM; } static void nbpf_desc_put(struct nbpf_desc *desc) From ec896de28c9ad1a4155c518588d9153c454abd39 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 2 Jul 2025 17:26:15 -0500 Subject: [PATCH 0950/2411] dt-bindings: dma: Convert brcm,iproc-sba to DT schema Convert the Broadcom SBA RAID engine binding to schema. It is a straight forward conversion.
Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250702222616.2760974-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../bindings/dma/brcm,iproc-sba.txt | 29 ------------- .../bindings/dma/brcm,iproc-sba.yaml | 41 +++++++++++++++++++ 2 files changed, 41 insertions(+), 29 deletions(-) delete mode 100644 Documentation/devicetree/bindings/dma/brcm,iproc-sba.txt create mode 100644 Documentation/devicetree/bindings/dma/brcm,iproc-sba.yaml diff --git a/Documentation/devicetree/bindings/dma/brcm,iproc-sba.txt b/Documentation/devicetree/bindings/dma/brcm,iproc-sba.txt deleted file mode 100644 index 092913a28457..000000000000 --- a/Documentation/devicetree/bindings/dma/brcm,iproc-sba.txt +++ /dev/null @@ -1,29 +0,0 @@ -* Broadcom SBA RAID engine - -Required properties: -- compatible: Should be one of the following - "brcm,iproc-sba" - "brcm,iproc-sba-v2" - The "brcm,iproc-sba" has support for only 6 PQ coefficients - The "brcm,iproc-sba-v2" has support for only 30 PQ coefficients -- mboxes: List of phandle and mailbox channel specifiers - -Example: - -raid_mbox: mbox@67400000 { - ... - #mbox-cells = <3>; - ... 
-}; - -raid0 { - compatible = "brcm,iproc-sba-v2"; - mboxes = <&raid_mbox 0 0x1 0xffff>, - <&raid_mbox 1 0x1 0xffff>, - <&raid_mbox 2 0x1 0xffff>, - <&raid_mbox 3 0x1 0xffff>, - <&raid_mbox 4 0x1 0xffff>, - <&raid_mbox 5 0x1 0xffff>, - <&raid_mbox 6 0x1 0xffff>, - <&raid_mbox 7 0x1 0xffff>; -}; diff --git a/Documentation/devicetree/bindings/dma/brcm,iproc-sba.yaml b/Documentation/devicetree/bindings/dma/brcm,iproc-sba.yaml new file mode 100644 index 000000000000..f3fed576cacf --- /dev/null +++ b/Documentation/devicetree/bindings/dma/brcm,iproc-sba.yaml @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dma/brcm,iproc-sba.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Broadcom SBA RAID engine + +maintainers: + - Ray Jui + - Scott Branden + +properties: + compatible: + enum: + - brcm,iproc-sba + - brcm,iproc-sba-v2 + + mboxes: + minItems: 1 + maxItems: 8 + +required: + - compatible + - mboxes + +additionalProperties: false + +examples: + - | + raid0 { + compatible = "brcm,iproc-sba-v2"; + mboxes = <&raid_mbox 0 0x1 0xffff>, + <&raid_mbox 1 0x1 0xffff>, + <&raid_mbox 2 0x1 0xffff>, + <&raid_mbox 3 0x1 0xffff>, + <&raid_mbox 4 0x1 0xffff>, + <&raid_mbox 5 0x1 0xffff>, + <&raid_mbox 6 0x1 0xffff>, + <&raid_mbox 7 0x1 0xffff>; + }; From 245dd180ac861fea31abe69c722061a3c2c65a66 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 3 Jul 2025 10:59:10 -0500 Subject: [PATCH 0951/2411] dt-bindings: dma: Convert marvell,orion-xor to DT schema Convert the Marvell Orion XOR engine binding to schema. The "clocks" property is optional for some platforms (though not distinguished by compatible). The child node names used are 'channel' or 'xor'.
Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250703155912.1713518-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../bindings/dma/marvell,orion-xor.yaml | 84 +++++++++++++++++++ .../devicetree/bindings/dma/mv-xor.txt | 40 --------- 2 files changed, 84 insertions(+), 40 deletions(-) create mode 100644 Documentation/devicetree/bindings/dma/marvell,orion-xor.yaml delete mode 100644 Documentation/devicetree/bindings/dma/mv-xor.txt diff --git a/Documentation/devicetree/bindings/dma/marvell,orion-xor.yaml b/Documentation/devicetree/bindings/dma/marvell,orion-xor.yaml new file mode 100644 index 000000000000..add08257ec59 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/marvell,orion-xor.yaml @@ -0,0 +1,84 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dma/marvell,orion-xor.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Marvell XOR engine + +maintainers: + - Andrew Lunn + - Gregory Clement + +properties: + compatible: + oneOf: + - items: + - const: marvell,armada-380-xor + - const: marvell,orion-xor + - enum: + - marvell,armada-3700-xor + - marvell,orion-xor + + reg: + items: + - description: Low registers for the XOR engine + - description: High registers for the XOR engine + + clocks: + maxItems: 1 + +patternProperties: + "^(channel|xor)[0-9]+$": + description: XOR channel sub-node + type: object + additionalProperties: false + + properties: + interrupts: + description: Interrupt specifier for the XOR channel + items: + - description: Interrupt for this channel + + dmacap,memcpy: + type: boolean + deprecated: true + description: + Indicates that the XOR channel is capable of memcpy operations + + dmacap,memset: + type: boolean + deprecated: true + description: + Indicates that the XOR channel is capable of memset operations + + dmacap,xor: + type: boolean + deprecated: true + description: + Indicates that the XOR channel is capable of xor operations 
+ + required: + - interrupts + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + xor@d0060900 { + compatible = "marvell,orion-xor"; + reg = <0xd0060900 0x100>, + <0xd0060b00 0x100>; + clocks = <&coreclk 0>; + + xor00 { + interrupts = <51>; + }; + xor01 { + interrupts = <52>; + }; + }; diff --git a/Documentation/devicetree/bindings/dma/mv-xor.txt b/Documentation/devicetree/bindings/dma/mv-xor.txt deleted file mode 100644 index 0ffb4d8766a8..000000000000 --- a/Documentation/devicetree/bindings/dma/mv-xor.txt +++ /dev/null @@ -1,40 +0,0 @@ -* Marvell XOR engines - -Required properties: -- compatible: Should be one of the following: - - "marvell,orion-xor" - - "marvell,armada-380-xor" - - "marvell,armada-3700-xor". -- reg: Should contain registers location and length (two sets) - the first set is the low registers, the second set the high - registers for the XOR engine. -- clocks: pointer to the reference clock - -The DT node must also contains sub-nodes for each XOR channel that the -XOR engine has. 
Those sub-nodes have the following required -properties: -- interrupts: interrupt of the XOR channel - -The sub-nodes used to contain one or several of the following -properties, but they are now deprecated: -- dmacap,memcpy to indicate that the XOR channel is capable of memcpy operations -- dmacap,memset to indicate that the XOR channel is capable of memset operations -- dmacap,xor to indicate that the XOR channel is capable of xor operations -- dmacap,interrupt to indicate that the XOR channel is capable of - generating interrupts - -Example: - -xor@d0060900 { - compatible = "marvell,orion-xor"; - reg = <0xd0060900 0x100 - 0xd0060b00 0x100>; - clocks = <&coreclk 0>; - - xor00 { - interrupts = <51>; - }; - xor01 { - interrupts = <52>; - }; -}; From e56982021f5303b2523ac247e3c79b063459d012 Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Wed, 2 Jul 2025 20:36:06 +0200 Subject: [PATCH 0952/2411] dmaengine: xdmac: make it selectable for ARCH_MICROCHIP LAN969x uses the Atmel XDMAC, so make it selectable for ARCH_MICROCHIP to avoid needing to update depends in future if other Microchip SoC-s use it as well. Signed-off-by: Robert Marko Link: https://lore.kernel.org/r/20250702183856.1727275-9-robert.marko@sartura.hr Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 3bc79f320540..05c7c7d9e5a4 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -110,7 +110,7 @@ config AT_HDMAC config AT_XDMAC tristate "Atmel XDMA support" - depends on ARCH_AT91 + depends on ARCH_MICROCHIP select DMA_ENGINE help Support the Atmel XDMA controller. From e3a9ccd21897a59d02cf2b7a95297086249306d6 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 23 May 2025 17:32:52 -0400 Subject: [PATCH 0953/2411] dt-bindings: dma: fsl-mxs-dma: allow interrupt-names for fsl,imx23-dma-apbx Allow interrupt-names for fsl,imx23-dma-apbx and keep the same restriction for others. 
Signed-off-by: Frank Li Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250523213252.582366-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/dma/fsl,mxs-dma.yaml | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/Documentation/devicetree/bindings/dma/fsl,mxs-dma.yaml b/Documentation/devicetree/bindings/dma/fsl,mxs-dma.yaml index 75a7d9556699..9102b615dbd6 100644 --- a/Documentation/devicetree/bindings/dma/fsl,mxs-dma.yaml +++ b/Documentation/devicetree/bindings/dma/fsl,mxs-dma.yaml @@ -23,6 +23,35 @@ allOf: properties: power-domains: false + - if: + properties: + compatible: + contains: + const: fsl,imx23-dma-apbx + then: + properties: + interrupt-names: + items: + - const: audio-adc + - const: audio-dac + - const: spdif-tx + - const: i2c + - const: saif0 + - const: empty0 + - const: auart0-rx + - const: auart0-tx + - const: auart1-rx + - const: auart1-tx + - const: saif1 + - const: empty1 + - const: empty2 + - const: empty3 + - const: empty4 + - const: empty5 + else: + properties: + interrupt-names: false + properties: compatible: oneOf: @@ -54,6 +83,10 @@ properties: minItems: 4 maxItems: 16 + interrupt-names: + minItems: 4 + maxItems: 16 + "#dma-cells": const: 1 From 9ba817fb7c6afd3c86a6d4c3b822924b87ef0348 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 22 Jul 2025 17:08:06 -0400 Subject: [PATCH 0954/2411] tracing: Deprecate auto-mounting tracefs in debugfs In January 2015, tracefs was created to allow access to the tracing infrastructure without needing to compile in debugfs. When tracefs is configured, the directory /sys/kernel/tracing will exist and tooling is expected to use that path to access the tracing infrastructure. To allow backward compatibility, when debugfs is mounted, it would automount tracefs in its "tracing" directory so that tooling that had hard coded /sys/kernel/debug/tracing would still work. 
It has been over 10 years since the new interface was introduced, and all tooling should now be using it. Start the process of deprecating the old path so that it doesn't need to be maintained anymore. A new config is added to allow distributions to disable automounting of tracefs on debugfs. If /sys/kernel/debug/tracing is accessed, a pr_warn() will trigger stating: "NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030" Expect to remove this feature in 5 years (2030). Cc: Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Sebastian Andrzej Siewior Cc: Namhyung Kim Cc: Linus Torvalds Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: Al Viro Cc: Christian Brauner Cc: Jan Kara Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Ian Rogers Link: https://lore.kernel.org/20250722170806.40c068c6@gandalf.local.home Signed-off-by: Steven Rostedt (Google) --- .../ABI/obsolete/automount-tracefs-debugfs | 20 +++++++++++++++++++ kernel/trace/Kconfig | 13 ++++++++++++ kernel/trace/trace.c | 14 +++++++++---- 3 files changed, 43 insertions(+), 4 deletions(-) create mode 100644 Documentation/ABI/obsolete/automount-tracefs-debugfs diff --git a/Documentation/ABI/obsolete/automount-tracefs-debugfs b/Documentation/ABI/obsolete/automount-tracefs-debugfs new file mode 100644 index 000000000000..a5196ec78cb5 --- /dev/null +++ b/Documentation/ABI/obsolete/automount-tracefs-debugfs @@ -0,0 +1,20 @@ +What: /sys/kernel/debug/tracing +Date: May 2008 +KernelVersion: 2.6.27 +Contact: linux-trace-kernel@vger.kernel.org +Description: + + When ftrace was first added to the kernel, its interface was placed + into the debugfs file system under the "tracing" directory. Access + to the files was through /sys/kernel/debug/tracing. As systems wanted + access to the tracing interface without having to enable debugfs, a + new interface was created called "tracefs".
This was a stand alone + file system and was usually mounted in /sys/kernel/tracing. + + To allow older tooling to continue to operate, when mounting + debugfs, the tracefs file system would automatically get mounted in + the "tracing" directory of debugfs. The tracefs interface was added + in January 2015 in the v4.1 kernel. + + All tooling should now be using tracefs directly and the "tracing" + directory in debugfs should be removed by January 2030. diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index a3f35c7d83b6..93e8e7fc11c0 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -199,6 +199,19 @@ menuconfig FTRACE if FTRACE +config TRACEFS_AUTOMOUNT_DEPRECATED + bool "Automount tracefs on debugfs [DEPRECATED]" + depends on TRACING + default y + help + The tracing interface was moved from /sys/kernel/debug/tracing + to /sys/kernel/tracing in 2015, but the tracing file system + was still automounted in /sys/kernel/debug for backward + compatibility with tooling. + + The new interface has been around for more than 10 years and + the old debug mount will soon be removed. + config BOOTTIME_TRACING bool "Boot-time Tracing support" depends on TRACING diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0dff4298fc0e..06ab5b7a8711 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6296,7 +6296,7 @@ static bool tracer_options_updated; static void add_tracer_options(struct trace_array *tr, struct tracer *t) { /* Only enable if the directory has been created already. 
*/ - if (!tr->dir) + if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL)) return; /* Only create trace option files after update_tracer_options finish */ @@ -8977,13 +8977,13 @@ static inline __init int register_snapshot_cmd(void) { return 0; } static struct dentry *tracing_get_dentry(struct trace_array *tr) { - if (WARN_ON(!tr->dir)) - return ERR_PTR(-ENODEV); - /* Top directory uses NULL as the parent */ if (tr->flags & TRACE_ARRAY_FL_GLOBAL) return NULL; + if (WARN_ON(!tr->dir)) + return ERR_PTR(-ENODEV); + /* All sub buffers have a descriptor */ return tr->dir; } @@ -10249,6 +10249,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) ftrace_init_tracefs(tr, d_tracer); } +#ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) { struct vfsmount *mnt; @@ -10270,6 +10271,8 @@ static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) if (IS_ERR(fc)) return ERR_CAST(fc); + pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n"); + ret = vfs_parse_fs_string(fc, "source", "tracefs", strlen("tracefs")); if (!ret) @@ -10280,6 +10283,7 @@ static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) put_fs_context(fc); return mnt; } +#endif /** * tracing_init_dentry - initialize top level trace array @@ -10304,6 +10308,7 @@ int tracing_init_dentry(void) if (WARN_ON(!tracefs_initialized())) return -ENODEV; +#ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED /* * As there may still be users that expect the tracing * files to exist in debugfs/tracing, we must automount @@ -10312,6 +10317,7 @@ int tracing_init_dentry(void) */ tr->dir = debugfs_create_automount("tracing", NULL, trace_automount, NULL); +#endif return 0; } From c79a7ca8fb72a17db03e916438c44d9afc98998f Mon Sep 17 00:00:00 2001 From: Salah Triki Date: Sat, 19 Jul 2025 05:34:40 +0100 Subject: [PATCH 0955/2411] PCI: mvebu: Use devm_add_action_or_reset() instead of 
devm_add_action() Replace devm_add_action() with devm_add_action_or_reset() to avoid explicitly dropping the 'port->clk' reference in error path. Signed-off-by: Salah Triki [mani: reworded commit subject and description] Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/aHsgYALHfQbrgq0t@pc --- drivers/pci/controller/pci-mvebu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c index a4a2bac4f4b2..755651f33811 100644 --- a/drivers/pci/controller/pci-mvebu.c +++ b/drivers/pci/controller/pci-mvebu.c @@ -1353,11 +1353,9 @@ static int mvebu_pcie_parse_port(struct mvebu_pcie *pcie, goto skip; } - ret = devm_add_action(dev, mvebu_pcie_port_clk_put, port); - if (ret < 0) { - clk_put(port->clk); + ret = devm_add_action_or_reset(dev, mvebu_pcie_port_clk_put, port); + if (ret < 0) goto err; - } return 1; From b265cb1d68a9ab75cd0048cd604283a152fcf633 Mon Sep 17 00:00:00 2001 From: Antoni Pokusinski Date: Sun, 13 Apr 2025 15:07:53 +0200 Subject: [PATCH 0956/2411] dt-bindings: rtc: pcf85063: add binding for RV8063 Microcrystal RV8063 is a real-time clock module with SPI interface. 
Reviewed-by: Rob Herring (Arm) Signed-off-by: Antoni Pokusinski Link: https://lore.kernel.org/r/20250413130755.159373-2-apokusinski01@gmail.com Signed-off-by: Alexandre Belloni --- .../devicetree/bindings/rtc/nxp,pcf85063.yaml | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/rtc/nxp,pcf85063.yaml b/Documentation/devicetree/bindings/rtc/nxp,pcf85063.yaml index 2f892f8640d1..1e6277e524c2 100644 --- a/Documentation/devicetree/bindings/rtc/nxp,pcf85063.yaml +++ b/Documentation/devicetree/bindings/rtc/nxp,pcf85063.yaml @@ -12,6 +12,7 @@ maintainers: properties: compatible: enum: + - microcrystal,rv8063 - microcrystal,rv8263 - nxp,pcf85063 - nxp,pcf85063a @@ -44,13 +45,19 @@ properties: wakeup-source: true + spi-cs-high: true + + spi-3wire: true + allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# - $ref: rtc.yaml# - if: properties: compatible: contains: enum: + - microcrystal,rv8063 - microcrystal,rv8263 then: properties: @@ -65,12 +72,23 @@ allOf: properties: quartz-load-femtofarads: const: 7000 + - if: + properties: + compatible: + not: + contains: + enum: + - microcrystal,rv8063 + then: + properties: + spi-cs-high: false + spi-3wire: false required: - compatible - reg -additionalProperties: false +unevaluatedProperties: false examples: - | @@ -90,3 +108,16 @@ examples: }; }; }; + + - | + spi { + #address-cells = <1>; + #size-cells = <0>; + + rtc@0 { + compatible = "microcrystal,rv8063"; + reg = <0>; + spi-cs-high; + spi-3wire; + }; + }; From 29ac4cedb00e2df366418f768c70d0e2d60fd007 Mon Sep 17 00:00:00 2001 From: Antoni Pokusinski Date: Sun, 13 Apr 2025 15:07:54 +0200 Subject: [PATCH 0957/2411] rtc: pcf85063: create pcf85063_i2c_probe Move the i2c-specific code from pcf85063_probe to the newly created function. This is a preparation for introducing the support for RV8063 real-time clock with SPI interface. 
Signed-off-by: Antoni Pokusinski Link: https://lore.kernel.org/r/20250413130755.159373-3-apokusinski01@gmail.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf85063.c | 97 +++++++++++++++++++++++++++----------- 1 file changed, 70 insertions(+), 27 deletions(-) diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index 4fa5c4ecdd5a..03dfc58f4cd7 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -559,12 +559,12 @@ static const struct pcf85063_config config_rv8263 = { .force_cap_7000 = 1, }; -static int pcf85063_probe(struct i2c_client *client) +static int pcf85063_probe(struct device *dev, struct regmap *regmap, int irq, + const struct pcf85063_config *config) { struct pcf85063 *pcf85063; unsigned int tmp; int err; - const struct pcf85063_config *config; struct nvmem_config nvmem_cfg = { .name = "pcf85063_nvram", .reg_read = pcf85063_nvmem_read, @@ -573,28 +573,22 @@ static int pcf85063_probe(struct i2c_client *client) .size = 1, }; - dev_dbg(&client->dev, "%s\n", __func__); + dev_dbg(dev, "%s\n", __func__); - pcf85063 = devm_kzalloc(&client->dev, sizeof(struct pcf85063), + pcf85063 = devm_kzalloc(dev, sizeof(struct pcf85063), GFP_KERNEL); if (!pcf85063) return -ENOMEM; - config = i2c_get_match_data(client); - if (!config) - return -ENODEV; + pcf85063->regmap = regmap; - pcf85063->regmap = devm_regmap_init_i2c(client, &config->regmap); - if (IS_ERR(pcf85063->regmap)) - return PTR_ERR(pcf85063->regmap); - - i2c_set_clientdata(client, pcf85063); + dev_set_drvdata(dev, pcf85063); err = regmap_read(pcf85063->regmap, PCF85063_REG_SC, &tmp); if (err) - return dev_err_probe(&client->dev, err, "RTC chip is not present\n"); + return dev_err_probe(dev, err, "RTC chip is not present\n"); - pcf85063->rtc = devm_rtc_allocate_device(&client->dev); + pcf85063->rtc = devm_rtc_allocate_device(dev); if (IS_ERR(pcf85063->rtc)) return PTR_ERR(pcf85063->rtc); @@ -605,19 +599,17 @@ static int pcf85063_probe(struct i2c_client *client) * 
of the registers after the automatic power-on reset... */ if (tmp & PCF85063_REG_SC_OS) { - dev_warn(&client->dev, - "POR issue detected, sending a SW reset\n"); + dev_warn(dev, "POR issue detected, sending a SW reset\n"); err = regmap_write(pcf85063->regmap, PCF85063_REG_CTRL1, PCF85063_REG_CTRL1_SWR); if (err < 0) - dev_warn(&client->dev, - "SW reset failed, trying to continue\n"); + dev_warn(dev, "SW reset failed, trying to continue\n"); } - err = pcf85063_load_capacitance(pcf85063, client->dev.of_node, + err = pcf85063_load_capacitance(pcf85063, dev->of_node, config->force_cap_7000 ? 7000 : 0); if (err < 0) - dev_warn(&client->dev, "failed to set xtal load capacitance: %d", + dev_warn(dev, "failed to set xtal load capacitance: %d", err); pcf85063->rtc->ops = &pcf85063_rtc_ops; @@ -627,13 +619,13 @@ static int pcf85063_probe(struct i2c_client *client) clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, pcf85063->rtc->features); clear_bit(RTC_FEATURE_ALARM, pcf85063->rtc->features); - if (config->has_alarms && client->irq > 0) { + if (config->has_alarms && irq > 0) { unsigned long irqflags = IRQF_TRIGGER_LOW; - if (dev_fwnode(&client->dev)) + if (dev_fwnode(dev)) irqflags = 0; - err = devm_request_threaded_irq(&client->dev, client->irq, + err = devm_request_threaded_irq(dev, irq, NULL, pcf85063_rtc_handle_irq, irqflags | IRQF_ONESHOT, "pcf85063", pcf85063); @@ -642,8 +634,8 @@ static int pcf85063_probe(struct i2c_client *client) "unable to request IRQ, alarms disabled\n"); } else { set_bit(RTC_FEATURE_ALARM, pcf85063->rtc->features); - device_init_wakeup(&client->dev, true); - err = dev_pm_set_wake_irq(&client->dev, client->irq); + device_init_wakeup(dev, true); + err = dev_pm_set_wake_irq(dev, irq); if (err) dev_err(&pcf85063->rtc->dev, "failed to enable irq wake\n"); @@ -661,6 +653,8 @@ static int pcf85063_probe(struct i2c_client *client) return devm_rtc_register_device(pcf85063->rtc); } +#if IS_ENABLED(CONFIG_I2C) + static const struct i2c_device_id pcf85063_ids[] = { { 
"pca85073a", .driver_data = (kernel_ulong_t)&config_pcf85063a }, { "pcf85063", .driver_data = (kernel_ulong_t)&config_pcf85063 }, @@ -683,16 +677,65 @@ static const struct of_device_id pcf85063_of_match[] = { MODULE_DEVICE_TABLE(of, pcf85063_of_match); #endif +static int pcf85063_i2c_probe(struct i2c_client *client) +{ + const struct pcf85063_config *config; + struct regmap *regmap; + + config = i2c_get_match_data(client); + if (!config) + return -ENODEV; + + regmap = devm_regmap_init_i2c(client, &config->regmap); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + return pcf85063_probe(&client->dev, regmap, client->irq, config); +} + static struct i2c_driver pcf85063_driver = { .driver = { .name = "rtc-pcf85063", .of_match_table = of_match_ptr(pcf85063_of_match), }, - .probe = pcf85063_probe, + .probe = pcf85063_i2c_probe, .id_table = pcf85063_ids, }; -module_i2c_driver(pcf85063_driver); +static int pcf85063_register_driver(void) +{ + return i2c_add_driver(&pcf85063_driver); +} + +static void pcf85063_unregister_driver(void) +{ + i2c_del_driver(&pcf85063_driver); +} + +#else + +static int pcf85063_register_driver(void) +{ + return 0; +} + +static void pcf85063_unregister_driver(void) +{ +} + +#endif /* IS_ENABLED(CONFIG_I2C) */ + +static int __init pcf85063_init(void) +{ + return pcf85063_register_driver(); +} +module_init(pcf85063_init); + +static void __exit pcf85063_exit(void) +{ + pcf85063_unregister_driver(); +} +module_exit(pcf85063_exit); MODULE_AUTHOR("Søren Andersen "); MODULE_DESCRIPTION("PCF85063 RTC driver"); From a3c7f7e16ea8f1c8a34227c7ea08a7e002c2608b Mon Sep 17 00:00:00 2001 From: Antoni Pokusinski Date: Sun, 13 Apr 2025 15:07:55 +0200 Subject: [PATCH 0958/2411] rtc: pcf85063: add support for RV8063 Microcrystal RV8063 is a real-time clock with SPI interface. Its functionality is very similar to the RV8263 rtc. 
Signed-off-by: Antoni Pokusinski Link: https://lore.kernel.org/r/20250413130755.159373-4-apokusinski01@gmail.com Signed-off-by: Alexandre Belloni --- drivers/rtc/Kconfig | 21 +++++---- drivers/rtc/rtc-pcf85063.c | 87 +++++++++++++++++++++++++++++++++++++- 2 files changed, 98 insertions(+), 10 deletions(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 9aec922613ce..64f6e9756aff 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -483,15 +483,6 @@ config RTC_DRV_PCF8523 This driver can also be built as a module. If so, the module will be called rtc-pcf8523. -config RTC_DRV_PCF85063 - tristate "NXP PCF85063" - select REGMAP_I2C - help - If you say yes here you get support for the PCF85063 RTC chip - - This driver can also be built as a module. If so, the module - will be called rtc-pcf85063. - config RTC_DRV_PCF85363 tristate "NXP PCF85363" select REGMAP_I2C @@ -971,6 +962,18 @@ config RTC_DRV_PCF2127 This driver can also be built as a module. If so, the module will be called rtc-pcf2127. +config RTC_DRV_PCF85063 + tristate "NXP PCF85063" + depends on RTC_I2C_AND_SPI + select REGMAP_I2C if I2C + select REGMAP_SPI if SPI_MASTER + help + If you say yes here you get support for the PCF85063 and RV8063 + RTC chips. + + This driver can also be built as a module. If so, the module + will be called rtc-pcf85063. + config RTC_DRV_RV3029C2 tristate "Micro Crystal RV3029/3049" depends on RTC_I2C_AND_SPI diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index 03dfc58f4cd7..d9b67b959d18 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -17,6 +17,7 @@ #include #include #include +#include /* * Information for this driver was pulled from the following datasheets. @@ -29,6 +30,9 @@ * * https://www.microcrystal.com/fileadmin/Media/Products/RTC/App.Manual/RV-8263-C7_App-Manual.pdf * RV8263 -- Rev. 
1.0 — January 2019 + * + * https://www.microcrystal.com/fileadmin/Media/Products/RTC/App.Manual/RV-8063-C7_App-Manual.pdf + * RV8063 -- Rev. 1.1 - October 2018 */ #define PCF85063_REG_CTRL1 0x00 /* status */ @@ -559,6 +563,18 @@ static const struct pcf85063_config config_rv8263 = { .force_cap_7000 = 1, }; +static const struct pcf85063_config config_rv8063 = { + .regmap = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0x11, + .read_flag_mask = BIT(7) | BIT(5), + .write_flag_mask = BIT(5), + }, + .has_alarms = 1, + .force_cap_7000 = 1, +}; + static int pcf85063_probe(struct device *dev, struct regmap *regmap, int irq, const struct pcf85063_config *config) { @@ -725,14 +741,83 @@ static void pcf85063_unregister_driver(void) #endif /* IS_ENABLED(CONFIG_I2C) */ +#if IS_ENABLED(CONFIG_SPI_MASTER) + +static const struct spi_device_id rv8063_id[] = { + { "rv8063" }, + {} +}; +MODULE_DEVICE_TABLE(spi, rv8063_id); + +static const struct of_device_id rv8063_of_match[] = { + { .compatible = "microcrystal,rv8063" }, + {} +}; +MODULE_DEVICE_TABLE(of, rv8063_of_match); + +static int rv8063_probe(struct spi_device *spi) +{ + const struct pcf85063_config *config = &config_rv8063; + struct regmap *regmap; + + regmap = devm_regmap_init_spi(spi, &config->regmap); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + return pcf85063_probe(&spi->dev, regmap, spi->irq, config); +} + +static struct spi_driver rv8063_driver = { + .driver = { + .name = "rv8063", + .of_match_table = rv8063_of_match, + }, + .probe = rv8063_probe, + .id_table = rv8063_id, +}; + +static int __init rv8063_register_driver(void) +{ + return spi_register_driver(&rv8063_driver); +} + +static void __exit rv8063_unregister_driver(void) +{ + spi_unregister_driver(&rv8063_driver); +} + +#else + +static int __init rv8063_register_driver(void) +{ + return 0; +} + +static void __exit rv8063_unregister_driver(void) +{ +} + +#endif /* IS_ENABLED(CONFIG_SPI_MASTER) */ + static int __init pcf85063_init(void) { - return 
pcf85063_register_driver(); + int ret; + + ret = pcf85063_register_driver(); + if (ret) + return ret; + + ret = rv8063_register_driver(); + if (ret) + pcf85063_unregister_driver(); + + return ret; } module_init(pcf85063_init); static void __exit pcf85063_exit(void) { + rv8063_unregister_driver(); pcf85063_unregister_driver(); } module_exit(pcf85063_exit); From db12d7ec6bdfdac39850198cc97a797b2c4dcda6 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 23 Jul 2025 15:04:18 +0800 Subject: [PATCH 0959/2411] perf stat: Remove duplicated include in stat-shadow.c The header file rblist.h is included twice in stat-shadow.c, so one inclusion can be removed. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=22933 Signed-off-by: Yang Li Link: https://lore.kernel.org/r/20250723070418.2195172-1-yang.lee@linux.alibaba.com Signed-off-by: Namhyung Kim --- tools/perf/util/stat-shadow.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 2b4950f56fae..abaf6b579bfc 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -15,7 +15,6 @@ #include #include "iostat.h" #include "util/hashmap.h" -#include "rblist.h" #include "tool_pmu.h" struct stats walltime_nsecs_stats; From 50fcd1c14e364a2d65e6049578db320d063e9fa1 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 22 Jul 2025 16:37:34 -0500 Subject: [PATCH 0960/2411] PCI: Fix typos Fix typos.
Signed-off-by: Bjorn Helgaas Acked-by: Thomas Gleixner Link: https://patch.msgid.link/20250722213743.2822761-1-helgaas@kernel.org --- drivers/pci/controller/pcie-brcmstb.c | 2 +- drivers/pci/msi/msi.c | 2 +- drivers/pci/pcie/ptm.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c index 92887b394eb4..4d59ce231a64 100644 --- a/drivers/pci/controller/pcie-brcmstb.c +++ b/drivers/pci/controller/pcie-brcmstb.c @@ -970,7 +970,7 @@ static int brcm_pcie_get_inbound_wins(struct brcm_pcie *pcie, * * The PCIe host controller by design must set the inbound viewport to * be a contiguous arrangement of all of the system's memory. In - * addition, its size mut be a power of two. To further complicate + * addition, its size must be a power of two. To further complicate * matters, the viewport must start on a pcie-address that is aligned * on a multiple of its size. If a portion of the viewport does not * represent system memory -- e.g. 3GB of memory requires a 4GB diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 6ede55a7c5e6..7683635261e0 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -941,7 +941,7 @@ int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int index, u16 tag) /* * This is a horrible hack, but short of implementing a PCI * specific interrupt chip callback and a huge pile of - * infrastructure, this is the minor nuissance. It provides the + * infrastructure, this is the minor nuisance. It provides the * protection against concurrent operations on this entry and keeps * the control word cache in sync. 
*/ diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c index ee5f615a9023..b5103448eb4d 100644 --- a/drivers/pci/pcie/ptm.c +++ b/drivers/pci/pcie/ptm.c @@ -506,7 +506,7 @@ struct pci_ptm_debugfs *pcie_ptm_create_debugfs(struct device *dev, void *pdata, if (!ops->check_capability) return NULL; - /* Check for PTM capability before creating debugfs attrbutes */ + /* Check for PTM capability before creating debugfs attributes */ ret = ops->check_capability(pdata); if (!ret) { dev_dbg(dev, "PTM capability not present\n"); From 48458654659c9c2e149c211d86637f1592470da5 Mon Sep 17 00:00:00 2001 From: Meagan Lloyd Date: Wed, 11 Jun 2025 11:14:15 -0700 Subject: [PATCH 0961/2411] rtc: ds1307: remove clear of oscillator stop flag (OSF) in probe In using CONFIG_RTC_HCTOSYS, rtc_hctosys() will sync the RTC time to the kernel time as long as rtc_read_time() succeeds. In some power loss situations, our supercapacitor-backed DS1342 RTC comes up with either an unpredictable future time or the default 01/01/00 from the datasheet. The oscillator stop flag (OSF) is set in these scenarios due to the power loss and can be used to determine the validity of the RTC data. Some chip types in the ds1307 driver already have OSF handling to determine whether .read_time provides valid RTC data or returns -EINVAL. This change removes the clear of the OSF in .probe as the OSF needs to be preserved to expand the OSF handling to the ds1341 chip type (note that DS1341 and DS1342 share a datasheet). 
Signed-off-by: Meagan Lloyd Reviewed-by: Tyler Hicks Acked-by: Rodolfo Giometti Link: https://lore.kernel.org/r/1749665656-30108-2-git-send-email-meaganlloyd@linux.microsoft.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds1307.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 5efbe69bf5ca..65beb7067e3f 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -1813,10 +1813,8 @@ static int ds1307_probe(struct i2c_client *client) regmap_write(ds1307->regmap, DS1337_REG_CONTROL, regs[0]); - /* oscillator fault? clear flag, and warn */ + /* oscillator fault? warn */ if (regs[1] & DS1337_BIT_OSF) { - regmap_write(ds1307->regmap, DS1337_REG_STATUS, - regs[1] & ~DS1337_BIT_OSF); dev_warn(ds1307->dev, "SET TIME!\n"); } break; From 523923cfd5d622b8f4ba893fdaf29fa6adeb8c3e Mon Sep 17 00:00:00 2001 From: Meagan Lloyd Date: Wed, 11 Jun 2025 11:14:16 -0700 Subject: [PATCH 0962/2411] rtc: ds1307: handle oscillator stop flag (OSF) for ds1341 In using CONFIG_RTC_HCTOSYS, rtc_hctosys() will sync the RTC time to the kernel time as long as rtc_read_time() succeeds. In some power loss situations, our supercapacitor-backed DS1342 RTC comes up with either an unpredictable future time or the default 01/01/00 from the datasheet. The oscillator stop flag (OSF) is set in these scenarios due to the power loss and can be used to determine the validity of the RTC data. This change expands the oscillator stop flag (OSF) handling that has already been implemented for some chips to the ds1341 chip (DS1341 and DS1342 share a datasheet). This handling manages the validity of the RTC data in .read_time and .set_time based on the OSF. 
Signed-off-by: Meagan Lloyd Reviewed-by: Tyler Hicks Acked-by: Rodolfo Giometti Link: https://lore.kernel.org/r/1749665656-30108-3-git-send-email-meaganlloyd@linux.microsoft.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds1307.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 65beb7067e3f..ce0994d9219a 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -279,6 +279,13 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t) if (tmp & DS1340_BIT_OSF) return -EINVAL; break; + case ds_1341: + ret = regmap_read(ds1307->regmap, DS1337_REG_STATUS, &tmp); + if (ret) + return ret; + if (tmp & DS1337_BIT_OSF) + return -EINVAL; + break; case ds_1388: ret = regmap_read(ds1307->regmap, DS1388_REG_FLAG, &tmp); if (ret) @@ -377,6 +384,10 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t) regmap_update_bits(ds1307->regmap, DS1340_REG_FLAG, DS1340_BIT_OSF, 0); break; + case ds_1341: + regmap_update_bits(ds1307->regmap, DS1337_REG_STATUS, + DS1337_BIT_OSF, 0); + break; case ds_1388: regmap_update_bits(ds1307->regmap, DS1388_REG_FLAG, DS1388_BIT_OSF, 0); From db22fd8880a2cb58d8684ba4345b4a8c152b8a4f Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Thu, 17 Jul 2025 17:38:37 +0800 Subject: [PATCH 0963/2411] dt-bindings: rtc: amlogic,a4-rtc: Add compatible string for C3 Amlogic C3 SoCs uses the same rtc controller as A5 SoCs. There is no need for an extra compatible line in the driver, but add C3 compatible line for documentation. 
Signed-off-by: Xianwei Zhao Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250717-rtc-c3-node-v1-1-4f9ae059b8e6@amlogic.com Signed-off-by: Alexandre Belloni --- .../devicetree/bindings/rtc/amlogic,a4-rtc.yaml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/rtc/amlogic,a4-rtc.yaml b/Documentation/devicetree/bindings/rtc/amlogic,a4-rtc.yaml index 5d3ac737abcb..e61f22eca85b 100644 --- a/Documentation/devicetree/bindings/rtc/amlogic,a4-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/amlogic,a4-rtc.yaml @@ -16,9 +16,14 @@ allOf: properties: compatible: - enum: - - amlogic,a4-rtc - - amlogic,a5-rtc + oneOf: + - enum: + - amlogic,a4-rtc + - amlogic,a5-rtc + - items: + - enum: + - amlogic,c3-rtc + - const: amlogic,a5-rtc reg: maxItems: 1 From ae48d3542783cdb826774a751084aa1c536029d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 13 Jun 2025 16:24:05 +0200 Subject: [PATCH 0964/2411] rtc: Optimize calculations in rtc_time64_to_tm() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recently (in commit 7df4cfef8b35 ("rtc: Make rtc_time64_to_tm() support dates before 1970")) the function rtc_time64_to_tm() was repaired for times before 1970. This introduced two if blocks. Cassio Neri pointed out that these are not necessary and suggested an adaptation that allows dropping the two branch points again. This is implemented here. Also adapt the reference to the theoretical paper to link to the final published article instead of the preprint on Cassio's request.
Suggested-by: Cassio Neri Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20250613142405.253420-2-u.kleine-koenig@baylibre.com Signed-off-by: Alexandre Belloni --- drivers/rtc/lib.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/rtc/lib.c b/drivers/rtc/lib.c index 13b5b1f20465..f7051592a6e3 100644 --- a/drivers/rtc/lib.c +++ b/drivers/rtc/lib.c @@ -51,7 +51,7 @@ EXPORT_SYMBOL(rtc_year_days); */ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm) { - int days, secs; + int secs; u64 u64tmp; u32 u32tmp, udays, century, day_of_century, year_of_century, year, @@ -59,28 +59,26 @@ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm) bool is_Jan_or_Feb, is_leap_year; /* - * Get days and seconds while preserving the sign to - * handle negative time values (dates before 1970-01-01) + * The time represented by `time` is given in seconds since 1970-01-01 + * (UTC). As the division done below might misbehave for negative + * values, we convert it to seconds since 0000-03-01 and then assume it + * will be non-negative. + * Below we do 4 * udays + 3 which should fit into a 32 bit unsigned + * variable. So the latest date this algorithm works for is 1073741823 + * days after 0000-03-01 which is in the year 2939805. */ - days = div_s64_rem(time, 86400, &secs); + time += (u64)719468 * 86400; + + udays = div_s64_rem(time, 86400, &secs); /* - * We need 0 <= secs < 86400 which isn't given for negative - * values of time. Fixup accordingly. 
+ * day of the week, 0000-03-01 was a Wednesday (in the proleptic + * Gregorian calendar) */ - if (secs < 0) { - days -= 1; - secs += 86400; - } - - /* day of the week, 1970-01-01 was a Thursday */ - tm->tm_wday = (days + 4) % 7; - /* Ensure tm_wday is always positive */ - if (tm->tm_wday < 0) - tm->tm_wday += 7; + tm->tm_wday = (udays + 3) % 7; /* - * The following algorithm is, basically, Proposition 6.3 of Neri + * The following algorithm is, basically, Figure 12 of Neri * and Schneider [1]. In a few words: it works on the computational * (fictitious) calendar where the year starts in March, month = 2 * (*), and finishes in February, month = 13. This calendar is @@ -100,15 +98,15 @@ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm) * (using just arithmetics) it's easy to convert it to the * corresponding date in the Gregorian calendar. * - * [1] "Euclidean Affine Functions and Applications to Calendar - * Algorithms". https://arxiv.org/abs/2102.06959 + * [1] Neri C, Schneider L. Euclidean affine functions and their + * application to calendar algorithms. Softw Pract Exper. + * 2023;53(4):937-970. doi: 10.1002/spe.3172 + * https://doi.org/10.1002/spe.3172 * * (*) The numbering of months follows rtc_time more closely and * thus, is slightly different from [1]. */ - udays = days + 719468; - u32tmp = 4 * udays + 3; century = u32tmp / 146097; day_of_century = u32tmp % 146097 / 4; From 061fade7a67f6cdfe918a675270d84107abbef61 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 18 Jul 2025 14:54:31 +0100 Subject: [PATCH 0965/2411] ASoC: SDCA: Fix some holes in the regmap readable/writeable helpers The current regmap readable/writeable helper functions always allow the Next flag and allows any Control Number. Mask the Next flag based on SDCA_ACCESS_MODE_DUAL which is the only Mode that supports it. Also check that the Control Number is valid for the given control. 
Fixes: e3f7caf74b79 ("ASoC: SDCA: Add generic regmap SDCA helpers") Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20250718135432.1048566-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/sdca/sdca_regmap.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/sound/soc/sdca/sdca_regmap.c b/sound/soc/sdca/sdca_regmap.c index 66e7eee7d7f4..c41c67c2204a 100644 --- a/sound/soc/sdca/sdca_regmap.c +++ b/sound/soc/sdca/sdca_regmap.c @@ -72,12 +72,18 @@ bool sdca_regmap_readable(struct sdca_function_data *function, unsigned int reg) if (!control) return false; + if (!(BIT(SDW_SDCA_CTL_CNUM(reg)) & control->cn_list)) + return false; + switch (control->mode) { case SDCA_ACCESS_MODE_RW: case SDCA_ACCESS_MODE_RO: - case SDCA_ACCESS_MODE_DUAL: case SDCA_ACCESS_MODE_RW1S: case SDCA_ACCESS_MODE_RW1C: + if (SDW_SDCA_NEXT_CTL(0) & reg) + return false; + fallthrough; + case SDCA_ACCESS_MODE_DUAL: /* No access to registers marked solely for device use */ return control->layers & ~SDCA_ACCESS_LAYER_DEVICE; default: @@ -104,11 +110,17 @@ bool sdca_regmap_writeable(struct sdca_function_data *function, unsigned int reg if (!control) return false; + if (!(BIT(SDW_SDCA_CTL_CNUM(reg)) & control->cn_list)) + return false; + switch (control->mode) { case SDCA_ACCESS_MODE_RW: - case SDCA_ACCESS_MODE_DUAL: case SDCA_ACCESS_MODE_RW1S: case SDCA_ACCESS_MODE_RW1C: + if (SDW_SDCA_NEXT_CTL(0) & reg) + return false; + fallthrough; + case SDCA_ACCESS_MODE_DUAL: /* No access to registers marked solely for device use */ return control->layers & ~SDCA_ACCESS_LAYER_DEVICE; default: From 50a479527ef01f9b36dde1803a7e81741a222509 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 18 Jul 2025 14:54:32 +0100 Subject: [PATCH 0966/2411] ASoC: SDCA: Add support for -cn- value properties Many of the DisCo properties that specify Control values have an additional variant that specifies a separate value for each Control Number. 
Add support for these. Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20250718135432.1048566-3-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/sdca_function.h | 14 ++--- sound/soc/sdca/sdca_functions.c | 99 +++++++++++++++++++++------------ sound/soc/sdca/sdca_regmap.c | 13 +++-- 3 files changed, 78 insertions(+), 48 deletions(-) diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h index 90d77fc46416..06ec126cdcc3 100644 --- a/include/sound/sdca_function.h +++ b/include/sound/sdca_function.h @@ -742,14 +742,14 @@ struct sdca_control_range { * struct sdca_control - information for one SDCA Control * @label: Name for the Control, from SDCA Specification v1.0, section 7.1.7. * @sel: Identifier used for addressing. - * @value: Holds the Control value for constants and defaults. * @nbits: Number of bits used in the Control. - * @interrupt_position: SCDA interrupt line that will alert to changes on this - * Control. + * @values: Holds the Control value for constants and defaults. * @cn_list: A bitmask showing the valid Control Numbers within this Control, * Control Numbers typically represent channels. - * @range: Buffer describing valid range of values for the Control. + * @interrupt_position: SCDA interrupt line that will alert to changes on this + * Control. * @type: Format of the data in the Control. + * @range: Buffer describing valid range of values for the Control. * @mode: Access mode of the Control. * @layers: Bitmask of access layers of the Control. * @deferrable: Indicates if the access to the Control can be deferred. 
@@ -760,13 +760,13 @@ struct sdca_control { const char *label; int sel; - int value; int nbits; - int interrupt_position; + int *values; u64 cn_list; + int interrupt_position; - struct sdca_control_range range; enum sdca_control_datatype type; + struct sdca_control_range range; enum sdca_access_mode mode; u8 layers; diff --git a/sound/soc/sdca/sdca_functions.c b/sound/soc/sdca/sdca_functions.c index 4b6da587c4ac..0faee522b3af 100644 --- a/sound/soc/sdca/sdca_functions.c +++ b/sound/soc/sdca/sdca_functions.c @@ -814,6 +814,43 @@ static int find_sdca_control_range(struct device *dev, return 0; } +static int find_sdca_control_value(struct device *dev, struct sdca_entity *entity, + struct fwnode_handle *control_node, + struct sdca_control *control, + const char * const label) +{ + char property[SDCA_PROPERTY_LENGTH]; + bool global = true; + int ret, cn, i; + u32 tmp; + + snprintf(property, sizeof(property), "mipi-sdca-control-%s", label); + + ret = fwnode_property_read_u32(control_node, property, &tmp); + if (ret == -EINVAL) + global = false; + else if (ret) + return ret; + + i = 0; + for_each_set_bit(cn, (unsigned long *)&control->cn_list, + BITS_PER_TYPE(control->cn_list)) { + if (!global) { + snprintf(property, sizeof(property), + "mipi-sdca-control-cn-%d-%s", cn, label); + + ret = fwnode_property_read_u32(control_node, property, &tmp); + if (ret) + return ret; + } + + control->values[i] = tmp; + i++; + } + + return 0; +} + /* * TODO: Add support for -cn- properties, allowing different channels to have * different defaults etc. 
@@ -843,44 +880,44 @@ static int find_sdca_entity_control(struct device *dev, struct sdca_entity *enti control->layers = tmp; + ret = fwnode_property_read_u64(control_node, "mipi-sdca-control-cn-list", + &control->cn_list); + if (ret == -EINVAL) { + /* Spec allows not specifying cn-list if only the first number is used */ + control->cn_list = 0x1; + } else if (ret || !control->cn_list) { + dev_err(dev, "%s: control %#x: cn list missing: %d\n", + entity->label, control->sel, ret); + return ret; + } + + control->values = devm_kzalloc(dev, hweight64(control->cn_list), GFP_KERNEL); + if (!control->values) + return -ENOMEM; + switch (control->mode) { case SDCA_ACCESS_MODE_DC: - ret = fwnode_property_read_u32(control_node, - "mipi-sdca-control-dc-value", - &tmp); + ret = find_sdca_control_value(dev, entity, control_node, control, + "dc-value"); if (ret) { dev_err(dev, "%s: control %#x: dc value missing: %d\n", entity->label, control->sel, ret); return ret; } - control->value = tmp; control->has_fixed = true; break; case SDCA_ACCESS_MODE_RW: case SDCA_ACCESS_MODE_DUAL: - ret = fwnode_property_read_u32(control_node, - "mipi-sdca-control-default-value", - &tmp); - if (!ret) { - control->value = tmp; + ret = find_sdca_control_value(dev, entity, control_node, control, + "default-value"); + if (!ret) control->has_default = true; - } - ret = fwnode_property_read_u32(control_node, - "mipi-sdca-control-fixed-value", - &tmp); - if (!ret) { - if (control->has_default && control->value != tmp) { - dev_err(dev, - "%s: control %#x: default and fixed value don't match\n", - entity->label, control->sel); - return -EINVAL; - } - - control->value = tmp; + ret = find_sdca_control_value(dev, entity, control_node, control, + "fixed-value"); + if (!ret) control->has_fixed = true; - } fallthrough; case SDCA_ACCESS_MODE_RO: control->deferrable = fwnode_property_read_bool(control_node, @@ -897,17 +934,6 @@ static int find_sdca_entity_control(struct device *dev, struct sdca_entity *enti return 
ret; } - ret = fwnode_property_read_u64(control_node, "mipi-sdca-control-cn-list", - &control->cn_list); - if (ret == -EINVAL) { - /* Spec allows not specifying cn-list if only the first number is used */ - control->cn_list = 0x1; - } else if (ret || !control->cn_list) { - dev_err(dev, "%s: control %#x: cn list missing: %d\n", - entity->label, control->sel, ret); - return ret; - } - ret = fwnode_property_read_u32(control_node, "mipi-sdca-control-interrupt-position", &tmp); @@ -923,11 +949,10 @@ static int find_sdca_entity_control(struct device *dev, struct sdca_entity *enti control->type = find_sdca_control_datatype(entity, control); control->nbits = find_sdca_control_bits(entity, control); - dev_info(dev, "%s: %s: control %#x mode %#x layers %#x cn %#llx int %d value %#x %s\n", + dev_info(dev, "%s: %s: control %#x mode %#x layers %#x cn %#llx int %d %s\n", entity->label, control->label, control->sel, control->mode, control->layers, control->cn_list, - control->interrupt_position, control->value, - control->deferrable ? "deferrable" : ""); + control->interrupt_position, control->deferrable ? 
"deferrable" : ""); return 0; } diff --git a/sound/soc/sdca/sdca_regmap.c b/sound/soc/sdca/sdca_regmap.c index c41c67c2204a..5cb3048ea8cf 100644 --- a/sound/soc/sdca/sdca_regmap.c +++ b/sound/soc/sdca/sdca_regmap.c @@ -253,7 +253,7 @@ int sdca_regmap_populate_constants(struct device *dev, struct sdca_function_data *function, struct reg_default *consts) { - int i, j, k; + int i, j, k, l; for (i = 0, k = 0; i < function->num_entities; i++) { struct sdca_entity *entity = &function->entities[i]; @@ -265,13 +265,15 @@ int sdca_regmap_populate_constants(struct device *dev, if (control->mode != SDCA_ACCESS_MODE_DC) continue; + l = 0; for_each_set_bit(cn, (unsigned long *)&control->cn_list, BITS_PER_TYPE(control->cn_list)) { consts[k].reg = SDW_SDCA_CTL(function->desc->adr, entity->id, control->sel, cn); - consts[k].def = control->value; + consts[k].def = control->values[l]; k++; + l++; } } } @@ -295,7 +297,7 @@ EXPORT_SYMBOL_NS(sdca_regmap_populate_constants, "SND_SOC_SDCA"); int sdca_regmap_write_defaults(struct device *dev, struct regmap *regmap, struct sdca_function_data *function) { - int i, j; + int i, j, k; int ret; for (i = 0; i < function->num_entities; i++) { @@ -311,6 +313,7 @@ int sdca_regmap_write_defaults(struct device *dev, struct regmap *regmap, if (!control->has_default && !control->has_fixed) continue; + k = 0; for_each_set_bit(cn, (unsigned long *)&control->cn_list, BITS_PER_TYPE(control->cn_list)) { unsigned int reg; @@ -318,9 +321,11 @@ int sdca_regmap_write_defaults(struct device *dev, struct regmap *regmap, reg = SDW_SDCA_CTL(function->desc->adr, entity->id, control->sel, cn); - ret = regmap_write(regmap, reg, control->value); + ret = regmap_write(regmap, reg, control->values[k]); if (ret) return ret; + + k++; } } } From ca592e20659e0304ebd8f4dabb273da4f9385848 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 10 Jul 2025 11:04:04 +0800 Subject: [PATCH 0967/2411] ASoC: fsl_xcvr: get channel status data when PHY is not exists There is no PHY for 
the XCVR module on i.MX93, the channel status needs to be obtained from FSL_XCVR_RX_CS_DATA_* registers. And channel status acknowledge (CSA) bit should be set once channel status is processed. Fixes: e240b9329a30 ("ASoC: fsl_xcvr: Add support for i.MX93 platform") Signed-off-by: Shengjiu Wang Link: https://patch.msgid.link/20250710030405.3370671-2-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_xcvr.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sound/soc/fsl/fsl_xcvr.c b/sound/soc/fsl/fsl_xcvr.c index e3111dd80be4..405433144515 100644 --- a/sound/soc/fsl/fsl_xcvr.c +++ b/sound/soc/fsl/fsl_xcvr.c @@ -1423,6 +1423,26 @@ static irqreturn_t irq0_isr(int irq, void *devid) /* clear CS control register */ memset_io(reg_ctrl, 0, sizeof(val)); } + } else { + regmap_read(xcvr->regmap, FSL_XCVR_RX_CS_DATA_0, + (u32 *)&xcvr->rx_iec958.status[0]); + regmap_read(xcvr->regmap, FSL_XCVR_RX_CS_DATA_1, + (u32 *)&xcvr->rx_iec958.status[4]); + regmap_read(xcvr->regmap, FSL_XCVR_RX_CS_DATA_2, + (u32 *)&xcvr->rx_iec958.status[8]); + regmap_read(xcvr->regmap, FSL_XCVR_RX_CS_DATA_3, + (u32 *)&xcvr->rx_iec958.status[12]); + regmap_read(xcvr->regmap, FSL_XCVR_RX_CS_DATA_4, + (u32 *)&xcvr->rx_iec958.status[16]); + regmap_read(xcvr->regmap, FSL_XCVR_RX_CS_DATA_5, + (u32 *)&xcvr->rx_iec958.status[20]); + for (i = 0; i < 6; i++) { + val = *(u32 *)(xcvr->rx_iec958.status + i * 4); + *(u32 *)(xcvr->rx_iec958.status + i * 4) = + bitrev32(val); + } + regmap_set_bits(xcvr->regmap, FSL_XCVR_RX_DPTH_CTRL, + FSL_XCVR_RX_DPTH_CTRL_CSA); } } if (isr & FSL_XCVR_IRQ_NEW_UD) { From 6776ecc9dd587c08a6bb334542f9f8821a091013 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 10 Jul 2025 11:04:05 +0800 Subject: [PATCH 0968/2411] ASoC: fsl_xcvr: get channel status data with firmware exists For the XCVR module on i.MX95, even though it only supports SPDIF, the channel status needs to be obtained from RAM space, which is processed by firmware. 
Firmware is necessary to trigger the FSL_XCVR_IRQ_NEW_CS interrupt. This change also applies for the SPDIF & ARC function on i.MX8MP which has the firmware. Fixes: e6a9750a346b ("ASoC: fsl_xcvr: Add suspend and resume support") Signed-off-by: Shengjiu Wang Link: https://patch.msgid.link/20250710030405.3370671-3-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_xcvr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/fsl/fsl_xcvr.c b/sound/soc/fsl/fsl_xcvr.c index 405433144515..5d804860f7d8 100644 --- a/sound/soc/fsl/fsl_xcvr.c +++ b/sound/soc/fsl/fsl_xcvr.c @@ -1395,7 +1395,7 @@ static irqreturn_t irq0_isr(int irq, void *devid) if (isr & FSL_XCVR_IRQ_NEW_CS) { dev_dbg(dev, "Received new CS block\n"); isr_clr |= FSL_XCVR_IRQ_NEW_CS; - if (!xcvr->soc_data->spdif_only) { + if (xcvr->soc_data->fw_name) { /* Data RAM is 4KiB, last two pages: 8 and 9. Select page 8. */ regmap_update_bits(xcvr->regmap, FSL_XCVR_EXT_CTRL, FSL_XCVR_EXT_CTRL_PAGE_MASK, @@ -1517,6 +1517,7 @@ static const struct fsl_xcvr_soc_data fsl_xcvr_imx93_data = { }; static const struct fsl_xcvr_soc_data fsl_xcvr_imx95_data = { + .fw_name = "imx/xcvr/xcvr-imx95.bin", .spdif_only = true, .use_phy = true, .use_edma = true, @@ -1806,7 +1807,7 @@ static int fsl_xcvr_runtime_resume(struct device *dev) } } - if (xcvr->mode == FSL_XCVR_MODE_EARC) { + if (xcvr->soc_data->fw_name) { ret = fsl_xcvr_load_firmware(xcvr); if (ret) { dev_err(dev, "failed to load firmware.\n"); From 2260bc6ea8bd57aec92cbda770de9cc95232f64d Mon Sep 17 00:00:00 2001 From: Chancel Liu Date: Wed, 23 Jul 2025 16:37:25 +0900 Subject: [PATCH 0969/2411] ASoC: imx-card: Add WM8524 support WM8524 is a stereo DAC. Add support for this codec in imx-card ASoC machine driver. 
Signed-off-by: Chancel Liu Link: https://patch.msgid.link/20250723073725.787844-1-chancel.liu@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/imx-card.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/sound/soc/fsl/imx-card.c b/sound/soc/fsl/imx-card.c index ea5dbb54b584..28699d7b75ca 100644 --- a/sound/soc/fsl/imx-card.c +++ b/sound/soc/fsl/imx-card.c @@ -26,6 +26,7 @@ enum codec_type { CODEC_AK4497, CODEC_AK5552, CODEC_CS42888, + CODEC_WM8524, }; /* @@ -196,6 +197,13 @@ static struct imx_akcodec_tdm_fs_mul cs42888_tdm_fs_mul[] = { { .min = 256, .max = 256, .mul = 256 }, }; +static struct imx_akcodec_fs_mul wm8524_fs_mul[] = { + { .rmin = 8000, .rmax = 32000, .wmin = 256, .wmax = 1152, }, + { .rmin = 44100, .rmax = 48000, .wmin = 256, .wmax = 768, }, + { .rmin = 88200, .rmax = 96000, .wmin = 128, .wmax = 384, }, + { .rmin = 176400, .rmax = 192000, .wmin = 128, .wmax = 192, }, +}; + static const u32 akcodec_rates[] = { 8000, 11025, 16000, 22050, 32000, 44100, 48000, 88200, 96000, 176400, 192000, 352800, 384000, 705600, 768000, @@ -229,6 +237,10 @@ static const u32 cs42888_tdm_channels[] = { 1, 2, 3, 4, 5, 6, 7, 8, }; +static const u32 wm8524_channels[] = { + 2, +}; + static bool format_is_dsd(struct snd_pcm_hw_params *params) { snd_pcm_format_t format = params_format(params); @@ -261,6 +273,7 @@ static bool codec_is_akcodec(unsigned int type) case CODEC_AK5558: case CODEC_AK5552: case CODEC_CS42888: + case CODEC_WM8524: return true; default: break; @@ -477,9 +490,24 @@ static int imx_aif_startup(struct snd_pcm_substream *substream) return ret; } +static void imx_aif_shutdown(struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); + struct snd_soc_dai *cpu_dai; + struct snd_soc_dai *codec_dai; + int i; + + for_each_rtd_cpu_dais(rtd, i, cpu_dai) + snd_soc_dai_set_sysclk(cpu_dai, 0, 0, SND_SOC_CLOCK_OUT); + + for_each_rtd_codec_dais(rtd, i, codec_dai) + 
snd_soc_dai_set_sysclk(codec_dai, 0, 0, SND_SOC_CLOCK_IN); +} + static const struct snd_soc_ops imx_aif_ops = { .hw_params = imx_aif_hw_params, .startup = imx_aif_startup, + .shutdown = imx_aif_shutdown, }; static const struct snd_soc_ops imx_aif_ops_be = { @@ -632,6 +660,8 @@ static int imx_card_parse_of(struct imx_card_data *data) plat_data->type = CODEC_AK5552; else if (!strcmp(link->codecs->dai_name, "cs42888")) plat_data->type = CODEC_CS42888; + else if (!strcmp(link->codecs->dai_name, "wm8524-hifi")) + plat_data->type = CODEC_WM8524; } else { link->codecs = &snd_soc_dummy_dlc; @@ -805,6 +835,10 @@ static int imx_card_probe(struct platform_device *pdev) data->dapm_routes[1].sink = "CPU-Capture"; data->dapm_routes[1].source = "Capture"; break; + case CODEC_WM8524: + data->dapm_routes[0].sink = "Playback"; + data->dapm_routes[0].source = "CPU-Playback"; + break; default: break; } @@ -854,6 +888,12 @@ static int imx_card_probe(struct platform_device *pdev) plat_data->support_tdm_channels = cs42888_tdm_channels; plat_data->num_tdm_channels = ARRAY_SIZE(cs42888_tdm_channels); break; + case CODEC_WM8524: + plat_data->fs_mul = wm8524_fs_mul; + plat_data->num_fs_mul = ARRAY_SIZE(wm8524_fs_mul); + plat_data->support_channels = wm8524_channels; + plat_data->num_channels = ARRAY_SIZE(wm8524_channels); + break; default: break; } From da98e8b97058c73b5c58e9976af2e7286f1c799b Mon Sep 17 00:00:00 2001 From: Varshini Rajendran Date: Tue, 10 Jun 2025 12:20:05 +0530 Subject: [PATCH 0970/2411] ASoC: dt-bindings: atmel,at91-ssc: add microchip,sam9x7-ssc Add microchip,sam9x7-ssc to DT bindings documentation. 
Signed-off-by: Varshini Rajendran Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20250610065005.64070-1-varshini.rajendran@microchip.com Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/atmel,at91-ssc.yaml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/atmel,at91-ssc.yaml b/Documentation/devicetree/bindings/sound/atmel,at91-ssc.yaml index a05e61431824..ce99c2d8c35d 100644 --- a/Documentation/devicetree/bindings/sound/atmel,at91-ssc.yaml +++ b/Documentation/devicetree/bindings/sound/atmel,at91-ssc.yaml @@ -16,9 +16,14 @@ description: properties: compatible: - enum: - - atmel,at91rm9200-ssc - - atmel,at91sam9g45-ssc + oneOf: + - enum: + - atmel,at91rm9200-ssc + - atmel,at91sam9g45-ssc + - items: + - enum: + - microchip,sam9x7-ssc + - const: atmel,at91sam9g45-ssc reg: maxItems: 1 From d31eb217425591e100b475fad6360cd3da2073c6 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Wed, 9 Jul 2025 11:37:21 +0800 Subject: [PATCH 0971/2411] PCI: imx6: Remove apps_reset toggling from imx_pcie_{assert/deassert}_core_reset apps_reset corresponds to LTSSM_EN in i.MX7, i.MX8MQ, i.MX8MM and i.MX8MP platforms. Since assertion/de-assertion of apps_reset is done in imx_pcie_ltssm_enable() and imx_pcie_ltssm_disable(), remove it from imx_pcie_assert_core_reset() and imx_pcie_deassert_core_reset(). This also fixes a failure in enumerating the PI7C9X2G608GP (hotplug) chip reliably on i.MX8MM, as reported by Tim. It should be noted that only i.MX7D, i.MX8MQ, i.MX8MM, and i.MX8MP platforms have the apps_reset logic, so this change doesn't have any effect on other platforms. 
Fixes: ef61c7d8d032 ("PCI: imx6: Deassert apps_reset in imx_pcie_deassert_core_reset()") Reported-by: Tim Harvey Closes: https://lore.kernel.org/all/CAJ+vNU3ohR2YKTwC4xoYrc1z-neDoH2TTZcMHDy+poj9=jSy+w@mail.gmail.com/ Signed-off-by: Richard Zhu [mani: reworded commit subject and description] Signed-off-by: Manivannan Sadhasivam [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Tested-by: Tim Harvey # imx8mp-venice-gw74xx (i.MX8MP + hotplug capable switch) Reviewed-by: Frank Li Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250709033722.2924372-2-hongxing.zhu@nxp.com --- drivers/pci/controller/dwc/pci-imx6.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index eefe922d533b..240e080825bc 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -860,7 +860,6 @@ static int imx95_pcie_core_reset(struct imx_pcie *imx_pcie, bool assert) static void imx_pcie_assert_core_reset(struct imx_pcie *imx_pcie) { reset_control_assert(imx_pcie->pciephy_reset); - reset_control_assert(imx_pcie->apps_reset); if (imx_pcie->drvdata->core_reset) imx_pcie->drvdata->core_reset(imx_pcie, true); @@ -872,7 +871,6 @@ static void imx_pcie_assert_core_reset(struct imx_pcie *imx_pcie) static int imx_pcie_deassert_core_reset(struct imx_pcie *imx_pcie) { reset_control_deassert(imx_pcie->pciephy_reset); - reset_control_deassert(imx_pcie->apps_reset); if (imx_pcie->drvdata->core_reset) imx_pcie->drvdata->core_reset(imx_pcie, false); @@ -1253,6 +1251,9 @@ static int imx_pcie_host_init(struct dw_pcie_rp *pp) } } + /* Make sure that PCIe LTSSM is cleared */ + imx_pcie_ltssm_disable(dev); + ret = imx_pcie_deassert_core_reset(imx_pcie); if (ret < 0) { dev_err(dev, "pcie deassert core reset failed: %d\n", ret); From 2e6ea70690ddd1ffa422423fd0d4523e4dfe4b62 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Wed, 9 Jul 2025 11:37:22 +0800 Subject: [PATCH 
0972/2411] PCI: imx6: Delay link start until configfs 'start' written According to Documentation/PCI/endpoint/pci-endpoint-cfs.rst, the Endpoint controller (EPC) should only start the link when userspace writes '1' to the '/sys/kernel/config/pci_ep/controllers/<EPC>/start' attribute, which ultimately results in calling imx_pcie_start_link() via pci_epc_start_store(). To align with the documented behavior, do not start the link automatically when adding the EP controller. Fixes: 75c2f26da03f ("PCI: imx6: Add i.MX PCIe EP mode support") Signed-off-by: Richard Zhu [mani: reworded commit subject and description] Signed-off-by: Manivannan Sadhasivam [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Frank Li Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250709033722.2924372-3-hongxing.zhu@nxp.com --- drivers/pci/controller/dwc/pci-imx6.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 240e080825bc..80e48746bbaf 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -1474,9 +1474,6 @@ static int imx_add_pcie_ep(struct imx_pcie *imx_pcie, pci_epc_init_notify(ep->epc); - /* Start LTSSM. */ - imx_pcie_ltssm_enable(dev); - return 0; } From 08a7efc5b02a0620ae16aa9584060e980a69cb55 Mon Sep 17 00:00:00 2001 From: Jan Prusakowski Date: Thu, 24 Jul 2025 17:31:15 +0200 Subject: [PATCH 0973/2411] f2fs: vm_unmap_ram() may be called from an invalid context When testing F2FS with xfstests using UFS backed virtual disks the kernel complains sometimes that f2fs_release_decomp_mem() calls vm_unmap_ram() from an invalid context. Example trace from f2fs/007 test: f2fs/007 5s ...
[12:59:38][ 8.902525] run fstests f2fs/007 [ 11.468026] BUG: sleeping function called from invalid context at mm/vmalloc.c:2978 [ 11.471849] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 68, name: irq/22-ufshcd [ 11.475357] preempt_count: 1, expected: 0 [ 11.476970] RCU nest depth: 0, expected: 0 [ 11.478531] CPU: 0 UID: 0 PID: 68 Comm: irq/22-ufshcd Tainted: G W 6.16.0-rc5-xfstests-ufs-g40f92e79b0aa #9 PREEMPT(none) [ 11.478535] Tainted: [W]=WARN [ 11.478536] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 11.478537] Call Trace: [ 11.478543] [ 11.478545] dump_stack_lvl+0x4e/0x70 [ 11.478554] __might_resched.cold+0xaf/0xbe [ 11.478557] vm_unmap_ram+0x21/0xb0 [ 11.478560] f2fs_release_decomp_mem+0x59/0x80 [ 11.478563] f2fs_free_dic+0x18/0x1a0 [ 11.478565] f2fs_finish_read_bio+0xd7/0x290 [ 11.478570] blk_update_request+0xec/0x3b0 [ 11.478574] ? sbitmap_queue_clear+0x3b/0x60 [ 11.478576] scsi_end_request+0x27/0x1a0 [ 11.478582] scsi_io_completion+0x40/0x300 [ 11.478583] ufshcd_mcq_poll_cqe_lock+0xa3/0xe0 [ 11.478588] ufshcd_sl_intr+0x194/0x1f0 [ 11.478592] ufshcd_threaded_intr+0x68/0xb0 [ 11.478594] ? __pfx_irq_thread_fn+0x10/0x10 [ 11.478599] irq_thread_fn+0x20/0x60 [ 11.478602] ? __pfx_irq_thread_fn+0x10/0x10 [ 11.478603] irq_thread+0xb9/0x180 [ 11.478605] ? __pfx_irq_thread_dtor+0x10/0x10 [ 11.478607] ? __pfx_irq_thread+0x10/0x10 [ 11.478609] kthread+0x10a/0x230 [ 11.478614] ? __pfx_kthread+0x10/0x10 [ 11.478615] ret_from_fork+0x7e/0xd0 [ 11.478619] ? __pfx_kthread+0x10/0x10 [ 11.478621] ret_from_fork_asm+0x1a/0x30 [ 11.478623] This patch modifies in_task() check inside f2fs_read_end_io() to also check if interrupts are disabled. This ensures that pages are unmapped asynchronously in an interrupt handler. 
Fixes: bff139b49d9f ("f2fs: handle decompress only post processing in softirq") Signed-off-by: Jan Prusakowski Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d1a2616d41be..0acc25f996b3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -280,7 +280,7 @@ static void f2fs_read_end_io(struct bio *bio) { struct f2fs_sb_info *sbi = F2FS_F_SB(bio_first_folio_all(bio)); struct bio_post_read_ctx *ctx; - bool intask = in_task(); + bool intask = in_task() && !irqs_disabled(); iostat_update_and_unbind_ctx(bio); ctx = bio->bi_private; From b93bf64e349b1952170f47a0e68fc52f666b9e25 Mon Sep 17 00:00:00 2001 From: "mason.zhang" Date: Wed, 23 Jul 2025 22:58:37 +0800 Subject: [PATCH 0974/2411] f2fs: merge the two conditions to avoid code duplication No functional changes. Signed-off-by: mason.zhang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 271c7f90741b..1a47a7645790 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -278,12 +278,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - if (p->alloc_mode == SSR) { - p->gc_mode = GC_GREEDY; - p->dirty_bitmap = dirty_i->dirty_segmap[type]; - p->max_search = dirty_i->nr_dirty[type]; - p->ofs_unit = 1; - } else if (p->alloc_mode == AT_SSR) { + if (p->alloc_mode == SSR || p->alloc_mode == AT_SSR) { p->gc_mode = GC_GREEDY; p->dirty_bitmap = dirty_i->dirty_segmap[type]; p->max_search = dirty_i->nr_dirty[type]; From 95d7c508b21235144f6cef611ec5686bbdeeec25 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Wed, 23 Jul 2025 22:24:56 +0800 Subject: [PATCH 0975/2411] f2fs: remove unnecessary tracepoint enabled check There is no extra work before trace_f2fs_[dataread|datawrite]_end(), so there is no need to check trace_f2fs_[dataread|datawrite]_end_enabled().
Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4039ccb5022c..09ba8bef2f63 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4855,8 +4855,7 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_update_iostat(F2FS_I_SB(inode), inode, APP_BUFFERED_READ_IO, ret); } - if (trace_f2fs_dataread_end_enabled()) - trace_f2fs_dataread_end(inode, pos, ret); + trace_f2fs_dataread_end(inode, pos, ret); return ret; } @@ -4879,8 +4878,7 @@ static ssize_t f2fs_file_splice_read(struct file *in, loff_t *ppos, f2fs_update_iostat(F2FS_I_SB(inode), inode, APP_BUFFERED_READ_IO, ret); - if (trace_f2fs_dataread_end_enabled()) - trace_f2fs_dataread_end(inode, pos, ret); + trace_f2fs_dataread_end(inode, pos, ret); return ret; } @@ -5225,8 +5223,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) f2fs_dio_write_iter(iocb, from, &may_need_sync) : f2fs_buffered_write_iter(iocb, from); - if (trace_f2fs_datawrite_end_enabled()) - trace_f2fs_datawrite_end(inode, orig_pos, ret); + trace_f2fs_datawrite_end(inode, orig_pos, ret); } /* Don't leave any preallocated blocks around past i_size. */ From f0a7adfedcc8c7e0b13ffd11dd69bf0ac25b2cd3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 21 Jul 2025 10:02:31 +0800 Subject: [PATCH 0976/2411] f2fs: don't break allocation when crossing contiguous sections Commit 0638a3197c19 ("f2fs: avoid unused block when dio write in LFS mode") has fixed unused block issue for dio write in lfs mode. However, f2fs_map_blocks() may break and return smaller extent when last allocated block locates in the end of section, even allocator can allocate contiguous blocks across sections. 
Actually, when the allocator returns a block address which is not contiguous w/ the current extent, we can record that block address in iomap->private and, in the next round, skip reallocating the last allocated block. This fixes the unused block issue and, at the same time, lets us allocate contiguous physical blocks as much as possible for dio write in lfs mode. Testcase: - mkfs.f2fs -f /dev/vdb - mount -o mode=lfs /dev/vdb /mnt/f2fs - dd if=/dev/zero of=/mnt/f2fs/file bs=1M count=3; sync; - dd if=/dev/zero of=/mnt/f2fs/dio bs=2M count=1 oflag=direct; - umount /mnt/f2fs Before: f2fs_map_blocks: dev = (253,16), ino = 4, file offset = 0, start blkaddr = 0x0, len = 0x100, flags = 1, seg_type = 8, may_create = 1, multidevice = 0, flag = 5, err = 0 f2fs_map_blocks: dev = (253,16), ino = 4, file offset = 256, start blkaddr = 0x0, len = 0x100, flags = 1, seg_type = 8, may_create = 1, multidevice = 0, flag = 5, err = 0 f2fs_map_blocks: dev = (253,16), ino = 4, file offset = 512, start blkaddr = 0x0, len = 0x100, flags = 1, seg_type = 8, may_create = 1, multidevice = 0, flag = 5, err = 0 f2fs_map_blocks: dev = (253,16), ino = 5, file offset = 0, start blkaddr = 0x4700, len = 0x100, flags = 3, seg_type = 1, may_create = 1, multidevice = 0, flag = 3, err = 0 f2fs_map_blocks: dev = (253,16), ino = 5, file offset = 256, start blkaddr = 0x4800, len = 0x100, flags = 3, seg_type = 1, may_create = 1, multidevice = 0, flag = 3, err = 0 After: f2fs_map_blocks: dev = (253,16), ino = 4, file offset = 0, start blkaddr = 0x0, len = 0x100, flags = 1, seg_type = 8, may_create = 1, multidevice = 0, flag = 5, err = 0 f2fs_map_blocks: dev = (253,16), ino = 4, file offset = 256, start blkaddr = 0x0, len = 0x100, flags = 1, seg_type = 8, may_create = 1, multidevice = 0, flag = 5, err = 0 f2fs_map_blocks: dev = (253,16), ino = 4, file offset = 512, start blkaddr = 0x0, len = 0x100, flags = 1, seg_type = 8, may_create = 1, multidevice = 0, flag = 5, err = 0 f2fs_map_blocks: dev = (253,16), ino
= 5, file offset = 0, start blkaddr = 0x4700, len = 0x200, flags = 3, seg_type = 1, may_create = 1, multidevice = 0, flag = 3, err = 0 Cc: Daejun Park Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 28 ++++++++++++++++++---------- fs/f2fs/f2fs.h | 1 + 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0acc25f996b3..e11dd1431e5b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1550,10 +1550,14 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) unsigned int start_pgofs; int bidx = 0; bool is_hole; + bool lfs_dio_write; if (!maxblocks) return 0; + lfs_dio_write = (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && + map->m_may_create); + if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) goto out; @@ -1600,7 +1604,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) /* use out-place-update for direct IO under LFS mode */ if (map->m_may_create && (is_hole || (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && - !f2fs_is_pinned_file(inode)))) { + !f2fs_is_pinned_file(inode) && map->m_last_pblk != blkaddr))) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; goto sync_out; @@ -1684,10 +1688,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) if (map->m_multidev_dio) map->m_bdev = FDEV(bidx).bdev; + + if (lfs_dio_write) + map->m_last_pblk = NULL_ADDR; } else if (map_is_mergeable(sbi, map, blkaddr, flag, bidx, ofs)) { ofs++; map->m_len++; } else { + if (lfs_dio_write && !f2fs_is_pinned_file(inode)) + map->m_last_pblk = blkaddr; goto sync_out; } @@ -1712,14 +1721,6 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) dn.ofs_in_node = end_offset; } - if (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && - map->m_may_create) { - /* the next block to be allocated may not be contiguous. 
*/ - if (GET_SEGOFF_FROM_SEG0(sbi, blkaddr) % BLKS_PER_SEC(sbi) == - CAP_BLKS_PER_SEC(sbi) - 1) - goto sync_out; - } - if (pgofs >= end) goto sync_out; else if (dn.ofs_in_node < end_offset) @@ -4162,7 +4163,7 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { - struct f2fs_map_blocks map = {}; + struct f2fs_map_blocks map = { NULL, }; pgoff_t next_pgofs = 0; int err; @@ -4171,6 +4172,10 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, map.m_next_pgofs = &next_pgofs; map.m_seg_type = f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode), inode->i_write_hint); + if (flags & IOMAP_WRITE && iomap->private) { + map.m_last_pblk = (unsigned long)iomap->private; + iomap->private = NULL; + } /* * If the blocks being overwritten are already allocated, @@ -4209,6 +4214,9 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, iomap->flags |= IOMAP_F_MERGED; iomap->bdev = map.m_bdev; iomap->addr = F2FS_BLK_TO_BYTES(map.m_pblk); + + if (flags & IOMAP_WRITE && map.m_last_pblk) + iomap->private = (void *)map.m_last_pblk; } else { if (flags & IOMAP_WRITE) return -ENOTBLK; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dfddb66910b3..97c1a2a3fbd7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -732,6 +732,7 @@ struct f2fs_map_blocks { block_t m_lblk; unsigned int m_len; unsigned int m_flags; + unsigned long m_last_pblk; /* last allocated block, only used for DIO in LFS mode */ pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */ pgoff_t *m_next_extent; /* point to next possible extent */ int m_seg_type; From e6d5e789c3b2df219d6f6a6c7fa0539ce8b563c0 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Fri, 18 Jul 2025 15:04:31 -0700 Subject: [PATCH 0977/2411] f2fs: ignore valid ratio when free section count is low Otherwise F2FS will not do GC in background in low free section. 
Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 1a47a7645790..18b9db2e98ba 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -384,14 +384,15 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) } static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, - unsigned int segno, struct victim_sel_policy *p) + unsigned int segno, struct victim_sel_policy *p, + unsigned int valid_thresh_ratio) { if (p->alloc_mode == SSR) return get_seg_entry(sbi, segno)->ckpt_valid_blocks; - if (p->one_time_gc && (get_valid_blocks(sbi, segno, true) >= - CAP_BLKS_PER_SEC(sbi) * sbi->gc_thread->valid_thresh_ratio / - 100)) + if (p->one_time_gc && (valid_thresh_ratio < 100) && + (get_valid_blocks(sbi, segno, true) >= + CAP_BLKS_PER_SEC(sbi) * valid_thresh_ratio / 100)) return UINT_MAX; /* alloc_mode == LFS */ @@ -772,6 +773,7 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, unsigned int secno, last_victim; unsigned int last_segment; unsigned int nsearched; + unsigned int valid_thresh_ratio = 100; bool is_atgc; int ret = 0; @@ -781,7 +783,11 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, p.alloc_mode = alloc_mode; p.age = age; p.age_threshold = sbi->am.age_threshold; - p.one_time_gc = one_time; + if (one_time) { + p.one_time_gc = one_time; + if (has_enough_free_secs(sbi, 0, NR_PERSISTENT_LOG)) + valid_thresh_ratio = sbi->gc_thread->valid_thresh_ratio; + } retry: select_policy(sbi, gc_type, type, &p); @@ -907,7 +913,7 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, goto next; } - cost = get_gc_cost(sbi, segno, &p); + cost = get_gc_cost(sbi, segno, &p, valid_thresh_ratio); if (p.min_cost > cost) { p.min_segno = segno; From 3bf1bab503a58ed7dcfcd399c30ad0b976eb2620 Mon Sep 17 00:00:00 2001 From: "yohan.joung" Date: Tue, 22 Jul 2025 
15:02:40 +0900 Subject: [PATCH 0978/2411] f2fs: zone: wait for inflight dio completion, excluding pinned files read using dio A read of a pinned file using Direct I/O does not wait for dio write. Signed-off-by: yohan.joung Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 09ba8bef2f63..c1641c693655 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4834,6 +4834,7 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(iocb->ki_filp); const loff_t pos = iocb->ki_pos; ssize_t ret; + bool dio; if (!f2fs_is_compress_backend_ready(inode)) return -EOPNOTSUPP; @@ -4842,12 +4843,15 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, iov_iter_count(to), READ); + dio = f2fs_should_use_dio(inode, iocb, to); + /* In LFS mode, if there is inflight dio, wait for its completion */ if (f2fs_lfs_mode(F2FS_I_SB(inode)) && - get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE)) + get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) && + (!f2fs_is_pinned_file(inode) || !dio)) inode_dio_wait(inode); - if (f2fs_should_use_dio(inode, iocb, to)) { + if (dio) { ret = f2fs_dio_read_iter(iocb, to); } else { ret = filemap_read(iocb, to, 0); From 12d30725bf997ffd5baa849d4b20be86105fc070 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 21 Jul 2025 18:34:49 -0700 Subject: [PATCH 0979/2411] perf pfm: Don't force loading of all PMUs Force loading all PMUs adds significant cost because DRM and other PMUs are loaded; it should also not be required if the pmus__ functions are used. Tested by running perf test, in particular the pfm related tests. Also `perf list` is identical before and after.
Before: $ time ./perf test pfm 54: Test libpfm4 support : 54.1: test of individual --pfm-events : Ok 54.2: test groups of --pfm-events : Ok 103: perf all libpfm4 events test : Ok real 0m8.933s user 0m1.824s sys 0m7.122s After: $ time ./perf test pfm 54: Test libpfm4 support : 54.1: test of individual --pfm-events : Ok 54.2: test groups of --pfm-events : Ok 103: perf all libpfm4 events test : Ok real 0m5.259s user 0m1.793s sys 0m3.570s Signed-off-by: Ian Rogers Tested-by: Namhyung Kim Link: https://lore.kernel.org/r/20250722013449.146233-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/pfm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index 0dacc133ed39..e89395814e88 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -47,10 +47,6 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, p_orig = p = strdup(str); if (!p) return -1; - /* - * force loading of the PMU list - */ - perf_pmus__scan(NULL); for (q = p; strsep(&p, ",{}"); q = p) { sep = p ? str + (p - p_orig - 1) : ""; From 62f4512238f5541d864a783cbcd8d95d067a17b3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 18 Jul 2025 20:05:03 -0700 Subject: [PATCH 0980/2411] perf parse-events: Warn if a cpu term is unsupported by a CPU Factor requested CPU warning out of evlist and into evsel. At the end of adding an event, perform the warning check. To avoid repeatedly testing if the cpu_list is empty, add a local variable. 
``` $ perf stat -e cpu_atom/cycles,cpu=1/ -a true WARNING: A requested CPU in '1' is not supported by PMU 'cpu_atom' (CPUs 16-27) for event 'cpu_atom/cycles/' Performance counter stats for 'system wide': cpu_atom/cycles/ 0.000781511 seconds time elapsed ``` Reviewed-by: Thomas Falcon Signed-off-by: Ian Rogers Tested-by: James Clark Link: https://lore.kernel.org/r/20250719030517.1990983-2-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/evlist.c | 15 +-------------- tools/perf/util/evsel.c | 24 ++++++++++++++++++++++++ tools/perf/util/evsel.h | 2 ++ tools/perf/util/parse-events.c | 12 ++++++++---- 4 files changed, 35 insertions(+), 18 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 995ad5f654d0..80d8387e6b97 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -2549,20 +2549,7 @@ void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_lis return; evlist__for_each_entry(evlist, pos) { - struct perf_cpu_map *intersect, *to_test, *online = cpu_map__online(); - const struct perf_pmu *pmu = evsel__find_pmu(pos); - - to_test = pmu && pmu->is_core ? pmu->cpus : online; - intersect = perf_cpu_map__intersect(to_test, user_requested_cpus); - if (!perf_cpu_map__equal(intersect, user_requested_cpus)) { - char buf[128]; - - cpu_map__snprint(to_test, buf, sizeof(buf)); - pr_warning("WARNING: A requested CPU in '%s' is not supported by PMU '%s' (CPUs %s) for event '%s'\n", - cpu_list, pmu ? 
pmu->name : "cpu", buf, evsel__name(pos)); - } - perf_cpu_map__put(intersect); - perf_cpu_map__put(online); + evsel__warn_user_requested_cpus(pos, user_requested_cpus); } perf_cpu_map__put(user_requested_cpus); } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 3896a04d90af..d9b6bf78d67b 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -4091,3 +4091,27 @@ void evsel__uniquify_counter(struct evsel *counter) counter->uniquified_name = false; } } + +void evsel__warn_user_requested_cpus(struct evsel *evsel, struct perf_cpu_map *user_requested_cpus) +{ + struct perf_cpu_map *intersect, *online = NULL; + const struct perf_pmu *pmu = evsel__find_pmu(evsel); + + if (pmu && pmu->is_core) { + intersect = perf_cpu_map__intersect(pmu->cpus, user_requested_cpus); + } else { + online = cpu_map__online(); + intersect = perf_cpu_map__intersect(online, user_requested_cpus); + } + if (!perf_cpu_map__equal(intersect, user_requested_cpus)) { + char buf1[128]; + char buf2[128]; + + cpu_map__snprint(user_requested_cpus, buf1, sizeof(buf1)); + cpu_map__snprint(online ?: pmu->cpus, buf2, sizeof(buf2)); + pr_warning("WARNING: A requested CPU in '%s' is not supported by PMU '%s' (CPUs %s) for event '%s'\n", + buf1, pmu ? 
pmu->name : "cpu", buf2, evsel__name(evsel)); + } + perf_cpu_map__put(intersect); + perf_cpu_map__put(online); +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index b84ee274602d..cefa8e64c0d5 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -574,4 +574,6 @@ void evsel__set_config_if_unset(struct perf_pmu *pmu, struct evsel *evsel, bool evsel__is_offcpu_event(struct evsel *evsel); +void evsel__warn_user_requested_cpus(struct evsel *evsel, struct perf_cpu_map *user_requested_cpus); + #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index a59ae5ca0f89..3fd6cc0c2794 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -252,6 +252,7 @@ __add_event(struct list_head *list, int *idx, struct evsel *evsel; bool is_pmu_core; struct perf_cpu_map *cpus; + bool has_cpu_list = !perf_cpu_map__is_empty(cpu_list); /* * Ensure the first_wildcard_match's PMU matches that of the new event @@ -276,7 +277,7 @@ __add_event(struct list_head *list, int *idx, if (pmu) { is_pmu_core = pmu->is_core; - cpus = perf_cpu_map__get(perf_cpu_map__is_empty(cpu_list) ? pmu->cpus : cpu_list); + cpus = perf_cpu_map__get(has_cpu_list ? cpu_list : pmu->cpus); perf_pmu__warn_invalid_formats(pmu); if (attr->type == PERF_TYPE_RAW || attr->type >= PERF_TYPE_MAX) { perf_pmu__warn_invalid_config(pmu, attr->config, name, @@ -291,10 +292,10 @@ __add_event(struct list_head *list, int *idx, } else { is_pmu_core = (attr->type == PERF_TYPE_HARDWARE || attr->type == PERF_TYPE_HW_CACHE); - if (perf_cpu_map__is_empty(cpu_list)) - cpus = is_pmu_core ? perf_cpu_map__new_online_cpus() : NULL; - else + if (has_cpu_list) cpus = perf_cpu_map__get(cpu_list); + else + cpus = is_pmu_core ? 
cpu_map__online() : NULL; } if (init_attr) event_attr_init(attr); @@ -326,6 +327,9 @@ __add_event(struct list_head *list, int *idx, if (list) list_add_tail(&evsel->core.node, list); + if (has_cpu_list) + evsel__warn_user_requested_cpus(evsel, cpu_list); + return evsel; } From 848e7a06fea9be249c5b788b3f498196925e4d7e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 18 Jul 2025 20:05:04 -0700 Subject: [PATCH 0981/2411] perf stat: Avoid buffer overflow to the aggregation map CPUs may be created and passed to perf_stat__get_aggr (via config->aggr_get_id), such as in the stat display should_skip_zero_counter. There may be no such aggr_id, for example, if running with a thread. Add a missing bound check and just create IDs for these cases. Reviewed-by: Thomas Falcon Signed-off-by: Ian Rogers Tested-by: James Clark Link: https://lore.kernel.org/r/20250719030517.1990983-3-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 77e2248fa7fc..73b4521ab8af 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1365,7 +1365,7 @@ static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, struct aggr_cpu_id id; /* per-process mode - should use global aggr mode */ - if (cpu.cpu == -1) + if (cpu.cpu == -1 || cpu.cpu >= config->cpus_aggr_map->nr) return get_id(config, cpu); if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu])) From ced4c249569ab25c32b0d36e2ebdb19c74394bdf Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 18 Jul 2025 20:05:05 -0700 Subject: [PATCH 0982/2411] perf stat: Don't size aggregation ids from user_requested_cpus As evsels may have additional CPU terms, the user_requested_cpus may not reflect all the CPUs requested. Use evlist->all_cpus to size the array as that reflects all the CPUs potentially needed by the evlist. 
Reviewed-by: Thomas Falcon Signed-off-by: Ian Rogers Tested-by: James Clark Link: https://lore.kernel.org/r/20250719030517.1990983-4-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-stat.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 73b4521ab8af..00fce828cd5e 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1513,11 +1513,8 @@ static int perf_stat_init_aggr_mode(void) * taking the highest cpu number to be the size of * the aggregation translate cpumap. */ - if (!perf_cpu_map__is_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus)) - nr = perf_cpu_map__max(evsel_list->core.user_requested_cpus).cpu; - else - nr = 0; - stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr + 1); + nr = perf_cpu_map__max(evsel_list->core.all_cpus).cpu + 1; + stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr); return stat_config.cpus_aggr_map ? 0 : -ENOMEM; } From bd741d80dc65922c7d6e5fd855a934f5d2cf2309 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 18 Jul 2025 20:05:06 -0700 Subject: [PATCH 0983/2411] perf parse-events: Allow the cpu term to be a PMU or CPU range On hybrid systems, events like msr/tsc/ will aggregate counts across all CPUs. Often metrics only want a value like msr/tsc/ for the cores on which the metric is being computed. Listing each CPU with terms cpu=0,cpu=1.. is laborious and would need to be encoded for all variations of a CPU model. Allow the cpumask from a PMU to be an argument to the cpu term. 
For example in the following the cpumask of the cstate_pkg PMU selects the CPUs to count msr/tsc/ counter upon: ``` $ cat /sys/bus/event_source/devices/cstate_pkg/cpumask 0 $ perf stat -A -e 'msr/tsc,cpu=cstate_pkg/' -a sleep 0.1 Performance counter stats for 'system wide': CPU0 252,621,253 msr/tsc,cpu=cstate_pkg/ 0.101184092 seconds time elapsed ``` As the cpu term is now also allowed to be a string, allow it to encode a range of CPUs (a list can't be supported as ',' is already a special token). The "event qualifiers" section of the `perf list` man page is updated to detail the additional behavior. The man page formatting is tidied up in this section, as it was incorrectly appearing within the "parameterized events" section. Reviewed-by: Thomas Falcon Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250719030517.1990983-5-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/Documentation/perf-list.txt | 25 ++++++++------ tools/perf/util/parse-events.c | 45 +++++++++++++++++++++----- 2 files changed, 53 insertions(+), 17 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index ce0735021473..28215306a78a 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -278,26 +278,33 @@ also be supplied. For example: perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ... -EVENT QUALIFIERS: +EVENT QUALIFIERS +---------------- It is also possible to add extra qualifiers to an event: percore: -Sums up the event counts for all hardware threads in a core, e.g.: - - - perf stat -e cpu/event=0,umask=0x3,percore=1/ + Sums up the event counts for all hardware threads in a core, e.g.: + perf stat -e cpu/event=0,umask=0x3,percore=1/ cpu: -Specifies the CPU to open the event upon. The value may be repeated to -specify opening the event on multiple CPUs: + Specifies a CPU or a range of CPUs to open the event upon. 
It may + also reference a PMU to copy the CPU mask from. The value may be + repeated to specify opening the event on multiple CPUs. + Example 1: to open the instructions event on CPUs 0 and 2, the + cycles event on CPUs 1 and 2: + perf stat -e instructions/cpu=0,cpu=2/,cycles/cpu=1-2/ -a sleep 1 - perf stat -e instructions/cpu=0,cpu=2/,cycles/cpu=1,cpu=2/ -a sleep 1 - perf stat -e data_read/cpu=0/,data_write/cpu=1/ -a sleep 1 + Example 2: to open the data_read uncore event on CPU 0 and the + data_write uncore event on CPU 1: + perf stat -e data_read/cpu=0/,data_write/cpu=1/ -a sleep 1 + Example 3: to open the software msr/tsc/ event only on the CPUs + matching those from the cpu_core PMU: + perf stat -e msr/tsc,cpu=cpu_core/ -a sleep 1 EVENT GROUPS ------------ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 3fd6cc0c2794..a337e4d22ff2 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -187,10 +187,22 @@ static struct perf_cpu_map *get_config_cpu(const struct parse_events_terms *head list_for_each_entry(term, &head_terms->terms, list) { if (term->type_term == PARSE_EVENTS__TERM_TYPE_CPU) { - struct perf_cpu_map *cpu = perf_cpu_map__new_int(term->val.num); + struct perf_cpu_map *term_cpus; - perf_cpu_map__merge(&cpus, cpu); - perf_cpu_map__put(cpu); + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { + term_cpus = perf_cpu_map__new_int(term->val.num); + } else { + struct perf_pmu *pmu = perf_pmus__find(term->val.str); + + if (pmu && perf_cpu_map__is_empty(pmu->cpus)) + term_cpus = pmu->is_core ? 
cpu_map__online() : NULL; + else if (pmu) + term_cpus = perf_cpu_map__get(pmu->cpus); + else + term_cpus = perf_cpu_map__new(term->val.str); + } + perf_cpu_map__merge(&cpus, term_cpus); + perf_cpu_map__put(term_cpus); } } @@ -1048,15 +1060,32 @@ do { \ return -EINVAL; } break; - case PARSE_EVENTS__TERM_TYPE_CPU: - CHECK_TYPE_VAL(NUM); - if (term->val.num >= (u64)cpu__max_present_cpu().cpu) { + case PARSE_EVENTS__TERM_TYPE_CPU: { + struct perf_cpu_map *map; + + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { + if (term->val.num >= (u64)cpu__max_present_cpu().cpu) { + parse_events_error__handle(err, term->err_val, + strdup("too big"), + /*help=*/NULL); + return -EINVAL; + } + break; + } + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_STR); + if (perf_pmus__find(term->val.str) != NULL) + break; + + map = perf_cpu_map__new(term->val.str); + if (!map) { parse_events_error__handle(err, term->err_val, - strdup("too big"), - NULL); + strdup("not a valid PMU or CPU number"), + /*help=*/NULL); return -EINVAL; } + perf_cpu_map__put(map); break; + } case PARSE_EVENTS__TERM_TYPE_DRV_CFG: case PARSE_EVENTS__TERM_TYPE_USER: case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: From 175c852325a1f566426e2470e5d5d67efc7621dd Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 18 Jul 2025 20:05:07 -0700 Subject: [PATCH 0984/2411] perf tool_pmu: Allow num_cpus(_online) to be specific to a cpumask For hybrid metrics it is useful to know the number of p-core or e-core CPUs. If a cpumask is specified for the num_cpus or num_cpus_online tool events, compute the value relative to the given mask rather than for the full system. 
``` $ sudo /tmp/perf/perf stat -e 'tool/num_cpus/,tool/num_cpus,cpu=cpu_core/, tool/num_cpus,cpu=cpu_atom/,tool/num_cpus_online/,tool/num_cpus_online, cpu=cpu_core/,tool/num_cpus_online,cpu=cpu_atom/' true Performance counter stats for 'true': 28 tool/num_cpus/ 16 tool/num_cpus,cpu=cpu_core/ 12 tool/num_cpus,cpu=cpu_atom/ 28 tool/num_cpus_online/ 16 tool/num_cpus_online,cpu=cpu_core/ 12 tool/num_cpus_online,cpu=cpu_atom/ 0.000767205 seconds time elapsed 0.000938000 seconds user 0.000000000 seconds sys ``` Reviewed-by: Thomas Falcon Signed-off-by: Ian Rogers Tested-by: James Clark Link: https://lore.kernel.org/r/20250719030517.1990983-6-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/expr.c | 2 +- tools/perf/util/tool_pmu.c | 56 +++++++++++++++++++++++++++++++++----- tools/perf/util/tool_pmu.h | 2 +- 3 files changed, 51 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index ca70a14c7cdf..7fda0ff89c16 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -401,7 +401,7 @@ double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx if (ev != TOOL_PMU__EVENT_NONE) { u64 count; - if (tool_pmu__read_event(ev, &count)) + if (tool_pmu__read_event(ev, /*evsel=*/NULL, &count)) result = count; else pr_err("Failure to read '%s'", literal); diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c index 4630b8cc8e52..7aa4f315b0ac 100644 --- a/tools/perf/util/tool_pmu.c +++ b/tools/perf/util/tool_pmu.c @@ -332,7 +332,7 @@ static bool has_pmem(void) return has_pmem; } -bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result) +bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *result) { const struct cpu_topology *topology; @@ -347,18 +347,60 @@ bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result) return true; case TOOL_PMU__EVENT_NUM_CPUS: - *result = cpu__max_present_cpu().cpu; + if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) 
{ + /* No evsel to be specific to. */ + *result = cpu__max_present_cpu().cpu; + } else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) { + /* Evsel just has specific CPUs. */ + *result = perf_cpu_map__nr(evsel->core.cpus); + } else { + /* + * "Any CPU" event that can be scheduled on any CPU in + * the PMU's cpumask. The PMU cpumask should be saved in + * own_cpus. If not present fall back to max. + */ + if (!perf_cpu_map__is_empty(evsel->core.own_cpus)) + *result = perf_cpu_map__nr(evsel->core.own_cpus); + else + *result = cpu__max_present_cpu().cpu; + } return true; case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: { struct perf_cpu_map *online = cpu_map__online(); - if (online) { + if (!online) + return false; + + if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) { + /* No evsel to be specific to. */ *result = perf_cpu_map__nr(online); - perf_cpu_map__put(online); - return true; + } else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) { + /* Evsel just has specific CPUs. */ + struct perf_cpu_map *tmp = + perf_cpu_map__intersect(online, evsel->core.cpus); + + *result = perf_cpu_map__nr(tmp); + perf_cpu_map__put(tmp); + } else { + /* + * "Any CPU" event that can be scheduled on any CPU in + * the PMU's cpumask. The PMU cpumask should be saved in + * own_cpus, if not present then just the online cpu + * mask. 
+ */ + if (!perf_cpu_map__is_empty(evsel->core.own_cpus)) { + struct perf_cpu_map *tmp = + perf_cpu_map__intersect(online, evsel->core.own_cpus); + + *result = perf_cpu_map__nr(tmp); + perf_cpu_map__put(tmp); + } else { + *result = perf_cpu_map__nr(online); + } } - return false; + perf_cpu_map__put(online); + return true; } case TOOL_PMU__EVENT_NUM_DIES: topology = online_topology(); @@ -417,7 +459,7 @@ int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); val = 0; if (cpu_map_idx == 0 && thread == 0) { - if (!tool_pmu__read_event(ev, &val)) { + if (!tool_pmu__read_event(ev, evsel, &val)) { count->lost++; val = 0; } diff --git a/tools/perf/util/tool_pmu.h b/tools/perf/util/tool_pmu.h index c6ad1dd90a56..d642e7d73910 100644 --- a/tools/perf/util/tool_pmu.h +++ b/tools/perf/util/tool_pmu.h @@ -34,7 +34,7 @@ enum tool_pmu_event tool_pmu__str_to_event(const char *str); bool tool_pmu__skip_event(const char *name); int tool_pmu__num_skip_events(void); -bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result); +bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *result); u64 tool_pmu__cpu_slots_per_cycle(void); From 6d765f5f7ec669f2a16b44afd23cd877efa640de Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 18 Jul 2025 20:05:08 -0700 Subject: [PATCH 0985/2411] libperf evsel: Rename own_cpus to pmu_cpus own_cpus is generally the cpumask from the PMU. Rename to pmu_cpus to try to make this clearer. Variable rename with no other changes. 
Reviewed-by: Thomas Falcon Signed-off-by: Ian Rogers Tested-by: James Clark Link: https://lore.kernel.org/r/20250719030517.1990983-7-irogers@google.com Signed-off-by: Namhyung Kim --- tools/lib/perf/evlist.c | 8 ++++---- tools/lib/perf/evsel.c | 2 +- tools/lib/perf/include/internal/evsel.h | 2 +- tools/perf/tests/event_update.c | 4 ++-- tools/perf/util/evsel.c | 6 +++--- tools/perf/util/header.c | 4 ++-- tools/perf/util/parse-events.c | 2 +- tools/perf/util/synthetic-events.c | 4 ++-- tools/perf/util/tool_pmu.c | 12 ++++++------ 9 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index b1f4c8176b32..9d9dec21f510 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -46,7 +46,7 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, * are valid by intersecting with those of the PMU. */ perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__intersect(evlist->user_requested_cpus, evsel->own_cpus); + evsel->cpus = perf_cpu_map__intersect(evlist->user_requested_cpus, evsel->pmu_cpus); /* * Empty cpu lists would eventually get opened as "any" so remove @@ -61,7 +61,7 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, list_for_each_entry_from(next, &evlist->entries, node) next->idx--; } - } else if (!evsel->own_cpus || evlist->has_user_cpus || + } else if (!evsel->pmu_cpus || evlist->has_user_cpus || (!evsel->requires_cpu && perf_cpu_map__has_any_cpu(evlist->user_requested_cpus))) { /* * The PMU didn't specify a default cpu map, this isn't a core @@ -72,13 +72,13 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, */ perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); - } else if (evsel->cpus != evsel->own_cpus) { + } else if (evsel->cpus != evsel->pmu_cpus) { /* * No user requested cpu map but the PMU cpu map doesn't match * the evsel's. Reset it back to the PMU cpu map. 
*/ perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__get(evsel->own_cpus); + evsel->cpus = perf_cpu_map__get(evsel->pmu_cpus); } if (evsel->system_wide) { diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 2a85e0bfee1e..127abe7df63d 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -46,7 +46,7 @@ void perf_evsel__delete(struct perf_evsel *evsel) assert(evsel->mmap == NULL); /* If not munmap wasn't called. */ assert(evsel->sample_id == NULL); /* If not free_id wasn't called. */ perf_cpu_map__put(evsel->cpus); - perf_cpu_map__put(evsel->own_cpus); + perf_cpu_map__put(evsel->pmu_cpus); perf_thread_map__put(evsel->threads); free(evsel); } diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index ea78defa77d0..b97dc8c92882 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -99,7 +99,7 @@ struct perf_evsel { * cpu map for opening the event on, for example, the first CPU on a * socket for an uncore event. 
*/ - struct perf_cpu_map *own_cpus; + struct perf_cpu_map *pmu_cpus; struct perf_thread_map *threads; struct xyarray *fd; struct xyarray *mmap; diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index 9301fde11366..cb9e6de2e033 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -109,8 +109,8 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes TEST_ASSERT_VAL("failed to synthesize attr update name", !perf_event__synthesize_event_update_name(&tmp.tool, evsel, process_event_name)); - perf_cpu_map__put(evsel->core.own_cpus); - evsel->core.own_cpus = perf_cpu_map__new("1,2,3"); + perf_cpu_map__put(evsel->core.pmu_cpus); + evsel->core.pmu_cpus = perf_cpu_map__new("1,2,3"); TEST_ASSERT_VAL("failed to synthesize attr update cpus", !perf_event__synthesize_event_update_cpus(&tmp.tool, evsel, process_event_cpus)); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index d9b6bf78d67b..ba0c9799928b 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -488,7 +488,7 @@ struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig) return NULL; evsel->core.cpus = perf_cpu_map__get(orig->core.cpus); - evsel->core.own_cpus = perf_cpu_map__get(orig->core.own_cpus); + evsel->core.pmu_cpus = perf_cpu_map__get(orig->core.pmu_cpus); evsel->core.threads = perf_thread_map__get(orig->core.threads); evsel->core.nr_members = orig->core.nr_members; evsel->core.system_wide = orig->core.system_wide; @@ -1527,7 +1527,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, attr->exclude_user = 1; } - if (evsel->core.own_cpus || evsel->unit) + if (evsel->core.pmu_cpus || evsel->unit) evsel->core.attr.read_format |= PERF_FORMAT_ID; /* @@ -1680,7 +1680,7 @@ void evsel__exit(struct evsel *evsel) evsel__free_config_terms(evsel); cgroup__put(evsel->cgrp); perf_cpu_map__put(evsel->core.cpus); - perf_cpu_map__put(evsel->core.own_cpus); + 
perf_cpu_map__put(evsel->core.pmu_cpus); perf_thread_map__put(evsel->core.threads); zfree(&evsel->group_name); zfree(&evsel->name); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 53d54fbda10d..d941d7aa0f49 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -4507,8 +4507,8 @@ int perf_event__process_event_update(const struct perf_tool *tool __maybe_unused case PERF_EVENT_UPDATE__CPUS: map = cpu_map__new_data(&ev->cpus.cpus); if (map) { - perf_cpu_map__put(evsel->core.own_cpus); - evsel->core.own_cpus = map; + perf_cpu_map__put(evsel->core.pmu_cpus); + evsel->core.pmu_cpus = map; } else pr_err("failed to get event_update cpus\n"); default: diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index a337e4d22ff2..d506f9943506 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -320,7 +320,7 @@ __add_event(struct list_head *list, int *idx, (*idx)++; evsel->core.cpus = cpus; - evsel->core.own_cpus = perf_cpu_map__get(cpus); + evsel->core.pmu_cpus = perf_cpu_map__get(cpus); evsel->core.requires_cpu = pmu ? 
pmu->is_uncore : false; evsel->core.is_pmu_core = is_pmu_core; evsel->pmu = pmu; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 2fc4d0537840..7c00b09e3a93 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -2045,7 +2045,7 @@ int perf_event__synthesize_event_update_name(const struct perf_tool *tool, struc int perf_event__synthesize_event_update_cpus(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process) { - struct synthesize_cpu_map_data syn_data = { .map = evsel->core.own_cpus }; + struct synthesize_cpu_map_data syn_data = { .map = evsel->core.pmu_cpus }; struct perf_record_event_update *ev; int err; @@ -2126,7 +2126,7 @@ int perf_event__synthesize_extra_attr(const struct perf_tool *tool, struct evlis } } - if (evsel->core.own_cpus) { + if (evsel->core.pmu_cpus) { err = perf_event__synthesize_event_update_cpus(tool, evsel, process); if (err < 0) { pr_err("Couldn't synthesize evsel cpus.\n"); diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c index 7aa4f315b0ac..d99e699e646d 100644 --- a/tools/perf/util/tool_pmu.c +++ b/tools/perf/util/tool_pmu.c @@ -357,10 +357,10 @@ bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *resu /* * "Any CPU" event that can be scheduled on any CPU in * the PMU's cpumask. The PMU cpumask should be saved in - * own_cpus. If not present fall back to max. + * pmu_cpus. If not present fall back to max. */ - if (!perf_cpu_map__is_empty(evsel->core.own_cpus)) - *result = perf_cpu_map__nr(evsel->core.own_cpus); + if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus)) + *result = perf_cpu_map__nr(evsel->core.pmu_cpus); else *result = cpu__max_present_cpu().cpu; } @@ -386,12 +386,12 @@ bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *resu /* * "Any CPU" event that can be scheduled on any CPU in * the PMU's cpumask. 
The PMU cpumask should be saved in - * own_cpus, if not present then just the online cpu + * pmu_cpus, if not present then just the online cpu * mask. */ - if (!perf_cpu_map__is_empty(evsel->core.own_cpus)) { + if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus)) { struct perf_cpu_map *tmp = - perf_cpu_map__intersect(online, evsel->core.own_cpus); + perf_cpu_map__intersect(online, evsel->core.pmu_cpus); *result = perf_cpu_map__nr(tmp); perf_cpu_map__put(tmp); From 9a711ef3bd57c124cb7255a4bb8a5166c6b0cef0 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 18 Jul 2025 20:05:09 -0700 Subject: [PATCH 0986/2411] libperf evsel: Factor perf_evsel__exit out of perf_evsel__delete This allows the perf_evsel__exit to be called when the struct perf_evsel is embedded inside another struct, such as struct evsel in perf. Reviewed-by: Thomas Falcon Signed-off-by: Ian Rogers Tested-by: James Clark Link: https://lore.kernel.org/r/20250719030517.1990983-8-irogers@google.com Signed-off-by: Namhyung Kim --- tools/lib/perf/evsel.c | 7 ++++++- tools/lib/perf/include/internal/evsel.h | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 127abe7df63d..13a307fc75ae 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -40,7 +40,7 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr) return evsel; } -void perf_evsel__delete(struct perf_evsel *evsel) +void perf_evsel__exit(struct perf_evsel *evsel) { assert(evsel->fd == NULL); /* If not fds were not closed. */ assert(evsel->mmap == NULL); /* If not munmap wasn't called. 
*/ @@ -48,6 +48,11 @@ void perf_evsel__delete(struct perf_evsel *evsel) perf_cpu_map__put(evsel->cpus); perf_cpu_map__put(evsel->pmu_cpus); perf_thread_map__put(evsel->threads); +} + +void perf_evsel__delete(struct perf_evsel *evsel) +{ + perf_evsel__exit(evsel); free(evsel); } diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index b97dc8c92882..fefe64ba5e26 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -133,6 +133,7 @@ struct perf_evsel { void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx); +void perf_evsel__exit(struct perf_evsel *evsel); int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); void perf_evsel__close_fd(struct perf_evsel *evsel); void perf_evsel__free_fd(struct perf_evsel *evsel); From f958537f185216b2be028ed793508248503bef83 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 18 Jul 2025 20:05:10 -0700 Subject: [PATCH 0987/2411] perf evsel: Use libperf perf_evsel__exit Avoid the duplicated code and better enable perf_evsel to change. 
Reviewed-by: Thomas Falcon Signed-off-by: Ian Rogers Tested-by: James Clark Link: https://lore.kernel.org/r/20250719030517.1990983-9-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/evsel.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ba0c9799928b..af2b26c6456a 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1679,9 +1679,7 @@ void evsel__exit(struct evsel *evsel) perf_evsel__free_id(&evsel->core); evsel__free_config_terms(evsel); cgroup__put(evsel->cgrp); - perf_cpu_map__put(evsel->core.cpus); - perf_cpu_map__put(evsel->core.pmu_cpus); - perf_thread_map__put(evsel->core.threads); + perf_evsel__exit(&evsel->core); zfree(&evsel->group_name); zfree(&evsel->name); #ifdef HAVE_LIBTRACEEVENT From 3cb614a261e43a82acfef437c3242820c1444e2d Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 18 Jul 2025 20:05:11 -0700 Subject: [PATCH 0988/2411] perf pmus: Factor perf_pmus__find_by_attr out of evsel__find_pmu Allow a PMU to be found by a perf_event_attr, useful when creating evsels. 
Reviewed-by: Thomas Falcon Signed-off-by: Ian Rogers Tested-by: James Clark Link: https://lore.kernel.org/r/20250719030517.1990983-10-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/pmus.c | 29 +++++++++++++++++------------ tools/perf/util/pmus.h | 2 ++ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c index 409b909cfa02..9137bb9036ed 100644 --- a/tools/perf/util/pmus.c +++ b/tools/perf/util/pmus.c @@ -814,24 +814,18 @@ bool perf_pmus__supports_extended_type(void) return perf_pmus__do_support_extended_type; } -struct perf_pmu *evsel__find_pmu(const struct evsel *evsel) +struct perf_pmu *perf_pmus__find_by_attr(const struct perf_event_attr *attr) { - struct perf_pmu *pmu = evsel->pmu; - bool legacy_core_type; + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); + u32 type = attr->type; + bool legacy_core_type = type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE; - if (pmu) - return pmu; - - pmu = perf_pmus__find_by_type(evsel->core.attr.type); - legacy_core_type = - evsel->core.attr.type == PERF_TYPE_HARDWARE || - evsel->core.attr.type == PERF_TYPE_HW_CACHE; if (!pmu && legacy_core_type && perf_pmus__supports_extended_type()) { - u32 type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT; + type = attr->config >> PERF_PMU_TYPE_SHIFT; pmu = perf_pmus__find_by_type(type); } - if (!pmu && (legacy_core_type || evsel->core.attr.type == PERF_TYPE_RAW)) { + if (!pmu && (legacy_core_type || type == PERF_TYPE_RAW)) { /* * For legacy events, if there was no extended type info then * assume the PMU is the first core PMU. 
@@ -842,6 +836,17 @@ struct perf_pmu *evsel__find_pmu(const struct evsel *evsel) */ pmu = perf_pmus__find_core_pmu(); } + return pmu; +} + +struct perf_pmu *evsel__find_pmu(const struct evsel *evsel) +{ + struct perf_pmu *pmu = evsel->pmu; + + if (pmu) + return pmu; + + pmu = perf_pmus__find_by_attr(&evsel->core.attr); ((struct evsel *)evsel)->pmu = pmu; return pmu; } diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h index 86842ee5f539..7cb36863711a 100644 --- a/tools/perf/util/pmus.h +++ b/tools/perf/util/pmus.h @@ -5,6 +5,7 @@ #include #include +struct perf_event_attr; struct perf_pmu; struct print_callbacks; @@ -16,6 +17,7 @@ void perf_pmus__destroy(void); struct perf_pmu *perf_pmus__find(const char *name); struct perf_pmu *perf_pmus__find_by_type(unsigned int type); +struct perf_pmu *perf_pmus__find_by_attr(const struct perf_event_attr *attr); struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu); struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu); From cd63c22168257a0b0b59245394915e2488065f7d Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 18 Jul 2025 20:05:12 -0700 Subject: [PATCH 0989/2411] perf parse-events: Minor __add_event refactoring Rename cpu_list to user_cpus. If a PMU isn't given, find it early from the perf_event_attr. Make the pmu_cpus more explicitly a copy from the PMU (except when user_cpus are given). Derive the cpus from pmu_cpus and user_cpus as appropriate. Handle strdup errors on name and metric_id. 
Reviewed-by: Thomas Falcon Signed-off-by: Ian Rogers Tested-by: James Clark Link: https://lore.kernel.org/r/20250719030517.1990983-11-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/parse-events.c | 69 +++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d506f9943506..bd2d831d5123 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -259,12 +259,12 @@ __add_event(struct list_head *list, int *idx, bool init_attr, const char *name, const char *metric_id, struct perf_pmu *pmu, struct list_head *config_terms, struct evsel *first_wildcard_match, - struct perf_cpu_map *cpu_list, u64 alternate_hw_config) + struct perf_cpu_map *user_cpus, u64 alternate_hw_config) { struct evsel *evsel; bool is_pmu_core; - struct perf_cpu_map *cpus; - bool has_cpu_list = !perf_cpu_map__is_empty(cpu_list); + struct perf_cpu_map *cpus, *pmu_cpus; + bool has_user_cpus = !perf_cpu_map__is_empty(user_cpus); /* * Ensure the first_wildcard_match's PMU matches that of the new event @@ -288,8 +288,6 @@ __add_event(struct list_head *list, int *idx, } if (pmu) { - is_pmu_core = pmu->is_core; - cpus = perf_cpu_map__get(has_cpu_list ? cpu_list : pmu->cpus); perf_pmu__warn_invalid_formats(pmu); if (attr->type == PERF_TYPE_RAW || attr->type >= PERF_TYPE_MAX) { perf_pmu__warn_invalid_config(pmu, attr->config, name, @@ -301,48 +299,77 @@ __add_event(struct list_head *list, int *idx, perf_pmu__warn_invalid_config(pmu, attr->config3, name, PERF_PMU_FORMAT_VALUE_CONFIG3, "config3"); } + } + /* + * If a PMU wasn't given, such as for legacy events, find now that + * warnings won't be generated. 
+ */ + if (!pmu) + pmu = perf_pmus__find_by_attr(attr); + + if (pmu) { + is_pmu_core = pmu->is_core; + pmu_cpus = perf_cpu_map__get(pmu->cpus); } else { is_pmu_core = (attr->type == PERF_TYPE_HARDWARE || attr->type == PERF_TYPE_HW_CACHE); - if (has_cpu_list) - cpus = perf_cpu_map__get(cpu_list); - else - cpus = is_pmu_core ? cpu_map__online() : NULL; + pmu_cpus = is_pmu_core ? cpu_map__online() : NULL; } + + if (has_user_cpus) { + cpus = perf_cpu_map__get(user_cpus); + /* Existing behavior that pmu_cpus matches the given user ones. */ + perf_cpu_map__put(pmu_cpus); + pmu_cpus = perf_cpu_map__get(user_cpus); + } else { + cpus = perf_cpu_map__get(pmu_cpus); + } + if (init_attr) event_attr_init(attr); evsel = evsel__new_idx(attr, *idx); - if (!evsel) { - perf_cpu_map__put(cpus); - return NULL; + if (!evsel) + goto out_err; + + if (name) { + evsel->name = strdup(name); + if (!evsel->name) + goto out_err; + } + + if (metric_id) { + evsel->metric_id = strdup(metric_id); + if (!evsel->metric_id) + goto out_err; } (*idx)++; evsel->core.cpus = cpus; - evsel->core.pmu_cpus = perf_cpu_map__get(cpus); + evsel->core.pmu_cpus = pmu_cpus; evsel->core.requires_cpu = pmu ? 
pmu->is_uncore : false; evsel->core.is_pmu_core = is_pmu_core; evsel->pmu = pmu; evsel->alternate_hw_config = alternate_hw_config; evsel->first_wildcard_match = first_wildcard_match; - if (name) - evsel->name = strdup(name); - - if (metric_id) - evsel->metric_id = strdup(metric_id); - if (config_terms) list_splice_init(config_terms, &evsel->config_terms); if (list) list_add_tail(&evsel->core.node, list); - if (has_cpu_list) - evsel__warn_user_requested_cpus(evsel, cpu_list); + if (has_user_cpus) + evsel__warn_user_requested_cpus(evsel, user_cpus); return evsel; +out_err: + perf_cpu_map__put(cpus); + perf_cpu_map__put(pmu_cpus); + zfree(&evsel->name); + zfree(&evsel->metric_id); + free(evsel); + return NULL; } struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, From e9387ba56918eb3c16aab3e6f0155a7251e339ec Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 18 Jul 2025 20:05:13 -0700 Subject: [PATCH 0990/2411] perf evsel: Add evsel__open_per_cpu_and_thread Add evsel__open_per_cpu_and_thread that combines the operation of evsel__open_per_cpu and evsel__open_per_thread so that an event without the "any" cpumask can be opened with its cpumask and with threads it specifies. Change the implementation of evsel__open_per_cpu and evsel__open_per_thread to use evsel__open_per_cpu_and_thread to make the implementation of those functions clearer. 
Reviewed-by: Thomas Falcon Signed-off-by: Ian Rogers Tested-by: James Clark Link: https://lore.kernel.org/r/20250719030517.1990983-12-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/evsel.c | 23 +++++++++++++++++++---- tools/perf/util/evsel.h | 3 +++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index af2b26c6456a..ae11df1e7902 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2761,17 +2761,32 @@ void evsel__close(struct evsel *evsel) perf_evsel__free_id(&evsel->core); } -int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx) +int evsel__open_per_cpu_and_thread(struct evsel *evsel, + struct perf_cpu_map *cpus, int cpu_map_idx, + struct perf_thread_map *threads) { if (cpu_map_idx == -1) - return evsel__open_cpu(evsel, cpus, NULL, 0, perf_cpu_map__nr(cpus)); + return evsel__open_cpu(evsel, cpus, threads, 0, perf_cpu_map__nr(cpus)); - return evsel__open_cpu(evsel, cpus, NULL, cpu_map_idx, cpu_map_idx + 1); + return evsel__open_cpu(evsel, cpus, threads, cpu_map_idx, cpu_map_idx + 1); +} + +int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx) +{ + struct perf_thread_map *threads = thread_map__new_by_tid(-1); + int ret = evsel__open_per_cpu_and_thread(evsel, cpus, cpu_map_idx, threads); + + perf_thread_map__put(threads); + return ret; } int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads) { - return evsel__open(evsel, NULL, threads); + struct perf_cpu_map *cpus = perf_cpu_map__new_any_cpu(); + int ret = evsel__open_per_cpu_and_thread(evsel, cpus, -1, threads); + + perf_cpu_map__put(cpus); + return ret; } static int perf_evsel__parse_id_sample(const struct evsel *evsel, diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index cefa8e64c0d5..8e79eb6d41b3 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -351,6 +351,9 @@ int 
evsel__enable(struct evsel *evsel); int evsel__disable(struct evsel *evsel); int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx); +int evsel__open_per_cpu_and_thread(struct evsel *evsel, + struct perf_cpu_map *cpus, int cpu_map_idx, + struct perf_thread_map *threads); int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx); int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads); int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, From 811082e4b668db9689f8ce927a106036b4ed4e96 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 18 Jul 2025 20:05:14 -0700 Subject: [PATCH 0991/2411] perf parse-events: Support user CPUs mixed with threads/processes Counting events system-wide with a specified CPU prior to this change worked: ``` $ perf stat -e 'msr/tsc/,msr/tsc,cpu=cpu_core/,msr/tsc,cpu=cpu_atom/' -a sleep 1 Performance counter stats for 'system wide': 59,393,419,099 msr/tsc/ 33,927,965,927 msr/tsc,cpu=cpu_core/ 25,465,608,044 msr/tsc,cpu=cpu_atom/ ``` However, when counting with process the counts became system wide: ``` $ perf stat -e 'msr/tsc/,msr/tsc,cpu=cpu_core/,msr/tsc,cpu=cpu_atom/' perf test -F 10 10.1: Basic parsing test : Ok 10.2: Parsing without PMU name : Ok 10.3: Parsing with PMU name : Ok Performance counter stats for 'perf test -F 10': 59,233,549 msr/tsc/ 59,227,556 msr/tsc,cpu=cpu_core/ 59,224,053 msr/tsc,cpu=cpu_atom/ ``` Make the handling of CPU maps with event parsing clearer. When an event is parsed creating an evsel the cpus should be either the PMU's cpumask or user specified CPUs. Update perf_evlist__propagate_maps so that it doesn't clobber the user specified CPUs. Try to make the behavior clearer, firstly fix up missing cpumasks. Next, perform sanity checks and adjustments from the global evlist CPU requests and for the PMU including simplifying to the "any CPU"(-1) value. Finally remove the event if the cpumask is empty. 
So that events are opened with a CPU and a thread, change stat's create_perf_stat_counter to give both. With the change things are fixed: ``` $ perf stat --no-scale -e 'msr/tsc/,msr/tsc,cpu=cpu_core/,msr/tsc,cpu=cpu_atom/' perf test -F 10 10.1: Basic parsing test : Ok 10.2: Parsing without PMU name : Ok 10.3: Parsing with PMU name : Ok Performance counter stats for 'perf test -F 10': 63,704,975 msr/tsc/ 47,060,704 msr/tsc,cpu=cpu_core/ (4.62%) 16,640,591 msr/tsc,cpu=cpu_atom/ (2.18%) ``` However, note the "--no-scale" option is used. This is necessary as the running time for the event on the counter isn't the same as the enabled time because the thread doesn't necessarily run on the CPUs specified for the counter. All counter values are scaled with: scaled_value = value * time_enabled / time_running and so without --no-scale the scaled_value becomes very large. This problem already exists on hybrid systems for the same reason. Here are 2 runs of the same code with an instructions event that counts the same on both types of core; there is no real multiplexing happening on the event: ``` $ perf stat -e instructions perf test -F 10 ... Performance counter stats for 'perf test -F 10': 87,896,447 cpu_atom/instructions/ (14.37%) 98,171,964 cpu_core/instructions/ (85.63%) ... $ perf stat --no-scale -e instructions perf test -F 10 ... Performance counter stats for 'perf test -F 10': 13,069,890 cpu_atom/instructions/ (19.32%) 83,460,274 cpu_core/instructions/ (80.68%) ... ``` The scaling has inflated per-PMU instruction counts and the overall count by 2x. To fix this, the kernel needs changing when a task+CPU event (or just task event on hybrid) is scheduled out. A fix could be that the state isn't inactive but off for such events, so that time_enabled counts don't accumulate on them.
Reviewed-by: Thomas Falcon Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250719030517.1990983-13-irogers@google.com Signed-off-by: Namhyung Kim --- tools/lib/perf/evlist.c | 123 ++++++++++++++++++++++----------- tools/perf/util/parse-events.c | 10 ++- tools/perf/util/stat.c | 6 +- 3 files changed, 89 insertions(+), 50 deletions(-) diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 9d9dec21f510..3ed023f4b190 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -36,51 +36,90 @@ void perf_evlist__init(struct perf_evlist *evlist) static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, struct perf_evsel *evsel) { - if (evsel->system_wide) { - /* System wide: set the cpu map of the evsel to all online CPUs. */ - perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__new_online_cpus(); - } else if (evlist->has_user_cpus && evsel->is_pmu_core) { - /* - * User requested CPUs on a core PMU, ensure the requested CPUs - * are valid by intersecting with those of the PMU. - */ - perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__intersect(evlist->user_requested_cpus, evsel->pmu_cpus); - - /* - * Empty cpu lists would eventually get opened as "any" so remove - * genuinely empty ones before they're opened in the wrong place. - */ - if (perf_cpu_map__is_empty(evsel->cpus)) { - struct perf_evsel *next = perf_evlist__next(evlist, evsel); - - perf_evlist__remove(evlist, evsel); - /* Keep idx contiguous */ - if (next) - list_for_each_entry_from(next, &evlist->entries, node) - next->idx--; + if (perf_cpu_map__is_empty(evsel->cpus)) { + if (perf_cpu_map__is_empty(evsel->pmu_cpus)) { + /* + * Assume the unset PMU cpus were for a system-wide + * event, like a software or tracepoint. 
+ */ + evsel->pmu_cpus = perf_cpu_map__new_online_cpus(); } - } else if (!evsel->pmu_cpus || evlist->has_user_cpus || - (!evsel->requires_cpu && perf_cpu_map__has_any_cpu(evlist->user_requested_cpus))) { - /* - * The PMU didn't specify a default cpu map, this isn't a core - * event and the user requested CPUs or the evlist user - * requested CPUs have the "any CPU" (aka dummy) CPU value. In - * which case use the user requested CPUs rather than the PMU - * ones. - */ - perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); - } else if (evsel->cpus != evsel->pmu_cpus) { - /* - * No user requested cpu map but the PMU cpu map doesn't match - * the evsel's. Reset it back to the PMU cpu map. - */ + if (evlist->has_user_cpus && !evsel->system_wide) { + /* + * Use the user CPUs unless the evsel is set to be + * system wide, such as the dummy event. + */ + evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); + } else { + /* + * System wide and other modes, assume the cpu map + * should be set to all PMU CPUs. + */ + evsel->cpus = perf_cpu_map__get(evsel->pmu_cpus); + } + } + /* + * Avoid "any CPU"(-1) for uncore and PMUs that require a CPU, even if + * requested. + */ + if (evsel->requires_cpu && perf_cpu_map__has_any_cpu(evsel->cpus)) { perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__get(evsel->pmu_cpus); } + /* + * Globally requested CPUs replace user requested unless the evsel is + * set to be system wide. + */ + if (evlist->has_user_cpus && !evsel->system_wide) { + assert(!perf_cpu_map__has_any_cpu(evlist->user_requested_cpus)); + if (!perf_cpu_map__equal(evsel->cpus, evlist->user_requested_cpus)) { + perf_cpu_map__put(evsel->cpus); + evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); + } + } + + /* Ensure cpus only references valid PMU CPUs. 
*/ + if (!perf_cpu_map__has_any_cpu(evsel->cpus) && + !perf_cpu_map__is_subset(evsel->pmu_cpus, evsel->cpus)) { + struct perf_cpu_map *tmp = perf_cpu_map__intersect(evsel->pmu_cpus, evsel->cpus); + + perf_cpu_map__put(evsel->cpus); + evsel->cpus = tmp; + } + + /* + * Was event requested on all the PMU's CPUs but the user requested is + * any CPU (-1)? If so switch to using any CPU (-1) to reduce the number + * of events. + */ + if (!evsel->system_wide && + !evsel->requires_cpu && + perf_cpu_map__equal(evsel->cpus, evsel->pmu_cpus) && + perf_cpu_map__has_any_cpu(evlist->user_requested_cpus)) { + perf_cpu_map__put(evsel->cpus); + evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); + } + + /* Sanity check assert before the evsel is potentially removed. */ + assert(!evsel->requires_cpu || !perf_cpu_map__has_any_cpu(evsel->cpus)); + + /* + * Empty cpu lists would eventually get opened as "any" so remove + * genuinely empty ones before they're opened in the wrong place. + */ + if (perf_cpu_map__is_empty(evsel->cpus)) { + struct perf_evsel *next = perf_evlist__next(evlist, evsel); + + perf_evlist__remove(evlist, evsel); + /* Keep idx contiguous */ + if (next) + list_for_each_entry_from(next, &evlist->entries, node) + next->idx--; + + return; + } + if (evsel->system_wide) { perf_thread_map__put(evsel->threads); evsel->threads = perf_thread_map__new_dummy(); @@ -98,6 +137,10 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist) evlist->needs_map_propagation = true; + /* Clear the all_cpus set which will be merged into during propagation. 
*/ + perf_cpu_map__put(evlist->all_cpus); + evlist->all_cpus = NULL; + list_for_each_entry_safe(evsel, n, &evlist->entries, node) __perf_evlist__propagate_maps(evlist, evsel); } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index bd2d831d5123..fe2073c6b549 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -310,20 +310,18 @@ __add_event(struct list_head *list, int *idx, if (pmu) { is_pmu_core = pmu->is_core; pmu_cpus = perf_cpu_map__get(pmu->cpus); + if (perf_cpu_map__is_empty(pmu_cpus)) + pmu_cpus = cpu_map__online(); } else { is_pmu_core = (attr->type == PERF_TYPE_HARDWARE || attr->type == PERF_TYPE_HW_CACHE); pmu_cpus = is_pmu_core ? cpu_map__online() : NULL; } - if (has_user_cpus) { + if (has_user_cpus) cpus = perf_cpu_map__get(user_cpus); - /* Existing behavior that pmu_cpus matches the given user ones. */ - perf_cpu_map__put(pmu_cpus); - pmu_cpus = perf_cpu_map__get(user_cpus); - } else { + else cpus = perf_cpu_map__get(pmu_cpus); - } if (init_attr) event_attr_init(attr); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index b0205e99a4c9..50b1a92d16df 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -769,8 +769,6 @@ int create_perf_stat_counter(struct evsel *evsel, attr->enable_on_exec = 1; } - if (target__has_cpu(target) && !target__has_per_thread(target)) - return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu_map_idx); - - return evsel__open_per_thread(evsel, evsel->core.threads); + return evsel__open_per_cpu_and_thread(evsel, evsel__cpus(evsel), cpu_map_idx, + evsel->core.threads); } From 5b546de9cc177936a3ed07d7d46ef072db4fdbab Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 18 Jul 2025 20:05:15 -0700 Subject: [PATCH 0992/2411] perf topdown: Use attribute to see an event is a topdown metric or slots The string comparisons were overly broad and could fire for the incorrect PMU and events.
Switch to using the config in the attribute then add a perf test to confirm the attribute config values match those of parsed events of that name and don't match others. This exposed matches for slots events that shouldn't have matched as the slots fixed counter event, such as topdown.slots_p. Fixes: fbc798316bef ("perf x86/topdown: Refine helper arch_is_topdown_metrics()") Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250719030517.1990983-14-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/arch/x86/include/arch-tests.h | 4 ++ tools/perf/arch/x86/tests/Build | 1 + tools/perf/arch/x86/tests/arch-tests.c | 1 + tools/perf/arch/x86/tests/topdown.c | 76 ++++++++++++++++++++++++ tools/perf/arch/x86/util/evsel.c | 46 ++++---------- tools/perf/arch/x86/util/topdown.c | 31 ++++------ tools/perf/arch/x86/util/topdown.h | 4 ++ 7 files changed, 108 insertions(+), 55 deletions(-) create mode 100644 tools/perf/arch/x86/tests/topdown.c diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index 4fd425157d7d..8713e9122d4c 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -2,6 +2,8 @@ #ifndef ARCH_TESTS_H #define ARCH_TESTS_H +#include "tests/tests.h" + struct test_suite; /* Tests */ @@ -17,6 +19,8 @@ int test__amd_ibs_via_core_pmu(struct test_suite *test, int subtest); int test__amd_ibs_period(struct test_suite *test, int subtest); int test__hybrid(struct test_suite *test, int subtest); +DECLARE_SUITE(x86_topdown); + extern struct test_suite *arch_tests[]; #endif diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 01d5527f38c7..311b6b53d3d8 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -11,6 +11,7 @@ endif perf-test-$(CONFIG_X86_64) += bp-modify.o perf-test-y += amd-ibs-via-core-pmu.o perf-test-y += amd-ibs-period.o +perf-test-y += topdown.o ifdef SHELLCHECK SHELL_TESTS := 
gen-insn-x86-dat.sh diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index bfee2432515b..29ec1861ccef 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -53,5 +53,6 @@ struct test_suite *arch_tests[] = { &suite__amd_ibs_via_core_pmu, &suite__amd_ibs_period, &suite__hybrid, + &suite__x86_topdown, NULL, }; diff --git a/tools/perf/arch/x86/tests/topdown.c b/tools/perf/arch/x86/tests/topdown.c new file mode 100644 index 000000000000..8d0ea7a4bbc1 --- /dev/null +++ b/tools/perf/arch/x86/tests/topdown.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "arch-tests.h" +#include "../util/topdown.h" +#include "evlist.h" +#include "parse-events.h" +#include "pmu.h" +#include "pmus.h" + +static int event_cb(void *state, struct pmu_event_info *info) +{ + char buf[256]; + struct parse_events_error parse_err; + int *ret = state, err; + struct evlist *evlist = evlist__new(); + struct evsel *evsel; + + if (!evlist) + return -ENOMEM; + + parse_events_error__init(&parse_err); + snprintf(buf, sizeof(buf), "%s/%s/", info->pmu->name, info->name); + err = parse_events(evlist, buf, &parse_err); + if (err) { + parse_events_error__print(&parse_err, buf); + *ret = TEST_FAIL; + } + parse_events_error__exit(&parse_err); + evlist__for_each_entry(evlist, evsel) { + bool fail = false; + bool p_core_pmu = evsel->pmu->type == PERF_TYPE_RAW; + const char *name = evsel__name(evsel); + + if (strcasestr(name, "uops_retired.slots") || + strcasestr(name, "topdown.backend_bound_slots") || + strcasestr(name, "topdown.br_mispredict_slots") || + strcasestr(name, "topdown.memory_bound_slots") || + strcasestr(name, "topdown.bad_spec_slots") || + strcasestr(name, "topdown.slots_p")) { + if (arch_is_topdown_slots(evsel) || arch_is_topdown_metrics(evsel)) + fail = true; + } else if (strcasestr(name, "slots")) { + if (arch_is_topdown_slots(evsel) != p_core_pmu || + arch_is_topdown_metrics(evsel)) + fail = 
true; + } else if (strcasestr(name, "topdown")) { + if (arch_is_topdown_slots(evsel) || + arch_is_topdown_metrics(evsel) != p_core_pmu) + fail = true; + } else if (arch_is_topdown_slots(evsel) || arch_is_topdown_metrics(evsel)) { + fail = true; + } + if (fail) { + pr_debug("Broken topdown information for '%s'\n", evsel__name(evsel)); + *ret = TEST_FAIL; + } + } + evlist__delete(evlist); + return 0; +} + +static int test__x86_topdown(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +{ + int ret = TEST_OK; + struct perf_pmu *pmu = NULL; + + if (!topdown_sys_has_perf_metrics()) + return TEST_OK; + + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { + if (perf_pmu__for_each_event(pmu, /*skip_duplicate_pmus=*/false, &ret, event_cb)) + break; + } + return ret; +} + +DEFINE_SUITE("x86 topdown", x86_topdown); diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c index 3dd29ba2c23b..9bc80fff3aa0 100644 --- a/tools/perf/arch/x86/util/evsel.c +++ b/tools/perf/arch/x86/util/evsel.c @@ -23,47 +23,25 @@ void arch_evsel__set_sample_weight(struct evsel *evsel) bool evsel__sys_has_perf_metrics(const struct evsel *evsel) { struct perf_pmu *pmu; - u32 type = evsel->core.attr.type; + + if (!topdown_sys_has_perf_metrics()) + return false; /* - * The PERF_TYPE_RAW type is the core PMU type, e.g., "cpu" PMU - * on a non-hybrid machine, "cpu_core" PMU on a hybrid machine. - * The slots event is only available for the core PMU, which - * supports the perf metrics feature. - * Checking both the PERF_TYPE_RAW type and the slots event - * should be good enough to detect the perf metrics feature. + * The PERF_TYPE_RAW type is the core PMU type, e.g., "cpu" PMU on a + * non-hybrid machine, "cpu_core" PMU on a hybrid machine. The + * topdown_sys_has_perf_metrics checks the slots event is only available + * for the core PMU, which supports the perf metrics feature. 
Checking + * both the PERF_TYPE_RAW type and the slots event should be good enough + * to detect the perf metrics feature. */ -again: - switch (type) { - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT; - if (type) - goto again; - break; - case PERF_TYPE_RAW: - break; - default: - return false; - } - - pmu = evsel->pmu; - if (pmu && perf_pmu__is_fake(pmu)) - pmu = NULL; - - if (!pmu) { - while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { - if (pmu->type == PERF_TYPE_RAW) - break; - } - } - return pmu && perf_pmu__have_event(pmu, "slots"); + pmu = evsel__find_pmu(evsel); + return pmu && pmu->type == PERF_TYPE_RAW; } bool arch_evsel__must_be_in_group(const struct evsel *evsel) { - if (!evsel__sys_has_perf_metrics(evsel) || !evsel->name || - strcasestr(evsel->name, "uops_retired.slots")) + if (!evsel__sys_has_perf_metrics(evsel)) return false; return arch_is_topdown_metrics(evsel) || arch_is_topdown_slots(evsel); diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c index d1c654839049..66b231fbf52e 100644 --- a/tools/perf/arch/x86/util/topdown.c +++ b/tools/perf/arch/x86/util/topdown.c @@ -1,6 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#include "api/fs/fs.h" -#include "util/evsel.h" #include "util/evlist.h" #include "util/pmu.h" #include "util/pmus.h" @@ -8,6 +6,9 @@ #include "topdown.h" #include "evsel.h" +// cmask=0, inv=0, pc=0, edge=0, umask=4, event=0 +#define TOPDOWN_SLOTS 0x0400 + /* Check whether there is a PMU which supports the perf metrics. 
*/ bool topdown_sys_has_perf_metrics(void) { @@ -32,31 +33,19 @@ bool topdown_sys_has_perf_metrics(void) return has_perf_metrics; } -#define TOPDOWN_SLOTS 0x0400 bool arch_is_topdown_slots(const struct evsel *evsel) { - if (evsel->core.attr.config == TOPDOWN_SLOTS) - return true; - - return false; + return evsel->core.attr.type == PERF_TYPE_RAW && + evsel->core.attr.config == TOPDOWN_SLOTS && + evsel->core.attr.config1 == 0; } bool arch_is_topdown_metrics(const struct evsel *evsel) { - int config = evsel->core.attr.config; - const char *name_from_config; - struct perf_pmu *pmu; - - /* All topdown events have an event code of 0. */ - if ((config & 0xFF) != 0) - return false; - - pmu = evsel__find_pmu(evsel); - if (!pmu || !pmu->is_core) - return false; - - name_from_config = perf_pmu__name_from_config(pmu, config); - return name_from_config && strcasestr(name_from_config, "topdown"); + // cmask=0, inv=0, pc=0, edge=0, umask=0x80-0x87, event=0 + return evsel->core.attr.type == PERF_TYPE_RAW && + (evsel->core.attr.config & 0xFFFFF8FF) == 0x8000 && + evsel->core.attr.config1 == 0; } /* diff --git a/tools/perf/arch/x86/util/topdown.h b/tools/perf/arch/x86/util/topdown.h index 1bae9b1822d7..2349536cf882 100644 --- a/tools/perf/arch/x86/util/topdown.h +++ b/tools/perf/arch/x86/util/topdown.h @@ -2,6 +2,10 @@ #ifndef _TOPDOWN_H #define _TOPDOWN_H 1 +#include + +struct evsel; + bool topdown_sys_has_perf_metrics(void); bool arch_is_topdown_slots(const struct evsel *evsel); bool arch_is_topdown_metrics(const struct evsel *evsel); From 8dcd27b1b8661f64e220bc26a499865261d5d0f1 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 18 Jul 2025 20:05:16 -0700 Subject: [PATCH 0993/2411] perf parse-events: Fix missing slots for Intel topdown metric events Topdown metric events require grouping with a slots event. In perf metrics this is currently achieved by metrics adding an unnecessary "0 * tma_info_thread_slots". 
New TMA metrics trigger optimizations of the metric expression that remove the event and break the metric due to the missing but required event. Add a pass immediately before sorting and fixing parsed events, that inserts a slots event if one is missing. Update test expectations to match this. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250719030517.1990983-15-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/arch/x86/util/evlist.c | 24 ++++++++++++++++++++++++ tools/perf/arch/x86/util/topdown.c | 28 ++++++++++++++++++++++++++++ tools/perf/arch/x86/util/topdown.h | 2 ++ tools/perf/tests/parse-events.c | 24 ++++++++++++------------ tools/perf/util/evlist.h | 1 + tools/perf/util/parse-events.c | 10 ++++++++++ 6 files changed, 77 insertions(+), 12 deletions(-) diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c index 1969758cc8c1..75e9d00a1494 100644 --- a/tools/perf/arch/x86/util/evlist.c +++ b/tools/perf/arch/x86/util/evlist.c @@ -81,3 +81,27 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs) /* Default ordering by insertion index. */ return lhs->core.idx - rhs->core.idx; } + +int arch_evlist__add_required_events(struct list_head *list) +{ + struct evsel *pos, *metric_event = NULL; + int idx = 0; + + if (!topdown_sys_has_perf_metrics()) + return 0; + + list_for_each_entry(pos, list, core.node) { + if (arch_is_topdown_slots(pos)) { + /* Slots event already present, nothing to do. */ + return 0; + } + if (metric_event == NULL && arch_is_topdown_metrics(pos)) + metric_event = pos; + idx++; + } + if (metric_event == NULL) { + /* No topdown metric events, nothing to do.
*/ + return 0; + } + return topdown_insert_slots_event(list, idx + 1, metric_event); +} diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c index 66b231fbf52e..0d01b662627a 100644 --- a/tools/perf/arch/x86/util/topdown.c +++ b/tools/perf/arch/x86/util/topdown.c @@ -77,3 +77,31 @@ bool arch_topdown_sample_read(struct evsel *leader) return false; } + +/* + * Make a copy of the topdown metric event metric_event with the given index but + * change its configuration to be a topdown slots event. Copying from + * metric_event ensures modifiers are the same. + */ +int topdown_insert_slots_event(struct list_head *list, int idx, struct evsel *metric_event) +{ + struct evsel *evsel = evsel__new_idx(&metric_event->core.attr, idx); + + if (!evsel) + return -ENOMEM; + + evsel->core.attr.config = TOPDOWN_SLOTS; + evsel->core.cpus = perf_cpu_map__get(metric_event->core.cpus); + evsel->core.pmu_cpus = perf_cpu_map__get(metric_event->core.pmu_cpus); + evsel->core.is_pmu_core = true; + evsel->pmu = metric_event->pmu; + evsel->name = strdup("slots"); + evsel->precise_max = metric_event->precise_max; + evsel->sample_read = metric_event->sample_read; + evsel->weak_group = metric_event->weak_group; + evsel->bpf_counter = metric_event->bpf_counter; + evsel->retire_lat = metric_event->retire_lat; + evsel__set_leader(evsel, evsel__leader(metric_event)); + list_add_tail(&evsel->core.node, list); + return 0; +} diff --git a/tools/perf/arch/x86/util/topdown.h b/tools/perf/arch/x86/util/topdown.h index 2349536cf882..69035565e649 100644 --- a/tools/perf/arch/x86/util/topdown.h +++ b/tools/perf/arch/x86/util/topdown.h @@ -5,9 +5,11 @@ #include struct evsel; +struct list_head; bool topdown_sys_has_perf_metrics(void); bool arch_is_topdown_slots(const struct evsel *evsel); bool arch_is_topdown_metrics(const struct evsel *evsel); +int topdown_insert_slots_event(struct list_head *list, int idx, struct evsel *metric_event); #endif diff --git 
a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 5ec2e5607987..bb8004397650 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -719,20 +719,20 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist) static int test__checkevent_pmu_events(struct evlist *evlist) { - struct evsel *evsel = evlist__first(evlist); + struct evsel *evsel; - TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type || - strcmp(evsel->pmu->name, "cpu")); - TEST_ASSERT_VAL("wrong exclude_user", - !evsel->core.attr.exclude_user); - TEST_ASSERT_VAL("wrong exclude_kernel", - evsel->core.attr.exclude_kernel); - TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); - TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); + TEST_ASSERT_VAL("wrong number of entries", 1 <= evlist->core.nr_entries); + evlist__for_each_entry(evlist, evsel) { + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type || + strcmp(evsel->pmu->name, "cpu")); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); + TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); + TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); + } return TEST_OK; } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index fac1a01ba13f..1472d2179be1 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -111,6 +111,7 @@ void evlist__add(struct evlist *evlist, struct evsel *entry); void evlist__remove(struct evlist *evlist, struct evsel *evsel); 
int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs); +int arch_evlist__add_required_events(struct list_head *list); int evlist__add_dummy(struct evlist *evlist); struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index fe2073c6b549..01fa8c80998b 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2190,6 +2190,11 @@ static int evlist__cmp(void *_fg_idx, const struct list_head *l, const struct li return arch_evlist__cmp(lhs, rhs); } +int __weak arch_evlist__add_required_events(struct list_head *list __always_unused) +{ + return 0; +} + static int parse_events__sort_events_and_fix_groups(struct list_head *list) { int idx = 0, force_grouped_idx = -1; @@ -2201,6 +2206,11 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list) struct evsel *force_grouped_leader = NULL; bool last_event_was_forced_leader = false; + /* On x86 topdown metrics events require a slots event. */ + ret = arch_evlist__add_required_events(list); + if (ret) + return ret; + /* * Compute index to insert ungrouped events at. Place them where the * first ungrouped event appears. From fcc7cc31239d0fbf0ebf25e65f7f572caed40206 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 18 Jul 2025 20:05:17 -0700 Subject: [PATCH 0994/2411] perf metricgroups: Add NO_THRESHOLD_AND_NMI constraint Thresholds can increase the number of counters a metric needs. The NMI watchdog can take away a counter (hopefully the buddy watchdog will become the default and this will no longer be true). Add a new constraint for the case that a metric and its thresholds would fit in counters but only if the NMI watchdog isn't enabled. Either the threshold or the NMI watchdog should be disabled to make the metric fit. Wire this up into the metric__group_events logic. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250719030517.1990983-16-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/pmu-events/jevents.py | 1 + tools/perf/pmu-events/pmu-events.h | 14 ++++++++++---- tools/perf/util/metricgroup.c | 16 ++++++++++++---- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index e821155151ec..0abd3cfb15ea 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -235,6 +235,7 @@ class JsonEvent: 'NO_GROUP_EVENTS_NMI': '2', 'NO_NMI_WATCHDOG': '2', 'NO_GROUP_EVENTS_SMT': '3', + 'NO_THRESHOLD_AND_NMI': '4', } return metric_constraint_to_enum[metric_constraint] diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index a523936846e0..ea022ea55087 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -25,15 +25,21 @@ enum metric_event_groups { */ MetricNoGroupEvents = 1, /** - * @MetricNoGroupEventsNmi: Don't group events for the metric if the NMI - * watchdog is enabled. + * @MetricNoGroupEventsNmi: + * Don't group events for the metric if the NMI watchdog is enabled. */ MetricNoGroupEventsNmi = 2, /** - * @MetricNoGroupEventsSmt: Don't group events for the metric if SMT is - * enabled. + * @MetricNoGroupEventsSmt: + * Don't group events for the metric if SMT is enabled. */ MetricNoGroupEventsSmt = 3, + /** + * @MetricNoGroupEventsThresholdAndNmi: + * Don't group events for the metric if thresholds are enabled and the NMI + * watchdog is enabled. + */ + MetricNoGroupEventsThresholdAndNmi = 4, }; /* * Describe each PMU event. Each CPU has a table of PMU events.
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 3cc6c47402bd..595b83142d2c 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -179,7 +179,7 @@ static void metric__watchdog_constraint_hint(const char *name, bool foot) " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); } -static bool metric__group_events(const struct pmu_metric *pm) +static bool metric__group_events(const struct pmu_metric *pm, bool metric_no_threshold) { switch (pm->event_grouping) { case MetricNoGroupEvents: @@ -191,6 +191,13 @@ static bool metric__group_events(const struct pmu_metric *pm) return false; case MetricNoGroupEventsSmt: return !smt_on(); + case MetricNoGroupEventsThresholdAndNmi: + if (metric_no_threshold) + return true; + if (!sysctl__nmi_watchdog_enabled()) + return true; + metric__watchdog_constraint_hint(pm->metric_name, /*foot=*/false); + return false; case MetricGroupEvents: default: return true; @@ -212,6 +219,7 @@ static void metric__free(struct metric *m) static struct metric *metric__new(const struct pmu_metric *pm, const char *modifier, bool metric_no_group, + bool metric_no_threshold, int runtime, const char *user_requested_cpu_list, bool system_wide) @@ -246,7 +254,7 @@ static struct metric *metric__new(const struct pmu_metric *pm, } m->pctx->sctx.runtime = runtime; m->pctx->sctx.system_wide = system_wide; - m->group_events = !metric_no_group && metric__group_events(pm); + m->group_events = !metric_no_group && metric__group_events(pm, metric_no_threshold); m->metric_refs = NULL; m->evlist = NULL; @@ -831,8 +839,8 @@ static int __add_metric(struct list_head *metric_list, * This metric is the root of a tree and may reference other * metrics that are added recursively. 
*/ - root_metric = metric__new(pm, modifier, metric_no_group, runtime, - user_requested_cpu_list, system_wide); + root_metric = metric__new(pm, modifier, metric_no_group, metric_no_threshold, + runtime, user_requested_cpu_list, system_wide); if (!root_metric) return -ENOMEM; From f3982385bc507991f1ed732c3c7907bff703f4d4 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:32:41 -0700 Subject: [PATCH 0995/2411] perf build-id: Reduce size of "size" variable Later clean up of the dso_id to include a build_id will suffer from alignment and size issues. The size can only hold up to a value of BUILD_ID_SIZE (20) and the mmap2 event uses a byte for the value. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-2-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/build-id.h | 2 +- tools/perf/util/synthetic-events.c | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index a212497bfdb0..e3e0a446ff0c 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -13,7 +13,7 @@ struct build_id { u8 data[BUILD_ID_SIZE]; - size_t size; + u8 size; }; struct dso; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 7c00b09e3a93..d3c454174602 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -2248,7 +2248,9 @@ int perf_event__synthesize_build_id(const struct perf_tool *tool, memset(&ev, 0, len); - ev.build_id.size = min(bid->size, sizeof(ev.build_id.build_id)); + ev.build_id.size = bid->size; + if (ev.build_id.size > sizeof(ev.build_id.build_id)) + ev.build_id.size = sizeof(ev.build_id.build_id); memcpy(ev.build_id.build_id, bid->data, ev.build_id.size); ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID; ev.build_id.header.misc = misc | PERF_RECORD_MISC_BUILD_ID_SIZE; @@ -2308,7 +2310,9 @@ int perf_event__synthesize_mmap2_build_id(const struct 
perf_tool *tool, ev.mmap2.len = len; ev.mmap2.pgoff = pgoff; - ev.mmap2.build_id_size = min(bid->size, sizeof(ev.mmap2.build_id)); + ev.mmap2.build_id_size = bid->size; + if (ev.mmap2.build_id_size > sizeof(ev.mmap2.build_id)) + ev.mmap2.build_id_size = sizeof(ev.mmap2.build_id); memcpy(ev.mmap2.build_id, bid->data, ev.mmap2.build_id_size); ev.mmap2.prot = prot; From 5a2ceebd8175874ae0e91a304ad6600d82806973 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:32:42 -0700 Subject: [PATCH 0996/2411] perf build-id: Truncate to avoid overflowing the build_id data Warning when the build_id data would be overflowed would lead to memory corruption, switch to truncation. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-3-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/build-id.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index e763e8d99a43..5bc2040bdd0d 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -951,7 +951,10 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits) void build_id__init(struct build_id *bid, const u8 *data, size_t size) { - WARN_ON(size > BUILD_ID_SIZE); + if (size > BUILD_ID_SIZE) { + pr_debug("Truncating build_id size from %zu\n", size); + size = BUILD_ID_SIZE; + } memcpy(bid->data, data, size); bid->size = size; } From a103d2dede5683dabbac2c3374bc24b6a9434478 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 11 Jun 2025 12:43:44 +0200 Subject: [PATCH 0997/2411] PCI: controller: Use dev_fwnode() instead of of_fwnode_handle() All irq_domain functions now accept fwnode instead of of_node. But many PCI controllers still extract dev to of_node and then of_node to fwnode. Instead, clean this up and simply use the dev_fwnode() helper to extract fwnode directly from dev.
Internally, it still does dev => of_node => fwnode steps, but it's now hidden from the users. In the case of altera, this also removes an unused 'node' variable that is only used when CONFIG_OF is enabled: drivers/pci/controller/pcie-altera.c: In function 'altera_pcie_init_irq_domain': drivers/pci/controller/pcie-altera.c:855:29: error: unused variable 'node' [-Werror=unused-variable] 855 | struct device_node *node = dev->of_node; Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Arnd Bergmann # altera [bhelgaas: squash together, rebase to precede msi-parent] Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250521163329.2137973-1-arnd@kernel.org Link: https://patch.msgid.link/20250611104348.192092-16-jirislaby@kernel.org Link: https://patch.msgid.link/20250723065907.1841758-1-jirislaby@kernel.org --- drivers/pci/controller/dwc/pcie-designware-host.c | 2 +- drivers/pci/controller/mobiveil/pcie-mobiveil-host.c | 7 +++---- drivers/pci/controller/pcie-altera-msi.c | 2 +- drivers/pci/controller/pcie-altera.c | 3 +-- drivers/pci/controller/pcie-mediatek-gen3.c | 3 +-- drivers/pci/controller/pcie-mediatek.c | 2 +- drivers/pci/controller/pcie-xilinx-dma-pl.c | 2 +- drivers/pci/controller/pcie-xilinx-nwl.c | 2 +- drivers/pci/controller/plda/pcie-plda-host.c | 2 +- 9 files changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index 906277f9ffaf..1e1291f2e2af 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -227,7 +227,7 @@ static const struct irq_domain_ops dw_pcie_msi_domain_ops = { int dw_pcie_allocate_domains(struct dw_pcie_rp *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); - struct fwnode_handle *fwnode = of_fwnode_handle(pci->dev->of_node); + struct fwnode_handle *fwnode = dev_fwnode(pci->dev); pp->irq_domain = irq_domain_create_linear(fwnode, pp->num_vectors, 
&dw_pcie_msi_domain_ops, pp); diff --git a/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c index a600f46ee3c3..98e90fcbeceb 100644 --- a/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c +++ b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c @@ -435,7 +435,7 @@ static const struct irq_domain_ops msi_domain_ops = { static int mobiveil_allocate_msi_domains(struct mobiveil_pcie *pcie) { struct device *dev = &pcie->pdev->dev; - struct fwnode_handle *fwnode = of_fwnode_handle(dev->of_node); + struct fwnode_handle *fwnode = dev_fwnode(dev); struct mobiveil_msi *msi = &pcie->rp.msi; mutex_init(&msi->lock); @@ -464,9 +464,8 @@ static int mobiveil_pcie_init_irq_domain(struct mobiveil_pcie *pcie) struct mobiveil_root_port *rp = &pcie->rp; /* setup INTx */ - rp->intx_domain = irq_domain_create_linear(of_fwnode_handle(dev->of_node), PCI_NUM_INTX, - &intx_domain_ops, pcie); - + rp->intx_domain = irq_domain_create_linear(dev_fwnode(dev), PCI_NUM_INTX, &intx_domain_ops, + pcie); if (!rp->intx_domain) { dev_err(dev, "Failed to get a INTx IRQ domain\n"); return -ENOMEM; diff --git a/drivers/pci/controller/pcie-altera-msi.c b/drivers/pci/controller/pcie-altera-msi.c index a43f21eb8fbb..08e07c819202 100644 --- a/drivers/pci/controller/pcie-altera-msi.c +++ b/drivers/pci/controller/pcie-altera-msi.c @@ -164,7 +164,7 @@ static const struct irq_domain_ops msi_domain_ops = { static int altera_allocate_domains(struct altera_msi *msi) { - struct fwnode_handle *fwnode = of_fwnode_handle(msi->pdev->dev.of_node); + struct fwnode_handle *fwnode = dev_fwnode(&msi->pdev->dev); msi->inner_domain = irq_domain_create_linear(NULL, msi->num_of_vectors, &msi_domain_ops, msi); diff --git a/drivers/pci/controller/pcie-altera.c b/drivers/pci/controller/pcie-altera.c index 0fc77176a52e..3dbb7adc421c 100644 --- a/drivers/pci/controller/pcie-altera.c +++ b/drivers/pci/controller/pcie-altera.c @@ -852,10 +852,9 @@ static void aglx_isr(struct 
irq_desc *desc) static int altera_pcie_init_irq_domain(struct altera_pcie *pcie) { struct device *dev = &pcie->pdev->dev; - struct device_node *node = dev->of_node; /* Setup INTx */ - pcie->irq_domain = irq_domain_create_linear(of_fwnode_handle(node), PCI_NUM_INTX, + pcie->irq_domain = irq_domain_create_linear(dev_fwnode(dev), PCI_NUM_INTX, &intx_domain_ops, pcie); if (!pcie->irq_domain) { dev_err(dev, "Failed to get a INTx IRQ domain\n"); diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c index b55f5973414c..5464b4ae5c20 100644 --- a/drivers/pci/controller/pcie-mediatek-gen3.c +++ b/drivers/pci/controller/pcie-mediatek-gen3.c @@ -756,8 +756,7 @@ static int mtk_pcie_init_irq_domains(struct mtk_gen3_pcie *pcie) /* Setup MSI */ mutex_init(&pcie->lock); - pcie->msi_bottom_domain = irq_domain_create_linear(of_fwnode_handle(node), - PCIE_MSI_IRQS_NUM, + pcie->msi_bottom_domain = irq_domain_create_linear(dev_fwnode(dev), PCIE_MSI_IRQS_NUM, &mtk_msi_bottom_domain_ops, pcie); if (!pcie->msi_bottom_domain) { dev_err(dev, "failed to create MSI bottom domain\n"); diff --git a/drivers/pci/controller/pcie-mediatek.c b/drivers/pci/controller/pcie-mediatek.c index e1934aa06c8d..594b16929fe4 100644 --- a/drivers/pci/controller/pcie-mediatek.c +++ b/drivers/pci/controller/pcie-mediatek.c @@ -485,7 +485,7 @@ static struct msi_domain_info mtk_msi_domain_info = { static int mtk_pcie_allocate_msi_domains(struct mtk_pcie_port *port) { - struct fwnode_handle *fwnode = of_fwnode_handle(port->pcie->dev->of_node); + struct fwnode_handle *fwnode = dev_fwnode(port->pcie->dev); mutex_init(&port->lock); diff --git a/drivers/pci/controller/pcie-xilinx-dma-pl.c b/drivers/pci/controller/pcie-xilinx-dma-pl.c index dc9690a535e1..e044715b7f6a 100644 --- a/drivers/pci/controller/pcie-xilinx-dma-pl.c +++ b/drivers/pci/controller/pcie-xilinx-dma-pl.c @@ -470,7 +470,7 @@ static int xilinx_pl_dma_pcie_init_msi_irq_domain(struct pl_dma_pcie *port) struct 
device *dev = port->dev; struct xilinx_msi *msi = &port->msi; int size = BITS_TO_LONGS(XILINX_NUM_MSI_IRQS) * sizeof(long); - struct fwnode_handle *fwnode = of_fwnode_handle(port->dev->of_node); + struct fwnode_handle *fwnode = dev_fwnode(port->dev); msi->dev_domain = irq_domain_create_linear(NULL, XILINX_NUM_MSI_IRQS, &dev_msi_domain_ops, port); diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c index c8b05477b719..eda87e917430 100644 --- a/drivers/pci/controller/pcie-xilinx-nwl.c +++ b/drivers/pci/controller/pcie-xilinx-nwl.c @@ -495,7 +495,7 @@ static int nwl_pcie_init_msi_irq_domain(struct nwl_pcie *pcie) { #ifdef CONFIG_PCI_MSI struct device *dev = pcie->dev; - struct fwnode_handle *fwnode = of_fwnode_handle(dev->of_node); + struct fwnode_handle *fwnode = dev_fwnode(dev); struct nwl_msi *msi = &pcie->msi; msi->dev_domain = irq_domain_create_linear(NULL, INT_PCI_MSI_NR, &dev_msi_domain_ops, pcie); diff --git a/drivers/pci/controller/plda/pcie-plda-host.c b/drivers/pci/controller/plda/pcie-plda-host.c index 3abedf723215..fdf9ec110e7a 100644 --- a/drivers/pci/controller/plda/pcie-plda-host.c +++ b/drivers/pci/controller/plda/pcie-plda-host.c @@ -150,7 +150,7 @@ static struct msi_domain_info plda_msi_domain_info = { static int plda_allocate_msi_domains(struct plda_pcie_rp *port) { struct device *dev = port->dev; - struct fwnode_handle *fwnode = of_fwnode_handle(dev->of_node); + struct fwnode_handle *fwnode = dev_fwnode(dev); struct plda_msi *msi = &port->msi; mutex_init(&port->msi.lock); From 8e717112caf35998b198d3762b381de70711bdec Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 26 Jun 2025 16:47:51 +0200 Subject: [PATCH 0998/2411] PCI: dwc: Switch to msi_create_parent_irq_domain() Switch to msi_create_parent_irq_domain() from pci_msi_create_irq_domain() which was using legacy MSI domain setup. 
Signed-off-by: Nam Cao [mani: reworded commit message] Signed-off-by: Manivannan Sadhasivam [bhelgaas: rebase on dev_fwnode() conversion] Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner Link: https://patch.msgid.link/04d4a96046490e50139826c16423954e033cdf89.1750858083.git.namcao@linutronix.de --- drivers/pci/controller/dwc/Kconfig | 1 + .../pci/controller/dwc/pcie-designware-host.c | 65 +++++++------------ drivers/pci/controller/dwc/pcie-designware.h | 1 - 3 files changed, 23 insertions(+), 44 deletions(-) diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig index d9f0386396ed..5dfa5592bf07 100644 --- a/drivers/pci/controller/dwc/Kconfig +++ b/drivers/pci/controller/dwc/Kconfig @@ -19,6 +19,7 @@ config PCIE_DW_DEBUGFS config PCIE_DW_HOST bool select PCIE_DW + select IRQ_MSI_LIB config PCIE_DW_EP bool diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index 1e1291f2e2af..101a64187f2a 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -23,35 +24,21 @@ static struct pci_ops dw_pcie_ops; static struct pci_ops dw_child_pcie_ops; -static void dw_msi_ack_irq(struct irq_data *d) -{ - irq_chip_ack_parent(d); -} +#define DW_PCIE_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ + MSI_FLAG_USE_DEF_CHIP_OPS | \ + MSI_FLAG_NO_AFFINITY | \ + MSI_FLAG_PCI_MSI_MASK_PARENT) +#define DW_PCIE_MSI_FLAGS_SUPPORTED (MSI_FLAG_MULTI_PCI_MSI | \ + MSI_FLAG_PCI_MSIX | \ + MSI_GENERIC_FLAGS_MASK) -static void dw_msi_mask_irq(struct irq_data *d) -{ - pci_msi_mask_irq(d); - irq_chip_mask_parent(d); -} - -static void dw_msi_unmask_irq(struct irq_data *d) -{ - pci_msi_unmask_irq(d); - irq_chip_unmask_parent(d); -} - -static struct irq_chip dw_pcie_msi_irq_chip = { - .name = "PCI-MSI", - .irq_ack = dw_msi_ack_irq, - .irq_mask = dw_msi_mask_irq, - 
.irq_unmask = dw_msi_unmask_irq, -}; - -static struct msi_domain_info dw_pcie_msi_domain_info = { - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_NO_AFFINITY | MSI_FLAG_PCI_MSIX | - MSI_FLAG_MULTI_PCI_MSI, - .chip = &dw_pcie_msi_irq_chip, +static const struct msi_parent_ops dw_pcie_msi_parent_ops = { + .required_flags = DW_PCIE_MSI_FLAGS_REQUIRED, + .supported_flags = DW_PCIE_MSI_FLAGS_SUPPORTED, + .bus_select_token = DOMAIN_BUS_PCI_MSI, + .chip_flags = MSI_CHIP_FLAG_SET_ACK, + .prefix = "DW-", + .init_dev_msi_info = msi_lib_init_dev_msi_info, }; /* MSI int handler */ @@ -227,26 +214,19 @@ static const struct irq_domain_ops dw_pcie_msi_domain_ops = { int dw_pcie_allocate_domains(struct dw_pcie_rp *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); - struct fwnode_handle *fwnode = dev_fwnode(pci->dev); + struct irq_domain_info info = { + .fwnode = dev_fwnode(pci->dev), + .ops = &dw_pcie_msi_domain_ops, + .size = pp->num_vectors, + .host_data = pp, + }; - pp->irq_domain = irq_domain_create_linear(fwnode, pp->num_vectors, - &dw_pcie_msi_domain_ops, pp); + pp->irq_domain = msi_create_parent_irq_domain(&info, &dw_pcie_msi_parent_ops); if (!pp->irq_domain) { dev_err(pci->dev, "Failed to create IRQ domain\n"); return -ENOMEM; } - irq_domain_update_bus_token(pp->irq_domain, DOMAIN_BUS_NEXUS); - - pp->msi_domain = pci_msi_create_irq_domain(fwnode, - &dw_pcie_msi_domain_info, - pp->irq_domain); - if (!pp->msi_domain) { - dev_err(pci->dev, "Failed to create MSI domain\n"); - irq_domain_remove(pp->irq_domain); - return -ENOMEM; - } - return 0; } @@ -260,7 +240,6 @@ static void dw_pcie_free_msi(struct dw_pcie_rp *pp) NULL, NULL); } - irq_domain_remove(pp->msi_domain); irq_domain_remove(pp->irq_domain); } diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h index ce9e18554e42..d9daee4ce220 100644 --- a/drivers/pci/controller/dwc/pcie-designware.h +++ b/drivers/pci/controller/dwc/pcie-designware.h 
@@ -417,7 +417,6 @@ struct dw_pcie_rp { const struct dw_pcie_host_ops *ops; int msi_irq[MAX_MSI_CTRLS]; struct irq_domain *irq_domain; - struct irq_domain *msi_domain; dma_addr_t msi_data; struct irq_chip *msi_irq_chip; u32 num_vectors; From 0cb6d733983cb3be88a7c1e44400fdd231efd053 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 26 Jun 2025 16:47:52 +0200 Subject: [PATCH 0999/2411] PCI: mobiveil: Switch to msi_create_parent_irq_domain() Switch to msi_create_parent_irq_domain() from pci_msi_create_irq_domain() which was using legacy MSI domain setup. Signed-off-by: Nam Cao [mani: reworded commit message] Signed-off-by: Manivannan Sadhasivam [bhelgaas: rebase on dev_fwnode() conversion, drop fwnode local var] Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner Link: https://patch.msgid.link/af46c15c47a7716f7e0c50d0f7391509c95b49c2.1750858083.git.namcao@linutronix.de --- drivers/pci/controller/mobiveil/Kconfig | 1 + .../controller/mobiveil/pcie-mobiveil-host.c | 43 ++++++++++--------- .../pci/controller/mobiveil/pcie-mobiveil.h | 1 - 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/pci/controller/mobiveil/Kconfig b/drivers/pci/controller/mobiveil/Kconfig index 58ce034f701a..c50c4625937f 100644 --- a/drivers/pci/controller/mobiveil/Kconfig +++ b/drivers/pci/controller/mobiveil/Kconfig @@ -9,6 +9,7 @@ config PCIE_MOBIVEIL config PCIE_MOBIVEIL_HOST bool depends on PCI_MSI + select IRQ_MSI_LIB select PCIE_MOBIVEIL config PCIE_LAYERSCAPE_GEN4 diff --git a/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c index 98e90fcbeceb..dbc72c73fd0a 100644 --- a/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c +++ b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -353,16 +354,19 @@ static const struct irq_domain_ops intx_domain_ops = { .map = mobiveil_pcie_intx_map, }; -static struct irq_chip 
mobiveil_msi_irq_chip = { - .name = "Mobiveil PCIe MSI", - .irq_mask = pci_msi_mask_irq, - .irq_unmask = pci_msi_unmask_irq, -}; +#define MOBIVEIL_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ + MSI_FLAG_USE_DEF_CHIP_OPS | \ + MSI_FLAG_NO_AFFINITY) -static struct msi_domain_info mobiveil_msi_domain_info = { - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_NO_AFFINITY | MSI_FLAG_PCI_MSIX, - .chip = &mobiveil_msi_irq_chip, +#define MOBIVEIL_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \ + MSI_FLAG_PCI_MSIX) + +static const struct msi_parent_ops mobiveil_msi_parent_ops = { + .required_flags = MOBIVEIL_MSI_FLAGS_REQUIRED, + .supported_flags = MOBIVEIL_MSI_FLAGS_SUPPORTED, + .bus_select_token = DOMAIN_BUS_PCI_MSI, + .prefix = "Mobiveil-", + .init_dev_msi_info = msi_lib_init_dev_msi_info, }; static void mobiveil_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) @@ -435,23 +439,20 @@ static const struct irq_domain_ops msi_domain_ops = { static int mobiveil_allocate_msi_domains(struct mobiveil_pcie *pcie) { struct device *dev = &pcie->pdev->dev; - struct fwnode_handle *fwnode = dev_fwnode(dev); struct mobiveil_msi *msi = &pcie->rp.msi; mutex_init(&msi->lock); - msi->dev_domain = irq_domain_create_linear(NULL, msi->num_of_vectors, - &msi_domain_ops, pcie); - if (!msi->dev_domain) { - dev_err(dev, "failed to create IRQ domain\n"); - return -ENOMEM; - } - msi->msi_domain = pci_msi_create_irq_domain(fwnode, - &mobiveil_msi_domain_info, - msi->dev_domain); - if (!msi->msi_domain) { + struct irq_domain_info info = { + .fwnode = dev_fwnode(dev), + .ops = &msi_domain_ops, + .host_data = pcie, + .size = msi->num_of_vectors, + }; + + msi->dev_domain = msi_create_parent_irq_domain(&info, &mobiveil_msi_parent_ops); + if (!msi->dev_domain) { dev_err(dev, "failed to create MSI domain\n"); - irq_domain_remove(msi->dev_domain); return -ENOMEM; } diff --git a/drivers/pci/controller/mobiveil/pcie-mobiveil.h 
b/drivers/pci/controller/mobiveil/pcie-mobiveil.h index 662f17f9bf65..7246de6a7176 100644 --- a/drivers/pci/controller/mobiveil/pcie-mobiveil.h +++ b/drivers/pci/controller/mobiveil/pcie-mobiveil.h @@ -135,7 +135,6 @@ struct mobiveil_msi { /* MSI information */ struct mutex lock; /* protect bitmap variable */ - struct irq_domain *msi_domain; struct irq_domain *dev_domain; phys_addr_t msi_pages_phys; int num_of_vectors; From 750277048afe7ce8ebfc0b120de7dfbc745058a7 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 26 Jun 2025 16:47:53 +0200 Subject: [PATCH 1000/2411] PCI: aardvark: Switch to msi_create_parent_irq_domain() Switch to msi_create_parent_irq_domain() from pci_msi_create_irq_domain() which was using legacy MSI domain setup. Signed-off-by: Nam Cao [mani: reworded commit message] Signed-off-by: Manivannan Sadhasivam [bhelgaas: rebase on dev_fwnode() conversion] Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner Link: https://patch.msgid.link/68b2f9387bbe4f08bcd428bfab83ad1219fb8d80.1750858083.git.namcao@linutronix.de --- drivers/pci/controller/Kconfig | 1 + drivers/pci/controller/pci-aardvark.c | 57 +++++++++++---------------- 2 files changed, 23 insertions(+), 35 deletions(-) diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 886f6f43a895..91a2d4ffc3ac 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -13,6 +13,7 @@ config PCI_AARDVARK depends on OF depends on PCI_MSI select PCI_BRIDGE_EMUL + select IRQ_MSI_LIB help Add support for Aardvark 64bit PCIe Host Controller. 
This controller is part of the South Bridge of the Marvel Armada diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c index 7bac64533b14..e34bea1ff0ac 100644 --- a/drivers/pci/controller/pci-aardvark.c +++ b/drivers/pci/controller/pci-aardvark.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -278,7 +279,6 @@ struct advk_pcie { struct irq_domain *irq_domain; struct irq_chip irq_chip; raw_spinlock_t irq_lock; - struct irq_domain *msi_domain; struct irq_domain *msi_inner_domain; raw_spinlock_t msi_irq_lock; DECLARE_BITMAP(msi_used, MSI_IRQ_NUM); @@ -1332,18 +1332,6 @@ static void advk_msi_irq_unmask(struct irq_data *d) raw_spin_unlock_irqrestore(&pcie->msi_irq_lock, flags); } -static void advk_msi_top_irq_mask(struct irq_data *d) -{ - pci_msi_mask_irq(d); - irq_chip_mask_parent(d); -} - -static void advk_msi_top_irq_unmask(struct irq_data *d) -{ - pci_msi_unmask_irq(d); - irq_chip_unmask_parent(d); -} - static struct irq_chip advk_msi_bottom_irq_chip = { .name = "MSI", .irq_compose_msi_msg = advk_msi_irq_compose_msi_msg, @@ -1436,17 +1424,20 @@ static const struct irq_domain_ops advk_pcie_irq_domain_ops = { .xlate = irq_domain_xlate_onecell, }; -static struct irq_chip advk_msi_irq_chip = { - .name = "advk-MSI", - .irq_mask = advk_msi_top_irq_mask, - .irq_unmask = advk_msi_top_irq_unmask, -}; +#define ADVK_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ + MSI_FLAG_USE_DEF_CHIP_OPS | \ + MSI_FLAG_PCI_MSI_MASK_PARENT | \ + MSI_FLAG_NO_AFFINITY) +#define ADVK_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \ + MSI_FLAG_PCI_MSIX | \ + MSI_FLAG_MULTI_PCI_MSI) -static struct msi_domain_info advk_msi_domain_info = { - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_NO_AFFINITY | MSI_FLAG_MULTI_PCI_MSI | - MSI_FLAG_PCI_MSIX, - .chip = &advk_msi_irq_chip, +static const struct msi_parent_ops advk_msi_parent_ops = { + .required_flags = ADVK_MSI_FLAGS_REQUIRED, + .supported_flags 
= ADVK_MSI_FLAGS_SUPPORTED, + .bus_select_token = DOMAIN_BUS_PCI_MSI, + .prefix = "advk-", + .init_dev_msi_info = msi_lib_init_dev_msi_info, }; static int advk_pcie_init_msi_irq_domain(struct advk_pcie *pcie) @@ -1456,26 +1447,22 @@ static int advk_pcie_init_msi_irq_domain(struct advk_pcie *pcie) raw_spin_lock_init(&pcie->msi_irq_lock); mutex_init(&pcie->msi_used_lock); - pcie->msi_inner_domain = irq_domain_create_linear(NULL, MSI_IRQ_NUM, - &advk_msi_domain_ops, pcie); + struct irq_domain_info info = { + .fwnode = dev_fwnode(dev), + .ops = &advk_msi_domain_ops, + .host_data = pcie, + .size = MSI_IRQ_NUM, + }; + + pcie->msi_inner_domain = msi_create_parent_irq_domain(&info, &advk_msi_parent_ops); if (!pcie->msi_inner_domain) return -ENOMEM; - pcie->msi_domain = - pci_msi_create_irq_domain(dev_fwnode(dev), - &advk_msi_domain_info, - pcie->msi_inner_domain); - if (!pcie->msi_domain) { - irq_domain_remove(pcie->msi_inner_domain); - return -ENOMEM; - } - return 0; } static void advk_pcie_remove_msi_irq_domain(struct advk_pcie *pcie) { - irq_domain_remove(pcie->msi_domain); irq_domain_remove(pcie->msi_inner_domain); } From cf154cccd8c9b9be80e4f9e367975d8f3cf5a497 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 26 Jun 2025 16:47:54 +0200 Subject: [PATCH 1001/2411] PCI: altera-msi: Switch to msi_create_parent_irq_domain() Switch to msi_create_parent_irq_domain() from pci_msi_create_irq_domain() which was using legacy MSI domain setup. 
Signed-off-by: Nam Cao [mani: reworded commit message] Signed-off-by: Manivannan Sadhasivam [bhelgaas: rebase on dev_fwnode() conversion] Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner Link: https://patch.msgid.link/0a88da04bb82bd588828a7889e9d58c515ea5dbb.1750858083.git.namcao@linutronix.de --- drivers/pci/controller/Kconfig | 1 + drivers/pci/controller/pcie-altera-msi.c | 43 +++++++++++------------- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 91a2d4ffc3ac..012c18c67d9c 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -30,6 +30,7 @@ config PCIE_ALTERA_MSI tristate "Altera PCIe MSI feature" depends on PCIE_ALTERA depends on PCI_MSI + select IRQ_MSI_LIB help Say Y here if you want PCIe MSI support for the Altera FPGA. This MSI driver supports Altera MSI to GIC controller IP. diff --git a/drivers/pci/controller/pcie-altera-msi.c b/drivers/pci/controller/pcie-altera-msi.c index 08e07c819202..ea2ca2e70f20 100644 --- a/drivers/pci/controller/pcie-altera-msi.c +++ b/drivers/pci/controller/pcie-altera-msi.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -29,7 +30,6 @@ struct altera_msi { DECLARE_BITMAP(used, MAX_MSI_VECTORS); struct mutex lock; /* protect "used" bitmap */ struct platform_device *pdev; - struct irq_domain *msi_domain; struct irq_domain *inner_domain; void __iomem *csr_base; void __iomem *vector_base; @@ -74,18 +74,20 @@ static void altera_msi_isr(struct irq_desc *desc) chained_irq_exit(chip, desc); } -static struct irq_chip altera_msi_irq_chip = { - .name = "Altera PCIe MSI", - .irq_mask = pci_msi_mask_irq, - .irq_unmask = pci_msi_unmask_irq, -}; +#define ALTERA_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ + MSI_FLAG_USE_DEF_CHIP_OPS | \ + MSI_FLAG_NO_AFFINITY) -static struct msi_domain_info altera_msi_domain_info = { - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - 
MSI_FLAG_NO_AFFINITY | MSI_FLAG_PCI_MSIX, - .chip = &altera_msi_irq_chip, -}; +#define ALTERA_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \ + MSI_FLAG_PCI_MSIX) +static const struct msi_parent_ops altera_msi_parent_ops = { + .required_flags = ALTERA_MSI_FLAGS_REQUIRED, + .supported_flags = ALTERA_MSI_FLAGS_SUPPORTED, + .bus_select_token = DOMAIN_BUS_PCI_MSI, + .prefix = "Altera-", + .init_dev_msi_info = msi_lib_init_dev_msi_info, +}; static void altera_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) { struct altera_msi *msi = irq_data_get_irq_chip_data(data); @@ -164,20 +166,16 @@ static const struct irq_domain_ops msi_domain_ops = { static int altera_allocate_domains(struct altera_msi *msi) { - struct fwnode_handle *fwnode = dev_fwnode(&msi->pdev->dev); + struct irq_domain_info info = { + .fwnode = dev_fwnode(&msi->pdev->dev), + .ops = &msi_domain_ops, + .host_data = msi, + .size = msi->num_of_vectors, + }; - msi->inner_domain = irq_domain_create_linear(NULL, msi->num_of_vectors, - &msi_domain_ops, msi); + msi->inner_domain = msi_create_parent_irq_domain(&info, &altera_msi_parent_ops); if (!msi->inner_domain) { - dev_err(&msi->pdev->dev, "failed to create IRQ domain\n"); - return -ENOMEM; - } - - msi->msi_domain = pci_msi_create_irq_domain(fwnode, - &altera_msi_domain_info, msi->inner_domain); - if (!msi->msi_domain) { dev_err(&msi->pdev->dev, "failed to create MSI domain\n"); - irq_domain_remove(msi->inner_domain); return -ENOMEM; } @@ -186,7 +184,6 @@ static int altera_allocate_domains(struct altera_msi *msi) static void altera_free_domains(struct altera_msi *msi) { - irq_domain_remove(msi->msi_domain); irq_domain_remove(msi->inner_domain); } From ebcc2fbd33985b0cb1aa05776ec0313444e96647 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 26 Jun 2025 16:47:55 +0200 Subject: [PATCH 1002/2411] PCI: brcmstb: Switch to msi_create_parent_irq_domain() Switch to msi_create_parent_irq_domain() from pci_msi_create_irq_domain() which was using legacy MSI 
domain setup. Signed-off-by: Nam Cao [mani: reworded commit message] Signed-off-by: Manivannan Sadhasivam [bhelgaas: rebase on dev_fwnode() conversion, drop fwnode local var] Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/fa72703e06c2ee2c7554082c7152913eb0dd294f.1750858083.git.namcao@linutronix.de --- drivers/pci/controller/Kconfig | 1 + drivers/pci/controller/pcie-brcmstb.c | 45 +++++++++++++-------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 012c18c67d9c..0f6cec244d4f 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -64,6 +64,7 @@ config PCIE_BRCMSTB BMIPS_GENERIC || COMPILE_TEST depends on OF depends on PCI_MSI + select IRQ_MSI_LIB default ARCH_BRCMSTB || BMIPS_GENERIC help Say Y here to enable PCIe host controller support for diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c index 92887b394eb4..912a3cf86134 100644 --- a/drivers/pci/controller/pcie-brcmstb.c +++ b/drivers/pci/controller/pcie-brcmstb.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -265,7 +266,6 @@ struct brcm_msi { struct device *dev; void __iomem *base; struct device_node *np; - struct irq_domain *msi_domain; struct irq_domain *inner_domain; struct mutex lock; /* guards the alloc/free operations */ u64 target_addr; @@ -465,17 +465,20 @@ static void brcm_pcie_set_outbound_win(struct brcm_pcie *pcie, writel(tmp, pcie->base + PCIE_MEM_WIN0_LIMIT_HI(win)); } -static struct irq_chip brcm_msi_irq_chip = { - .name = "BRCM STB PCIe MSI", - .irq_ack = irq_chip_ack_parent, - .irq_mask = pci_msi_mask_irq, - .irq_unmask = pci_msi_unmask_irq, -}; +#define BRCM_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ + MSI_FLAG_USE_DEF_CHIP_OPS | \ + MSI_FLAG_NO_AFFINITY) -static struct msi_domain_info brcm_msi_domain_info = { - .flags = 
MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_NO_AFFINITY | MSI_FLAG_MULTI_PCI_MSI, - .chip = &brcm_msi_irq_chip, +#define BRCM_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \ + MSI_FLAG_MULTI_PCI_MSI) + +static const struct msi_parent_ops brcm_msi_parent_ops = { + .required_flags = BRCM_MSI_FLAGS_REQUIRED, + .supported_flags = BRCM_MSI_FLAGS_SUPPORTED, + .bus_select_token = DOMAIN_BUS_PCI_MSI, + .chip_flags = MSI_CHIP_FLAG_SET_ACK, + .prefix = "BRCM-", + .init_dev_msi_info = msi_lib_init_dev_msi_info, }; static void brcm_pcie_msi_isr(struct irq_desc *desc) @@ -581,21 +584,18 @@ static const struct irq_domain_ops msi_domain_ops = { static int brcm_allocate_domains(struct brcm_msi *msi) { - struct fwnode_handle *fwnode = of_fwnode_handle(msi->np); struct device *dev = msi->dev; - msi->inner_domain = irq_domain_create_linear(NULL, msi->nr, &msi_domain_ops, msi); - if (!msi->inner_domain) { - dev_err(dev, "failed to create IRQ domain\n"); - return -ENOMEM; - } + struct irq_domain_info info = { + .fwnode = of_fwnode_handle(msi->np), + .ops = &msi_domain_ops, + .host_data = msi, + .size = msi->nr, + }; - msi->msi_domain = pci_msi_create_irq_domain(fwnode, - &brcm_msi_domain_info, - msi->inner_domain); - if (!msi->msi_domain) { + msi->inner_domain = msi_create_parent_irq_domain(&info, &brcm_msi_parent_ops); + if (!msi->inner_domain) { dev_err(dev, "failed to create MSI domain\n"); - irq_domain_remove(msi->inner_domain); return -ENOMEM; } @@ -604,7 +604,6 @@ static int brcm_allocate_domains(struct brcm_msi *msi) static void brcm_free_domains(struct brcm_msi *msi) { - irq_domain_remove(msi->msi_domain); irq_domain_remove(msi->inner_domain); } From e275e38a61a10d1a85a6efc12b292daf0dd814e1 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 26 Jun 2025 16:47:56 +0200 Subject: [PATCH 1003/2411] PCI: iproc: Switch to msi_create_parent_irq_domain() Switch to msi_create_parent_irq_domain() from pci_msi_create_irq_domain() which was using legacy MSI domain 
setup. Signed-off-by: Nam Cao [mani: reworded commit message & squashed the kdoc cleanup patch] Signed-off-by: Manivannan Sadhasivam [bhelgaas: rebase on dev_fwnode() conversion] Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/53946d74caf1fd134a1820eac82c3cf64d48779f.1750858083.git.namcao@linutronix.de --- drivers/pci/controller/Kconfig | 1 + drivers/pci/controller/pcie-iproc-msi.c | 44 +++++++++++-------------- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 0f6cec244d4f..375a019f35bd 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -101,6 +101,7 @@ config PCIE_IPROC_MSI bool "Broadcom iProc PCIe MSI support" depends on PCIE_IPROC_PLATFORM || PCIE_IPROC_BCMA depends on PCI_MSI + select IRQ_MSI_LIB default ARCH_BCM_IPROC help Say Y here if you want to enable MSI support for Broadcom's iProc diff --git a/drivers/pci/controller/pcie-iproc-msi.c b/drivers/pci/controller/pcie-iproc-msi.c index d2cb4c4f821a..9ba242ab9596 100644 --- a/drivers/pci/controller/pcie-iproc-msi.c +++ b/drivers/pci/controller/pcie-iproc-msi.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -81,7 +82,6 @@ struct iproc_msi_grp { * @bitmap_lock: lock to protect access to the MSI bitmap * @nr_msi_vecs: total number of MSI vectors * @inner_domain: inner IRQ domain - * @msi_domain: MSI IRQ domain * @nr_eq_region: required number of 4K aligned memory region for MSI event * queues * @nr_msi_region: required number of 4K aligned address region for MSI posted @@ -101,7 +101,6 @@ struct iproc_msi { struct mutex bitmap_lock; unsigned int nr_msi_vecs; struct irq_domain *inner_domain; - struct irq_domain *msi_domain; unsigned int nr_eq_region; unsigned int nr_msi_region; void *eq_cpu; @@ -165,16 +164,18 @@ static inline unsigned int iproc_msi_eq_offset(struct iproc_msi *msi, u32 eq) return eq * 
EQ_LEN * sizeof(u32); } -static struct irq_chip iproc_msi_irq_chip = { - .name = "iProc-MSI", -}; +#define IPROC_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ + MSI_FLAG_USE_DEF_CHIP_OPS) +#define IPROC_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \ + MSI_FLAG_PCI_MSIX) -static struct msi_domain_info iproc_msi_domain_info = { - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_PCI_MSIX, - .chip = &iproc_msi_irq_chip, +static struct msi_parent_ops iproc_msi_parent_ops = { + .required_flags = IPROC_MSI_FLAGS_REQUIRED, + .supported_flags = IPROC_MSI_FLAGS_SUPPORTED, + .bus_select_token = DOMAIN_BUS_PCI_MSI, + .prefix = "iProc-", + .init_dev_msi_info = msi_lib_init_dev_msi_info, }; - /* * In iProc PCIe core, each MSI group is serviced by a GIC interrupt and a * dedicated event queue. Each MSI group can support up to 64 MSI vectors. @@ -446,27 +447,22 @@ static void iproc_msi_disable(struct iproc_msi *msi) static int iproc_msi_alloc_domains(struct device_node *node, struct iproc_msi *msi) { - msi->inner_domain = irq_domain_create_linear(NULL, msi->nr_msi_vecs, - &msi_domain_ops, msi); + struct irq_domain_info info = { + .fwnode = of_fwnode_handle(node), + .ops = &msi_domain_ops, + .host_data = msi, + .size = msi->nr_msi_vecs, + }; + + msi->inner_domain = msi_create_parent_irq_domain(&info, &iproc_msi_parent_ops); if (!msi->inner_domain) return -ENOMEM; - msi->msi_domain = pci_msi_create_irq_domain(of_fwnode_handle(node), - &iproc_msi_domain_info, - msi->inner_domain); - if (!msi->msi_domain) { - irq_domain_remove(msi->inner_domain); - return -ENOMEM; - } - return 0; } static void iproc_msi_free_domains(struct iproc_msi *msi) { - if (msi->msi_domain) - irq_domain_remove(msi->msi_domain); - if (msi->inner_domain) irq_domain_remove(msi->inner_domain); } @@ -542,7 +538,7 @@ int iproc_msi_init(struct iproc_pcie *pcie, struct device_node *node) msi->nr_cpus = num_possible_cpus(); if (msi->nr_cpus == 1) - iproc_msi_domain_info.flags |= 
MSI_FLAG_MULTI_PCI_MSI; + iproc_msi_parent_ops.supported_flags |= MSI_FLAG_MULTI_PCI_MSI; msi->nr_irqs = of_irq_count(node); if (!msi->nr_irqs) { From 9a35a26485b3d8677f8cc44103f554b0ce8d62d0 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 26 Jun 2025 16:47:57 +0200 Subject: [PATCH 1004/2411] PCI: mediatek-gen3: Switch to msi_create_parent_irq_domain() Switch to msi_create_parent_irq_domain() from pci_msi_create_irq_domain() which was using legacy MSI domain setup. Signed-off-by: Nam Cao [mani: reworded commit message & fixed merge conflict] Signed-off-by: Manivannan Sadhasivam [bhelgaas: rebase on dev_fwnode() conversion] Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner Link: https://patch.msgid.link/bfbd2e375269071b69e1aa85e629ee4b7c99518f.1750858083.git.namcao@linutronix.de --- drivers/pci/controller/Kconfig | 1 + drivers/pci/controller/pcie-mediatek-gen3.c | 63 ++++++++------------- 2 files changed, 24 insertions(+), 40 deletions(-) diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 375a019f35bd..ec32c343a751 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -203,6 +203,7 @@ config PCIE_MEDIATEK_GEN3 tristate "MediaTek Gen3 PCIe controller" depends on ARCH_AIROHA || ARCH_MEDIATEK || COMPILE_TEST depends on PCI_MSI + select IRQ_MSI_LIB help Adds support for PCIe Gen3 MAC controller for MediaTek SoCs. 
This PCIe controller is compatible with Gen3, Gen2 and Gen1 speed, diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c index 5464b4ae5c20..97147f43e41c 100644 --- a/drivers/pci/controller/pcie-mediatek-gen3.c +++ b/drivers/pci/controller/pcie-mediatek-gen3.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -187,7 +188,6 @@ struct mtk_msi_set { * @saved_irq_state: IRQ enable state saved at suspend time * @irq_lock: lock protecting IRQ register access * @intx_domain: legacy INTx IRQ domain - * @msi_domain: MSI IRQ domain * @msi_bottom_domain: MSI IRQ bottom domain * @msi_sets: MSI sets information * @lock: lock protecting IRQ bit map @@ -210,7 +210,6 @@ struct mtk_gen3_pcie { u32 saved_irq_state; raw_spinlock_t irq_lock; struct irq_domain *intx_domain; - struct irq_domain *msi_domain; struct irq_domain *msi_bottom_domain; struct mtk_msi_set msi_sets[PCIE_MSI_SET_NUM]; struct mutex lock; @@ -526,30 +525,22 @@ static int mtk_pcie_startup_port(struct mtk_gen3_pcie *pcie) return 0; } -static void mtk_pcie_msi_irq_mask(struct irq_data *data) -{ - pci_msi_mask_irq(data); - irq_chip_mask_parent(data); -} +#define MTK_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ + MSI_FLAG_USE_DEF_CHIP_OPS | \ + MSI_FLAG_NO_AFFINITY | \ + MSI_FLAG_PCI_MSI_MASK_PARENT) -static void mtk_pcie_msi_irq_unmask(struct irq_data *data) -{ - pci_msi_unmask_irq(data); - irq_chip_unmask_parent(data); -} +#define MTK_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \ + MSI_FLAG_PCI_MSIX | \ + MSI_FLAG_MULTI_PCI_MSI) -static struct irq_chip mtk_msi_irq_chip = { - .irq_ack = irq_chip_ack_parent, - .irq_mask = mtk_pcie_msi_irq_mask, - .irq_unmask = mtk_pcie_msi_irq_unmask, - .name = "MSI", -}; - -static struct msi_domain_info mtk_msi_domain_info = { - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_NO_AFFINITY | MSI_FLAG_PCI_MSIX | - MSI_FLAG_MULTI_PCI_MSI, - .chip = &mtk_msi_irq_chip, +static 
const struct msi_parent_ops mtk_msi_parent_ops = { + .required_flags = MTK_MSI_FLAGS_REQUIRED, + .supported_flags = MTK_MSI_FLAGS_SUPPORTED, + .bus_select_token = DOMAIN_BUS_PCI_MSI, + .chip_flags = MSI_CHIP_FLAG_SET_ACK, + .prefix = "MTK3-", + .init_dev_msi_info = msi_lib_init_dev_msi_info, }; static void mtk_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) @@ -756,28 +747,23 @@ static int mtk_pcie_init_irq_domains(struct mtk_gen3_pcie *pcie) /* Setup MSI */ mutex_init(&pcie->lock); - pcie->msi_bottom_domain = irq_domain_create_linear(dev_fwnode(dev), PCIE_MSI_IRQS_NUM, - &mtk_msi_bottom_domain_ops, pcie); + struct irq_domain_info info = { + .fwnode = dev_fwnode(dev), + .ops = &mtk_msi_bottom_domain_ops, + .host_data = pcie, + .size = PCIE_MSI_IRQS_NUM, + }; + + pcie->msi_bottom_domain = msi_create_parent_irq_domain(&info, &mtk_msi_parent_ops); if (!pcie->msi_bottom_domain) { dev_err(dev, "failed to create MSI bottom domain\n"); ret = -ENODEV; goto err_msi_bottom_domain; } - pcie->msi_domain = pci_msi_create_irq_domain(dev->fwnode, - &mtk_msi_domain_info, - pcie->msi_bottom_domain); - if (!pcie->msi_domain) { - dev_err(dev, "failed to create MSI domain\n"); - ret = -ENODEV; - goto err_msi_domain; - } - of_node_put(intc_node); return 0; -err_msi_domain: - irq_domain_remove(pcie->msi_bottom_domain); err_msi_bottom_domain: irq_domain_remove(pcie->intx_domain); out_put_node: @@ -792,9 +778,6 @@ static void mtk_pcie_irq_teardown(struct mtk_gen3_pcie *pcie) if (pcie->intx_domain) irq_domain_remove(pcie->intx_domain); - if (pcie->msi_domain) - irq_domain_remove(pcie->msi_domain); - if (pcie->msi_bottom_domain) irq_domain_remove(pcie->msi_bottom_domain); From e449cb9afc963cf9cf47139bb873c412605c83e7 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 26 Jun 2025 16:47:58 +0200 Subject: [PATCH 1005/2411] PCI: mediatek: Switch to msi_create_parent_irq_domain() Switch to msi_create_parent_irq_domain() from pci_msi_create_irq_domain() which was using legacy MSI 
domain setup. Signed-off-by: Nam Cao [mani: reworded commit message] Signed-off-by: Manivannan Sadhasivam [bhelgaas: rebase on dev_fwnode() conversion, drop fwnode local var] Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner Link: https://patch.msgid.link/76f6e6ce6021607cd0fdfd79fef7d2eb69d9f361.1750858083.git.namcao@linutronix.de --- drivers/pci/controller/Kconfig | 1 + drivers/pci/controller/pcie-mediatek.c | 48 ++++++++++++-------------- 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index ec32c343a751..65289a171333 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -195,6 +195,7 @@ config PCIE_MEDIATEK depends on ARCH_AIROHA || ARCH_MEDIATEK || COMPILE_TEST depends on OF depends on PCI_MSI + select IRQ_MSI_LIB help Say Y here if you want to enable PCIe controller support on MediaTek SoCs. diff --git a/drivers/pci/controller/pcie-mediatek.c b/drivers/pci/controller/pcie-mediatek.c index 594b16929fe4..24cc30a2ab6c 100644 --- a/drivers/pci/controller/pcie-mediatek.c +++ b/drivers/pci/controller/pcie-mediatek.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -180,7 +181,6 @@ struct mtk_pcie_soc { * @irq: GIC irq * @irq_domain: legacy INTx IRQ domain * @inner_domain: inner IRQ domain - * @msi_domain: MSI IRQ domain * @lock: protect the msi_irq_in_use bitmap * @msi_irq_in_use: bit map for assigned MSI IRQ */ @@ -200,7 +200,6 @@ struct mtk_pcie_port { int irq; struct irq_domain *irq_domain; struct irq_domain *inner_domain; - struct irq_domain *msi_domain; struct mutex lock; DECLARE_BITMAP(msi_irq_in_use, MTK_MSI_IRQS_NUM); }; @@ -470,40 +469,39 @@ static const struct irq_domain_ops msi_domain_ops = { .free = mtk_pcie_irq_domain_free, }; -static struct irq_chip mtk_msi_irq_chip = { - .name = "MTK PCIe MSI", - .irq_ack = irq_chip_ack_parent, - .irq_mask = pci_msi_mask_irq, - .irq_unmask = pci_msi_unmask_irq, -}; 
+#define MTK_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ + MSI_FLAG_USE_DEF_CHIP_OPS | \ + MSI_FLAG_NO_AFFINITY) -static struct msi_domain_info mtk_msi_domain_info = { - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_NO_AFFINITY | MSI_FLAG_PCI_MSIX, - .chip = &mtk_msi_irq_chip, +#define MTK_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \ + MSI_FLAG_PCI_MSIX) + +static const struct msi_parent_ops mtk_msi_parent_ops = { + .required_flags = MTK_MSI_FLAGS_REQUIRED, + .supported_flags = MTK_MSI_FLAGS_SUPPORTED, + .bus_select_token = DOMAIN_BUS_PCI_MSI, + .chip_flags = MSI_CHIP_FLAG_SET_ACK, + .prefix = "MTK-", + .init_dev_msi_info = msi_lib_init_dev_msi_info, }; static int mtk_pcie_allocate_msi_domains(struct mtk_pcie_port *port) { - struct fwnode_handle *fwnode = dev_fwnode(port->pcie->dev); - mutex_init(&port->lock); - port->inner_domain = irq_domain_create_linear(fwnode, MTK_MSI_IRQS_NUM, - &msi_domain_ops, port); + struct irq_domain_info info = { + .fwnode = dev_fwnode(port->pcie->dev), + .ops = &msi_domain_ops, + .host_data = port, + .size = MTK_MSI_IRQS_NUM, + }; + + port->inner_domain = msi_create_parent_irq_domain(&info, &mtk_msi_parent_ops); if (!port->inner_domain) { dev_err(port->pcie->dev, "failed to create IRQ domain\n"); return -ENOMEM; } - port->msi_domain = pci_msi_create_irq_domain(fwnode, &mtk_msi_domain_info, - port->inner_domain); - if (!port->msi_domain) { - dev_err(port->pcie->dev, "failed to create MSI domain\n"); - irq_domain_remove(port->inner_domain); - return -ENOMEM; - } - return 0; } @@ -532,8 +530,6 @@ static void mtk_pcie_irq_teardown(struct mtk_pcie *pcie) irq_domain_remove(port->irq_domain); if (IS_ENABLED(CONFIG_PCI_MSI)) { - if (port->msi_domain) - irq_domain_remove(port->msi_domain); if (port->inner_domain) irq_domain_remove(port->inner_domain); } From dd26c1a23fd5a607c50738ea0dcb6cdbb8185cfe Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 26 Jun 2025 16:47:59 +0200 Subject: [PATCH 1006/2411] PCI: 
rcar-host: Switch to msi_create_parent_irq_domain() Switch to msi_create_parent_irq_domain() from pci_msi_create_irq_domain() which was using legacy MSI domain setup. Signed-off-by: Nam Cao [mani: reworded commit message] Signed-off-by: Manivannan Sadhasivam [bhelgaas: rebase on dev_fwnode() conversion, drop fwnode local var] Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner Link: https://patch.msgid.link/ab4005db0a829549be1f348f6c27be50a2118b5e.1750858083.git.namcao@linutronix.de --- drivers/pci/controller/Kconfig | 1 + drivers/pci/controller/pcie-rcar-host.c | 68 +++++++++---------------- 2 files changed, 25 insertions(+), 44 deletions(-) diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 65289a171333..8b9492e9ae69 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -243,6 +243,7 @@ config PCIE_RCAR_HOST bool "Renesas R-Car PCIe controller (host mode)" depends on ARCH_RENESAS || COMPILE_TEST depends on PCI_MSI + select IRQ_MSI_LIB help Say Y here if you want PCIe controller support on R-Car SoCs in host mode. 
diff --git a/drivers/pci/controller/pcie-rcar-host.c b/drivers/pci/controller/pcie-rcar-host.c index c32b803a47c7..fe288fd770c4 100644 --- a/drivers/pci/controller/pcie-rcar-host.c +++ b/drivers/pci/controller/pcie-rcar-host.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -597,30 +598,6 @@ static irqreturn_t rcar_pcie_msi_irq(int irq, void *data) return IRQ_HANDLED; } -static void rcar_msi_top_irq_ack(struct irq_data *d) -{ - irq_chip_ack_parent(d); -} - -static void rcar_msi_top_irq_mask(struct irq_data *d) -{ - pci_msi_mask_irq(d); - irq_chip_mask_parent(d); -} - -static void rcar_msi_top_irq_unmask(struct irq_data *d) -{ - pci_msi_unmask_irq(d); - irq_chip_unmask_parent(d); -} - -static struct irq_chip rcar_msi_top_chip = { - .name = "PCIe MSI", - .irq_ack = rcar_msi_top_irq_ack, - .irq_mask = rcar_msi_top_irq_mask, - .irq_unmask = rcar_msi_top_irq_unmask, -}; - static void rcar_msi_irq_ack(struct irq_data *d) { struct rcar_msi *msi = irq_data_get_irq_chip_data(d); @@ -718,30 +695,36 @@ static const struct irq_domain_ops rcar_msi_domain_ops = { .free = rcar_msi_domain_free, }; -static struct msi_domain_info rcar_msi_info = { - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_NO_AFFINITY | MSI_FLAG_MULTI_PCI_MSI, - .chip = &rcar_msi_top_chip, +#define RCAR_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ + MSI_FLAG_USE_DEF_CHIP_OPS | \ + MSI_FLAG_PCI_MSI_MASK_PARENT | \ + MSI_FLAG_NO_AFFINITY) + +#define RCAR_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \ + MSI_FLAG_MULTI_PCI_MSI) + +static const struct msi_parent_ops rcar_msi_parent_ops = { + .required_flags = RCAR_MSI_FLAGS_REQUIRED, + .supported_flags = RCAR_MSI_FLAGS_SUPPORTED, + .bus_select_token = DOMAIN_BUS_PCI_MSI, + .chip_flags = MSI_CHIP_FLAG_SET_ACK, + .prefix = "RCAR-", + .init_dev_msi_info = msi_lib_init_dev_msi_info, }; static int rcar_allocate_domains(struct rcar_msi *msi) { struct rcar_pcie *pcie = &msi_to_host(msi)->pcie; - struct 
fwnode_handle *fwnode = dev_fwnode(pcie->dev); - struct irq_domain *parent; + struct irq_domain_info info = { + .fwnode = dev_fwnode(pcie->dev), + .ops = &rcar_msi_domain_ops, + .host_data = msi, + .size = INT_PCI_MSI_NR, + }; - parent = irq_domain_create_linear(fwnode, INT_PCI_MSI_NR, - &rcar_msi_domain_ops, msi); - if (!parent) { - dev_err(pcie->dev, "failed to create IRQ domain\n"); - return -ENOMEM; - } - irq_domain_update_bus_token(parent, DOMAIN_BUS_NEXUS); - - msi->domain = pci_msi_create_irq_domain(fwnode, &rcar_msi_info, parent); + msi->domain = msi_create_parent_irq_domain(&info, &rcar_msi_parent_ops); if (!msi->domain) { - dev_err(pcie->dev, "failed to create MSI domain\n"); - irq_domain_remove(parent); + dev_err(pcie->dev, "failed to create IRQ domain\n"); return -ENOMEM; } @@ -750,10 +733,7 @@ static int rcar_allocate_domains(struct rcar_msi *msi) static void rcar_free_domains(struct rcar_msi *msi) { - struct irq_domain *parent = msi->domain->parent; - irq_domain_remove(msi->domain); - irq_domain_remove(parent); } static int rcar_pcie_enable_msi(struct rcar_pcie_host *host) From d08c7e502c9f3212ff5d64903a75e26742b37f2c Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 26 Jun 2025 16:48:00 +0200 Subject: [PATCH 1007/2411] PCI: xilinx-xdma: Switch to msi_create_parent_irq_domain() Switch to msi_create_parent_irq_domain() from pci_msi_create_irq_domain() which was using legacy MSI domain setup. 
Signed-off-by: Nam Cao [mani: reworded commit message] Signed-off-by: Manivannan Sadhasivam [bhelgaas: rebase on dev_fwnode() conversion] Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner Link: https://patch.msgid.link/b4620dc1808f217a69d0ae50700ffa12ffd657eb.1750858083.git.namcao@linutronix.de --- drivers/pci/controller/Kconfig | 1 + drivers/pci/controller/pcie-xilinx-dma-pl.c | 47 +++++++++------------ 2 files changed, 21 insertions(+), 27 deletions(-) diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 8b9492e9ae69..c9b618023973 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -331,6 +331,7 @@ config PCIE_XILINX_DMA_PL depends on ARCH_ZYNQMP || COMPILE_TEST depends on PCI_MSI select PCI_HOST_COMMON + select IRQ_MSI_LIB help Say 'Y' here if you want kernel support for the Xilinx PL DMA PCIe host bridge. The controller is a Soft IP which can act as diff --git a/drivers/pci/controller/pcie-xilinx-dma-pl.c b/drivers/pci/controller/pcie-xilinx-dma-pl.c index e044715b7f6a..b037c8f315e4 100644 --- a/drivers/pci/controller/pcie-xilinx-dma-pl.c +++ b/drivers/pci/controller/pcie-xilinx-dma-pl.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -90,7 +91,6 @@ struct xilinx_pl_dma_variant { }; struct xilinx_msi { - struct irq_domain *msi_domain; unsigned long *bitmap; struct irq_domain *dev_domain; struct mutex lock; /* Protect bitmap variable */ @@ -373,20 +373,20 @@ static irqreturn_t xilinx_pl_dma_pcie_intr_handler(int irq, void *dev_id) return IRQ_HANDLED; } -static struct irq_chip xilinx_msi_irq_chip = { - .name = "pl_dma:PCIe MSI", - .irq_enable = pci_msi_unmask_irq, - .irq_disable = pci_msi_mask_irq, - .irq_mask = pci_msi_mask_irq, - .irq_unmask = pci_msi_unmask_irq, -}; +#define XILINX_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ + MSI_FLAG_USE_DEF_CHIP_OPS | \ + MSI_FLAG_NO_AFFINITY) -static struct msi_domain_info xilinx_msi_domain_info = { - .flags = 
MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_NO_AFFINITY | MSI_FLAG_MULTI_PCI_MSI, - .chip = &xilinx_msi_irq_chip, -}; +#define XILINX_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \ + MSI_FLAG_MULTI_PCI_MSI) +static const struct msi_parent_ops xilinx_msi_parent_ops = { + .required_flags = XILINX_MSI_FLAGS_REQUIRED, + .supported_flags = XILINX_MSI_FLAGS_SUPPORTED, + .bus_select_token = DOMAIN_BUS_PCI_MSI, + .prefix = "pl_dma-", + .init_dev_msi_info = msi_lib_init_dev_msi_info, +}; static void xilinx_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) { struct pl_dma_pcie *pcie = irq_data_get_irq_chip_data(data); @@ -458,11 +458,6 @@ static void xilinx_pl_dma_pcie_free_irq_domains(struct pl_dma_pcie *port) irq_domain_remove(msi->dev_domain); msi->dev_domain = NULL; } - - if (msi->msi_domain) { - irq_domain_remove(msi->msi_domain); - msi->msi_domain = NULL; - } } static int xilinx_pl_dma_pcie_init_msi_irq_domain(struct pl_dma_pcie *port) @@ -470,19 +465,17 @@ static int xilinx_pl_dma_pcie_init_msi_irq_domain(struct pl_dma_pcie *port) struct device *dev = port->dev; struct xilinx_msi *msi = &port->msi; int size = BITS_TO_LONGS(XILINX_NUM_MSI_IRQS) * sizeof(long); - struct fwnode_handle *fwnode = dev_fwnode(port->dev); + struct irq_domain_info info = { + .fwnode = dev_fwnode(port->dev), + .ops = &dev_msi_domain_ops, + .host_data = port, + .size = XILINX_NUM_MSI_IRQS, + }; - msi->dev_domain = irq_domain_create_linear(NULL, XILINX_NUM_MSI_IRQS, - &dev_msi_domain_ops, port); + msi->dev_domain = msi_create_parent_irq_domain(&info, &xilinx_msi_parent_ops); if (!msi->dev_domain) goto out; - msi->msi_domain = pci_msi_create_irq_domain(fwnode, - &xilinx_msi_domain_info, - msi->dev_domain); - if (!msi->msi_domain) - goto out; - mutex_init(&msi->lock); msi->bitmap = kzalloc(size, GFP_KERNEL); if (!msi->bitmap) From 710a1494e157f2d59876761f51de0a4a4c75b2af Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 26 Jun 2025 16:48:01 +0200 Subject: [PATCH 
1008/2411] PCI: xilinx-nwl: Switch to msi_create_parent_irq_domain() Switch to msi_create_parent_irq_domain() from pci_msi_create_irq_domain() which was using legacy MSI domain setup. Signed-off-by: Nam Cao [mani: reworded commit message] Signed-off-by: Manivannan Sadhasivam [bhelgaas: rebase on dev_fwnode() conversion, drop fwnode local var] Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner Link: https://patch.msgid.link/5ac6e216bf2eaa438c8854baf2ff3e5cf0b2284f.1750858083.git.namcao@linutronix.de --- drivers/pci/controller/Kconfig | 1 + drivers/pci/controller/pcie-xilinx-nwl.c | 44 ++++++++++++------------ 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index c9b618023973..118ff622fa69 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -342,6 +342,7 @@ config PCIE_XILINX_NWL bool "Xilinx NWL PCIe controller" depends on ARCH_ZYNQMP || COMPILE_TEST depends on PCI_MSI + select IRQ_MSI_LIB help Say 'Y' here if you want kernel support for Xilinx NWL PCIe controller. 
The controller can act as Root Port diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c index eda87e917430..05b8c205493c 100644 --- a/drivers/pci/controller/pcie-xilinx-nwl.c +++ b/drivers/pci/controller/pcie-xilinx-nwl.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -145,7 +146,6 @@ #define LINK_WAIT_USLEEP_MAX 100000 struct nwl_msi { /* MSI information */ - struct irq_domain *msi_domain; DECLARE_BITMAP(bitmap, INT_PCI_MSI_NR); struct irq_domain *dev_domain; struct mutex lock; /* protect bitmap variable */ @@ -418,19 +418,22 @@ static const struct irq_domain_ops intx_domain_ops = { }; #ifdef CONFIG_PCI_MSI -static struct irq_chip nwl_msi_irq_chip = { - .name = "nwl_pcie:msi", - .irq_enable = pci_msi_unmask_irq, - .irq_disable = pci_msi_mask_irq, - .irq_mask = pci_msi_mask_irq, - .irq_unmask = pci_msi_unmask_irq, + +#define NWL_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ + MSI_FLAG_USE_DEF_CHIP_OPS | \ + MSI_FLAG_NO_AFFINITY) + +#define NWL_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \ + MSI_FLAG_MULTI_PCI_MSI) + +static const struct msi_parent_ops nwl_msi_parent_ops = { + .required_flags = NWL_MSI_FLAGS_REQUIRED, + .supported_flags = NWL_MSI_FLAGS_SUPPORTED, + .bus_select_token = DOMAIN_BUS_PCI_MSI, + .prefix = "nwl-", + .init_dev_msi_info = msi_lib_init_dev_msi_info, }; -static struct msi_domain_info nwl_msi_domain_info = { - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_NO_AFFINITY | MSI_FLAG_MULTI_PCI_MSI, - .chip = &nwl_msi_irq_chip, -}; #endif static void nwl_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) @@ -495,22 +498,19 @@ static int nwl_pcie_init_msi_irq_domain(struct nwl_pcie *pcie) { #ifdef CONFIG_PCI_MSI struct device *dev = pcie->dev; - struct fwnode_handle *fwnode = dev_fwnode(dev); struct nwl_msi *msi = &pcie->msi; + struct irq_domain_info info = { + .fwnode = dev_fwnode(dev), + .ops = &dev_msi_domain_ops, + 
.host_data = pcie, + .size = INT_PCI_MSI_NR, + }; - msi->dev_domain = irq_domain_create_linear(NULL, INT_PCI_MSI_NR, &dev_msi_domain_ops, pcie); + msi->dev_domain = msi_create_parent_irq_domain(&info, &nwl_msi_parent_ops); if (!msi->dev_domain) { dev_err(dev, "failed to create dev IRQ domain\n"); return -ENOMEM; } - msi->msi_domain = pci_msi_create_irq_domain(fwnode, - &nwl_msi_domain_info, - msi->dev_domain); - if (!msi->msi_domain) { - dev_err(dev, "failed to create msi IRQ domain\n"); - irq_domain_remove(msi->dev_domain); - return -ENOMEM; - } #endif return 0; } From f29861aa301c5333ad0a64d41de43e169aa9ac15 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 26 Jun 2025 16:48:02 +0200 Subject: [PATCH 1009/2411] PCI: xilinx: Switch to msi_create_parent_irq_domain() Switch to msi_create_parent_irq_domain() from pci_msi_create_irq_domain() which was using legacy MSI domain setup. Signed-off-by: Nam Cao [mani: reworded commit message] Signed-off-by: Manivannan Sadhasivam [bhelgaas: rebase on dev_fwnode() conversion, drop fwnode local var] Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner Link: https://patch.msgid.link/b1353c797ce53714c22823de3bd2ae3d09fcd84f.1750858083.git.namcao@linutronix.de --- drivers/pci/controller/Kconfig | 1 + drivers/pci/controller/pcie-xilinx.c | 54 +++++++++++++++------------- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 118ff622fa69..8f56ffd029ba 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -322,6 +322,7 @@ config PCIE_XILINX bool "Xilinx AXI PCIe controller" depends on OF depends on PCI_MSI + select IRQ_MSI_LIB help Say 'Y' here if you want kernel to support the Xilinx AXI PCIe Host Bridge driver. 
diff --git a/drivers/pci/controller/pcie-xilinx.c b/drivers/pci/controller/pcie-xilinx.c index e36aa874bae9..f121836c3cf4 100644 --- a/drivers/pci/controller/pcie-xilinx.c +++ b/drivers/pci/controller/pcie-xilinx.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -203,11 +204,6 @@ static void xilinx_msi_top_irq_ack(struct irq_data *d) */ } -static struct irq_chip xilinx_msi_top_chip = { - .name = "PCIe MSI", - .irq_ack = xilinx_msi_top_irq_ack, -}; - static void xilinx_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) { struct xilinx_pcie *pcie = irq_data_get_irq_chip_data(data); @@ -264,29 +260,42 @@ static const struct irq_domain_ops xilinx_msi_domain_ops = { .free = xilinx_msi_domain_free, }; -static struct msi_domain_info xilinx_msi_info = { - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_NO_AFFINITY, - .chip = &xilinx_msi_top_chip, +static bool xilinx_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *real_parent, struct msi_domain_info *info) +{ + struct irq_chip *chip = info->chip; + + if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info)) + return false; + + chip->irq_ack = xilinx_msi_top_irq_ack; + return true; +} + +#define XILINX_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ + MSI_FLAG_USE_DEF_CHIP_OPS | \ + MSI_FLAG_NO_AFFINITY) + +static const struct msi_parent_ops xilinx_msi_parent_ops = { + .required_flags = XILINX_MSI_FLAGS_REQUIRED, + .supported_flags = MSI_GENERIC_FLAGS_MASK, + .bus_select_token = DOMAIN_BUS_PCI_MSI, + .prefix = "xilinx-", + .init_dev_msi_info = xilinx_init_dev_msi_info, }; static int xilinx_allocate_msi_domains(struct xilinx_pcie *pcie) { - struct fwnode_handle *fwnode = dev_fwnode(pcie->dev); - struct irq_domain *parent; + struct irq_domain_info info = { + .fwnode = dev_fwnode(pcie->dev), + .ops = &xilinx_msi_domain_ops, + .host_data = pcie, + .size = XILINX_NUM_MSI_IRQS, + }; - parent = 
irq_domain_create_linear(fwnode, XILINX_NUM_MSI_IRQS, - &xilinx_msi_domain_ops, pcie); - if (!parent) { - dev_err(pcie->dev, "failed to create IRQ domain\n"); - return -ENOMEM; - } - irq_domain_update_bus_token(parent, DOMAIN_BUS_NEXUS); - - pcie->msi_domain = pci_msi_create_irq_domain(fwnode, &xilinx_msi_info, parent); + pcie->msi_domain = msi_create_parent_irq_domain(&info, &xilinx_msi_parent_ops); if (!pcie->msi_domain) { dev_err(pcie->dev, "failed to create MSI domain\n"); - irq_domain_remove(parent); return -ENOMEM; } @@ -295,10 +304,7 @@ static int xilinx_allocate_msi_domains(struct xilinx_pcie *pcie) static void xilinx_free_msi_domains(struct xilinx_pcie *pcie) { - struct irq_domain *parent = pcie->msi_domain->parent; - irq_domain_remove(pcie->msi_domain); - irq_domain_remove(parent); } /* INTx Functions */ From d7703cf5c40210f54cfd7a642576895e1eb80a15 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 26 Jun 2025 16:48:03 +0200 Subject: [PATCH 1010/2411] PCI: plda: Switch to msi_create_parent_irq_domain() Switch to msi_create_parent_irq_domain() from pci_msi_create_irq_domain() which was using legacy MSI domain setup. 
Signed-off-by: Nam Cao [mani: reworded commit message] Signed-off-by: Manivannan Sadhasivam [bhelgaas: rebase on dev_fwnode() conversion, drop fwnode local var] Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner Link: https://patch.msgid.link/1279fe6500a1d8135d8f5feb2f055df008746c88.1750858083.git.namcao@linutronix.de --- drivers/pci/controller/plda/Kconfig | 1 + drivers/pci/controller/plda/pcie-plda-host.c | 43 ++++++++++---------- drivers/pci/controller/plda/pcie-plda.h | 1 - 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/drivers/pci/controller/plda/Kconfig b/drivers/pci/controller/plda/Kconfig index c0e14146d7e4..62120101139c 100644 --- a/drivers/pci/controller/plda/Kconfig +++ b/drivers/pci/controller/plda/Kconfig @@ -5,6 +5,7 @@ menu "PLDA-based PCIe controllers" config PCIE_PLDA_HOST bool + select IRQ_MSI_LIB config PCIE_MICROCHIP_HOST tristate "Microchip AXI PCIe controller" diff --git a/drivers/pci/controller/plda/pcie-plda-host.c b/drivers/pci/controller/plda/pcie-plda-host.c index fdf9ec110e7a..8e2db2e5b64b 100644 --- a/drivers/pci/controller/plda/pcie-plda-host.c +++ b/drivers/pci/controller/plda/pcie-plda-host.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -134,42 +135,41 @@ static const struct irq_domain_ops msi_domain_ops = { .free = plda_irq_msi_domain_free, }; -static struct irq_chip plda_msi_irq_chip = { - .name = "PLDA PCIe MSI", - .irq_ack = irq_chip_ack_parent, - .irq_mask = pci_msi_mask_irq, - .irq_unmask = pci_msi_unmask_irq, -}; +#define PLDA_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ + MSI_FLAG_USE_DEF_CHIP_OPS | \ + MSI_FLAG_NO_AFFINITY) +#define PLDA_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \ + MSI_FLAG_PCI_MSIX) -static struct msi_domain_info plda_msi_domain_info = { - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_NO_AFFINITY | MSI_FLAG_PCI_MSIX, - .chip = &plda_msi_irq_chip, +static const struct msi_parent_ops plda_msi_parent_ops = { + 
.required_flags = PLDA_MSI_FLAGS_REQUIRED, + .supported_flags = PLDA_MSI_FLAGS_SUPPORTED, + .chip_flags = MSI_CHIP_FLAG_SET_ACK, + .bus_select_token = DOMAIN_BUS_PCI_MSI, + .prefix = "PLDA-", + .init_dev_msi_info = msi_lib_init_dev_msi_info, }; static int plda_allocate_msi_domains(struct plda_pcie_rp *port) { struct device *dev = port->dev; - struct fwnode_handle *fwnode = dev_fwnode(dev); struct plda_msi *msi = &port->msi; mutex_init(&port->msi.lock); - msi->dev_domain = irq_domain_create_linear(NULL, msi->num_vectors, &msi_domain_ops, port); + struct irq_domain_info info = { + .fwnode = dev_fwnode(dev), + .ops = &msi_domain_ops, + .host_data = port, + .size = msi->num_vectors, + }; + + msi->dev_domain = msi_create_parent_irq_domain(&info, &plda_msi_parent_ops); if (!msi->dev_domain) { dev_err(dev, "failed to create IRQ domain\n"); return -ENOMEM; } - msi->msi_domain = pci_msi_create_irq_domain(fwnode, - &plda_msi_domain_info, - msi->dev_domain); - if (!msi->msi_domain) { - dev_err(dev, "failed to create MSI domain\n"); - irq_domain_remove(msi->dev_domain); - return -ENOMEM; - } - return 0; } @@ -563,7 +563,6 @@ static void plda_pcie_irq_domain_deinit(struct plda_pcie_rp *pcie) irq_set_chained_handler_and_data(pcie->msi_irq, NULL, NULL); irq_set_chained_handler_and_data(pcie->intx_irq, NULL, NULL); - irq_domain_remove(pcie->msi.msi_domain); irq_domain_remove(pcie->msi.dev_domain); irq_domain_remove(pcie->intx_domain); diff --git a/drivers/pci/controller/plda/pcie-plda.h b/drivers/pci/controller/plda/pcie-plda.h index 61ece26065ea..6b8665df7bf0 100644 --- a/drivers/pci/controller/plda/pcie-plda.h +++ b/drivers/pci/controller/plda/pcie-plda.h @@ -164,7 +164,6 @@ struct plda_pcie_host_ops { struct plda_msi { struct mutex lock; /* Protect used bitmap */ - struct irq_domain *msi_domain; struct irq_domain *dev_domain; u32 num_vectors; u64 vector_phy; From 63984ea71a6caf9a823e7301ca60942fbc511497 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 26 Jun 2025 16:48:05 
+0200 Subject: [PATCH 1011/2411] PCI: vmd: Convert to lock guards Convert lock/unlock pairs to lock guard and tidy up the code. Signed-off-by: Nam Cao Signed-off-by: Manivannan Sadhasivam [bhelgaas: rebase on dev_fwnode() conversion] Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner Link: https://patch.msgid.link/836cca37449c70922a2bea1fb13f37940a7a7132.1750858083.git.namcao@linutronix.de --- drivers/pci/controller/vmd.c | 73 ++++++++++++++---------------------- 1 file changed, 29 insertions(+), 44 deletions(-) diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index 8df064b62a2f..f922fa445aef 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -180,13 +180,12 @@ static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) static void vmd_irq_enable(struct irq_data *data) { struct vmd_irq *vmdirq = data->chip_data; - unsigned long flags; - raw_spin_lock_irqsave(&list_lock, flags); - WARN_ON(vmdirq->enabled); - list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list); - vmdirq->enabled = true; - raw_spin_unlock_irqrestore(&list_lock, flags); + scoped_guard(raw_spinlock_irqsave, &list_lock) { + WARN_ON(vmdirq->enabled); + list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list); + vmdirq->enabled = true; + } data->chip->irq_unmask(data); } @@ -194,16 +193,15 @@ static void vmd_irq_enable(struct irq_data *data) static void vmd_irq_disable(struct irq_data *data) { struct vmd_irq *vmdirq = data->chip_data; - unsigned long flags; data->chip->irq_mask(data); - raw_spin_lock_irqsave(&list_lock, flags); - if (vmdirq->enabled) { - list_del_rcu(&vmdirq->node); - vmdirq->enabled = false; + scoped_guard(raw_spinlock_irqsave, &list_lock) { + if (vmdirq->enabled) { + list_del_rcu(&vmdirq->node); + vmdirq->enabled = false; + } } - raw_spin_unlock_irqrestore(&list_lock, flags); } static struct irq_chip vmd_msi_controller = { @@ -225,7 +223,6 @@ static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info, 
*/ static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc) { - unsigned long flags; int i, best; if (vmd->msix_count == 1 + vmd->first_vec) @@ -242,13 +239,13 @@ static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *d return &vmd->irqs[vmd->first_vec]; } - raw_spin_lock_irqsave(&list_lock, flags); - best = vmd->first_vec + 1; - for (i = best; i < vmd->msix_count; i++) - if (vmd->irqs[i].count < vmd->irqs[best].count) - best = i; - vmd->irqs[best].count++; - raw_spin_unlock_irqrestore(&list_lock, flags); + scoped_guard(raw_spinlock_irq, &list_lock) { + best = vmd->first_vec + 1; + for (i = best; i < vmd->msix_count; i++) + if (vmd->irqs[i].count < vmd->irqs[best].count) + best = i; + vmd->irqs[best].count++; + } return &vmd->irqs[best]; } @@ -277,14 +274,12 @@ static void vmd_msi_free(struct irq_domain *domain, struct msi_domain_info *info, unsigned int virq) { struct vmd_irq *vmdirq = irq_get_chip_data(virq); - unsigned long flags; synchronize_srcu(&vmdirq->irq->srcu); /* XXX: Potential optimization to rebalance */ - raw_spin_lock_irqsave(&list_lock, flags); - vmdirq->irq->count--; - raw_spin_unlock_irqrestore(&list_lock, flags); + scoped_guard(raw_spinlock_irq, &list_lock) + vmdirq->irq->count--; kfree(vmdirq); } @@ -387,29 +382,24 @@ static int vmd_pci_read(struct pci_bus *bus, unsigned int devfn, int reg, { struct vmd_dev *vmd = vmd_from_bus(bus); void __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len); - unsigned long flags; - int ret = 0; if (!addr) return -EFAULT; - raw_spin_lock_irqsave(&vmd->cfg_lock, flags); + guard(raw_spinlock_irqsave)(&vmd->cfg_lock); switch (len) { case 1: *value = readb(addr); - break; + return 0; case 2: *value = readw(addr); - break; + return 0; case 4: *value = readl(addr); - break; + return 0; default: - ret = -EINVAL; - break; + return -EINVAL; } - raw_spin_unlock_irqrestore(&vmd->cfg_lock, flags); - return ret; } /* @@ -422,32 +412,27 @@ static int vmd_pci_write(struct 
pci_bus *bus, unsigned int devfn, int reg, { struct vmd_dev *vmd = vmd_from_bus(bus); void __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len); - unsigned long flags; - int ret = 0; if (!addr) return -EFAULT; - raw_spin_lock_irqsave(&vmd->cfg_lock, flags); + guard(raw_spinlock_irqsave)(&vmd->cfg_lock); switch (len) { case 1: writeb(value, addr); readb(addr); - break; + return 0; case 2: writew(value, addr); readw(addr); - break; + return 0; case 4: writel(value, addr); readl(addr); - break; + return 0; default: - ret = -EINVAL; - break; + return -EINVAL; } - raw_spin_unlock_irqrestore(&vmd->cfg_lock, flags); - return ret; } static struct pci_ops vmd_ops = { From d7d8ab87e3e7413e3ed2b6eee51ceaddc7e594f2 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 26 Jun 2025 16:48:06 +0200 Subject: [PATCH 1012/2411] PCI: vmd: Switch to msi_create_parent_irq_domain() Switch to msi_create_parent_irq_domain() from pci_msi_create_irq_domain() which was using legacy MSI domain setup. Signed-off-by: Nam Cao [mani: reworded commit message] Signed-off-by: Manivannan Sadhasivam [bhelgaas: rebase on dev_fwnode() conversion, wrap long lines, squash fix from https://lore.kernel.org/r/20250716201216.TsY3Kn45@linutronix.de] Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner Link: https://patch.msgid.link/de3f1d737831b251e9cd2cbf9e4c732a5bbba13a.1750858083.git.namcao@linutronix.de --- drivers/pci/controller/Kconfig | 1 + drivers/pci/controller/vmd.c | 178 +++++++++++++++++---------------- 2 files changed, 95 insertions(+), 84 deletions(-) diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 8f56ffd029ba..41748d083b93 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -156,6 +156,7 @@ config PCI_IXP4XX config VMD depends on PCI_MSI && X86_64 && !UML tristate "Intel Volume Management Device Driver" + select IRQ_MSI_LIB help Adds support for the Intel Volume Management Device (VMD). 
VMD is a secondary PCI host bridge that allows PCI Express root ports, diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index f922fa445aef..50f0c91d561c 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -174,9 +175,6 @@ static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) msg->arch_addr_lo.destid_0_7 = index_from_irqs(vmd, irq); } -/* - * We rely on MSI_FLAG_USE_DEF_CHIP_OPS to set the IRQ mask/unmask ops. - */ static void vmd_irq_enable(struct irq_data *data) { struct vmd_irq *vmdirq = data->chip_data; @@ -186,7 +184,11 @@ static void vmd_irq_enable(struct irq_data *data) list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list); vmdirq->enabled = true; } +} +static void vmd_pci_msi_enable(struct irq_data *data) +{ + vmd_irq_enable(data->parent_data); data->chip->irq_unmask(data); } @@ -194,8 +196,6 @@ static void vmd_irq_disable(struct irq_data *data) { struct vmd_irq *vmdirq = data->chip_data; - data->chip->irq_mask(data); - scoped_guard(raw_spinlock_irqsave, &list_lock) { if (vmdirq->enabled) { list_del_rcu(&vmdirq->node); @@ -204,19 +204,17 @@ static void vmd_irq_disable(struct irq_data *data) } } +static void vmd_pci_msi_disable(struct irq_data *data) +{ + data->chip->irq_mask(data); + vmd_irq_disable(data->parent_data); +} + static struct irq_chip vmd_msi_controller = { .name = "VMD-MSI", - .irq_enable = vmd_irq_enable, - .irq_disable = vmd_irq_disable, .irq_compose_msi_msg = vmd_compose_msi_msg, }; -static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info, - msi_alloc_info_t *arg) -{ - return 0; -} - /* * XXX: We can be even smarter selecting the best IRQ once we solve the * affinity problem. 
@@ -250,73 +248,108 @@ static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *d return &vmd->irqs[best]; } -static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info, - unsigned int virq, irq_hw_number_t hwirq, - msi_alloc_info_t *arg) +static void vmd_msi_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs); + +static int vmd_msi_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) { - struct msi_desc *desc = arg->desc; - struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus); - struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL); + struct msi_desc *desc = ((msi_alloc_info_t *)arg)->desc; + struct vmd_dev *vmd = domain->host_data; + struct vmd_irq *vmdirq; - if (!vmdirq) - return -ENOMEM; + for (int i = 0; i < nr_irqs; ++i) { + vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL); + if (!vmdirq) { + vmd_msi_free(domain, virq, i); + return -ENOMEM; + } - INIT_LIST_HEAD(&vmdirq->node); - vmdirq->irq = vmd_next_irq(vmd, desc); - vmdirq->virq = virq; + INIT_LIST_HEAD(&vmdirq->node); + vmdirq->irq = vmd_next_irq(vmd, desc); + vmdirq->virq = virq + i; + + irq_domain_set_info(domain, virq + i, vmdirq->irq->virq, + &vmd_msi_controller, vmdirq, + handle_untracked_irq, vmd, NULL); + } - irq_domain_set_info(domain, virq, vmdirq->irq->virq, info->chip, vmdirq, - handle_untracked_irq, vmd, NULL); return 0; } -static void vmd_msi_free(struct irq_domain *domain, - struct msi_domain_info *info, unsigned int virq) +static void vmd_msi_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) { - struct vmd_irq *vmdirq = irq_get_chip_data(virq); + struct vmd_irq *vmdirq; - synchronize_srcu(&vmdirq->irq->srcu); + for (int i = 0; i < nr_irqs; ++i) { + vmdirq = irq_get_chip_data(virq + i); - /* XXX: Potential optimization to rebalance */ - scoped_guard(raw_spinlock_irq, &list_lock) - vmdirq->irq->count--; + synchronize_srcu(&vmdirq->irq->srcu); - 
kfree(vmdirq); + /* XXX: Potential optimization to rebalance */ + scoped_guard(raw_spinlock_irq, &list_lock) + vmdirq->irq->count--; + + kfree(vmdirq); + } } -static int vmd_msi_prepare(struct irq_domain *domain, struct device *dev, - int nvec, msi_alloc_info_t *arg) +static const struct irq_domain_ops vmd_msi_domain_ops = { + .alloc = vmd_msi_alloc, + .free = vmd_msi_free, +}; + +static bool vmd_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *real_parent, + struct msi_domain_info *info) { - struct pci_dev *pdev = to_pci_dev(dev); - struct vmd_dev *vmd = vmd_from_bus(pdev->bus); + if (WARN_ON_ONCE(info->bus_token != DOMAIN_BUS_PCI_DEVICE_MSIX)) + return false; - if (nvec > vmd->msix_count) - return vmd->msix_count; + if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info)) + return false; + + info->chip->irq_enable = vmd_pci_msi_enable; + info->chip->irq_disable = vmd_pci_msi_disable; + return true; +} + +#define VMD_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX) +#define VMD_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_NO_AFFINITY) + +static const struct msi_parent_ops vmd_msi_parent_ops = { + .supported_flags = VMD_MSI_FLAGS_SUPPORTED, + .required_flags = VMD_MSI_FLAGS_REQUIRED, + .bus_select_token = DOMAIN_BUS_VMD_MSI, + .bus_select_mask = MATCH_PCI_MSI, + .prefix = "VMD-", + .init_dev_msi_info = vmd_init_dev_msi_info, +}; + +static int vmd_create_irq_domain(struct vmd_dev *vmd) +{ + struct irq_domain_info info = { + .size = vmd->msix_count, + .ops = &vmd_msi_domain_ops, + .host_data = vmd, + }; + + info.fwnode = irq_domain_alloc_named_id_fwnode("VMD-MSI", + vmd->sysdata.domain); + if (!info.fwnode) + return -ENODEV; + + vmd->irq_domain = msi_create_parent_irq_domain(&info, + &vmd_msi_parent_ops); + if (!vmd->irq_domain) { + irq_domain_free_fwnode(info.fwnode); + return -ENODEV; + } - memset(arg, 0, sizeof(*arg)); return 0; } -static void vmd_set_desc(msi_alloc_info_t *arg, struct msi_desc 
*desc) -{ - arg->desc = desc; -} - -static struct msi_domain_ops vmd_msi_domain_ops = { - .get_hwirq = vmd_get_hwirq, - .msi_init = vmd_msi_init, - .msi_free = vmd_msi_free, - .msi_prepare = vmd_msi_prepare, - .set_desc = vmd_set_desc, -}; - -static struct msi_domain_info vmd_msi_domain_info = { - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_NO_AFFINITY | MSI_FLAG_PCI_MSIX, - .ops = &vmd_msi_domain_ops, - .chip = &vmd_msi_controller, -}; - static void vmd_set_msi_remapping(struct vmd_dev *vmd, bool enable) { u16 reg; @@ -327,23 +360,6 @@ static void vmd_set_msi_remapping(struct vmd_dev *vmd, bool enable) pci_write_config_word(vmd->dev, PCI_REG_VMCONFIG, reg); } -static int vmd_create_irq_domain(struct vmd_dev *vmd) -{ - struct fwnode_handle *fn; - - fn = irq_domain_alloc_named_id_fwnode("VMD-MSI", vmd->sysdata.domain); - if (!fn) - return -ENODEV; - - vmd->irq_domain = pci_msi_create_irq_domain(fn, &vmd_msi_domain_info, NULL); - if (!vmd->irq_domain) { - irq_domain_free_fwnode(fn); - return -ENODEV; - } - - return 0; -} - static void vmd_remove_irq_domain(struct vmd_dev *vmd) { /* @@ -874,12 +890,6 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features) ret = vmd_create_irq_domain(vmd); if (ret) return ret; - - /* - * Override the IRQ domain bus token so the domain can be - * distinguished from a regular PCI/MSI domain. - */ - irq_domain_update_bus_token(vmd->irq_domain, DOMAIN_BUS_VMD_MSI); } else { vmd_set_msi_remapping(vmd, false); } From 467d9c0348d6fd37b3d3a82e46c113ee9228d84b Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Sun, 4 May 2025 08:44:19 +0800 Subject: [PATCH 1013/2411] PCI: dwc: Add Sophgo SG2044 PCIe controller driver in Root Complex mode Add driver support for DesignWare based PCIe controller in SG2044 SoC. The driver currently supports the Root Complex mode. 
Signed-off-by: Inochi Amaoto [mani: renamed the driver to 'pcie-sophgo.c' and Kconfig fix] Signed-off-by: Manivannan Sadhasivam [bhelgaas: whitespace] Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250504004420.202685-3-inochiama@gmail.com --- drivers/pci/controller/dwc/Kconfig | 10 + drivers/pci/controller/dwc/Makefile | 1 + drivers/pci/controller/dwc/pcie-sophgo.c | 257 +++++++++++++++++++++++ 3 files changed, 268 insertions(+) create mode 100644 drivers/pci/controller/dwc/pcie-sophgo.c diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig index d9f0386396ed..bb95877b2c6c 100644 --- a/drivers/pci/controller/dwc/Kconfig +++ b/drivers/pci/controller/dwc/Kconfig @@ -402,6 +402,16 @@ config PCIE_UNIPHIER_EP Say Y here if you want PCIe endpoint controller support on UniPhier SoCs. This driver supports Pro5 SoC. +config PCIE_SOPHGO_DW + bool "Sophgo DesignWare PCIe controller (host mode)" + depends on ARCH_SOPHGO || COMPILE_TEST + depends on PCI_MSI + depends on OF + select PCIE_DW_HOST + help + Say Y here if you want PCIe host controller support on + Sophgo SoCs. 
+ config PCIE_SPEAR13XX bool "STMicroelectronics SPEAr PCIe controller" depends on ARCH_SPEAR13XX || COMPILE_TEST diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile index 908cb7f345db..6919d27798d1 100644 --- a/drivers/pci/controller/dwc/Makefile +++ b/drivers/pci/controller/dwc/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_PCIE_QCOM_EP) += pcie-qcom-ep.o obj-$(CONFIG_PCIE_ARMADA_8K) += pcie-armada8k.o obj-$(CONFIG_PCIE_ARTPEC6) += pcie-artpec6.o obj-$(CONFIG_PCIE_ROCKCHIP_DW) += pcie-dw-rockchip.o +obj-$(CONFIG_PCIE_SOPHGO_DW) += pcie-sophgo.o obj-$(CONFIG_PCIE_INTEL_GW) += pcie-intel-gw.o obj-$(CONFIG_PCIE_KEEMBAY) += pcie-keembay.o obj-$(CONFIG_PCIE_KIRIN) += pcie-kirin.o diff --git a/drivers/pci/controller/dwc/pcie-sophgo.c b/drivers/pci/controller/dwc/pcie-sophgo.c new file mode 100644 index 000000000000..ad4baaa34ffa --- /dev/null +++ b/drivers/pci/controller/dwc/pcie-sophgo.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Sophgo DesignWare based PCIe host controller driver + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "pcie-designware.h" + +#define to_sophgo_pcie(x) dev_get_drvdata((x)->dev) + +#define PCIE_INT_SIGNAL 0xc48 +#define PCIE_INT_EN 0xca0 + +#define PCIE_INT_SIGNAL_INTX GENMASK(8, 5) + +#define PCIE_INT_EN_INTX GENMASK(4, 1) +#define PCIE_INT_EN_INT_MSI BIT(5) + +struct sophgo_pcie { + struct dw_pcie pci; + void __iomem *app_base; + struct clk_bulk_data *clks; + unsigned int clk_cnt; + struct irq_domain *irq_domain; +}; + +static int sophgo_pcie_readl_app(struct sophgo_pcie *sophgo, u32 reg) +{ + return readl_relaxed(sophgo->app_base + reg); +} + +static void sophgo_pcie_writel_app(struct sophgo_pcie *sophgo, u32 val, u32 reg) +{ + writel_relaxed(val, sophgo->app_base + reg); +} + +static void sophgo_pcie_intx_handler(struct irq_desc *desc) +{ + struct dw_pcie_rp *pp = irq_desc_get_handler_data(desc); + struct irq_chip *chip = irq_desc_get_chip(desc); + 
struct dw_pcie *pci = to_dw_pcie_from_pp(pp); + struct sophgo_pcie *sophgo = to_sophgo_pcie(pci); + unsigned long hwirq, reg; + + chained_irq_enter(chip, desc); + + reg = sophgo_pcie_readl_app(sophgo, PCIE_INT_SIGNAL); + reg = FIELD_GET(PCIE_INT_SIGNAL_INTX, reg); + + for_each_set_bit(hwirq, &reg, PCI_NUM_INTX) + generic_handle_domain_irq(sophgo->irq_domain, hwirq); + + chained_irq_exit(chip, desc); +} + +static void sophgo_intx_irq_mask(struct irq_data *d) +{ + struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(d); + struct dw_pcie *pci = to_dw_pcie_from_pp(pp); + struct sophgo_pcie *sophgo = to_sophgo_pcie(pci); + unsigned long flags; + u32 val; + + raw_spin_lock_irqsave(&pp->lock, flags); + + val = sophgo_pcie_readl_app(sophgo, PCIE_INT_EN); + val &= ~FIELD_PREP(PCIE_INT_EN_INTX, BIT(d->hwirq)); + sophgo_pcie_writel_app(sophgo, val, PCIE_INT_EN); + + raw_spin_unlock_irqrestore(&pp->lock, flags); +}; + +static void sophgo_intx_irq_unmask(struct irq_data *d) +{ + struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(d); + struct dw_pcie *pci = to_dw_pcie_from_pp(pp); + struct sophgo_pcie *sophgo = to_sophgo_pcie(pci); + unsigned long flags; + u32 val; + + raw_spin_lock_irqsave(&pp->lock, flags); + + val = sophgo_pcie_readl_app(sophgo, PCIE_INT_EN); + val |= FIELD_PREP(PCIE_INT_EN_INTX, BIT(d->hwirq)); + sophgo_pcie_writel_app(sophgo, val, PCIE_INT_EN); + + raw_spin_unlock_irqrestore(&pp->lock, flags); +}; + +static struct irq_chip sophgo_intx_irq_chip = { + .name = "INTx", + .irq_mask = sophgo_intx_irq_mask, + .irq_unmask = sophgo_intx_irq_unmask, +}; + +static int sophgo_pcie_intx_map(struct irq_domain *domain, unsigned int irq, + irq_hw_number_t hwirq) +{ + irq_set_chip_and_handler(irq, &sophgo_intx_irq_chip, handle_level_irq); + irq_set_chip_data(irq, domain->host_data); + + return 0; +} + +static const struct irq_domain_ops intx_domain_ops = { + .map = sophgo_pcie_intx_map, +}; + +static int sophgo_pcie_init_irq_domain(struct dw_pcie_rp *pp) +{ + struct dw_pcie *pci
= to_dw_pcie_from_pp(pp); + struct sophgo_pcie *sophgo = to_sophgo_pcie(pci); + struct device *dev = sophgo->pci.dev; + struct fwnode_handle *intc; + int irq; + + intc = device_get_named_child_node(dev, "interrupt-controller"); + if (!intc) { + dev_err(dev, "missing child interrupt-controller node\n"); + return -ENODEV; + } + + irq = fwnode_irq_get(intc, 0); + if (irq < 0) { + dev_err(dev, "failed to get INTx irq number\n"); + fwnode_handle_put(intc); + return irq; + } + + sophgo->irq_domain = irq_domain_create_linear(intc, PCI_NUM_INTX, + &intx_domain_ops, pp); + fwnode_handle_put(intc); + if (!sophgo->irq_domain) { + dev_err(dev, "failed to get a INTx irq domain\n"); + return -EINVAL; + } + + return irq; +} + +static void sophgo_pcie_msi_enable(struct dw_pcie_rp *pp) +{ + struct dw_pcie *pci = to_dw_pcie_from_pp(pp); + struct sophgo_pcie *sophgo = to_sophgo_pcie(pci); + unsigned long flags; + u32 val; + + raw_spin_lock_irqsave(&pp->lock, flags); + + val = sophgo_pcie_readl_app(sophgo, PCIE_INT_EN); + val |= PCIE_INT_EN_INT_MSI; + sophgo_pcie_writel_app(sophgo, val, PCIE_INT_EN); + + raw_spin_unlock_irqrestore(&pp->lock, flags); +} + +static int sophgo_pcie_host_init(struct dw_pcie_rp *pp) +{ + int irq; + + irq = sophgo_pcie_init_irq_domain(pp); + if (irq < 0) + return irq; + + irq_set_chained_handler_and_data(irq, sophgo_pcie_intx_handler, pp); + + sophgo_pcie_msi_enable(pp); + + return 0; +} + +static const struct dw_pcie_host_ops sophgo_pcie_host_ops = { + .init = sophgo_pcie_host_init, +}; + +static int sophgo_pcie_clk_init(struct sophgo_pcie *sophgo) +{ + struct device *dev = sophgo->pci.dev; + int ret; + + ret = devm_clk_bulk_get_all_enabled(dev, &sophgo->clks); + if (ret < 0) + return dev_err_probe(dev, ret, "failed to get clocks\n"); + + sophgo->clk_cnt = ret; + + return 0; +} + +static int sophgo_pcie_resource_get(struct platform_device *pdev, + struct sophgo_pcie *sophgo) +{ + sophgo->app_base = devm_platform_ioremap_resource_byname(pdev, "app"); + if 
(IS_ERR(sophgo->app_base)) + return dev_err_probe(&pdev->dev, PTR_ERR(sophgo->app_base), + "failed to map app registers\n"); + + return 0; +} + +static int sophgo_pcie_configure_rc(struct sophgo_pcie *sophgo) +{ + struct dw_pcie_rp *pp; + + pp = &sophgo->pci.pp; + pp->ops = &sophgo_pcie_host_ops; + + return dw_pcie_host_init(pp); +} + +static int sophgo_pcie_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct sophgo_pcie *sophgo; + int ret; + + sophgo = devm_kzalloc(dev, sizeof(*sophgo), GFP_KERNEL); + if (!sophgo) + return -ENOMEM; + + platform_set_drvdata(pdev, sophgo); + + sophgo->pci.dev = dev; + + ret = sophgo_pcie_resource_get(pdev, sophgo); + if (ret) + return ret; + + ret = sophgo_pcie_clk_init(sophgo); + if (ret) + return ret; + + return sophgo_pcie_configure_rc(sophgo); +} + +static const struct of_device_id sophgo_pcie_of_match[] = { + { .compatible = "sophgo,sg2044-pcie" }, + { } +}; +MODULE_DEVICE_TABLE(of, sophgo_pcie_of_match); + +static struct platform_driver sophgo_pcie_driver = { + .driver = { + .name = "sophgo-pcie", + .of_match_table = sophgo_pcie_of_match, + .suppress_bind_attrs = true, + }, + .probe = sophgo_pcie_probe, +}; +builtin_platform_driver(sophgo_pcie_driver); From 1c3b002c6bf684b445a7107609979bca5f21bc03 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 10 Jul 2025 15:13:49 -0400 Subject: [PATCH 1014/2411] PCI: endpoint: Add RC-to-EP doorbell support using platform MSI controller Implement the doorbell feature by mapping the EP's MSI interrupt controller message address to a dedicated BAR. The EPF driver should pass the actual message data to be written to the message address by the host through implementation-specific logic. 
Signed-off-by: Frank Li [mani: minor code cleanups and reworded commit message] Signed-off-by: Manivannan Sadhasivam [bhelgaas: fix kernel-doc] Signed-off-by: Bjorn Helgaas Tested-by: Niklas Cassel Link: https://patch.msgid.link/20250710-ep-msi-v21-3-57683fc7fb25@nxp.com --- drivers/pci/endpoint/Kconfig | 8 +++ drivers/pci/endpoint/Makefile | 1 + drivers/pci/endpoint/pci-ep-msi.c | 92 +++++++++++++++++++++++++++++++ include/linux/pci-ep-msi.h | 28 ++++++++++ include/linux/pci-epf.h | 15 +++++ 5 files changed, 144 insertions(+) create mode 100644 drivers/pci/endpoint/pci-ep-msi.c create mode 100644 include/linux/pci-ep-msi.h diff --git a/drivers/pci/endpoint/Kconfig b/drivers/pci/endpoint/Kconfig index 1c5d82eb57d4..8dad291be8b8 100644 --- a/drivers/pci/endpoint/Kconfig +++ b/drivers/pci/endpoint/Kconfig @@ -28,6 +28,14 @@ config PCI_ENDPOINT_CONFIGFS configure the endpoint function and used to bind the function with an endpoint controller. +config PCI_ENDPOINT_MSI_DOORBELL + bool "PCI Endpoint MSI Doorbell Support" + depends on PCI_ENDPOINT && GENERIC_MSI_IRQ + help + This enables the EP's MSI interrupt controller to function as a + doorbell. The RC can trigger doorbell in EP by writing data to a + dedicated BAR, which the EP maps to the controller's message address. 
+ source "drivers/pci/endpoint/functions/Kconfig" endmenu diff --git a/drivers/pci/endpoint/Makefile b/drivers/pci/endpoint/Makefile index 95b2fe47e3b0..b4869d52053a 100644 --- a/drivers/pci/endpoint/Makefile +++ b/drivers/pci/endpoint/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_PCI_ENDPOINT_CONFIGFS) += pci-ep-cfs.o obj-$(CONFIG_PCI_ENDPOINT) += pci-epc-core.o pci-epf-core.o\ pci-epc-mem.o functions/ +obj-$(CONFIG_PCI_ENDPOINT_MSI_DOORBELL) += pci-ep-msi.o diff --git a/drivers/pci/endpoint/pci-ep-msi.c b/drivers/pci/endpoint/pci-ep-msi.c new file mode 100644 index 000000000000..95a47ce155ac --- /dev/null +++ b/drivers/pci/endpoint/pci-ep-msi.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCI Endpoint *Controller* (EPC) MSI library + * + * Copyright (C) 2025 NXP + * Author: Frank Li + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void pci_epf_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) +{ + struct pci_epc *epc; + struct pci_epf *epf; + + epc = pci_epc_get(dev_name(msi_desc_to_dev(desc))); + if (!epc) + return; + + epf = list_first_entry_or_null(&epc->pci_epf, struct pci_epf, list); + + if (epf && epf->db_msg && desc->msi_index < epf->num_db) + memcpy(&epf->db_msg[desc->msi_index].msg, msg, sizeof(*msg)); + + pci_epc_put(epc); +} + +int pci_epf_alloc_doorbell(struct pci_epf *epf, u16 num_db) +{ + struct pci_epc *epc = epf->epc; + struct device *dev = &epf->dev; + struct irq_domain *domain; + void *msg; + int ret; + int i; + + /* TODO: Multi-EPF support */ + if (list_first_entry_or_null(&epc->pci_epf, struct pci_epf, list) != epf) { + dev_err(dev, "MSI doorbell doesn't support multiple EPF\n"); + return -EINVAL; + } + + domain = of_msi_map_get_device_domain(epc->dev.parent, 0, + DOMAIN_BUS_PLATFORM_MSI); + if (!domain) { + dev_err(dev, "Can't find MSI domain for EPC\n"); + return -ENODEV; + } + + dev_set_msi_domain(epc->dev.parent, domain); + + msg = 
kcalloc(num_db, sizeof(struct pci_epf_doorbell_msg), GFP_KERNEL); + if (!msg) + return -ENOMEM; + + epf->num_db = num_db; + epf->db_msg = msg; + + ret = platform_device_msi_init_and_alloc_irqs(epc->dev.parent, num_db, + pci_epf_write_msi_msg); + if (ret) { + dev_err(dev, "Failed to allocate MSI\n"); + kfree(msg); + return ret; + } + + for (i = 0; i < num_db; i++) + epf->db_msg[i].virq = msi_get_virq(epc->dev.parent, i); + + return ret; +} +EXPORT_SYMBOL_GPL(pci_epf_alloc_doorbell); + +void pci_epf_free_doorbell(struct pci_epf *epf) +{ + platform_device_msi_free_irqs_all(epf->epc->dev.parent); + + kfree(epf->db_msg); + epf->db_msg = NULL; + epf->num_db = 0; +} +EXPORT_SYMBOL_GPL(pci_epf_free_doorbell); diff --git a/include/linux/pci-ep-msi.h b/include/linux/pci-ep-msi.h new file mode 100644 index 000000000000..7c5db90f9620 --- /dev/null +++ b/include/linux/pci-ep-msi.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * PCI Endpoint *Function* side MSI header file + * + * Copyright (C) 2024 NXP + * Author: Frank Li + */ + +#ifndef __PCI_EP_MSI__ +#define __PCI_EP_MSI__ + +struct pci_epf; + +#ifdef CONFIG_PCI_ENDPOINT_MSI_DOORBELL +int pci_epf_alloc_doorbell(struct pci_epf *epf, u16 nums); +void pci_epf_free_doorbell(struct pci_epf *epf); +#else +static inline int pci_epf_alloc_doorbell(struct pci_epf *epf, u16 nums) +{ + return -ENODATA; +} + +static inline void pci_epf_free_doorbell(struct pci_epf *epf) +{ +} +#endif /* CONFIG_GENERIC_MSI_IRQ */ + +#endif /* __PCI_EP_MSI__ */ diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 749cee0bcf2c..52e07602f08e 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -12,6 +12,7 @@ #include #include #include +#include #include struct pci_epf; @@ -128,6 +129,16 @@ struct pci_epf_bar { int flags; }; +/** + * struct pci_epf_doorbell_msg - represents doorbell message + * @msg: MSI message + * @virq: IRQ number of this doorbell MSI message + */ +struct pci_epf_doorbell_msg { + struct 
msi_msg msg; + int virq; +}; + /** * struct pci_epf - represents the PCI EPF device * @dev: the PCI EPF device @@ -155,6 +166,8 @@ struct pci_epf_bar { * @vfunction_num_map: bitmap to manage virtual function number * @pci_vepf: list of virtual endpoint functions associated with this function * @event_ops: callbacks for capturing the EPC events + * @db_msg: data for MSI from RC side + * @num_db: number of doorbells */ struct pci_epf { struct device dev; @@ -185,6 +198,8 @@ struct pci_epf { unsigned long vfunction_num_map; struct list_head pci_vepf; const struct pci_epc_event_ops *event_ops; + struct pci_epf_doorbell_msg *db_msg; + u16 num_db; }; /** From c822392280aa9bc57ad3b5079020388950cce9c8 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 10 Jul 2025 15:13:50 -0400 Subject: [PATCH 1015/2411] PCI: endpoint: pci-ep-msi: Add checks for MSI parent and mutability Some MSI controllers can change address/data pair during the execution of irq_chip::irq_set_affinity() callback. Since the current PCI Endpoint framework cannot support mutable MSI controllers, call irq_domain_is_msi_immutable() API to check if the controller is immutable or not. Also ensure that the MSI domain is a parent MSI domain so that it can allocate address/data pairs. 
Signed-off-by: Frank Li [mani: reworded error message and commit message] Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Tested-by: Niklas Cassel Link: https://patch.msgid.link/20250710-ep-msi-v21-4-57683fc7fb25@nxp.com --- drivers/pci/endpoint/pci-ep-msi.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pci/endpoint/pci-ep-msi.c b/drivers/pci/endpoint/pci-ep-msi.c index 95a47ce155ac..9ca89cbfec15 100644 --- a/drivers/pci/endpoint/pci-ep-msi.c +++ b/drivers/pci/endpoint/pci-ep-msi.c @@ -57,6 +57,14 @@ int pci_epf_alloc_doorbell(struct pci_epf *epf, u16 num_db) return -ENODEV; } + if (!irq_domain_is_msi_parent(domain)) + return -ENODEV; + + if (!irq_domain_is_msi_immutable(domain)) { + dev_err(dev, "Mutable MSI controller not supported\n"); + return -ENODEV; + } + dev_set_msi_domain(epc->dev.parent, domain); msg = kcalloc(num_db, sizeof(struct pci_epf_doorbell_msg), GFP_KERNEL); From 4ff4252a2355f585c5cad8dc959ff1097300aa47 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 10 Jul 2025 15:13:51 -0400 Subject: [PATCH 1016/2411] PCI: endpoint: Add pci_epf_align_inbound_addr() helper for inbound address alignment Add pci_epf_align_inbound_addr() to align the inbound addresses according to PCI BAR alignment requirements. The aligned base address and offset are returned via 'base' and 'off' parameters. 
Signed-off-by: Frank Li [mani: reworded kernel-doc and commit message] Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Tested-by: Niklas Cassel Link: https://patch.msgid.link/20250710-ep-msi-v21-5-57683fc7fb25@nxp.com --- drivers/pci/endpoint/pci-epf-core.c | 38 +++++++++++++++++++++++++++++ include/linux/pci-epf.h | 3 +++ 2 files changed, 41 insertions(+) diff --git a/drivers/pci/endpoint/pci-epf-core.c b/drivers/pci/endpoint/pci-epf-core.c index 577a9e490115..09b90e1631d5 100644 --- a/drivers/pci/endpoint/pci-epf-core.c +++ b/drivers/pci/endpoint/pci-epf-core.c @@ -477,6 +477,44 @@ struct pci_epf *pci_epf_create(const char *name) } EXPORT_SYMBOL_GPL(pci_epf_create); +/** + * pci_epf_align_inbound_addr() - Align the given address based on the BAR + * alignment requirement + * @epf: the EPF device + * @addr: inbound address to be aligned + * @bar: the BAR number corresponding to the given addr + * @base: base address matching the @bar alignment requirement + * @off: offset to be added to the @base address + * + * Helper function to align input @addr based on BAR's alignment requirement. + * The aligned base address and offset are returned via @base and @off. + * + * NOTE: The pci_epf_alloc_space() function already accounts for alignment. + * This API is primarily intended for use with other memory regions not + * allocated by pci_epf_alloc_space(), such as peripheral register spaces or + * the message address of a platform MSI controller. + * + * Return: 0 on success, errno otherwise. + */ +int pci_epf_align_inbound_addr(struct pci_epf *epf, enum pci_barno bar, + u64 addr, dma_addr_t *base, size_t *off) +{ + /* + * Most EP controllers require the BAR start address to be aligned to + * the BAR size, because they mask off the lower bits. + * + * Alignment to BAR size also works for controllers that support + * unaligned addresses. 
+ */ + u64 align = epf->bar[bar].size; + + *base = round_down(addr, align); + *off = addr & (align - 1); + + return 0; +} +EXPORT_SYMBOL_GPL(pci_epf_align_inbound_addr); + static void pci_epf_dev_release(struct device *dev) { struct pci_epf *epf = to_pci_epf(dev); diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 52e07602f08e..2e85504ba2ba 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -241,6 +241,9 @@ void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar, enum pci_epc_interface_type type); void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar, enum pci_epc_interface_type type); + +int pci_epf_align_inbound_addr(struct pci_epf *epf, enum pci_barno bar, + u64 addr, dma_addr_t *base, size_t *off); int pci_epf_bind(struct pci_epf *epf); void pci_epf_unbind(struct pci_epf *epf); int pci_epf_add_vepf(struct pci_epf *epf_pf, struct pci_epf *epf_vf); From eff0c286aa916221a69126a43eee7c218d6f4011 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 10 Jul 2025 15:13:52 -0400 Subject: [PATCH 1017/2411] PCI: endpoint: pci-epf-test: Add doorbell test support Add doorbell support by allocating a dedicated BAR using the pci_epf_alloc_doorbell() API and mapping the Endpoint MSI controller message data address to it. The data to be written in the message address is stored in the 'pci_epf_test_reg::doorbell_data' register. Finally, the RC can trigger doorbell in the Endpoint by writing the content of 'doorbell_data' register to the offset specified in 'doorbell_offset' of the 'doorbell_bar' BAR. Triggering of the doorbell is detected by pci_epf_test_doorbell_handler(), which is bound to the doorbell IRQ. On successful completion, STATUS_DOORBELL_SUCCESS status is set in the above mentioned handler. To avoid breaking compatibility between host and endpoint, add two new commands: COMMAND_ENABLE_DOORBELL and COMMAND_DISABLE_DOORBELL. 
The doorbell is allocated when COMMAND_ENABLE_DOORBELL command is called and destroyed when COMMAND_DISABLE_DOORBELL is called. This doorbell feature only works when both RC and EP drivers support it. If one of them doesn't support the feature, the testcase will fail. Signed-off-by: Frank Li [mani: code cleanups and reworded commit message] Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Tested-by: Niklas Cassel Link: https://patch.msgid.link/20250710-ep-msi-v21-6-57683fc7fb25@nxp.com --- drivers/pci/endpoint/functions/pci-epf-test.c | 130 ++++++++++++++++++ 1 file changed, 130 insertions(+) diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index 50eb4106369f..e091193bd8a8 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -11,12 +11,14 @@ #include #include #include +#include #include #include #include #include #include +#include #include #define IRQ_TYPE_INTX 0 @@ -29,6 +31,8 @@ #define COMMAND_READ BIT(3) #define COMMAND_WRITE BIT(4) #define COMMAND_COPY BIT(5) +#define COMMAND_ENABLE_DOORBELL BIT(6) +#define COMMAND_DISABLE_DOORBELL BIT(7) #define STATUS_READ_SUCCESS BIT(0) #define STATUS_READ_FAIL BIT(1) @@ -39,6 +43,11 @@ #define STATUS_IRQ_RAISED BIT(6) #define STATUS_SRC_ADDR_INVALID BIT(7) #define STATUS_DST_ADDR_INVALID BIT(8) +#define STATUS_DOORBELL_SUCCESS BIT(9) +#define STATUS_DOORBELL_ENABLE_SUCCESS BIT(10) +#define STATUS_DOORBELL_ENABLE_FAIL BIT(11) +#define STATUS_DOORBELL_DISABLE_SUCCESS BIT(12) +#define STATUS_DOORBELL_DISABLE_FAIL BIT(13) #define FLAG_USE_DMA BIT(0) @@ -66,6 +75,7 @@ struct pci_epf_test { bool dma_supported; bool dma_private; const struct pci_epc_features *epc_features; + struct pci_epf_bar db_bar; }; struct pci_epf_test_reg { @@ -80,6 +90,9 @@ struct pci_epf_test_reg { __le32 irq_number; __le32 flags; __le32 caps; + __le32 doorbell_bar; + __le32 doorbell_offset; + __le32 doorbell_data; } 
__packed; static struct pci_epf_header test_header = { @@ -667,6 +680,115 @@ static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test, } } +static irqreturn_t pci_epf_test_doorbell_handler(int irq, void *data) +{ + struct pci_epf_test *epf_test = data; + enum pci_barno test_reg_bar = epf_test->test_reg_bar; + struct pci_epf_test_reg *reg = epf_test->reg[test_reg_bar]; + u32 status = le32_to_cpu(reg->status); + + status |= STATUS_DOORBELL_SUCCESS; + reg->status = cpu_to_le32(status); + pci_epf_test_raise_irq(epf_test, reg); + + return IRQ_HANDLED; +} + +static void pci_epf_test_doorbell_cleanup(struct pci_epf_test *epf_test) +{ + struct pci_epf_test_reg *reg = epf_test->reg[epf_test->test_reg_bar]; + struct pci_epf *epf = epf_test->epf; + + free_irq(epf->db_msg[0].virq, epf_test); + reg->doorbell_bar = cpu_to_le32(NO_BAR); + + pci_epf_free_doorbell(epf); +} + +static void pci_epf_test_enable_doorbell(struct pci_epf_test *epf_test, + struct pci_epf_test_reg *reg) +{ + u32 status = le32_to_cpu(reg->status); + struct pci_epf *epf = epf_test->epf; + struct pci_epc *epc = epf->epc; + struct msi_msg *msg; + enum pci_barno bar; + size_t offset; + int ret; + + ret = pci_epf_alloc_doorbell(epf, 1); + if (ret) + goto set_status_err; + + msg = &epf->db_msg[0].msg; + bar = pci_epc_get_next_free_bar(epf_test->epc_features, epf_test->test_reg_bar + 1); + if (bar < BAR_0) + goto err_doorbell_cleanup; + + ret = request_irq(epf->db_msg[0].virq, pci_epf_test_doorbell_handler, 0, + "pci-ep-test-doorbell", epf_test); + if (ret) { + dev_err(&epf->dev, + "Failed to request doorbell IRQ: %d\n", + epf->db_msg[0].virq); + goto err_doorbell_cleanup; + } + + reg->doorbell_data = cpu_to_le32(msg->data); + reg->doorbell_bar = cpu_to_le32(bar); + + msg = &epf->db_msg[0].msg; + ret = pci_epf_align_inbound_addr(epf, bar, ((u64)msg->address_hi << 32) | msg->address_lo, + &epf_test->db_bar.phys_addr, &offset); + + if (ret) + goto err_doorbell_cleanup; + + reg->doorbell_offset = 
cpu_to_le32(offset); + + epf_test->db_bar.barno = bar; + epf_test->db_bar.size = epf->bar[bar].size; + epf_test->db_bar.flags = epf->bar[bar].flags; + + ret = pci_epc_set_bar(epc, epf->func_no, epf->vfunc_no, &epf_test->db_bar); + if (ret) + goto err_doorbell_cleanup; + + status |= STATUS_DOORBELL_ENABLE_SUCCESS; + reg->status = cpu_to_le32(status); + return; + +err_doorbell_cleanup: + pci_epf_test_doorbell_cleanup(epf_test); +set_status_err: + status |= STATUS_DOORBELL_ENABLE_FAIL; + reg->status = cpu_to_le32(status); +} + +static void pci_epf_test_disable_doorbell(struct pci_epf_test *epf_test, + struct pci_epf_test_reg *reg) +{ + enum pci_barno bar = le32_to_cpu(reg->doorbell_bar); + u32 status = le32_to_cpu(reg->status); + struct pci_epf *epf = epf_test->epf; + struct pci_epc *epc = epf->epc; + + if (bar < BAR_0) + goto set_status_err; + + pci_epf_test_doorbell_cleanup(epf_test); + pci_epc_clear_bar(epc, epf->func_no, epf->vfunc_no, &epf_test->db_bar); + + status |= STATUS_DOORBELL_DISABLE_SUCCESS; + reg->status = cpu_to_le32(status); + + return; + +set_status_err: + status |= STATUS_DOORBELL_DISABLE_FAIL; + reg->status = cpu_to_le32(status); +} + static void pci_epf_test_cmd_handler(struct work_struct *work) { u32 command; @@ -714,6 +836,14 @@ static void pci_epf_test_cmd_handler(struct work_struct *work) pci_epf_test_copy(epf_test, reg); pci_epf_test_raise_irq(epf_test, reg); break; + case COMMAND_ENABLE_DOORBELL: + pci_epf_test_enable_doorbell(epf_test, reg); + pci_epf_test_raise_irq(epf_test, reg); + break; + case COMMAND_DISABLE_DOORBELL: + pci_epf_test_disable_doorbell(epf_test, reg); + pci_epf_test_raise_irq(epf_test, reg); + break; default: dev_err(dev, "Invalid command 0x%x\n", command); break; From eefb83790a0dda112d1755e4f5e213738d717e76 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 10 Jul 2025 15:13:53 -0400 Subject: [PATCH 1018/2411] misc: pci_endpoint_test: Add doorbell test case Add doorbell support with the help of three new registers: 
PCIE_ENDPOINT_TEST_DB_BAR, PCIE_ENDPOINT_TEST_DB_ADDR, and PCIE_ENDPOINT_TEST_DB_DATA. The testcase works by triggering the doorbell in Endpoint by writing the value from PCI_ENDPOINT_TEST_DB_DATA register to the address provided by PCI_ENDPOINT_TEST_DB_OFFSET register of the BAR indicated by the PCIE_ENDPOINT_TEST_DB_BAR register and waiting for the completion status from the Endpoint. Signed-off-by: Frank Li [mani: removed one spurious change and reworded the commit message] Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Tested-by: Niklas Cassel Link: https://patch.msgid.link/20250710-ep-msi-v21-7-57683fc7fb25@nxp.com --- drivers/misc/pci_endpoint_test.c | 83 ++++++++++++++++++++++++++++++++ include/uapi/linux/pcitest.h | 1 + 2 files changed, 84 insertions(+) diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index c4e5e2c977be..1c156a3f845e 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -37,6 +37,8 @@ #define COMMAND_READ BIT(3) #define COMMAND_WRITE BIT(4) #define COMMAND_COPY BIT(5) +#define COMMAND_ENABLE_DOORBELL BIT(6) +#define COMMAND_DISABLE_DOORBELL BIT(7) #define PCI_ENDPOINT_TEST_STATUS 0x8 #define STATUS_READ_SUCCESS BIT(0) @@ -48,6 +50,11 @@ #define STATUS_IRQ_RAISED BIT(6) #define STATUS_SRC_ADDR_INVALID BIT(7) #define STATUS_DST_ADDR_INVALID BIT(8) +#define STATUS_DOORBELL_SUCCESS BIT(9) +#define STATUS_DOORBELL_ENABLE_SUCCESS BIT(10) +#define STATUS_DOORBELL_ENABLE_FAIL BIT(11) +#define STATUS_DOORBELL_DISABLE_SUCCESS BIT(12) +#define STATUS_DOORBELL_DISABLE_FAIL BIT(13) #define PCI_ENDPOINT_TEST_LOWER_SRC_ADDR 0x0c #define PCI_ENDPOINT_TEST_UPPER_SRC_ADDR 0x10 @@ -62,6 +69,7 @@ #define PCI_ENDPOINT_TEST_IRQ_NUMBER 0x28 #define PCI_ENDPOINT_TEST_FLAGS 0x2c + #define FLAG_USE_DMA BIT(0) #define PCI_ENDPOINT_TEST_CAPS 0x30 @@ -70,6 +78,10 @@ #define CAP_MSIX BIT(2) #define CAP_INTX BIT(3) +#define PCI_ENDPOINT_TEST_DB_BAR 0x34 +#define 
PCI_ENDPOINT_TEST_DB_OFFSET 0x38 +#define PCI_ENDPOINT_TEST_DB_DATA 0x3c + #define PCI_DEVICE_ID_TI_AM654 0xb00c #define PCI_DEVICE_ID_TI_J7200 0xb00f #define PCI_DEVICE_ID_TI_AM64 0xb010 @@ -100,6 +112,7 @@ enum pci_barno { BAR_3, BAR_4, BAR_5, + NO_BAR = -1, }; struct pci_endpoint_test { @@ -841,6 +854,73 @@ static int pci_endpoint_test_set_irq(struct pci_endpoint_test *test, return 0; } +static int pci_endpoint_test_doorbell(struct pci_endpoint_test *test) +{ + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + int irq_type = test->irq_type; + enum pci_barno bar; + u32 data, status; + u32 addr; + int left; + + if (irq_type < PCITEST_IRQ_TYPE_INTX || + irq_type > PCITEST_IRQ_TYPE_MSIX) { + dev_err(dev, "Invalid IRQ type\n"); + return -EINVAL; + } + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, irq_type); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, + COMMAND_ENABLE_DOORBELL); + + left = wait_for_completion_timeout(&test->irq_raised, msecs_to_jiffies(1000)); + + status = pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_STATUS); + if (!left || (status & STATUS_DOORBELL_ENABLE_FAIL)) { + dev_err(dev, "Failed to enable doorbell\n"); + return -EINVAL; + } + + data = pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_DB_DATA); + addr = pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_DB_OFFSET); + bar = pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_DB_BAR); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, irq_type); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_STATUS, 0); + + bar = pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_DB_BAR); + + writel(data, test->bar[bar] + addr); + + left = wait_for_completion_timeout(&test->irq_raised, msecs_to_jiffies(1000)); + + status = pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_STATUS); + + if (!left || !(status & 
STATUS_DOORBELL_SUCCESS)) + dev_err(dev, "Failed to trigger doorbell in endpoint\n"); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, + COMMAND_DISABLE_DOORBELL); + + wait_for_completion_timeout(&test->irq_raised, msecs_to_jiffies(1000)); + + status |= pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_STATUS); + + if (status & STATUS_DOORBELL_DISABLE_FAIL) { + dev_err(dev, "Failed to disable doorbell\n"); + return -EINVAL; + } + + if (!(status & STATUS_DOORBELL_SUCCESS)) + return -EINVAL; + + return 0; +} + static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -891,6 +971,9 @@ static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd, case PCITEST_CLEAR_IRQ: ret = pci_endpoint_test_clear_irq(test); break; + case PCITEST_DOORBELL: + ret = pci_endpoint_test_doorbell(test); + break; } ret: diff --git a/include/uapi/linux/pcitest.h b/include/uapi/linux/pcitest.h index d3aa8715a525..d6023a45a9d0 100644 --- a/include/uapi/linux/pcitest.h +++ b/include/uapi/linux/pcitest.h @@ -21,6 +21,7 @@ #define PCITEST_SET_IRQTYPE _IOW('P', 0x8, int) #define PCITEST_GET_IRQTYPE _IO('P', 0x9) #define PCITEST_BARS _IO('P', 0xa) +#define PCITEST_DOORBELL _IO('P', 0xb) #define PCITEST_CLEAR_IRQ _IO('P', 0x10) #define PCITEST_IRQ_TYPE_UNDEFINED -1 From b351e9c93a4fc0a1b789c0b89eeecb9d5bf564cd Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 10 Jul 2025 15:13:54 -0400 Subject: [PATCH 1019/2411] selftests: pci_endpoint: Add doorbell test case Add doorbell test case. 
Signed-off-by: Frank Li [mani: Reworded the testcase description] Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Tested-by: Niklas Cassel Link: https://patch.msgid.link/20250710-ep-msi-v21-8-57683fc7fb25@nxp.com --- Documentation/PCI/endpoint/pci-test-howto.rst | 15 ++++++++++ .../pci_endpoint/pci_endpoint_test.c | 28 +++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/Documentation/PCI/endpoint/pci-test-howto.rst b/Documentation/PCI/endpoint/pci-test-howto.rst index aafc17ef3fd3..dd66858cde46 100644 --- a/Documentation/PCI/endpoint/pci-test-howto.rst +++ b/Documentation/PCI/endpoint/pci-test-howto.rst @@ -203,3 +203,18 @@ controllers, it is advisable to skip this testcase using this command:: # pci_endpoint_test -f pci_ep_bar -f pci_ep_basic -v memcpy -T COPY_TEST -v dma + +Kselftest EP Doorbell +~~~~~~~~~~~~~~~~~~~~~ + +If the Endpoint MSI controller is used for the doorbell usecase, run below +command for testing it: + + # pci_endpoint_test -f pcie_ep_doorbell + + # Starting 1 tests from 1 test cases. + # RUN pcie_ep_doorbell.DOORBELL_TEST ... + # OK pcie_ep_doorbell.DOORBELL_TEST + ok 1 pcie_ep_doorbell.DOORBELL_TEST + # PASSED: 1 / 1 tests passed. 
+ # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:0 error:0 diff --git a/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c index ac26481d29d9..da0db0e7c969 100644 --- a/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c +++ b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c @@ -229,4 +229,32 @@ TEST_F(pci_ep_data_transfer, COPY_TEST) test_size[i]); } } + +FIXTURE(pcie_ep_doorbell) +{ + int fd; +}; + +FIXTURE_SETUP(pcie_ep_doorbell) +{ + self->fd = open(test_device, O_RDWR); + + ASSERT_NE(-1, self->fd) TH_LOG("Can't open PCI Endpoint Test device"); +}; + +FIXTURE_TEARDOWN(pcie_ep_doorbell) +{ + close(self->fd); +}; + +TEST_F(pcie_ep_doorbell, DOORBELL_TEST) +{ + int ret; + + pci_ep_ioctl(PCITEST_SET_IRQTYPE, PCITEST_IRQ_TYPE_AUTO); + ASSERT_EQ(0, ret) TH_LOG("Can't set AUTO IRQ type"); + + pci_ep_ioctl(PCITEST_DOORBELL, 0); + EXPECT_FALSE(ret) TH_LOG("Test failed for Doorbell\n"); +} TEST_HARNESS_MAIN From 3e90b38781e3bdd651edaf789585687611638862 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Wed, 23 Jul 2025 17:30:18 +0200 Subject: [PATCH 1020/2411] scsi: mpt3sas: Fix a fw_event memory leak In _mpt3sas_fw_work() the fw_event reference is removed, it should also be freed in all cases. Fixes: 4318c7347847 ("scsi: mpt3sas: Handle NVMe PCIe device related events generated from firmware.") Signed-off-by: Tomas Henzl Link: https://lore.kernel.org/r/20250723153018.50518-1-thenzl@redhat.com Acked-by: Sathya Prakash Veerichetty Signed-off-by: Martin K. 
Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index d7d8244dfedc..967af259118e 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -10809,8 +10809,7 @@ _mpt3sas_fw_work(struct MPT3SAS_ADAPTER *ioc, struct fw_event_work *fw_event) break; case MPI2_EVENT_PCIE_TOPOLOGY_CHANGE_LIST: _scsih_pcie_topology_change_event(ioc, fw_event); - ioc->current_event = NULL; - return; + break; } out: fw_event_work_put(fw_event); From 33b3120cb20fde80bf601413b635f957c46ad631 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 24 Jul 2025 14:23:52 +0200 Subject: [PATCH 1021/2411] scsi: ufs: qcom: Drop dead compile guard SCSI_UFSHCD already selects DEVFREQ_GOV_SIMPLE_ONDEMAND, drop the check. Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250724-topic-ufs_compile_check-v1-1-5ba9e99dbd52@oss.qualcomm.com Reviewed-by: Dmitry Baryshkov Signed-off-by: Martin K. 
Petersen --- drivers/ufs/host/ufs-qcom.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 2a72e7c1d131..d15f1a13b3b5 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -1894,7 +1894,6 @@ static int ufs_qcom_device_reset(struct ufs_hba *hba) return 0; } -#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND) static void ufs_qcom_config_scaling_param(struct ufs_hba *hba, struct devfreq_dev_profile *p, struct devfreq_simple_ondemand_data *d) @@ -1906,13 +1905,6 @@ static void ufs_qcom_config_scaling_param(struct ufs_hba *hba, hba->clk_scaling.suspend_on_no_request = true; } -#else -static void ufs_qcom_config_scaling_param(struct ufs_hba *hba, - struct devfreq_dev_profile *p, - struct devfreq_simple_ondemand_data *data) -{ -} -#endif /* Resources */ static const struct ufshcd_res_info ufs_res_info[RES_MAX] = { From dafeaf2c03e71255438ffe5a341d94d180e6c88e Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 15 Jul 2025 11:15:35 +0000 Subject: [PATCH 1022/2411] scsi: aacraid: Stop using PCI_IRQ_AFFINITY When PCI_IRQ_AFFINITY is set for calling pci_alloc_irq_vectors(), it means interrupts are spread around the available CPUs. It also means that the interrupts become managed, which means that an interrupt is shutdown when all the CPUs in the interrupt affinity mask go offline. Using managed interrupts in this way means that we should ensure that completions should not occur on HW queues where the associated interrupt is shutdown. This is typically achieved by ensuring only CPUs which are online can generate IO completion traffic to the HW queue which they are mapped to (so that they can also serve completion interrupts for that HW queue). The problem in the driver is that a CPU can generate completions to a HW queue whose interrupt may be shutdown, as the CPUs in the HW queue interrupt affinity mask may be offline. This can cause IOs to never complete and hang the system. 
The driver maintains its own CPU <-> HW queue mapping for submissions, see aac_fib_vector_assign(), but this does not reflect the CPU <-> HW queue interrupt affinity mapping. Commit 9dc704dcc09e ("scsi: aacraid: Reply queue mapping to CPUs based on IRQ affinity") tried to remedy this issue by mapping CPUs properly to HW queue interrupts. However this was later reverted in commit c5becf57dd56 ("Revert "scsi: aacraid: Reply queue mapping to CPUs based on IRQ affinity") - it seems that there were other reports of hangs. I guess that this was due to some implementation issue in the original commit or maybe a HW issue. Fix the very original hang by just not using managed interrupts by not setting PCI_IRQ_AFFINITY. In this way, all CPUs will be in each HW queue affinity mask, so should not create completion problems if any CPUs go offline. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20250715111535.499853-1-john.g.garry@oracle.com Closes: https://lore.kernel.org/linux-scsi/20250618192427.3845724-1-jmeneghi@redhat.com/ Reviewed-by: John Meneghini Tested-by: John Meneghini Signed-off-by: Martin K.
Petersen --- drivers/scsi/aacraid/comminit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index 28cf18955a08..726c8531b7d3 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -481,8 +481,7 @@ void aac_define_int_mode(struct aac_dev *dev) pci_find_capability(dev->pdev, PCI_CAP_ID_MSIX)) { min_msix = 2; i = pci_alloc_irq_vectors(dev->pdev, - min_msix, msi_count, - PCI_IRQ_MSIX | PCI_IRQ_AFFINITY); + min_msix, msi_count, PCI_IRQ_MSIX); if (i > 0) { dev->msi_enabled = 1; msi_count = i; From 7bdc68921481c19cd8c85ddf805a834211c19e61 Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Tue, 15 Jul 2025 15:39:26 +0800 Subject: [PATCH 1023/2411] scsi: Revert "scsi: iscsi: Fix HW conn removal use after free" This reverts commit c577ab7ba5f3bf9062db8a58b6e89d4fe370447e. The invocation of iscsi_put_conn() in iscsi_iter_destroy_conn_fn() is used to free the initial reference counter of iscsi_cls_conn. For non-qla4xxx cases, the ->destroy_conn() callback (e.g., iscsi_conn_teardown) will call iscsi_remove_conn() and iscsi_put_conn() to remove the connection from the children list of session and free the connection at last. However for qla4xxx, it is not the case. The ->destroy_conn() callback of qla4xxx will keep the connection in the session conn_list and doesn't use iscsi_put_conn() to free the initial reference counter. Therefore, it seems necessary to keep the iscsi_put_conn() in the iscsi_iter_destroy_conn_fn(), otherwise, there will be a memory leak problem. Link: https://lore.kernel.org/all/88334658-072b-4b90-a949-9c74ef93cfd1@huawei.com/ Fixes: c577ab7ba5f3 ("scsi: iscsi: Fix HW conn removal use after free") Signed-off-by: Li Lingfeng Link: https://lore.kernel.org/r/20250715073926.3529456-1-lilingfeng3@huawei.com Reviewed-by: Mike Christie Signed-off-by: Martin K.
Petersen --- drivers/scsi/scsi_transport_iscsi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 0b8c91bf793f..a9ae947f905c 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2143,6 +2143,8 @@ static int iscsi_iter_destroy_conn_fn(struct device *dev, void *data) return 0; iscsi_remove_conn(iscsi_dev_to_conn(dev)); + iscsi_put_conn(iscsi_dev_to_conn(dev)); + return 0; } From 35dabf4503b94a697bababe94678a8bc989c3223 Mon Sep 17 00:00:00 2001 From: Seunghui Lee Date: Thu, 17 Jul 2025 17:12:13 +0900 Subject: [PATCH 1024/2411] scsi: ufs: core: Use link recovery when h8 exit fails during runtime resume If the h8 exit fails during runtime resume process, the runtime thread enters runtime suspend immediately and the error handler operates at the same time. It becomes stuck and cannot be recovered through the error handler. To fix this, use link recovery instead of the error handler. Fixes: 4db7a2360597 ("scsi: ufs: Fix concurrency of error handler and other error recovery paths") Signed-off-by: Seunghui Lee Link: https://lore.kernel.org/r/20250717081213.6811-1-sh043.lee@samsung.com Reviewed-by: Bean Huo Acked-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen --- drivers/ufs/core/ufshcd.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index acfc1b4691fa..ad7cfdf0244f 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -4383,7 +4383,7 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) hba->uic_async_done = NULL; if (reenable_intr) ufshcd_enable_intr(hba, UIC_COMMAND_COMPL); - if (ret) { + if (ret && !hba->pm_op_in_progress) { ufshcd_set_link_broken(hba); ufshcd_schedule_eh_work(hba); } @@ -4391,6 +4391,14 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) spin_unlock_irqrestore(hba->host->host_lock, flags); mutex_unlock(&hba->uic_cmd_mutex); + /* + * If the h8 exit fails during the runtime resume process, it becomes + * stuck and cannot be recovered through the error handler. To fix + * this, use link recovery instead of the error handler. + */ + if (ret && hba->pm_op_in_progress) + ret = ufshcd_link_recovery(hba); + return ret; } From 7ffbf335e325ed3f36ebcfed8149a8d0d7e20076 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 21 Jul 2025 13:51:45 -0500 Subject: [PATCH 1025/2411] scsi: target: iblock: Allow iblock devices to be shared We might be running a local application that also interacts with the backing device. In this setup we have some clustering type of software that manages the owner of it, so we don't want the kernel to restrict us. This patch allows the user to control if the driver gets exclusive access. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20250721185145.20913-1-michael.christie@oracle.com Signed-off-by: Martin K.
Petersen --- drivers/target/target_core_iblock.c | 33 ++++++++++++++++++++++++----- drivers/target/target_core_iblock.h | 1 + 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 73564efd11d2..66c292b7d74b 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -64,6 +64,7 @@ static struct se_device *iblock_alloc_device(struct se_hba *hba, const char *nam pr_err("Unable to allocate struct iblock_dev\n"); return NULL; } + ib_dev->ibd_exclusive = true; ib_dev->ibd_plug = kcalloc(nr_cpu_ids, sizeof(*ib_dev->ibd_plug), GFP_KERNEL); @@ -95,6 +96,7 @@ static int iblock_configure_device(struct se_device *dev) struct block_device *bd; struct blk_integrity *bi; blk_mode_t mode = BLK_OPEN_READ; + void *holder = ib_dev; unsigned int max_write_zeroes_sectors; int ret; @@ -109,15 +111,18 @@ static int iblock_configure_device(struct se_device *dev) goto out; } - pr_debug( "IBLOCK: Claiming struct block_device: %s\n", - ib_dev->ibd_udev_path); + pr_debug("IBLOCK: Claiming struct block_device: %s: %d\n", + ib_dev->ibd_udev_path, ib_dev->ibd_exclusive); if (!ib_dev->ibd_readonly) mode |= BLK_OPEN_WRITE; else dev->dev_flags |= DF_READ_ONLY; - bdev_file = bdev_file_open_by_path(ib_dev->ibd_udev_path, mode, ib_dev, + if (!ib_dev->ibd_exclusive) + holder = NULL; + + bdev_file = bdev_file_open_by_path(ib_dev->ibd_udev_path, mode, holder, NULL); if (IS_ERR(bdev_file)) { ret = PTR_ERR(bdev_file); @@ -560,13 +565,14 @@ iblock_execute_write_same(struct se_cmd *cmd) } enum { - Opt_udev_path, Opt_readonly, Opt_force, Opt_err + Opt_udev_path, Opt_readonly, Opt_force, Opt_exclusive, Opt_err, }; static match_table_t tokens = { {Opt_udev_path, "udev_path=%s"}, {Opt_readonly, "readonly=%d"}, {Opt_force, "force=%d"}, + {Opt_exclusive, "exclusive=%d"}, {Opt_err, NULL} }; @@ -576,7 +582,7 @@ static ssize_t iblock_set_configfs_dev_params(struct se_device *dev, struct 
iblock_dev *ib_dev = IBLOCK_DEV(dev); char *orig, *ptr, *arg_p, *opts; substring_t args[MAX_OPT_ARGS]; - int ret = 0, token; + int ret = 0, token, tmp_exclusive; unsigned long tmp_readonly; opts = kstrdup(page, GFP_KERNEL); @@ -623,6 +629,22 @@ static ssize_t iblock_set_configfs_dev_params(struct se_device *dev, ib_dev->ibd_readonly = tmp_readonly; pr_debug("IBLOCK: readonly: %d\n", ib_dev->ibd_readonly); break; + case Opt_exclusive: + arg_p = match_strdup(&args[0]); + if (!arg_p) { + ret = -ENOMEM; + break; + } + ret = kstrtoint(arg_p, 0, &tmp_exclusive); + kfree(arg_p); + if (ret < 0) { + pr_err("kstrtoul() failed for exclusive=\n"); + goto out; + } + ib_dev->ibd_exclusive = tmp_exclusive; + pr_debug("IBLOCK: exclusive: %d\n", + ib_dev->ibd_exclusive); + break; case Opt_force: break; default: @@ -647,6 +669,7 @@ static ssize_t iblock_show_configfs_dev_params(struct se_device *dev, char *b) bl += sprintf(b + bl, " UDEV PATH: %s", ib_dev->ibd_udev_path); bl += sprintf(b + bl, " readonly: %d\n", ib_dev->ibd_readonly); + bl += sprintf(b + bl, " exclusive: %d\n", ib_dev->ibd_exclusive); bl += sprintf(b + bl, " "); if (bd) { diff --git a/drivers/target/target_core_iblock.h b/drivers/target/target_core_iblock.h index 91f6f4280666..e2f28a69a11c 100644 --- a/drivers/target/target_core_iblock.h +++ b/drivers/target/target_core_iblock.h @@ -34,6 +34,7 @@ struct iblock_dev { struct block_device *ibd_bd; struct file *ibd_bdev_file; bool ibd_readonly; + bool ibd_exclusive; struct iblock_dev_plug *ibd_plug; } ____cacheline_aligned; From 220e6083e8bdc11c414c2a44643f739d5c826d7b Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Wed, 2 Jul 2025 09:24:23 +0800 Subject: [PATCH 1026/2411] scsi: MAINTAINERS: Update hisi_sas entry liyihang9@huawei.com no longer works. So update information for hisi_sas. Signed-off-by: Yihang Li Link: https://lore.kernel.org/r/20250702012423.1947238-1-liyihang9@h-partners.com Acked-by: Wei Xu Signed-off-by: Martin K. 
Petersen --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..05325fab7a6b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10949,7 +10949,7 @@ F: Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt F: drivers/infiniband/hw/hns/ HISILICON SAS Controller -M: Yihang Li +M: Yihang Li S: Supported W: http://www.hisilicon.com F: Documentation/devicetree/bindings/scsi/hisilicon-sas.txt From 6e0f6aa44b68335df404a2df955055f416b5f2aa Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 14 Jul 2025 15:37:38 +0200 Subject: [PATCH 1027/2411] scsi: target: core: Generate correct identifiers for PR OUT transport IDs Fix target_parse_pr_out_transport_id() to return a string representing the transport ID in a human-readable format (e.g., naa.xxxxxxxx...) for various SCSI protocol types (SAS, FCP, SRP, SBP). Previously, the function returned a pointer to the raw binary buffer, which was incorrectly compared against human-readable strings, causing comparisons to fail. Now, the function writes a properly formatted string into a buffer provided by the caller. The output format depends on the transport protocol: * SAS: 64-bit identifier, "naa." prefix. * FCP: 64-bit identifier, colon separated values. * SBP: 64-bit identifier, no prefix. * SRP: 128-bit identifier, "0x" prefix. * iSCSI: IQN string. Signed-off-by: Maurizio Lombardi Link: https://lore.kernel.org/r/20250714133738.11054-1-mlombard@redhat.com Reviewed-by: Dmitry Bogdanov Signed-off-by: Martin K. 
Petersen --- drivers/target/target_core_fabric_lib.c | 65 +++++++++++++++++++------ drivers/target/target_core_internal.h | 4 +- drivers/target/target_core_pr.c | 18 ++++--- 3 files changed, 61 insertions(+), 26 deletions(-) diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index 43f47e3aa448..ec7bc6e30228 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -257,11 +257,41 @@ static int iscsi_get_pr_transport_id_len( return len; } -static char *iscsi_parse_pr_out_transport_id( +static void sas_parse_pr_out_transport_id(char *buf, char *i_str) +{ + char hex[17] = {}; + + bin2hex(hex, buf + 4, 8); + snprintf(i_str, TRANSPORT_IQN_LEN, "naa.%s", hex); +} + +static void srp_parse_pr_out_transport_id(char *buf, char *i_str) +{ + char hex[33] = {}; + + bin2hex(hex, buf + 8, 16); + snprintf(i_str, TRANSPORT_IQN_LEN, "0x%s", hex); +} + +static void fcp_parse_pr_out_transport_id(char *buf, char *i_str) +{ + snprintf(i_str, TRANSPORT_IQN_LEN, "%8phC", buf + 8); +} + +static void sbp_parse_pr_out_transport_id(char *buf, char *i_str) +{ + char hex[17] = {}; + + bin2hex(hex, buf + 8, 8); + snprintf(i_str, TRANSPORT_IQN_LEN, "%s", hex); +} + +static bool iscsi_parse_pr_out_transport_id( struct se_portal_group *se_tpg, char *buf, u32 *out_tid_len, - char **port_nexus_ptr) + char **port_nexus_ptr, + char *i_str) { char *p; int i; @@ -282,7 +312,7 @@ static char *iscsi_parse_pr_out_transport_id( if ((format_code != 0x00) && (format_code != 0x40)) { pr_err("Illegal format code: 0x%02x for iSCSI" " Initiator Transport ID\n", format_code); - return NULL; + return false; } /* * If the caller wants the TransportID Length, we set that value for the @@ -306,7 +336,7 @@ static char *iscsi_parse_pr_out_transport_id( pr_err("Unable to locate \",i,0x\" separator" " for Initiator port identifier: %s\n", &buf[4]); - return NULL; + return false; } *p = '\0'; /* Terminate iSCSI Name */ p += 5; /* Skip 
over ",i,0x" separator */ @@ -339,7 +369,8 @@ static char *iscsi_parse_pr_out_transport_id( } else *port_nexus_ptr = NULL; - return &buf[4]; + strscpy(i_str, &buf[4], TRANSPORT_IQN_LEN); + return true; } int target_get_pr_transport_id_len(struct se_node_acl *nacl, @@ -387,33 +418,35 @@ int target_get_pr_transport_id(struct se_node_acl *nacl, } } -const char *target_parse_pr_out_transport_id(struct se_portal_group *tpg, - char *buf, u32 *out_tid_len, char **port_nexus_ptr) +bool target_parse_pr_out_transport_id(struct se_portal_group *tpg, + char *buf, u32 *out_tid_len, char **port_nexus_ptr, char *i_str) { - u32 offset; - switch (tpg->proto_id) { case SCSI_PROTOCOL_SAS: /* * Assume the FORMAT CODE 00b from spc4r17, 7.5.4.7 TransportID * for initiator ports using SCSI over SAS Serial SCSI Protocol. */ - offset = 4; + sas_parse_pr_out_transport_id(buf, i_str); + break; + case SCSI_PROTOCOL_SRP: + srp_parse_pr_out_transport_id(buf, i_str); + break; + case SCSI_PROTOCOL_FCP: + fcp_parse_pr_out_transport_id(buf, i_str); break; case SCSI_PROTOCOL_SBP: - case SCSI_PROTOCOL_SRP: - case SCSI_PROTOCOL_FCP: - offset = 8; + sbp_parse_pr_out_transport_id(buf, i_str); break; case SCSI_PROTOCOL_ISCSI: return iscsi_parse_pr_out_transport_id(tpg, buf, out_tid_len, - port_nexus_ptr); + port_nexus_ptr, i_str); default: pr_err("Unknown proto_id: 0x%02x\n", tpg->proto_id); - return NULL; + return false; } *port_nexus_ptr = NULL; *out_tid_len = 24; - return buf + offset; + return true; } diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 408be26d2e9b..20aab1f50565 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -103,8 +103,8 @@ int target_get_pr_transport_id_len(struct se_node_acl *nacl, int target_get_pr_transport_id(struct se_node_acl *nacl, struct t10_pr_registration *pr_reg, int *format_code, unsigned char *buf); -const char *target_parse_pr_out_transport_id(struct se_portal_group *tpg, - 
char *buf, u32 *out_tid_len, char **port_nexus_ptr); +bool target_parse_pr_out_transport_id(struct se_portal_group *tpg, + char *buf, u32 *out_tid_len, char **port_nexus_ptr, char *i_str); /* target_core_hba.c */ struct se_hba *core_alloc_hba(const char *, u32, u32); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 34cf2c399b39..0240ec0a8ce4 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -1478,11 +1478,12 @@ core_scsi3_decode_spec_i_port( LIST_HEAD(tid_dest_list); struct pr_transport_id_holder *tidh_new, *tidh, *tidh_tmp; unsigned char *buf, *ptr, proto_ident; - const unsigned char *i_str = NULL; + unsigned char i_str[TRANSPORT_IQN_LEN]; char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN]; sense_reason_t ret; u32 tpdl, tid_len = 0; u32 dest_rtpi = 0; + bool tid_found; /* * Allocate a struct pr_transport_id_holder and setup the @@ -1571,9 +1572,9 @@ core_scsi3_decode_spec_i_port( dest_rtpi = tmp_lun->lun_tpg->tpg_rtpi; iport_ptr = NULL; - i_str = target_parse_pr_out_transport_id(tmp_tpg, - ptr, &tid_len, &iport_ptr); - if (!i_str) + tid_found = target_parse_pr_out_transport_id(tmp_tpg, + ptr, &tid_len, &iport_ptr, i_str); + if (!tid_found) continue; /* * Determine if this SCSI device server requires that @@ -3151,13 +3152,14 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key, struct t10_pr_registration *pr_reg, *pr_res_holder, *dest_pr_reg; struct t10_reservation *pr_tmpl = &dev->t10_pr; unsigned char *buf; - const unsigned char *initiator_str; + unsigned char initiator_str[TRANSPORT_IQN_LEN]; char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN] = { }; u32 tid_len, tmp_tid_len; int new_reg = 0, type, scope, matching_iname; sense_reason_t ret; unsigned short rtpi; unsigned char proto_ident; + bool tid_found; if (!se_sess || !se_lun) { pr_err("SPC-3 PR: se_sess || struct se_lun is NULL!\n"); @@ -3276,9 +3278,9 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, 
u64 res_key, ret = TCM_INVALID_PARAMETER_LIST; goto out; } - initiator_str = target_parse_pr_out_transport_id(dest_se_tpg, - &buf[24], &tmp_tid_len, &iport_ptr); - if (!initiator_str) { + tid_found = target_parse_pr_out_transport_id(dest_se_tpg, + &buf[24], &tmp_tid_len, &iport_ptr, initiator_str); + if (!tid_found) { pr_err("SPC-3 PR REGISTER_AND_MOVE: Unable to locate" " initiator_str from Transport ID\n"); ret = TCM_INVALID_PARAMETER_LIST; From 37c4e72b0651e7697eb338cd1fb09feef472cc1a Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Tue, 24 Jun 2025 11:46:49 +0530 Subject: [PATCH 1028/2411] scsi: Fix sas_user_scan() to handle wildcard and multi-channel scans sas_user_scan() did not fully process wildcard channel scans (SCAN_WILD_CARD) when a transport-specific user_scan() callback was present. Only channel 0 would be scanned via user_scan(), while the remaining channels were skipped, potentially missing devices. user_scan() invokes updated sas_user_scan() for channel 0, and if successful, iteratively scans remaining channels (1 to shost->max_channel) via scsi_scan_host_selected(). This ensures complete wildcard scanning without affecting transport-specific scanning behavior. Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20250624061649.17990-1-ranjan.kumar@broadcom.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/scsi_scan.c | 2 +- drivers/scsi/scsi_transport_sas.c | 62 ++++++++++++++++++++++++------- 2 files changed, 50 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 160c2f74c7e7..3c6e089e80c3 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1900,7 +1900,7 @@ int scsi_scan_host_selected(struct Scsi_Host *shost, unsigned int channel, return 0; } - +EXPORT_SYMBOL(scsi_scan_host_selected); static void scsi_sysfs_add_devices(struct Scsi_Host *shost) { struct scsi_device *sdev; diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 351b028ef893..d69c7c444a31 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -40,6 +40,8 @@ #include #include "scsi_sas_internal.h" +#include "scsi_priv.h" + struct sas_host_attrs { struct list_head rphy_list; struct mutex lock; @@ -1683,6 +1685,22 @@ int scsi_is_sas_rphy(const struct device *dev) } EXPORT_SYMBOL(scsi_is_sas_rphy); +static void scan_channel_zero(struct Scsi_Host *shost, uint id, u64 lun) +{ + struct sas_host_attrs *sas_host = to_sas_host_attrs(shost); + struct sas_rphy *rphy; + + list_for_each_entry(rphy, &sas_host->rphy_list, list) { + if (rphy->identify.device_type != SAS_END_DEVICE || + rphy->scsi_target_id == -1) + continue; + + if (id == SCAN_WILD_CARD || id == rphy->scsi_target_id) { + scsi_scan_target(&rphy->dev, 0, rphy->scsi_target_id, + lun, SCSI_SCAN_MANUAL); + } + } +} /* * SCSI scan helper @@ -1692,23 +1710,41 @@ static int sas_user_scan(struct Scsi_Host *shost, uint channel, uint id, u64 lun) { struct sas_host_attrs *sas_host = to_sas_host_attrs(shost); - struct sas_rphy *rphy; + int res = 0; + int i; - mutex_lock(&sas_host->lock); - list_for_each_entry(rphy, &sas_host->rphy_list, list) { - if (rphy->identify.device_type != SAS_END_DEVICE || - rphy->scsi_target_id == -1) - continue; + switch (channel) { + case 0: + mutex_lock(&sas_host->lock); + 
scan_channel_zero(shost, id, lun); + mutex_unlock(&sas_host->lock); + break; - if ((channel == SCAN_WILD_CARD || channel == 0) && - (id == SCAN_WILD_CARD || id == rphy->scsi_target_id)) { - scsi_scan_target(&rphy->dev, 0, rphy->scsi_target_id, - lun, SCSI_SCAN_MANUAL); + case SCAN_WILD_CARD: + mutex_lock(&sas_host->lock); + scan_channel_zero(shost, id, lun); + mutex_unlock(&sas_host->lock); + + for (i = 1; i <= shost->max_channel; i++) { + res = scsi_scan_host_selected(shost, i, id, lun, + SCSI_SCAN_MANUAL); + if (res) + goto exit_scan; } - } - mutex_unlock(&sas_host->lock); + break; - return 0; + default: + if (channel < shost->max_channel) { + res = scsi_scan_host_selected(shost, channel, id, lun, + SCSI_SCAN_MANUAL); + } else { + res = -EINVAL; + } + break; + } + +exit_scan: + return res; } From 51b6f738ebfafba4e309e1cde3e8e1745782f128 Mon Sep 17 00:00:00 2001 From: Liu Song Date: Mon, 21 Jul 2025 20:01:38 +0800 Subject: [PATCH 1029/2411] scsi: ufs: core: Use str_true_false() helper in UFS_FLAG() Remove hard-coded strings by using the str_true_false() helper function. Signed-off-by: Liu Song Link: https://lore.kernel.org/r/20250721200138431dOU9KyajGyGi5339ma26p@zte.com.cn Reviewed-by: Peter Wang Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufs-sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c index 10006ae5ee35..11566afd5657 100644 --- a/drivers/ufs/core/ufs-sysfs.c +++ b/drivers/ufs/core/ufs-sysfs.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -1516,7 +1517,7 @@ static ssize_t _name##_show(struct device *dev, \ ret = -EINVAL; \ goto out; \ } \ - ret = sysfs_emit(buf, "%s\n", flag ? 
"true" : "false"); \ + ret = sysfs_emit(buf, "%s\n", str_true_false(flag)); \ out: \ up(&hba->host_sem); \ return ret; \ From 262893939604204d14d7621b6d2658199d1672bb Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Tue, 22 Jul 2025 11:07:16 +0800 Subject: [PATCH 1030/2411] scsi: ufs: host: mediatek: Simplify boolean conversion Simplify the conversion from unsigned int to boolean by removing explicit conversions and parentheses, relying on implicit conversion instead. This change ensures consistency with other usages in ufs-mediatek.c and streamlines the code. Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20250722030841.1998783-2-peter.wang@mediatek.com Reviewed-by: Chun-Hung Wu Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-mediatek.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 182f58d0c9db..744efcde1fff 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -96,49 +96,49 @@ static bool ufs_mtk_is_boost_crypt_enabled(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); - return !!(host->caps & UFS_MTK_CAP_BOOST_CRYPT_ENGINE); + return host->caps & UFS_MTK_CAP_BOOST_CRYPT_ENGINE; } static bool ufs_mtk_is_va09_supported(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); - return !!(host->caps & UFS_MTK_CAP_VA09_PWR_CTRL); + return host->caps & UFS_MTK_CAP_VA09_PWR_CTRL; } static bool ufs_mtk_is_broken_vcc(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); - return !!(host->caps & UFS_MTK_CAP_BROKEN_VCC); + return host->caps & UFS_MTK_CAP_BROKEN_VCC; } static bool ufs_mtk_is_pmc_via_fastauto(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); - return !!(host->caps & UFS_MTK_CAP_PMC_VIA_FASTAUTO); + return host->caps & UFS_MTK_CAP_PMC_VIA_FASTAUTO; } static bool 
ufs_mtk_is_tx_skew_fix(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); - return (host->caps & UFS_MTK_CAP_TX_SKEW_FIX); + return host->caps & UFS_MTK_CAP_TX_SKEW_FIX; } static bool ufs_mtk_is_rtff_mtcmos(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); - return (host->caps & UFS_MTK_CAP_RTFF_MTCMOS); + return host->caps & UFS_MTK_CAP_RTFF_MTCMOS; } static bool ufs_mtk_is_allow_vccqx_lpm(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); - return (host->caps & UFS_MTK_CAP_ALLOW_VCCQX_LPM); + return host->caps & UFS_MTK_CAP_ALLOW_VCCQX_LPM; } static void ufs_mtk_cfg_unipro_cg(struct ufs_hba *hba, bool enable) From a84a9ba7888fabc00c9585a0626343dfd5538d59 Mon Sep 17 00:00:00 2001 From: Naomi Chu Date: Tue, 22 Jul 2025 11:07:17 +0800 Subject: [PATCH 1031/2411] scsi: ufs: host: mediatek: Add DDR_EN setting On MT6989 and later platforms, control of DDR_EN has been switched from SPM to EMI. To prevent abnormal access to DRAM, it is necessary to wait for 'ddren_ack' or assert 'ddren_urgent' after sending 'ddren_req'. Introduce the DDR_EN configuration in the UFS initialization flow, utilizing the assertion of 'ddren_urgent' to maintain performance. Signed-off-by: Naomi Chu Link: https://lore.kernel.org/r/20250722030841.1998783-3-peter.wang@mediatek.com Reviewed-by: Peter Wang Reviewed-by: Chun-Hung Wu Signed-off-by: Peter Wang Signed-off-by: Martin K. 
Petersen --- drivers/ufs/host/ufs-mediatek.c | 7 +++++++ drivers/ufs/host/ufs-mediatek.h | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 744efcde1fff..90351fff501c 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -267,6 +267,13 @@ static int ufs_mtk_hce_enable_notify(struct ufs_hba *hba, ufshcd_writel(hba, ufshcd_readl(hba, REG_UFS_XOUFS_CTRL) | 0x80, REG_UFS_XOUFS_CTRL); + + /* DDR_EN setting */ + if (host->ip_ver >= IP_VER_MT6989) { + ufshcd_rmwl(hba, UFS_MASK(0x7FFF, 8), + 0x453000, REG_UFS_MMIO_OPT_CTRL_0); + } + } return 0; diff --git a/drivers/ufs/host/ufs-mediatek.h b/drivers/ufs/host/ufs-mediatek.h index 05d76a6bd772..1082f761bb44 100644 --- a/drivers/ufs/host/ufs-mediatek.h +++ b/drivers/ufs/host/ufs-mediatek.h @@ -192,4 +192,16 @@ struct ufs_mtk_host { /* MTK RTT support number */ #define MTK_MAX_NUM_RTT 2 +/* UFSHCI MTK ip version value */ +enum { + /* UFSHCI 3.1 */ + IP_VER_MT6878 = 0x10420200, + + /* UFSHCI 4.0 */ + IP_VER_MT6897 = 0x10440000, + IP_VER_MT6989 = 0x10450000, + + IP_VER_NONE = 0xFFFFFFFF +}; + #endif /* !_UFS_MEDIATEK_H */ From 16b30c7a4c564e80fefe7e6416320f4f5b776d60 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Tue, 22 Jul 2025 11:07:18 +0800 Subject: [PATCH 1032/2411] scsi: ufs: host: mediatek: Change ref-clk timeout policy Update the timeout policy for ref-clk control. - If a clock-on operation times out, it is assumed that the clock is off. The system will notify TFA to perform clock-off settings. - If a clock-off operation times out, it is assumed that the clock will eventually turn off. The 'ref_clk_enabled' flag is set directly. Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20250722030841.1998783-4-peter.wang@mediatek.com Reviewed-by: Chun-Hung Wu Signed-off-by: Martin K. 
Petersen --- drivers/ufs/host/ufs-mediatek.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 90351fff501c..b30203d83ef1 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -351,7 +351,16 @@ static int ufs_mtk_setup_ref_clk(struct ufs_hba *hba, bool on) dev_err(hba->dev, "missing ack of refclk req, reg: 0x%x\n", value); - ufs_mtk_ref_clk_notify(host->ref_clk_enabled, POST_CHANGE, res); + /* + * If clock on timeout, assume clock is off, notify tfa do clock + * off setting.(keep DIFN disable, release resource) + * If clock off timeout, assume clock will off finally, + * set ref_clk_enabled directly.(keep DIFN disable, keep resource) + */ + if (on) + ufs_mtk_ref_clk_notify(false, POST_CHANGE, res); + else + host->ref_clk_enabled = false; return -ETIMEDOUT; From a44ff97f895bd8615ebb53e6e199b74152c18bba Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Tue, 22 Jul 2025 11:07:19 +0800 Subject: [PATCH 1033/2411] scsi: ufs: host: mediatek: Handle broken RTC based on DTS setting Introduce a mechanism to handle broken RTC by checking the DTS setting. The configuration is specifically required for legacy platform. Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20250722030841.1998783-5-peter.wang@mediatek.com Reviewed-by: Chun-Hung Wu Signed-off-by: Martin K. 
Petersen --- drivers/ufs/host/ufs-mediatek.c | 8 +++++++- drivers/ufs/host/ufs-mediatek.h | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index b30203d83ef1..112056e5d8e0 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -679,6 +679,9 @@ static void ufs_mtk_init_host_caps(struct ufs_hba *hba) if (of_property_read_bool(np, "mediatek,ufs-rtff-mtcmos")) host->caps |= UFS_MTK_CAP_RTFF_MTCMOS; + if (of_property_read_bool(np, "mediatek,ufs-broken-rtc")) + host->caps |= UFS_MTK_CAP_MCQ_BROKEN_RTC; + dev_info(hba->dev, "caps: 0x%x", host->caps); } @@ -1035,8 +1038,11 @@ static int ufs_mtk_init(struct ufs_hba *hba) shost->rpm_autosuspend_delay = MTK_RPM_AUTOSUSPEND_DELAY_MS; hba->quirks |= UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL; + hba->quirks |= UFSHCD_QUIRK_MCQ_BROKEN_INTR; - hba->quirks |= UFSHCD_QUIRK_MCQ_BROKEN_RTC; + if (host->caps & UFS_MTK_CAP_MCQ_BROKEN_RTC) + hba->quirks |= UFSHCD_QUIRK_MCQ_BROKEN_RTC; + hba->vps->wb_flush_threshold = UFS_WB_BUF_REMAIN_PERCENT(80); if (host->caps & UFS_MTK_CAP_DISABLE_AH8) diff --git a/drivers/ufs/host/ufs-mediatek.h b/drivers/ufs/host/ufs-mediatek.h index 1082f761bb44..abb4a4fd4402 100644 --- a/drivers/ufs/host/ufs-mediatek.h +++ b/drivers/ufs/host/ufs-mediatek.h @@ -133,6 +133,8 @@ enum ufs_mtk_host_caps { UFS_MTK_CAP_DISABLE_MCQ = 1 << 8, /* Control MTCMOS with RTFF */ UFS_MTK_CAP_RTFF_MTCMOS = 1 << 9, + + UFS_MTK_CAP_MCQ_BROKEN_RTC = 1 << 10, }; struct ufs_mtk_crypt_cfg { From 66e26a4b8a7793137551e77a7e9f6eb1263a49c2 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Tue, 22 Jul 2025 11:07:20 +0800 Subject: [PATCH 1034/2411] scsi: ufs: host: mediatek: Set IRQ affinity policy for MCQ mode Set the IRQ affinity for MCQ mode to improve performance. Specifically, it migrates the IRQ from CPU0 to CPU3 to enhance IRQ handling efficiency. 
Setting IRQ affinity directly from the kernel allows the configuration to take effect earlier, and provides greater security and consistency, especially important for systems with strict performance or real-time requirements. Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20250722030841.1998783-6-peter.wang@mediatek.com Reviewed-by: Chun-Hung Wu Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-mediatek.c | 47 +++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 112056e5d8e0..78eaf057cdc3 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -798,6 +798,46 @@ static int ufs_mtk_setup_clocks(struct ufs_hba *hba, bool on, return ret; } +static u32 ufs_mtk_mcq_get_irq(struct ufs_hba *hba, unsigned int cpu) +{ + struct ufs_mtk_host *host = ufshcd_get_variant(hba); + struct blk_mq_tag_set *tag_set = &hba->host->tag_set; + struct blk_mq_queue_map *map = &tag_set->map[HCTX_TYPE_DEFAULT]; + unsigned int nr = map->nr_queues; + unsigned int q_index; + + q_index = map->mq_map[cpu]; + if (q_index > nr) { + dev_err(hba->dev, "hwq index %d exceed %d\n", + q_index, nr); + return MTK_MCQ_INVALID_IRQ; + } + + return host->mcq_intr_info[q_index].irq; +} + +static void ufs_mtk_mcq_set_irq_affinity(struct ufs_hba *hba, unsigned int cpu) +{ + unsigned int irq, _cpu; + int ret; + + irq = ufs_mtk_mcq_get_irq(hba, cpu); + if (irq == MTK_MCQ_INVALID_IRQ) { + dev_err(hba->dev, "invalid irq. unable to bind irq to cpu%d", cpu); + return; + } + + /* force migrate irq of cpu0 to cpu3 */ + _cpu = (cpu == 0) ? 
3 : cpu; + ret = irq_set_affinity(irq, cpumask_of(_cpu)); + if (ret) { + dev_err(hba->dev, "set irq %d affinity to CPU %d failed\n", + irq, _cpu); + return; + } + dev_info(hba->dev, "set irq %d affinity to CPU: %d\n", irq, _cpu); +} + static void ufs_mtk_get_controller_version(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); @@ -1527,6 +1567,13 @@ static int ufs_mtk_apply_dev_quirks(struct ufs_hba *hba) { struct ufs_dev_info *dev_info = &hba->dev_info; u16 mid = dev_info->wmanufacturerid; + unsigned int cpu; + + if (hba->mcq_enabled) { + /* Iterate all cpus to set affinity for mcq irqs */ + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + ufs_mtk_mcq_set_irq_affinity(hba, cpu); + } if (mid == UFS_VENDOR_SAMSUNG) { ufshcd_dme_set(hba, UIC_ARG_MIB(PA_TACTIVATE), 6); From 7996746394df569355113ce4643ab892442cfe1d Mon Sep 17 00:00:00 2001 From: Alice Chao Date: Tue, 22 Jul 2025 11:07:21 +0800 Subject: [PATCH 1035/2411] scsi: ufs: host: mediatek: Add more UFSCHI hardware versions Introduce a function for version control to distinguish between new and old platforms. Update the handling of hardware IP versions, ensuring correct version comparisons by adjusting the version format for specific projects. Signed-off-by: Alice Chao Link: https://lore.kernel.org/r/20250722030841.1998783-7-peter.wang@mediatek.com Reviewed-by: Peter Wang Reviewed-by: Chun-Hung Wu Signed-off-by: Peter Wang Signed-off-by: Martin K. 
Petersen --- drivers/ufs/host/ufs-mediatek.c | 47 ++++++++++++++++++++++++++++++++- drivers/ufs/host/ufs-mediatek.h | 12 +++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 78eaf057cdc3..28aba44068da 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -838,6 +838,51 @@ static void ufs_mtk_mcq_set_irq_affinity(struct ufs_hba *hba, unsigned int cpu) dev_info(hba->dev, "set irq %d affinity to CPU: %d\n", irq, _cpu); } +static bool ufs_mtk_is_legacy_chipset(struct ufs_hba *hba, u32 hw_ip_ver) +{ + bool is_legacy = false; + + switch (hw_ip_ver) { + case IP_LEGACY_VER_MT6893: + case IP_LEGACY_VER_MT6781: + /* can add other legacy chipset ID here accordingly */ + is_legacy = true; + break; + default: + break; + } + dev_info(hba->dev, "legacy IP version - 0x%x, is legacy : %d", hw_ip_ver, is_legacy); + + return is_legacy; +} + +/* + * HW version format has been changed from 01MMmmmm to 1MMMmmmm, since + * project MT6878. In order to perform correct version comparison, + * version number is changed by SW for the following projects. 
+ * IP_VER_MT6983 0x00360000 to 0x10360000 + * IP_VER_MT6897 0x01440000 to 0x10440000 + * IP_VER_MT6989 0x01450000 to 0x10450000 + * IP_VER_MT6991 0x01460000 to 0x10460000 + */ +static void ufs_mtk_get_hw_ip_version(struct ufs_hba *hba) +{ + struct ufs_mtk_host *host = ufshcd_get_variant(hba); + u32 hw_ip_ver; + + hw_ip_ver = ufshcd_readl(hba, REG_UFS_MTK_IP_VER); + + if (((hw_ip_ver & (0xFF << 24)) == (0x1 << 24)) || + ((hw_ip_ver & (0xFF << 24)) == 0)) { + hw_ip_ver &= ~(0xFF << 24); + hw_ip_ver |= (0x1 << 28); + } + + host->ip_ver = hw_ip_ver; + + host->legacy_ip_ver = ufs_mtk_is_legacy_chipset(hba, hw_ip_ver); +} + static void ufs_mtk_get_controller_version(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); @@ -1112,7 +1157,7 @@ static int ufs_mtk_init(struct ufs_hba *hba) ufs_mtk_setup_clocks(hba, true, POST_CHANGE); - host->ip_ver = ufshcd_readl(hba, REG_UFS_MTK_IP_VER); + ufs_mtk_get_hw_ip_version(hba); goto out; diff --git a/drivers/ufs/host/ufs-mediatek.h b/drivers/ufs/host/ufs-mediatek.h index abb4a4fd4402..fd229514384e 100644 --- a/drivers/ufs/host/ufs-mediatek.h +++ b/drivers/ufs/host/ufs-mediatek.h @@ -181,6 +181,7 @@ struct ufs_mtk_host { u16 ref_clk_ungating_wait_us; u16 ref_clk_gating_wait_us; u32 ip_ver; + bool legacy_ip_ver; bool mcq_set_intr; bool is_mcq_intr_enabled; @@ -197,13 +198,24 @@ struct ufs_mtk_host { /* UFSHCI MTK ip version value */ enum { /* UFSHCI 3.1 */ + IP_VER_MT6983 = 0x10360000, IP_VER_MT6878 = 0x10420200, /* UFSHCI 4.0 */ IP_VER_MT6897 = 0x10440000, IP_VER_MT6989 = 0x10450000, + IP_VER_MT6899 = 0x10450100, + IP_VER_MT6991_A0 = 0x10460000, + IP_VER_MT6991_B0 = 0x10470000, + IP_VER_MT6993 = 0x10480000, IP_VER_NONE = 0xFFFFFFFF }; +enum ip_ver_legacy { + IP_LEGACY_VER_MT6781 = 0x10380000, + IP_LEGACY_VER_MT6879 = 0x10360000, + IP_LEGACY_VER_MT6893 = 0x20160706 +}; + #endif /* !_UFS_MEDIATEK_H */ From ff40f31216fffc1b7f7e5a9e27a317a29a798289 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Tue, 22 Jul 2025 
11:07:22 +0800 Subject: [PATCH 1036/2411] scsi: ufs: host: mediatek: Add clock scaling query function Introduce a clock scaling readiness query function to streamline the process of checking clock scaling parameters. This function simplifies the code by encapsulating the logic for determining if clock scaling is ready. Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20250722030841.1998783-8-peter.wang@mediatek.com Reviewed-by: Chun-Hung Wu Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-mediatek.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 28aba44068da..0b3cce8d9787 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -141,6 +141,16 @@ static bool ufs_mtk_is_allow_vccqx_lpm(struct ufs_hba *hba) return host->caps & UFS_MTK_CAP_ALLOW_VCCQX_LPM; } +static bool ufs_mtk_is_clk_scale_ready(struct ufs_hba *hba) +{ + struct ufs_mtk_host *host = ufshcd_get_variant(hba); + struct ufs_mtk_clk *mclk = &host->mclk; + + return mclk->ufs_sel_clki && + mclk->ufs_sel_max_clki && + mclk->ufs_sel_min_clki; +} + static void ufs_mtk_cfg_unipro_cg(struct ufs_hba *hba, bool enable) { u32 tmp; @@ -922,7 +932,6 @@ static void ufs_mtk_init_clocks(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); struct list_head *head = &hba->clk_list_head; - struct ufs_mtk_clk *mclk = &host->mclk; struct ufs_clk_info *clki, *clki_tmp; /* @@ -944,8 +953,7 @@ static void ufs_mtk_init_clocks(struct ufs_hba *hba) } } - if (!mclk->ufs_sel_clki || !mclk->ufs_sel_max_clki || - !mclk->ufs_sel_min_clki) { + if (!ufs_mtk_is_clk_scale_ready(hba)) { hba->caps &= ~UFSHCD_CAP_CLK_SCALING; dev_info(hba->dev, "%s: Clk-scaling not ready. 
Feature disabled.", From 31a20e9f7c766896fbfea45897969bfd1490b466 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Tue, 22 Jul 2025 11:07:23 +0800 Subject: [PATCH 1037/2411] scsi: ufs: host: mediatek: Support clock scaling with Vcore binding Add support for clock scaling with Vcore binding: 1. Parse the DTS setting for Vcore voltage. 2. Set the Vcore voltage to the DTS-specified value before scaling up. 3. Reset the Vcore voltage to the default setting after scaling down. These changes ensure that the Vcore voltage is appropriately managed during clock scaling operations to maintain system stability and performance. Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20250722030841.1998783-9-peter.wang@mediatek.com Reviewed-by: Chun-Hung Wu Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-mediatek.c | 129 +++++++++++++++++++++++++++----- drivers/ufs/host/ufs-mediatek.h | 3 + 2 files changed, 112 insertions(+), 20 deletions(-) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 0b3cce8d9787..a0c53d796a60 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -933,6 +933,9 @@ static void ufs_mtk_init_clocks(struct ufs_hba *hba) struct ufs_mtk_host *host = ufshcd_get_variant(hba); struct list_head *head = &hba->clk_list_head; struct ufs_clk_info *clki, *clki_tmp; + struct device *dev = hba->dev; + struct regulator *reg; + u32 volt; /* * Find private clocks and store them in struct ufs_mtk_clk. @@ -958,6 +961,35 @@ static void ufs_mtk_init_clocks(struct ufs_hba *hba) dev_info(hba->dev, "%s: Clk-scaling not ready. Feature disabled.", __func__); + return; + } + + /* + * Default get vcore if dts have these settings. + * No matter clock scaling support or not. 
(may disable by customer) + */ + reg = devm_regulator_get_optional(dev, "dvfsrc-vcore"); + if (IS_ERR(reg)) { + dev_info(dev, "failed to get dvfsrc-vcore: %ld", + PTR_ERR(reg)); + return; + } + + if (of_property_read_u32(dev->of_node, "clk-scale-up-vcore-min", + &volt)) { + dev_info(dev, "failed to get clk-scale-up-vcore-min"); + return; + } + + host->mclk.reg_vcore = reg; + host->mclk.vcore_volt = volt; + + /* If default boot is max gear, request vcore */ + if (reg && volt && host->clk_scale_up) { + if (regulator_set_voltage(reg, volt, INT_MAX)) { + dev_info(hba->dev, + "Failed to set vcore to %d\n", volt); + } } } @@ -1126,6 +1158,7 @@ static int ufs_mtk_init(struct ufs_hba *hba) /* Enable clk scaling*/ hba->caps |= UFSHCD_CAP_CLK_SCALING; + host->clk_scale_up = true; /* default is max freq */ /* Set runtime pm delay to replace default */ shost->rpm_autosuspend_delay = MTK_RPM_AUTOSUSPEND_DELAY_MS; @@ -1720,6 +1753,69 @@ static void ufs_mtk_config_scaling_param(struct ufs_hba *hba, hba->vps->ondemand_data.downdifferential = 20; } +static void _ufs_mtk_clk_scale(struct ufs_hba *hba, bool scale_up) +{ + struct ufs_mtk_host *host = ufshcd_get_variant(hba); + struct ufs_mtk_clk *mclk = &host->mclk; + struct ufs_clk_info *clki = mclk->ufs_sel_clki; + struct regulator *reg; + int volt, ret = 0; + bool clk_bind_vcore = false; + + if (!hba->clk_scaling.is_initialized) + return; + + if (!clki) + return; + + reg = host->mclk.reg_vcore; + volt = host->mclk.vcore_volt; + if (reg && volt != 0) + clk_bind_vcore = true; + + ret = clk_prepare_enable(clki->clk); + if (ret) { + dev_info(hba->dev, + "clk_prepare_enable() fail, ret: %d\n", ret); + return; + } + + if (scale_up) { + if (clk_bind_vcore) { + ret = regulator_set_voltage(reg, volt, INT_MAX); + if (ret) { + dev_info(hba->dev, + "Failed to set vcore to %d\n", volt); + goto out; + } + } + + ret = clk_set_parent(clki->clk, mclk->ufs_sel_max_clki->clk); + if (ret) { + dev_info(hba->dev, "Failed to set clk mux, ret = %d\n", + 
ret); + } + } else { + ret = clk_set_parent(clki->clk, mclk->ufs_sel_min_clki->clk); + if (ret) { + dev_info(hba->dev, "Failed to set clk mux, ret = %d\n", + ret); + goto out; + } + + if (clk_bind_vcore) { + ret = regulator_set_voltage(reg, 0, INT_MAX); + if (ret) { + dev_info(hba->dev, + "failed to set vcore to MIN\n"); + } + } + } + +out: + clk_disable_unprepare(clki->clk); +} + /** * ufs_mtk_clk_scale - Internal clk scaling operation * @@ -1737,30 +1833,23 @@ static void ufs_mtk_clk_scale(struct ufs_hba *hba, bool scale_up) struct ufs_mtk_host *host = ufshcd_get_variant(hba); struct ufs_mtk_clk *mclk = &host->mclk; struct ufs_clk_info *clki = mclk->ufs_sel_clki; - int ret = 0; - ret = clk_prepare_enable(clki->clk); - if (ret) { - dev_info(hba->dev, - "clk_prepare_enable() fail, ret: %d\n", ret); - return; - } + if (host->clk_scale_up == scale_up) + goto out; - if (scale_up) { - ret = clk_set_parent(clki->clk, mclk->ufs_sel_max_clki->clk); + if (scale_up) + _ufs_mtk_clk_scale(hba, true); + else + _ufs_mtk_clk_scale(hba, false); + + host->clk_scale_up = scale_up; + + /* Must always set before clk_set_rate() */ + if (scale_up) clki->curr_freq = clki->max_freq; - } else { - ret = clk_set_parent(clki->clk, mclk->ufs_sel_min_clki->clk); + else clki->curr_freq = clki->min_freq; - } - - if (ret) { - dev_info(hba->dev, - "Failed to set ufs_sel_clki, ret: %d\n", ret); - } - - clk_disable_unprepare(clki->clk); - +out: trace_ufs_mtk_clk_scale(clki->name, scale_up, clk_get_rate(clki->clk)); } diff --git a/drivers/ufs/host/ufs-mediatek.h b/drivers/ufs/host/ufs-mediatek.h index fd229514384e..0b25ce5aa836 100644 --- a/drivers/ufs/host/ufs-mediatek.h +++ b/drivers/ufs/host/ufs-mediatek.h @@ -149,6 +149,8 @@ struct ufs_mtk_clk { struct ufs_clk_info *ufs_sel_clki; /* Mux */ struct ufs_clk_info *ufs_sel_max_clki; /* Max src */ struct ufs_clk_info *ufs_sel_min_clki; /* Min src */ + struct regulator *reg_vcore; + int vcore_volt; }; struct ufs_mtk_hw_ver { @@ -178,6 +180,7 @@ struct 
ufs_mtk_host { bool mphy_powered_on; bool unipro_lpm; bool ref_clk_enabled; + bool clk_scale_up; u16 ref_clk_ungating_wait_us; u16 ref_clk_gating_wait_us; u32 ip_ver; From 5e5976f5242de61b9c09c32795b3d7b90364af51 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Tue, 22 Jul 2025 11:07:24 +0800 Subject: [PATCH 1038/2411] scsi: ufs: host: mediatek: Support FDE (AES) clock scaling Add support for scaling the FDE (AES) clock to achieve higher performance, particularly for HS-G5: 1. Parse DTS settings for FDE min/max mux. 2. Scale up the FDE clock when required for enhanced performance. These changes ensure that the FDE clock can be dynamically adjusted based on performance needs, leveraging DTS configurations. Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20250722030841.1998783-10-peter.wang@mediatek.com Reviewed-by: Chun-Hung Wu Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-mediatek.c | 54 ++++++++++++++++++++++++++++++++- drivers/ufs/host/ufs-mediatek.h | 3 ++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index a0c53d796a60..91a2f3428b9f 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -953,9 +953,23 @@ static void ufs_mtk_init_clocks(struct ufs_hba *hba) host->mclk.ufs_sel_min_clki = clki; clk_disable_unprepare(clki->clk); list_del(&clki->list); + } else if (!strcmp(clki->name, "ufs_fde")) { + host->mclk.ufs_fde_clki = clki; + } else if (!strcmp(clki->name, "ufs_fde_max_src")) { + host->mclk.ufs_fde_max_clki = clki; + clk_disable_unprepare(clki->clk); + list_del(&clki->list); + } else if (!strcmp(clki->name, "ufs_fde_min_src")) { + host->mclk.ufs_fde_min_clki = clki; + clk_disable_unprepare(clki->clk); + list_del(&clki->list); } } + list_for_each_entry(clki, head, list) { + dev_info(hba->dev, "clk \"%s\" present", clki->name); + } + if (!ufs_mtk_is_clk_scale_ready(hba)) { hba->caps &= ~UFSHCD_CAP_CLK_SCALING; 
dev_info(hba->dev, @@ -1758,14 +1772,16 @@ static void _ufs_mtk_clk_scale(struct ufs_hba *hba, bool scale_up) struct ufs_mtk_host *host = ufshcd_get_variant(hba); struct ufs_mtk_clk *mclk = &host->mclk; struct ufs_clk_info *clki = mclk->ufs_sel_clki; + struct ufs_clk_info *fde_clki = mclk->ufs_fde_clki; struct regulator *reg; int volt, ret = 0; bool clk_bind_vcore = false; + bool clk_fde_scale = false; if (!hba->clk_scaling.is_initialized) return; - if (!clki) + if (!clki || !fde_clki) return; reg = host->mclk.reg_vcore; @@ -1773,6 +1789,9 @@ static void _ufs_mtk_clk_scale(struct ufs_hba *hba, bool scale_up) if (reg && volt != 0) clk_bind_vcore = true; + if (mclk->ufs_fde_max_clki && mclk->ufs_fde_min_clki) + clk_fde_scale = true; + ret = clk_prepare_enable(clki->clk); if (ret) { dev_info(hba->dev, @@ -1780,6 +1799,15 @@ static void _ufs_mtk_clk_scale(struct ufs_hba *hba, bool scale_up) return; } + if (clk_fde_scale) { + ret = clk_prepare_enable(fde_clki->clk); + if (ret) { + dev_info(hba->dev, + "fde clk_prepare_enable() fail, ret: %d\n", ret); + return; + } + } + if (scale_up) { if (clk_bind_vcore) { ret = regulator_set_voltage(reg, volt, INT_MAX); @@ -1795,7 +1823,28 @@ static void _ufs_mtk_clk_scale(struct ufs_hba *hba, bool scale_up) dev_info(hba->dev, "Failed to set clk mux, ret = %d\n", ret); } + + if (clk_fde_scale) { + ret = clk_set_parent(fde_clki->clk, + mclk->ufs_fde_max_clki->clk); + if (ret) { + dev_info(hba->dev, + "Failed to set fde clk mux, ret = %d\n", + ret); + } + } } else { + if (clk_fde_scale) { + ret = clk_set_parent(fde_clki->clk, + mclk->ufs_fde_min_clki->clk); + if (ret) { + dev_info(hba->dev, + "Failed to set fde clk mux, ret = %d\n", + ret); + goto out; + } + } + ret = clk_set_parent(clki->clk, mclk->ufs_sel_min_clki->clk); if (ret) { dev_info(hba->dev, "Failed to set clk mux, ret = %d\n", @@ -1814,6 +1863,9 @@ static void _ufs_mtk_clk_scale(struct ufs_hba *hba, bool scale_up) out: clk_disable_unprepare(clki->clk); + + if (clk_fde_scale) 
+ clk_disable_unprepare(fde_clki->clk); } /** diff --git a/drivers/ufs/host/ufs-mediatek.h b/drivers/ufs/host/ufs-mediatek.h index 0b25ce5aa836..e46dc5fa209d 100644 --- a/drivers/ufs/host/ufs-mediatek.h +++ b/drivers/ufs/host/ufs-mediatek.h @@ -149,6 +149,9 @@ struct ufs_mtk_clk { struct ufs_clk_info *ufs_sel_clki; /* Mux */ struct ufs_clk_info *ufs_sel_max_clki; /* Max src */ struct ufs_clk_info *ufs_sel_min_clki; /* Min src */ + struct ufs_clk_info *ufs_fde_clki; /* Mux */ + struct ufs_clk_info *ufs_fde_max_clki; /* Max src */ + struct ufs_clk_info *ufs_fde_min_clki; /* Min src */ struct regulator *reg_vcore; int vcore_volt; }; From 4428ddea832cfdb63e476eb2e5c8feb5d36057fe Mon Sep 17 00:00:00 2001 From: Archana Patni Date: Wed, 23 Jul 2025 19:58:49 +0300 Subject: [PATCH 1039/2411] scsi: ufs: ufs-pci: Fix hibernate state transition for Intel MTL-like host controllers UFSHCD core disables the UIC completion interrupt when issuing UIC hibernation commands, and re-enables it afterwards if it was enabled to start with, refer ufshcd_uic_pwr_ctrl(). For Intel MTL-like host controllers, accessing the register to re-enable the interrupt disrupts the state transition. Use hibern8_notify variant operation to disable the interrupt during the entire hibernation, thereby preventing the disruption. Fixes: 4049f7acef3e ("scsi: ufs: ufs-pci: Add support for Intel MTL") Cc: stable@vger.kernel.org Signed-off-by: Archana Patni Link: https://lore.kernel.org/r/20250723165856.145750-2-adrian.hunter@intel.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen --- drivers/ufs/host/ufshcd-pci.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/ufs/host/ufshcd-pci.c b/drivers/ufs/host/ufshcd-pci.c index 996387906aa1..af1c272eef1c 100644 --- a/drivers/ufs/host/ufshcd-pci.c +++ b/drivers/ufs/host/ufshcd-pci.c @@ -216,6 +216,32 @@ static int ufs_intel_lkf_apply_dev_quirks(struct ufs_hba *hba) return ret; } +static void ufs_intel_ctrl_uic_compl(struct ufs_hba *hba, bool enable) +{ + u32 set = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); + + if (enable) + set |= UIC_COMMAND_COMPL; + else + set &= ~UIC_COMMAND_COMPL; + ufshcd_writel(hba, set, REG_INTERRUPT_ENABLE); +} + +static void ufs_intel_mtl_h8_notify(struct ufs_hba *hba, + enum uic_cmd_dme cmd, + enum ufs_notify_change_status status) +{ + /* + * Disable UIC COMPL INTR to prevent access to UFSHCI after + * checking HCS.UPMCRS + */ + if (status == PRE_CHANGE && cmd == UIC_CMD_DME_HIBER_ENTER) + ufs_intel_ctrl_uic_compl(hba, false); + + if (status == POST_CHANGE && cmd == UIC_CMD_DME_HIBER_EXIT) + ufs_intel_ctrl_uic_compl(hba, true); +} + #define INTEL_ACTIVELTR 0x804 #define INTEL_IDLELTR 0x808 @@ -533,6 +559,7 @@ static struct ufs_hba_variant_ops ufs_intel_mtl_hba_vops = { .init = ufs_intel_mtl_init, .exit = ufs_intel_common_exit, .hce_enable_notify = ufs_intel_hce_enable_notify, + .hibern8_notify = ufs_intel_mtl_h8_notify, .link_startup_notify = ufs_intel_link_startup_notify, .resume = ufs_intel_resume, .device_reset = ufs_intel_device_reset, From 6de7435e6b81fe52c0ab4c7e181f6b5decd18eb1 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 23 Jul 2025 19:58:50 +0300 Subject: [PATCH 1040/2411] scsi: ufs: ufs-pci: Fix default runtime and system PM levels Intel MTL-like host controllers support auto-hibernate. Using auto-hibernate with manual (driver initiated) hibernate produces more complex operation. For example, the host controller will have to exit auto-hibernate simply to allow the driver to enter hibernate state manually. 
That is not recommended. The default rpm_lvl and spm_lvl is 3, which includes manual hibernate. Change the default values to 2, which does not. Note, to be simpler to backport to stable kernels, utilize the UFS PCI driver's ->late_init() call back. Recent commits have made it possible to set up a controller-specific default in the regular ->init() call back, but not all stable kernels have those changes. Fixes: 4049f7acef3e ("scsi: ufs: ufs-pci: Add support for Intel MTL") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20250723165856.145750-3-adrian.hunter@intel.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufshcd-pci.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufshcd-pci.c b/drivers/ufs/host/ufshcd-pci.c index af1c272eef1c..8aff32d7057d 100644 --- a/drivers/ufs/host/ufshcd-pci.c +++ b/drivers/ufs/host/ufshcd-pci.c @@ -468,10 +468,23 @@ static int ufs_intel_adl_init(struct ufs_hba *hba) return ufs_intel_common_init(hba); } +static void ufs_intel_mtl_late_init(struct ufs_hba *hba) +{ + hba->rpm_lvl = UFS_PM_LVL_2; + hba->spm_lvl = UFS_PM_LVL_2; +} + static int ufs_intel_mtl_init(struct ufs_hba *hba) { + struct ufs_host *ufs_host; + int err; + hba->caps |= UFSHCD_CAP_CRYPTO | UFSHCD_CAP_WB_EN; - return ufs_intel_common_init(hba); + err = ufs_intel_common_init(hba); + /* Get variant after it is set in ufs_intel_common_init() */ + ufs_host = ufshcd_get_variant(hba); + ufs_host->late_init = ufs_intel_mtl_late_init; + return err; } static int ufs_qemu_get_hba_mac(struct ufs_hba *hba) From 28a60bbbe739c5c895d2d36d23c93045667b4566 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 23 Jul 2025 19:58:51 +0300 Subject: [PATCH 1041/2411] scsi: ufs: ufs-pci: Remove UFS PCI driver's ->late_init() call back ->late_init() was introduced to allow the default values for rpm_lvl and spm_lvl to be set. 
Since commit bb9850704c04 ("scsi: ufs: core: Honor runtime/system PM levels if set by host controller drivers") and commit fe06b7c07f3f ("scsi: ufs: core: Set default runtime/system PM levels before ufshcd_hba_init()"), those default values can be set in the ->init() variant call back. Move the setting of default values for rpm_lvl and spm_lvl to ->init() and remove ->late_init(). Reviewed-by: Bart Van Assche Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20250723165856.145750-4-adrian.hunter@intel.com Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufshcd-pci.c | 46 +++++++---------------------------- 1 file changed, 9 insertions(+), 37 deletions(-) diff --git a/drivers/ufs/host/ufshcd-pci.c b/drivers/ufs/host/ufshcd-pci.c index 8aff32d7057d..b29ec1904482 100644 --- a/drivers/ufs/host/ufshcd-pci.c +++ b/drivers/ufs/host/ufshcd-pci.c @@ -22,17 +22,12 @@ #define MAX_SUPP_MAC 64 -struct ufs_host { - void (*late_init)(struct ufs_hba *hba); -}; - enum intel_ufs_dsm_func_id { INTEL_DSM_FNS = 0, INTEL_DSM_RESET = 1, }; struct intel_host { - struct ufs_host ufs_host; u32 dsm_fns; u32 active_ltr; u32 idle_ltr; @@ -434,8 +429,14 @@ static int ufs_intel_ehl_init(struct ufs_hba *hba) return ufs_intel_common_init(hba); } -static void ufs_intel_lkf_late_init(struct ufs_hba *hba) +static int ufs_intel_lkf_init(struct ufs_hba *hba) { + int err; + + hba->nop_out_timeout = 200; + hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8; + hba->caps |= UFSHCD_CAP_CRYPTO; + err = ufs_intel_common_init(hba); /* LKF always needs a full reset, so set PM accordingly */ if (hba->caps & UFSHCD_CAP_DEEPSLEEP) { hba->spm_lvl = UFS_PM_LVL_6; @@ -444,19 +445,6 @@ static void ufs_intel_lkf_late_init(struct ufs_hba *hba) hba->spm_lvl = UFS_PM_LVL_5; hba->rpm_lvl = UFS_PM_LVL_5; } -} - -static int ufs_intel_lkf_init(struct ufs_hba *hba) -{ - struct ufs_host *ufs_host; - int err; - - hba->nop_out_timeout = 200; - hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8; - hba->caps |= 
UFSHCD_CAP_CRYPTO; - err = ufs_intel_common_init(hba); - ufs_host = ufshcd_get_variant(hba); - ufs_host->late_init = ufs_intel_lkf_late_init; return err; } @@ -468,23 +456,12 @@ static int ufs_intel_adl_init(struct ufs_hba *hba) return ufs_intel_common_init(hba); } -static void ufs_intel_mtl_late_init(struct ufs_hba *hba) +static int ufs_intel_mtl_init(struct ufs_hba *hba) { hba->rpm_lvl = UFS_PM_LVL_2; hba->spm_lvl = UFS_PM_LVL_2; -} - -static int ufs_intel_mtl_init(struct ufs_hba *hba) -{ - struct ufs_host *ufs_host; - int err; - hba->caps |= UFSHCD_CAP_CRYPTO | UFSHCD_CAP_WB_EN; - err = ufs_intel_common_init(hba); - /* Get variant after it is set in ufs_intel_common_init() */ - ufs_host = ufshcd_get_variant(hba); - ufs_host->late_init = ufs_intel_mtl_late_init; - return err; + return ufs_intel_common_init(hba); } static int ufs_qemu_get_hba_mac(struct ufs_hba *hba) @@ -614,7 +591,6 @@ static void ufshcd_pci_remove(struct pci_dev *pdev) static int ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { - struct ufs_host *ufs_host; struct ufs_hba *hba; void __iomem *mmio_base; int err; @@ -647,10 +623,6 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return err; } - ufs_host = ufshcd_get_variant(hba); - if (ufs_host && ufs_host->late_init) - ufs_host->late_init(hba); - pm_runtime_put_noidle(&pdev->dev); pm_runtime_allow(&pdev->dev); From 497027eade8c02afdb6c5d21a193ef5cf4a26d0f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 23 Jul 2025 19:58:52 +0300 Subject: [PATCH 1042/2411] scsi: ufs: core: Move ufshcd_enable_intr() and ufshcd_disable_intr() Move ufshcd_enable_intr() and ufshcd_disable_intr() so they can be called in subsequent patches without forward declarations. No functional change. Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20250723165856.145750-5-adrian.hunter@intel.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen --- drivers/ufs/core/ufshcd.c | 52 +++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 4410e7d93b7d..b202626b75d4 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -364,6 +364,32 @@ void ufshcd_disable_irq(struct ufs_hba *hba) } EXPORT_SYMBOL_GPL(ufshcd_disable_irq); +/** + * ufshcd_enable_intr - enable interrupts + * @hba: per adapter instance + * @intrs: interrupt bits + */ +static void ufshcd_enable_intr(struct ufs_hba *hba, u32 intrs) +{ + u32 set = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); + + set |= intrs; + ufshcd_writel(hba, set, REG_INTERRUPT_ENABLE); +} + +/** + * ufshcd_disable_intr - disable interrupts + * @hba: per adapter instance + * @intrs: interrupt bits + */ +static void ufshcd_disable_intr(struct ufs_hba *hba, u32 intrs) +{ + u32 set = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); + + set &= ~intrs; + ufshcd_writel(hba, set, REG_INTERRUPT_ENABLE); +} + static void ufshcd_configure_wb(struct ufs_hba *hba) { if (!ufshcd_is_wb_allowed(hba)) @@ -2681,32 +2707,6 @@ static int ufshcd_map_sg(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) return ufshcd_crypto_fill_prdt(hba, lrbp); } -/** - * ufshcd_enable_intr - enable interrupts - * @hba: per adapter instance - * @intrs: interrupt bits - */ -static void ufshcd_enable_intr(struct ufs_hba *hba, u32 intrs) -{ - u32 set = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); - - set |= intrs; - ufshcd_writel(hba, set, REG_INTERRUPT_ENABLE); -} - -/** - * ufshcd_disable_intr - disable interrupts - * @hba: per adapter instance - * @intrs: interrupt bits - */ -static void ufshcd_disable_intr(struct ufs_hba *hba, u32 intrs) -{ - u32 set = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); - - set &= ~intrs; - ufshcd_writel(hba, set, REG_INTERRUPT_ENABLE); -} - /** * ufshcd_prepare_req_desc_hdr - Fill UTP Transfer request descriptor header according to request * descriptor according to request From 
c5977c4c0731b60c8c0b3f7cc4b0082a688a07f8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 23 Jul 2025 19:58:53 +0300 Subject: [PATCH 1043/2411] scsi: ufs: core: Remove duplicated code in ufshcd_send_bsg_uic_cmd() Make ufshcd_send_bsg_uic_cmd() call ufshcd_send_uic_cmd() instead of duplicating its code. Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20250723165856.145750-6-adrian.hunter@intel.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index b202626b75d4..6beb169016fd 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -4362,28 +4362,17 @@ int ufshcd_send_bsg_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) { int ret; + if (uic_cmd->argument1 != UIC_ARG_MIB(PA_PWRMODE) || + uic_cmd->command != UIC_CMD_DME_SET) + return ufshcd_send_uic_cmd(hba, uic_cmd); + if (hba->quirks & UFSHCD_QUIRK_BROKEN_UIC_CMD) return 0; ufshcd_hold(hba); - - if (uic_cmd->argument1 == UIC_ARG_MIB(PA_PWRMODE) && - uic_cmd->command == UIC_CMD_DME_SET) { - ret = ufshcd_uic_pwr_ctrl(hba, uic_cmd); - goto out; - } - - mutex_lock(&hba->uic_cmd_mutex); - ufshcd_add_delay_before_dme_cmd(hba); - - ret = __ufshcd_send_uic_cmd(hba, uic_cmd); - if (!ret) - ret = ufshcd_wait_for_uic_cmd(hba, uic_cmd); - - mutex_unlock(&hba->uic_cmd_mutex); - -out: + ret = ufshcd_uic_pwr_ctrl(hba, uic_cmd); ufshcd_release(hba); + return ret; } From b4c0cab4eb8d79cf426ac7bca20864881c8b9b8b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 23 Jul 2025 19:58:54 +0300 Subject: [PATCH 1044/2411] scsi: ufs: core: Set and clear UIC Completion interrupt as needed Currently the UIC Completion interrupt is left enabled except for when issuing link hibernate commands, in which case the interrupt is disabled and then re-enabled. 
Instead, set and clear the interrupt enable bit as needed. That is slightly simpler and less error prone, but also avoids side effects of accessing the interrupt enable register after entering link hibernation. Specifically, for some host controllers like Intel MTL, doing so disrupts the link state transition. Note also, the interrupt register is not read back anymore after it is updated. No other code does that, so it is assumed to be no longer necessary if it ever was. Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20250723165856.145750-7-adrian.hunter@intel.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 6beb169016fd..54082af7f65e 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -2622,6 +2622,7 @@ __ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) */ int ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) { + unsigned long flags; int ret; if (hba->quirks & UFSHCD_QUIRK_BROKEN_UIC_CMD) @@ -2631,6 +2632,10 @@ int ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) mutex_lock(&hba->uic_cmd_mutex); ufshcd_add_delay_before_dme_cmd(hba); + spin_lock_irqsave(hba->host->host_lock, flags); + ufshcd_enable_intr(hba, UIC_COMMAND_COMPL); + spin_unlock_irqrestore(hba->host->host_lock, flags); + ret = __ufshcd_send_uic_cmd(hba, uic_cmd); if (!ret) ret = ufshcd_wait_for_uic_cmd(hba, uic_cmd); @@ -4275,7 +4280,6 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) unsigned long flags; u8 status; int ret; - bool reenable_intr = false; mutex_lock(&hba->uic_cmd_mutex); ufshcd_add_delay_before_dme_cmd(hba); @@ -4286,15 +4290,7 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) goto out_unlock; } hba->uic_async_done = &uic_async_done; - if 
(ufshcd_readl(hba, REG_INTERRUPT_ENABLE) & UIC_COMMAND_COMPL) { - ufshcd_disable_intr(hba, UIC_COMMAND_COMPL); - /* - * Make sure UIC command completion interrupt is disabled before - * issuing UIC command. - */ - ufshcd_readl(hba, REG_INTERRUPT_ENABLE); - reenable_intr = true; - } + ufshcd_disable_intr(hba, UIC_COMMAND_COMPL); spin_unlock_irqrestore(hba->host->host_lock, flags); ret = __ufshcd_send_uic_cmd(hba, cmd); if (ret) { @@ -4338,8 +4334,6 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) spin_lock_irqsave(hba->host->host_lock, flags); hba->active_uic_cmd = NULL; hba->uic_async_done = NULL; - if (reenable_intr) - ufshcd_enable_intr(hba, UIC_COMMAND_COMPL); if (ret) { ufshcd_set_link_broken(hba); ufshcd_schedule_eh_work(hba); From d402b20f9c31e477f3cf3512be22c7943dbb0ee4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 23 Jul 2025 19:58:55 +0300 Subject: [PATCH 1045/2411] scsi: ufs: core: Do not write interrupt enable register unnecessarily Write a new value to the interrupt enable register only if it is different from the old value, thereby saving a register write operation. Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20250723165856.145750-8-adrian.hunter@intel.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen --- drivers/ufs/core/ufshcd.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 54082af7f65e..fa1fdba37267 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -371,10 +371,11 @@ EXPORT_SYMBOL_GPL(ufshcd_disable_irq); */ static void ufshcd_enable_intr(struct ufs_hba *hba, u32 intrs) { - u32 set = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); + u32 old_val = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); + u32 new_val = old_val | intrs; - set |= intrs; - ufshcd_writel(hba, set, REG_INTERRUPT_ENABLE); + if (new_val != old_val) + ufshcd_writel(hba, new_val, REG_INTERRUPT_ENABLE); } /** @@ -384,10 +385,11 @@ static void ufshcd_enable_intr(struct ufs_hba *hba, u32 intrs) */ static void ufshcd_disable_intr(struct ufs_hba *hba, u32 intrs) { - u32 set = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); + u32 old_val = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); + u32 new_val = old_val & ~intrs; - set &= ~intrs; - ufshcd_writel(hba, set, REG_INTERRUPT_ENABLE); + if (new_val != old_val) + ufshcd_writel(hba, new_val, REG_INTERRUPT_ENABLE); } static void ufshcd_configure_wb(struct ufs_hba *hba) From 22b246e3fc5eb450fffad1eb322e08e3af0e6e3d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 23 Jul 2025 19:58:56 +0300 Subject: [PATCH 1046/2411] scsi: ufs: ufs-pci: Remove control of UIC Completion interrupt for Intel MTL Now that UFS core enables the UIC Completion interrupt only when needed, Intel MTL driver no longer needs to control the interrupt itself. So remove the associated code. Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20250723165856.145750-9-adrian.hunter@intel.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen --- drivers/ufs/host/ufshcd-pci.c | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/drivers/ufs/host/ufshcd-pci.c b/drivers/ufs/host/ufshcd-pci.c index b29ec1904482..b39239f641f2 100644 --- a/drivers/ufs/host/ufshcd-pci.c +++ b/drivers/ufs/host/ufshcd-pci.c @@ -211,32 +211,6 @@ static int ufs_intel_lkf_apply_dev_quirks(struct ufs_hba *hba) return ret; } -static void ufs_intel_ctrl_uic_compl(struct ufs_hba *hba, bool enable) -{ - u32 set = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); - - if (enable) - set |= UIC_COMMAND_COMPL; - else - set &= ~UIC_COMMAND_COMPL; - ufshcd_writel(hba, set, REG_INTERRUPT_ENABLE); -} - -static void ufs_intel_mtl_h8_notify(struct ufs_hba *hba, - enum uic_cmd_dme cmd, - enum ufs_notify_change_status status) -{ - /* - * Disable UIC COMPL INTR to prevent access to UFSHCI after - * checking HCS.UPMCRS - */ - if (status == PRE_CHANGE && cmd == UIC_CMD_DME_HIBER_ENTER) - ufs_intel_ctrl_uic_compl(hba, false); - - if (status == POST_CHANGE && cmd == UIC_CMD_DME_HIBER_EXIT) - ufs_intel_ctrl_uic_compl(hba, true); -} - #define INTEL_ACTIVELTR 0x804 #define INTEL_IDLELTR 0x808 @@ -549,7 +523,6 @@ static struct ufs_hba_variant_ops ufs_intel_mtl_hba_vops = { .init = ufs_intel_mtl_init, .exit = ufs_intel_common_exit, .hce_enable_notify = ufs_intel_hce_enable_notify, - .hibern8_notify = ufs_intel_mtl_h8_notify, .link_startup_notify = ufs_intel_link_startup_notify, .resume = ufs_intel_resume, .device_reset = ufs_intel_device_reset, From 6f1fd3e0279f0b06cd8d53133a25bd83ac0fcb8a Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Tue, 22 Jul 2025 16:57:17 +0800 Subject: [PATCH 1047/2411] scsi: ufs: ufs-mediatek: Add UFS host support for MT8195 SoC Add "mediatek,mt8195-ufshci" to the of_device_id table to enable support for MediaTek MT8195/MT8395 UFS host controller. This matches the device node entry in the MT8195/MT8395 device tree and allows proper driver binding. 
Signed-off-by: Macpaul Lin Link: https://lore.kernel.org/r/20250722085721.2062657-1-macpaul.lin@mediatek.com Reviewed-by: Peter Wang Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-mediatek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 91a2f3428b9f..86ae73b89d4d 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -50,6 +50,7 @@ static const struct ufs_dev_quirk ufs_mtk_dev_fixups[] = { static const struct of_device_id ufs_mtk_of_match[] = { { .compatible = "mediatek,mt8183-ufshci" }, + { .compatible = "mediatek,mt8195-ufshci" }, {}, }; MODULE_DEVICE_TABLE(of, ufs_mtk_of_match); From 794ff7a0a6e76af93c5ec09a49b86fe73373ca59 Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Tue, 22 Jul 2025 16:57:18 +0800 Subject: [PATCH 1048/2411] scsi: dt-bindings: mediatek,ufs: Add ufs-disable-mcq flag for UFS host Add the 'mediatek,ufs-disable-mcq' property to the UFS device-tree bindings. This flag corresponds to the UFS_MTK_CAP_DISABLE_MCQ host capability recently introduced in the UFS host driver, allowing it to disable the Multiple Circular Queue (MCQ) feature when present. The binding schema has also been updated to resolve DTBS check errors. Cc: stable@vger.kernel.org Fixes: 46bd3e31d74b ("scsi: ufs: mediatek: Add UFS_MTK_CAP_DISABLE_MCQ") Signed-off-by: Macpaul Lin Link: https://lore.kernel.org/r/20250722085721.2062657-2-macpaul.lin@mediatek.com Reviewed-by: Rob Herring (Arm) Reviewed-by: Peter Wang Signed-off-by: Martin K. 
Petersen --- Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml b/Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml index 32fd535a514a..20f341d25ebc 100644 --- a/Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml +++ b/Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml @@ -33,6 +33,10 @@ properties: vcc-supply: true + mediatek,ufs-disable-mcq: + $ref: /schemas/types.yaml#/definitions/flag + description: The mask to disable MCQ (Multi-Circular Queue) for UFS host. + required: - compatible - clocks From d01cfeac89e956b74e17dc9b1c8e10c0d3b4e403 Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Tue, 22 Jul 2025 16:57:19 +0800 Subject: [PATCH 1049/2411] scsi: dt-bindings: mediatek,ufs: add MT8195 compatible and update clock nodes Add MT8195 UFSHCI compatible string. Relax the schema to allow between one to eight clocks/clock-names entries for all MediaTek UFS nodes. Legacy platforms may only need a few clocks, whereas newer devices such as the MT8195 require additional clock-gating domains. For MT8195 specifically, enforce exactly eight clocks and clock-names entries to satisfy its hardware requirements. Signed-off-by: Macpaul Lin Link: https://lore.kernel.org/r/20250722085721.2062657-3-macpaul.lin@mediatek.com Signed-off-by: Martin K. 
Petersen --- .../devicetree/bindings/ufs/mediatek,ufs.yaml | 42 ++++++++++++++++--- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml b/Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml index 20f341d25ebc..1dec54fb00f3 100644 --- a/Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml +++ b/Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml @@ -9,21 +9,20 @@ title: Mediatek Universal Flash Storage (UFS) Controller maintainers: - Stanley Chu -allOf: - - $ref: ufs-common.yaml - properties: compatible: enum: - mediatek,mt8183-ufshci - mediatek,mt8192-ufshci + - mediatek,mt8195-ufshci clocks: - maxItems: 1 + minItems: 1 + maxItems: 8 clock-names: - items: - - const: ufs + minItems: 1 + maxItems: 8 phys: maxItems: 1 @@ -47,6 +46,37 @@ required: unevaluatedProperties: false +allOf: + - $ref: ufs-common.yaml + + - if: + properties: + compatible: + contains: + enum: + - mediatek,mt8195-ufshci + then: + properties: + clocks: + minItems: 8 + clock-names: + items: + - const: ufs + - const: ufs_aes + - const: ufs_tick + - const: unipro_sysclk + - const: unipro_tick + - const: unipro_mp_bclk + - const: ufs_tx_symbol + - const: ufs_mem_sub + else: + properties: + clocks: + maxItems: 1 + clock-names: + items: + - const: ufs + examples: - | #include From a28f98103890403717008a3a016744721f87b03e Mon Sep 17 00:00:00 2001 From: Rice Lee Date: Tue, 22 Jul 2025 16:57:20 +0800 Subject: [PATCH 1050/2411] scsi: arm64: dts: mediatek: mt8195: Add UFSHCI node Add a UFS host controller interface (UFSHCI) node to mt8195.dtsi. Introduce the 'mediatek,ufs-disable-mcq' property to allow disabling Multiple Circular Queue (MCQ) support. Signed-off-by: Rice Lee Signed-off-by: Eric Lin Signed-off-by: Macpaul Lin Link: https://lore.kernel.org/r/20250722085721.2062657-4-macpaul.lin@mediatek.com Reviewed-by: Peter Wang Signed-off-by: Martin K. 
Petersen --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 25 ++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index dd065b1bf94a..8877953ce292 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -1430,6 +1430,31 @@ mmc2: mmc@11250000 { status = "disabled"; }; + ufshci: ufshci@11270000 { + compatible = "mediatek,mt8195-ufshci"; + reg = <0 0x11270000 0 0x2300>; + interrupts = ; + phys = <&ufsphy>; + clocks = <&infracfg_ao CLK_INFRA_AO_AES_UFSFDE>, + <&infracfg_ao CLK_INFRA_AO_AES>, + <&infracfg_ao CLK_INFRA_AO_UFS_TICK>, + <&infracfg_ao CLK_INFRA_AO_UNIPRO_SYS>, + <&infracfg_ao CLK_INFRA_AO_UNIPRO_TICK>, + <&infracfg_ao CLK_INFRA_AO_UFS_MP_SAP_B>, + <&infracfg_ao CLK_INFRA_AO_UFS_TX_SYMBOL>, + <&infracfg_ao CLK_INFRA_AO_PERI_UFS_MEM_SUB>; + clock-names = "ufs", "ufs_aes", "ufs_tick", + "unipro_sysclk", "unipro_tick", + "unipro_mp_bclk", "ufs_tx_symbol", + "ufs_mem_sub"; + freq-table-hz = <0 0>, <0 0>, <0 0>, + <0 0>, <0 0>, <0 0>, + <0 0>, <0 0>; + + mediatek,ufs-disable-mcq; + status = "disabled"; + }; + lvts_mcu: thermal-sensor@11278000 { compatible = "mediatek,mt8195-lvts-mcu"; reg = <0 0x11278000 0 0x1000>; From e95122a32e777309412e30dc638dbc88b9036811 Mon Sep 17 00:00:00 2001 From: Weidong Wang Date: Fri, 25 Jul 2025 17:46:02 +0800 Subject: [PATCH 1051/2411] ASoC: codecs: Add acpi_match_table for aw88399 driver Add acpi_match_table to the aw88399 driver so that it can be used on more platforms. 
Signed-off-by: Weidong Wang Link: https://patch.msgid.link/20250725094602.10017-1-wangweidong.a@awinic.com Signed-off-by: Mark Brown --- sound/soc/codecs/aw88399.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/soc/codecs/aw88399.c b/sound/soc/codecs/aw88399.c index bad3ad6b8c0e..c23e70d64d0c 100644 --- a/sound/soc/codecs/aw88399.c +++ b/sound/soc/codecs/aw88399.c @@ -2330,9 +2330,18 @@ static const struct i2c_device_id aw88399_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, aw88399_i2c_id); +#ifdef CONFIG_ACPI +static const struct acpi_device_id aw88399_acpi_match[] = { + { "AWDZ8399", 0 }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, aw88399_acpi_match); +#endif + static struct i2c_driver aw88399_i2c_driver = { .driver = { .name = AW88399_I2C_NAME, + .acpi_match_table = ACPI_PTR(aw88399_acpi_match), }, .probe = aw88399_i2c_probe, .id_table = aw88399_i2c_id, From 8e48727c26c4d839ff9b4b73d1cae486bea7fe19 Mon Sep 17 00:00:00 2001 From: Salomon Dushimirimana Date: Thu, 24 Jul 2025 21:45:20 +0000 Subject: [PATCH 1052/2411] scsi: sd: Make sd shutdown issue START STOP UNIT appropriately Commit aa3998dbeb3a ("ata: libata-scsi: Disable scsi device manage_system_start_stop") enabled libata EH to manage device power mode transitions for system suspend/resume and removed the flag from ata_scsi_dev_config. However, since the sd_shutdown() function still relies on the manage_system_start_stop flag, a spin-down command is not issued to the disk with command "echo 1 > /sys/block/sdb/device/delete" sd_shutdown() can be called for both system/runtime start stop operations, so utilize the manage_runtime_start_stop flag set in the ata_scsi_dev_config and issue a spin-down command during disk removal when the system is running. This is in addition to when the system is powering off and manage_shutdown flag is set. The manage_system_start_stop flag will still be used for drivers that still set the flag. 
Fixes: aa3998dbeb3a ("ata: libata-scsi: Disable scsi device manage_system_start_stop") Signed-off-by: Salomon Dushimirimana Link: https://lore.kernel.org/r/20250724214520.112927-1-salomondush@google.com Tested-by: Damien Le Moal Reviewed-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index daddef2e9e87..9e9b905b2881 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -4168,7 +4168,9 @@ static void sd_shutdown(struct device *dev) if ((system_state != SYSTEM_RESTART && sdkp->device->manage_system_start_stop) || (system_state == SYSTEM_POWER_OFF && - sdkp->device->manage_shutdown)) { + sdkp->device->manage_shutdown) || + (system_state == SYSTEM_RUNNING && + sdkp->device->manage_runtime_start_stop)) { sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); sd_start_stop_device(sdkp, 0); } From 54091eee08acebfb5e971611c3f189e7577a1058 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 25 Jul 2025 10:58:14 +0900 Subject: [PATCH 1053/2411] scsi: libsas: Refactor dev_is_sata() Use a switch statement in dev_is_sata() to make the code more readable (and probably slightly better than a series of or conditions). Also have this inline function return a boolean instead of an integer. No functional changes. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20250725015818.171252-2-dlemoal@kernel.org Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: Martin K. 
Petersen --- include/scsi/sas_ata.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h index 92e27e7bf088..8dddd0036f99 100644 --- a/include/scsi/sas_ata.h +++ b/include/scsi/sas_ata.h @@ -15,10 +15,17 @@ #ifdef CONFIG_SCSI_SAS_ATA -static inline int dev_is_sata(struct domain_device *dev) +static inline bool dev_is_sata(struct domain_device *dev) { - return dev->dev_type == SAS_SATA_DEV || dev->dev_type == SAS_SATA_PM || - dev->dev_type == SAS_SATA_PM_PORT || dev->dev_type == SAS_SATA_PENDING; + switch (dev->dev_type) { + case SAS_SATA_DEV: + case SAS_SATA_PENDING: + case SAS_SATA_PM: + case SAS_SATA_PM_PORT: + return true; + default: + return false; + } } int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy); @@ -49,9 +56,9 @@ static inline void sas_ata_disabled_notice(void) pr_notice_once("ATA device seen but CONFIG_SCSI_SAS_ATA=N\n"); } -static inline int dev_is_sata(struct domain_device *dev) +static inline bool dev_is_sata(struct domain_device *dev) { - return 0; + return false; } static inline int sas_ata_init(struct domain_device *dev) { From 0dd03570512a305bc44ac9c8326da95dd8fc3a1d Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 25 Jul 2025 10:58:15 +0900 Subject: [PATCH 1054/2411] scsi: libsas: Simplify sas_ata_wait_eh() Simplify the code of sas_ata_wait_eh(), removing the local variable ap for the pointer to the device ata_port structure. The test using dev_is_sata() is also removed as all call sites of this function check if the device is a SATA one before calling this function. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20250725015818.171252-3-dlemoal@kernel.org Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: Martin K. 
Petersen --- drivers/scsi/libsas/sas_ata.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 7b4e7a61965a..2cbf38b18c5c 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -927,13 +927,7 @@ EXPORT_SYMBOL_GPL(sas_ata_schedule_reset); void sas_ata_wait_eh(struct domain_device *dev) { - struct ata_port *ap; - - if (!dev_is_sata(dev)) - return; - - ap = dev->sata_dev.ap; - ata_port_wait_eh(ap); + ata_port_wait_eh(dev->sata_dev.ap); } void sas_ata_device_link_abort(struct domain_device *device, bool force_reset) From bd31394aabf36ee18781c6371e02d789484ffda3 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 25 Jul 2025 10:58:16 +0900 Subject: [PATCH 1055/2411] scsi: libsas: Make sas_get_ata_info() static The function sas_get_ata_info() is used only in drivers/scsi/libsas/sas_ata.c. Remove its definition from include/scsi/sas_ata.h and make this function static. No functional changes. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20250725015818.171252-4-dlemoal@kernel.org Reviewed-by: Johannes Thumshirn Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: Martin K. 
Petersen --- drivers/scsi/libsas/sas_ata.c | 2 +- include/scsi/sas_ata.h | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 2cbf38b18c5c..cc093cdc9c69 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -252,7 +252,7 @@ static int sas_get_ata_command_set(struct domain_device *dev) return ata_dev_classify(&tf); } -int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy) +static int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy) { if (phy->attached_tproto & SAS_PROTOCOL_STP) dev->tproto = phy->attached_tproto; diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h index 8dddd0036f99..5e3475975aee 100644 --- a/include/scsi/sas_ata.h +++ b/include/scsi/sas_ata.h @@ -28,7 +28,6 @@ static inline bool dev_is_sata(struct domain_device *dev) } } -int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy); int sas_ata_init(struct domain_device *dev); void sas_ata_task_abort(struct sas_task *task); void sas_ata_strategy_handler(struct Scsi_Host *shost); @@ -96,11 +95,6 @@ static inline void sas_resume_sata(struct asd_sas_port *port) { } -static inline int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy) -{ - return 0; -} - static inline void sas_ata_end_eh(struct ata_port *ap) { } From 704ed03abf6b1c2752a8b16446a5ebf18694fefe Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 25 Jul 2025 10:58:17 +0900 Subject: [PATCH 1056/2411] scsi: libsas: Move declarations of internal functions to sas_internal.h Move the declaration of all functions used only within libsas from include/scsi/sas_ata.h to drivers/scsi/libsas/sas_internal.h. No functional changes. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20250725015818.171252-5-dlemoal@kernel.org Reviewed-by: Johannes Thumshirn Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: Martin K. 
Petersen --- drivers/scsi/libsas/sas_internal.h | 74 ++++++++++++++++++++++++++++++ include/scsi/sas_ata.h | 68 +-------------------------- 2 files changed, 75 insertions(+), 67 deletions(-) diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h index 03d6ec1eb970..16f8d81d7531 100644 --- a/drivers/scsi/libsas/sas_internal.h +++ b/drivers/scsi/libsas/sas_internal.h @@ -222,4 +222,78 @@ static inline void sas_put_device(struct domain_device *dev) kref_put(&dev->kref, sas_free_device); } +#ifdef CONFIG_SCSI_SAS_ATA + +int sas_ata_init(struct domain_device *dev); +void sas_ata_task_abort(struct sas_task *task); +int sas_discover_sata(struct domain_device *dev); +int sas_ata_add_dev(struct domain_device *parent, struct ex_phy *phy, + struct domain_device *child, int phy_id); +void sas_ata_strategy_handler(struct Scsi_Host *shost); +void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q); +void sas_ata_end_eh(struct ata_port *ap); +void sas_ata_wait_eh(struct domain_device *dev); +void sas_probe_sata(struct asd_sas_port *port); +void sas_suspend_sata(struct asd_sas_port *port); +void sas_resume_sata(struct asd_sas_port *port); + +#else + +static inline int sas_ata_init(struct domain_device *dev) +{ + return 0; +} + +static inline void sas_ata_task_abort(struct sas_task *task) +{ +} + +static inline void sas_ata_strategy_handler(struct Scsi_Host *shost) +{ +} + +static inline void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q) +{ +} + +static inline void sas_ata_end_eh(struct ata_port *ap) +{ +} + +static inline void sas_ata_wait_eh(struct domain_device *dev) +{ +} + +static inline void sas_probe_sata(struct asd_sas_port *port) +{ +} + +static inline void sas_suspend_sata(struct asd_sas_port *port) +{ +} + +static inline void sas_resume_sata(struct asd_sas_port *port) +{ +} + +static inline void sas_ata_disabled_notice(void) +{ + pr_notice_once("ATA device seen but CONFIG_SCSI_SAS_ATA=N\n"); +} + +static inline int 
sas_discover_sata(struct domain_device *dev) +{ + sas_ata_disabled_notice(); + return -ENXIO; +} + +static inline int sas_ata_add_dev(struct domain_device *parent, struct ex_phy *phy, + struct domain_device *child, int phy_id) +{ + sas_ata_disabled_notice(); + return -ENODEV; +} + +#endif + #endif /* _SAS_INTERNAL_H_ */ diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h index 5e3475975aee..a161c0222931 100644 --- a/include/scsi/sas_ata.h +++ b/include/scsi/sas_ata.h @@ -28,77 +28,24 @@ static inline bool dev_is_sata(struct domain_device *dev) } } -int sas_ata_init(struct domain_device *dev); -void sas_ata_task_abort(struct sas_task *task); -void sas_ata_strategy_handler(struct Scsi_Host *shost); -void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q); void sas_ata_schedule_reset(struct domain_device *dev); -void sas_ata_wait_eh(struct domain_device *dev); -void sas_probe_sata(struct asd_sas_port *port); -void sas_suspend_sata(struct asd_sas_port *port); -void sas_resume_sata(struct asd_sas_port *port); -void sas_ata_end_eh(struct ata_port *ap); void sas_ata_device_link_abort(struct domain_device *dev, bool force_reset); -int sas_execute_ata_cmd(struct domain_device *device, u8 *fis, - int force_phy_id); +int sas_execute_ata_cmd(struct domain_device *device, u8 *fis, int force_phy_id); int smp_ata_check_ready_type(struct ata_link *link); -int sas_discover_sata(struct domain_device *dev); -int sas_ata_add_dev(struct domain_device *parent, struct ex_phy *phy, - struct domain_device *child, int phy_id); extern const struct attribute_group sas_ata_sdev_attr_group; #else -static inline void sas_ata_disabled_notice(void) -{ - pr_notice_once("ATA device seen but CONFIG_SCSI_SAS_ATA=N\n"); -} - static inline bool dev_is_sata(struct domain_device *dev) { return false; } -static inline int sas_ata_init(struct domain_device *dev) -{ - return 0; -} -static inline void sas_ata_task_abort(struct sas_task *task) -{ -} - -static inline void 
sas_ata_strategy_handler(struct Scsi_Host *shost) -{ -} - -static inline void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q) -{ -} static inline void sas_ata_schedule_reset(struct domain_device *dev) { } -static inline void sas_ata_wait_eh(struct domain_device *dev) -{ -} - -static inline void sas_probe_sata(struct asd_sas_port *port) -{ -} - -static inline void sas_suspend_sata(struct asd_sas_port *port) -{ -} - -static inline void sas_resume_sata(struct asd_sas_port *port) -{ -} - -static inline void sas_ata_end_eh(struct ata_port *ap) -{ -} - static inline void sas_ata_device_link_abort(struct domain_device *dev, bool force_reset) { @@ -115,19 +62,6 @@ static inline int smp_ata_check_ready_type(struct ata_link *link) return 0; } -static inline int sas_discover_sata(struct domain_device *dev) -{ - sas_ata_disabled_notice(); - return -ENXIO; -} - -static inline int sas_ata_add_dev(struct domain_device *parent, struct ex_phy *phy, - struct domain_device *child, int phy_id) -{ - sas_ata_disabled_notice(); - return -ENODEV; -} - #define sas_ata_sdev_attr_group ((struct attribute_group) {}) #endif From 75fe230b9bed364d7ddca482ff29979d873718fa Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 25 Jul 2025 10:58:18 +0900 Subject: [PATCH 1057/2411] scsi: libsas: Use a bool for sas_deform_port() second argument Change the type of the "gone" argument of sas_deform_port() from int to bool. Similarly, to be consistent, do the same change to the function sas_unregister_domain_devices(). No functional changes. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20250725015818.171252-6-dlemoal@kernel.org Reviewed-by: Johannes Thumshirn Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: Martin K. 
Petersen --- drivers/scsi/libsas/sas_discover.c | 2 +- drivers/scsi/libsas/sas_internal.h | 4 ++-- drivers/scsi/libsas/sas_phy.c | 6 +++--- drivers/scsi/libsas/sas_port.c | 13 ++++++------- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index 951bdc554a10..b07062db50b2 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -406,7 +406,7 @@ void sas_unregister_dev(struct asd_sas_port *port, struct domain_device *dev) } } -void sas_unregister_domain_devices(struct asd_sas_port *port, int gone) +void sas_unregister_domain_devices(struct asd_sas_port *port, bool gone) { struct domain_device *dev, *n; diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h index 16f8d81d7531..6706f2be8d27 100644 --- a/drivers/scsi/libsas/sas_internal.h +++ b/drivers/scsi/libsas/sas_internal.h @@ -44,7 +44,7 @@ void sas_hash_addr(u8 *hashed, const u8 *sas_addr); int sas_discover_root_expander(struct domain_device *dev); int sas_ex_revalidate_domain(struct domain_device *dev); -void sas_unregister_domain_devices(struct asd_sas_port *port, int gone); +void sas_unregister_domain_devices(struct asd_sas_port *port, bool gone); void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *port); void sas_discover_event(struct asd_sas_port *port, enum discover_event ev); @@ -70,7 +70,7 @@ void sas_enable_revalidation(struct sas_ha_struct *ha); void sas_queue_deferred_work(struct sas_ha_struct *ha); void __sas_drain_work(struct sas_ha_struct *ha); -void sas_deform_port(struct asd_sas_phy *phy, int gone); +void sas_deform_port(struct asd_sas_phy *phy, bool gone); void sas_porte_bytes_dmaed(struct work_struct *work); void sas_porte_broadcast_rcvd(struct work_struct *work); diff --git a/drivers/scsi/libsas/sas_phy.c b/drivers/scsi/libsas/sas_phy.c index 57494ac97076..635835c28ecd 100644 --- a/drivers/scsi/libsas/sas_phy.c +++ 
b/drivers/scsi/libsas/sas_phy.c @@ -20,7 +20,7 @@ static void sas_phye_loss_of_signal(struct work_struct *work) struct asd_sas_phy *phy = ev->phy; phy->error = 0; - sas_deform_port(phy, 1); + sas_deform_port(phy, true); } static void sas_phye_oob_done(struct work_struct *work) @@ -40,7 +40,7 @@ static void sas_phye_oob_error(struct work_struct *work) struct sas_internal *i = to_sas_internal(sas_ha->shost->transportt); - sas_deform_port(phy, 1); + sas_deform_port(phy, true); if (!port && phy->enabled && i->dft->lldd_control_phy) { phy->error++; @@ -85,7 +85,7 @@ static void sas_phye_resume_timeout(struct work_struct *work) phy->error = 0; phy->suspended = 0; - sas_deform_port(phy, 1); + sas_deform_port(phy, true); } diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c index e3f2ed913419..de7556070048 100644 --- a/drivers/scsi/libsas/sas_port.c +++ b/drivers/scsi/libsas/sas_port.c @@ -113,7 +113,7 @@ static void sas_form_port(struct asd_sas_phy *phy) if (port) { if (!phy_is_wideport_member(port, phy)) - sas_deform_port(phy, 0); + sas_deform_port(phy, false); else if (phy->suspended) { phy->suspended = 0; sas_resume_port(phy); @@ -206,7 +206,7 @@ static void sas_form_port(struct asd_sas_phy *phy) * This is called when the physical link to the other phy has been * lost (on this phy), in Event thread context. We cannot delay here. 
*/ -void sas_deform_port(struct asd_sas_phy *phy, int gone) +void sas_deform_port(struct asd_sas_phy *phy, bool gone) { struct sas_ha_struct *sas_ha = phy->ha; struct asd_sas_port *port = phy->port; @@ -301,7 +301,7 @@ void sas_porte_link_reset_err(struct work_struct *work) struct asd_sas_event *ev = to_asd_sas_event(work); struct asd_sas_phy *phy = ev->phy; - sas_deform_port(phy, 1); + sas_deform_port(phy, true); } void sas_porte_timer_event(struct work_struct *work) @@ -309,7 +309,7 @@ void sas_porte_timer_event(struct work_struct *work) struct asd_sas_event *ev = to_asd_sas_event(work); struct asd_sas_phy *phy = ev->phy; - sas_deform_port(phy, 1); + sas_deform_port(phy, true); } void sas_porte_hard_reset(struct work_struct *work) @@ -317,7 +317,7 @@ void sas_porte_hard_reset(struct work_struct *work) struct asd_sas_event *ev = to_asd_sas_event(work); struct asd_sas_phy *phy = ev->phy; - sas_deform_port(phy, 1); + sas_deform_port(phy, true); } /* ---------- SAS port registration ---------- */ @@ -358,8 +358,7 @@ void sas_unregister_ports(struct sas_ha_struct *sas_ha) for (i = 0; i < sas_ha->num_phys; i++) if (sas_ha->sas_phy[i]->port) - sas_deform_port(sas_ha->sas_phy[i], 0); - + sas_deform_port(sas_ha->sas_phy[i], false); } const work_func_t sas_port_event_fns[PORT_NUM_EVENTS] = { From e60737dbfb92fc32511afa68ea70513df7548919 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 30 Jun 2025 09:47:17 +0100 Subject: [PATCH 1058/2411] ARM: 9449/1: coresight: Finish removal of Coresight support in arch/arm/kernel Commit 184901a06a36 ("ARM: removing support for etb/etm in "arch/arm/kernel/"") removed asm/hardware/coresight.h which is included by this file. Therefore this is dead code so delete it. 
Acked-by: Suzuki K Poulose Signed-off-by: James Clark Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/cti.h | 160 ------------------------------------- 1 file changed, 160 deletions(-) delete mode 100644 arch/arm/include/asm/cti.h diff --git a/arch/arm/include/asm/cti.h b/arch/arm/include/asm/cti.h deleted file mode 100644 index f8500e5d6ea8..000000000000 --- a/arch/arm/include/asm/cti.h +++ /dev/null @@ -1,160 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASMARM_CTI_H -#define __ASMARM_CTI_H - -#include -#include - -/* The registers' definition is from section 3.2 of - * Embedded Cross Trigger Revision: r0p0 - */ -#define CTICONTROL 0x000 -#define CTISTATUS 0x004 -#define CTILOCK 0x008 -#define CTIPROTECTION 0x00C -#define CTIINTACK 0x010 -#define CTIAPPSET 0x014 -#define CTIAPPCLEAR 0x018 -#define CTIAPPPULSE 0x01c -#define CTIINEN 0x020 -#define CTIOUTEN 0x0A0 -#define CTITRIGINSTATUS 0x130 -#define CTITRIGOUTSTATUS 0x134 -#define CTICHINSTATUS 0x138 -#define CTICHOUTSTATUS 0x13c -#define CTIPERIPHID0 0xFE0 -#define CTIPERIPHID1 0xFE4 -#define CTIPERIPHID2 0xFE8 -#define CTIPERIPHID3 0xFEC -#define CTIPCELLID0 0xFF0 -#define CTIPCELLID1 0xFF4 -#define CTIPCELLID2 0xFF8 -#define CTIPCELLID3 0xFFC - -/* The below are from section 3.6.4 of - * CoreSight v1.0 Architecture Specification - */ -#define LOCKACCESS 0xFB0 -#define LOCKSTATUS 0xFB4 - -/** - * struct cti - cross trigger interface struct - * @base: mapped virtual address for the cti base - * @irq: irq number for the cti - * @trig_out_for_irq: triger out number which will cause - * the @irq happen - * - * cti struct used to operate cti registers. 
- */ -struct cti { - void __iomem *base; - int irq; - int trig_out_for_irq; -}; - -/** - * cti_init - initialize the cti instance - * @cti: cti instance - * @base: mapped virtual address for the cti base - * @irq: irq number for the cti - * @trig_out: triger out number which will cause - * the @irq happen - * - * called by machine code to pass the board dependent - * @base, @irq and @trig_out to cti. - */ -static inline void cti_init(struct cti *cti, - void __iomem *base, int irq, int trig_out) -{ - cti->base = base; - cti->irq = irq; - cti->trig_out_for_irq = trig_out; -} - -/** - * cti_map_trigger - use the @chan to map @trig_in to @trig_out - * @cti: cti instance - * @trig_in: trigger in number - * @trig_out: trigger out number - * @channel: channel number - * - * This function maps one trigger in of @trig_in to one trigger - * out of @trig_out using the channel @chan. - */ -static inline void cti_map_trigger(struct cti *cti, - int trig_in, int trig_out, int chan) -{ - void __iomem *base = cti->base; - unsigned long val; - - val = __raw_readl(base + CTIINEN + trig_in * 4); - val |= BIT(chan); - __raw_writel(val, base + CTIINEN + trig_in * 4); - - val = __raw_readl(base + CTIOUTEN + trig_out * 4); - val |= BIT(chan); - __raw_writel(val, base + CTIOUTEN + trig_out * 4); -} - -/** - * cti_enable - enable the cti module - * @cti: cti instance - * - * enable the cti module - */ -static inline void cti_enable(struct cti *cti) -{ - __raw_writel(0x1, cti->base + CTICONTROL); -} - -/** - * cti_disable - disable the cti module - * @cti: cti instance - * - * enable the cti module - */ -static inline void cti_disable(struct cti *cti) -{ - __raw_writel(0, cti->base + CTICONTROL); -} - -/** - * cti_irq_ack - clear the cti irq - * @cti: cti instance - * - * clear the cti irq - */ -static inline void cti_irq_ack(struct cti *cti) -{ - void __iomem *base = cti->base; - unsigned long val; - - val = __raw_readl(base + CTIINTACK); - val |= BIT(cti->trig_out_for_irq); - 
__raw_writel(val, base + CTIINTACK); -} - -/** - * cti_unlock - unlock cti module - * @cti: cti instance - * - * unlock the cti module, or else any writes to the cti - * module is not allowed. - */ -static inline void cti_unlock(struct cti *cti) -{ - __raw_writel(CS_LAR_KEY, cti->base + LOCKACCESS); -} - -/** - * cti_lock - lock cti module - * @cti: cti instance - * - * lock the cti module, so any writes to the cti - * module will be not allowed. - */ -static inline void cti_lock(struct cti *cti) -{ - __raw_writel(~CS_LAR_KEY, cti->base + LOCKACCESS); -} -#endif From f11a5f89910a7ae970fbce4fdc02d86a8ba8570f Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Thu, 24 Jul 2025 15:43:06 -0700 Subject: [PATCH 1059/2411] Documentation/ABI/testing/debugfs-cxl: Add 'cxl' to clear_poison path 'cxl' is missing from the path to the clear_poison attribute. Add it. Signed-off-by: Alison Schofield Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250724224308.2101255-1-alison.schofield@intel.com Signed-off-by: Dave Jiang --- Documentation/ABI/testing/debugfs-cxl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl index 12488c14be64..e95e21f131e9 100644 --- a/Documentation/ABI/testing/debugfs-cxl +++ b/Documentation/ABI/testing/debugfs-cxl @@ -20,7 +20,7 @@ Description: visible for devices supporting the capability. -What: /sys/kernel/debug/memX/clear_poison +What: /sys/kernel/debug/cxl/memX/clear_poison Date: April, 2023 KernelVersion: v6.4 Contact: linux-cxl@vger.kernel.org From fccaaf6fbbc59910edcf276f97a5b2ef5778c55e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:32:43 -0700 Subject: [PATCH 1060/2411] perf build-id: Change sprintf functions to snprintf Pass in a size argument rather than implying all build id strings must be SBUILD_ID_SIZE. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-4-irogers@google.com [ fixed some build errors ] Signed-off-by: Namhyung Kim --- tools/perf/builtin-buildid-cache.c | 12 +++---- tools/perf/builtin-buildid-list.c | 6 ++-- tools/perf/tests/sdt.c | 2 +- tools/perf/util/build-id.c | 33 ++++++++----------- tools/perf/util/build-id.h | 6 ++-- tools/perf/util/disasm.c | 2 +- tools/perf/util/dso.c | 4 +-- tools/perf/util/dsos.c | 2 +- tools/perf/util/event.c | 2 +- tools/perf/util/header.c | 2 +- tools/perf/util/map.c | 2 +- tools/perf/util/probe-event.c | 4 +-- tools/perf/util/probe-file.c | 4 +-- tools/perf/util/probe-finder.c | 2 +- .../scripting-engines/trace-event-python.c | 7 ++-- tools/perf/util/symbol.c | 2 +- 16 files changed, 42 insertions(+), 50 deletions(-) diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index b0511d16aeb6..3f7739b21148 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -31,7 +31,7 @@ #include #include -static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) +static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid, size_t sbuildid_size) { char root_dir[PATH_MAX]; char *p; @@ -42,7 +42,7 @@ static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) if (!p) return -1; *p = '\0'; - return sysfs__sprintf_build_id(root_dir, sbuildid); + return sysfs__snprintf_build_id(root_dir, sbuildid, sbuildid_size); } static int build_id_cache__kcore_dir(char *dir, size_t sz) @@ -128,7 +128,7 @@ static int build_id_cache__add_kcore(const char *filename, bool force) return -1; *p = '\0'; - if (build_id_cache__kcore_buildid(from_dir, sbuildid) < 0) + if (build_id_cache__kcore_buildid(from_dir, sbuildid, sizeof(sbuildid)) < 0) return -1; scnprintf(to_dir, sizeof(to_dir), "%s/%s/%s", @@ -187,7 +187,7 @@ static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi) return -1; } - 
build_id__sprintf(&bid, sbuild_id); + build_id__snprintf(&bid, sbuild_id, sizeof(sbuild_id)); err = build_id_cache__add_s(sbuild_id, filename, nsi, false, false); pr_debug("Adding %s %s: %s\n", sbuild_id, filename, @@ -211,7 +211,7 @@ static int build_id_cache__remove_file(const char *filename, struct nsinfo *nsi) return -1; } - build_id__sprintf(&bid, sbuild_id); + build_id__snprintf(&bid, sbuild_id, sizeof(sbuild_id)); err = build_id_cache__remove_s(sbuild_id); pr_debug("Removing %s %s: %s\n", sbuild_id, filename, err ? "FAIL" : "Ok"); @@ -317,7 +317,7 @@ static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi) } err = 0; - build_id__sprintf(&bid, sbuild_id); + build_id__snprintf(&bid, sbuild_id, sizeof(sbuild_id)); if (build_id_cache__cached(sbuild_id)) err = build_id_cache__remove_s(sbuild_id); diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 52dfacaff8e3..ba8ba0303920 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -31,7 +31,7 @@ static int buildid__map_cb(struct map *map, void *arg __maybe_unused) memset(bid_buf, 0, sizeof(bid_buf)); if (dso__has_build_id(dso)) - build_id__sprintf(dso__bid_const(dso), bid_buf); + build_id__snprintf(dso__bid_const(dso), bid_buf, sizeof(bid_buf)); printf("%s %16" PRIx64 " %16" PRIx64, bid_buf, map__start(map), map__end(map)); if (dso_long_name != NULL) printf(" %s", dso_long_name); @@ -57,7 +57,7 @@ static int sysfs__fprintf_build_id(FILE *fp) char sbuild_id[SBUILD_ID_SIZE]; int ret; - ret = sysfs__sprintf_build_id("/", sbuild_id); + ret = sysfs__snprintf_build_id("/", sbuild_id, sizeof(sbuild_id)); if (ret != sizeof(sbuild_id)) return ret < 0 ? 
ret : -EINVAL; @@ -69,7 +69,7 @@ static int filename__fprintf_build_id(const char *name, FILE *fp) char sbuild_id[SBUILD_ID_SIZE]; int ret; - ret = filename__sprintf_build_id(name, sbuild_id); + ret = filename__snprintf_build_id(name, sbuild_id, sizeof(sbuild_id)); if (ret != sizeof(sbuild_id)) return ret < 0 ? ret : -EINVAL; diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index 919712899251..663c8f700069 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -37,7 +37,7 @@ static int build_id_cache__add_file(const char *filename) return err; } - build_id__sprintf(&bid, sbuild_id); + build_id__snprintf(&bid, sbuild_id, sizeof(sbuild_id)); err = build_id_cache__add_s(sbuild_id, filename, NULL, false, false); if (err < 0) pr_debug("Failed to add build id cache of %s\n", filename); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 5bc2040bdd0d..aa35dceace90 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -67,24 +67,17 @@ int build_id__mark_dso_hit(const struct perf_tool *tool __maybe_unused, return 0; } -int build_id__sprintf(const struct build_id *build_id, char *bf) +int build_id__snprintf(const struct build_id *build_id, char *bf, size_t bf_size) { - char *bid = bf; - const u8 *raw = build_id->data; - size_t i; + size_t offs = 0; - bf[0] = 0x0; + for (size_t i = 0; i < build_id->size && offs < bf_size; ++i) + offs += snprintf(bf + offs, bf_size - offs, "%02x", build_id->data[i]); - for (i = 0; i < build_id->size; ++i) { - sprintf(bid, "%02x", *raw); - ++raw; - bid += 2; - } - - return (bid - bf) + 1; + return offs; } -int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id) +int sysfs__snprintf_build_id(const char *root_dir, char *sbuild_id, size_t sbuild_id_size) { char notes[PATH_MAX]; struct build_id bid; @@ -99,10 +92,10 @@ int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id) if (ret < 0) return ret; - return build_id__sprintf(&bid, sbuild_id); + return 
build_id__snprintf(&bid, sbuild_id, sbuild_id_size); } -int filename__sprintf_build_id(const char *pathname, char *sbuild_id) +int filename__snprintf_build_id(const char *pathname, char *sbuild_id, size_t sbuild_id_size) { struct build_id bid; int ret; @@ -111,7 +104,7 @@ int filename__sprintf_build_id(const char *pathname, char *sbuild_id) if (ret < 0) return ret; - return build_id__sprintf(&bid, sbuild_id); + return build_id__snprintf(&bid, sbuild_id, sbuild_id_size); } /* asnprintf consolidates asprintf and snprintf */ @@ -212,9 +205,9 @@ static bool build_id_cache__valid_id(char *sbuild_id) return false; if (!strcmp(pathname, DSO__NAME_KALLSYMS)) - ret = sysfs__sprintf_build_id("/", real_sbuild_id); + ret = sysfs__snprintf_build_id("/", real_sbuild_id, sizeof(real_sbuild_id)); else if (pathname[0] == '/') - ret = filename__sprintf_build_id(pathname, real_sbuild_id); + ret = filename__snprintf_build_id(pathname, real_sbuild_id, sizeof(real_sbuild_id)); else ret = -EINVAL; /* Should we support other special DSO cache? 
*/ if (ret >= 0) @@ -243,7 +236,7 @@ char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size, if (!dso__has_build_id(dso)) return NULL; - build_id__sprintf(dso__bid_const(dso), sbuild_id); + build_id__snprintf(dso__bid_const(dso), sbuild_id, sizeof(sbuild_id)); linkname = build_id_cache__linkname(sbuild_id, NULL, 0); if (!linkname) return NULL; @@ -769,7 +762,7 @@ static int build_id_cache__add_b(const struct build_id *bid, { char sbuild_id[SBUILD_ID_SIZE]; - build_id__sprintf(bid, sbuild_id); + build_id__snprintf(bid, sbuild_id, sizeof(sbuild_id)); return __build_id_cache__add_s(sbuild_id, name, nsi, is_kallsyms, is_vdso, proper_name, root_dir); diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index e3e0a446ff0c..47e621cebe1b 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -21,10 +21,10 @@ struct feat_fd; struct nsinfo; void build_id__init(struct build_id *bid, const u8 *data, size_t size); -int build_id__sprintf(const struct build_id *build_id, char *bf); +int build_id__snprintf(const struct build_id *build_id, char *bf, size_t bf_size); bool build_id__is_defined(const struct build_id *bid); -int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); -int filename__sprintf_build_id(const char *pathname, char *sbuild_id); +int sysfs__snprintf_build_id(const char *root_dir, char *sbuild_id, size_t sbuild_id_size); +int filename__snprintf_build_id(const char *pathname, char *sbuild_id, size_t sbuild_id_size); char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf, size_t size); diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c index ff475a239f4b..b1e4919d016f 100644 --- a/tools/perf/util/disasm.c +++ b/tools/perf/util/disasm.c @@ -1218,7 +1218,7 @@ int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, s char *build_id_msg = NULL; if (dso__has_build_id(dso)) { - build_id__sprintf(dso__bid(dso), bf + 15); + build_id__snprintf(dso__bid(dso), 
bf + 15, sizeof(bf) - 15); build_id_msg = bf; } scnprintf(buf, buflen, diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index c6c1637e098c..4ff94029632e 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -217,7 +217,7 @@ int dso__read_binary_type_filename(const struct dso *dso, break; } - build_id__sprintf(dso__bid_const(dso), build_id_hex); + build_id__snprintf(dso__bid_const(dso), build_id_hex, sizeof(build_id_hex)); len = __symbol__join_symfs(filename, size, "/usr/lib/debug/.build-id/"); snprintf(filename + len, size - len, "%.2s/%s.debug", build_id_hex, build_id_hex + 2); @@ -1708,7 +1708,7 @@ static size_t dso__fprintf_buildid(struct dso *dso, FILE *fp) { char sbuild_id[SBUILD_ID_SIZE]; - build_id__sprintf(dso__bid(dso), sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); return fprintf(fp, "%s", sbuild_id); } diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index 4d213017d202..47538273915d 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -373,7 +373,7 @@ static int dsos__fprintf_buildid_cb(struct dso *dso, void *data) if (args->skip && args->skip(dso, args->parm)) return 0; - build_id__sprintf(dso__bid(dso), sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); args->ret += fprintf(args->fp, "%-40s %s\n", sbuild_id, dso__long_name(dso)); return 0; } diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 14b0d3689137..fcf44149feb2 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -334,7 +334,7 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp) build_id__init(&bid, event->mmap2.build_id, event->mmap2.build_id_size); - build_id__sprintf(&bid, sbuild_id); + build_id__snprintf(&bid, sbuild_id, sizeof(sbuild_id)); return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64 " <%s>]: %c%c%c%c %s\n", diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 
d941d7aa0f49..4f8133a18312 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2303,7 +2303,7 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev, free(m.name); } - build_id__sprintf(dso__bid(dso), sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); pr_debug("build id event received for %s: %s [%zu]\n", dso__long_name(dso), sbuild_id, size); dso__put(dso); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index d729438b7d65..0f6b185f9589 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -354,7 +354,7 @@ int map__load(struct map *map) if (dso__has_build_id(dso)) { char sbuild_id[SBUILD_ID_SIZE]; - build_id__sprintf(dso__bid(dso), sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); pr_debug("%s with build id %s not found", name, sbuild_id); } else pr_debug("Failed to open %s", name); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 307ad6242a4e..c10549fc451b 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -502,7 +502,7 @@ static struct debuginfo *open_from_debuginfod(struct dso *dso, struct nsinfo *ns if (!c) return NULL; - build_id__sprintf(dso__bid(dso), sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); fd = debuginfod_find_debuginfo(c, (const unsigned char *)sbuild_id, 0, &path); if (fd >= 0) @@ -1089,7 +1089,7 @@ static int __show_line_range(struct line_range *lr, const char *module, } if (dinfo->build_id) { build_id__init(&bid, dinfo->build_id, BUILD_ID_SIZE); - build_id__sprintf(&bid, sbuild_id); + build_id__snprintf(&bid, sbuild_id, sizeof(sbuild_id)); } debuginfo__delete(dinfo); if (ret == 0 || ret == -ENOENT) { diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index ec8ac242fedb..5069fb61f48c 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -448,10 +448,10 @@ static int 
probe_cache__open(struct probe_cache *pcache, const char *target, if (!target || !strcmp(target, DSO__NAME_KALLSYMS)) { target = DSO__NAME_KALLSYMS; is_kallsyms = true; - ret = sysfs__sprintf_build_id("/", sbuildid); + ret = sysfs__snprintf_build_id("/", sbuildid, sizeof(sbuildid)); } else { nsinfo__mountns_enter(nsi, &nsc); - ret = filename__sprintf_build_id(target, sbuildid); + ret = filename__snprintf_build_id(target, sbuildid, sizeof(sbuildid)); nsinfo__mountns_exit(&nsc); } diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 3cc7c40f5097..b74f6fe24bb6 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -859,7 +859,7 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) comp_dir = cu_get_comp_dir(&pf->cu_die); if (pf->dbg->build_id) { build_id__init(&bid, pf->dbg->build_id, BUILD_ID_SIZE); - build_id__sprintf(&bid, sbuild_id); + build_id__snprintf(&bid, sbuild_id, sizeof(sbuild_id)); } ret = find_source_path(pf->fname, sbuild_id, comp_dir, &fpath); if (ret < 0) { diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 00f2c6c5114d..6655c0bbe0d8 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -780,14 +780,13 @@ static void set_sym_in_dict(PyObject *dict, struct addr_location *al, const char *sym_field, const char *symoff_field, const char *map_pgoff) { - char sbuild_id[SBUILD_ID_SIZE]; - if (al->map) { + char sbuild_id[SBUILD_ID_SIZE]; struct dso *dso = map__dso(al->map); pydict_set_item_string_decref(dict, dso_field, _PyUnicode_FromString(dso__name(dso))); - build_id__sprintf(dso__bid(dso), sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); pydict_set_item_string_decref(dict, dso_bid_field, _PyUnicode_FromString(sbuild_id)); pydict_set_item_string_decref(dict, dso_map_start, @@ -1238,7 
+1237,7 @@ static int python_export_dso(struct db_export *dbe, struct dso *dso, char sbuild_id[SBUILD_ID_SIZE]; PyObject *t; - build_id__sprintf(dso__bid(dso), sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); t = tuple_new(5); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index ae0bd568ac45..573c65da9fe0 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2152,7 +2152,7 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) goto proc_kallsyms; } - build_id__sprintf(dso__bid(dso), sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); /* Find kallsyms in build-id cache with kcore */ scnprintf(path, sizeof(path), "%s/%s/%s", From 29be60c93d2d9300571230edaa484930cdbec437 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:32:44 -0700 Subject: [PATCH 1061/2411] perf build-id: Mark DSO in sample callchains Previously only the sample IP's map DSO would be marked hit for the purposes of populating the build ID cache. Walk the call chain to mark all IPs and DSOs. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-5-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/build-id.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index aa35dceace90..3386fa8e1e7e 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -42,10 +42,20 @@ static bool no_buildid_cache; +static int mark_dso_hit_callback(struct callchain_cursor_node *node, void *data __maybe_unused) +{ + struct map *map = node->ms.map; + + if (map) + dso__set_hit(map__dso(map)); + + return 0; +} + int build_id__mark_dso_hit(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, - struct evsel *evsel __maybe_unused, + struct evsel *evsel, struct machine *machine) { struct addr_location al; @@ -63,6 +73,11 @@ int build_id__mark_dso_hit(const struct perf_tool *tool __maybe_unused, dso__set_hit(map__dso(al.map)); addr_location__exit(&al); + + sample__for_each_callchain_node(thread, evsel, sample, PERF_MAX_STACK_DEPTH, + /*symbols=*/false, mark_dso_hit_callback, /*data=*/NULL); + + thread__put(thread); return 0; } From eee4b66105a6fa3b85fe5260d3791d607570ba95 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:32:45 -0700 Subject: [PATCH 1062/2411] perf build-id: Ensure struct build_id is empty before use If a build ID is read then not all code paths may ensure it is empty before use. Initialize the build_id to be zero-ed unless there is clear initialization such as a call to build_id__init. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-6-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/bench/inject-buildid.c | 2 +- tools/perf/builtin-buildid-cache.c | 8 ++++---- tools/perf/tests/pe-file-parsing.c | 2 +- tools/perf/tests/sdt.c | 2 +- tools/perf/util/build-id.c | 6 +++--- tools/perf/util/debuginfo.c | 2 +- tools/perf/util/probe-event.c | 3 ++- tools/perf/util/probe-finder.c | 3 ++- tools/perf/util/symbol-minimal.c | 2 +- tools/perf/util/symbol.c | 5 +++-- tools/perf/util/synthetic-events.c | 2 +- 11 files changed, 20 insertions(+), 17 deletions(-) diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index f55c07e4be94..aad572a78d7f 100644 --- a/tools/perf/bench/inject-buildid.c +++ b/tools/perf/bench/inject-buildid.c @@ -80,7 +80,7 @@ static int add_dso(const char *fpath, const struct stat *sb __maybe_unused, int typeflag, struct FTW *ftwbuf __maybe_unused) { struct bench_dso *dso = &dsos[nr_dsos]; - struct build_id bid; + struct build_id bid = { .size = 0, }; if (typeflag == FTW_D || typeflag == FTW_SL) return 0; diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 3f7739b21148..e936a34b7d37 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -175,7 +175,7 @@ static int build_id_cache__add_kcore(const char *filename, bool force) static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi) { char sbuild_id[SBUILD_ID_SIZE]; - struct build_id bid; + struct build_id bid = { .size = 0, }; int err; struct nscookie nsc; @@ -198,7 +198,7 @@ static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi) static int build_id_cache__remove_file(const char *filename, struct nsinfo *nsi) { char sbuild_id[SBUILD_ID_SIZE]; - struct build_id bid; + struct build_id bid = { .size = 0, }; struct nscookie nsc; int err; @@ -275,7 +275,7 @@ static int build_id_cache__purge_all(void) static 
bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) { char filename[PATH_MAX]; - struct build_id bid; + struct build_id bid = { .size = 0, }; if (!dso__build_id_filename(dso, filename, sizeof(filename), false)) return true; @@ -303,7 +303,7 @@ static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *f static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi) { char sbuild_id[SBUILD_ID_SIZE]; - struct build_id bid; + struct build_id bid = { .size = 0, }; struct nscookie nsc; int err; diff --git a/tools/perf/tests/pe-file-parsing.c b/tools/perf/tests/pe-file-parsing.c index fff58b220c07..30c7da79e109 100644 --- a/tools/perf/tests/pe-file-parsing.c +++ b/tools/perf/tests/pe-file-parsing.c @@ -24,7 +24,7 @@ static int run_dir(const char *d) { char filename[PATH_MAX]; char debugfile[PATH_MAX]; - struct build_id bid; + struct build_id bid = { .size = 0, }; char debuglink[PATH_MAX]; char expect_build_id[] = { 0x5a, 0x0f, 0xd8, 0x82, 0xb5, 0x30, 0x84, 0x22, diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index 663c8f700069..93baee2eae42 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -28,7 +28,7 @@ static int target_function(void) static int build_id_cache__add_file(const char *filename) { char sbuild_id[SBUILD_ID_SIZE]; - struct build_id bid; + struct build_id bid = { .size = 0, }; int err; err = filename__read_build_id(filename, &bid); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 3386fa8e1e7e..1abd5a670665 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -95,7 +95,7 @@ int build_id__snprintf(const struct build_id *build_id, char *bf, size_t bf_size int sysfs__snprintf_build_id(const char *root_dir, char *sbuild_id, size_t sbuild_id_size) { char notes[PATH_MAX]; - struct build_id bid; + struct build_id bid = { .size = 0, }; int ret; if (!root_dir) @@ -112,7 +112,7 @@ int sysfs__snprintf_build_id(const char *root_dir, 
char *sbuild_id, size_t sbuil int filename__snprintf_build_id(const char *pathname, char *sbuild_id, size_t sbuild_id_size) { - struct build_id bid; + struct build_id bid = { .size = 0, }; int ret; ret = filename__read_build_id(pathname, &bid); @@ -849,7 +849,7 @@ static int filename__read_build_id_ns(const char *filename, static bool dso__build_id_mismatch(struct dso *dso, const char *name) { - struct build_id bid; + struct build_id bid = { .size = 0, }; bool ret = false; mutex_lock(dso__lock(dso)); diff --git a/tools/perf/util/debuginfo.c b/tools/perf/util/debuginfo.c index b5deea7cbdf2..a44c70f93156 100644 --- a/tools/perf/util/debuginfo.c +++ b/tools/perf/util/debuginfo.c @@ -103,7 +103,7 @@ struct debuginfo *debuginfo__new(const char *path) char buf[PATH_MAX], nil = '\0'; struct dso *dso; struct debuginfo *dinfo = NULL; - struct build_id bid; + struct build_id bid = { .size = 0}; /* Try to open distro debuginfo files */ dso = dso__new(path); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index c10549fc451b..57ad150f8c43 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1063,7 +1063,6 @@ static int sprint_line_description(char *sbuf, size_t size, struct line_range *l static int __show_line_range(struct line_range *lr, const char *module, bool user) { - struct build_id bid; int l = 1; struct int_node *ln; struct debuginfo *dinfo; @@ -1088,6 +1087,8 @@ static int __show_line_range(struct line_range *lr, const char *module, ret = -ENOENT; } if (dinfo->build_id) { + struct build_id bid; + build_id__init(&bid, dinfo->build_id, BUILD_ID_SIZE); build_id__snprintf(&bid, sbuild_id, sizeof(sbuild_id)); } diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index b74f6fe24bb6..5ffd97ee4898 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -848,7 +848,6 @@ static int probe_point_lazy_walker(const char *fname, int lineno, /* Find probe points from lazy 
pattern */ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) { - struct build_id bid; char sbuild_id[SBUILD_ID_SIZE] = ""; int ret = 0; char *fpath; @@ -858,6 +857,8 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) comp_dir = cu_get_comp_dir(&pf->cu_die); if (pf->dbg->build_id) { + struct build_id bid; + build_id__init(&bid, pf->dbg->build_id, BUILD_ID_SIZE); build_id__snprintf(&bid, sbuild_id, sizeof(sbuild_id)); } diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index c73fe2e09fe9..7201494c5c20 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -317,7 +317,7 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused, struct symsrc *runtime_ss __maybe_unused, int kmodule __maybe_unused) { - struct build_id bid; + struct build_id bid = { .size = 0, }; int ret; ret = fd__is_64_bit(ss->fd); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 573c65da9fe0..e816e4220d33 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1813,7 +1813,6 @@ int dso__load(struct dso *dso, struct map *map) struct symsrc *syms_ss = NULL, *runtime_ss = NULL; bool kmod; bool perfmap; - struct build_id bid; struct nscookie nsc; char newmapname[PATH_MAX]; const char *map_path = dso__long_name(dso); @@ -1874,6 +1873,8 @@ int dso__load(struct dso *dso, struct map *map) */ if (!dso__has_build_id(dso) && is_regular_file(dso__long_name(dso))) { + struct build_id bid = { .size = 0, }; + __symbol__join_symfs(name, PATH_MAX, dso__long_name(dso)); if (filename__read_build_id(name, &bid) > 0) dso__set_build_id(dso, &bid); @@ -2122,7 +2123,7 @@ static bool filename__readable(const char *file) static char *dso__find_kallsyms(struct dso *dso, struct map *map) { - struct build_id bid; + struct build_id bid = { .size = 0, }; char sbuild_id[SBUILD_ID_SIZE]; bool is_host = false; char path[PATH_MAX]; diff --git 
a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index d3c454174602..d6f9a4548c92 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -368,7 +368,7 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event, struct machine *machine, bool is_kernel) { - struct build_id bid; + struct build_id bid = { .size = 0, }; struct nsinfo *nsi; struct nscookie nc; struct dso *dso = NULL; From d9f2ecbc5e47fca7bda7c13cff3b3534b1467b32 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:32:46 -0700 Subject: [PATCH 1063/2411] perf dso: Move build_id to dso_id The dso_id previously contained the major, minor, inode and inode generation information from a mmap2 event - the inode generation would be zero when reading from /proc/pid/maps. The build_id was in the dso. With build ID mmap2 events these fields wouldn't be initialized which would largely mean the special empty case where any dso would match for equality. This isn't desirable as if a dso is replaced we want the comparison to yield a difference. To support detecting the difference between DSOs based on build_id, move the build_id out of the DSO and into the dso_id. The dso_id is also stored in the DSO so nothing is lost. Capture in the dso_id what parts have been initialized and rename dso_id__inject to dso_id__improve_id so that it is clear the dso_id is being improved upon with additional information. With the build_id in the dso_id, use memcmp to compare for equality. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-7-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-buildid-list.c | 2 +- tools/perf/builtin-inject.c | 36 ++++---- tools/perf/builtin-report.c | 11 ++- tools/perf/include/perf/perf_dlfilter.h | 2 +- tools/perf/tests/symbols.c | 4 +- tools/perf/util/build-id.c | 4 +- tools/perf/util/dso.c | 109 +++++++++++++----------- tools/perf/util/dso.h | 75 ++++++++-------- tools/perf/util/dsos.c | 18 ++-- tools/perf/util/machine.c | 28 +++--- tools/perf/util/map.c | 13 ++- tools/perf/util/map.h | 5 +- tools/perf/util/sort.c | 27 +++--- tools/perf/util/synthetic-events.c | 18 ++-- 14 files changed, 197 insertions(+), 155 deletions(-) diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index ba8ba0303920..151cd84b6dfe 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -31,7 +31,7 @@ static int buildid__map_cb(struct map *map, void *arg __maybe_unused) memset(bid_buf, 0, sizeof(bid_buf)); if (dso__has_build_id(dso)) - build_id__snprintf(dso__bid_const(dso), bid_buf, sizeof(bid_buf)); + build_id__snprintf(dso__bid(dso), bid_buf, sizeof(bid_buf)); printf("%s %16" PRIx64 " %16" PRIx64, bid_buf, map__start(map), map__end(map)); if (dso_long_name != NULL) printf(" %s", dso_long_name); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index b15eac0716f7..13bbb493141f 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -587,15 +587,17 @@ static int perf_event__repipe_mmap2(const struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct dso_id id; - struct dso_id *dso_id = NULL; + struct dso_id id = dso_id_empty; - if (!(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) { + if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { + build_id__init(&id.build_id, event->mmap2.build_id, event->mmap2.build_id_size); + } else { id.maj = 
event->mmap2.maj; id.min = event->mmap2.min; id.ino = event->mmap2.ino; id.ino_generation = event->mmap2.ino_generation; - dso_id = &id; + id.mmap2_valid = true; + id.mmap2_ino_generation_valid = true; } return perf_event__repipe_common_mmap( @@ -603,7 +605,7 @@ static int perf_event__repipe_mmap2(const struct perf_tool *tool, event->mmap2.pid, event->mmap2.tid, event->mmap2.start, event->mmap2.len, event->mmap2.pgoff, event->mmap2.flags, event->mmap2.prot, - event->mmap2.filename, dso_id, + event->mmap2.filename, &id, perf_event__process_mmap2); } @@ -671,19 +673,20 @@ static int perf_event__repipe_tracing_data(struct perf_session *session, static int dso__read_build_id(struct dso *dso) { struct nscookie nsc; + struct build_id bid = { .size = 0, }; if (dso__has_build_id(dso)) return 0; mutex_lock(dso__lock(dso)); nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); - if (filename__read_build_id(dso__long_name(dso), dso__bid(dso)) > 0) - dso__set_has_build_id(dso); + if (filename__read_build_id(dso__long_name(dso), &bid) > 0) + dso__set_build_id(dso, &bid); else if (dso__nsinfo(dso)) { char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso)); - if (new_name && filename__read_build_id(new_name, dso__bid(dso)) > 0) - dso__set_has_build_id(dso); + if (new_name && filename__read_build_id(new_name, &bid) > 0) + dso__set_build_id(dso, &bid); free(new_name); } nsinfo__mountns_exit(&nsc); @@ -732,23 +735,26 @@ static bool perf_inject__lookup_known_build_id(struct perf_inject *inject, struct dso *dso) { struct str_node *pos; - int bid_len; strlist__for_each_entry(pos, inject->known_build_ids) { + struct build_id bid; const char *build_id, *dso_name; + size_t bid_len; build_id = skip_spaces(pos->s); dso_name = strchr(build_id, ' '); bid_len = dso_name - pos->s; + if (bid_len > sizeof(bid.data)) + bid_len = sizeof(bid.data); dso_name = skip_spaces(dso_name); if (strcmp(dso__long_name(dso), dso_name)) continue; - for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) { - 
dso__bid(dso)->data[ix] = (hex(build_id[2 * ix]) << 4 | - hex(build_id[2 * ix + 1])); + for (size_t ix = 0; 2 * ix + 1 < bid_len; ++ix) { + bid.data[ix] = (hex(build_id[2 * ix]) << 4 | + hex(build_id[2 * ix + 1])); } - dso__bid(dso)->size = bid_len / 2; - dso__set_has_build_id(dso); + bid.size = bid_len / 2; + dso__set_build_id(dso, &bid); return true; } return false; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e662e1c3a7c6..26186717fe9b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -861,17 +861,24 @@ static int maps__fprintf_task_cb(struct map *map, void *data) struct maps__fprintf_task_args *args = data; const struct dso *dso = map__dso(map); u32 prot = map__prot(map); + const struct dso_id *dso_id = dso__id_const(dso); int ret; + char buf[SBUILD_ID_SIZE]; + + if (dso_id->mmap2_valid) + snprintf(buf, sizeof(buf), "%" PRIu64, dso_id->ino); + else + build_id__snprintf(&dso_id->build_id, buf, sizeof(buf)); ret = fprintf(args->fp, - "%*s %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n", + "%*s %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %s %s\n", args->indent, "", map__start(map), map__end(map), prot & PROT_READ ? 'r' : '-', prot & PROT_WRITE ? 'w' : '-', prot & PROT_EXEC ? 'x' : '-', map__flags(map) ? 
's' : 'p', map__pgoff(map), - dso__id_const(dso)->ino, dso__name(dso)); + buf, dso__name(dso)); if (ret < 0) return ret; diff --git a/tools/perf/include/perf/perf_dlfilter.h b/tools/perf/include/perf/perf_dlfilter.h index 16fc4568ac53..2d3540ed3c58 100644 --- a/tools/perf/include/perf/perf_dlfilter.h +++ b/tools/perf/include/perf/perf_dlfilter.h @@ -87,7 +87,7 @@ struct perf_dlfilter_al { __u8 is_64_bit; /* Only valid if dso is not NULL */ __u8 is_kernel_ip; /* True if in kernel space */ __u32 buildid_size; - __u8 *buildid; + const __u8 *buildid; /* Below members are only populated by resolve_ip() */ __u8 filtered; /* True if this sample event will be filtered out */ const char *comm; diff --git a/tools/perf/tests/symbols.c b/tools/perf/tests/symbols.c index ee20a366f32f..b07fdf831868 100644 --- a/tools/perf/tests/symbols.c +++ b/tools/perf/tests/symbols.c @@ -96,8 +96,8 @@ static int create_map(struct test_info *ti, char *filename, struct map **map_p) dso__put(dso); /* Create a dummy map at 0x100000 */ - *map_p = map__new(ti->machine, 0x100000, 0xffffffff, 0, NULL, - PROT_EXEC, 0, NULL, filename, ti->thread); + *map_p = map__new(ti->machine, 0x100000, 0xffffffff, 0, &dso_id_empty, + PROT_EXEC, /*flags=*/0, filename, ti->thread); if (!*map_p) { pr_debug("Failed to create map!"); return TEST_FAIL; diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 1abd5a670665..e2b295fe4d2f 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -251,7 +251,7 @@ char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size, if (!dso__has_build_id(dso)) return NULL; - build_id__snprintf(dso__bid_const(dso), sbuild_id, sizeof(sbuild_id)); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); linkname = build_id_cache__linkname(sbuild_id, NULL, 0); if (!linkname) return NULL; @@ -334,7 +334,7 @@ static int machine__write_buildid_table_cb(struct dso *dso, void *data) } in_kernel = dso__kernel(dso) || 
is_kernel_module(name, PERF_RECORD_MISC_CPUMODE_UNKNOWN); - return write_buildid(name, name_len, dso__bid(dso), args->machine->pid, + return write_buildid(name, name_len, &dso__id(dso)->build_id, args->machine->pid, in_kernel ? args->kmisc : args->umisc, args->fd); } diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 4ff94029632e..282e3af85d5a 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -217,7 +217,7 @@ int dso__read_binary_type_filename(const struct dso *dso, break; } - build_id__snprintf(dso__bid_const(dso), build_id_hex, sizeof(build_id_hex)); + build_id__snprintf(dso__bid(dso), build_id_hex, sizeof(build_id_hex)); len = __symbol__join_symfs(filename, size, "/usr/lib/debug/.build-id/"); snprintf(filename + len, size - len, "%.2s/%s.debug", build_id_hex, build_id_hex + 2); @@ -1382,64 +1382,76 @@ static void dso__set_long_name_id(struct dso *dso, const char *name, bool name_a static int __dso_id__cmp(const struct dso_id *a, const struct dso_id *b) { - if (a->maj > b->maj) return -1; - if (a->maj < b->maj) return 1; + if (a->mmap2_valid && b->mmap2_valid) { + if (a->maj > b->maj) return -1; + if (a->maj < b->maj) return 1; - if (a->min > b->min) return -1; - if (a->min < b->min) return 1; + if (a->min > b->min) return -1; + if (a->min < b->min) return 1; - if (a->ino > b->ino) return -1; - if (a->ino < b->ino) return 1; - - /* - * Synthesized MMAP events have zero ino_generation, avoid comparing - * them with MMAP events with actual ino_generation. - * - * I found it harmful because the mismatch resulted in a new - * dso that did not have a build ID whereas the original dso did have a - * build ID. The build ID was essential because the object was not found - * otherwise. 
- Adrian - */ - if (a->ino_generation && b->ino_generation) { + if (a->ino > b->ino) return -1; + if (a->ino < b->ino) return 1; + } + if (a->mmap2_ino_generation_valid && b->mmap2_ino_generation_valid) { if (a->ino_generation > b->ino_generation) return -1; if (a->ino_generation < b->ino_generation) return 1; } - + if (build_id__is_defined(&a->build_id) && build_id__is_defined(&b->build_id)) { + if (a->build_id.size != b->build_id.size) + return a->build_id.size < b->build_id.size ? -1 : 1; + return memcmp(a->build_id.data, b->build_id.data, a->build_id.size); + } return 0; } -bool dso_id__empty(const struct dso_id *id) -{ - if (!id) - return true; +const struct dso_id dso_id_empty = { + { + .maj = 0, + .min = 0, + .ino = 0, + .ino_generation = 0, + }, + .mmap2_valid = false, + .mmap2_ino_generation_valid = false, + { + .size = 0, + } +}; - return !id->maj && !id->min && !id->ino && !id->ino_generation; -} - -void __dso__inject_id(struct dso *dso, const struct dso_id *id) +void __dso__improve_id(struct dso *dso, const struct dso_id *id) { struct dsos *dsos = dso__dsos(dso); struct dso_id *dso_id = dso__id(dso); + bool changed = false; /* dsos write lock held by caller. 
*/ - dso_id->maj = id->maj; - dso_id->min = id->min; - dso_id->ino = id->ino; - dso_id->ino_generation = id->ino_generation; - - if (dsos) + if (id->mmap2_valid && !dso_id->mmap2_valid) { + dso_id->maj = id->maj; + dso_id->min = id->min; + dso_id->ino = id->ino; + dso_id->mmap2_valid = true; + changed = true; + } + if (id->mmap2_ino_generation_valid && !dso_id->mmap2_ino_generation_valid) { + dso_id->ino_generation = id->ino_generation; + dso_id->mmap2_ino_generation_valid = true; + changed = true; + } + if (build_id__is_defined(&id->build_id) && !build_id__is_defined(&dso_id->build_id)) { + dso_id->build_id = id->build_id; + changed = true; + } + if (changed && dsos) dsos->sorted = false; } int dso_id__cmp(const struct dso_id *a, const struct dso_id *b) { - /* - * The second is always dso->id, so zeroes if not set, assume passing - * NULL for a means a zeroed id - */ - if (dso_id__empty(a) || dso_id__empty(b)) + if (a == &dso_id_empty || b == &dso_id_empty) { + /* There is no valid data to compare so the comparison always returns identical. 
*/ return 0; + } return __dso_id__cmp(a, b); } @@ -1540,7 +1552,6 @@ struct dso *dso__new_id(const char *name, const struct dso_id *id) dso->loaded = 0; dso->rel = 0; dso->sorted_by_name = 0; - dso->has_build_id = 0; dso->has_srcline = 1; dso->a2l_fails = 1; dso->kernel = DSO_SPACE__USER; @@ -1649,15 +1660,14 @@ int dso__swap_init(struct dso *dso, unsigned char eidata) return 0; } -void dso__set_build_id(struct dso *dso, struct build_id *bid) +void dso__set_build_id(struct dso *dso, const struct build_id *bid) { - RC_CHK_ACCESS(dso)->bid = *bid; - RC_CHK_ACCESS(dso)->has_build_id = 1; + dso__id(dso)->build_id = *bid; } -bool dso__build_id_equal(const struct dso *dso, struct build_id *bid) +bool dso__build_id_equal(const struct dso *dso, const struct build_id *bid) { - const struct build_id *dso_bid = dso__bid_const(dso); + const struct build_id *dso_bid = dso__bid(dso); if (dso_bid->size > bid->size && dso_bid->size == BUILD_ID_SIZE) { /* @@ -1676,18 +1686,20 @@ bool dso__build_id_equal(const struct dso *dso, struct build_id *bid) void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine) { char path[PATH_MAX]; + struct build_id bid = { .size = 0, }; if (machine__is_default_guest(machine)) return; sprintf(path, "%s/sys/kernel/notes", machine->root_dir); - if (sysfs__read_build_id(path, dso__bid(dso)) == 0) - dso__set_has_build_id(dso); + sysfs__read_build_id(path, &bid); + dso__set_build_id(dso, &bid); } int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir) { char filename[PATH_MAX]; + struct build_id bid = { .size = 0, }; /* * kernel module short names are of the form "[module]" and * we need just "module" here. 
@@ -1698,9 +1710,8 @@ int dso__kernel_module_get_build_id(struct dso *dso, "%s/sys/module/%.*s/notes/.note.gnu.build-id", root_dir, (int)strlen(name) - 1, name); - if (sysfs__read_build_id(filename, dso__bid(dso)) == 0) - dso__set_has_build_id(dso); - + sysfs__read_build_id(filename, &bid); + dso__set_build_id(dso, &bid); return 0; } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index c87564471f9b..3457d713d3c5 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -185,14 +185,33 @@ enum dso_load_errno { #define DSO__DATA_CACHE_SIZE 4096 #define DSO__DATA_CACHE_MASK ~(DSO__DATA_CACHE_SIZE - 1) -/* - * Data about backing storage DSO, comes from PERF_RECORD_MMAP2 meta events +/** + * struct dso_id + * + * Data about backing storage DSO, comes from PERF_RECORD_MMAP2 meta events, + * reading from /proc/pid/maps or synthesis of build_ids from DSOs. Possibly + * incomplete at any particular use. */ struct dso_id { - u32 maj; - u32 min; - u64 ino; - u64 ino_generation; + /* Data related to the mmap2 event or read from /proc/pid/maps. */ + struct { + u32 maj; + u32 min; + u64 ino; + u64 ino_generation; + }; + /** @mmap2_valid: Are the maj, min and ino fields valid? */ + bool mmap2_valid; + /** + * @mmap2_ino_generation_valid: Is the ino_generation valid? Generally + * false for /proc/pid/maps mmap event. + */ + bool mmap2_ino_generation_valid; + /** + * @build_id: A possibly populated build_id. build_id__is_defined checks + * whether it is populated. 
+ */ + struct build_id build_id; }; struct dso_cache { @@ -243,7 +262,6 @@ DECLARE_RC_STRUCT(dso) { u64 addr; struct symbol *symbol; } last_find_result; - struct build_id bid; u64 text_offset; u64 text_end; const char *short_name; @@ -276,7 +294,6 @@ DECLARE_RC_STRUCT(dso) { enum dso_swap_type needs_swap:2; bool is_kmod:1; u8 adjust_symbols:1; - u8 has_build_id:1; u8 header_build_id:1; u8 has_srcline:1; u8 hit:1; @@ -292,6 +309,9 @@ DECLARE_RC_STRUCT(dso) { }; extern struct mutex _dso__data_open_lock; +extern const struct dso_id dso_id_empty; + +int dso_id__cmp(const struct dso_id *a, const struct dso_id *b); /* dso__for_each_symbol - iterate over the symbols of given type * @@ -362,31 +382,11 @@ static inline void dso__set_auxtrace_cache(struct dso *dso, struct auxtrace_cach RC_CHK_ACCESS(dso)->auxtrace_cache = cache; } -static inline struct build_id *dso__bid(struct dso *dso) -{ - return &RC_CHK_ACCESS(dso)->bid; -} - -static inline const struct build_id *dso__bid_const(const struct dso *dso) -{ - return &RC_CHK_ACCESS(dso)->bid; -} - static inline struct dso_bpf_prog *dso__bpf_prog(struct dso *dso) { return &RC_CHK_ACCESS(dso)->bpf_prog; } -static inline bool dso__has_build_id(const struct dso *dso) -{ - return RC_CHK_ACCESS(dso)->has_build_id; -} - -static inline void dso__set_has_build_id(struct dso *dso) -{ - RC_CHK_ACCESS(dso)->has_build_id = true; -} - static inline bool dso__has_srcline(const struct dso *dso) { return RC_CHK_ACCESS(dso)->has_srcline; @@ -462,6 +462,16 @@ static inline const struct dso_id *dso__id_const(const struct dso *dso) return &RC_CHK_ACCESS(dso)->id; } +static inline const struct build_id *dso__bid(const struct dso *dso) +{ + return &dso__id_const(dso)->build_id; +} + +static inline bool dso__has_build_id(const struct dso *dso) +{ + return build_id__is_defined(dso__bid(dso)); +} + static inline struct rb_root_cached *dso__inlined_nodes(struct dso *dso) { return &RC_CHK_ACCESS(dso)->inlined_nodes; @@ -699,9 +709,6 @@ static inline 
void dso__set_text_offset(struct dso *dso, u64 val) RC_CHK_ACCESS(dso)->text_offset = val; } -int dso_id__cmp(const struct dso_id *a, const struct dso_id *b); -bool dso_id__empty(const struct dso_id *id); - struct dso *dso__new_id(const char *name, const struct dso_id *id); struct dso *dso__new(const char *name); void dso__delete(struct dso *dso); @@ -709,7 +716,7 @@ void dso__delete(struct dso *dso); int dso__cmp_id(struct dso *a, struct dso *b); void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated); void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated); -void __dso__inject_id(struct dso *dso, const struct dso_id *id); +void __dso__improve_id(struct dso *dso, const struct dso_id *id); int dso__name_len(const struct dso *dso); @@ -739,8 +746,8 @@ void dso__sort_by_name(struct dso *dso); int dso__swap_init(struct dso *dso, unsigned char eidata); -void dso__set_build_id(struct dso *dso, struct build_id *bid); -bool dso__build_id_equal(const struct dso *dso, struct build_id *bid); +void dso__set_build_id(struct dso *dso, const struct build_id *bid); +bool dso__build_id_equal(const struct dso *dso, const struct build_id *bid); void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine); int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir); diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index 47538273915d..0a7645c7fae7 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -72,6 +72,7 @@ static int dsos__read_build_ids_cb(struct dso *dso, void *data) { struct dsos__read_build_ids_cb_args *args = data; struct nscookie nsc; + struct build_id bid = { .size = 0, }; if (args->with_hits && !dso__hit(dso) && !dso__is_vdso(dso)) return 0; @@ -80,15 +81,15 @@ static int dsos__read_build_ids_cb(struct dso *dso, void *data) return 0; } nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); - if (filename__read_build_id(dso__long_name(dso), dso__bid(dso)) > 0) { + if 
(filename__read_build_id(dso__long_name(dso), &bid) > 0) { + dso__set_build_id(dso, &bid); args->have_build_id = true; - dso__set_has_build_id(dso); } else if (errno == ENOENT && dso__nsinfo(dso)) { char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso)); - if (new_name && filename__read_build_id(new_name, dso__bid(dso)) > 0) { + if (new_name && filename__read_build_id(new_name, &bid) > 0) { + dso__set_build_id(dso, &bid); args->have_build_id = true; - dso__set_has_build_id(dso); } free(new_name); } @@ -286,7 +287,7 @@ struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short) struct dso *res; down_read(&dsos->lock); - res = __dsos__find_id(dsos, name, NULL, cmp_short, /*write_locked=*/false); + res = __dsos__find_id(dsos, name, &dso_id_empty, cmp_short, /*write_locked=*/false); up_read(&dsos->lock); return res; } @@ -344,8 +345,8 @@ static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, const { struct dso *dso = __dsos__find_id(dsos, name, id, false, /*write_locked=*/true); - if (dso && dso_id__empty(dso__id(dso)) && !dso_id__empty(id)) - __dso__inject_id(dso, id); + if (dso) + __dso__improve_id(dso, id); return dso ? 
dso : __dsos__addnew_id(dsos, name, id); } @@ -436,7 +437,8 @@ struct dso *dsos__findnew_module_dso(struct dsos *dsos, down_write(&dsos->lock); - dso = __dsos__find_id(dsos, m->name, NULL, /*cmp_short=*/true, /*write_locked=*/true); + dso = __dsos__find_id(dsos, m->name, &dso_id_empty, /*cmp_short=*/true, + /*write_locked=*/true); if (dso) { up_write(&dsos->lock); return dso; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 7ec12c207970..2ef8c1cfae1e 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1731,21 +1731,21 @@ int machine__process_mmap2_event(struct machine *machine, { struct thread *thread; struct map *map; - struct dso_id dso_id = { - .maj = event->mmap2.maj, - .min = event->mmap2.min, - .ino = event->mmap2.ino, - .ino_generation = event->mmap2.ino_generation, - }; - struct build_id __bid, *bid = NULL; + struct dso_id dso_id = dso_id_empty; int ret = 0; if (dump_trace) perf_event__fprintf_mmap2(event, stdout); if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { - bid = &__bid; - build_id__init(bid, event->mmap2.build_id, event->mmap2.build_id_size); + build_id__init(&dso_id.build_id, event->mmap2.build_id, event->mmap2.build_id_size); + } else { + dso_id.maj = event->mmap2.maj; + dso_id.min = event->mmap2.min; + dso_id.ino = event->mmap2.ino; + dso_id.ino_generation = event->mmap2.ino_generation; + dso_id.mmap2_valid = true; + dso_id.mmap2_ino_generation_valid = true; } if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL || @@ -1757,7 +1757,7 @@ int machine__process_mmap2_event(struct machine *machine, }; strlcpy(xm.name, event->mmap2.filename, KMAP_NAME_LEN); - ret = machine__process_kernel_mmap_event(machine, &xm, bid); + ret = machine__process_kernel_mmap_event(machine, &xm, &dso_id.build_id); if (ret < 0) goto out_problem; return 0; @@ -1771,7 +1771,7 @@ int machine__process_mmap2_event(struct machine *machine, map = map__new(machine, event->mmap2.start, event->mmap2.len, event->mmap2.pgoff, 
&dso_id, event->mmap2.prot, - event->mmap2.flags, bid, + event->mmap2.flags, event->mmap2.filename, thread); if (map == NULL) @@ -1829,8 +1829,8 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event prot = PROT_EXEC; map = map__new(machine, event->mmap.start, - event->mmap.len, event->mmap.pgoff, - NULL, prot, 0, NULL, event->mmap.filename, thread); + event->mmap.len, event->mmap.pgoff, + &dso_id_empty, prot, /*flags=*/0, event->mmap.filename, thread); if (map == NULL) goto out_problem_map; @@ -3192,7 +3192,7 @@ struct dso *machine__findnew_dso_id(struct machine *machine, const char *filenam struct dso *machine__findnew_dso(struct machine *machine, const char *filename) { - return machine__findnew_dso_id(machine, filename, NULL); + return machine__findnew_dso_id(machine, filename, &dso_id_empty); } char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 0f6b185f9589..b46c68c24d1c 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -120,8 +120,8 @@ static void map__init(struct map *map, u64 start, u64 end, u64 pgoff, } struct map *map__new(struct machine *machine, u64 start, u64 len, - u64 pgoff, struct dso_id *id, - u32 prot, u32 flags, struct build_id *bid, + u64 pgoff, const struct dso_id *id, + u32 prot, u32 flags, char *filename, struct thread *thread) { struct map *result; @@ -132,7 +132,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, map = zalloc(sizeof(*map)); if (ADD_RC_CHK(result, map)) { char newfilename[PATH_MAX]; - struct dso *dso, *header_bid_dso; + struct dso *dso; int anon, no_dso, vdso, android; android = is_android_lib(filename); @@ -189,16 +189,15 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, dso__set_nsinfo(dso, nsi); mutex_unlock(dso__lock(dso)); - if (build_id__is_defined(bid)) { - dso__set_build_id(dso, bid); - } else { + if 
(!build_id__is_defined(&id->build_id)) { /* * If the mmap event had no build ID, search for an existing dso from the * build ID header by name. Otherwise only the dso loaded at the time of * reading the header will have the build ID set and all future mmaps will * have it missing. */ - header_bid_dso = dsos__find(&machine->dsos, filename, false); + struct dso *header_bid_dso = dsos__find(&machine->dsos, filename, false); + if (header_bid_dso && dso__header_build_id(header_bid_dso)) { dso__set_build_id(dso, dso__bid(header_bid_dso)); dso__set_header_build_id(dso, 1); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 4262f5a143be..9cadf533a561 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -173,11 +173,10 @@ struct thread; __map__for_each_symbol_by_name(map, sym_name, (pos), idx) struct dso_id; -struct build_id; struct map *map__new(struct machine *machine, u64 start, u64 len, - u64 pgoff, struct dso_id *id, u32 prot, u32 flags, - struct build_id *bid, char *filename, struct thread *thread); + u64 pgoff, const struct dso_id *id, u32 prot, u32 flags, + char *filename, struct thread *thread); struct map *map__new2(u64 start, struct dso *dso); void map__delete(struct map *map); struct map *map__clone(struct map *map); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 45e654653960..7969d64a47bf 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1746,22 +1746,27 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) if (rc) return rc; /* - * Addresses with no major/minor numbers are assumed to be + * Addresses with no major/minor numbers or build ID are assumed to be * anonymous in userspace. Sort those on pid then address. * * The kernel and non-zero major/minor mapped areas are * assumed to be unity mapped. Sort those on address. 
*/ + if (left->cpumode != PERF_RECORD_MISC_KERNEL && (map__flags(l_map) & MAP_SHARED) == 0) { + const struct dso_id *dso_id = dso__id_const(l_dso); - if ((left->cpumode != PERF_RECORD_MISC_KERNEL) && - (!(map__flags(l_map) & MAP_SHARED)) && !dso__id(l_dso)->maj && !dso__id(l_dso)->min && - !dso__id(l_dso)->ino && !dso__id(l_dso)->ino_generation) { - /* userspace anonymous */ + if (!dso_id->mmap2_valid) + dso_id = dso__id_const(r_dso); - if (thread__pid(left->thread) > thread__pid(right->thread)) - return -1; - if (thread__pid(left->thread) < thread__pid(right->thread)) - return 1; + if (!build_id__is_defined(&dso_id->build_id) && + (!dso_id->mmap2_valid || (dso_id->maj == 0 && dso_id->min == 0))) { + /* userspace anonymous */ + + if (thread__pid(left->thread) > thread__pid(right->thread)) + return -1; + if (thread__pid(left->thread) < thread__pid(right->thread)) + return 1; + } } addr: @@ -1786,6 +1791,7 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf, if (he->mem_info) { struct map *map = mem_info__daddr(he->mem_info)->ms.map; struct dso *dso = map ? map__dso(map) : NULL; + const struct dso_id *dso_id = dso ? 
dso__id_const(dso) : &dso_id_empty; addr = cl_address(mem_info__daddr(he->mem_info)->al_addr, chk_double_cl); ms = &mem_info__daddr(he->mem_info)->ms; @@ -1794,8 +1800,7 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf, if ((he->cpumode != PERF_RECORD_MISC_KERNEL) && map && !(map__prot(map) & PROT_EXEC) && (map__flags(map) & MAP_SHARED) && - (dso__id(dso)->maj || dso__id(dso)->min || dso__id(dso)->ino || - dso__id(dso)->ino_generation)) + (!dso_id->mmap2_valid || (dso_id->maj == 0 && dso_id->min == 0))) level = 's'; else if (!map) level = 'X'; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index d6f9a4548c92..3b1240c82b30 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -372,7 +372,7 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event, struct nsinfo *nsi; struct nscookie nc; struct dso *dso = NULL; - struct dso_id id; + struct dso_id dso_id = dso_id_empty; int rc; if (is_kernel) { @@ -380,12 +380,18 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event, goto out; } - id.maj = event->maj; - id.min = event->min; - id.ino = event->ino; - id.ino_generation = event->ino_generation; + if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { + build_id__init(&dso_id.build_id, event->build_id, event->build_id_size); + } else { + dso_id.maj = event->maj; + dso_id.min = event->min; + dso_id.ino = event->ino; + dso_id.ino_generation = event->ino_generation; + dso_id.mmap2_valid = true; + dso_id.mmap2_ino_generation_valid = true; + }; - dso = dsos__findnew_id(&machine->dsos, event->filename, &id); + dso = dsos__findnew_id(&machine->dsos, event->filename, &dso_id); if (dso && dso__has_build_id(dso)) { bid = *dso__bid(dso); rc = 0; From 5b11409b924631745eef60a65218ffa496acafd6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:32:47 -0700 Subject: [PATCH 1064/2411] perf jitdump: Directly mark the 
jitdump DSO The DSO being generated was being accessed through a thread's maps, this is unnecessary as the dso can just be directly found. This avoids problems with passing a NULL evsel which may be inspected to determine properties of a callchain when using the buildid DSO marking code. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-8-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/jitdump.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 624964f01b5f..b062b1f234b6 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -14,9 +14,9 @@ #include #include -#include "build-id.h" #include "event.h" #include "debug.h" +#include "dso.h" #include "evlist.h" #include "namespaces.h" #include "symbol.h" @@ -531,9 +531,22 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) /* * mark dso as use to generate buildid in the header */ - if (!ret) - build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine); + if (!ret) { + struct dso_id dso_id = { + { + .maj = event->mmap2.maj, + .min = event->mmap2.min, + .ino = event->mmap2.ino, + .ino_generation = event->mmap2.ino_generation, + }, + .mmap2_valid = true, + .mmap2_ino_generation_valid = true, + }; + struct dso *dso = machine__findnew_dso_id(jd->machine, filename, &dso_id); + if (dso) + dso__set_hit(dso); + } out: perf_sample__exit(&sample); free(event); From 53b00ff358dc75b12042b2b2aaf1d0e998fd0075 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:32:48 -0700 Subject: [PATCH 1065/2411] perf record: Make --buildid-mmap the default Support for build IDs in mmap2 perf events has been present since Linux v5.12: https://lore.kernel.org/lkml/20210219194619.1780437-1-acme@kernel.org/ Build ID mmap events don't avoid the need to inject build IDs for DSO touched by samples as the build ID cache is populated by perf record. 
They can avoid some cases of symbol mis-resolution caused by the file system changing from when a sample occurred and when the DSO is sought. Unlike the --buildid-mmap option, this change doesn't disable the build ID cache but it does disable the processing of samples looking for DSOs to inject build IDs for. To disable the build ID cache the -B (--no-buildid) option should be used. Making this option the default was raised on the list in: https://lore.kernel.org/linux-perf-users/CAP-5=fXP7jN_QrGUcd55_QH5J-Y-FCaJ6=NaHVtyx0oyNh8_-Q@mail.gmail.com/ Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-9-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/Documentation/perf-record.txt | 4 ++- tools/perf/builtin-record.c | 34 +++++++++++++++--------- tools/perf/util/symbol_conf.h | 2 +- tools/perf/util/synthetic-events.c | 16 +++++------ 4 files changed, 34 insertions(+), 22 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 612612fa2d80..067891bd7da6 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -563,7 +563,9 @@ Specify vmlinux path which has debuginfo. Record build-id of all DSOs regardless whether it's actually hit or not. --buildid-mmap:: -Record build ids in mmap2 events, disables build id cache (implies --no-buildid). +Legacy record build-id in map events option which is now the +default. Behaves identically to --no-buildid. Disable with +--no-buildid-mmap. --aio[=n]:: Use control blocks in asynchronous (Posix AIO) trace writing mode (default: 1, max: 4). 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 53971b9de3ba..a59c4e15575c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -171,6 +171,7 @@ struct record { bool no_buildid_cache_set; bool buildid_all; bool buildid_mmap; + bool buildid_mmap_set; bool timestamp_filename; bool timestamp_boundary; bool off_cpu; @@ -1811,6 +1812,7 @@ record__finish_output(struct record *rec) data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR); } + /* Buildid scanning disabled or build ID in kernel and synthesized map events. */ if (!rec->no_buildid) { process_buildids(rec); @@ -3005,6 +3007,8 @@ static int perf_record_config(const char *var, const char *value, void *cb) rec->no_buildid = true; else if (!strcmp(value, "mmap")) rec->buildid_mmap = true; + else if (!strcmp(value, "no-mmap")) + rec->buildid_mmap = false; else return -1; return 0; @@ -3411,6 +3415,7 @@ static struct record record = { .synth = PERF_SYNTH_ALL, .off_cpu_thresh_ns = OFFCPU_THRESH, }, + .buildid_mmap = true, }; const char record_callchain_help[] = CALLCHAIN_RECORD_HELP @@ -3577,8 +3582,8 @@ static struct option __record_options[] = { "file", "vmlinux pathname"), OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, "Record build-id of all DSOs regardless of hits"), - OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, - "Record build-id in map events"), + OPT_BOOLEAN_SET(0, "buildid-mmap", &record.buildid_mmap, &record.buildid_mmap_set, + "Record build-id in mmap events and skip build-id processing."), OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, "append timestamp to output filename"), OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, @@ -4108,19 +4113,24 @@ int cmd_record(int argc, const char **argv) record.opts.record_switch_events = true; } + if (!rec->buildid_mmap) { + pr_debug("Disabling build id in synthesized mmap2 events.\n"); + symbol_conf.no_buildid_mmap2 = true; + } else if 
(rec->buildid_mmap_set) { + /* + * Explicitly passing --buildid-mmap disables buildid processing + * and cache generation. + */ + rec->no_buildid = true; + } + if (rec->buildid_mmap && !perf_can_record_build_id()) { + pr_warning("Missing support for build id in kernel mmap events.\n" + "Disable this warning with --no-buildid-mmap\n"); + rec->buildid_mmap = false; + } if (rec->buildid_mmap) { - if (!perf_can_record_build_id()) { - pr_err("Failed: no support to record build id in mmap events, update your kernel.\n"); - err = -EINVAL; - goto out_opts; - } - pr_debug("Enabling build id in mmap2 events.\n"); - /* Enable mmap build id synthesizing. */ - symbol_conf.buildid_mmap2 = true; /* Enable perf_event_attr::build_id bit. */ rec->opts.build_id = true; - /* Disable build id cache. */ - rec->no_buildid = true; } if (rec->opts.record_cgroup && !perf_can_record_cgroup()) { diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index cd9aa82c7d5a..7a80d2c14d9b 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -43,7 +43,7 @@ struct symbol_conf { report_individual_block, inline_name, disable_add2line_warn, - buildid_mmap2, + no_buildid_mmap2, guest_code, lazy_load_kernel_maps, keep_exited_threads, diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 3b1240c82b30..e7ca3f5eb493 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -532,7 +532,7 @@ int perf_event__synthesize_mmap_events(const struct perf_tool *tool, event->mmap2.pid = tgid; event->mmap2.tid = pid; - if (symbol_conf.buildid_mmap2) + if (!symbol_conf.no_buildid_mmap2) perf_record_mmap2__read_build_id(&event->mmap2, machine, false); if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { @@ -690,7 +690,7 @@ static int perf_event__synthesize_modules_maps_cb(struct map *map, void *data) return 0; dso = map__dso(map); - if (symbol_conf.buildid_mmap2) { + if 
(!symbol_conf.no_buildid_mmap2) { size = PERF_ALIGN(dso__long_name_len(dso) + 1, sizeof(u64)); event->mmap2.header.type = PERF_RECORD_MMAP2; event->mmap2.header.size = (sizeof(event->mmap2) - @@ -734,9 +734,9 @@ int perf_event__synthesize_modules(const struct perf_tool *tool, perf_event__han .process = process, .machine = machine, }; - size_t size = symbol_conf.buildid_mmap2 - ? sizeof(args.event->mmap2) - : sizeof(args.event->mmap); + size_t size = symbol_conf.no_buildid_mmap2 + ? sizeof(args.event->mmap) + : sizeof(args.event->mmap2); args.event = zalloc(size + machine->id_hdr_size); if (args.event == NULL) { @@ -1124,8 +1124,8 @@ static int __perf_event__synthesize_kernel_mmap(const struct perf_tool *tool, struct machine *machine) { union perf_event *event; - size_t size = symbol_conf.buildid_mmap2 ? - sizeof(event->mmap2) : sizeof(event->mmap); + size_t size = symbol_conf.no_buildid_mmap2 ? + sizeof(event->mmap) : sizeof(event->mmap2); struct map *map = machine__kernel_map(machine); struct kmap *kmap; int err; @@ -1159,7 +1159,7 @@ static int __perf_event__synthesize_kernel_mmap(const struct perf_tool *tool, event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; } - if (symbol_conf.buildid_mmap2) { + if (!symbol_conf.no_buildid_mmap2) { size = snprintf(event->mmap2.filename, sizeof(event->mmap2.filename), "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; size = PERF_ALIGN(size, sizeof(u64)); From c3e5b9ec96dee864c2d6b00fbfe52e784f0d7bee Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:32:49 -0700 Subject: [PATCH 1066/2411] perf session: Add accessor for session->header.env The perf_env from the header in the session is frequently accessed, add an accessor function rather than access directly. Cache the value to avoid repeated calls. No behavioral change. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-10-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-annotate.c | 4 +-- tools/perf/builtin-buildid-cache.c | 2 +- tools/perf/builtin-c2c.c | 16 ++++++------ tools/perf/builtin-inject.c | 2 +- tools/perf/builtin-kmem.c | 2 +- tools/perf/builtin-kvm.c | 4 +-- tools/perf/builtin-kwork.c | 2 +- tools/perf/builtin-lock.c | 4 +-- tools/perf/builtin-mem.c | 2 +- tools/perf/builtin-record.c | 22 +++++++++-------- tools/perf/builtin-report.c | 8 +++--- tools/perf/builtin-sched.c | 8 +++--- tools/perf/builtin-script.c | 14 ++++++----- tools/perf/builtin-stat.c | 23 ++++++++--------- tools/perf/builtin-timechart.c | 2 +- tools/perf/builtin-top.c | 5 ++-- tools/perf/builtin-trace.c | 2 +- tools/perf/tests/topology.c | 38 +++++++++++++---------------- tools/perf/util/bpf-event.c | 2 +- tools/perf/util/branch.c | 2 +- tools/perf/util/data-convert-bt.c | 16 ++++++------ tools/perf/util/data-convert-json.c | 36 +++++++++++++-------------- tools/perf/util/session.c | 7 +++++- tools/perf/util/session.h | 2 ++ tools/perf/util/tool.c | 2 +- 25 files changed, 120 insertions(+), 107 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 9833c2c82a2f..326593862998 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -562,7 +562,7 @@ static int __cmd_annotate(struct perf_annotate *ann) } if (!annotate_opts.objdump_path) { - ret = perf_env__lookup_objdump(&session->header.env, + ret = perf_env__lookup_objdump(perf_session__env(session), &annotate_opts.objdump_path); if (ret) goto out; @@ -896,7 +896,7 @@ int cmd_annotate(int argc, const char **argv) symbol_conf.try_vmlinux_path = true; - ret = symbol__init(&annotate.session->header.env); + ret = symbol__init(perf_session__env(annotate.session)); if (ret < 0) goto out_delete; diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 
e936a34b7d37..c98104481c8a 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -453,7 +453,7 @@ int cmd_buildid_cache(int argc, const char **argv) return PTR_ERR(session); } - if (symbol__init(session ? &session->header.env : NULL) < 0) + if (symbol__init(session ? perf_session__env(session) : NULL) < 0) goto out; setup_pager(); diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index e2e257bcc461..8cb36d9433f8 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2267,14 +2267,15 @@ static int setup_nodes(struct perf_session *session) int node, idx; struct perf_cpu cpu; int *cpu2node; + struct perf_env *env = perf_session__env(session); if (c2c.node_info > 2) c2c.node_info = 2; - c2c.nodes_cnt = session->header.env.nr_numa_nodes; - c2c.cpus_cnt = session->header.env.nr_cpus_avail; + c2c.nodes_cnt = env->nr_numa_nodes; + c2c.cpus_cnt = env->nr_cpus_avail; - n = session->header.env.numa_nodes; + n = env->numa_nodes; if (!n) return -EINVAL; @@ -3030,6 +3031,7 @@ static int perf_c2c__report(int argc, const char **argv) }; int err = 0; const char *output_str, *sort_str = NULL; + struct perf_env *env; argc = parse_options(argc, argv, options, report_c2c_usage, PARSE_OPT_STOP_AT_NON_OPTION); @@ -3072,14 +3074,14 @@ static int perf_c2c__report(int argc, const char **argv) pr_debug("Error creating perf session\n"); goto out; } - + env = perf_session__env(session); /* * Use the 'tot' as default display type if user doesn't specify it; * since Arm64 platform doesn't support HITMs flag, use 'peer' as the * default display type. 
*/ if (!display) { - if (!strcmp(perf_env__arch(&session->header.env), "arm64")) + if (!strcmp(perf_env__arch(env), "arm64")) display = "peer"; else display = "tot"; @@ -3109,7 +3111,7 @@ static int perf_c2c__report(int argc, const char **argv) goto out_session; } - err = mem2node__init(&c2c.mem2node, &session->header.env); + err = mem2node__init(&c2c.mem2node, env); if (err) goto out_session; @@ -3117,7 +3119,7 @@ static int perf_c2c__report(int argc, const char **argv) if (err) goto out_mem2node; - if (symbol__init(&session->header.env) < 0) + if (symbol__init(env) < 0) goto out_mem2node; /* No pipe support at the moment. */ diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 13bbb493141f..f73350a3417a 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -2608,7 +2608,7 @@ int cmd_inject(int argc, const char **argv) inject.tool.finished_round = perf_event__drop_oe; } #endif - ret = symbol__init(&inject.session->header.env); + ret = symbol__init(perf_session__env(inject.session)); if (ret < 0) goto out_delete; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 67fb1946ef13..7929a5fa5f46 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -2024,7 +2024,7 @@ int cmd_kmem(int argc, const char **argv) symbol_conf.use_callchain = true; } - symbol__init(&session->header.env); + symbol__init(perf_session__env(session)); if (perf_time__parse_str(&ptime, time_str) != 0) { pr_err("Invalid time string\n"); diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index d75bd3684980..7b15b4a705e4 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1175,7 +1175,7 @@ static int cpu_isa_config(struct perf_kvm_stat *kvm) } cpuid = buf; } else - cpuid = kvm->session->header.env.cpuid; + cpuid = perf_session__env(kvm->session)->cpuid; if (!cpuid) { pr_err("Failed to look up CPU type\n"); @@ -1561,7 +1561,7 @@ static int read_events(struct perf_kvm_stat *kvm) 
return PTR_ERR(kvm->session); } - symbol__init(&kvm->session->header.env); + symbol__init(perf_session__env(kvm->session)); if (!perf_session__has_traces(kvm->session, "kvm record")) { ret = -EINVAL; diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c index c41a68d073de..d2e08de5976d 100644 --- a/tools/perf/builtin-kwork.c +++ b/tools/perf/builtin-kwork.c @@ -1804,7 +1804,7 @@ static int perf_kwork__read_events(struct perf_kwork *kwork) return PTR_ERR(session); } - symbol__init(&session->header.env); + symbol__init(perf_session__env(session)); if (perf_kwork__check_config(kwork, session) != 0) goto out_delete; diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 3b3ade7a39ca..fd49703021fd 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1876,7 +1876,7 @@ static int __cmd_report(bool display_info) } symbol_conf.allow_aliases = true; - symbol__init(&session->header.env); + symbol__init(perf_session__env(session)); if (!data.is_pipe) { if (!perf_session__has_traces(session, "lock record")) @@ -2042,7 +2042,7 @@ static int __cmd_contention(int argc, const char **argv) con.save_callstack = true; symbol_conf.allow_aliases = true; - symbol__init(&session->header.env); + symbol__init(perf_session__env(session)); if (use_bpf) { err = target__validate(&target); diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 5ec83cd85650..c6496adff3fe 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -304,7 +304,7 @@ static int report_raw_events(struct perf_mem *mem) goto out_delete; } - ret = symbol__init(&session->header.env); + ret = symbol__init(perf_session__env(session)); if (ret < 0) goto out_delete; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a59c4e15575c..8a829ddff6f2 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2203,7 +2203,7 @@ static int record__setup_sb_evlist(struct record *rec) } } - if 
(evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) { + if (evlist__add_bpf_sb_event(rec->sb_evlist, perf_session__env(rec->session))) { pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); return -1; } @@ -2222,15 +2222,16 @@ static int record__init_clock(struct record *rec) struct perf_session *session = rec->session; struct timespec ref_clockid; struct timeval ref_tod; + struct perf_env *env = perf_session__env(session); u64 ref; if (!rec->opts.use_clockid) return 0; if (rec->opts.use_clockid && rec->opts.clockid_res_ns) - session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns; + env->clock.clockid_res_ns = rec->opts.clockid_res_ns; - session->header.env.clock.clockid = rec->opts.clockid; + env->clock.clockid = rec->opts.clockid; if (gettimeofday(&ref_tod, NULL) != 0) { pr_err("gettimeofday failed, cannot set reference time.\n"); @@ -2245,12 +2246,12 @@ static int record__init_clock(struct record *rec) ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + (u64) ref_tod.tv_usec * NSEC_PER_USEC; - session->header.env.clock.tod_ns = ref; + env->clock.tod_ns = ref; ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + (u64) ref_clockid.tv_nsec; - session->header.env.clock.clockid_ns = ref; + env->clock.clockid_ns = ref; return 0; } @@ -2396,6 +2397,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) int fd; float ratio = 0; enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; + struct perf_env *env; atexit(record__sig_exit); signal(SIGCHLD, sig_handler); @@ -2437,7 +2439,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) pr_err("Perf session creation failed.\n"); return PTR_ERR(session); } - + env = perf_session__env(session); if (record__threads_enabled(rec)) { if (perf_data__is_pipe(&rec->data)) { pr_err("Parallel trace streaming is not available in pipe mode.\n"); @@ -2471,8 +2473,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) } #endif // 
HAVE_EVENTFD_SUPPORT - session->header.env.comp_type = PERF_COMP_ZSTD; - session->header.env.comp_level = rec->opts.comp_level; + env->comp_type = PERF_COMP_ZSTD; + env->comp_level = rec->opts.comp_level; if (rec->opts.kcore && !record__kcore_readable(&session->machines.host)) { @@ -2525,7 +2527,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) } /* Debug message used by test scripts */ pr_debug3("perf record done opening and mmapping events\n"); - session->header.env.comp_mmap_len = session->evlist->core.mmap_len; + env->comp_mmap_len = session->evlist->core.mmap_len; if (rec->opts.kcore) { err = record__kcore_copy(&session->machines.host, data); @@ -2855,7 +2857,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (rec->session->bytes_transferred && rec->session->bytes_compressed) { ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; - session->header.env.comp_ratio = ratio + 0.5; + env->comp_ratio = ratio + 0.5; } if (forks) { diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 26186717fe9b..704576e46e4b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -447,7 +447,7 @@ static int report__setup_sample_type(struct report *rep) } } - callchain_param_setup(sample_type, perf_env__arch(&rep->session->header.env)); + callchain_param_setup(sample_type, perf_env__arch(perf_session__env(rep->session))); if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) { ui__warning("Can't find LBR callchain. 
Switch off --stitch-lbr.\n" @@ -550,7 +550,7 @@ static int evlist__tui_block_hists_browse(struct evlist *evlist, struct report * evlist__for_each_entry(evlist, pos) { ret = report__browse_block_hists(&rep->block_reports[i++].hist, rep->min_percent, pos, - &rep->session->header.env); + perf_session__env(rep->session)); if (ret != 0) return ret; } @@ -685,7 +685,7 @@ static int report__browse_hists(struct report *rep) } ret = evlist__tui_browse_hists(evlist, help, NULL, rep->min_percent, - &session->header.env, true); + perf_session__env(session), true); /* * Usually "ret" is the last pressed key, and we only * care if the key notifies us to switch data file. @@ -1842,7 +1842,7 @@ int cmd_report(int argc, const char **argv) annotation_config__init(); } - if (symbol__init(&session->header.env) < 0) + if (symbol__init(perf_session__env(session)) < 0) goto error; if (report.time_str) { diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 34051ad23493..f166d6cbc083 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1939,7 +1939,7 @@ static int perf_sched__read_events(struct perf_sched *sched) return PTR_ERR(session); } - symbol__init(&session->header.env); + symbol__init(perf_session__env(session)); /* prefer sched_waking if it is captured */ if (evlist__find_tracepoint_by_name(session->evlist, "sched:sched_waking")) @@ -3294,6 +3294,7 @@ static int perf_sched__timehist(struct perf_sched *sched) }; struct perf_session *session; + struct perf_env *env; struct evlist *evlist; int err = -1; @@ -3318,6 +3319,7 @@ static int perf_sched__timehist(struct perf_sched *sched) if (IS_ERR(session)) return PTR_ERR(session); + env = perf_session__env(session); if (cpu_list) { err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap); if (err < 0) @@ -3326,7 +3328,7 @@ static int perf_sched__timehist(struct perf_sched *sched) evlist = session->evlist; - symbol__init(&session->header.env); + symbol__init(env); if 
(perf_time__parse_str(&sched->ptime, sched->time_str) != 0) { pr_err("Invalid time string\n"); @@ -3365,7 +3367,7 @@ static int perf_sched__timehist(struct perf_sched *sched) goto out; /* pre-allocate struct for per-CPU idle stats */ - sched->max_cpu.cpu = session->header.env.nr_cpus_online; + sched->max_cpu.cpu = env->nr_cpus_online; if (sched->max_cpu.cpu == 0) sched->max_cpu.cpu = 4; if (init_idle_threads(sched->max_cpu.cpu)) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 271f22962e32..31cce67217b0 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -714,7 +714,7 @@ static int perf_session__check_output_opt(struct perf_session *session) } } - if (tod && !session->header.env.clock.enabled) { + if (tod && !perf_session__env(session)->clock.enabled) { pr_err("Can't provide 'tod' time, missing clock data. " "Please record with -k/--clockid option.\n"); return -1; @@ -759,7 +759,7 @@ tod_scnprintf(struct perf_script *script, char *buf, int buflen, if (buflen < 64 || !script) return buf; - env = &script->session->header.env; + env = perf_session__env(script->session); if (!env->clock.enabled) { scnprintf(buf, buflen, "disabled"); return buf; @@ -3863,6 +3863,7 @@ int cmd_script(int argc, const char **argv) "perf script [] [script-args]", NULL }; + struct perf_env *env; perf_set_singlethreaded(); @@ -4109,6 +4110,7 @@ int cmd_script(int argc, const char **argv) if (IS_ERR(session)) return PTR_ERR(session); + env = perf_session__env(session); if (header || header_only) { script.tool.show_feat_hdr = SHOW_FEAT_HEADER; perf_session__fprintf_info(session, stdout, show_full_info); @@ -4118,17 +4120,17 @@ int cmd_script(int argc, const char **argv) if (show_full_info) script.tool.show_feat_hdr = SHOW_FEAT_HEADER_FULL_INFO; - if (symbol__init(&session->header.env) < 0) + if (symbol__init(env) < 0) goto out_delete; uname(&uts); if (data.is_pipe) { /* Assume pipe_mode indicates native_arch */ native_arch = true; - } else if 
(session->header.env.arch) { - if (!strcmp(uts.machine, session->header.env.arch)) + } else if (env->arch) { + if (!strcmp(uts.machine, env->arch)) native_arch = true; else if (!strcmp(uts.machine, "x86_64") && - !strcmp(session->header.env.arch, "i386")) + !strcmp(env->arch, "i386")) native_arch = true; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 00fce828cd5e..2c38dd98f6ca 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1689,48 +1689,48 @@ static struct aggr_cpu_id perf_env__get_global_aggr_by_cpu(struct perf_cpu cpu _ static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu cpu) { - return perf_env__get_socket_aggr_by_cpu(cpu, &perf_stat.session->header.env); + return perf_env__get_socket_aggr_by_cpu(cpu, perf_session__env(perf_stat.session)); } static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu cpu) { - return perf_env__get_die_aggr_by_cpu(cpu, &perf_stat.session->header.env); + return perf_env__get_die_aggr_by_cpu(cpu, perf_session__env(perf_stat.session)); } static struct aggr_cpu_id perf_stat__get_cluster_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu cpu) { - return perf_env__get_cluster_aggr_by_cpu(cpu, &perf_stat.session->header.env); + return perf_env__get_cluster_aggr_by_cpu(cpu, perf_session__env(perf_stat.session)); } static struct aggr_cpu_id perf_stat__get_cache_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu cpu) { - return perf_env__get_cache_aggr_by_cpu(cpu, &perf_stat.session->header.env); + return perf_env__get_cache_aggr_by_cpu(cpu, perf_session__env(perf_stat.session)); } static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu cpu) { - return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env); + return perf_env__get_core_aggr_by_cpu(cpu, 
perf_session__env(perf_stat.session)); } static struct aggr_cpu_id perf_stat__get_cpu_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu cpu) { - return perf_env__get_cpu_aggr_by_cpu(cpu, &perf_stat.session->header.env); + return perf_env__get_cpu_aggr_by_cpu(cpu, perf_session__env(perf_stat.session)); } static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu cpu) { - return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env); + return perf_env__get_node_aggr_by_cpu(cpu, perf_session__env(perf_stat.session)); } static struct aggr_cpu_id perf_stat__get_global_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu cpu) { - return perf_env__get_global_aggr_by_cpu(cpu, &perf_stat.session->header.env); + return perf_env__get_global_aggr_by_cpu(cpu, perf_session__env(perf_stat.session)); } static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode) @@ -1789,7 +1789,7 @@ static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode) static int perf_stat_init_aggr_mode_file(struct perf_stat *st) { - struct perf_env *env = &st->session->header.env; + struct perf_env *env = perf_session__env(st->session); aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode); bool needs_sort = stat_config.aggr_mode != AGGR_NONE; @@ -2112,8 +2112,9 @@ static int process_stat_round_event(struct perf_session *session, { struct perf_record_stat_round *stat_round = &event->stat_round; struct timespec tsh, *ts = NULL; - const char **argv = session->header.env.cmdline_argv; - int argc = session->header.env.nr_cmdline; + struct perf_env *env = perf_session__env(session); + const char **argv = env->cmdline_argv; + int argc = env->nr_cmdline; process_counters(); diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 068d297aaf44..22050c640dfa 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ 
-1618,7 +1618,7 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name) if (IS_ERR(session)) return PTR_ERR(session); - symbol__init(&session->header.env); + symbol__init(perf_session__env(session)); (void)perf_header__process_sections(&session->header, perf_data__fd(session->data), diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index c77e195ea786..87d5742b7eb7 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -647,7 +647,8 @@ static void *display_thread_tui(void *arg) } ret = evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent, - &top->session->header.env, !top->record_opts.overwrite); + perf_session__env(top->session), + !top->record_opts.overwrite); if (ret == K_RELOAD) { top->zero = true; goto repeat; @@ -1253,7 +1254,7 @@ static int __cmd_top(struct perf_top *top) int ret; if (!annotate_opts.objdump_path) { - ret = perf_env__lookup_objdump(&top->session->header.env, + ret = perf_env__lookup_objdump(perf_session__env(top->session), &annotate_opts.objdump_path); if (ret) return ret; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index bb2dbc1d2ffa..0261f4eefe6d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -4701,7 +4701,7 @@ static int trace__replay(struct trace *trace) if (trace->opts.target.tid) symbol_conf.tid_list_str = strdup(trace->opts.target.tid); - if (symbol__init(&session->header.env) < 0) + if (symbol__init(perf_session__env(session)) < 0) goto out; trace->host = &session->machines.host; diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index a8cb5ba898ab..bc7d10630dad 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -69,9 +69,11 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) int i; struct aggr_cpu_id id; struct perf_cpu cpu; + struct perf_env *env; session = perf_session__new(&data, NULL); TEST_ASSERT_VAL("can't get session", !IS_ERR(session)); + 
env = perf_session__env(session); cpu__setup_cpunode_map(); /* On platforms with large numbers of CPUs process_cpu_topology() @@ -95,9 +97,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) * condition is true (see do_core_id_test in header.c). So always * run this test on those platforms. */ - if (!session->header.env.cpu - && strncmp(session->header.env.arch, "s390", 4) - && strncmp(session->header.env.arch, "aarch64", 7)) + if (!env->cpu && strncmp(env->arch, "s390", 4) && strncmp(env->arch, "aarch64", 7)) return TEST_SKIP; /* @@ -106,20 +106,20 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) * physical_package_id will be set to -1. Hence skip this * test if physical_package_id returns -1 for cpu from perf_cpu_map. */ - if (!strncmp(session->header.env.arch, "ppc64le", 7)) { + if (!strncmp(env->arch, "ppc64le", 7)) { if (cpu__get_socket_id(perf_cpu_map__cpu(map, 0)) == -1) return TEST_SKIP; } - TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu); + TEST_ASSERT_VAL("Session header CPU map not set", env->cpu); - for (i = 0; i < session->header.env.nr_cpus_avail; i++) { + for (i = 0; i < env->nr_cpus_avail; i++) { cpu.cpu = i; if (!perf_cpu_map__has(map, cpu)) continue; pr_debug("CPU %d, core %d, socket %d\n", i, - session->header.env.cpu[i].core_id, - session->header.env.cpu[i].socket_id); + env->cpu[i].core_id, + env->cpu[i].socket_id); } // Test that CPU ID contains socket, die, core and CPU @@ -129,13 +129,12 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) cpu.cpu == id.cpu.cpu); TEST_ASSERT_VAL("Cpu map - Core ID doesn't match", - session->header.env.cpu[cpu.cpu].core_id == id.core); + env->cpu[cpu.cpu].core_id == id.core); TEST_ASSERT_VAL("Cpu map - Socket ID doesn't match", - session->header.env.cpu[cpu.cpu].socket_id == - id.socket); + env->cpu[cpu.cpu].socket_id == id.socket); TEST_ASSERT_VAL("Cpu map - Die ID doesn't match", - session->header.env.cpu[cpu.cpu].die_id 
== id.die); + env->cpu[cpu.cpu].die_id == id.die); TEST_ASSERT_VAL("Cpu map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Cpu map - Thread IDX is set", id.thread_idx == -1); } @@ -144,14 +143,13 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) perf_cpu_map__for_each_cpu(cpu, i, map) { id = aggr_cpu_id__core(cpu, NULL); TEST_ASSERT_VAL("Core map - Core ID doesn't match", - session->header.env.cpu[cpu.cpu].core_id == id.core); + env->cpu[cpu.cpu].core_id == id.core); TEST_ASSERT_VAL("Core map - Socket ID doesn't match", - session->header.env.cpu[cpu.cpu].socket_id == - id.socket); + env->cpu[cpu.cpu].socket_id == id.socket); TEST_ASSERT_VAL("Core map - Die ID doesn't match", - session->header.env.cpu[cpu.cpu].die_id == id.die); + env->cpu[cpu.cpu].die_id == id.die); TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Core map - Thread IDX is set", id.thread_idx == -1); } @@ -160,11 +158,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) perf_cpu_map__for_each_cpu(cpu, i, map) { id = aggr_cpu_id__die(cpu, NULL); TEST_ASSERT_VAL("Die map - Socket ID doesn't match", - session->header.env.cpu[cpu.cpu].socket_id == - id.socket); + env->cpu[cpu.cpu].socket_id == id.socket); TEST_ASSERT_VAL("Die map - Die ID doesn't match", - session->header.env.cpu[cpu.cpu].die_id == id.die); + env->cpu[cpu.cpu].die_id == id.die); TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Die map - Core is set", id.core == -1); @@ -176,8 +173,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) perf_cpu_map__for_each_cpu(cpu, i, map) { id = aggr_cpu_id__socket(cpu, NULL); TEST_ASSERT_VAL("Socket map - Socket ID doesn't match", - session->header.env.cpu[cpu.cpu].socket_id == - id.socket); + env->cpu[cpu.cpu].socket_id == id.socket); TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1); diff --git 
a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index dc09a4730c50..664f361ef8c1 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -549,7 +549,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, * for perf-record and perf-report use header.env; * otherwise, use global perf_env. */ - env = session->data ? &session->header.env : &perf_env; + env = session->data ? perf_session__env(session) : &perf_env; arrays = 1UL << PERF_BPIL_JITED_KSYMS; arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS; diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c index ab760e267d41..3712be067464 100644 --- a/tools/perf/util/branch.c +++ b/tools/perf/util/branch.c @@ -46,7 +46,7 @@ const char *branch_new_type_name(int new_type) "FAULT_DATA", "FAULT_INST", /* - * TODO: This switch should happen on 'session->header.env.arch' + * TODO: This switch should happen on 'perf_session__env(session)->arch' * instead, because an arm64 platform perf recording could be * opened for analysis on other platforms as well. */ diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 5e7ff09fbc95..3d2e437e1354 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -1338,14 +1338,14 @@ static void cleanup_events(struct perf_session *session) static int setup_streams(struct ctf_writer *cw, struct perf_session *session) { struct ctf_stream **stream; - struct perf_header *ph = &session->header; + struct perf_env *env = perf_session__env(session); int ncpus; /* * Try to get the number of cpus used in the data file, * if not present fallback to the MAX_CPUS. 
*/ - ncpus = ph->env.nr_cpus_avail ?: MAX_CPUS; + ncpus = env->nr_cpus_avail ?: MAX_CPUS; stream = zalloc(sizeof(*stream) * ncpus); if (!stream) { @@ -1371,7 +1371,7 @@ static void free_streams(struct ctf_writer *cw) static int ctf_writer__setup_env(struct ctf_writer *cw, struct perf_session *session) { - struct perf_header *header = &session->header; + struct perf_env *env = perf_session__env(session); struct bt_ctf_writer *writer = cw->writer; #define ADD(__n, __v) \ @@ -1380,11 +1380,11 @@ do { \ return -1; \ } while (0) - ADD("host", header->env.hostname); + ADD("host", env->hostname); ADD("sysname", "Linux"); - ADD("release", header->env.os_release); - ADD("version", header->env.version); - ADD("machine", header->env.arch); + ADD("release", env->os_release); + ADD("version", env->version); + ADD("machine", env->arch); ADD("domain", "kernel"); ADD("tracer_name", "perf"); @@ -1401,7 +1401,7 @@ static int ctf_writer__setup_clock(struct ctf_writer *cw, int64_t offset = 0; if (tod) { - struct perf_env *env = &session->header.env; + struct perf_env *env = perf_session__env(session); if (!env->clock.enabled) { pr_err("Can't provide --tod time, missing clock data. 
" diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c index d9f805bf6fb0..9dc1e184cf3c 100644 --- a/tools/perf/util/data-convert-json.c +++ b/tools/perf/util/data-convert-json.c @@ -257,7 +257,8 @@ static int process_sample_event(const struct perf_tool *tool, static void output_headers(struct perf_session *session, struct convert_json *c) { struct stat st; - struct perf_header *header = &session->header; + const struct perf_header *header = &session->header; + const struct perf_env *env = perf_session__env(session); int ret; int fd = perf_data__fd(session->data); int i; @@ -280,32 +281,32 @@ static void output_headers(struct perf_session *session, struct convert_json *c) output_json_key_format(out, true, 2, "data-size", "%" PRIu64, header->data_size); output_json_key_format(out, true, 2, "feat-offset", "%" PRIu64, header->feat_offset); - output_json_key_string(out, true, 2, "hostname", header->env.hostname); - output_json_key_string(out, true, 2, "os-release", header->env.os_release); - output_json_key_string(out, true, 2, "arch", header->env.arch); + output_json_key_string(out, true, 2, "hostname", env->hostname); + output_json_key_string(out, true, 2, "os-release", env->os_release); + output_json_key_string(out, true, 2, "arch", env->arch); - if (header->env.cpu_desc) - output_json_key_string(out, true, 2, "cpu-desc", header->env.cpu_desc); + if (env->cpu_desc) + output_json_key_string(out, true, 2, "cpu-desc", env->cpu_desc); - output_json_key_string(out, true, 2, "cpuid", header->env.cpuid); - output_json_key_format(out, true, 2, "nrcpus-online", "%u", header->env.nr_cpus_online); - output_json_key_format(out, true, 2, "nrcpus-avail", "%u", header->env.nr_cpus_avail); + output_json_key_string(out, true, 2, "cpuid", env->cpuid); + output_json_key_format(out, true, 2, "nrcpus-online", "%u", env->nr_cpus_online); + output_json_key_format(out, true, 2, "nrcpus-avail", "%u", env->nr_cpus_avail); - if (header->env.clock.enabled) { + 
if (env->clock.enabled) { output_json_key_format(out, true, 2, "clockid", - "%u", header->env.clock.clockid); + "%u", env->clock.clockid); output_json_key_format(out, true, 2, "clock-time", - "%" PRIu64, header->env.clock.clockid_ns); + "%" PRIu64, env->clock.clockid_ns); output_json_key_format(out, true, 2, "real-time", - "%" PRIu64, header->env.clock.tod_ns); + "%" PRIu64, env->clock.tod_ns); } - output_json_key_string(out, true, 2, "perf-version", header->env.version); + output_json_key_string(out, true, 2, "perf-version", env->version); output_json_key_format(out, true, 2, "cmdline", "["); - for (i = 0; i < header->env.nr_cmdline; i++) { + for (i = 0; i < env->nr_cmdline; i++) { output_json_delimiters(out, i != 0, 3); - output_json_string(c->out, header->env.cmdline_argv[i]); + output_json_string(c->out, env->cmdline_argv[i]); } output_json_format(out, false, 2, "]"); } @@ -376,8 +377,7 @@ int bt_convert__perf2json(const char *input_name, const char *output_name, fprintf(stderr, "Error creating perf session!\n"); goto err_fclose; } - - if (symbol__init(&session->header.env) < 0) { + if (symbol__init(perf_session__env(session)) < 0) { fprintf(stderr, "Symbol init error!\n"); goto err_session_delete; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 38075059086c..b09d157f7d04 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2558,7 +2558,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, { int i, err = -1; struct perf_cpu_map *map; - int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS); + int nr_cpus = min(perf_session__env(session)->nr_cpus_avail, MAX_NR_CPUS); struct perf_cpu cpu; for (i = 0; i < PERF_TYPE_MAX; ++i) { @@ -2747,3 +2747,8 @@ int perf_session__dsos_hit_all(struct perf_session *session) return 0; } + +struct perf_env *perf_session__env(struct perf_session *session) +{ + return &session->header.env; +} diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 
db1c120a9e67..e7f7464b838f 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -208,4 +208,6 @@ int perf_event__process_finished_round(const struct perf_tool *tool, union perf_event *event, struct ordered_events *oe); +struct perf_env *perf_session__env(struct perf_session *session); + #endif /* __PERF_SESSION_H */ diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c index 204ec03071bc..e83c7ababc2a 100644 --- a/tools/perf/util/tool.c +++ b/tools/perf/util/tool.c @@ -20,7 +20,7 @@ static int perf_session__process_compressed_event(struct perf_session *session, void *src; size_t decomp_size, src_size; u64 decomp_last_rem = 0; - size_t mmap_len, decomp_len = session->header.env.comp_mmap_len; + size_t mmap_len, decomp_len = perf_session__env(session)->comp_mmap_len; struct decomp *decomp, *decomp_last = session->active_decomp->decomp_last; if (decomp_last) { From 57ddb9cbb54fbf3772063795051b88a1f7258c6c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:32:50 -0700 Subject: [PATCH 1067/2411] perf evlist: Change env variable to session The session holds a perf_env pointer env. In UI code container_of is used to turn the env to a session, but this assumes the session header's env is in use. Rather than a dubious container_of, hold the session in the evlist and derive the env from the session with evsel__env, perf_session__env, etc. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-11-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-report.c | 6 +++++- tools/perf/builtin-script.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/tests/topology.c | 1 + tools/perf/ui/browser.h | 4 ++-- tools/perf/ui/browsers/header.c | 4 +--- tools/perf/ui/browsers/hists.c | 2 +- tools/perf/util/amd-sample-raw.c | 2 +- tools/perf/util/arm-spe.c | 2 +- tools/perf/util/evlist.h | 2 +- tools/perf/util/evsel.c | 12 +++++++++--- tools/perf/util/evsel.h | 1 + tools/perf/util/header.c | 2 +- tools/perf/util/s390-cpumsf.c | 2 +- tools/perf/util/sample-raw.c | 7 ++++--- tools/perf/util/sample-raw.h | 2 +- tools/perf/util/session.c | 4 +++- 17 files changed, 35 insertions(+), 22 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 704576e46e4b..ada8e0166c78 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1274,6 +1274,8 @@ static int process_attr(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct evlist **pevlist) { + struct perf_session *session; + struct perf_env *env; u64 sample_type; int err; @@ -1286,7 +1288,9 @@ static int process_attr(const struct perf_tool *tool __maybe_unused, * on events sample_type. */ sample_type = evlist__combined_sample_type(*pevlist); - callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env)); + session = (*pevlist)->session; + env = perf_session__env(session); + callchain_param_setup(sample_type, perf_env__arch(env)); return 0; } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 31cce67217b0..f2b5620165b4 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2534,7 +2534,7 @@ static int process_attr(const struct perf_tool *tool, union perf_event *event, * on events sample_type. 
*/ sample_type = evlist__combined_sample_type(evlist); - callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env)); + callchain_param_setup(sample_type, perf_env__arch(perf_session__env(scr->session))); /* Enable fields for callchain entries */ if (symbol_conf.use_callchain && diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 87d5742b7eb7..2760971d4c97 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1654,7 +1654,6 @@ int cmd_top(int argc, const char **argv) "Couldn't read the cpuid for this machine: %s\n", str_error_r(errno, errbuf, sizeof(errbuf))); } - top.evlist->env = &perf_env; argc = parse_options(argc, argv, options, top_usage, 0); if (argc) @@ -1830,6 +1829,7 @@ int cmd_top(int argc, const char **argv) perf_top__update_print_entries(&top); signal(SIGWINCH, winch_sig); } + top.session->env = &perf_env; top.session = perf_session__new(NULL, NULL); if (IS_ERR(top.session)) { diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index bc7d10630dad..ec01150d208d 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -43,6 +43,7 @@ static int session_write_header(char *path) session->evlist = evlist__new_default(); TEST_ASSERT_VAL("can't get evlist", session->evlist); + session->evlist->session = session; perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); perf_header__set_feat(&session->header, HEADER_NRCPUS); diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index f59ad4f14d33..9d4404f9b87f 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -71,8 +71,8 @@ int ui_browser__help_window(struct ui_browser *browser, const char *text); bool ui_browser__dialog_yesno(struct ui_browser *browser, const char *text); int ui_browser__input_window(const char *title, const char *text, char *input, const char *exit_msg, int delay_sec); -struct perf_env; -int tui__header_window(struct perf_env *env); +struct perf_session; +int 
tui__header_window(struct perf_session *session); void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence); unsigned int ui_browser__argv_refresh(struct ui_browser *browser); diff --git a/tools/perf/ui/browsers/header.c b/tools/perf/ui/browsers/header.c index 2213b4661600..5b5ca32e3eef 100644 --- a/tools/perf/ui/browsers/header.c +++ b/tools/perf/ui/browsers/header.c @@ -93,16 +93,14 @@ static int ui__list_menu(int argc, char * const argv[]) return list_menu__run(&menu); } -int tui__header_window(struct perf_env *env) +int tui__header_window(struct perf_session *session) { int i, argc = 0; char **argv; - struct perf_session *session; char *ptr, *pos; size_t size; FILE *fp = open_memstream(&ptr, &size); - session = container_of(env, struct perf_session, header.env); perf_header__fprintf_info(session, fp, true); fclose(fp); diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index d26b925e3d7f..d9d3fb44477a 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3233,7 +3233,7 @@ static int evsel__hists_browse(struct evsel *evsel, int nr_events, const char *h case 'i': /* env->arch is NULL for live-mode (i.e. 
perf top) */ if (env->arch) - tui__header_window(env); + tui__header_window(evsel__session(evsel)); continue; case 'F': symbol_conf.filter_relative ^= 1; diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c index 4b540e6fb42d..b084dee76b1a 100644 --- a/tools/perf/util/amd-sample-raw.c +++ b/tools/perf/util/amd-sample-raw.c @@ -354,7 +354,7 @@ static void parse_cpuid(struct perf_env *env) */ bool evlist__has_amd_ibs(struct evlist *evlist) { - struct perf_env *env = evlist->env; + struct perf_env *env = perf_session__env(evlist->session); int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env); const char *pmu_mapping = perf_env__pmu_mappings(env); char name[sizeof("ibs_fetch")]; diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index d46e0cccac99..8942fa598a84 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -856,7 +856,7 @@ static bool arm_spe__synth_ds(struct arm_spe_queue *speq, const char *cpuid; pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n"); - cpuid = perf_env__cpuid(spe->session->evlist->env); + cpuid = perf_env__cpuid(perf_session__env(spe->session)); midr = strtol(cpuid, NULL, 16); } else { /* CPU ID is -1 for per-thread mode */ diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 1472d2179be1..5e71e3dc6042 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -71,7 +71,7 @@ struct evlist { struct mmap *overwrite_mmap; struct evsel *selected; struct events_stats stats; - struct perf_env *env; + struct perf_session *session; void (*trace_event_sample_raw)(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ae11df1e7902..3f766f240cc7 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -48,6 +48,7 @@ #include "record.h" #include "debug.h" #include "trace-event.h" +#include "session.h" #include "stat.h" #include 
"string2.h" #include "memswap.h" @@ -3872,11 +3873,16 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel)); } +struct perf_session *evsel__session(struct evsel *evsel) +{ + return evsel && evsel->evlist ? evsel->evlist->session : NULL; +} + struct perf_env *evsel__env(struct evsel *evsel) { - if (evsel && evsel->evlist && evsel->evlist->env) - return evsel->evlist->env; - return &perf_env; + struct perf_session *session = evsel__session(evsel); + + return session ? perf_session__env(session) : &perf_env; } static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8e79eb6d41b3..5797a02e5d6a 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -542,6 +542,7 @@ static inline bool evsel__is_dummy_event(struct evsel *evsel) (evsel->core.attr.config == PERF_COUNT_SW_DUMMY); } +struct perf_session *evsel__session(struct evsel *evsel); struct perf_env *evsel__env(struct evsel *evsel); int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 4f8133a18312..ce0fe7879ab0 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -4225,7 +4225,7 @@ int perf_session__read_header(struct perf_session *session) if (session->evlist == NULL) return -ENOMEM; - session->evlist->env = &header->env; + session->evlist->session = session; session->machines.host.env = &header->env; /* diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 0ce52f0280b8..c17dbe232c54 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -1142,7 +1142,7 @@ int s390_cpumsf_process_auxtrace_info(union perf_event *event, sf->machine = &session->machines.host; /* No kvm support */ sf->auxtrace_type = auxtrace_info->type; sf->pmu_type = PERF_TYPE_RAW; - sf->machine_type = 
s390_cpumsf_get_type(session->evlist->env->cpuid); + sf->machine_type = s390_cpumsf_get_type(perf_session__env(session)->cpuid); sf->auxtrace.process_event = s390_cpumsf_process_event; sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event; diff --git a/tools/perf/util/sample-raw.c b/tools/perf/util/sample-raw.c index f3f6bd9d290e..bcf442574d6e 100644 --- a/tools/perf/util/sample-raw.c +++ b/tools/perf/util/sample-raw.c @@ -6,15 +6,16 @@ #include "env.h" #include "header.h" #include "sample-raw.h" +#include "session.h" /* * Check platform the perf data file was created on and perform platform * specific interpretation. */ -void evlist__init_trace_event_sample_raw(struct evlist *evlist) +void evlist__init_trace_event_sample_raw(struct evlist *evlist, struct perf_env *env) { - const char *arch_pf = perf_env__arch(evlist->env); - const char *cpuid = perf_env__cpuid(evlist->env); + const char *arch_pf = perf_env__arch(env); + const char *cpuid = perf_env__cpuid(env); if (arch_pf && !strcmp("s390", arch_pf)) evlist->trace_event_sample_raw = evlist__s390_sample_raw; diff --git a/tools/perf/util/sample-raw.h b/tools/perf/util/sample-raw.h index ea01c5811503..896e9a87e373 100644 --- a/tools/perf/util/sample-raw.h +++ b/tools/perf/util/sample-raw.h @@ -11,5 +11,5 @@ void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, bool evlist__has_amd_ibs(struct evlist *evlist); void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); -void evlist__init_trace_event_sample_raw(struct evlist *evlist); +void evlist__init_trace_event_sample_raw(struct evlist *evlist, struct perf_env *env); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index b09d157f7d04..a851d9130abd 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -177,7 +177,7 @@ struct perf_session *__perf_session__new(struct perf_data *data, 
perf_session__set_comm_exec(session); } - evlist__init_trace_event_sample_raw(session->evlist); + evlist__init_trace_event_sample_raw(session->evlist, &session->header.env); /* Open the directory data. */ if (data->is_dir) { @@ -193,6 +193,8 @@ struct perf_session *__perf_session__new(struct perf_data *data, } else { session->machines.host.env = &perf_env; } + if (session->evlist) + session->evlist->session = session; session->machines.host.single_address_space = perf_env__single_address_space(session->machines.host.env); From b743a1368dea43b4ef6e51c2931eeada07556d87 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:32:51 -0700 Subject: [PATCH 1068/2411] perf header: Clean up use of perf_env Always use the perf_env from the feat_fd's perf_header. Cache the value on entry to a function in `env` and use `env->` consistently in the code. Ensure the header is initialized for use in perf_session__do_write_header. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-12-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/header.c | 174 ++++++++++++++++++++++----------------- 1 file changed, 98 insertions(+), 76 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index ce0fe7879ab0..4f2a6e10ed5c 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -557,6 +557,7 @@ static int write_event_desc(struct feat_fd *ff, static int write_cmdline(struct feat_fd *ff, struct evlist *evlist __maybe_unused) { + struct perf_env *env = &ff->ph->env; char pbuf[MAXPATHLEN], *buf; int i, ret, n; @@ -564,7 +565,7 @@ static int write_cmdline(struct feat_fd *ff, buf = perf_exe(pbuf, MAXPATHLEN); /* account for binary path */ - n = perf_env.nr_cmdline + 1; + n = env->nr_cmdline + 1; ret = do_write(ff, &n, sizeof(n)); if (ret < 0) @@ -574,8 +575,8 @@ static int write_cmdline(struct feat_fd *ff, if (ret < 0) return ret; - for (i = 0 ; i < perf_env.nr_cmdline; i++) { - ret = do_write_string(ff, 
perf_env.cmdline_argv[i]); + for (i = 0 ; i < env->nr_cmdline; i++) { + ret = do_write_string(ff, env->cmdline_argv[i]); if (ret < 0) return ret; } @@ -586,6 +587,7 @@ static int write_cmdline(struct feat_fd *ff, static int write_cpu_topology(struct feat_fd *ff, struct evlist *evlist __maybe_unused) { + struct perf_env *env = &ff->ph->env; struct cpu_topology *tp; u32 i; int ret, j; @@ -613,17 +615,17 @@ static int write_cpu_topology(struct feat_fd *ff, break; } - ret = perf_env__read_cpu_topology_map(&perf_env); + ret = perf_env__read_cpu_topology_map(env); if (ret < 0) goto done; - for (j = 0; j < perf_env.nr_cpus_avail; j++) { - ret = do_write(ff, &perf_env.cpu[j].core_id, - sizeof(perf_env.cpu[j].core_id)); + for (j = 0; j < env->nr_cpus_avail; j++) { + ret = do_write(ff, &env->cpu[j].core_id, + sizeof(env->cpu[j].core_id)); if (ret < 0) return ret; - ret = do_write(ff, &perf_env.cpu[j].socket_id, - sizeof(perf_env.cpu[j].socket_id)); + ret = do_write(ff, &env->cpu[j].socket_id, + sizeof(env->cpu[j].socket_id)); if (ret < 0) return ret; } @@ -641,9 +643,9 @@ static int write_cpu_topology(struct feat_fd *ff, goto done; } - for (j = 0; j < perf_env.nr_cpus_avail; j++) { - ret = do_write(ff, &perf_env.cpu[j].die_id, - sizeof(perf_env.cpu[j].die_id)); + for (j = 0; j < env->nr_cpus_avail; j++) { + ret = do_write(ff, &env->cpu[j].die_id, + sizeof(env->cpu[j].die_id)); if (ret < 0) return ret; } @@ -2123,17 +2125,18 @@ static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp) static void print_pmu_caps(struct feat_fd *ff, FILE *fp) { + struct perf_env *env = &ff->ph->env; struct pmu_caps *pmu_caps; - for (int i = 0; i < ff->ph->env.nr_pmus_with_caps; i++) { - pmu_caps = &ff->ph->env.pmu_caps[i]; + for (int i = 0; i < env->nr_pmus_with_caps; i++) { + pmu_caps = &env->pmu_caps[i]; __print_pmu_caps(fp, pmu_caps->nr_caps, pmu_caps->caps, pmu_caps->pmu_name); } - if (strcmp(perf_env__arch(&ff->ph->env), "x86") == 0 && - perf_env__has_pmu_mapping(&ff->ph->env, 
"ibs_op")) { - char *max_precise = perf_env__find_pmu_cap(&ff->ph->env, "cpu", "max_precise"); + if (strcmp(perf_env__arch(env), "x86") == 0 && + perf_env__has_pmu_mapping(env, "ibs_op")) { + char *max_precise = perf_env__find_pmu_cap(env, "cpu", "max_precise"); if (max_precise != NULL && atoi(max_precise) == 0) fprintf(fp, "# AMD systems uses ibs_op// PMU for some precise events, e.g.: cycles:p, see the 'perf list' man page for further details.\n"); @@ -2142,18 +2145,19 @@ static void print_pmu_caps(struct feat_fd *ff, FILE *fp) static void print_pmu_mappings(struct feat_fd *ff, FILE *fp) { + struct perf_env *env = &ff->ph->env; const char *delimiter = "# pmu mappings: "; char *str, *tmp; u32 pmu_num; u32 type; - pmu_num = ff->ph->env.nr_pmu_mappings; + pmu_num = env->nr_pmu_mappings; if (!pmu_num) { fprintf(fp, "# pmu mappings: not available\n"); return; } - str = ff->ph->env.pmu_mappings; + str = env->pmu_mappings; while (pmu_num) { type = strtoul(str, &tmp, 0); @@ -2235,17 +2239,18 @@ static void memory_node__fprintf(struct memory_node *n, static void print_mem_topology(struct feat_fd *ff, FILE *fp) { + struct perf_env *env = &ff->ph->env; struct memory_node *nodes; int i, nr; - nodes = ff->ph->env.memory_nodes; - nr = ff->ph->env.nr_memory_nodes; + nodes = env->memory_nodes; + nr = env->nr_memory_nodes; fprintf(fp, "# memory nodes (nr %d, block size 0x%llx):\n", - nr, ff->ph->env.memory_bsize); + nr, env->memory_bsize); for (i = 0; i < nr; i++) { - memory_node__fprintf(&nodes[i], ff->ph->env.memory_bsize, fp); + memory_node__fprintf(&nodes[i], env->memory_bsize, fp); } } @@ -2443,6 +2448,7 @@ static int process_build_id(struct feat_fd *ff, void *data __maybe_unused) static int process_nrcpus(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; int ret; u32 nr_cpus_avail, nr_cpus_online; @@ -2453,20 +2459,21 @@ static int process_nrcpus(struct feat_fd *ff, void *data __maybe_unused) ret = do_read_u32(ff, &nr_cpus_online); if 
(ret) return ret; - ff->ph->env.nr_cpus_avail = (int)nr_cpus_avail; - ff->ph->env.nr_cpus_online = (int)nr_cpus_online; + env->nr_cpus_avail = (int)nr_cpus_avail; + env->nr_cpus_online = (int)nr_cpus_online; return 0; } static int process_total_mem(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; u64 total_mem; int ret; ret = do_read_u64(ff, &total_mem); if (ret) return -1; - ff->ph->env.total_mem = (unsigned long long)total_mem; + env->total_mem = (unsigned long long)total_mem; return 0; } @@ -2527,13 +2534,14 @@ process_event_desc(struct feat_fd *ff, void *data __maybe_unused) static int process_cmdline(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; char *str, *cmdline = NULL, **argv = NULL; u32 nr, i, len = 0; if (do_read_u32(ff, &nr)) return -1; - ff->ph->env.nr_cmdline = nr; + env->nr_cmdline = nr; cmdline = zalloc(ff->size + nr + 1); if (!cmdline) @@ -2553,8 +2561,8 @@ static int process_cmdline(struct feat_fd *ff, void *data __maybe_unused) len += strlen(str) + 1; free(str); } - ff->ph->env.cmdline = cmdline; - ff->ph->env.cmdline_argv = (const char **) argv; + env->cmdline = cmdline; + env->cmdline_argv = (const char **) argv; return 0; error: @@ -2568,18 +2576,18 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) u32 nr, i; char *str = NULL; struct strbuf sb; - int cpu_nr = ff->ph->env.nr_cpus_avail; + struct perf_env *env = &ff->ph->env; + int cpu_nr = env->nr_cpus_avail; u64 size = 0; - struct perf_header *ph = ff->ph; - ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu)); - if (!ph->env.cpu) + env->cpu = calloc(cpu_nr, sizeof(*env->cpu)); + if (!env->cpu) return -1; if (do_read_u32(ff, &nr)) goto free_cpu; - ph->env.nr_sibling_cores = nr; + env->nr_sibling_cores = nr; size += sizeof(u32); if (strbuf_init(&sb, 128) < 0) goto free_cpu; @@ -2595,12 +2603,12 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) size += 
string_size(str); zfree(&str); } - ph->env.sibling_cores = strbuf_detach(&sb, NULL); + env->sibling_cores = strbuf_detach(&sb, NULL); if (do_read_u32(ff, &nr)) return -1; - ph->env.nr_sibling_threads = nr; + env->nr_sibling_threads = nr; size += sizeof(u32); for (i = 0; i < nr; i++) { @@ -2614,14 +2622,14 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) size += string_size(str); zfree(&str); } - ph->env.sibling_threads = strbuf_detach(&sb, NULL); + env->sibling_threads = strbuf_detach(&sb, NULL); /* * The header may be from old perf, * which doesn't include core id and socket id information. */ if (ff->size <= size) { - zfree(&ph->env.cpu); + zfree(&env->cpu); return 0; } @@ -2629,13 +2637,13 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) if (do_read_u32(ff, &nr)) goto free_cpu; - ph->env.cpu[i].core_id = nr; + env->cpu[i].core_id = nr; size += sizeof(u32); if (do_read_u32(ff, &nr)) goto free_cpu; - ph->env.cpu[i].socket_id = nr; + env->cpu[i].socket_id = nr; size += sizeof(u32); } @@ -2649,7 +2657,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) if (do_read_u32(ff, &nr)) return -1; - ph->env.nr_sibling_dies = nr; + env->nr_sibling_dies = nr; size += sizeof(u32); for (i = 0; i < nr; i++) { @@ -2663,13 +2671,13 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) size += string_size(str); zfree(&str); } - ph->env.sibling_dies = strbuf_detach(&sb, NULL); + env->sibling_dies = strbuf_detach(&sb, NULL); for (i = 0; i < (u32)cpu_nr; i++) { if (do_read_u32(ff, &nr)) goto free_cpu; - ph->env.cpu[i].die_id = nr; + env->cpu[i].die_id = nr; } return 0; @@ -2678,12 +2686,13 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) strbuf_release(&sb); zfree(&str); free_cpu: - zfree(&ph->env.cpu); + zfree(&env->cpu); return -1; } static int process_numa_topology(struct feat_fd *ff, void *data __maybe_unused) { + struct 
perf_env *env = &ff->ph->env; struct numa_node *nodes, *n; u32 nr, i; char *str; @@ -2718,8 +2727,8 @@ static int process_numa_topology(struct feat_fd *ff, void *data __maybe_unused) if (!n->map) goto error; } - ff->ph->env.nr_numa_nodes = nr; - ff->ph->env.numa_nodes = nodes; + env->nr_numa_nodes = nr; + env->numa_nodes = nodes; return 0; error: @@ -2729,6 +2738,7 @@ static int process_numa_topology(struct feat_fd *ff, void *data __maybe_unused) static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; char *name; u32 pmu_num; u32 type; @@ -2742,7 +2752,7 @@ static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused) return 0; } - ff->ph->env.nr_pmu_mappings = pmu_num; + env->nr_pmu_mappings = pmu_num; if (strbuf_init(&sb, 128) < 0) return -1; @@ -2761,14 +2771,14 @@ static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused) goto error; if (!strcmp(name, "msr")) - ff->ph->env.msr_pmu_type = type; + env->msr_pmu_type = type; free(name); pmu_num--; } /* AMD may set it by evlist__has_amd_ibs() from perf_session__new() */ - free(ff->ph->env.pmu_mappings); - ff->ph->env.pmu_mappings = strbuf_detach(&sb, NULL); + free(env->pmu_mappings); + env->pmu_mappings = strbuf_detach(&sb, NULL); return 0; error: @@ -2778,6 +2788,7 @@ static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused) static int process_group_desc(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; size_t ret = -1; u32 i, nr, nr_groups; struct perf_session *session; @@ -2791,7 +2802,7 @@ static int process_group_desc(struct feat_fd *ff, void *data __maybe_unused) if (do_read_u32(ff, &nr_groups)) return -1; - ff->ph->env.nr_groups = nr_groups; + env->nr_groups = nr_groups; if (!nr_groups) { pr_debug("group desc not available\n"); return 0; @@ -2875,6 +2886,7 @@ static int process_auxtrace(struct feat_fd *ff, void *data __maybe_unused) static int 
process_cache(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; struct cpu_cache_level *caches; u32 cnt, i, version; @@ -2915,8 +2927,8 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused) #undef _R } - ff->ph->env.caches = caches; - ff->ph->env.caches_cnt = cnt; + env->caches = caches; + env->caches_cnt = cnt; return 0; out_free_caches: for (i = 0; i < cnt; i++) { @@ -2952,6 +2964,7 @@ static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused) static int process_mem_topology(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; struct memory_node *nodes; u64 version, i, nr, bsize; int ret = -1; @@ -2990,9 +3003,9 @@ static int process_mem_topology(struct feat_fd *ff, nodes[i] = n; } - ff->ph->env.memory_bsize = bsize; - ff->ph->env.memory_nodes = nodes; - ff->ph->env.nr_memory_nodes = nr; + env->memory_bsize = bsize; + env->memory_nodes = nodes; + env->nr_memory_nodes = nr; ret = 0; out: @@ -3004,7 +3017,9 @@ static int process_mem_topology(struct feat_fd *ff, static int process_clockid(struct feat_fd *ff, void *data __maybe_unused) { - if (do_read_u64(ff, &ff->ph->env.clock.clockid_res_ns)) + struct perf_env *env = &ff->ph->env; + + if (do_read_u64(ff, &env->clock.clockid_res_ns)) return -1; return 0; @@ -3013,6 +3028,7 @@ static int process_clockid(struct feat_fd *ff, static int process_clock_data(struct feat_fd *ff, void *_data __maybe_unused) { + struct perf_env *env = &ff->ph->env; u32 data32; u64 data64; @@ -3027,26 +3043,27 @@ static int process_clock_data(struct feat_fd *ff, if (do_read_u32(ff, &data32)) return -1; - ff->ph->env.clock.clockid = data32; + env->clock.clockid = data32; /* TOD ref time */ if (do_read_u64(ff, &data64)) return -1; - ff->ph->env.clock.tod_ns = data64; + env->clock.tod_ns = data64; /* clockid ref time */ if (do_read_u64(ff, &data64)) return -1; - ff->ph->env.clock.clockid_ns = data64; - ff->ph->env.clock.enabled = true; 
+ env->clock.clockid_ns = data64; + env->clock.enabled = true; return 0; } static int process_hybrid_topology(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; struct hybrid_node *nodes, *n; u32 nr, i; @@ -3070,8 +3087,8 @@ static int process_hybrid_topology(struct feat_fd *ff, goto error; } - ff->ph->env.nr_hybrid_nodes = nr; - ff->ph->env.hybrid_nodes = nodes; + env->nr_hybrid_nodes = nr; + env->hybrid_nodes = nodes; return 0; error: @@ -3224,19 +3241,21 @@ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) static int process_compressed(struct feat_fd *ff, void *data __maybe_unused) { - if (do_read_u32(ff, &(ff->ph->env.comp_ver))) + struct perf_env *env = &ff->ph->env; + + if (do_read_u32(ff, &(env->comp_ver))) return -1; - if (do_read_u32(ff, &(ff->ph->env.comp_type))) + if (do_read_u32(ff, &(env->comp_type))) return -1; - if (do_read_u32(ff, &(ff->ph->env.comp_level))) + if (do_read_u32(ff, &(env->comp_level))) return -1; - if (do_read_u32(ff, &(ff->ph->env.comp_ratio))) + if (do_read_u32(ff, &(env->comp_ratio))) return -1; - if (do_read_u32(ff, &(ff->ph->env.comp_mmap_len))) + if (do_read_u32(ff, &(env->comp_mmap_len))) return -1; return 0; @@ -3308,19 +3327,21 @@ static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps, static int process_cpu_pmu_caps(struct feat_fd *ff, void *data __maybe_unused) { - int ret = __process_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps, - &ff->ph->env.cpu_pmu_caps, - &ff->ph->env.max_branches, - &ff->ph->env.br_cntr_nr, - &ff->ph->env.br_cntr_width); + struct perf_env *env = &ff->ph->env; + int ret = __process_pmu_caps(ff, &env->nr_cpu_pmu_caps, + &env->cpu_pmu_caps, + &env->max_branches, + &env->br_cntr_nr, + &env->br_cntr_width); - if (!ret && !ff->ph->env.cpu_pmu_caps) + if (!ret && !env->cpu_pmu_caps) pr_debug("cpu pmu capabilities not available\n"); return ret; } static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env 
= &ff->ph->env; struct pmu_caps *pmu_caps; u32 nr_pmu, i; int ret; @@ -3358,8 +3379,8 @@ static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused) } } - ff->ph->env.nr_pmus_with_caps = nr_pmu; - ff->ph->env.pmu_caps = pmu_caps; + env->nr_pmus_with_caps = nr_pmu; + env->pmu_caps = pmu_caps; return 0; err: @@ -3657,6 +3678,7 @@ static int perf_session__do_write_header(struct perf_session *session, struct perf_header *header = &session->header; struct evsel *evsel; struct feat_fd ff = { + .ph = header, .fd = fd, }; u64 attr_offset = sizeof(f_header), attr_size = 0; From 5a156353e55e994627ac584e90b3b802e51e1ee2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:32:52 -0700 Subject: [PATCH 1069/2411] perf test: Avoid use perf_env The perf_env global variable holds the host perf_env data but its use is hit and miss. Switch to using local perf_env variables and ensure scoped perf_env__init and perf_env__exit. This loses command line setting of the perf_env, but this doesn't matter for tests. So the perf_env is fully initialized, clear it with memset in perf_env__init. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-13-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/tests/code-reading.c | 5 +++- tools/perf/tests/dlfilter-test.c | 50 ++++++++++++++++++-------------- tools/perf/util/env.c | 1 + 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 6efb6b4bbcce..0ec7004f90fe 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -651,11 +651,13 @@ static int do_test_code_reading(bool try_kcore) struct dso *dso; const char *events[] = { "cycles", "cycles:u", "cpu-clock", "cpu-clock:u", NULL }; int evidx = 0; + struct perf_env host_env; pid = getpid(); machine = machine__new_host(); - machine->env = &perf_env; + perf_env__init(&host_env); + machine->env = &host_env; ret = machine__create_kernel_maps(machine); if (ret < 0) { @@ -791,6 +793,7 @@ static int do_test_code_reading(bool try_kcore) perf_cpu_map__put(cpus); perf_thread_map__put(threads); machine__delete(machine); + perf_env__exit(&host_env); return err; } diff --git a/tools/perf/tests/dlfilter-test.c b/tools/perf/tests/dlfilter-test.c index 54f59d1246bc..6427e3382711 100644 --- a/tools/perf/tests/dlfilter-test.c +++ b/tools/perf/tests/dlfilter-test.c @@ -319,11 +319,12 @@ static int run_perf_script(struct test_data *td) static int test__dlfilter_test(struct test_data *td) { + struct perf_env host_env; u64 sample_type = TEST_SAMPLE_TYPE; pid_t pid = 12345; pid_t tid = 12346; u64 id = 99; - int err; + int err = TEST_OK; if (get_dlfilters_path(td->name, td->dlfilters, PATH_MAX)) return test_result("dlfilters not found", TEST_SKIP); @@ -353,37 +354,42 @@ static int test__dlfilter_test(struct test_data *td) pr_debug("Creating new host machine structure\n"); td->machine = machine__new_host(); - td->machine->env = &perf_env; + perf_env__init(&host_env); + td->machine->env = &host_env; td->fd = creat(td->perf_data_file_name, 
0644); if (td->fd < 0) return test_result("Failed to create test perf.data file", TEST_FAIL); err = perf_header__write_pipe(td->fd); - if (err < 0) - return test_result("perf_header__write_pipe() failed", TEST_FAIL); - + if (err < 0) { + err = test_result("perf_header__write_pipe() failed", TEST_FAIL); + goto out; + } err = write_attr(td, sample_type, &id); - if (err) - return test_result("perf_event__synthesize_attr() failed", TEST_FAIL); - - if (write_comm(td->fd, pid, tid, "test-prog")) - return TEST_FAIL; - - if (write_mmap(td->fd, pid, tid, MAP_START, 0x10000, 0, td->prog_file_name)) - return TEST_FAIL; - - if (write_sample(td, sample_type, id, pid, tid) != TEST_OK) - return TEST_FAIL; - + if (err) { + err = test_result("perf_event__synthesize_attr() failed", TEST_FAIL); + goto out; + } + if (write_comm(td->fd, pid, tid, "test-prog")) { + err = TEST_FAIL; + goto out; + } + if (write_mmap(td->fd, pid, tid, MAP_START, 0x10000, 0, td->prog_file_name)) { + err = TEST_FAIL; + goto out; + } + if (write_sample(td, sample_type, id, pid, tid) != TEST_OK) { + err = TEST_FAIL; + goto out; + } if (verbose > 1) system_cmd("%s script -i %s -D", td->perf, td->perf_data_file_name); - err = run_perf_script(td); - if (err) - return TEST_FAIL; - - return TEST_OK; + err = run_perf_script(td) ? 
TEST_FAIL : TEST_OK; +out: + perf_env__exit(&host_env); + return err; } static void unlink_path(const char *path) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index ee51378fb0d9..c09159083bf0 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -271,6 +271,7 @@ void perf_env__exit(struct perf_env *env) void perf_env__init(struct perf_env *env) { + memset(env, 0, sizeof(*env)); #ifdef HAVE_LIBBPF_SUPPORT env->bpf_progs.infos = RB_ROOT; env->bpf_progs.btfs = RB_ROOT; From 740f7ba1e3be5d6f192dafc5efd0bd0a8e8567e2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:32:53 -0700 Subject: [PATCH 1070/2411] perf session: Add host_env argument to perf_session__new When creating a perf_session the host perf_env may or may not want to be used. For example, `perf top` uses a host perf_env while `perf inject` does not. Add a host_env argument to perf_session__new so that sessions requiring a host perf_env can pass it in. Currently if none is specified the global perf_env variable is used, but this will change in later patches. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-14-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-inject.c | 3 ++- tools/perf/util/session.c | 5 +++-- tools/perf/util/session.h | 5 +++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index f73350a3417a..40ba6a94f719 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -2539,7 +2539,8 @@ int cmd_inject(int argc, const char **argv) inject.tool.bpf_metadata = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.session = __perf_session__new(&data, &inject.tool, - /*trace_event_repipe=*/inject.output.is_pipe); + /*trace_event_repipe=*/inject.output.is_pipe, + /*host_env=*/NULL); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index a851d9130abd..36532329a633 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -138,7 +138,8 @@ static int ordered_events__deliver_event(struct ordered_events *oe, struct perf_session *__perf_session__new(struct perf_data *data, struct perf_tool *tool, - bool trace_event_repipe) + bool trace_event_repipe, + struct perf_env *host_env) { int ret = -ENOMEM; struct perf_session *session = zalloc(sizeof(*session)); @@ -191,7 +192,7 @@ struct perf_session *__perf_session__new(struct perf_data *data, symbol_conf.kallsyms_name = perf_data__kallsyms_name(data); } } else { - session->machines.host.env = &perf_env; + session->machines.host.env = host_env ?: &perf_env; } if (session->evlist) session->evlist->session = session; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index e7f7464b838f..cf88d65a25cb 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -107,12 +107,13 @@ struct perf_tool; struct perf_session *__perf_session__new(struct perf_data *data, struct perf_tool *tool, - 
bool trace_event_repipe); + bool trace_event_repipe, + struct perf_env *host_env); static inline struct perf_session *perf_session__new(struct perf_data *data, struct perf_tool *tool) { - return __perf_session__new(data, tool, /*trace_event_repipe=*/false); + return __perf_session__new(data, tool, /*trace_event_repipe=*/false, /*host_env=*/NULL); } void perf_session__delete(struct perf_session *session); From aaa23571fe4bb7fb7549ad09dd56de5ca1bd289d Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:32:54 -0700 Subject: [PATCH 1071/2411] perf top: Make perf_env locally scoped The use of the global host perf_env variable is potentially inconsistent within the code. Switch perf top to using a locally scoped variable that is generally accessed through the session. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-15-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-top.c | 41 +++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 2760971d4c97..e9743f17bd0c 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1301,7 +1301,7 @@ static int __cmd_top(struct perf_top *top) perf_set_multithreaded(); if (perf_hpp_list.socket) { - ret = perf_env__read_cpu_topology_map(&perf_env); + ret = perf_env__read_cpu_topology_map(perf_session__env(top->session)); if (ret < 0) { char errbuf[BUFSIZ]; const char *err = str_error_r(-ret, errbuf, sizeof(errbuf)); @@ -1624,6 +1624,7 @@ int cmd_top(int argc, const char **argv) NULL }; int status = hists__init(); + struct perf_env host_env; if (status < 0) return status; @@ -1637,14 +1638,19 @@ int cmd_top(int argc, const char **argv) if (top.evlist == NULL) return -ENOMEM; + perf_env__init(&host_env); status = perf_config(perf_top_config, &top); if (status) - return status; + goto out_delete_evlist; /* * Since the per arch annotation init routine may 
need the cpuid, read * it here, since we are not getting this from the perf.data header. */ - status = perf_env__read_cpuid(&perf_env); + status = perf_env__set_cmdline(&host_env, argc, argv); + if (status) + goto out_delete_evlist; + + status = perf_env__read_cpuid(&host_env); if (status) { /* * Some arches do not provide a get_cpuid(), so just use pr_debug, otherwise @@ -1661,18 +1667,24 @@ int cmd_top(int argc, const char **argv) if (disassembler_style) { annotate_opts.disassembler_style = strdup(disassembler_style); - if (!annotate_opts.disassembler_style) - return -ENOMEM; + if (!annotate_opts.disassembler_style) { + status = -ENOMEM; + goto out_delete_evlist; + } } if (objdump_path) { annotate_opts.objdump_path = strdup(objdump_path); - if (!annotate_opts.objdump_path) - return -ENOMEM; + if (!annotate_opts.objdump_path) { + status = -ENOMEM; + goto out_delete_evlist; + } } if (addr2line_path) { symbol_conf.addr2line_path = strdup(addr2line_path); - if (!symbol_conf.addr2line_path) - return -ENOMEM; + if (!symbol_conf.addr2line_path) { + status = -ENOMEM; + goto out_delete_evlist; + } } status = symbol__validate_sym_arguments(); @@ -1735,7 +1747,7 @@ int cmd_top(int argc, const char **argv) symbol_conf.show_branchflag_count = true; if (opts->branch_stack) { - status = perf_env__read_core_pmu_caps(&perf_env); + status = perf_env__read_core_pmu_caps(&host_env); if (status) { pr_err("PMU capability data is not available\n"); goto out_delete_evlist; @@ -1829,14 +1841,16 @@ int cmd_top(int argc, const char **argv) perf_top__update_print_entries(&top); signal(SIGWINCH, winch_sig); } - top.session->env = &perf_env; - top.session = perf_session__new(NULL, NULL); + top.session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL, + /*trace_event_repipe=*/false, + &host_env); if (IS_ERR(top.session)) { status = PTR_ERR(top.session); top.session = NULL; goto out_delete_evlist; } + top.evlist->session = top.session; if (!evlist__needs_bpf_sb_event(top.evlist)) 
top.record_opts.no_bpf_event = true; @@ -1851,7 +1865,7 @@ int cmd_top(int argc, const char **argv) goto out_delete_evlist; } - if (evlist__add_bpf_sb_event(top.sb_evlist, &perf_env)) { + if (evlist__add_bpf_sb_event(top.sb_evlist, &host_env)) { pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); status = -EINVAL; goto out_delete_evlist; @@ -1873,6 +1887,7 @@ int cmd_top(int argc, const char **argv) evlist__delete(top.evlist); perf_session__delete(top.session); annotation_options__exit(); + perf_env__exit(&host_env); return status; } From aa91baa09b2a3c38deff05b83410ce86833258d5 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:32:55 -0700 Subject: [PATCH 1072/2411] perf bench synthesize: Avoid use of global perf_env The benchmark doesn't use a data file and so the header perf_env isn't used. Stack allocate a host perf_env for use to avoid the use of the global perf_env. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-16-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/bench/synthesize.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c index 9b333276cbdb..b3d493697675 100644 --- a/tools/perf/bench/synthesize.c +++ b/tools/perf/bench/synthesize.c @@ -114,12 +114,16 @@ static int run_single_threaded(void) .pid = "self", }; struct perf_thread_map *threads; + struct perf_env host_env; int err; perf_set_singlethreaded(); - session = perf_session__new(NULL, NULL); + perf_env__init(&host_env); + session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL, + /*trace_event_repipe=*/false, &host_env); if (IS_ERR(session)) { pr_err("Session creation failed.\n"); + perf_env__exit(&host_env); return PTR_ERR(session); } threads = thread_map__new_by_pid(getpid()); @@ -144,6 +148,7 @@ static int run_single_threaded(void) perf_thread_map__put(threads); perf_session__delete(session); + 
perf_env__exit(&host_env); return err; } @@ -154,17 +159,21 @@ static int do_run_multi_threaded(struct target *target, u64 runtime_us; unsigned int i; double time_average, time_stddev, event_average, event_stddev; - int err; + int err = 0; struct stats time_stats, event_stats; struct perf_session *session; + struct perf_env host_env; + perf_env__init(&host_env); init_stats(&time_stats); init_stats(&event_stats); for (i = 0; i < multi_iterations; i++) { - session = perf_session__new(NULL, NULL); - if (IS_ERR(session)) - return PTR_ERR(session); - + session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL, + /*trace_event_repipe=*/false, &host_env); + if (IS_ERR(session)) { + err = PTR_ERR(session); + goto err_out; + } atomic_set(&event_count, 0); gettimeofday(&start, NULL); err = __machine__synthesize_threads(&session->machines.host, @@ -175,7 +184,7 @@ static int do_run_multi_threaded(struct target *target, nr_threads_synthesize); if (err) { perf_session__delete(session); - return err; + goto err_out; } gettimeofday(&end, NULL); @@ -198,7 +207,9 @@ static int do_run_multi_threaded(struct target *target, printf(" Average time per event %.3f usec\n", time_average / event_average); - return 0; +err_out: + perf_env__exit(&host_env); + return err; } static int run_multi_threaded(void) From e481066388fe8003916461a54bf0ecffc02505a8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:32:56 -0700 Subject: [PATCH 1073/2411] perf machine: Explicitly pass in host perf_env When creating a machine for the host explicitly pass in a scoped perf_env. This removes a use of the global perf_env. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-17-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-buildid-list.c | 5 ++++- tools/perf/builtin-kallsyms.c | 21 ++++++++++++++++----- tools/perf/builtin-trace.c | 24 +++++++++++++++++------- tools/perf/tests/code-reading.c | 3 +-- tools/perf/tests/dlfilter-test.c | 3 +-- tools/perf/tests/dwarf-unwind.c | 10 +++++++--- tools/perf/tests/mmap-thread-lookup.c | 6 +++++- tools/perf/tests/symbols.c | 8 +++++++- tools/perf/util/debug.c | 9 ++++++++- tools/perf/util/machine.c | 16 ++++++++-------- tools/perf/util/machine.h | 6 +++--- tools/perf/util/probe-event.c | 5 ++++- 12 files changed, 81 insertions(+), 35 deletions(-) diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 151cd84b6dfe..a91bbb34ac94 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -45,11 +45,14 @@ static int buildid__map_cb(struct map *map, void *arg __maybe_unused) static void buildid__show_kernel_maps(void) { + struct perf_env host_env; struct machine *machine; - machine = machine__new_host(); + perf_env__init(&host_env); + machine = machine__new_host(&host_env); machine__for_each_kernel_map(machine, buildid__map_cb, NULL); machine__delete(machine); + perf_env__exit(&host_env); } static int sysfs__fprintf_build_id(FILE *fp) diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c index a3c2ffdc1af8..3c4339982b16 100644 --- a/tools/perf/builtin-kallsyms.c +++ b/tools/perf/builtin-kallsyms.c @@ -12,18 +12,28 @@ #include #include "debug.h" #include "dso.h" +#include "env.h" #include "machine.h" #include "map.h" #include "symbol.h" static int __cmd_kallsyms(int argc, const char **argv) { - int i; - struct machine *machine = machine__new_kallsyms(); + int i, err; + struct perf_env host_env; + struct machine *machine = NULL; + + perf_env__init(&host_env); + err = perf_env__set_cmdline(&host_env, argc, argv); + if 
(err) + goto out; + + machine = machine__new_kallsyms(&host_env); if (machine == NULL) { pr_err("Couldn't read /proc/kallsyms\n"); - return -1; + err = -1; + goto out; } for (i = 0; i < argc; ++i) { @@ -42,9 +52,10 @@ static int __cmd_kallsyms(int argc, const char **argv) map__unmap_ip(map, symbol->start), map__unmap_ip(map, symbol->end), symbol->start, symbol->end); } - +out: machine__delete(machine); - return 0; + perf_env__exit(&host_env); + return err; } int cmd_kallsyms(int argc, const char **argv) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 0261f4eefe6d..5b53571de400 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -140,6 +140,7 @@ struct syscall_fmt { }; struct trace { + struct perf_env host_env; struct perf_tool tool; struct { /** Sorted sycall numbers used by the trace. */ @@ -1977,17 +1978,24 @@ static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long l return machine__resolve_kernel_addr(vmachine, addrp, modp); } -static int trace__symbols_init(struct trace *trace, struct evlist *evlist) +static int trace__symbols_init(struct trace *trace, int argc, const char **argv, + struct evlist *evlist) { int err = symbol__init(NULL); if (err) return err; - trace->host = machine__new_host(); - if (trace->host == NULL) - return -ENOMEM; + perf_env__init(&trace->host_env); + err = perf_env__set_cmdline(&trace->host_env, argc, argv); + if (err) + goto out; + trace->host = machine__new_host(&trace->host_env); + if (trace->host == NULL) { + err = -ENOMEM; + goto out; + } thread__set_priv_destructor(thread_trace__delete); err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr); @@ -1998,9 +2006,10 @@ static int trace__symbols_init(struct trace *trace, struct evlist *evlist) evlist->core.threads, trace__tool_process, true, false, 1); out: - if (err) + if (err) { + perf_env__exit(&trace->host_env); symbol__exit(); - + } return err; } @@ -2009,6 +2018,7 @@ static 
void trace__symbols__exit(struct trace *trace) machine__exit(trace->host); trace->host = NULL; + perf_env__exit(&trace->host_env); symbol__exit(); } @@ -4428,7 +4438,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) goto out_delete_evlist; } - err = trace__symbols_init(trace, evlist); + err = trace__symbols_init(trace, argc, argv, evlist); if (err < 0) { fprintf(trace->output, "Problems initializing symbol libraries!\n"); goto out_delete_evlist; diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 0ec7004f90fe..9c2091310191 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -655,9 +655,8 @@ static int do_test_code_reading(bool try_kcore) pid = getpid(); - machine = machine__new_host(); perf_env__init(&host_env); - machine->env = &host_env; + machine = machine__new_host(&host_env); ret = machine__create_kernel_maps(machine); if (ret < 0) { diff --git a/tools/perf/tests/dlfilter-test.c b/tools/perf/tests/dlfilter-test.c index 6427e3382711..80a1c941138d 100644 --- a/tools/perf/tests/dlfilter-test.c +++ b/tools/perf/tests/dlfilter-test.c @@ -353,9 +353,8 @@ static int test__dlfilter_test(struct test_data *td) return test_result("Failed to find program symbols", TEST_FAIL); pr_debug("Creating new host machine structure\n"); - td->machine = machine__new_host(); perf_env__init(&host_env); - td->machine->env = &host_env; + td->machine = machine__new_host(&host_env); td->fd = creat(td->perf_data_file_name, 0644); if (td->fd < 0) diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 525c46b7971a..9ed78d00fb87 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -7,6 +7,7 @@ #include #include "tests.h" #include "debug.h" +#include "env.h" #include "machine.h" #include "event.h" #include "../util/unwind.h" @@ -180,6 +181,7 @@ NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__krava_1(struct thread *th noinline int 
test__dwarf_unwind(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { + struct perf_env host_env; struct machine *machine; struct thread *thread; int err = -1; @@ -188,15 +190,16 @@ noinline int test__dwarf_unwind(struct test_suite *test __maybe_unused, callchain_param.record_mode = CALLCHAIN_DWARF; dwarf_callchain_users = true; - machine = machine__new_live(/*kernel_maps=*/true, pid); + perf_env__init(&host_env); + machine = machine__new_live(&host_env, /*kernel_maps=*/true, pid); if (!machine) { pr_err("Could not get machine\n"); - return -1; + goto out; } if (machine__create_kernel_maps(machine)) { pr_err("Failed to create kernel maps\n"); - return -1; + goto out; } if (verbose > 1) @@ -213,6 +216,7 @@ noinline int test__dwarf_unwind(struct test_suite *test __maybe_unused, out: machine__delete(machine); + perf_env__exit(&host_env); return err; } diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 446a3615d720..0c5619c6e6e9 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -8,6 +8,7 @@ #include #include #include "debug.h" +#include "env.h" #include "event.h" #include "tests.h" #include "machine.h" @@ -155,6 +156,7 @@ static int synth_process(struct machine *machine) static int mmap_events(synth_cb synth) { + struct perf_env host_env; struct machine *machine; int err, i; @@ -167,7 +169,8 @@ static int mmap_events(synth_cb synth) */ TEST_ASSERT_VAL("failed to create threads", !threads_create()); - machine = machine__new_host(); + perf_env__init(&host_env); + machine = machine__new_host(&host_env); dump_trace = verbose > 1 ? 
1 : 0; @@ -209,6 +212,7 @@ static int mmap_events(synth_cb synth) } machine__delete(machine); + perf_env__exit(&host_env); return err; } diff --git a/tools/perf/tests/symbols.c b/tools/perf/tests/symbols.c index b07fdf831868..f4ffe5804f40 100644 --- a/tools/perf/tests/symbols.c +++ b/tools/perf/tests/symbols.c @@ -5,6 +5,7 @@ #include #include "debug.h" #include "dso.h" +#include "env.h" #include "machine.h" #include "thread.h" #include "symbol.h" @@ -13,15 +14,18 @@ #include "tests.h" struct test_info { + struct perf_env host_env; struct machine *machine; struct thread *thread; }; static int init_test_info(struct test_info *ti) { - ti->machine = machine__new_host(); + perf_env__init(&ti->host_env); + ti->machine = machine__new_host(&ti->host_env); if (!ti->machine) { pr_debug("machine__new_host() failed!\n"); + perf_env__exit(&ti->host_env); return TEST_FAIL; } @@ -29,6 +33,7 @@ static int init_test_info(struct test_info *ti) ti->thread = machine__findnew_thread(ti->machine, 100, 100); if (!ti->thread) { pr_debug("machine__findnew_thread() failed!\n"); + perf_env__exit(&ti->host_env); return TEST_FAIL; } @@ -39,6 +44,7 @@ static void exit_test_info(struct test_info *ti) { thread__put(ti->thread); machine__delete(ti->machine); + perf_env__exit(&ti->host_env); } struct dso_map { diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 2878a7363ac8..1dfa4d0eec4d 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -17,6 +17,7 @@ #include "addr_location.h" #include "color.h" #include "debug.h" +#include "env.h" #include "event.h" #include "machine.h" #include "map.h" @@ -309,8 +310,12 @@ void __dump_stack(FILE *file, void **stackdump, size_t stackdump_size) { /* TODO: async safety. printf, malloc, etc. aren't safe inside a signal handler. 
*/ pid_t pid = getpid(); - struct machine *machine = machine__new_live(/*kernel_maps=*/false, pid); + struct machine *machine; struct thread *thread = NULL; + struct perf_env host_env; + + perf_env__init(&host_env); + machine = machine__new_live(&host_env, /*kernel_maps=*/false, pid); if (machine) thread = machine__find_thread(machine, pid, pid); @@ -323,6 +328,7 @@ void __dump_stack(FILE *file, void **stackdump, size_t stackdump_size) */ backtrace_symbols_fd(stackdump, stackdump_size, fileno(file)); machine__delete(machine); + perf_env__exit(&host_env); return; } #endif @@ -349,6 +355,7 @@ void __dump_stack(FILE *file, void **stackdump, size_t stackdump_size) } thread__put(thread); machine__delete(machine); + perf_env__exit(&host_env); } /* Obtain a backtrace and print it to stdout. */ diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 2ef8c1cfae1e..b5dd42588c91 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -129,7 +129,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) return 0; } -static struct machine *__machine__new_host(bool kernel_maps) +static struct machine *__machine__new_host(struct perf_env *host_env, bool kernel_maps) { struct machine *machine = malloc(sizeof(*machine)); @@ -142,13 +142,13 @@ static struct machine *__machine__new_host(bool kernel_maps) free(machine); return NULL; } - machine->env = &perf_env; + machine->env = host_env; return machine; } -struct machine *machine__new_host(void) +struct machine *machine__new_host(struct perf_env *host_env) { - return __machine__new_host(/*kernel_maps=*/true); + return __machine__new_host(host_env, /*kernel_maps=*/true); } static int mmap_handler(const struct perf_tool *tool __maybe_unused, @@ -168,9 +168,9 @@ static int machine__init_live(struct machine *machine, pid_t pid) mmap_handler, machine, true); } -struct machine *machine__new_live(bool kernel_maps, pid_t pid) +struct machine *machine__new_live(struct perf_env 
*host_env, bool kernel_maps, pid_t pid) { - struct machine *machine = __machine__new_host(kernel_maps); + struct machine *machine = __machine__new_host(host_env, kernel_maps); if (!machine) return NULL; @@ -182,9 +182,9 @@ struct machine *machine__new_live(bool kernel_maps, pid_t pid) return machine; } -struct machine *machine__new_kallsyms(void) +struct machine *machine__new_kallsyms(struct perf_env *host_env) { - struct machine *machine = machine__new_host(); + struct machine *machine = machine__new_host(host_env); /* * FIXME: * 1) We should switch to machine__load_kallsyms(), i.e. not explicitly diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 180b369c366c..22a42c5825fa 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -169,9 +169,9 @@ struct thread *machine__findnew_guest_code(struct machine *machine, pid_t pid); void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size); void machines__set_comm_exec(struct machines *machines, bool comm_exec); -struct machine *machine__new_host(void); -struct machine *machine__new_kallsyms(void); -struct machine *machine__new_live(bool kernel_maps, pid_t pid); +struct machine *machine__new_host(struct perf_env *host_env); +struct machine *machine__new_kallsyms(struct perf_env *host_env); +struct machine *machine__new_live(struct perf_env *host_env, bool kernel_maps, pid_t pid); int machine__init(struct machine *machine, const char *root_dir, pid_t pid); void machine__exit(struct machine *machine); void machine__delete_threads(struct machine *machine); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 57ad150f8c43..6ab2eb551b6c 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -75,12 +75,14 @@ int e_snprintf(char *str, size_t size, const char *format, ...) 
} static struct machine *host_machine; +static struct perf_env host_env; /* Initialize symbol maps and path of vmlinux/modules */ int init_probe_symbol_maps(bool user_only) { int ret; + perf_env__init(&host_env); symbol_conf.allow_aliases = true; ret = symbol__init(NULL); if (ret < 0) { @@ -94,7 +96,7 @@ int init_probe_symbol_maps(bool user_only) if (symbol_conf.vmlinux_name) pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name); - host_machine = machine__new_host(); + host_machine = machine__new_host(&host_env); if (!host_machine) { pr_debug("machine__new_host() failed.\n"); symbol__exit(); @@ -111,6 +113,7 @@ void exit_probe_symbol_maps(void) machine__delete(host_machine); host_machine = NULL; symbol__exit(); + perf_env__exit(&host_env); } static struct ref_reloc_sym *kernel_get_ref_reloc_sym(struct map **pmap) From 69ac7472d28a21057275a396193f1bdcce6ba962 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:32:57 -0700 Subject: [PATCH 1074/2411] perf auxtrace: Pass perf_env from session through to mmap read auxtrace_mmap__read and auxtrace_mmap__read_snapshot end up calling `evsel__env(NULL)` which returns the global perf_env variable for the host. Their only call is in perf record. Rather than use the global variable pass through the perf_env for `perf record`. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-18-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-record.c | 8 ++++++-- tools/perf/util/auxtrace.c | 13 +++++++------ tools/perf/util/auxtrace.h | 6 ++++-- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 8a829ddff6f2..7ea3a11aca70 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -775,7 +775,9 @@ static int record__auxtrace_mmap_read(struct record *rec, { int ret; - ret = auxtrace_mmap__read(map, rec->itr, &rec->tool, + ret = auxtrace_mmap__read(map, rec->itr, + perf_session__env(rec->session), + &rec->tool, record__process_auxtrace); if (ret < 0) return ret; @@ -791,7 +793,9 @@ static int record__auxtrace_mmap_read_snapshot(struct record *rec, { int ret; - ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool, + ret = auxtrace_mmap__read_snapshot(map, rec->itr, + perf_session__env(rec->session), + &rec->tool, record__process_auxtrace, rec->opts.auxtrace_snapshot_size); if (ret < 0) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 03211c2623de..ebd32f1b8f12 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1890,7 +1890,7 @@ int __weak compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail) } static int __auxtrace_mmap__read(struct mmap *map, - struct auxtrace_record *itr, + struct auxtrace_record *itr, struct perf_env *env, const struct perf_tool *tool, process_auxtrace_t fn, bool snapshot, size_t snapshot_size) { @@ -1900,7 +1900,7 @@ static int __auxtrace_mmap__read(struct mmap *map, size_t size, head_off, old_off, len1, len2, padding; union perf_event ev; void *data1, *data2; - int kernel_is_64_bit = perf_env__kernel_is_64_bit(evsel__env(NULL)); + int kernel_is_64_bit = perf_env__kernel_is_64_bit(env); head = auxtrace_mmap__read_head(mm, kernel_is_64_bit); @@ -2002,17 +2002,18 @@ 
static int __auxtrace_mmap__read(struct mmap *map, } int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr, - const struct perf_tool *tool, process_auxtrace_t fn) + struct perf_env *env, const struct perf_tool *tool, + process_auxtrace_t fn) { - return __auxtrace_mmap__read(map, itr, tool, fn, false, 0); + return __auxtrace_mmap__read(map, itr, env, tool, fn, false, 0); } int auxtrace_mmap__read_snapshot(struct mmap *map, - struct auxtrace_record *itr, + struct auxtrace_record *itr, struct perf_env *env, const struct perf_tool *tool, process_auxtrace_t fn, size_t snapshot_size) { - return __auxtrace_mmap__read(map, itr, tool, fn, true, snapshot_size); + return __auxtrace_mmap__read(map, itr, env, tool, fn, true, snapshot_size); } /** diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index b0db84d27b25..f001cbb68f8e 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -23,6 +23,7 @@ union perf_event; struct perf_session; struct evlist; struct evsel; +struct perf_env; struct perf_tool; struct mmap; struct perf_sample; @@ -512,10 +513,11 @@ typedef int (*process_auxtrace_t)(const struct perf_tool *tool, size_t len1, void *data2, size_t len2); int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr, - const struct perf_tool *tool, process_auxtrace_t fn); + struct perf_env *env, const struct perf_tool *tool, + process_auxtrace_t fn); int auxtrace_mmap__read_snapshot(struct mmap *map, - struct auxtrace_record *itr, + struct auxtrace_record *itr, struct perf_env *env, const struct perf_tool *tool, process_auxtrace_t fn, size_t snapshot_size); From 003a86bce0728ad160bcb7c7566a4d40aee3c235 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:32:58 -0700 Subject: [PATCH 1075/2411] perf trace: Avoid global perf_env with evsel__env There is no session in perf trace unless in replay mode, so in host mode no session can be associated with the evlist. 
If the evsel__env call fails, resort to the host_env that's part of the trace. Remove errno_to_name: once its argument is turned into a perf_env it becomes a one-line function with a single caller, so just call perf_env__arch_strerrno directly. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-19-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-trace.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 5b53571de400..fe737b3ac6e6 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2898,13 +2898,6 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam return sample__fprintf_callchain(sample, 38, print_opts, get_tls_callchain_cursor(), symbol_conf.bt_stop_list, trace->output); } -static const char *errno_to_name(struct evsel *evsel, int err) -{ - struct perf_env *env = evsel__env(evsel); - - return perf_env__arch_strerrno(env, err); -} - static int trace__sys_exit(struct trace *trace, struct evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -2990,8 +2983,9 @@ static int trace__sys_exit(struct trace *trace, struct evsel *evsel, } else if (ret < 0) { errno_print: { char bf[STRERR_BUFSIZE]; - const char *emsg = str_error_r(-ret, bf, sizeof(bf)), - *e = errno_to_name(evsel, -ret); + struct perf_env *env = evsel__env(evsel) ?: &trace->host_env; + const char *emsg = str_error_r(-ret, bf, sizeof(bf)); + const char *e = perf_env__arch_strerrno(env, -ret); fprintf(trace->output, "-1 %s (%s)", e, emsg); } From 525a599badeeafba88a4fa0f913e5cf87e2d51ec Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:32:59 -0700 Subject: [PATCH 1076/2411] perf env: Remove global perf_env The global perf_env was used for the host, but if a perf_env wasn't easy to come by it was used in a lot of places where potentially recorded and host data could be confused.
Remove the global variable as now the majority of accesses retrieve the perf_env for the host from the session. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-20-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/perf.c | 3 --- tools/perf/util/bpf-event.c | 2 +- tools/perf/util/env.c | 2 -- tools/perf/util/env.h | 2 -- tools/perf/util/evsel.c | 2 +- tools/perf/util/session.c | 3 ++- 6 files changed, 4 insertions(+), 10 deletions(-) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index f0617cc41f5f..88c60ecf3395 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -346,12 +346,9 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) use_pager = 1; commit_pager_choice(); - perf_env__init(&perf_env); - perf_env__set_cmdline(&perf_env, argc, argv); status = p->fn(argc, argv); perf_config__exit(); exit_browser(status); - perf_env__exit(&perf_env); if (status) return status & 0xff; diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 664f361ef8c1..5b6d3e899e11 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -549,7 +549,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, * for perf-record and perf-report use header.env; * otherwise, use global perf_env. */ - env = session->data ? 
perf_session__env(session) : &perf_env; + env = perf_session__env(session); arrays = 1UL << PERF_BPIL_JITED_KSYMS; arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS; diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index c09159083bf0..c8c248754621 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -19,8 +19,6 @@ #include "strbuf.h" #include "trace/beauty/beauty.h" -struct perf_env perf_env; - #ifdef HAVE_LIBBPF_SUPPORT #include "bpf-event.h" #include "bpf-utils.h" diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index d8df59072529..e00179787a34 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -150,8 +150,6 @@ enum perf_compress_type { struct bpf_prog_info_node; struct btf_node; -extern struct perf_env perf_env; - int perf_env__read_core_pmu_caps(struct perf_env *env); void perf_env__exit(struct perf_env *env); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 3f766f240cc7..aa6efcc4404c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3882,7 +3882,7 @@ struct perf_env *evsel__env(struct evsel *evsel) { struct perf_session *session = evsel__session(evsel); - return session ? perf_session__env(session) : &perf_env; + return session ? 
perf_session__env(session) : NULL; } static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 36532329a633..2a79e6844f36 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -192,7 +192,8 @@ struct perf_session *__perf_session__new(struct perf_data *data, symbol_conf.kallsyms_name = perf_data__kallsyms_name(data); } } else { - session->machines.host.env = host_env ?: &perf_env; + assert(host_env != NULL); + session->machines.host.env = host_env; } if (session->evlist) session->evlist->session = session; From 8882095b1d4d785524a7a4df8e04e35cfd039142 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:33:00 -0700 Subject: [PATCH 1077/2411] perf sample: Remove arch notion of sample parsing By definition arch sample parsing and synthesis will inhibit certain kinds of cross-platform record then analysis (report, script, etc.). Remove arch_perf_parse_sample_weight and arch_perf_synthesize_sample_weight, replacing them with a common implementation. Combine perf_sample p_stage_cyc and retire_lat as weight3 to capture the differing uses regardless of the architecture perf was compiled for.
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-21-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/arch/powerpc/util/event.c | 26 --------------------- tools/perf/arch/x86/tests/sample-parsing.c | 4 ++-- tools/perf/arch/x86/util/event.c | 27 ---------------------- tools/perf/builtin-script.c | 2 +- tools/perf/util/dlfilter.c | 2 +- tools/perf/util/event.h | 2 -- tools/perf/util/evsel.c | 17 ++++++++++---- tools/perf/util/hist.c | 4 ++-- tools/perf/util/hist.h | 3 ++- tools/perf/util/intel-tpebs.c | 4 ++-- tools/perf/util/sample.h | 6 ++--- tools/perf/util/session.c | 2 +- tools/perf/util/sort.c | 7 +++--- tools/perf/util/synthetic-events.c | 10 ++++++-- 14 files changed, 36 insertions(+), 80 deletions(-) diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c index 77d8cc2b5691..024ac8b54c33 100644 --- a/tools/perf/arch/powerpc/util/event.c +++ b/tools/perf/arch/powerpc/util/event.c @@ -11,32 +11,6 @@ #include "../../../util/debug.h" #include "../../../util/sample.h" -void arch_perf_parse_sample_weight(struct perf_sample *data, - const __u64 *array, u64 type) -{ - union perf_sample_weight weight; - - weight.full = *array; - if (type & PERF_SAMPLE_WEIGHT) - data->weight = weight.full; - else { - data->weight = weight.var1_dw; - data->ins_lat = weight.var2_w; - data->p_stage_cyc = weight.var3_w; - } -} - -void arch_perf_synthesize_sample_weight(const struct perf_sample *data, - __u64 *array, u64 type) -{ - *array = data->weight; - - if (type & PERF_SAMPLE_WEIGHT_STRUCT) { - *array &= 0xffffffff; - *array |= ((u64)data->ins_lat << 32); - } -} - const char *arch_perf_header_entry(const char *se_header) { if (!strcmp(se_header, "Local INSTR Latency")) diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c index a061e8619267..22feec23e53d 100644 --- a/tools/perf/arch/x86/tests/sample-parsing.c +++ b/tools/perf/arch/x86/tests/sample-parsing.c @@ 
-29,7 +29,7 @@ static bool samples_same(const struct perf_sample *s1, { if (type & PERF_SAMPLE_WEIGHT_STRUCT) { COMP(ins_lat); - COMP(retire_lat); + COMP(weight3); } return true; @@ -50,7 +50,7 @@ static int do_test(u64 sample_type) struct perf_sample sample = { .weight = 101, .ins_lat = 102, - .retire_lat = 103, + .weight3 = 103, }; struct perf_sample sample_out; size_t i, sz, bufsz; diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index a0400707180c..576c1c36046c 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -92,33 +92,6 @@ int perf_event__synthesize_extra_kmaps(const struct perf_tool *tool, #endif -void arch_perf_parse_sample_weight(struct perf_sample *data, - const __u64 *array, u64 type) -{ - union perf_sample_weight weight; - - weight.full = *array; - if (type & PERF_SAMPLE_WEIGHT) - data->weight = weight.full; - else { - data->weight = weight.var1_dw; - data->ins_lat = weight.var2_w; - data->retire_lat = weight.var3_w; - } -} - -void arch_perf_synthesize_sample_weight(const struct perf_sample *data, - __u64 *array, u64 type) -{ - *array = data->weight; - - if (type & PERF_SAMPLE_WEIGHT_STRUCT) { - *array &= 0xffffffff; - *array |= ((u64)data->ins_lat << 32); - *array |= ((u64)data->retire_lat << 48); - } -} - const char *arch_perf_header_entry(const char *se_header) { if (!strcmp(se_header, "Local Pipeline Stage Cycle")) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index f2b5620165b4..d9fbdcf72f25 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2252,7 +2252,7 @@ static void process_event(struct perf_script *script, fprintf(fp, "%16" PRIu16, sample->ins_lat); if (PRINT_FIELD(RETIRE_LAT)) - fprintf(fp, "%16" PRIu16, sample->retire_lat); + fprintf(fp, "%16" PRIu16, sample->weight3); if (PRINT_FIELD(CGROUP)) { const char *cgrp_name; diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c index ddacef881af2..c0afcbd954f8 
100644 --- a/tools/perf/util/dlfilter.c +++ b/tools/perf/util/dlfilter.c @@ -513,6 +513,7 @@ int dlfilter__do_filter_event(struct dlfilter *d, d->d_addr_al = &d_addr_al; d_sample.size = sizeof(d_sample); + d_sample.p_stage_cyc = sample->weight3; d_ip_al.size = 0; /* To indicate d_ip_al is not initialized */ d_addr_al.size = 0; /* To indicate d_addr_al is not initialized */ @@ -526,7 +527,6 @@ int dlfilter__do_filter_event(struct dlfilter *d, ASSIGN(period); ASSIGN(weight); ASSIGN(ins_lat); - ASSIGN(p_stage_cyc); ASSIGN(transaction); ASSIGN(insn_cnt); ASSIGN(cyc_cnt); diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 67ad4a2014bc..b13385a6068b 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -391,8 +391,6 @@ extern unsigned int proc_map_timeout; #define PAGE_SIZE_NAME_LEN 32 char *get_page_size_name(u64 size, char *str); -void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type); -void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type); const char *arch_perf_header_entry(const char *se_header); int arch_support_sort_key(const char *sort_key); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index aa6efcc4404c..3d27e9bdd66b 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2880,11 +2880,18 @@ perf_event__check_size(union perf_event *event, unsigned int sample_size) return 0; } -void __weak arch_perf_parse_sample_weight(struct perf_sample *data, - const __u64 *array, - u64 type __maybe_unused) +static void perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type) { - data->weight = *array; + union perf_sample_weight weight; + + weight.full = *array; + if (type & PERF_SAMPLE_WEIGHT_STRUCT) { + data->weight = weight.var1_dw; + data->ins_lat = weight.var2_w; + data->weight3 = weight.var3_w; + } else { + data->weight = weight.full; + } } u64 evsel__bitfield_swap_branch_flags(u64 value) @@ -3270,7 +3277,7 
@@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, if (type & PERF_SAMPLE_WEIGHT_TYPE) { OVERFLOW_CHECK_u64(array); - arch_perf_parse_sample_weight(data, array, type); + perf_parse_sample_weight(data, array, type); array++; } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index afc6855327ab..64ff427040c3 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -829,7 +829,7 @@ __hists__add_entry(struct hists *hists, .period = sample->period, .weight1 = sample->weight, .weight2 = sample->ins_lat, - .weight3 = sample->p_stage_cyc, + .weight3 = sample->weight3, .latency = al->latency, }, .parent = sym_parent, @@ -846,7 +846,7 @@ __hists__add_entry(struct hists *hists, .time = hist_time(sample->time), .weight = sample->weight, .ins_lat = sample->ins_lat, - .p_stage_cyc = sample->p_stage_cyc, + .weight3 = sample->weight3, .simd_flags = sample->simd_flags, }, *he = hists__findnew_entry(hists, &entry, al, sample_self); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index c64254088fc7..70438d03ca9c 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -255,7 +255,8 @@ struct hist_entry { u64 code_page_size; u64 weight; u64 ins_lat; - u64 p_stage_cyc; + /** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. */ + u64 weight3; s32 socket; s32 cpu; int parallelism; diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c index 3b92ebf5c112..8c9aee157ec4 100644 --- a/tools/perf/util/intel-tpebs.c +++ b/tools/perf/util/intel-tpebs.c @@ -210,8 +210,8 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused, * latency value will be used. Save the number of samples and the sum of * retire latency value for each event. 
*/ - t->last = sample->retire_lat; - update_stats(&t->stats, sample->retire_lat); + t->last = sample->weight3; + update_stats(&t->stats, sample->weight3); mutex_unlock(tpebs_mtx_get()); return 0; } diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h index 0e96240052e9..fae834144ef4 100644 --- a/tools/perf/util/sample.h +++ b/tools/perf/util/sample.h @@ -104,10 +104,8 @@ struct perf_sample { u8 cpumode; u16 misc; u16 ins_lat; - union { - u16 p_stage_cyc; - u16 retire_lat; - }; + /** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. */ + u16 weight3; bool no_hw_idx; /* No hw_idx collected in branch_stack */ char insn[MAX_INSN]; void *raw_data; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 2a79e6844f36..26ae078278cd 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1099,7 +1099,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, printf("... weight: %" PRIu64 "", sample->weight); if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) { printf(",0x%"PRIx16"", sample->ins_lat); - printf(",0x%"PRIx16"", sample->p_stage_cyc); + printf(",0x%"PRIx16"", sample->weight3); } printf("\n"); } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 7969d64a47bf..0ba2ce1b1c07 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1884,21 +1884,20 @@ struct sort_entry sort_global_ins_lat = { static int64_t sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) { - return left->p_stage_cyc - right->p_stage_cyc; + return left->weight3 - right->weight3; } static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, - he->p_stage_cyc * he->stat.nr_events); + return repsep_snprintf(bf, size, "%-*u", width, he->weight3 * he->stat.nr_events); } static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int 
width) { - return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc); + return repsep_snprintf(bf, size, "%-*u", width, he->weight3); } struct sort_entry sort_local_p_stage_cyc = { diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index e7ca3f5eb493..cb2c1ace304a 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1573,10 +1573,16 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, return result; } -void __weak arch_perf_synthesize_sample_weight(const struct perf_sample *data, +static void perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type __maybe_unused) { *array = data->weight; + + if (type & PERF_SAMPLE_WEIGHT_STRUCT) { + *array &= 0xffffffff; + *array |= ((u64)data->ins_lat << 32); + *array |= ((u64)data->weight3 << 48); + } } static __u64 *copy_read_group_values(__u64 *array, __u64 read_format, @@ -1736,7 +1742,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo } if (type & PERF_SAMPLE_WEIGHT_TYPE) { - arch_perf_synthesize_sample_weight(sample, array, type); + perf_synthesize_sample_weight(sample, array, type); array++; } From a563c9f3bb8c23416f3e72edfbc75d1a4937f7e0 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:33:01 -0700 Subject: [PATCH 1078/2411] perf test: Move PERF_SAMPLE_WEIGHT_STRUCT parsing to common test test__x86_sample_parsing is identical to test__sample_parsing except it explicitly tested PERF_SAMPLE_WEIGHT_STRUCT. Now that the parsing code is common, move the PERF_SAMPLE_WEIGHT_STRUCT test to the common sample parsing test and remove the x86 version.
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-22-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/arch/x86/include/arch-tests.h | 1 - tools/perf/arch/x86/tests/Build | 1 - tools/perf/arch/x86/tests/arch-tests.c | 2 - tools/perf/arch/x86/tests/sample-parsing.c | 125 --------------------- tools/perf/tests/sample-parsing.c | 14 +++ 5 files changed, 14 insertions(+), 129 deletions(-) delete mode 100644 tools/perf/arch/x86/tests/sample-parsing.c diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index 8713e9122d4c..7d65b9e51840 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -14,7 +14,6 @@ int test__insn_x86(struct test_suite *test, int subtest); int test__intel_pt_pkt_decoder(struct test_suite *test, int subtest); int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest); int test__bp_modify(struct test_suite *test, int subtest); -int test__x86_sample_parsing(struct test_suite *test, int subtest); int test__amd_ibs_via_core_pmu(struct test_suite *test, int subtest); int test__amd_ibs_period(struct test_suite *test, int subtest); int test__hybrid(struct test_suite *test, int subtest); diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 311b6b53d3d8..7790b3e20f4e 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -2,7 +2,6 @@ perf-test-$(CONFIG_DWARF_UNWIND) += regs_load.o perf-test-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o perf-test-y += arch-tests.o -perf-test-y += sample-parsing.o perf-test-y += hybrid.o perf-test-$(CONFIG_AUXTRACE) += intel-pt-test.o ifeq ($(CONFIG_EXTRA_TESTS),y) diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index 29ec1861ccef..8f9cfeaa170f 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -23,7 +23,6 @@ struct test_suite 
suite__intel_pt = { #if defined(__x86_64__) DEFINE_SUITE("x86 bp modify", bp_modify); #endif -DEFINE_SUITE("x86 Sample parsing", x86_sample_parsing); DEFINE_SUITE("AMD IBS via core pmu", amd_ibs_via_core_pmu); DEFINE_SUITE_EXCLUSIVE("AMD IBS sample period", amd_ibs_period); static struct test_case hybrid_tests[] = { @@ -49,7 +48,6 @@ struct test_suite *arch_tests[] = { #if defined(__x86_64__) &suite__bp_modify, #endif - &suite__x86_sample_parsing, &suite__amd_ibs_via_core_pmu, &suite__amd_ibs_period, &suite__hybrid, diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c deleted file mode 100644 index 22feec23e53d..000000000000 --- a/tools/perf/arch/x86/tests/sample-parsing.c +++ /dev/null @@ -1,125 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include -#include -#include -#include -#include -#include -#include - -#include "event.h" -#include "evsel.h" -#include "debug.h" -#include "util/sample.h" -#include "util/synthetic-events.h" - -#include "tests/tests.h" -#include "arch-tests.h" - -#define COMP(m) do { \ - if (s1->m != s2->m) { \ - pr_debug("Samples differ at '"#m"'\n"); \ - return false; \ - } \ -} while (0) - -static bool samples_same(const struct perf_sample *s1, - const struct perf_sample *s2, - u64 type) -{ - if (type & PERF_SAMPLE_WEIGHT_STRUCT) { - COMP(ins_lat); - COMP(weight3); - } - - return true; -} - -static int do_test(u64 sample_type) -{ - struct evsel evsel = { - .needs_swap = false, - .core = { - . 
attr = { - .sample_type = sample_type, - .read_format = 0, - }, - }, - }; - union perf_event *event; - struct perf_sample sample = { - .weight = 101, - .ins_lat = 102, - .weight3 = 103, - }; - struct perf_sample sample_out; - size_t i, sz, bufsz; - int err, ret = -1; - - sz = perf_event__sample_event_size(&sample, sample_type, 0); - bufsz = sz + 4096; /* Add a bit for overrun checking */ - event = malloc(bufsz); - if (!event) { - pr_debug("malloc failed\n"); - return -1; - } - - memset(event, 0xff, bufsz); - event->header.type = PERF_RECORD_SAMPLE; - event->header.misc = 0; - event->header.size = sz; - - err = perf_event__synthesize_sample(event, sample_type, 0, &sample); - if (err) { - pr_debug("%s failed for sample_type %#"PRIx64", error %d\n", - "perf_event__synthesize_sample", sample_type, err); - goto out_free; - } - - /* The data does not contain 0xff so we use that to check the size */ - for (i = bufsz; i > 0; i--) { - if (*(i - 1 + (u8 *)event) != 0xff) - break; - } - if (i != sz) { - pr_debug("Event size mismatch: actual %zu vs expected %zu\n", - i, sz); - goto out_free; - } - - evsel.sample_size = __evsel__sample_size(sample_type); - - err = evsel__parse_sample(&evsel, event, &sample_out); - if (err) { - pr_debug("%s failed for sample_type %#"PRIx64", error %d\n", - "evsel__parse_sample", sample_type, err); - goto out_free; - } - - if (!samples_same(&sample, &sample_out, sample_type)) { - pr_debug("parsing failed for sample_type %#"PRIx64"\n", - sample_type); - goto out_free; - } - - ret = 0; -out_free: - free(event); - - return ret; -} - -/** - * test__x86_sample_parsing - test X86 specific sample parsing - * - * This function implements a test that synthesizes a sample event, parses it - * and then checks that the parsed sample matches the original sample. If the - * test passes %0 is returned, otherwise %-1 is returned. - * - * For now, the PERF_SAMPLE_WEIGHT_STRUCT is the only X86 specific sample type. 
- * The test only checks the PERF_SAMPLE_WEIGHT_STRUCT type. - */ -int test__x86_sample_parsing(struct test_suite *test __maybe_unused, int subtest __maybe_unused) -{ - return do_test(PERF_SAMPLE_WEIGHT_STRUCT); -} diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 72411580f869..a7327c942ca2 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -152,6 +152,12 @@ static bool samples_same(struct perf_sample *s1, if (type & PERF_SAMPLE_WEIGHT) COMP(weight); + if (type & PERF_SAMPLE_WEIGHT_STRUCT) { + COMP(weight); + COMP(ins_lat); + COMP(weight3); + } + if (type & PERF_SAMPLE_DATA_SRC) COMP(data_src); @@ -269,6 +275,8 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .cgroup = 114, .data_page_size = 115, .code_page_size = 116, + .ins_lat = 117, + .weight3 = 118, .aux_sample = { .size = sizeof(aux_data), .data = (void *)aux_data, @@ -439,6 +447,12 @@ static int test__sample_parsing(struct test_suite *test __maybe_unused, int subt if (err) return err; } + sample_type = (PERF_SAMPLE_MAX - 1) & ~PERF_SAMPLE_WEIGHT_STRUCT; + for (i = 0; i < ARRAY_SIZE(rf); i++) { + err = do_test(sample_type, sample_regs, rf[i]); + if (err) + return err; + } return 0; } From 6e19839a80b8713b836722ba9d99a3ab12cfb651 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:33:02 -0700 Subject: [PATCH 1079/2411] perf sort: Use perf_env to set arch sort keys and header Previously arch_support_sort_key and arch_perf_header_entry used a weak symbol to compile as appropriate for x86 and powerpc. A limitation to this is that the handling of a data file could vary in cross-platform development. Change to using the perf_env of the current session to determine the architecture kind and set the sort key and header entries as appropriate. 
Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-23-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/arch/powerpc/util/Build | 1 - tools/perf/arch/powerpc/util/event.c | 34 ---------------- tools/perf/arch/x86/util/event.c | 19 --------- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-c2c.c | 53 ++++++++++++++----------- tools/perf/builtin-diff.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-top.c | 22 +++++------ tools/perf/tests/hists_cumulate.c | 8 ++-- tools/perf/tests/hists_filter.c | 8 ++-- tools/perf/tests/hists_link.c | 8 ++-- tools/perf/tests/hists_output.c | 10 ++--- tools/perf/util/event.h | 3 -- tools/perf/util/sort.c | 59 ++++++++++++++++++++-------- tools/perf/util/sort.h | 5 ++- 15 files changed, 106 insertions(+), 130 deletions(-) delete mode 100644 tools/perf/arch/powerpc/util/event.c diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index ed82715080f9..fdd6a77a3432 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -5,7 +5,6 @@ perf-util-y += mem-events.o perf-util-y += pmu.o perf-util-y += sym-handling.o perf-util-y += evsel.o -perf-util-y += event.o perf-util-$(CONFIG_LIBDW) += skip-callchain-idx.o diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c deleted file mode 100644 index 024ac8b54c33..000000000000 --- a/tools/perf/arch/powerpc/util/event.c +++ /dev/null @@ -1,34 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include - -#include "../../../util/event.h" -#include "../../../util/synthetic-events.h" -#include "../../../util/machine.h" -#include "../../../util/tool.h" -#include "../../../util/map.h" -#include "../../../util/debug.h" -#include "../../../util/sample.h" - -const char *arch_perf_header_entry(const char *se_header) -{ - if (!strcmp(se_header, "Local INSTR Latency")) - return "Finish Cyc"; - else if (!strcmp(se_header, "INSTR 
Latency")) - return "Global Finish_cyc"; - else if (!strcmp(se_header, "Local Pipeline Stage Cycle")) - return "Dispatch Cyc"; - else if (!strcmp(se_header, "Pipeline Stage Cycle")) - return "Global Dispatch_cyc"; - return se_header; -} - -int arch_support_sort_key(const char *sort_key) -{ - if (!strcmp(sort_key, "p_stage_cyc")) - return 1; - if (!strcmp(sort_key, "local_p_stage_cyc")) - return 1; - return 0; -} diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index 576c1c36046c..3cd384317739 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -91,22 +91,3 @@ int perf_event__synthesize_extra_kmaps(const struct perf_tool *tool, } #endif - -const char *arch_perf_header_entry(const char *se_header) -{ - if (!strcmp(se_header, "Local Pipeline Stage Cycle")) - return "Local Retire Latency"; - else if (!strcmp(se_header, "Pipeline Stage Cycle")) - return "Retire Latency"; - - return se_header; -} - -int arch_support_sort_key(const char *sort_key) -{ - if (!strcmp(sort_key, "p_stage_cyc")) - return 1; - if (!strcmp(sort_key, "local_p_stage_cyc")) - return 1; - return 0; -} diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 326593862998..5d57d2913f3d 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -947,7 +947,7 @@ int cmd_annotate(int argc, const char **argv) annotate_opts.show_br_cntr = true; } - if (setup_sorting(NULL) < 0) + if (setup_sorting(/*evlist=*/NULL, perf_session__env(annotate.session)) < 0) usage_with_options(annotate_usage, options); ret = __cmd_annotate(&annotate); diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 8cb36d9433f8..9e9ff471ddd1 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -195,12 +195,14 @@ static struct hist_entry_ops c2c_entry_ops = { static int c2c_hists__init(struct c2c_hists *hists, const char *sort, - int nr_header_lines); + int nr_header_lines, + struct perf_env 
*env); static struct c2c_hists* he__get_c2c_hists(struct hist_entry *he, const char *sort, - int nr_header_lines) + int nr_header_lines, + struct perf_env *env) { struct c2c_hist_entry *c2c_he; struct c2c_hists *hists; @@ -214,7 +216,7 @@ he__get_c2c_hists(struct hist_entry *he, if (!hists) return NULL; - ret = c2c_hists__init(hists, sort, nr_header_lines); + ret = c2c_hists__init(hists, sort, nr_header_lines, env); if (ret) { free(hists); return NULL; @@ -350,7 +352,7 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused, mi = mi_dup; - c2c_hists = he__get_c2c_hists(he, c2c.cl_sort, 2); + c2c_hists = he__get_c2c_hists(he, c2c.cl_sort, 2, machine->env); if (!c2c_hists) goto free_mi; @@ -1966,7 +1968,8 @@ static struct c2c_fmt *get_format(const char *name) return c2c_fmt; } -static int c2c_hists__init_output(struct perf_hpp_list *hpp_list, char *name) +static int c2c_hists__init_output(struct perf_hpp_list *hpp_list, char *name, + struct perf_env *env __maybe_unused) { struct c2c_fmt *c2c_fmt = get_format(name); int level = 0; @@ -1980,14 +1983,14 @@ static int c2c_hists__init_output(struct perf_hpp_list *hpp_list, char *name) return 0; } -static int c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name) +static int c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name, struct perf_env *env) { struct c2c_fmt *c2c_fmt = get_format(name); struct c2c_dimension *dim; if (!c2c_fmt) { reset_dimensions(); - return sort_dimension__add(hpp_list, name, NULL, 0); + return sort_dimension__add(hpp_list, name, /*evlist=*/NULL, env, /*level=*/0); } dim = c2c_fmt->dim; @@ -2008,7 +2011,7 @@ static int c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name) \ for (tok = strtok_r((char *)_list, ", ", &tmp); \ tok; tok = strtok_r(NULL, ", ", &tmp)) { \ - ret = _fn(hpp_list, tok); \ + ret = _fn(hpp_list, tok, env); \ if (ret == -EINVAL) { \ pr_err("Invalid --fields key: `%s'", tok); \ break; \ @@ -2021,7 +2024,8 @@ static int 
c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name) static int hpp_list__parse(struct perf_hpp_list *hpp_list, const char *output_, - const char *sort_) + const char *sort_, + struct perf_env *env) { char *output = output_ ? strdup(output_) : NULL; char *sort = sort_ ? strdup(sort_) : NULL; @@ -2052,7 +2056,8 @@ static int hpp_list__parse(struct perf_hpp_list *hpp_list, static int c2c_hists__init(struct c2c_hists *hists, const char *sort, - int nr_header_lines) + int nr_header_lines, + struct perf_env *env) { __hists__init(&hists->hists, &hists->list); @@ -2066,15 +2071,16 @@ static int c2c_hists__init(struct c2c_hists *hists, /* Overload number of header lines.*/ hists->list.nr_header_lines = nr_header_lines; - return hpp_list__parse(&hists->list, NULL, sort); + return hpp_list__parse(&hists->list, /*output=*/NULL, sort, env); } static int c2c_hists__reinit(struct c2c_hists *c2c_hists, const char *output, - const char *sort) + const char *sort, + struct perf_env *env) { perf_hpp__reset_output_field(&c2c_hists->list); - return hpp_list__parse(&c2c_hists->list, output, sort); + return hpp_list__parse(&c2c_hists->list, output, sort, env); } #define DISPLAY_LINE_LIMIT 0.001 @@ -2207,8 +2213,9 @@ static int filter_cb(struct hist_entry *he, void *arg __maybe_unused) return 0; } -static int resort_cl_cb(struct hist_entry *he, void *arg __maybe_unused) +static int resort_cl_cb(struct hist_entry *he, void *arg) { + struct perf_env *env = arg; struct c2c_hist_entry *c2c_he; struct c2c_hists *c2c_hists; bool display = he__display(he, &c2c.shared_clines_stats); @@ -2222,7 +2229,7 @@ static int resort_cl_cb(struct hist_entry *he, void *arg __maybe_unused) c2c_he->cacheline_idx = idx++; calc_width(c2c_he); - c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort); + c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort, env); hists__collapse_resort(&c2c_hists->hists, NULL); hists__output_resort_cb(&c2c_hists->hists, NULL, filter_cb); @@ -2334,7 +2341,7 @@ 
static int resort_shared_cl_cb(struct hist_entry *he, void *arg __maybe_unused) return 0; } -static int hists__iterate_cb(struct hists *hists, hists__resort_cb_t cb) +static int hists__iterate_cb(struct hists *hists, hists__resort_cb_t cb, void *arg) { struct rb_node *next = rb_first_cached(&hists->entries); int ret = 0; @@ -2343,7 +2350,7 @@ static int hists__iterate_cb(struct hists *hists, hists__resort_cb_t cb) struct hist_entry *he; he = rb_entry(next, struct hist_entry, rb_node); - ret = cb(he, NULL); + ret = cb(he, arg); if (ret) break; next = rb_next(&he->rb_node); @@ -2449,7 +2456,7 @@ static void print_cacheline(struct c2c_hists *c2c_hists, hists__fprintf(&c2c_hists->hists, false, 0, 0, 0, out, false); } -static void print_pareto(FILE *out) +static void print_pareto(FILE *out, struct perf_env *env) { struct perf_hpp_list hpp_list; struct rb_node *nd; @@ -2474,7 +2481,7 @@ static void print_pareto(FILE *out) "dcacheline"; perf_hpp_list__init(&hpp_list); - ret = hpp_list__parse(&hpp_list, cl_output, NULL); + ret = hpp_list__parse(&hpp_list, cl_output, /*evlist=*/NULL, env); if (WARN_ONCE(ret, "failed to setup sort entries\n")) return; @@ -2539,7 +2546,7 @@ static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session) fprintf(out, "=================================================\n"); fprintf(out, "#\n"); - print_pareto(out); + print_pareto(out, perf_session__env(session)); } #ifdef HAVE_SLANG_SUPPORT @@ -3097,7 +3104,7 @@ static int perf_c2c__report(int argc, const char **argv) goto out_session; } - err = c2c_hists__init(&c2c.hists, "dcacheline", 2); + err = c2c_hists__init(&c2c.hists, "dcacheline", 2, perf_session__env(session)); if (err) { pr_debug("Failed to initialize hists\n"); goto out_session; @@ -3181,13 +3188,13 @@ static int perf_c2c__report(int argc, const char **argv) else if (c2c.display == DISPLAY_SNP_PEER) sort_str = "tot_peer"; - c2c_hists__reinit(&c2c.hists, output_str, sort_str); + c2c_hists__reinit(&c2c.hists, output_str, 
sort_str, perf_session__env(session)); ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting..."); hists__collapse_resort(&c2c.hists.hists, NULL); hists__output_resort_cb(&c2c.hists.hists, &prog, resort_shared_cl_cb); - hists__iterate_cb(&c2c.hists.hists, resort_cl_cb); + hists__iterate_cb(&c2c.hists.hists, resort_cl_cb, perf_session__env(session)); ui_progress__finish(); diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index ae490d58af92..53d5ea4a6a4f 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -2003,7 +2003,7 @@ int cmd_diff(int argc, const char **argv) sort__mode = SORT_MODE__DIFF; } - if (setup_sorting(NULL) < 0) + if (setup_sorting(/*evlist=*/NULL, perf_session__env(data__files[0].session)) < 0) usage_with_options(diff_usage, options); setup_pager(); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ada8e0166c78..35df04dad2fd 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1790,7 +1790,7 @@ int cmd_report(int argc, const char **argv) } if ((last_key != K_SWITCH_INPUT_DATA && last_key != K_RELOAD) && - (setup_sorting(session->evlist) < 0)) { + (setup_sorting(session->evlist, perf_session__env(session)) < 0)) { if (sort_order) parse_options_usage(report_usage, options, "s", 1); if (field_order) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index e9743f17bd0c..a11f629c7d76 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1767,7 +1767,17 @@ int cmd_top(int argc, const char **argv) setup_browser(false); - if (setup_sorting(top.evlist) < 0) { + top.session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL, + /*trace_event_repipe=*/false, + &host_env); + if (IS_ERR(top.session)) { + status = PTR_ERR(top.session); + top.session = NULL; + goto out_delete_evlist; + } + top.evlist->session = top.session; + + if (setup_sorting(top.evlist, perf_session__env(top.session)) < 0) { if (sort_order) 
parse_options_usage(top_usage, options, "s", 1); if (field_order) @@ -1842,16 +1852,6 @@ int cmd_top(int argc, const char **argv) signal(SIGWINCH, winch_sig); } - top.session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL, - /*trace_event_repipe=*/false, - &host_env); - if (IS_ERR(top.session)) { - status = PTR_ERR(top.session); - top.session = NULL; - goto out_delete_evlist; - } - top.evlist->session = top.session; - if (!evlist__needs_bpf_sb_event(top.evlist)) top.record_opts.no_bpf_event = true; diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 1e0f5a310fd5..3eb9ef8d7ec6 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -295,7 +295,7 @@ static int test1(struct evsel *evsel, struct machine *machine) symbol_conf.cumulate_callchain = false; evsel__reset_sample_bit(evsel, CALLCHAIN); - setup_sorting(NULL); + setup_sorting(/*evlist=*/NULL, machine->env); callchain_register_param(&callchain_param); err = add_hist_entries(hists, machine); @@ -442,7 +442,7 @@ static int test2(struct evsel *evsel, struct machine *machine) symbol_conf.cumulate_callchain = false; evsel__set_sample_bit(evsel, CALLCHAIN); - setup_sorting(NULL); + setup_sorting(/*evlist=*/NULL, machine->env); callchain_register_param(&callchain_param); err = add_hist_entries(hists, machine); @@ -500,7 +500,7 @@ static int test3(struct evsel *evsel, struct machine *machine) symbol_conf.cumulate_callchain = true; evsel__reset_sample_bit(evsel, CALLCHAIN); - setup_sorting(NULL); + setup_sorting(/*evlist=*/NULL, machine->env); callchain_register_param(&callchain_param); err = add_hist_entries(hists, machine); @@ -684,7 +684,7 @@ static int test4(struct evsel *evsel, struct machine *machine) symbol_conf.cumulate_callchain = true; evsel__set_sample_bit(evsel, CALLCHAIN); - setup_sorting(NULL); + setup_sorting(/*evlist=*/NULL, machine->env); callchain_param = callchain_param_default; callchain_register_param(&callchain_param); diff 
--git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 4b2e4f2fbe48..1cebd20cc91c 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -131,10 +131,6 @@ static int test__hists_filter(struct test_suite *test __maybe_unused, int subtes goto out; err = TEST_FAIL; - /* default sort order (comm,dso,sym) will be used */ - if (setup_sorting(NULL) < 0) - goto out; - machines__init(&machines); /* setup threads/dso/map/symbols also */ @@ -145,6 +141,10 @@ static int test__hists_filter(struct test_suite *test __maybe_unused, int subtes if (verbose > 1) machine__fprintf(machine, stderr); + /* default sort order (comm,dso,sym) will be used */ + if (setup_sorting(evlist, machine->env) < 0) + goto out; + /* process sample events */ err = add_hist_entries(evlist, machine); if (err < 0) diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 5b6f1e883466..996f5f0b3bd1 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -303,10 +303,6 @@ static int test__hists_link(struct test_suite *test __maybe_unused, int subtest goto out; err = TEST_FAIL; - /* default sort order (comm,dso,sym) will be used */ - if (setup_sorting(NULL) < 0) - goto out; - machines__init(&machines); /* setup threads/dso/map/symbols also */ @@ -317,6 +313,10 @@ static int test__hists_link(struct test_suite *test __maybe_unused, int subtest if (verbose > 1) machine__fprintf(machine, stderr); + /* default sort order (comm,dso,sym) will be used */ + if (setup_sorting(evlist, machine->env) < 0) + goto out; + /* process sample events */ err = add_hist_entries(evlist, machine); if (err < 0) diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index 33b5cc8352a7..ee5ec8bda60e 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -146,7 +146,7 @@ static int test1(struct evsel *evsel, struct machine *machine) field_order = NULL; sort_order = NULL; /* 
equivalent to sort_order = "comm,dso,sym" */ - setup_sorting(NULL); + setup_sorting(/*evlist=*/NULL, machine->env); /* * expected output: @@ -248,7 +248,7 @@ static int test2(struct evsel *evsel, struct machine *machine) field_order = "overhead,cpu"; sort_order = "pid"; - setup_sorting(NULL); + setup_sorting(/*evlist=*/NULL, machine->env); /* * expected output: @@ -304,7 +304,7 @@ static int test3(struct evsel *evsel, struct machine *machine) field_order = "comm,overhead,dso"; sort_order = NULL; - setup_sorting(NULL); + setup_sorting(/*evlist=*/NULL, machine->env); /* * expected output: @@ -378,7 +378,7 @@ static int test4(struct evsel *evsel, struct machine *machine) field_order = "dso,sym,comm,overhead,dso"; sort_order = "sym"; - setup_sorting(NULL); + setup_sorting(/*evlist=*/NULL, machine->env); /* * expected output: @@ -480,7 +480,7 @@ static int test5(struct evsel *evsel, struct machine *machine) field_order = "cpu,pid,comm,dso,sym"; sort_order = "dso,pid"; - setup_sorting(NULL); + setup_sorting(/*evlist=*/NULL, machine->env); /* * expected output: diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index b13385a6068b..e40d16d3246c 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -391,9 +391,6 @@ extern unsigned int proc_map_timeout; #define PAGE_SIZE_NAME_LEN 32 char *get_page_size_name(u64 size, char *str); -const char *arch_perf_header_entry(const char *se_header); -int arch_support_sort_key(const char *sort_key); - static inline bool perf_event_header__cpumode_is_guest(u8 cpumode) { return cpumode == PERF_RECORD_MISC_GUEST_KERNEL || diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 0ba2ce1b1c07..f3a565b0e230 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2530,19 +2530,44 @@ struct sort_dimension { int taken; }; -int __weak arch_support_sort_key(const char *sort_key __maybe_unused) +static int arch_support_sort_key(const char *sort_key, struct perf_env *env) { + const char *arch = 
perf_env__arch(env); + + if (!strcmp("x86", arch) || !strcmp("powerpc", arch)) { + if (!strcmp(sort_key, "p_stage_cyc")) + return 1; + if (!strcmp(sort_key, "local_p_stage_cyc")) + return 1; + } return 0; } -const char * __weak arch_perf_header_entry(const char *se_header) +static const char *arch_perf_header_entry(const char *se_header, struct perf_env *env) { + const char *arch = perf_env__arch(env); + + if (!strcmp("x86", arch)) { + if (!strcmp(se_header, "Local Pipeline Stage Cycle")) + return "Local Retire Latency"; + else if (!strcmp(se_header, "Pipeline Stage Cycle")) + return "Retire Latency"; + } else if (!strcmp("powerpc", arch)) { + if (!strcmp(se_header, "Local INSTR Latency")) + return "Finish Cyc"; + else if (!strcmp(se_header, "INSTR Latency")) + return "Global Finish_cyc"; + else if (!strcmp(se_header, "Local Pipeline Stage Cycle")) + return "Dispatch Cyc"; + else if (!strcmp(se_header, "Pipeline Stage Cycle")) + return "Global Dispatch_cyc"; + } return se_header; } -static void sort_dimension_add_dynamic_header(struct sort_dimension *sd) +static void sort_dimension_add_dynamic_header(struct sort_dimension *sd, struct perf_env *env) { - sd->entry->se_header = arch_perf_header_entry(sd->entry->se_header); + sd->entry->se_header = arch_perf_header_entry(sd->entry->se_header, env); } #define DIM(d, n, func) [d] = { .name = n, .entry = &(func) } @@ -3594,7 +3619,7 @@ int hpp_dimension__add_output(unsigned col, bool implicit) } int sort_dimension__add(struct perf_hpp_list *list, const char *tok, - struct evlist *evlist, + struct evlist *evlist, struct perf_env *env, int level) { unsigned int i, j; @@ -3607,7 +3632,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, */ for (j = 0; j < ARRAY_SIZE(arch_specific_sort_keys); j++) { if (!strcmp(arch_specific_sort_keys[j], tok) && - !arch_support_sort_key(tok)) { + !arch_support_sort_key(tok, env)) { return 0; } } @@ -3620,7 +3645,7 @@ int sort_dimension__add(struct perf_hpp_list *list, 
const char *tok, for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) { if (sd->name && !strcmp(dynamic_headers[j], sd->name)) - sort_dimension_add_dynamic_header(sd); + sort_dimension_add_dynamic_header(sd, env); } if (sd->entry == &sort_parent && parent_pattern) { @@ -3716,13 +3741,13 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, } /* This should match with sort_dimension__add() above */ -static bool is_hpp_sort_key(const char *key) +static bool is_hpp_sort_key(const char *key, struct perf_env *env) { unsigned i; for (i = 0; i < ARRAY_SIZE(arch_specific_sort_keys); i++) { if (!strcmp(arch_specific_sort_keys[i], key) && - !arch_support_sort_key(key)) { + !arch_support_sort_key(key, env)) { return false; } } @@ -3744,7 +3769,7 @@ static bool is_hpp_sort_key(const char *key) } static int setup_sort_list(struct perf_hpp_list *list, char *str, - struct evlist *evlist) + struct evlist *evlist, struct perf_env *env) { char *tmp, *tok; int ret = 0; @@ -3773,7 +3798,7 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str, } if (*tok) { - if (is_hpp_sort_key(tok)) { + if (is_hpp_sort_key(tok, env)) { /* keep output (hpp) sort keys in the same level */ if (prev_was_hpp) { bool next_same = (level == next_level); @@ -3786,7 +3811,7 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str, prev_was_hpp = false; } - ret = sort_dimension__add(list, tok, evlist, level); + ret = sort_dimension__add(list, tok, evlist, env, level); if (ret == -EINVAL) { if (!cacheline_size() && !strncasecmp(tok, "dcacheline", strlen(tok))) ui__error("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system"); @@ -3915,7 +3940,7 @@ static char *setup_overhead(char *keys) return keys; } -static int __setup_sorting(struct evlist *evlist) +static int __setup_sorting(struct evlist *evlist, struct perf_env *env) { char *str; const char *sort_keys; @@ -3955,7 +3980,7 @@ static int __setup_sorting(struct 
evlist *evlist) } } - ret = setup_sort_list(&perf_hpp_list, str, evlist); + ret = setup_sort_list(&perf_hpp_list, str, evlist, env); free(str); return ret; @@ -4191,16 +4216,16 @@ static int __setup_output_field(void) return ret; } -int setup_sorting(struct evlist *evlist) +int setup_sorting(struct evlist *evlist, struct perf_env *env) { int err; - err = __setup_sorting(evlist); + err = __setup_sorting(evlist, env); if (err < 0) return err; if (parent_pattern != default_parent_pattern) { - err = sort_dimension__add(&perf_hpp_list, "parent", evlist, -1); + err = sort_dimension__add(&perf_hpp_list, "parent", evlist, env, -1); if (err < 0) return err; } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index a742ab7f3c67..d7787958e06b 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -6,6 +6,7 @@ #include "hist.h" struct option; +struct perf_env; extern regex_t parent_regex; extern const char *sort_order; @@ -130,7 +131,7 @@ extern struct sort_entry sort_thread; struct evlist; struct tep_handle; -int setup_sorting(struct evlist *evlist); +int setup_sorting(struct evlist *evlist, struct perf_env *env); int setup_output_field(void); void reset_output_field(void); void sort__setup_elide(FILE *fp); @@ -145,7 +146,7 @@ bool is_strict_order(const char *order); int hpp_dimension__add_output(unsigned col, bool implicit); void reset_dimensions(void); int sort_dimension__add(struct perf_hpp_list *list, const char *tok, - struct evlist *evlist, + struct evlist *evlist, struct perf_env *env, int level); int output_field_add(struct perf_hpp_list *list, const char *tok, int *level); int64_t From 8b6cbcac76af2e6e8ac0330a4aab342d08ca7a5d Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 26 Jun 2025 14:33:57 +0200 Subject: [PATCH 1080/2411] rtla/timerlat: Introduce enum timerlat_tracing_mode After the introduction of BPF-based sample collection, rtla-timerlat effectively runs in one of three modes: - Pure BPF mode, with tracefs only being used to set 
up the timerlat tracer. Sample processing and stop on threshold are handled by BPF. - tracefs mode. BPF is unsupported or kernel is lacking the necessary trace event (osnoise:timerlat_sample). Stop on threshold is handled by timerlat tracer stopping tracing in all instances. - BPF/tracefs mixed mode - BPF is used for sample collection for top or histogram, tracefs is used for trace output and/or auto-analysis. Stop on threshold is handled both through BPF program, which stops sample collection for top/histogram and wakes up rtla, and by timerlat tracer, which stops tracing for trace output/auto-analysis instances. Add enum timerlat_tracing_mode, with three values: - TRACING_MODE_BPF - TRACING_MODE_TRACEFS - TRACING_MODE_MIXED Those represent the modes described above. A field of this type is added to struct timerlat_params, named "mode", replacing the no_bpf variable. params->mode is set in timerlat_{top,hist}_parse_args to TRACING_MODE_BPF or TRACING_MODE_MIXED based on whether trace output and/or auto-analysis is requested. timerlat_{top,hist}_main then checks if BPF is unavailable or disabled; in that case, it sets params->mode to TRACING_MODE_TRACEFS. A condition is added to timerlat_apply_config that skips setting timerlat tracer thresholds if params->mode is TRACING_MODE_BPF (those are unnecessary, since they only turn off tracing, which is already turned off in that case, since BPF is used to collect samples). 
Cc: John Kacur Cc: Luis Goncalves Cc: Arnaldo Carvalho de Melo Cc: Chang Yin Cc: Costa Shulyupin Cc: Crystal Wood Cc: Gabriele Monaco Link: https://lore.kernel.org/20250626123405.1496931-2-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/timerlat.c | 24 +++++---- tools/tracing/rtla/src/timerlat.h | 18 +++++++ tools/tracing/rtla/src/timerlat_hist.c | 51 ++++++++++-------- tools/tracing/rtla/src/timerlat_top.c | 71 +++++++++++++++----------- 4 files changed, 104 insertions(+), 60 deletions(-) diff --git a/tools/tracing/rtla/src/timerlat.c b/tools/tracing/rtla/src/timerlat.c index c29e2ba2d7d8..63d6d43eafff 100644 --- a/tools/tracing/rtla/src/timerlat.c +++ b/tools/tracing/rtla/src/timerlat.c @@ -40,16 +40,22 @@ timerlat_apply_config(struct osnoise_tool *tool, struct timerlat_params *params) CPU_SET(i, &params->monitored_cpus); } - retval = osnoise_set_stop_us(tool->context, params->stop_us); - if (retval) { - err_msg("Failed to set stop us\n"); - goto out_err; - } + if (params->mode != TRACING_MODE_BPF) { + /* + * In tracefs and mixed mode, timerlat tracer handles stopping + * on threshold + */ + retval = osnoise_set_stop_us(tool->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } - retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); - if (retval) { - err_msg("Failed to set stop total us\n"); - goto out_err; + retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } } diff --git a/tools/tracing/rtla/src/timerlat.h b/tools/tracing/rtla/src/timerlat.h index 73045aef23fa..e0a553545d03 100644 --- a/tools/tracing/rtla/src/timerlat.h +++ b/tools/tracing/rtla/src/timerlat.h @@ -1,6 +1,23 @@ // SPDX-License-Identifier: GPL-2.0 #include "osnoise.h" +/* + * Define timerlat tracing mode. 
+ * + * There are three tracing modes: + * - tracefs-only, used when BPF is unavailable. + * - BPF-only, used when BPF is available and neither trace saving nor + * auto-analysis are enabled. + * - mixed mode, used when BPF is available and either trace saving or + * auto-analysis is enabled (which rely on sample collection through + * tracefs). + */ +enum timerlat_tracing_mode { + TRACING_MODE_BPF, + TRACING_MODE_TRACEFS, + TRACING_MODE_MIXED, +}; + struct timerlat_params { /* Common params */ char *cpus; @@ -30,6 +47,7 @@ struct timerlat_params { cpu_set_t hk_cpu_set; struct sched_attr sched_param; struct trace_events *events; + enum timerlat_tracing_mode mode; union { struct { /* top only */ diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 36d2294c963d..6cf260e8553b 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -802,6 +802,9 @@ static struct timerlat_params params->bucket_size = 1; params->entries = 256; + /* default to BPF mode */ + params->mode = TRACING_MODE_BPF; + while (1) { static struct option long_options[] = { {"auto", required_argument, 0, 'a'}, @@ -1054,6 +1057,13 @@ static struct timerlat_params if (params->kernel_workload && params->user_workload) timerlat_hist_usage("--kernel-threads and --user-threads are mutually exclusive!"); + /* + * If auto-analysis or trace output is enabled, switch from BPF mode to + * mixed mode + */ + if (params->mode == TRACING_MODE_BPF && params->trace_output && !params->no_aa) + params->mode = TRACING_MODE_MIXED; + return params; } @@ -1149,7 +1159,6 @@ int timerlat_hist_main(int argc, char *argv[]) pthread_t timerlat_u; int retval; int nr_cpus, i; - bool no_bpf = false; params = timerlat_hist_parse_args(argc, argv); if (!params) @@ -1161,12 +1170,6 @@ int timerlat_hist_main(int argc, char *argv[]) goto out_exit; } - retval = timerlat_hist_apply_config(tool, params); - if (retval) { - err_msg("Could not apply 
config\n"); - goto out_free; - } - trace = &tool->trace; /* * Save trace instance into global variable so that SIGINT can stop @@ -1175,24 +1178,30 @@ int timerlat_hist_main(int argc, char *argv[]) */ hist_inst = trace; + /* + * Try to enable BPF, unless disabled explicitly. + * If BPF enablement fails, fall back to tracefs mode. + */ if (getenv("RTLA_NO_BPF") && strncmp(getenv("RTLA_NO_BPF"), "1", 2) == 0) { debug_msg("RTLA_NO_BPF set, disabling BPF\n"); - no_bpf = true; - } - - if (!no_bpf && !tep_find_event_by_name(trace->tep, "osnoise", "timerlat_sample")) { + params->mode = TRACING_MODE_TRACEFS; + } else if (!tep_find_event_by_name(trace->tep, "osnoise", "timerlat_sample")) { debug_msg("osnoise:timerlat_sample missing, disabling BPF\n"); - no_bpf = true; - } - - if (!no_bpf) { + params->mode = TRACING_MODE_TRACEFS; + } else { retval = timerlat_bpf_init(params); if (retval) { debug_msg("Could not enable BPF\n"); - no_bpf = true; + params->mode = TRACING_MODE_TRACEFS; } } + retval = timerlat_hist_apply_config(tool, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_free; + } + retval = enable_timerlat(trace); if (retval) { err_msg("Failed to enable timerlat tracer\n"); @@ -1320,7 +1329,7 @@ int timerlat_hist_main(int argc, char *argv[]) trace_instance_start(&record->trace); if (!params->no_aa) trace_instance_start(&aa->trace); - if (no_bpf) { + if (params->mode == TRACING_MODE_TRACEFS) { trace_instance_start(trace); } else { retval = timerlat_bpf_attach(); @@ -1333,7 +1342,7 @@ int timerlat_hist_main(int argc, char *argv[]) tool->start_time = time(NULL); timerlat_hist_set_signals(params); - if (no_bpf) { + if (params->mode == TRACING_MODE_TRACEFS) { while (!stop_tracing) { sleep(params->sleep_time); @@ -1362,7 +1371,7 @@ int timerlat_hist_main(int argc, char *argv[]) } else timerlat_bpf_wait(-1); - if (!no_bpf) { + if (params->mode != TRACING_MODE_TRACEFS) { timerlat_bpf_detach(); retval = timerlat_hist_bpf_pull_data(tool); if (retval) { 
@@ -1409,10 +1418,10 @@ int timerlat_hist_main(int argc, char *argv[]) osnoise_destroy_tool(aa); osnoise_destroy_tool(record); osnoise_destroy_tool(tool); + if (params->mode != TRACING_MODE_TRACEFS) + timerlat_bpf_destroy(); free(params); free_cpu_idle_disable_states(); - if (!no_bpf) - timerlat_bpf_destroy(); out_exit: exit(return_value); } diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 7365e08fe986..1644eeb60181 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -559,6 +559,9 @@ static struct timerlat_params /* display data in microseconds */ params->output_divisor = 1000; + /* default to BPF mode */ + params->mode = TRACING_MODE_BPF; + while (1) { static struct option long_options[] = { {"auto", required_argument, 0, 'a'}, @@ -790,6 +793,13 @@ static struct timerlat_params if (params->kernel_workload && params->user_workload) timerlat_top_usage("--kernel-threads and --user-threads are mutually exclusive!"); + /* + * If auto-analysis or trace output is enabled, switch from BPF mode to + * mixed mode + */ + if (params->mode == TRACING_MODE_BPF && params->trace_output && !params->no_aa) + params->mode = TRACING_MODE_MIXED; + return params; } @@ -994,7 +1004,6 @@ int timerlat_top_main(int argc, char *argv[]) char *max_lat; int retval; int nr_cpus, i; - bool no_bpf = false; params = timerlat_top_parse_args(argc, argv); if (!params) @@ -1006,38 +1015,38 @@ int timerlat_top_main(int argc, char *argv[]) goto out_exit; } + trace = &top->trace; + /* + * Save trace instance into global variable so that SIGINT can stop + * the timerlat tracer. + * Otherwise, rtla could loop indefinitely when overloaded. + */ + top_inst = trace; + + /* + * Try to enable BPF, unless disabled explicitly. + * If BPF enablement fails, fall back to tracefs mode. 
+ */ + if (getenv("RTLA_NO_BPF") && strncmp(getenv("RTLA_NO_BPF"), "1", 2) == 0) { + debug_msg("RTLA_NO_BPF set, disabling BPF\n"); + params->mode = TRACING_MODE_TRACEFS; + } else if (!tep_find_event_by_name(trace->tep, "osnoise", "timerlat_sample")) { + debug_msg("osnoise:timerlat_sample missing, disabling BPF\n"); + params->mode = TRACING_MODE_TRACEFS; + } else { + retval = timerlat_bpf_init(params); + if (retval) { + debug_msg("Could not enable BPF\n"); + params->mode = TRACING_MODE_TRACEFS; + } + } + retval = timerlat_top_apply_config(top, params); if (retval) { err_msg("Could not apply config\n"); goto out_free; } - trace = &top->trace; - /* - * Save trace instance into global variable so that SIGINT can stop - * the timerlat tracer. - * Otherwise, rtla could loop indefinitely when overloaded. - */ - top_inst = trace; - - if (getenv("RTLA_NO_BPF") && strncmp(getenv("RTLA_NO_BPF"), "1", 2) == 0) { - debug_msg("RTLA_NO_BPF set, disabling BPF\n"); - no_bpf = true; - } - - if (!no_bpf && !tep_find_event_by_name(trace->tep, "osnoise", "timerlat_sample")) { - debug_msg("osnoise:timerlat_sample missing, disabling BPF\n"); - no_bpf = true; - } - - if (!no_bpf) { - retval = timerlat_bpf_init(params); - if (retval) { - debug_msg("Could not enable BPF\n"); - no_bpf = true; - } - } - retval = enable_timerlat(trace); if (retval) { err_msg("Failed to enable timerlat tracer\n"); @@ -1166,7 +1175,7 @@ int timerlat_top_main(int argc, char *argv[]) trace_instance_start(&record->trace); if (!params->no_aa) trace_instance_start(&aa->trace); - if (no_bpf) { + if (params->mode == TRACING_MODE_TRACEFS) { trace_instance_start(trace); } else { retval = timerlat_bpf_attach(); @@ -1179,7 +1188,7 @@ int timerlat_top_main(int argc, char *argv[]) top->start_time = time(NULL); timerlat_top_set_signals(params); - if (no_bpf) + if (params->mode == TRACING_MODE_TRACEFS) retval = timerlat_top_main_loop(top, record, params, &params_u); else retval = timerlat_top_bpf_main_loop(top, record, params, 
&params_u); @@ -1187,7 +1196,7 @@ int timerlat_top_main(int argc, char *argv[]) if (retval) goto out_top; - if (!no_bpf) + if (params->mode != TRACING_MODE_TRACEFS) timerlat_bpf_detach(); if (params->user_workload && !params_u.stopped_running) { @@ -1239,6 +1248,8 @@ int timerlat_top_main(int argc, char *argv[]) osnoise_destroy_tool(aa); osnoise_destroy_tool(record); osnoise_destroy_tool(top); + if (params->mode != TRACING_MODE_TRACEFS) + timerlat_bpf_destroy(); free(params); free_cpu_idle_disable_states(); out_exit: From 6ea082b171e00bb68b749426f03d9d7e833e9f51 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 26 Jun 2025 14:33:58 +0200 Subject: [PATCH 1081/2411] rtla/timerlat: Add action on threshold feature Extend the functionality provided by the -t/--trace option, which triggers saving the contents of a tracefs buffer after tracing is stopped, to support implementing arbitrary actions. A new option, --on-threshold, is added, taking an argument that further specifies the action. Actions added in this patch are: - trace[,file=<filename>]: Saves tracefs buffer, optionally taking a filename. - signal,num=<num>,pid=<pid>: Sends signal to process. "parent" might be specified instead of a PID number to send signal to parent process. - shell,command=<command>: Execute shell command. Multiple actions may be specified and will be executed in order, including multiple actions of the same type. Trace output requested via -t and -a now adds a trace action to the end of the list. If an action fails, the following actions are not executed. For example, this command: $ rtla timerlat -T 20 --on-threshold trace \ --on-threshold shell,command="grep ipi_send timerlat_trace.txt" \ --on-threshold signal,num=2,pid=parent will send signal 2 (SIGINT) to parent process, but only if saved trace contains the text "ipi_send". This way, the feature can be used for flexible reactions on latency spikes, and allows combining rtla with other tooling like perf. 
Cc: John Kacur Cc: Luis Goncalves Cc: Arnaldo Carvalho de Melo Cc: Chang Yin Cc: Costa Shulyupin Cc: Crystal Wood Cc: Gabriele Monaco Link: https://lore.kernel.org/20250626123405.1496931-3-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/Build | 1 + tools/tracing/rtla/src/actions.c | 235 +++++++++++++++++++++++++ tools/tracing/rtla/src/actions.h | 49 ++++++ tools/tracing/rtla/src/timerlat.h | 3 +- tools/tracing/rtla/src/timerlat_hist.c | 37 ++-- tools/tracing/rtla/src/timerlat_top.c | 38 ++-- 6 files changed, 341 insertions(+), 22 deletions(-) create mode 100644 tools/tracing/rtla/src/actions.c create mode 100644 tools/tracing/rtla/src/actions.h diff --git a/tools/tracing/rtla/src/Build b/tools/tracing/rtla/src/Build index 7bb7e39e391a..66631280b75b 100644 --- a/tools/tracing/rtla/src/Build +++ b/tools/tracing/rtla/src/Build @@ -1,5 +1,6 @@ rtla-y += trace.o rtla-y += utils.o +rtla-y += actions.o rtla-y += osnoise.o rtla-y += osnoise_top.o rtla-y += osnoise_hist.o diff --git a/tools/tracing/rtla/src/actions.c b/tools/tracing/rtla/src/actions.c new file mode 100644 index 000000000000..63bee5bdabfd --- /dev/null +++ b/tools/tracing/rtla/src/actions.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +#include "actions.h" +#include "trace.h" +#include "utils.h" + +/* + * actions_init - initialize struct actions + */ +void +actions_init(struct actions *self) +{ + self->size = action_default_size; + self->list = calloc(self->size, sizeof(struct action)); + self->len = 0; + + memset(&self->present, 0, sizeof(self->present)); + + /* This has to be set by the user */ + self->trace_output_inst = NULL; +} + +/* + * actions_destroy - destroy struct actions + */ +void +actions_destroy(struct actions *self) +{ + /* Free any action-specific data */ + for (struct action *action = self->list; action < self->list + self->len; action++) { + if (action->type == ACTION_SHELL) 
+ free(action->command); + if (action->type == ACTION_TRACE_OUTPUT) + free(action->trace_output); + } + + /* Free action list */ + free(self->list); +} + +/* + * actions_new - Get pointer to new action + */ +static struct action * +actions_new(struct actions *self) +{ + if (self->size >= self->len) { + self->size *= 2; + self->list = realloc(self->list, self->size * sizeof(struct action)); + } + + return &self->list[self->len++]; +} + +/* + * actions_add_trace_output - add an action to output trace + */ +int +actions_add_trace_output(struct actions *self, const char *trace_output) +{ + struct action *action = actions_new(self); + + self->present[ACTION_TRACE_OUTPUT] = true; + action->type = ACTION_TRACE_OUTPUT; + action->trace_output = calloc(strlen(trace_output) + 1, sizeof(char)); + if (!action->trace_output) + return -1; + strcpy(action->trace_output, trace_output); + + return 0; +} + +/* + * actions_add_trace_output - add an action to send signal to a process + */ +int +actions_add_signal(struct actions *self, int signal, int pid) +{ + struct action *action = actions_new(self); + + self->present[ACTION_SIGNAL] = true; + action->type = ACTION_SIGNAL; + action->signal = signal; + action->pid = pid; + + return 0; +} + +/* + * actions_add_shell - add an action to execute a shell command + */ +int +actions_add_shell(struct actions *self, const char *command) +{ + struct action *action = actions_new(self); + + self->present[ACTION_SHELL] = true; + action->type = ACTION_SHELL; + action->command = calloc(strlen(command) + 1, sizeof(char)); + if (!action->command) + return -1; + strcpy(action->command, command); + + return 0; +} + +/* + * actions_parse - add an action based on text specification + */ +int +actions_parse(struct actions *self, const char *trigger) +{ + enum action_type type = ACTION_NONE; + char *token; + char trigger_c[strlen(trigger)]; + + /* For ACTION_SIGNAL */ + int signal = 0, pid = 0; + + /* For ACTION_TRACE_OUTPUT */ + char *trace_output; + + 
strcpy(trigger_c, trigger); + token = strtok(trigger_c, ","); + + if (strcmp(token, "trace") == 0) + type = ACTION_TRACE_OUTPUT; + else if (strcmp(token, "signal") == 0) + type = ACTION_SIGNAL; + else if (strcmp(token, "shell") == 0) + type = ACTION_SHELL; + else + /* Invalid trigger type */ + return -1; + + token = strtok(NULL, ","); + + switch (type) { + case ACTION_TRACE_OUTPUT: + /* Takes no argument */ + if (token == NULL) + trace_output = "timerlat_trace.txt"; + else { + if (strlen(token) > 5 && strncmp(token, "file=", 5) == 0) { + trace_output = token + 5; + } else { + /* Invalid argument */ + return -1; + } + + token = strtok(NULL, ","); + if (token != NULL) + /* Only one argument allowed */ + return -1; + } + return actions_add_trace_output(self, trace_output); + case ACTION_SIGNAL: + /* Takes two arguments, num (signal) and pid */ + while (token != NULL) { + if (strlen(token) > 4 && strncmp(token, "num=", 4) == 0) { + signal = atoi(token + 4); + } else if (strlen(token) > 4 && strncmp(token, "pid=", 4) == 0) { + if (strncmp(token + 4, "parent", 7) == 0) + pid = -1; + else + pid = atoi(token + 4); + } else { + /* Invalid argument */ + return -1; + } + + token = strtok(NULL, ","); + } + + if (!signal || !pid) + /* Missing argument */ + return -1; + + return actions_add_signal(self, signal, pid); + case ACTION_SHELL: + if (token == NULL) + return -1; + if (strlen(token) > 8 && strncmp(token, "command=", 8) == 0) + return actions_add_shell(self, token + 8); + return -1; + default: + return -1; + } +} + +/* + * actions_perform - perform all actions + */ +int +actions_perform(const struct actions *self) +{ + int pid, retval; + const struct action *action; + + for (action = self->list; action < self->list + self->len; action++) { + switch (action->type) { + case ACTION_TRACE_OUTPUT: + retval = save_trace_to_file(self->trace_output_inst, action->trace_output); + if (retval) { + err_msg("Error saving trace\n"); + return retval; + } + break; + case ACTION_SIGNAL: + 
if (action->pid == -1) + pid = getppid(); + else + pid = action->pid; + retval = kill(pid, action->signal); + if (retval) { + err_msg("Error sending signal\n"); + return retval; + } + break; + case ACTION_SHELL: + retval = system(action->command); + if (retval) + return retval; + break; + default: + break; + } + } + + return 0; +} diff --git a/tools/tracing/rtla/src/actions.h b/tools/tracing/rtla/src/actions.h new file mode 100644 index 000000000000..076bbff8519e --- /dev/null +++ b/tools/tracing/rtla/src/actions.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include + +enum action_type { + ACTION_NONE = 0, + ACTION_TRACE_OUTPUT, + ACTION_SIGNAL, + ACTION_SHELL, + ACTION_FIELD_N +}; + +struct action { + enum action_type type; + union { + struct { + /* For ACTION_TRACE_OUTPUT */ + char *trace_output; + }; + struct { + /* For ACTION_SIGNAL */ + int signal; + int pid; + }; + struct { + /* For ACTION_SHELL */ + char *command; + }; + }; +}; + +static const int action_default_size = 8; + +struct actions { + struct action *list; + int len, size; + bool present[ACTION_FIELD_N]; + + /* External dependencies */ + struct tracefs_instance *trace_output_inst; +}; + +void actions_init(struct actions *self); +void actions_destroy(struct actions *self); +int actions_add_trace_output(struct actions *self, const char *trace_output); +int actions_add_signal(struct actions *self, int signal, int pid); +int actions_add_shell(struct actions *self, const char *command); +int actions_parse(struct actions *self, const char *trigger); +int actions_perform(const struct actions *self); diff --git a/tools/tracing/rtla/src/timerlat.h b/tools/tracing/rtla/src/timerlat.h index e0a553545d03..d1fcf9a97621 100644 --- a/tools/tracing/rtla/src/timerlat.h +++ b/tools/tracing/rtla/src/timerlat.h @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include "actions.h" #include "osnoise.h" /* @@ -22,7 +23,6 @@ struct timerlat_params { /* Common params */ char *cpus; cpu_set_t 
monitored_cpus; - char *trace_output; char *cgroup_name; unsigned long long runtime; long long stop_us; @@ -48,6 +48,7 @@ struct timerlat_params { struct sched_attr sched_param; struct trace_events *events; enum timerlat_tracing_mode mode; + struct actions actions; union { struct { /* top only */ diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 6cf260e8553b..d975d2cd6604 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -757,6 +757,7 @@ static void timerlat_hist_usage(char *usage) " --warm-up s: let the workload run for s seconds before collecting data", " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", + " --on-threshold : define action to be executed at latency threshold, multiple are allowed", NULL, }; @@ -786,11 +787,14 @@ static struct timerlat_params int auto_thresh; int retval; int c; + char *trace_output = NULL; params = calloc(1, sizeof(*params)); if (!params) exit(1); + actions_init(&params->actions); + /* disabled by default */ params->dma_latency = -1; @@ -841,6 +845,7 @@ static struct timerlat_params {"warm-up", required_argument, 0, '\2'}, {"trace-buffer-size", required_argument, 0, '\3'}, {"deepest-idle-state", required_argument, 0, '\4'}, + {"on-threshold", required_argument, 0, '\5'}, {0, 0, 0, 0} }; @@ -866,7 +871,7 @@ static struct timerlat_params params->print_stack = auto_thresh; /* set trace */ - params->trace_output = "timerlat_trace.txt"; + trace_output = "timerlat_trace.txt"; break; case 'c': @@ -956,13 +961,13 @@ static struct timerlat_params case 't': if (optarg) { if (optarg[0] == '=') - params->trace_output = &optarg[1]; + trace_output = &optarg[1]; else - params->trace_output = &optarg[0]; + trace_output = &optarg[0]; } else if (optind < argc && argv[optind][0] != '-') - params->trace_output = argv[optind]; + 
trace_output = argv[optind]; else - params->trace_output = "timerlat_trace.txt"; + trace_output = "timerlat_trace.txt"; break; case 'u': params->user_workload = 1; @@ -1032,11 +1037,21 @@ static struct timerlat_params case '\4': params->deepest_idle_state = get_llong_from_str(optarg); break; + case '\5': + retval = actions_parse(&params->actions, optarg); + if (retval) { + err_msg("Invalid action %s\n", optarg); + exit(EXIT_FAILURE); + } + break; default: timerlat_hist_usage("Invalid option"); } } + if (trace_output) + actions_add_trace_output(&params->actions, trace_output); + if (geteuid()) { err_msg("rtla needs root permission\n"); exit(EXIT_FAILURE); @@ -1061,7 +1076,8 @@ static struct timerlat_params * If auto-analysis or trace output is enabled, switch from BPF mode to * mixed mode */ - if (params->mode == TRACING_MODE_BPF && params->trace_output && !params->no_aa) + if (params->mode == TRACING_MODE_BPF && + (params->actions.present[ACTION_TRACE_OUTPUT] || !params->no_aa)) params->mode = TRACING_MODE_MIXED; return params; @@ -1254,12 +1270,13 @@ int timerlat_hist_main(int argc, char *argv[]) } } - if (params->trace_output) { + if (params->actions.present[ACTION_TRACE_OUTPUT]) { record = osnoise_init_trace_tool("timerlat"); if (!record) { err_msg("Failed to enable the trace instance\n"); goto out_free; } + params->actions.trace_output_inst = record->trace.inst; if (params->events) { retval = trace_events_enable(&record->trace, params->events); @@ -1325,7 +1342,7 @@ int timerlat_hist_main(int argc, char *argv[]) * tracing while enabling other instances. The trace instance is the * one with most valuable information. */ - if (params->trace_output) + if (params->actions.present[ACTION_TRACE_OUTPUT]) trace_instance_start(&record->trace); if (!params->no_aa) trace_instance_start(&aa->trace); @@ -1395,8 +1412,7 @@ int timerlat_hist_main(int argc, char *argv[]) if (!params->no_aa) timerlat_auto_analysis(params->stop_us, params->stop_total_us); - save_trace_to_file(record ? 
record->trace.inst : NULL, - params->trace_output); + actions_perform(¶ms->actions); return_value = FAILED; } @@ -1418,6 +1434,7 @@ int timerlat_hist_main(int argc, char *argv[]) osnoise_destroy_tool(aa); osnoise_destroy_tool(record); osnoise_destroy_tool(tool); + actions_destroy(¶ms->actions); if (params->mode != TRACING_MODE_TRACEFS) timerlat_bpf_destroy(); free(params); diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 1644eeb60181..cdbfda009974 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -516,6 +516,7 @@ static void timerlat_top_usage(char *usage) " --warm-up s: let the workload run for s seconds before collecting data", " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", + " --on-threshold : define action to be executed at latency threshold, multiple are allowed", NULL, }; @@ -545,11 +546,14 @@ static struct timerlat_params long long auto_thresh; int retval; int c; + char *trace_output = NULL; params = calloc(1, sizeof(*params)); if (!params) exit(1); + actions_init(¶ms->actions); + /* disabled by default */ params->dma_latency = -1; @@ -592,6 +596,7 @@ static struct timerlat_params {"warm-up", required_argument, 0, '6'}, {"trace-buffer-size", required_argument, 0, '7'}, {"deepest-idle-state", required_argument, 0, '8'}, + {"on-threshold", required_argument, 0, '9'}, {0, 0, 0, 0} }; @@ -617,7 +622,7 @@ static struct timerlat_params params->print_stack = auto_thresh; /* set trace */ - params->trace_output = "timerlat_trace.txt"; + trace_output = "timerlat_trace.txt"; break; case '5': /* it is here because it is similar to -a */ @@ -712,14 +717,13 @@ static struct timerlat_params case 't': if (optarg) { if (optarg[0] == '=') - params->trace_output = &optarg[1]; + trace_output = &optarg[1]; else - params->trace_output = &optarg[0]; + 
trace_output = &optarg[0]; } else if (optind < argc && argv[optind][0] != '-') - params->trace_output = argv[optind]; + trace_output = argv[optind]; else - params->trace_output = "timerlat_trace.txt"; - + trace_output = "timerlat_trace.txt"; break; case 'u': params->user_workload = true; @@ -771,11 +775,21 @@ static struct timerlat_params case '8': params->deepest_idle_state = get_llong_from_str(optarg); break; + case '9': + retval = actions_parse(¶ms->actions, optarg); + if (retval) { + err_msg("Invalid action %s\n", optarg); + exit(EXIT_FAILURE); + } + break; default: timerlat_top_usage("Invalid option"); } } + if (trace_output) + actions_add_trace_output(¶ms->actions, trace_output); + if (geteuid()) { err_msg("rtla needs root permission\n"); exit(EXIT_FAILURE); @@ -797,7 +811,8 @@ static struct timerlat_params * If auto-analysis or trace output is enabled, switch from BPF mode to * mixed mode */ - if (params->mode == TRACING_MODE_BPF && params->trace_output && !params->no_aa) + if (params->mode == TRACING_MODE_BPF && + (params->actions.present[ACTION_TRACE_OUTPUT] || !params->no_aa)) params->mode = TRACING_MODE_MIXED; return params; @@ -1099,12 +1114,13 @@ int timerlat_top_main(int argc, char *argv[]) } } - if (params->trace_output) { + if (params->actions.present[ACTION_TRACE_OUTPUT]) { record = osnoise_init_trace_tool("timerlat"); if (!record) { err_msg("Failed to enable the trace instance\n"); goto out_free; } + params->actions.trace_output_inst = record->trace.inst; if (params->events) { retval = trace_events_enable(&record->trace, params->events); @@ -1171,7 +1187,7 @@ int timerlat_top_main(int argc, char *argv[]) * tracing while enabling other instances. The trace instance is the * one with most valuable information. 
*/ - if (params->trace_output) + if (params->actions.present[ACTION_TRACE_OUTPUT]) trace_instance_start(&record->trace); if (!params->no_aa) trace_instance_start(&aa->trace); @@ -1214,8 +1230,7 @@ int timerlat_top_main(int argc, char *argv[]) if (!params->no_aa) timerlat_auto_analysis(params->stop_us, params->stop_total_us); - save_trace_to_file(record ? record->trace.inst : NULL, - params->trace_output); + actions_perform(¶ms->actions); return_value = FAILED; } else if (params->aa_only) { /* @@ -1248,6 +1263,7 @@ int timerlat_top_main(int argc, char *argv[]) osnoise_destroy_tool(aa); osnoise_destroy_tool(record); osnoise_destroy_tool(top); + actions_destroy(¶ms->actions); if (params->mode != TRACING_MODE_TRACEFS) timerlat_bpf_destroy(); free(params); From 3b78670e3a932c654dedf88807e70e19719cb0cb Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 26 Jun 2025 14:33:59 +0200 Subject: [PATCH 1082/2411] rtla/timerlat_bpf: Allow resuming tracing Currently, rtla-timerlat BPF program uses a global variable stored in a .bss section to store whether tracing has been stopped. Move the information to a separate map, so that it is easily writable from userspace, and add a function that clears the value, resuming tracing after it has been stopped. 
Cc: John Kacur Cc: Luis Goncalves Cc: Arnaldo Carvalho de Melo Cc: Chang Yin Cc: Costa Shulyupin Cc: Crystal Wood Cc: Gabriele Monaco Link: https://lore.kernel.org/20250626123405.1496931-4-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/timerlat.bpf.c | 13 +++++++++---- tools/tracing/rtla/src/timerlat_bpf.c | 13 +++++++++++++ tools/tracing/rtla/src/timerlat_bpf.h | 3 +++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/tools/tracing/rtla/src/timerlat.bpf.c b/tools/tracing/rtla/src/timerlat.bpf.c index 96196d46e170..084cd10c21fc 100644 --- a/tools/tracing/rtla/src/timerlat.bpf.c +++ b/tools/tracing/rtla/src/timerlat.bpf.c @@ -28,6 +28,13 @@ struct { __type(value, unsigned long long); } summary_irq SEC(".maps"), summary_thread SEC(".maps"), summary_user SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, unsigned int); + __type(value, unsigned long long); +} stop_tracing SEC(".maps"); + struct { __uint(type, BPF_MAP_TYPE_RINGBUF); __uint(max_entries, 1); @@ -41,8 +48,6 @@ const volatile int irq_threshold; const volatile int thread_threshold; const volatile bool aa_only; -int stop_tracing; - nosubprog unsigned long long map_get(void *map, unsigned int key) { @@ -109,7 +114,7 @@ nosubprog void set_stop_tracing(void) int value = 0; /* Suppress further sample processing */ - stop_tracing = 1; + map_set(&stop_tracing, 0, 1); /* Signal to userspace */ bpf_ringbuf_output(&signal_stop_tracing, &value, sizeof(value), 0); @@ -121,7 +126,7 @@ int handle_timerlat_sample(struct trace_event_raw_timerlat_sample *tp_args) unsigned long long latency, latency_us; int bucket; - if (stop_tracing) + if (map_get(&stop_tracing, 0)) return 0; latency = tp_args->timer_latency / output_divisor; diff --git a/tools/tracing/rtla/src/timerlat_bpf.c b/tools/tracing/rtla/src/timerlat_bpf.c index 0bc44ce5d69b..1666215dd687 100644 --- a/tools/tracing/rtla/src/timerlat_bpf.c 
+++ b/tools/tracing/rtla/src/timerlat_bpf.c @@ -106,6 +106,19 @@ int timerlat_bpf_wait(int timeout) return retval; } +/* + * timerlat_bpf_restart_tracing - restart stopped tracing + */ +int timerlat_bpf_restart_tracing(void) +{ + unsigned int key = 0; + unsigned long long value = 0; + + return bpf_map__update_elem(bpf->maps.stop_tracing, + &key, sizeof(key), + &value, sizeof(value), BPF_ANY); +} + static int get_value(struct bpf_map *map_irq, struct bpf_map *map_thread, struct bpf_map *map_user, diff --git a/tools/tracing/rtla/src/timerlat_bpf.h b/tools/tracing/rtla/src/timerlat_bpf.h index f1b54dbddb0e..118487436d30 100644 --- a/tools/tracing/rtla/src/timerlat_bpf.h +++ b/tools/tracing/rtla/src/timerlat_bpf.h @@ -18,6 +18,7 @@ int timerlat_bpf_attach(void); void timerlat_bpf_detach(void); void timerlat_bpf_destroy(void); int timerlat_bpf_wait(int timeout); +int timerlat_bpf_restart_tracing(void); int timerlat_bpf_get_hist_value(int key, long long *value_irq, long long *value_thread, @@ -28,6 +29,7 @@ int timerlat_bpf_get_summary_value(enum summary_field key, long long *value_thread, long long *value_user, int cpus); + static inline int have_libbpf_support(void) { return 1; } #else static inline int timerlat_bpf_init(struct timerlat_params *params) @@ -38,6 +40,7 @@ static inline int timerlat_bpf_attach(void) { return -1; } static inline void timerlat_bpf_detach(void) { }; static inline void timerlat_bpf_destroy(void) { }; static inline int timerlat_bpf_wait(int timeout) { return -1; } +static inline int timerlat_bpf_restart_tracing(void) { return -1; }; static inline int timerlat_bpf_get_hist_value(int key, long long *value_irq, long long *value_thread, From 8d933d5c89e80a818019fa5e0c060387bd145216 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 26 Jun 2025 14:34:00 +0200 Subject: [PATCH 1083/2411] rtla/timerlat: Add continue action Introduce option to resume tracing after a latency threshold overflow. 
The option is implemented as an action named "continue". Example: $ rtla timerlat top -q -T 200 -d 1s --on-threshold \ exec,command="echo Threshold" --on-threshold continue Threshold Threshold Threshold Timer Latency ... The feature is supported for both hist and top. After the continue action is executed, processing of the list of actions is stopped and tracing is resumed. Cc: John Kacur Cc: Luis Goncalves Cc: Arnaldo Carvalho de Melo Cc: Chang Yin Cc: Costa Shulyupin Cc: Crystal Wood Cc: Gabriele Monaco Link: https://lore.kernel.org/20250626123405.1496931-5-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/actions.c | 27 +++++++++++- tools/tracing/rtla/src/actions.h | 5 ++- tools/tracing/rtla/src/timerlat_hist.c | 40 +++++++++++++++--- tools/tracing/rtla/src/timerlat_top.c | 57 ++++++++++++++++---------- 4 files changed, 100 insertions(+), 29 deletions(-) diff --git a/tools/tracing/rtla/src/actions.c b/tools/tracing/rtla/src/actions.c index 63bee5bdabfd..aaf0808125d7 100644 --- a/tools/tracing/rtla/src/actions.c +++ b/tools/tracing/rtla/src/actions.c @@ -17,6 +17,7 @@ actions_init(struct actions *self) self->size = action_default_size; self->list = calloc(self->size, sizeof(struct action)); self->len = 0; + self->continue_flag = false; memset(&self->present, 0, sizeof(self->present)); @@ -108,6 +109,20 @@ actions_add_shell(struct actions *self, const char *command) return 0; } +/* + * actions_add_continue - add an action to resume measurement + */ +int +actions_add_continue(struct actions *self) +{ + struct action *action = actions_new(self); + + self->present[ACTION_CONTINUE] = true; + action->type = ACTION_CONTINUE; + + return 0; +} + /* * actions_parse - add an action based on text specification */ @@ -133,6 +148,8 @@ actions_parse(struct actions *self, const char *trigger) type = ACTION_SIGNAL; else if (strcmp(token, "shell") == 0) type = ACTION_SHELL; + else if (strcmp(token, "continue") == 0) + 
type = ACTION_CONTINUE; else /* Invalid trigger type */ return -1; @@ -187,6 +204,11 @@ actions_parse(struct actions *self, const char *trigger) if (strlen(token) > 8 && strncmp(token, "command=", 8) == 0) return actions_add_shell(self, token + 8); return -1; + case ACTION_CONTINUE: + /* Takes no argument */ + if (token != NULL) + return -1; + return actions_add_continue(self); default: return -1; } @@ -196,7 +218,7 @@ actions_parse(struct actions *self, const char *trigger) * actions_perform - perform all actions */ int -actions_perform(const struct actions *self) +actions_perform(struct actions *self) { int pid, retval; const struct action *action; @@ -226,6 +248,9 @@ actions_perform(const struct actions *self) if (retval) return retval; break; + case ACTION_CONTINUE: + self->continue_flag = true; + return 0; default: break; } diff --git a/tools/tracing/rtla/src/actions.h b/tools/tracing/rtla/src/actions.h index 076bbff8519e..b10a19d55c49 100644 --- a/tools/tracing/rtla/src/actions.h +++ b/tools/tracing/rtla/src/actions.h @@ -7,6 +7,7 @@ enum action_type { ACTION_TRACE_OUTPUT, ACTION_SIGNAL, ACTION_SHELL, + ACTION_CONTINUE, ACTION_FIELD_N }; @@ -35,6 +36,7 @@ struct actions { struct action *list; int len, size; bool present[ACTION_FIELD_N]; + bool continue_flag; /* External dependencies */ struct tracefs_instance *trace_output_inst; @@ -45,5 +47,6 @@ void actions_destroy(struct actions *self); int actions_add_trace_output(struct actions *self, const char *trace_output); int actions_add_signal(struct actions *self, int signal, int pid); int actions_add_shell(struct actions *self, const char *command); +int actions_add_continue(struct actions *self); int actions_parse(struct actions *self, const char *trigger); -int actions_perform(const struct actions *self); +int actions_perform(struct actions *self); diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index d975d2cd6604..4f13a8f92711 100644 --- 
a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -1374,8 +1374,20 @@ int timerlat_hist_main(int argc, char *argv[]) goto out_hist; } - if (osnoise_trace_is_off(tool, record)) - break; + if (osnoise_trace_is_off(tool, record)) { + actions_perform(¶ms->actions); + + if (!params->actions.continue_flag) + /* continue flag not set, break */ + break; + + /* continue action reached, re-enable tracing */ + if (params->actions.present[ACTION_TRACE_OUTPUT]) + trace_instance_start(&record->trace); + if (!params->no_aa) + trace_instance_start(&aa->trace); + trace_instance_start(trace); + } /* is there still any user-threads ? */ if (params->user_workload) { @@ -1385,8 +1397,27 @@ int timerlat_hist_main(int argc, char *argv[]) } } } - } else - timerlat_bpf_wait(-1); + } else { + while (!stop_tracing) { + timerlat_bpf_wait(-1); + + if (!stop_tracing) { + /* Threshold overflow, perform actions on threshold */ + actions_perform(¶ms->actions); + + if (!params->actions.continue_flag) + /* continue flag not set, break */ + break; + + /* continue action reached, re-enable tracing */ + if (params->actions.present[ACTION_TRACE_OUTPUT]) + trace_instance_start(&record->trace); + if (!params->no_aa) + trace_instance_start(&aa->trace); + timerlat_bpf_restart_tracing(); + } + } + } if (params->mode != TRACING_MODE_TRACEFS) { timerlat_bpf_detach(); @@ -1412,7 +1443,6 @@ int timerlat_hist_main(int argc, char *argv[]) if (!params->no_aa) timerlat_auto_analysis(params->stop_us, params->stop_total_us); - actions_perform(¶ms->actions); return_value = FAILED; } diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index cdbfda009974..60f9c78cb272 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -906,6 +906,7 @@ timerlat_top_set_signals(struct timerlat_params *params) static int timerlat_top_main_loop(struct osnoise_tool *top, struct osnoise_tool *record, + struct 
osnoise_tool *aa, struct timerlat_params *params, struct timerlat_u_params *params_u) { @@ -932,8 +933,20 @@ timerlat_top_main_loop(struct osnoise_tool *top, if (!params->quiet) timerlat_print_stats(params, top); - if (osnoise_trace_is_off(top, record)) - break; + if (osnoise_trace_is_off(top, record)) { + actions_perform(¶ms->actions); + + if (!params->actions.continue_flag) + /* continue flag not set, break */ + break; + + /* continue action reached, re-enable tracing */ + if (params->actions.present[ACTION_TRACE_OUTPUT]) + trace_instance_start(&record->trace); + if (!params->no_aa) + trace_instance_start(&aa->trace); + trace_instance_start(trace); + } /* is there still any user-threads ? */ if (params->user_workload) { @@ -953,6 +966,7 @@ timerlat_top_main_loop(struct osnoise_tool *top, static int timerlat_top_bpf_main_loop(struct osnoise_tool *top, struct osnoise_tool *record, + struct osnoise_tool *aa, struct timerlat_params *params, struct timerlat_u_params *params_u) { @@ -964,22 +978,9 @@ timerlat_top_bpf_main_loop(struct osnoise_tool *top, return 0; } - if (params->quiet) { - /* Quiet mode: wait for stop and then, print results */ - timerlat_bpf_wait(-1); - - retval = timerlat_top_bpf_pull_data(top); - if (retval) { - err_msg("Error pulling BPF data\n"); - return retval; - } - - return 0; - } - /* Pull and display data in a loop */ while (!stop_tracing) { - wait_retval = timerlat_bpf_wait(params->sleep_time); + wait_retval = timerlat_bpf_wait(params->quiet ? 
-1 : params->sleep_time); retval = timerlat_top_bpf_pull_data(top); if (retval) { @@ -987,11 +988,24 @@ timerlat_top_bpf_main_loop(struct osnoise_tool *top, return retval; } - timerlat_print_stats(params, top); + if (!params->quiet) + timerlat_print_stats(params, top); - if (wait_retval == 1) + if (wait_retval == 1) { /* Stopping requested by tracer */ - break; + actions_perform(&params->actions); + + if (!params->actions.continue_flag) + /* continue flag not set, break */ + break; + + /* continue action reached, re-enable tracing */ + if (params->actions.present[ACTION_TRACE_OUTPUT]) + trace_instance_start(&record->trace); + if (!params->no_aa) + trace_instance_start(&aa->trace); + timerlat_bpf_restart_tracing(); + } /* is there still any user-threads ? */ if (params->user_workload) { @@ -1205,9 +1219,9 @@ int timerlat_top_main(int argc, char *argv[]) timerlat_top_set_signals(params); if (params->mode == TRACING_MODE_TRACEFS) - retval = timerlat_top_main_loop(top, record, params, &params_u); + retval = timerlat_top_main_loop(top, record, aa, params, &params_u); else - retval = timerlat_top_bpf_main_loop(top, record, params, &params_u); + retval = timerlat_top_bpf_main_loop(top, record, aa, params, &params_u); if (retval) goto out_top; @@ -1230,7 +1244,6 @@ int timerlat_top_main(int argc, char *argv[]) if (!params->no_aa) timerlat_auto_analysis(params->stop_us, params->stop_total_us); - actions_perform(&params->actions); return_value = FAILED; } else if (params->aa_only) { /* From 3aadb65db5d656b003232e92d9d18de4e5161416 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 26 Jun 2025 14:34:01 +0200 Subject: [PATCH 1084/2411] rtla/timerlat: Add action on end feature Implement actions on end next to actions on threshold. A new option, --on-end, is added, parallel to --on-threshold. Instead of being executed whenever a latency threshold is reached, it is executed at the end of the measurement. For example: $ rtla timerlat hist -d 5s --on-end trace will save the trace output at the end.
All actions supported by --on-threshold are also supported by --on-end, except for continue, which does nothing with --on-end. Cc: John Kacur Cc: Luis Goncalves Cc: Arnaldo Carvalho de Melo Cc: Chang Yin Cc: Costa Shulyupin Cc: Crystal Wood Cc: Gabriele Monaco Link: https://lore.kernel.org/20250626123405.1496931-6-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/timerlat.h | 5 ++- tools/tracing/rtla/src/timerlat_hist.c | 44 +++++++++++++++++-------- tools/tracing/rtla/src/timerlat_top.c | 45 ++++++++++++++++++-------- 3 files changed, 65 insertions(+), 29 deletions(-) diff --git a/tools/tracing/rtla/src/timerlat.h b/tools/tracing/rtla/src/timerlat.h index d1fcf9a97621..bc55ed04fc96 100644 --- a/tools/tracing/rtla/src/timerlat.h +++ b/tools/tracing/rtla/src/timerlat.h @@ -48,7 +48,10 @@ struct timerlat_params { struct sched_attr sched_param; struct trace_events *events; enum timerlat_tracing_mode mode; - struct actions actions; + + struct actions threshold_actions; + struct actions end_actions; + union { struct { /* top only */ diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 4f13a8f92711..9baea1b251ed 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -758,6 +758,7 @@ static void timerlat_hist_usage(char *usage) " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", " --on-threshold : define action to be executed at latency threshold, multiple are allowed", + " --on-end : define action to be executed at measurement end, multiple are allowed", NULL, }; @@ -793,7 +794,8 @@ static struct timerlat_params if (!params) exit(1); - actions_init(¶ms->actions); + actions_init(¶ms->threshold_actions); + actions_init(¶ms->end_actions); /* disabled by default */ params->dma_latency = -1; @@ 
-846,6 +848,7 @@ static struct timerlat_params {"trace-buffer-size", required_argument, 0, '\3'}, {"deepest-idle-state", required_argument, 0, '\4'}, {"on-threshold", required_argument, 0, '\5'}, + {"on-end", required_argument, 0, '\6'}, {0, 0, 0, 0} }; @@ -1038,7 +1041,14 @@ static struct timerlat_params params->deepest_idle_state = get_llong_from_str(optarg); break; case '\5': - retval = actions_parse(¶ms->actions, optarg); + retval = actions_parse(¶ms->threshold_actions, optarg); + if (retval) { + err_msg("Invalid action %s\n", optarg); + exit(EXIT_FAILURE); + } + break; + case '\6': + retval = actions_parse(¶ms->end_actions, optarg); if (retval) { err_msg("Invalid action %s\n", optarg); exit(EXIT_FAILURE); @@ -1050,7 +1060,7 @@ static struct timerlat_params } if (trace_output) - actions_add_trace_output(¶ms->actions, trace_output); + actions_add_trace_output(¶ms->threshold_actions, trace_output); if (geteuid()) { err_msg("rtla needs root permission\n"); @@ -1077,7 +1087,8 @@ static struct timerlat_params * mixed mode */ if (params->mode == TRACING_MODE_BPF && - (params->actions.present[ACTION_TRACE_OUTPUT] || !params->no_aa)) + (params->threshold_actions.present[ACTION_TRACE_OUTPUT] || + params->end_actions.present[ACTION_TRACE_OUTPUT] || !params->no_aa)) params->mode = TRACING_MODE_MIXED; return params; @@ -1270,13 +1281,15 @@ int timerlat_hist_main(int argc, char *argv[]) } } - if (params->actions.present[ACTION_TRACE_OUTPUT]) { + if (params->threshold_actions.present[ACTION_TRACE_OUTPUT] || + params->end_actions.present[ACTION_TRACE_OUTPUT]) { record = osnoise_init_trace_tool("timerlat"); if (!record) { err_msg("Failed to enable the trace instance\n"); goto out_free; } - params->actions.trace_output_inst = record->trace.inst; + params->threshold_actions.trace_output_inst = record->trace.inst; + params->end_actions.trace_output_inst = record->trace.inst; if (params->events) { retval = trace_events_enable(&record->trace, params->events); @@ -1342,7 +1355,7 @@ 
int timerlat_hist_main(int argc, char *argv[]) * tracing while enabling other instances. The trace instance is the * one with most valuable information. */ - if (params->actions.present[ACTION_TRACE_OUTPUT]) + if (record) trace_instance_start(&record->trace); if (!params->no_aa) trace_instance_start(&aa->trace); @@ -1375,14 +1388,14 @@ int timerlat_hist_main(int argc, char *argv[]) } if (osnoise_trace_is_off(tool, record)) { - actions_perform(¶ms->actions); + actions_perform(¶ms->threshold_actions); - if (!params->actions.continue_flag) + if (!params->threshold_actions.continue_flag) /* continue flag not set, break */ break; /* continue action reached, re-enable tracing */ - if (params->actions.present[ACTION_TRACE_OUTPUT]) + if (record) trace_instance_start(&record->trace); if (!params->no_aa) trace_instance_start(&aa->trace); @@ -1403,14 +1416,14 @@ int timerlat_hist_main(int argc, char *argv[]) if (!stop_tracing) { /* Threshold overflow, perform actions on threshold */ - actions_perform(¶ms->actions); + actions_perform(¶ms->threshold_actions); - if (!params->actions.continue_flag) + if (!params->threshold_actions.continue_flag) /* continue flag not set, break */ break; /* continue action reached, re-enable tracing */ - if (params->actions.present[ACTION_TRACE_OUTPUT]) + if (record) trace_instance_start(&record->trace); if (!params->no_aa) trace_instance_start(&aa->trace); @@ -1435,6 +1448,8 @@ int timerlat_hist_main(int argc, char *argv[]) timerlat_print_stats(params, tool); + actions_perform(¶ms->end_actions); + return_value = PASSED; if (osnoise_trace_is_off(tool, record) && !stop_tracing) { @@ -1464,7 +1479,8 @@ int timerlat_hist_main(int argc, char *argv[]) osnoise_destroy_tool(aa); osnoise_destroy_tool(record); osnoise_destroy_tool(tool); - actions_destroy(¶ms->actions); + actions_destroy(¶ms->threshold_actions); + actions_destroy(¶ms->end_actions); if (params->mode != TRACING_MODE_TRACEFS) timerlat_bpf_destroy(); free(params); diff --git 
a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 60f9c78cb272..c80b81c0b4da 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -517,6 +517,7 @@ static void timerlat_top_usage(char *usage) " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", " --on-threshold : define action to be executed at latency threshold, multiple are allowed", + " --on-end: define action to be executed at measurement end, multiple are allowed", NULL, }; @@ -552,7 +553,8 @@ static struct timerlat_params if (!params) exit(1); - actions_init(¶ms->actions); + actions_init(¶ms->threshold_actions); + actions_init(¶ms->end_actions); /* disabled by default */ params->dma_latency = -1; @@ -597,6 +599,7 @@ static struct timerlat_params {"trace-buffer-size", required_argument, 0, '7'}, {"deepest-idle-state", required_argument, 0, '8'}, {"on-threshold", required_argument, 0, '9'}, + {"on-end", required_argument, 0, '\1'}, {0, 0, 0, 0} }; @@ -623,6 +626,7 @@ static struct timerlat_params /* set trace */ trace_output = "timerlat_trace.txt"; + break; case '5': /* it is here because it is similar to -a */ @@ -776,7 +780,14 @@ static struct timerlat_params params->deepest_idle_state = get_llong_from_str(optarg); break; case '9': - retval = actions_parse(¶ms->actions, optarg); + retval = actions_parse(¶ms->threshold_actions, optarg); + if (retval) { + err_msg("Invalid action %s\n", optarg); + exit(EXIT_FAILURE); + } + break; + case '\1': + retval = actions_parse(¶ms->end_actions, optarg); if (retval) { err_msg("Invalid action %s\n", optarg); exit(EXIT_FAILURE); @@ -788,7 +799,7 @@ static struct timerlat_params } if (trace_output) - actions_add_trace_output(¶ms->actions, trace_output); + actions_add_trace_output(¶ms->threshold_actions, trace_output); if (geteuid()) { err_msg("rtla needs root 
permission\n"); @@ -812,7 +823,8 @@ static struct timerlat_params * mixed mode */ if (params->mode == TRACING_MODE_BPF && - (params->actions.present[ACTION_TRACE_OUTPUT] || !params->no_aa)) + (params->threshold_actions.present[ACTION_TRACE_OUTPUT] || + params->end_actions.present[ACTION_TRACE_OUTPUT] || !params->no_aa)) params->mode = TRACING_MODE_MIXED; return params; @@ -934,14 +946,14 @@ timerlat_top_main_loop(struct osnoise_tool *top, timerlat_print_stats(params, top); if (osnoise_trace_is_off(top, record)) { - actions_perform(¶ms->actions); + actions_perform(¶ms->threshold_actions); - if (!params->actions.continue_flag) + if (!params->threshold_actions.continue_flag) /* continue flag not set, break */ break; /* continue action reached, re-enable tracing */ - if (params->actions.present[ACTION_TRACE_OUTPUT]) + if (record) trace_instance_start(&record->trace); if (!params->no_aa) trace_instance_start(&aa->trace); @@ -993,14 +1005,14 @@ timerlat_top_bpf_main_loop(struct osnoise_tool *top, if (wait_retval == 1) { /* Stopping requested by tracer */ - actions_perform(¶ms->actions); + actions_perform(¶ms->threshold_actions); - if (!params->actions.continue_flag) + if (!params->threshold_actions.continue_flag) /* continue flag not set, break */ break; /* continue action reached, re-enable tracing */ - if (params->actions.present[ACTION_TRACE_OUTPUT]) + if (record) trace_instance_start(&record->trace); if (!params->no_aa) trace_instance_start(&aa->trace); @@ -1128,13 +1140,15 @@ int timerlat_top_main(int argc, char *argv[]) } } - if (params->actions.present[ACTION_TRACE_OUTPUT]) { + if (params->threshold_actions.present[ACTION_TRACE_OUTPUT] || + params->end_actions.present[ACTION_TRACE_OUTPUT]) { record = osnoise_init_trace_tool("timerlat"); if (!record) { err_msg("Failed to enable the trace instance\n"); goto out_free; } - params->actions.trace_output_inst = record->trace.inst; + params->threshold_actions.trace_output_inst = record->trace.inst; + 
params->end_actions.trace_output_inst = record->trace.inst; if (params->events) { retval = trace_events_enable(&record->trace, params->events); @@ -1201,7 +1215,7 @@ int timerlat_top_main(int argc, char *argv[]) * tracing while enabling other instances. The trace instance is the * one with most valuable information. */ - if (params->actions.present[ACTION_TRACE_OUTPUT]) + if (record) trace_instance_start(&record->trace); if (!params->no_aa) trace_instance_start(&aa->trace); @@ -1236,6 +1250,8 @@ int timerlat_top_main(int argc, char *argv[]) timerlat_print_stats(params, top); + actions_perform(¶ms->end_actions); + return_value = PASSED; if (osnoise_trace_is_off(top, record) && !stop_tracing) { @@ -1276,7 +1292,8 @@ int timerlat_top_main(int argc, char *argv[]) osnoise_destroy_tool(aa); osnoise_destroy_tool(record); osnoise_destroy_tool(top); - actions_destroy(¶ms->actions); + actions_destroy(¶ms->threshold_actions); + actions_destroy(¶ms->end_actions); if (params->mode != TRACING_MODE_TRACEFS) timerlat_bpf_destroy(); free(params); From 916a9c5b03a7694a7eae5420fbf2fd763395ff14 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 26 Jun 2025 14:34:02 +0200 Subject: [PATCH 1085/2411] rtla/tests: Check rtla output with grep Add argument to the check command in the test suite that takes a regular expression that the output of rtla command is checked against. This allows testing for specific information in rtla output in addition to checking the return value. Two minor improvements are included: running rtla with "eval" so that arguments with spaces can be passed to it via shell quotations, and the stdout of pushd and popd is suppressed to clean up the test output. 
Cc: John Kacur Cc: Luis Goncalves Cc: Arnaldo Carvalho de Melo Cc: Chang Yin Cc: Costa Shulyupin Cc: Crystal Wood Cc: Gabriele Monaco Link: https://lore.kernel.org/20250626123405.1496931-7-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/tests/engine.sh | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/tools/tracing/rtla/tests/engine.sh b/tools/tracing/rtla/tests/engine.sh index f2616a8e4179..64c5be4313de 100644 --- a/tools/tracing/rtla/tests/engine.sh +++ b/tools/tracing/rtla/tests/engine.sh @@ -11,7 +11,7 @@ test_begin() { reset_osnoise() { # Reset osnoise options to default and remove any dangling instances created # by improperly exited rtla runs. - pushd /sys/kernel/tracing || return 1 + pushd /sys/kernel/tracing >/dev/null || return 1 # Remove dangling instances created by previous rtla run echo 0 > tracing_thresh @@ -35,11 +35,14 @@ reset_osnoise() { echo 0 > stop_tracing_us echo 1000 > timerlat_period_us - popd + popd >/dev/null } check() { + test_name=$0 + tested_command=$1 expected_exitcode=${3:-0} + expected_output=$4 # Simple check: run rtla with given arguments and test exit code. # If TEST_COUNT is set, run the test. Otherwise, just count. ctr=$(($ctr + 1)) @@ -49,8 +52,16 @@ check() { [ "$NO_RESET_OSNOISE" == 1 ] || reset_osnoise # Run rtla; in case of failure, include its output as comment # in the test results. - result=$(stdbuf -oL $TIMEOUT "$RTLA" $2 2>&1); exitcode=$? - if [ $exitcode -eq $expected_exitcode ] + result=$(eval stdbuf -oL $TIMEOUT "$RTLA" $2 2>&1); exitcode=$? + # Test if the results matches if requested + if [ -n "$expected_output" ] + then + grep -E "$expected_output" <<< "$result" > /dev/null; grep_result=$? 
+ else + grep_result=0 + fi + + if [ $exitcode -eq $expected_exitcode ] && [ $grep_result -eq 0 ] then echo "ok $ctr - $1" else @@ -58,6 +69,8 @@ check() { # Add rtla output and exit code as comments in case of failure echo "$result" | col -b | while read line; do echo "# $line"; done printf "#\n# exit code %s\n" $exitcode + [ -n "$expected_output" ] && \ + printf "# Output match failed: \"%s\"\n" "$expected_output" fi fi } From 4e26f84abfbbfa88a66f8a3b7e5ea9e494d3caf3 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 26 Jun 2025 14:34:03 +0200 Subject: [PATCH 1086/2411] rtla/tests: Add tests for actions Add a bunch of tests covering most of both --on-threshold and --on-end. Parts sensitive to implementation of hist/top are tested for both. Cc: John Kacur Cc: Luis Goncalves Cc: Arnaldo Carvalho de Melo Cc: Chang Yin Cc: Costa Shulyupin Cc: Crystal Wood Cc: Gabriele Monaco Link: https://lore.kernel.org/20250626123405.1496931-8-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/tests/timerlat.t | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/tracing/rtla/tests/timerlat.t b/tools/tracing/rtla/tests/timerlat.t index 579c12a85e8f..b354bacd78f8 100644 --- a/tools/tracing/rtla/tests/timerlat.t +++ b/tools/tracing/rtla/tests/timerlat.t @@ -18,6 +18,8 @@ fi for option in $no_bpf_options do export RTLA_NO_BPF=$option + +# Basic tests check "verify help page" \ "timerlat --help" check "verify -s/--stack" \ @@ -36,6 +38,32 @@ check "verify -c/--cpus" \ "timerlat hist -c 0 -d 30s" check "hist test in nanoseconds" \ "timerlat hist -i 2 -c 0 -n -d 30s" 2 + +# Actions tests +check "trace output through -t" \ + "timerlat hist -T 2 -t" 2 "^ Saving trace to timerlat_trace.txt$" +check "trace output through -t with custom filename" \ + "timerlat hist -T 2 -t custom_filename.txt" 2 "^ Saving trace to custom_filename.txt$" +check "trace output through -A trace" \ + "timerlat hist -T 2 
--on-threshold trace" 2 "^ Saving trace to timerlat_trace.txt$" +check "trace output through -A trace with custom filename" \ + "timerlat hist -T 2 --on-threshold trace,file=custom_filename.txt" 2 "^ Saving trace to custom_filename.txt$" +check "exec command" \ + "timerlat hist -T 2 --on-threshold shell,command='echo TestOutput'" 2 "^TestOutput$" +check "multiple actions" \ + "timerlat hist -T 2 --on-threshold shell,command='echo -n 1' --on-threshold shell,command='echo 2'" 2 "^12$" +check "hist stop at failed action" \ + "timerlat hist -T 2 --on-threshold shell,command='echo -n 1; false' --on-threshold shell,command='echo -n 2'" 2 "^1# RTLA timerlat histogram$" +check "top stop at failed action" \ + "timerlat top -T 2 --on-threshold shell,command='echo -n 1; false' --on-threshold shell,command='echo -n 2'" 2 "^1ALL" +check "hist with continue" \ + "timerlat hist -T 2 -d 1s --on-threshold shell,command='echo TestOutput' --on-threshold continue" 0 "^TestOutput$" +check "top with continue" \ + "timerlat top -q -T 2 -d 1s --on-threshold shell,command='echo TestOutput' --on-threshold continue" 0 "^TestOutput$" +check "hist with trace output at end" \ + "timerlat hist -d 1s --on-end trace" 0 "^ Saving trace to timerlat_trace.txt$" +check "top with trace output at end" \ + "timerlat top -d 1s --on-end trace" 0 "^ Saving trace to timerlat_trace.txt$" done test_end From 04f837165b9480d6d6d8b00bbc1298762f3f0e4d Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 26 Jun 2025 14:34:04 +0200 Subject: [PATCH 1087/2411] rtla/tests: Limit duration to maximum of 10s Many of the original rtla tests included durations of 1 minute and 30 seconds. Experience has shown this is unnecessary, since 10 seconds is sufficient waiting time for samples to appear. Change duration of all rtla tests to at most 10 seconds. This speeds up testing significantly. Before: $ make check All tests successful.
Files=3, Tests=54, 536 wallclock secs ( 0.03 usr 0.00 sys + 20.31 cusr 22.02 csys = 42.36 CPU) Result: PASS After: $ make check ... All tests successful. Files=3, Tests=54, 196 wallclock secs ( 0.03 usr 0.01 sys + 20.28 cusr 20.68 csys = 41.00 CPU) Result: PASS Cc: John Kacur Cc: Luis Goncalves Cc: Arnaldo Carvalho de Melo Cc: Chang Yin Cc: Costa Shulyupin Cc: Crystal Wood Cc: Gabriele Monaco Link: https://lore.kernel.org/20250626123405.1496931-9-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/tests/hwnoise.t | 8 ++++---- tools/tracing/rtla/tests/osnoise.t | 4 ++-- tools/tracing/rtla/tests/timerlat.t | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/tracing/rtla/tests/hwnoise.t b/tools/tracing/rtla/tests/hwnoise.t index 5f71401a139e..448877564b8d 100644 --- a/tools/tracing/rtla/tests/hwnoise.t +++ b/tools/tracing/rtla/tests/hwnoise.t @@ -10,12 +10,12 @@ check "verify help page" \ check "detect noise higher than one microsecond" \ "hwnoise -c 0 -T 1 -d 5s -q" check "set the automatic trace mode" \ - "hwnoise -a 5 -d 30s" 2 + "hwnoise -a 5 -d 10s" 2 check "set scheduling param to the osnoise tracer threads" \ - "hwnoise -P F:1 -c 0 -r 900000 -d 1M -q" + "hwnoise -P F:1 -c 0 -r 900000 -d 10s -q" check "stop the trace if a single sample is higher than 1 us" \ - "hwnoise -s 1 -T 1 -t -d 30s" 2 + "hwnoise -s 1 -T 1 -t -d 10s" 2 check "enable a trace event trigger" \ - "hwnoise -t -e osnoise:irq_noise trigger=\"hist:key=desc,duration:sort=desc,duration:vals=hitcount\" -d 1m" + "hwnoise -t -e osnoise:irq_noise trigger=\"hist:key=desc,duration:sort=desc,duration:vals=hitcount\" -d 10s" test_end diff --git a/tools/tracing/rtla/tests/osnoise.t b/tools/tracing/rtla/tests/osnoise.t index 44908fc01abf..6a4dfa31dc55 100644 --- a/tools/tracing/rtla/tests/osnoise.t +++ b/tools/tracing/rtla/tests/osnoise.t @@ -8,13 +8,13 @@ set_timeout 2m check "verify help page" \ "osnoise --help" check 
"verify the --priority/-P param" \ - "osnoise top -P F:1 -c 0 -r 900000 -d 1M -q" + "osnoise top -P F:1 -c 0 -r 900000 -d 10s -q" check "verify the --stop/-s param" \ "osnoise top -s 30 -T 1 -t" 2 check "verify the --trace param" \ "osnoise hist -s 30 -T 1 -t" 2 check "verify the --entries/-E param" \ - "osnoise hist -P F:1 -c 0 -r 900000 -d 1M -b 10 -E 25" + "osnoise hist -P F:1 -c 0 -r 900000 -d 10s -b 10 -E 25" # Test setting default period by putting an absurdly high period # and stopping on threshold. diff --git a/tools/tracing/rtla/tests/timerlat.t b/tools/tracing/rtla/tests/timerlat.t index b354bacd78f8..2d59ee199c4d 100644 --- a/tools/tracing/rtla/tests/timerlat.t +++ b/tools/tracing/rtla/tests/timerlat.t @@ -25,9 +25,9 @@ check "verify help page" \ check "verify -s/--stack" \ "timerlat top -s 3 -T 10 -t" 2 check "verify -P/--priority" \ - "timerlat top -P F:1 -c 0 -d 1M -q" + "timerlat top -P F:1 -c 0 -d 10s -q" check "test in nanoseconds" \ - "timerlat top -i 2 -c 0 -n -d 30s" 2 + "timerlat top -i 2 -c 0 -n -d 10s" 2 check "set the automatic trace mode" \ "timerlat top -a 5 --dump-tasks" 2 check "print the auto-analysis if hits the stop tracing condition" \ @@ -35,9 +35,9 @@ check "print the auto-analysis if hits the stop tracing condition" \ check "disable auto-analysis" \ "timerlat top -s 3 -T 10 -t --no-aa" 2 check "verify -c/--cpus" \ - "timerlat hist -c 0 -d 30s" + "timerlat hist -c 0 -d 10s" check "hist test in nanoseconds" \ - "timerlat hist -i 2 -c 0 -n -d 30s" 2 + "timerlat hist -i 2 -c 0 -n -d 10s" 2 # Actions tests check "trace output through -t" \ From 70165c78e31d84b4712cc535b1e0fa1674f1dab3 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 26 Jun 2025 14:34:05 +0200 Subject: [PATCH 1088/2411] Documentation/rtla: Add actions feature Document both --on-threshold and --on-end, with examples. 
Cc: John Kacur Cc: Luis Goncalves Cc: Arnaldo Carvalho de Melo Cc: Chang Yin Cc: Costa Shulyupin Cc: Crystal Wood Cc: Gabriele Monaco Link: https://lore.kernel.org/20250626123405.1496931-10-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- .../tools/rtla/common_timerlat_options.rst | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/Documentation/tools/rtla/common_timerlat_options.rst b/Documentation/tools/rtla/common_timerlat_options.rst index 10dc802f8d65..7854368f1827 100644 --- a/Documentation/tools/rtla/common_timerlat_options.rst +++ b/Documentation/tools/rtla/common_timerlat_options.rst @@ -55,3 +55,67 @@ Set timerlat to run without workload, waiting for the user to dispatch a per-cpu task that waits for a new period on the tracing/osnoise/per_cpu/cpu$ID/timerlat_fd. See linux/tools/rtla/sample/timerlat_load.py for an example of user-load code. + +**--on-threshold** *action* + + Defines an action to be executed when tracing is stopped on a latency threshold + specified by **-i/--irq** or **-T/--thread**. + + Multiple --on-threshold actions may be specified, and they will be executed in + the order they are provided. If any action fails, subsequent actions in the list + will not be executed. + + Supported actions are: + + - *trace[,file=]* + + Saves trace output, optionally taking a filename. Alternative to -t/--trace. + Note that unlike -t/--trace, specifying this multiple times will result in + the trace being saved multiple times. + + - *signal,num=,pid=* + + Sends signal to process. "parent" might be specified in place of pid to target + the parent process of rtla. + + - *shell,command=* + + Execute shell command. + + - *continue* + + Continue tracing after actions are executed instead of stopping.
+ + Example: + + $ rtla timerlat -T 20 --on-threshold trace + --on-threshold shell,command="grep ipi_send timerlat_trace.txt" + --on-threshold signal,num=2,pid=parent + + This will save a trace with the default filename "timerlat_trace.txt", print its + lines that contain the text "ipi_send" on standard output, and send signal 2 + (SIGINT) to the parent process. + + Performance Considerations: + + For time-sensitive actions, it is recommended to run **rtla timerlat** with BPF + support and RT priority. Note that due to implementational limitations, actions + might be delayed up to one second after tracing is stopped if BPF mode is not + available or disabled. + +**--on-end** *action* + + Defines an action to be executed at the end of **rtla timerlat** tracing. + + Multiple --on-end actions can be specified, and they will be executed in the order + they are provided. If any action fails, subsequent actions in the list will not be + executed. + + See the documentation for **--on-threshold** for the list of supported actions, with + the exception that *continue* has no effect. + + Example: + + $ rtla timerlat -d 5s --on-end trace + + This runs rtla timerlat with default options and saves trace output at the end. From 963f1b20a8d2a098954606b9725cd54336a2a86c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 25 Jun 2025 00:39:33 -0700 Subject: [PATCH 1089/2411] parisc: Makefile: fix a typo in palo.conf Correct "objree" to "objtree". "objree" is not defined. Fixes: 75dd47472b92 ("kbuild: remove src and obj from the top Makefile") Signed-off-by: Randy Dunlap Cc: Masahiro Yamada Cc: "James E.J.
Bottomley" Cc: Helge Deller Cc: linux-parisc@vger.kernel.org Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v5.3+ --- arch/parisc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 21b8166a6883..9cd9aa3d16f2 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -139,7 +139,7 @@ palo lifimage: vmlinuz fi @if test ! -f "$(PALOCONF)"; then \ cp $(srctree)/arch/parisc/defpalo.conf $(objtree)/palo.conf; \ - echo 'A generic palo config file ($(objree)/palo.conf) has been created for you.'; \ + echo 'A generic palo config file ($(objtree)/palo.conf) has been created for you.'; \ echo 'You should check it and re-run "make palo".'; \ echo 'WARNING: the "lifimage" file is now placed in this directory by default!'; \ false; \ From 305ab0a748c52eeaeb01d8cff6408842d19e5cb5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 25 Jun 2025 00:30:54 -0700 Subject: [PATCH 1090/2411] parisc: Makefile: explain that 64BIT requires both 32-bit and 64-bit compilers For building a 64-bit kernel, both 32-bit and 64-bit VDSO binaries are built, so both 32-bit and 64-bit compilers (and tools) should be in the PATH environment variable. Signed-off-by: Randy Dunlap Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: linux-parisc@vger.kernel.org Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v5.3+ --- arch/parisc/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 9cd9aa3d16f2..48ae3c79557a 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -39,7 +39,9 @@ endif export LD_BFD -# Set default 32 bits cross compilers for vdso +# Set default 32 bits cross compilers for vdso. +# This means that for 64BIT, both the 64-bit tools and the 32-bit tools +# need to be in the path. 
CC_ARCHES_32 = hppa hppa2.0 hppa1.1 CC_SUFFIXES = linux linux-gnu unknown-linux-gnu suse-linux CROSS32_COMPILE := $(call cc-cross-prefix, \ From cb22f247f371bd206a88cf0e0c05d80b8b62fb26 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 21 Jul 2025 15:13:42 -0400 Subject: [PATCH 1091/2411] parisc: Update comments in make_insert_tlb The following testcase exposed a problem with our read access checks in get_user() and raw_copy_from_user(): #include #include #include #include #include #include #include #include #include int main(int argc, char **argv) { unsigned long page_size = sysconf(_SC_PAGESIZE); char *p = malloc(3 * page_size); char *p_aligned; /* initialize memory region. If not initialized, write syscall below will correctly return EFAULT. */ if (1) memset(p, 'X', 3 * page_size); p_aligned = (char *) ((((uintptr_t) p) + (2*page_size - 1)) & ~(page_size - 1)); /* Drop PROT_READ protection. Kernel and userspace should fault when accessing that memory region */ mprotect(p_aligned, page_size, PROT_NONE); /* the following write() should return EFAULT, since PROT_READ was dropped by previous mprotect() */ int ret = write(2, p_aligned, 1); if (!ret || errno != EFAULT) printf("\n FAILURE: write() did not returned expected EFAULT value\n"); return 0; } Because of the way _PAGE_READ is handled, kernel code never generates a read access fault when it access a page as the kernel privilege level is always less than PL1 in the PTE. This patch reworks the comments in the make_insert_tlb macro to try to make this clearer. 
Signed-off-by: John David Anglin Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v5.12+ --- arch/parisc/kernel/entry.S | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index ea57bcc21dc5..f4bf61a34701 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -499,6 +499,12 @@ * this happens is quite subtle, read below */ .macro make_insert_tlb spc,pte,prot,tmp space_to_prot \spc \prot /* create prot id from space */ + +#if _PAGE_SPECIAL_BIT == _PAGE_DMB_BIT + /* need to drop DMB bit, as it's used as SPECIAL flag */ + depi 0,_PAGE_SPECIAL_BIT,1,\pte +#endif + /* The following is the real subtlety. This is depositing * T <-> _PAGE_REFTRAP * D <-> _PAGE_DIRTY @@ -511,17 +517,18 @@ * Finally, _PAGE_READ goes in the top bit of PL1 (so we * trigger an access rights trap in user space if the user * tries to read an unreadable page */ -#if _PAGE_SPECIAL_BIT == _PAGE_DMB_BIT - /* need to drop DMB bit, as it's used as SPECIAL flag */ - depi 0,_PAGE_SPECIAL_BIT,1,\pte -#endif depd \pte,8,7,\prot /* PAGE_USER indicates the page can be read with user privileges, * so deposit X1|11 to PL1|PL2 (remember the upper bit of PL1 - * contains _PAGE_READ) */ + * contains _PAGE_READ). While the kernel can't directly write + * user pages which have _PAGE_WRITE zero, it can read pages + * which have _PAGE_READ zero (PL <= PL1). Thus, the kernel + * exception fault handler doesn't trigger when reading pages + * that aren't user read accessible */ extrd,u,*= \pte,_PAGE_USER_BIT+32,1,%r0 depdi 7,11,3,\prot + /* If we're a gateway page, drop PL2 back to zero for promotion * to kernel privilege (so we can execute the page as kernel). 
* Any privilege promotion page always denys read and write */ From 91428ca9320edbab1211851d82429d33b9cd73ef Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 21 Jul 2025 15:39:26 -0400 Subject: [PATCH 1092/2411] parisc: Check region is readable by user in raw_copy_from_user() Because of the way the _PAGE_READ is handled in the parisc PTE, an access interruption is not generated when the kernel reads from a region where the _PAGE_READ is zero. The current code was written assuming read access faults would also occur in the kernel. This change adds user access checks to raw_copy_from_user(). The prober_user() define checks whether user code has read access to a virtual address. Note that page faults are not handled in the exception support for the probe instruction. For this reason, we precede the probe by a ldb access check. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v5.12+ --- arch/parisc/include/asm/special_insns.h | 28 +++++++++++++++++++++++++ arch/parisc/lib/memcpy.c | 19 ++++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h index 51f40eaf7780..1013eeba31e5 100644 --- a/arch/parisc/include/asm/special_insns.h +++ b/arch/parisc/include/asm/special_insns.h @@ -32,6 +32,34 @@ pa; \ }) +/** + * prober_user() - Probe user read access + * @sr: Space register. + * @va: Virtual address. + * + * Return: Non-zero if address is accessible. + * + * Due to the way _PAGE_READ is handled in TLB entries, we need + * a special check to determine whether a user address is accessible. + * The ldb instruction does the initial access check. If it is + * successful, the probe instruction checks user access rights.
+ */ +#define prober_user(sr, va) ({ \ + unsigned long read_allowed; \ + __asm__ __volatile__( \ + "copy %%r0,%0\n" \ + "8:\tldb 0(%%sr%1,%2),%%r0\n" \ + "\tproberi (%%sr%1,%2),%3,%0\n" \ + "9:\n" \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \ + "or %%r0,%%r0,%%r0") \ + : "=&r" (read_allowed) \ + : "i" (sr), "r" (va), "i" (PRIV_USER) \ + : "memory" \ + ); \ + read_allowed; \ +}) + #define CR_EIEM 15 /* External Interrupt Enable Mask */ #define CR_CR16 16 /* CR16 Interval Timer */ #define CR_EIRR 23 /* External Interrupt Request Register */ diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c index 5fc0c852c84c..69d65ffab312 100644 --- a/arch/parisc/lib/memcpy.c +++ b/arch/parisc/lib/memcpy.c @@ -12,6 +12,7 @@ #include #include #include +#include #define get_user_space() mfsp(SR_USER) #define get_kernel_space() SR_KERNEL @@ -32,9 +33,25 @@ EXPORT_SYMBOL(raw_copy_to_user); unsigned long raw_copy_from_user(void *dst, const void __user *src, unsigned long len) { + unsigned long start = (unsigned long) src; + unsigned long end = start + len; + unsigned long newlen = len; + mtsp(get_user_space(), SR_TEMP1); mtsp(get_kernel_space(), SR_TEMP2); - return pa_memcpy(dst, (void __force *)src, len); + + /* Check region is user accessible */ + if (start) + while (start < end) { + if (!prober_user(SR_TEMP1, start)) { + newlen = (start - (unsigned long) src); + break; + } + start += PAGE_SIZE; + /* align to page boundry which may have different permission */ + start = PAGE_ALIGN_DOWN(start); + } + return len - newlen + pa_memcpy(dst, (void __force *)src, newlen); } EXPORT_SYMBOL(raw_copy_from_user); From 52ce9406a9625c4498c4eaa51e7a7ed9dcb9db16 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 21 Jul 2025 15:56:04 -0400 Subject: [PATCH 1093/2411] parisc: Rename pte_needs_flush() to pte_needs_cache_flush() in cache.c The local name used in cache.c conflicts the declaration in include/asm-generic/tlb.h. 
Signed-off-by: John David Anglin Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v5.12+ --- arch/parisc/kernel/cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index db531e58d70e..3b37a7e7abe4 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -429,7 +429,7 @@ static inline pte_t *get_ptep(struct mm_struct *mm, unsigned long addr) return ptep; } -static inline bool pte_needs_flush(pte_t pte) +static inline bool pte_needs_cache_flush(pte_t pte) { return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_NO_CACHE)) == (_PAGE_PRESENT | _PAGE_ACCESSED); @@ -630,7 +630,7 @@ static void flush_cache_page_if_present(struct vm_area_struct *vma, ptep = get_ptep(vma->vm_mm, vmaddr); if (ptep) { pte = ptep_get(ptep); - needs_flush = pte_needs_flush(pte); + needs_flush = pte_needs_cache_flush(pte); pte_unmap(ptep); } if (needs_flush) From 802e55488bc2cc1ab6423b720255a785ccac42ce Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 21 Jul 2025 16:06:21 -0400 Subject: [PATCH 1094/2411] parisc: Define and use set_pte_at() When a PTE is changed, we need to flush the PTE. set_pte_at() was lost in the folio update. PA-RISC version is the same as the generic version. 
Signed-off-by: John David Anglin Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v5.12+ --- arch/parisc/include/asm/pgtable.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 1a86a4370b29..2c139a4dbf4b 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -276,7 +276,7 @@ extern unsigned long *empty_zero_page; #define pte_none(x) (pte_val(x) == 0) #define pte_present(x) (pte_val(x) & _PAGE_PRESENT) #define pte_user(x) (pte_val(x) & _PAGE_USER) -#define pte_clear(mm, addr, xp) set_pte(xp, __pte(0)) +#define pte_clear(mm, addr, xp) set_pte_at((mm), (addr), (xp), __pte(0)) #define pmd_flag(x) (pmd_val(x) & PxD_FLAG_MASK) #define pmd_address(x) ((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT) @@ -392,6 +392,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, } } #define set_ptes set_ptes +#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1) /* Used for deferring calls to flush_dcache_page() */ @@ -456,7 +457,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned if (!pte_young(pte)) { return 0; } - set_pte(ptep, pte_mkold(pte)); + set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte)); return 1; } @@ -466,7 +467,7 @@ pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *pt struct mm_struct; static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - set_pte(ptep, pte_wrprotect(*ptep)); + set_pte_at(mm, addr, ptep, pte_wrprotect(*ptep)); } #define pte_same(A,B) (pte_val(A) == pte_val(B)) From f92a5e36b0c45cd12ac0d1bc44680c0dfae34543 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 21 Jul 2025 16:13:13 -0400 Subject: [PATCH 1095/2411] parisc: Try to fixup kernel exception in bad_area_nosemaphore path of do_page_fault() Signed-off-by: John David Anglin Signed-off-by: Helge 
Deller Cc: stable@vger.kernel.org # v5.12+ --- arch/parisc/mm/fault.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index c39de84e98b0..f1785640b049 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -363,6 +363,10 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, mmap_read_unlock(mm); bad_area_nosemaphore: + if (!user_mode(regs) && fixup_exception(regs)) { + return; + } + if (user_mode(regs)) { int signo, si_code; From 4eab1c27ce1f0e89ab67b01bf1e4e4c75215708a Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 21 Jul 2025 16:18:41 -0400 Subject: [PATCH 1096/2411] parisc: Drop WARN_ON_ONCE() from flush_cache_vmap I have observed this warning to occasionally trigger. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v5.12+ --- arch/parisc/kernel/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 3b37a7e7abe4..37ca484cc495 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -841,7 +841,7 @@ void flush_cache_vmap(unsigned long start, unsigned long end) } vm = find_vm_area((void *)start); - if (WARN_ON_ONCE(!vm)) { + if (!vm) { flush_cache_all(); return; } From f6334f4ae9a4e962ba74b026e1d965dfdf8cbef8 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Fri, 25 Jul 2025 12:12:14 -0400 Subject: [PATCH 1097/2411] parisc: Revise gateway LWS calls to probe user read access We use load and stbys,e instructions to trigger memory reference interruptions without writing to memory. Because of the way read access support is implemented, read access interruptions are only triggered at privilege levels 2 and 3. The kernel and gateway page execute at privilege level 0, so this code never triggers a read access interruption.
Thus, it is currently possible for user code to execute a LWS compare and swap operation at an address that is read protected at privilege level 3 (PRIV_USER). Fix this by probing read access rights at privilege level 3 and branching to lws_fault if access isn't allowed. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v5.12+ --- arch/parisc/kernel/syscall.S | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 0fa81bf1466b..f58c4bccfbce 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -613,6 +613,9 @@ lws_compare_and_swap32: lws_compare_and_swap: /* Trigger memory reference interruptions without writing to memory */ 1: ldw 0(%r26), %r28 + proberi (%r26), PRIV_USER, %r28 + comb,=,n %r28, %r0, lws_fault /* backwards, likely not taken */ + nop 2: stbys,e %r0, 0(%r26) /* Calculate 8-bit hash index from virtual address */ @@ -767,6 +770,9 @@ cas2_lock_start: copy %r26, %r28 depi_safe 0, 31, 2, %r28 10: ldw 0(%r28), %r1 + proberi (%r28), PRIV_USER, %r1 + comb,=,n %r1, %r0, lws_fault /* backwards, likely not taken */ + nop 11: stbys,e %r0, 0(%r28) /* Calculate 8-bit hash index from virtual address */ @@ -951,41 +957,47 @@ atomic_xchg_begin: /* 8-bit exchange */ 1: ldb 0(%r24), %r20 + proberi (%r24), PRIV_USER, %r20 + comb,=,n %r20, %r0, lws_fault /* backwards, likely not taken */ + nop copy %r23, %r20 depi_safe 0, 31, 2, %r20 b atomic_xchg_start 2: stbys,e %r0, 0(%r20) - nop - nop - nop /* 16-bit exchange */ 3: ldh 0(%r24), %r20 + proberi (%r24), PRIV_USER, %r20 + comb,=,n %r20, %r0, lws_fault /* backwards, likely not taken */ + nop copy %r23, %r20 depi_safe 0, 31, 2, %r20 b atomic_xchg_start 4: stbys,e %r0, 0(%r20) - nop - nop - nop /* 32-bit exchange */ 5: ldw 0(%r24), %r20 + proberi (%r24), PRIV_USER, %r20 + comb,=,n %r20, %r0, lws_fault /* backwards, likely not taken */ + nop b 
atomic_xchg_start 6: stbys,e %r0, 0(%r23) nop nop - nop - nop - nop /* 64-bit exchange */ #ifdef CONFIG_64BIT 7: ldd 0(%r24), %r20 + proberi (%r24), PRIV_USER, %r20 + comb,=,n %r20, %r0, lws_fault /* backwards, likely not taken */ + nop 8: stdby,e %r0, 0(%r23) #else 7: ldw 0(%r24), %r20 8: ldw 4(%r24), %r20 + proberi (%r24), PRIV_USER, %r20 + comb,=,n %r20, %r0, lws_fault /* backwards, likely not taken */ + nop copy %r23, %r20 depi_safe 0, 31, 2, %r20 9: stbys,e %r0, 0(%r20) From 89f686a0fb6e473a876a9a60a13aec67a62b9a7e Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Fri, 25 Jul 2025 13:51:32 -0400 Subject: [PATCH 1098/2411] parisc: Revise __get_user() to probe user read access Because of the way read access support is implemented, read access interruptions are only triggered at privilege levels 2 and 3. The kernel executes at privilege level 0, so __get_user() never triggers a read access interruption (code 26). Thus, it is currently possible for user code to access a read protected address via a system call. Fix this by probing read access rights at privilege level 3 (PRIV_USER) and setting __gu_err to -EFAULT (-14) if access isn't allowed. Note the cmpiclr instruction does a 32-bit compare because COND macro doesn't work inside asm. 
Signed-off-by: John David Anglin Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v5.12+ --- arch/parisc/include/asm/uaccess.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 88d0ae5769dd..6c531d2c847e 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -42,9 +42,24 @@ __gu_err; \ }) -#define __get_user(val, ptr) \ -({ \ - __get_user_internal(SR_USER, val, ptr); \ +#define __probe_user_internal(sr, error, ptr) \ +({ \ + __asm__("\tproberi (%%sr%1,%2),%3,%0\n" \ + "\tcmpiclr,= 1,%0,%0\n" \ + "\tldi %4,%0\n" \ + : "=r"(error) \ + : "i"(sr), "r"(ptr), "i"(PRIV_USER), \ + "i"(-EFAULT)); \ +}) + +#define __get_user(val, ptr) \ +({ \ + register long __gu_err; \ + \ + __gu_err = __get_user_internal(SR_USER, val, ptr); \ + if (likely(!__gu_err)) \ + __probe_user_internal(SR_USER, __gu_err, ptr); \ + __gu_err; \ }) #define __get_user_asm(sr, val, ldx, ptr) \ From f468992936894c9ce3b1659cf38c230d33b77a16 Mon Sep 17 00:00:00 2001 From: Shankari Anand Date: Thu, 26 Jun 2025 00:36:54 +0530 Subject: [PATCH 1099/2411] kconfig: nconf: Ensure null termination where strncpy is used strncpy() does not guarantee null-termination if the source string is longer than the destination buffer. Ensure the buffer is explicitly null-terminated to prevent potential string overflows or undefined behavior. Signed-off-by: Shankari Anand Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap Tested-by: Nicolas Schier Acked-by: Nicolas Schier --- scripts/kconfig/nconf.c | 2 ++ scripts/kconfig/nconf.gui.c | 1 + 2 files changed, 3 insertions(+) diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c index c0b2dabf6c89..ae1fe5f60327 100644 --- a/scripts/kconfig/nconf.c +++ b/scripts/kconfig/nconf.c @@ -593,6 +593,8 @@ static void item_add_str(const char *fmt, ...) 
tmp_str, sizeof(k_menu_items[index].str)); + k_menu_items[index].str[sizeof(k_menu_items[index].str) - 1] = '\0'; + free_item(curses_menu_items[index]); curses_menu_items[index] = new_item( k_menu_items[index].str, diff --git a/scripts/kconfig/nconf.gui.c b/scripts/kconfig/nconf.gui.c index 4bfdf8ac2a9a..7206437e784a 100644 --- a/scripts/kconfig/nconf.gui.c +++ b/scripts/kconfig/nconf.gui.c @@ -359,6 +359,7 @@ int dialog_inputbox(WINDOW *main_window, x = (columns-win_cols)/2; strncpy(result, init, *result_len); + result[*result_len - 1] = '\0'; /* create the windows */ win = newwin(win_lines, win_cols, y, x); From 1f937cdf32689279297185be72751ae1c5566baf Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 26 Jun 2025 20:06:12 -0700 Subject: [PATCH 1100/2411] docs: kconfig: add alldefconfig to the all*configs Add "alldefconfig" to the explanation of the KCONFIG_ALLCONFIG environment variable usage so that all targets that use KCONFIG_ALLCONFIG are listed. Signed-off-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- Documentation/kbuild/kconfig.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/kbuild/kconfig.rst b/Documentation/kbuild/kconfig.rst index fc4e845bc249..d213c4f599a4 100644 --- a/Documentation/kbuild/kconfig.rst +++ b/Documentation/kbuild/kconfig.rst @@ -67,12 +67,12 @@ Environment variables for ``*config``: with its value when saving the configuration, instead of using the default, ``CONFIG_``. 
-Environment variables for ``{allyes/allmod/allno/rand}config``: +Environment variables for ``{allyes/allmod/allno/alldef/rand}config``: ``KCONFIG_ALLCONFIG`` - The allyesconfig/allmodconfig/allnoconfig/randconfig variants can also - use the environment variable KCONFIG_ALLCONFIG as a flag or a filename - that contains config symbols that the user requires to be set to a + The allyesconfig/allmodconfig/alldefconfig/allnoconfig/randconfig variants + can also use the environment variable KCONFIG_ALLCONFIG as a flag or a + filename that contains config symbols that the user requires to be set to a specific value. If KCONFIG_ALLCONFIG is used without a filename where KCONFIG_ALLCONFIG == "" or KCONFIG_ALLCONFIG == "1", ``make *config`` checks for a file named "all{yes/mod/no/def/random}.config" From 0c82f50a06aa13e6fc29e17081094489d57745fd Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:43:27 +0900 Subject: [PATCH 1101/2411] kconfig: gconf: fix behavior of a menu under a symbol in split view A menu can be created under a symbol. [Example] menu "outer menu" config A bool "A" menu "inner menu" depends on A config B bool "B" endmenu endmenu After being re-parented by menu_finalize(), the menu tree is structured like follows: menu "outer menu" \-- A \-- menu "inner menu" \-- B In split view, the symbol A is shown in the right pane, so all of its descendants must also be shown there. This has never worked correctly. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 28c4b5b37448..7397a51641a7 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -803,7 +803,7 @@ static gboolean on_treeview2_button_press_event(GtkWidget *widget, enum prop_type ptype; ptype = menu->prompt ? 
menu->prompt->type : P_UNKNOWN; - if (ptype == P_MENU && view_mode != FULL_VIEW && col == COL_OPTION) { + if (ptype == P_MENU && view_mode == SINGLE_VIEW && col == COL_OPTION) { // goes down into menu browsed = menu; display_tree_part(); @@ -953,8 +953,7 @@ static void _display_tree(GtkTreeStore *tree, struct menu *menu, gtk_tree_store_append(tree, &iter, parent); set_node(tree, &iter, child); - if ((view_mode != FULL_VIEW) && (ptype == P_MENU) - && (tree == tree2)) + if ((view_mode == SINGLE_VIEW) && (ptype == P_MENU)) continue; /* if (((menu != &rootmenu) && !(menu->flags & MENU_ROOT)) From 06ba76dc825703fa61cee72c2ae66508ef5f10ec Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:43:28 +0900 Subject: [PATCH 1102/2411] kconfig: gconf: use configure-event handler to adjust pane separator The size-request event handler is currently used to adjust the position of the horizontal separator in the right pane. However, the size-request signal is not available in GTK 3. Use the configure-event signal instead. 
Signed-off-by: Masahiro Yamada Tested-by: Randy Dunlap --- scripts/kconfig/gconf.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 7397a51641a7..37eec7a6bf54 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -606,23 +606,12 @@ static void on_window1_destroy(GtkObject *object, gpointer user_data) gtk_main_quit(); } -static void on_window1_size_request(GtkWidget *widget, - GtkRequisition *requisition, - gpointer user_data) +static gboolean on_window1_configure(GtkWidget *self, + GdkEventConfigure *event, + gpointer user_data) { - static gint old_h; - gint w, h; - - if (widget->window == NULL) - gtk_window_get_default_size(GTK_WINDOW(main_wnd), &w, &h); - else - gdk_window_get_size(widget->window, &w, &h); - - if (h == old_h) - return; - old_h = h; - - gtk_paned_set_position(GTK_PANED(vpaned), 2 * h / 3); + gtk_paned_set_position(GTK_PANED(vpaned), 2 * event->height / 3); + return FALSE; } static gboolean on_window1_delete_event(GtkWidget *widget, GdkEvent *event, @@ -1023,8 +1012,8 @@ static void init_main_window(const gchar *glade_file) main_wnd = glade_xml_get_widget(xml, "window1"); g_signal_connect(main_wnd, "destroy", G_CALLBACK(on_window1_destroy), NULL); - g_signal_connect(main_wnd, "size_request", - G_CALLBACK(on_window1_size_request), NULL); + g_signal_connect(main_wnd, "configure-event", + G_CALLBACK(on_window1_configure), NULL); g_signal_connect(main_wnd, "delete_event", G_CALLBACK(on_window1_delete_event), NULL); From 894ad403439e54d3cdee77a538190dd08ae54789 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:43:29 +0900 Subject: [PATCH 1103/2411] kconfig: gconf: rename display_tree_part() This function recreates the tree store to update the menu content. Rename it to recreate_tree() to better reflect its purpose. 
Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 37eec7a6bf54..05ee10f5f45b 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -57,7 +57,7 @@ enum { }; static void display_tree(GtkTreeStore *store, struct menu *menu); -static void display_tree_part(void); +static void recreate_tree(void); static void conf_changed(bool dirty) { @@ -327,7 +327,7 @@ static void set_view_mode(enum view_mode mode) browsed = menu_get_parent_menu(selected) ?: &rootmenu; else browsed = &rootmenu; - display_tree_part(); + recreate_tree(); text_insert_msg("", ""); select_menu(GTK_TREE_VIEW(tree2_w), selected); gtk_widget_set_sensitive(single_btn, FALSE); @@ -558,7 +558,7 @@ static void on_back_clicked(GtkButton *button, gpointer user_data) ptype = browsed->prompt ? browsed->prompt->type : P_UNKNOWN; if (ptype != P_MENU) browsed = browsed->parent; - display_tree_part(); + recreate_tree(); if (browsed == &rootmenu) gtk_widget_set_sensitive(back_btn, FALSE); @@ -795,7 +795,7 @@ static gboolean on_treeview2_button_press_event(GtkWidget *widget, if (ptype == P_MENU && view_mode == SINGLE_VIEW && col == COL_OPTION) { // goes down into menu browsed = menu; - display_tree_part(); + recreate_tree(); gtk_widget_set_sensitive(back_btn, TRUE); } else if (col == COL_OPTION) { toggle_sym_value(menu); @@ -900,7 +900,7 @@ static gboolean on_treeview1_button_press_event(GtkWidget *widget, if (menu->type == M_MENU) { browsed = menu; - display_tree_part(); + recreate_tree(); } gtk_tree_view_set_cursor(view, path, NULL, FALSE); @@ -961,8 +961,8 @@ static void display_tree(GtkTreeStore *store, struct menu *menu) _display_tree(store, menu, NULL); } -/* Display a part of the tree starting at current node (single/split view) */ -static void display_tree_part(void) +/* Recreate the tree store starting at 'browsed' node */ +static void 
recreate_tree(void) { gtk_tree_store_clear(tree2); display_tree(tree2, browsed); From e06030c1ae3299f71ae38ccbdd4ae0a2d0aa9189 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:43:30 +0900 Subject: [PATCH 1104/2411] kconfig: gconf: rename gconf.glade to gconf.ui The next commit will convert this file to GtkBuilder format. Rename it in advance to reflect the intended format. Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/gconf.c | 6 +++--- scripts/kconfig/{gconf.glade => gconf.ui} | 0 2 files changed, 3 insertions(+), 3 deletions(-) rename scripts/kconfig/{gconf.glade => gconf.ui} (100%) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 05ee10f5f45b..8006cc547180 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -1327,11 +1327,11 @@ int main(int ac, char *av[]) /* Determine GUI path */ env = getenv(SRCTREE); if (env) - glade_file = g_strconcat(env, "/scripts/kconfig/gconf.glade", NULL); + glade_file = g_strconcat(env, "/scripts/kconfig/gconf.ui", NULL); else if (av[0][0] == '/') - glade_file = g_strconcat(av[0], ".glade", NULL); + glade_file = g_strconcat(av[0], ".ui", NULL); else - glade_file = g_strconcat(g_get_current_dir(), "/", av[0], ".glade", NULL); + glade_file = g_strconcat(g_get_current_dir(), "/", av[0], ".ui", NULL); /* Conf stuffs */ if (ac > 1 && av[1][0] == '-') { diff --git a/scripts/kconfig/gconf.glade b/scripts/kconfig/gconf.ui similarity index 100% rename from scripts/kconfig/gconf.glade rename to scripts/kconfig/gconf.ui From 9755d167bf51fad7091bd990f8d57006d6a60669 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:43:31 +0900 Subject: [PATCH 1105/2411] kconfig: gconf: migrate to GTK 3 This commit switches from GTK 2.x to GTK 3, applying the following necessary changes: - Do not include individual headers - GtkObject is gone - Convert Glade to GtkBuilder Link: https://docs.gtk.org/gtk3/migrating-2to3.html 
Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/gconf-cfg.sh | 11 +- scripts/kconfig/gconf.c | 70 ++++++------ scripts/kconfig/gconf.ui | 200 +++++++++++++++++------------------ 3 files changed, 135 insertions(+), 146 deletions(-) diff --git a/scripts/kconfig/gconf-cfg.sh b/scripts/kconfig/gconf-cfg.sh index fc954c0538fa..856c692f480c 100755 --- a/scripts/kconfig/gconf-cfg.sh +++ b/scripts/kconfig/gconf-cfg.sh @@ -6,7 +6,7 @@ set -eu cflags=$1 libs=$2 -PKG="gtk+-2.0 gmodule-2.0 libglade-2.0" +PKG=gtk+-3.0 if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then echo >&2 "*" @@ -18,18 +18,11 @@ fi if ! ${HOSTPKG_CONFIG} --exists $PKG; then echo >&2 "*" echo >&2 "* Unable to find the GTK+ installation. Please make sure that" - echo >&2 "* the GTK+ 2.0 development package is correctly installed." + echo >&2 "* the GTK 3 development package is correctly installed." echo >&2 "* You need $PKG" echo >&2 "*" exit 1 fi -if ! ${HOSTPKG_CONFIG} --atleast-version=2.0.0 gtk+-2.0; then - echo >&2 "*" - echo >&2 "* GTK+ is present but version >= 2.0.0 is required." 
- echo >&2 "*" - exit 1 -fi - ${HOSTPKG_CONFIG} --cflags ${PKG} > ${cflags} ${HOSTPKG_CONFIG} --libs ${PKG} > ${libs} diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 8006cc547180..313250d4fc53 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -7,10 +7,7 @@ #include "lkc.h" #include "images.h" -#include #include -#include -#include #include #include @@ -601,7 +598,7 @@ static void on_expand_clicked(GtkButton *button, gpointer user_data) /* Main Windows Callbacks */ -static void on_window1_destroy(GtkObject *object, gpointer user_data) +static void on_window1_destroy(GtkWidget *widget, gpointer user_data) { gtk_main_quit(); } @@ -1001,15 +998,15 @@ static void replace_button_icon(GtkWidget *widget, const char * const xpm[]) static void init_main_window(const gchar *glade_file) { - GladeXML *xml; + GtkBuilder *builder; GtkWidget *widget; GtkTextBuffer *txtbuf; - xml = glade_xml_new(glade_file, "window1", NULL); - if (!xml) + builder = gtk_builder_new_from_file(glade_file); + if (!builder) g_error("GUI loading failed !\n"); - main_wnd = glade_xml_get_widget(xml, "window1"); + main_wnd = GTK_WIDGET(gtk_builder_get_object(builder, "window1")); g_signal_connect(main_wnd, "destroy", G_CALLBACK(on_window1_destroy), NULL); g_signal_connect(main_wnd, "configure-event", @@ -1017,9 +1014,9 @@ static void init_main_window(const gchar *glade_file) g_signal_connect(main_wnd, "delete_event", G_CALLBACK(on_window1_delete_event), NULL); - hpaned = glade_xml_get_widget(xml, "hpaned1"); - vpaned = glade_xml_get_widget(xml, "vpaned1"); - tree1_w = glade_xml_get_widget(xml, "treeview1"); + hpaned = GTK_WIDGET(gtk_builder_get_object(builder, "hpaned1")); + vpaned = GTK_WIDGET(gtk_builder_get_object(builder, "vpaned1")); + tree1_w = GTK_WIDGET(gtk_builder_get_object(builder, "treeview1")); g_signal_connect(tree1_w, "cursor_changed", G_CALLBACK(on_treeview2_cursor_changed), NULL); g_signal_connect(tree1_w, "button_press_event", @@ -1027,7 +1024,7 @@ 
static void init_main_window(const gchar *glade_file) g_signal_connect(tree1_w, "key_press_event", G_CALLBACK(on_treeview2_key_press_event), NULL); - tree2_w = glade_xml_get_widget(xml, "treeview2"); + tree2_w = GTK_WIDGET(gtk_builder_get_object(builder, "treeview2")); g_signal_connect(tree2_w, "cursor_changed", G_CALLBACK(on_treeview2_cursor_changed), NULL); g_signal_connect(tree2_w, "button_press_event", @@ -1035,101 +1032,101 @@ static void init_main_window(const gchar *glade_file) g_signal_connect(tree2_w, "key_press_event", G_CALLBACK(on_treeview2_key_press_event), NULL); - text_w = glade_xml_get_widget(xml, "textview3"); + text_w = GTK_WIDGET(gtk_builder_get_object(builder, "textview3")); /* menubar */ - widget = glade_xml_get_widget(xml, "load1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "load1")); g_signal_connect(widget, "activate", G_CALLBACK(on_load1_activate), NULL); - save_menu_item = glade_xml_get_widget(xml, "save1"); + save_menu_item = GTK_WIDGET(gtk_builder_get_object(builder, "save1")); g_signal_connect(save_menu_item, "activate", G_CALLBACK(on_save_activate), NULL); - widget = glade_xml_get_widget(xml, "save_as1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "save_as1")); g_signal_connect(widget, "activate", G_CALLBACK(on_save_as1_activate), NULL); - widget = glade_xml_get_widget(xml, "quit1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "quit1")); g_signal_connect(widget, "activate", G_CALLBACK(on_quit1_activate), NULL); - widget = glade_xml_get_widget(xml, "show_name1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "show_name1")); g_signal_connect(widget, "activate", G_CALLBACK(on_show_name1_activate), NULL); gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, show_name); - widget = glade_xml_get_widget(xml, "show_range1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "show_range1")); g_signal_connect(widget, "activate", G_CALLBACK(on_show_range1_activate), NULL); 
gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, show_range); - widget = glade_xml_get_widget(xml, "show_data1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "show_data1")); g_signal_connect(widget, "activate", G_CALLBACK(on_show_data1_activate), NULL); gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, show_value); - widget = glade_xml_get_widget(xml, "set_option_mode1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "set_option_mode1")); g_signal_connect(widget, "activate", G_CALLBACK(on_set_option_mode1_activate), NULL); - widget = glade_xml_get_widget(xml, "set_option_mode2"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "set_option_mode2")); g_signal_connect(widget, "activate", G_CALLBACK(on_set_option_mode2_activate), NULL); - widget = glade_xml_get_widget(xml, "set_option_mode3"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "set_option_mode3")); g_signal_connect(widget, "activate", G_CALLBACK(on_set_option_mode3_activate), NULL); - widget = glade_xml_get_widget(xml, "introduction1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "introduction1")); g_signal_connect(widget, "activate", G_CALLBACK(on_introduction1_activate), NULL); - widget = glade_xml_get_widget(xml, "about1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "about1")); g_signal_connect(widget, "activate", G_CALLBACK(on_about1_activate), NULL); - widget = glade_xml_get_widget(xml, "license1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "license1")); g_signal_connect(widget, "activate", G_CALLBACK(on_license1_activate), NULL); /* toolbar */ - back_btn = glade_xml_get_widget(xml, "button1"); + back_btn = GTK_WIDGET(gtk_builder_get_object(builder, "button1")); g_signal_connect(back_btn, "clicked", G_CALLBACK(on_back_clicked), NULL); gtk_widget_set_sensitive(back_btn, FALSE); - widget = glade_xml_get_widget(xml, "button2"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "button2")); g_signal_connect(widget, 
"clicked", G_CALLBACK(on_load_clicked), NULL); - save_btn = glade_xml_get_widget(xml, "button3"); + save_btn = GTK_WIDGET(gtk_builder_get_object(builder, "button3")); g_signal_connect(save_btn, "clicked", G_CALLBACK(on_save_clicked), NULL); - single_btn = glade_xml_get_widget(xml, "button4"); + single_btn = GTK_WIDGET(gtk_builder_get_object(builder, "button4")); g_signal_connect(single_btn, "clicked", G_CALLBACK(on_single_clicked), NULL); replace_button_icon(single_btn, xpm_single_view); - split_btn = glade_xml_get_widget(xml, "button5"); + split_btn = GTK_WIDGET(gtk_builder_get_object(builder, "button5")); g_signal_connect(split_btn, "clicked", G_CALLBACK(on_split_clicked), NULL); replace_button_icon(split_btn, xpm_split_view); - full_btn = glade_xml_get_widget(xml, "button6"); + full_btn = GTK_WIDGET(gtk_builder_get_object(builder, "button6")); g_signal_connect(full_btn, "clicked", G_CALLBACK(on_full_clicked), NULL); replace_button_icon(full_btn, xpm_tree_view); - widget = glade_xml_get_widget(xml, "button7"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "button7")); g_signal_connect(widget, "clicked", G_CALLBACK(on_collapse_clicked), NULL); - widget = glade_xml_get_widget(xml, "button8"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "button8")); g_signal_connect(widget, "clicked", G_CALLBACK(on_expand_clicked), NULL); @@ -1144,7 +1141,9 @@ static void init_main_window(const gchar *glade_file) gtk_window_set_title(GTK_WINDOW(main_wnd), rootmenu.prompt->text); - gtk_widget_show(main_wnd); + gtk_widget_show_all(main_wnd); + + g_object_unref(builder); conf_set_changed_callback(conf_changed); } @@ -1322,7 +1321,6 @@ int main(int ac, char *av[]) /* GTK stuffs */ gtk_init(&ac, &av); - glade_init(); /* Determine GUI path */ env = getenv(SRCTREE); diff --git a/scripts/kconfig/gconf.ui b/scripts/kconfig/gconf.ui index cd714e64cff1..de20a9143aa8 100644 --- a/scripts/kconfig/gconf.ui +++ b/scripts/kconfig/gconf.ui @@ -1,8 +1,8 @@ - + - + True Gtk Kernel 
Configurator GTK_WINDOW_TOPLEVEL @@ -19,193 +19,193 @@ GDK_GRAVITY_NORTH_WEST - + True False 0 - + True - + True _File True - - + + - + True Load a config file _Load True - + - + True Save the config in .config _Save True - + - + True Save the config in a file Save _as True - + - + True - + - + True _Quit True - + - + - + - + True _Options True - - + + - + True Show name Show _name True False - + - + True Show range (Y/M/N) Show _range True False - + - + True Show value of the option Show _data True False - + - + True - + - + True Show normal options Show normal options True True - + - + True Show all options Show all _options True False set_option_mode1 - + - + True Show all options with prompts Show all prompt options True False set_option_mode1 - + - + - + - + True _Help True - - + + - + True _Introduction True - + - + True _About True - + - + True _License True - + - + - + - + 0 False @@ -214,14 +214,14 @@ - + True GTK_ORIENTATION_HORIZONTAL GTK_TOOLBAR_BOTH True - + True Goes up one level (single view) Back @@ -230,7 +230,7 @@ True True False - + False True @@ -238,18 +238,18 @@ - + True True True False - + True - + - + False False @@ -257,7 +257,7 @@ - + True Load a config file Load @@ -266,7 +266,7 @@ True True False - + False True @@ -274,7 +274,7 @@ - + True Save a config file Save @@ -283,7 +283,7 @@ True True False - + False True @@ -291,18 +291,18 @@ - + True True True False - + True - + - + False False @@ -310,7 +310,7 @@ - + True Single view Single @@ -319,7 +319,7 @@ True True False - + False True @@ -327,7 +327,7 @@ - + True Split view Split @@ -336,7 +336,7 @@ True True False - + False True @@ -344,7 +344,7 @@ - + True Full view Full @@ -353,7 +353,7 @@ True True False - + False True @@ -361,18 +361,18 @@ - + True True True False - + True - + - + False False @@ -380,7 +380,7 @@ - + True Collapse the whole tree in the right frame Collapse @@ -389,7 +389,7 @@ True True False - + False True @@ -397,7 +397,7 @@ - + True Expand the whole tree in the 
right frame Expand @@ -406,13 +406,13 @@ True True False - + False True - + 0 False @@ -421,14 +421,13 @@ - + 1 True True - 0 - + True GTK_POLICY_AUTOMATIC GTK_POLICY_AUTOMATIC @@ -436,16 +435,16 @@ GTK_CORNER_TOP_LEFT - + True True True False False False - + - + True False @@ -453,13 +452,12 @@ - + True True - 0 - + True GTK_POLICY_AUTOMATIC GTK_POLICY_AUTOMATIC @@ -467,7 +465,7 @@ GTK_CORNER_TOP_LEFT - + True True True @@ -475,9 +473,9 @@ False False False - + - + True False @@ -485,7 +483,7 @@ - + True GTK_POLICY_NEVER GTK_POLICY_AUTOMATIC @@ -493,7 +491,7 @@ GTK_CORNER_TOP_LEFT - + True True False @@ -508,29 +506,29 @@ 0 0 0 - + - + True True - + True True - + 0 True True - + - + - + From df889fdbb8d4243504eba94e1c3a809a4996a219 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:43:32 +0900 Subject: [PATCH 1106/2411] kconfig: gconf: replace GtkVbox with GtkBox GtkVBox is deprecated with GTK 3.2. [1] Use GtkBox instead. [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/3.2.0/gtk/gtkvbox.c#L47 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.ui | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/gconf.ui b/scripts/kconfig/gconf.ui index de20a9143aa8..806ea3d1bac4 100644 --- a/scripts/kconfig/gconf.ui +++ b/scripts/kconfig/gconf.ui @@ -19,7 +19,8 @@ GDK_GRAVITY_NORTH_WEST - + + vertical True False 0 From d6f0b652d9b54af5a9cf3e926ecfba81c28e1fc4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:43:33 +0900 Subject: [PATCH 1107/2411] kconfig: gconf: replace GdkColor with GdkRGBA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GdkColor is deprecated with GTK 3.14. [1] Use GdkRGBA instead. This fixes warnings such as: scripts/kconfig/gconf.c: In function ‘set_node’: scripts/kconfig/gconf.c:138:9: warning: ‘gdk_color_parse’ is deprecated: Use 'gdk_rgba_parse' instead [-Wdeprecated-declarations] 138 | gdk_color_parse(menu_is_visible(menu) ? 
"Black" : "DarkGray", &color); | ^~~~~~~~~~~~~~~ [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/3.14.0/gdk/deprecated/gdkcolor.h#L52 Signed-off-by: Masahiro Yamada Tested-by: Randy Dunlap --- scripts/kconfig/gconf.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 313250d4fc53..f4c2b07e0207 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -176,7 +176,7 @@ static void set_node(GtkTreeStore *tree, GtkTreeIter *node, struct menu *menu) const gchar *_mod = ""; const gchar *_yes = ""; const gchar *value = ""; - GdkColor color; + GdkRGBA color; gboolean editable = FALSE; gboolean btnvis = FALSE; @@ -186,7 +186,7 @@ static void set_node(GtkTreeStore *tree, GtkTreeIter *node, struct menu *menu) menu->type == M_COMMENT ? "***" : "", sym && !sym_has_value(sym) ? "(NEW)" : ""); - gdk_color_parse(menu_is_visible(menu) ? "Black" : "DarkGray", &color); + gdk_rgba_parse(&color, menu_is_visible(menu) ? 
"Black" : "DarkGray"); if (!sym) goto set; @@ -1174,7 +1174,7 @@ static void init_left_tree(void) G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_POINTER, GDK_TYPE_COLOR, + G_TYPE_POINTER, GDK_TYPE_RGBA, G_TYPE_BOOLEAN, GDK_TYPE_PIXBUF, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, @@ -1205,7 +1205,7 @@ static void init_left_tree(void) gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), renderer, "text", COL_OPTION, - "foreground-gdk", + "foreground-rgba", COL_COLOR, NULL); sel = gtk_tree_view_get_selection(view); @@ -1225,7 +1225,7 @@ static void init_right_tree(void) G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_POINTER, GDK_TYPE_COLOR, + G_TYPE_POINTER, GDK_TYPE_RGBA, G_TYPE_BOOLEAN, GDK_TYPE_PIXBUF, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, @@ -1263,32 +1263,32 @@ static void init_right_tree(void) gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), renderer, "text", COL_OPTION, - "foreground-gdk", + "foreground-rgba", COL_COLOR, NULL); renderer = gtk_cell_renderer_text_new(); gtk_tree_view_insert_column_with_attributes(view, -1, "Name", renderer, "text", COL_NAME, - "foreground-gdk", + "foreground-rgba", COL_COLOR, NULL); renderer = gtk_cell_renderer_text_new(); gtk_tree_view_insert_column_with_attributes(view, -1, "N", renderer, "text", COL_NO, - "foreground-gdk", + "foreground-rgba", COL_COLOR, NULL); renderer = gtk_cell_renderer_text_new(); gtk_tree_view_insert_column_with_attributes(view, -1, "M", renderer, "text", COL_MOD, - "foreground-gdk", + "foreground-rgba", COL_COLOR, NULL); renderer = gtk_cell_renderer_text_new(); gtk_tree_view_insert_column_with_attributes(view, -1, "Y", renderer, "text", COL_YES, - "foreground-gdk", + "foreground-rgba", COL_COLOR, NULL); renderer = gtk_cell_renderer_text_new(); gtk_tree_view_insert_column_with_attributes(view, -1, @@ -1296,7 +1296,7 @@ static 
void init_right_tree(void) "text", COL_VALUE, "editable", COL_EDIT, - "foreground-gdk", + "foreground-rgba", COL_COLOR, NULL); g_signal_connect(G_OBJECT(renderer), "edited", G_CALLBACK(renderer_edited), tree2_w); From bfa7375c10dfabf6b3289041c12d698861277d90 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:43:34 +0900 Subject: [PATCH 1108/2411] kconfig: gconf: replace GtkHPaned and GtkVPaned with GtkPaned GtkHPaned and GtkVPaned are deprecated with GTK 3.2. [1] [2] Use GtkPaned instead. [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/3.2.0/gtk/gtkhpaned.c#L44 [2]: https://gitlab.gnome.org/GNOME/gtk/-/blob/3.2.0/gtk/gtkvpaned.c#L44 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.ui | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/gconf.ui b/scripts/kconfig/gconf.ui index 806ea3d1bac4..c37807e8b782 100644 --- a/scripts/kconfig/gconf.ui +++ b/scripts/kconfig/gconf.ui @@ -422,7 +422,7 @@ - + 1 True True @@ -453,7 +453,8 @@ - + + vertical True True From 65056488e8bfaf6626cd2bba9fa847b264d9fefc Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:43:35 +0900 Subject: [PATCH 1109/2411] kconfig: gconf: show GTK version in About dialog As with xconfig, it is useful to display the GTK version in the About dialog.
Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/gconf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index f4c2b07e0207..7725d2c9d92a 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -525,7 +525,11 @@ static void on_about1_activate(GtkMenuItem *menuitem, gpointer user_data) dialog = gtk_message_dialog_new(GTK_WINDOW(main_wnd), GTK_DIALOG_DESTROY_WITH_PARENT, GTK_MESSAGE_INFO, - GTK_BUTTONS_CLOSE, "%s", about_text); + GTK_BUTTONS_CLOSE, "%s\nGTK version: %d.%d.%d", + about_text, + gtk_get_major_version(), + gtk_get_minor_version(), + gtk_get_micro_version()); gtk_dialog_run(GTK_DIALOG(dialog)); gtk_widget_destroy(dialog); } From 263e70bc42862af18dce43393ef14277827a0c7f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:48:31 +0900 Subject: [PATCH 1110/2411] kconfig: add a function to dump all menu entries in a tree-like format This is useful for debugging purposes. menu_finalize() re-parents menu entries, and this function can be used to dump the final structure of the menu tree. 
Signed-off-by: Masahiro Yamada --- scripts/kconfig/lkc.h | 1 + scripts/kconfig/menu.c | 74 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h index 37b606c74bff..56548efc14d7 100644 --- a/scripts/kconfig/lkc.h +++ b/scripts/kconfig/lkc.h @@ -102,6 +102,7 @@ struct menu *menu_get_menu_or_parent_menu(struct menu *menu); int get_jump_key_char(void); struct gstr get_relations_str(struct symbol **sym_arr, struct list_head *head); void menu_get_ext_help(struct menu *menu, struct gstr *help); +void menu_dump(void); /* symbol.c */ void sym_clear_all_valid(void); diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index a5e5b4fdcd93..0f1a6513987c 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -788,3 +788,77 @@ void menu_get_ext_help(struct menu *menu, struct gstr *help) if (sym) get_symbol_str(help, sym, NULL); } + +/** + * menu_dump - dump all menu entries in a tree-like format + */ +void menu_dump(void) +{ + struct menu *menu = &rootmenu; + unsigned long long bits = 0; + int indent = 0; + + while (menu) { + + for (int i = indent - 1; i >= 0; i--) { + if (bits & (1ULL << i)) { + if (i > 0) + printf("| "); + else + printf("|-- "); + } else { + if (i > 0) + printf(" "); + else + printf("`-- "); + } + } + + switch (menu->type) { + case M_CHOICE: + printf("choice \"%s\"\n", menu->prompt->text); + break; + case M_COMMENT: + printf("comment \"%s\"\n", menu->prompt->text); + break; + case M_IF: + printf("if\n"); + break; + case M_MENU: + printf("menu \"%s\"", menu->prompt->text); + if (!menu->sym) { + printf("\n"); + break; + } + printf(" + "); + /* fallthrough */ + case M_NORMAL: + printf("symbol %s\n", menu->sym->name); + break; + } + if (menu->list) { + bits <<= 1; + menu = menu->list; + if (menu->next) + bits |= 1; + else + bits &= ~1; + indent++; + continue; + } + + while (menu && !menu->next) { + menu = menu->parent; + bits >>= 1; + indent--; + } + + if (menu) 
{ + menu = menu->next; + if (menu->next) + bits |= 1; + else + bits &= ~1; + } + } +} From 721bfe583c52ba1ea74b3736a31a9dcfe6dd6d95 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:48:56 +0900 Subject: [PATCH 1111/2411] kconfig: qconf: fix ConfigList::updateListAllforAll() ConfigList::updateListForAll() and ConfigList::updateListAllforAll() are identical. Commit f9b918fae678 ("kconfig: qconf: move ConfigView::updateList(All) to ConfigList class") was a misconversion. Fixes: f9b918fae678 ("kconfig: qconf: move ConfigView::updateList(All) to ConfigList class") Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index dc056b0a8fde..a7c98bbbd8ac 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -486,7 +486,7 @@ void ConfigList::updateListAllForAll() while (it.hasNext()) { ConfigList *list = it.next(); - list->updateList(); + list->updateListAll(); } } From 87433e3e06a6b6a78a541b6ac39000f41779a882 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:50:35 +0900 Subject: [PATCH 1112/2411] kconfig: qconf: confine {begin,end}Group to constructor and destructor Call beginGroup() in the constructor and endGroup() in the destructor. This looks cleaner.
Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 8 ++++++-- scripts/kconfig/qconf.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index a7c98bbbd8ac..f8992db1870a 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -37,6 +37,12 @@ QAction *ConfigMainWindow::saveAction; ConfigSettings::ConfigSettings() : QSettings("kernel.org", "qconf") { + beginGroup("/kconfig/qconf"); +} + +ConfigSettings::~ConfigSettings() +{ + endGroup(); } /** @@ -1829,7 +1835,6 @@ int main(int ac, char** av) configApp = new QApplication(ac, av); configSettings = new ConfigSettings(); - configSettings->beginGroup("/kconfig/qconf"); v = new ConfigMainWindow(); //zconfdump(stdout); @@ -1837,7 +1842,6 @@ int main(int ac, char** av) v->show(); configApp->exec(); - configSettings->endGroup(); delete configSettings; delete v; delete configApp; diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h index 62ab3286d04f..ab4e51f12914 100644 --- a/scripts/kconfig/qconf.h +++ b/scripts/kconfig/qconf.h @@ -24,6 +24,7 @@ class ConfigMainWindow; class ConfigSettings : public QSettings { public: ConfigSettings(); + ~ConfigSettings(void); QList readSizes(const QString& key, bool *ok); bool writeSizes(const QString& key, const QList& value); }; From e06aa69de21b6de2ef83f559768a4005114f5661 Mon Sep 17 00:00:00 2001 From: Giuliano Procida Date: Tue, 1 Jul 2025 16:19:10 +0100 Subject: [PATCH 1113/2411] gendwarfksyms: use preferred form of sizeof for allocation The preferred form is to use the variable being allocated to, rather than explicitly supplying a type name which might become stale. Also do this for memset. 
Suggested-by: Masahiro Yamada Signed-off-by: Giuliano Procida Reviewed-by: Sami Tolvanen Signed-off-by: Masahiro Yamada --- scripts/gendwarfksyms/cache.c | 2 +- scripts/gendwarfksyms/die.c | 4 ++-- scripts/gendwarfksyms/dwarf.c | 2 +- scripts/gendwarfksyms/kabi.c | 2 +- scripts/gendwarfksyms/symbols.c | 2 +- scripts/gendwarfksyms/types.c | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/gendwarfksyms/cache.c b/scripts/gendwarfksyms/cache.c index c9c19b86a686..1c640db93db3 100644 --- a/scripts/gendwarfksyms/cache.c +++ b/scripts/gendwarfksyms/cache.c @@ -15,7 +15,7 @@ void cache_set(struct cache *cache, unsigned long key, int value) { struct cache_item *ci; - ci = xmalloc(sizeof(struct cache_item)); + ci = xmalloc(sizeof(*ci)); ci->key = key; ci->value = value; hash_add(cache->cache, &ci->hash, hash_32(key)); diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c index 6183bbbe7b54..052f7a3f975a 100644 --- a/scripts/gendwarfksyms/die.c +++ b/scripts/gendwarfksyms/die.c @@ -33,7 +33,7 @@ static struct die *create_die(Dwarf_Die *die, enum die_state state) { struct die *cd; - cd = xmalloc(sizeof(struct die)); + cd = xmalloc(sizeof(*cd)); init_die(cd); cd->addr = (uintptr_t)die->addr; @@ -123,7 +123,7 @@ static struct die_fragment *append_item(struct die *cd) { struct die_fragment *df; - df = xmalloc(sizeof(struct die_fragment)); + df = xmalloc(sizeof(*df)); df->type = FRAGMENT_EMPTY; list_add_tail(&df->list, &cd->fragments); return df; diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c index 13ea7bf1ae7d..3538a7d9cb07 100644 --- a/scripts/gendwarfksyms/dwarf.c +++ b/scripts/gendwarfksyms/dwarf.c @@ -634,7 +634,7 @@ static int get_union_kabi_status(Dwarf_Die *die, Dwarf_Die *placeholder, * Note that the user of this feature is responsible for ensuring * that the structure actually remains ABI compatible. 
*/ - memset(&state.kabi, 0, sizeof(struct kabi_state)); + memset(&state.kabi, 0, sizeof(state.kabi)); res = checkp(process_die_container(&state, NULL, die, check_union_member_kabi_status, diff --git a/scripts/gendwarfksyms/kabi.c b/scripts/gendwarfksyms/kabi.c index b3ade713778f..e3c2a3ccf51a 100644 --- a/scripts/gendwarfksyms/kabi.c +++ b/scripts/gendwarfksyms/kabi.c @@ -228,7 +228,7 @@ void kabi_read_rules(int fd) if (type == KABI_RULE_TYPE_UNKNOWN) error("unsupported kABI rule type: '%s'", field); - rule = xmalloc(sizeof(struct rule)); + rule = xmalloc(sizeof(*rule)); rule->type = type; rule->target = xstrdup(get_rule_field(&rule_str, &left)); diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c index 327f87389c34..35ed594f0749 100644 --- a/scripts/gendwarfksyms/symbols.c +++ b/scripts/gendwarfksyms/symbols.c @@ -146,7 +146,7 @@ void symbol_read_exports(FILE *file) continue; } - sym = xcalloc(1, sizeof(struct symbol)); + sym = xcalloc(1, sizeof(*sym)); sym->name = name; sym->addr.section = SHN_UNDEF; sym->state = SYMBOL_UNPROCESSED; diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c index 7bd459ea6c59..5344c7b9a9ce 100644 --- a/scripts/gendwarfksyms/types.c +++ b/scripts/gendwarfksyms/types.c @@ -43,7 +43,7 @@ static int type_list_append(struct list_head *list, const char *s, void *owned) if (!s) return 0; - entry = xmalloc(sizeof(struct type_list_entry)); + entry = xmalloc(sizeof(*entry)); entry->str = s; entry->owned = owned; list_add_tail(&entry->list, list); @@ -120,7 +120,7 @@ static struct type_expansion *type_map_add(const char *name, struct type_expansion *e; if (__type_map_get(name, &e)) { - e = xmalloc(sizeof(struct type_expansion)); + e = xmalloc(sizeof(*e)); type_expansion_init(e); e->name = xstrdup(name); From d8f26717c901b7ec88c3151988fe70ecaed990b8 Mon Sep 17 00:00:00 2001 From: Giuliano Procida Date: Tue, 1 Jul 2025 16:19:11 +0100 Subject: [PATCH 1114/2411] gendwarfksyms: order -T symtypes output 
by name When writing symtypes information, we iterate through the entire hash table containing type expansions. The key order varies unpredictably as new entries are added, making it harder to compare symtypes between builds. Resolve this by sorting the type expansions by name before output. Signed-off-by: Giuliano Procida Acked-by: Greg Kroah-Hartman Reviewed-by: Sami Tolvanen Signed-off-by: Masahiro Yamada --- scripts/gendwarfksyms/types.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c index 5344c7b9a9ce..9c3b053bf061 100644 --- a/scripts/gendwarfksyms/types.c +++ b/scripts/gendwarfksyms/types.c @@ -6,6 +6,8 @@ #define _GNU_SOURCE #include #include +#include +#include #include #include "gendwarfksyms.h" @@ -179,20 +181,41 @@ static int type_map_get(const char *name, struct type_expansion **res) return -1; } +static int cmp_expansion_name(const void *p1, const void *p2) +{ + struct type_expansion *const *e1 = p1; + struct type_expansion *const *e2 = p2; + + return strcmp((*e1)->name, (*e2)->name); +} + static void type_map_write(FILE *file) { struct type_expansion *e; struct hlist_node *tmp; + struct type_expansion **es; + size_t count = 0; + size_t i = 0; if (!file) return; - hash_for_each_safe(type_map, e, tmp, hash) { - checkp(fputs(e->name, file)); + hash_for_each_safe(type_map, e, tmp, hash) + ++count; + es = xmalloc(count * sizeof(*es)); + hash_for_each_safe(type_map, e, tmp, hash) + es[i++] = e; + + qsort(es, count, sizeof(*es), cmp_expansion_name); + + for (i = 0; i < count; ++i) { + checkp(fputs(es[i]->name, file)); checkp(fputs(" ", file)); - type_list_write(&e->expanded, file); + type_list_write(&es[i]->expanded, file); checkp(fputs("\n", file)); } + + free(es); } static void type_map_free(void) From b9f75396ec107628cc5f52fb6e055c1c9dc68401 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 14 Jul 2025 17:29:23 -0500 Subject: [PATCH 
1115/2411] scripts: add zboot support to extract-vmlinux Zboot compressed kernel images are used for arm64 kernels on various distros. extract-vmlinux fails with those kernels because the wrapped image is another PE. While this could be a bit confusing, the tool's primary purpose of unwrapping and decompressing the contained kernel image makes it the obvious place for this functionality. Add a 'file' check in check_vmlinux() that detects a contained PE image before trying readelf. Recent (FILES_39, Jun/2020) file implementations output something like: "Linux kernel ARM64 boot executable Image, little-endian, 4K pages" Which is also a stronger statement than readelf provides so drop that part of the comment. At the same time this means that kernel images which don't appear to contain a compressed image will be returned rather than reporting an error. Which matches the behavior for existing ELF files. The extracted PE image can then be inspected, or used as would any other kernel PE. Signed-off-by: Jeremy Linton Signed-off-by: Masahiro Yamada --- scripts/extract-vmlinux | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/scripts/extract-vmlinux b/scripts/extract-vmlinux index 8995cd304e6e..189956b5a5c8 100755 --- a/scripts/extract-vmlinux +++ b/scripts/extract-vmlinux @@ -12,13 +12,12 @@ check_vmlinux() { - # Use readelf to check if it's a valid ELF - # TODO: find a better to way to check that it's really vmlinux - # and not just an elf - readelf -h $1 > /dev/null 2>&1 || return 1 - - cat $1 - exit 0 + if file "$1" | grep -q 'Linux kernel.*boot executable' || + readelf -h "$1" > /dev/null 2>&1 + then + cat "$1" + exit 0 + fi } try_decompress() From 4668619092554e1b95c9a5ac2941ca47ba6d548a Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Tue, 15 Jul 2025 16:36:07 -0500 Subject: [PATCH 1116/2411] PCI: pnv_php: Clean up allocated IRQs on unplug When the root of a nested PCIe bridge configuration is unplugged, the pnv_php driver leaked the
allocated IRQ resources for the child bridges' hotplug event notifications, resulting in a panic. Fix this by walking all child buses and deallocating all its IRQ resources before calling pci_hp_remove_devices(). Also modify the lifetime of the workqueue at struct pnv_php_slot::wq so that it is only destroyed in pnv_php_free_slot(), instead of pnv_php_disable_irq(). This is required since pnv_php_disable_irq() will now be called by workers triggered by hot unplug interrupts, so the workqueue needs to stay allocated. The abridged kernel panic that occurs without this patch is as follows: WARNING: CPU: 0 PID: 687 at kernel/irq/msi.c:292 msi_device_data_release+0x6c/0x9c CPU: 0 UID: 0 PID: 687 Comm: bash Not tainted 6.14.0-rc5+ #2 Call Trace: msi_device_data_release+0x34/0x9c (unreliable) release_nodes+0x64/0x13c devres_release_all+0xc0/0x140 device_del+0x2d4/0x46c pci_destroy_dev+0x5c/0x194 pci_hp_remove_devices+0x90/0x128 pci_hp_remove_devices+0x44/0x128 pnv_php_disable_slot+0x54/0xd4 power_write_file+0xf8/0x18c pci_slot_attr_store+0x40/0x5c sysfs_kf_write+0x64/0x78 kernfs_fop_write_iter+0x1b0/0x290 vfs_write+0x3bc/0x50c ksys_write+0x84/0x140 system_call_exception+0x124/0x230 system_call_vectored_common+0x15c/0x2ec Signed-off-by: Shawn Anastasio Signed-off-by: Timothy Pearson [bhelgaas: tidy comments] Signed-off-by: Bjorn Helgaas Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/2013845045.1359852.1752615367790.JavaMail.zimbra@raptorengineeringinc.com --- drivers/pci/hotplug/pnv_php.c | 98 ++++++++++++++++++++++++++++------- 1 file changed, 78 insertions(+), 20 deletions(-) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 573a41869c15..1304329ca6f7 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -3,6 +3,7 @@ * PCI Hotplug Driver for PowerPC PowerNV platform. * * Copyright Gavin Shan, IBM Corporation 2016. 
+ * Copyright (C) 2025 Raptor Engineering, LLC */ #include @@ -36,8 +37,10 @@ static void pnv_php_register(struct device_node *dn); static void pnv_php_unregister_one(struct device_node *dn); static void pnv_php_unregister(struct device_node *dn); +static void pnv_php_enable_irq(struct pnv_php_slot *php_slot); + static void pnv_php_disable_irq(struct pnv_php_slot *php_slot, - bool disable_device) + bool disable_device, bool disable_msi) { struct pci_dev *pdev = php_slot->pdev; u16 ctrl; @@ -53,19 +56,15 @@ static void pnv_php_disable_irq(struct pnv_php_slot *php_slot, php_slot->irq = 0; } - if (php_slot->wq) { - destroy_workqueue(php_slot->wq); - php_slot->wq = NULL; - } - - if (disable_device) { + if (disable_device || disable_msi) { if (pdev->msix_enabled) pci_disable_msix(pdev); else if (pdev->msi_enabled) pci_disable_msi(pdev); - - pci_disable_device(pdev); } + + if (disable_device) + pci_disable_device(pdev); } static void pnv_php_free_slot(struct kref *kref) @@ -74,7 +73,8 @@ static void pnv_php_free_slot(struct kref *kref) struct pnv_php_slot, kref); WARN_ON(!list_empty(&php_slot->children)); - pnv_php_disable_irq(php_slot, false); + pnv_php_disable_irq(php_slot, false, false); + destroy_workqueue(php_slot->wq); kfree(php_slot->name); kfree(php_slot); } @@ -561,8 +561,58 @@ static int pnv_php_reset_slot(struct hotplug_slot *slot, bool probe) static int pnv_php_enable_slot(struct hotplug_slot *slot) { struct pnv_php_slot *php_slot = to_pnv_php_slot(slot); + u32 prop32; + int ret; - return pnv_php_enable(php_slot, true); + ret = pnv_php_enable(php_slot, true); + if (ret) + return ret; + + /* (Re-)enable interrupt if the slot supports surprise hotplug */ + ret = of_property_read_u32(php_slot->dn, "ibm,slot-surprise-pluggable", + &prop32); + if (!ret && prop32) + pnv_php_enable_irq(php_slot); + + return 0; +} + +/* + * Disable any hotplug interrupts for all slots on the provided bus, as well as + * all downstream slots in preparation for a hot unplug. 
+ */ +static int pnv_php_disable_all_irqs(struct pci_bus *bus) +{ + struct pci_bus *child_bus; + struct pci_slot *slot; + + /* First go down child buses */ + list_for_each_entry(child_bus, &bus->children, node) + pnv_php_disable_all_irqs(child_bus); + + /* Disable IRQs for all pnv_php slots on this bus */ + list_for_each_entry(slot, &bus->slots, list) { + struct pnv_php_slot *php_slot = to_pnv_php_slot(slot->hotplug); + + pnv_php_disable_irq(php_slot, false, true); + } + + return 0; +} + +/* + * Disable any hotplug interrupts for all downstream slots on the provided + * bus in preparation for a hot unplug. + */ +static int pnv_php_disable_all_downstream_irqs(struct pci_bus *bus) +{ + struct pci_bus *child_bus; + + /* Go down child buses, recursively deactivating their IRQs */ + list_for_each_entry(child_bus, &bus->children, node) + pnv_php_disable_all_irqs(child_bus); + + return 0; } static int pnv_php_disable_slot(struct hotplug_slot *slot) @@ -579,6 +629,13 @@ static int pnv_php_disable_slot(struct hotplug_slot *slot) php_slot->state != PNV_PHP_STATE_REGISTERED) return 0; + /* + * Free all IRQ resources from all child slots before remove. + * Note that we do not disable the root slot IRQ here as that + * would also deactivate the slot hot (re)plug interrupt! 
+ */ + pnv_php_disable_all_downstream_irqs(php_slot->bus); + /* Remove all devices behind the slot */ pci_lock_rescan_remove(); pci_hp_remove_devices(php_slot->bus); @@ -647,6 +704,15 @@ static struct pnv_php_slot *pnv_php_alloc_slot(struct device_node *dn) return NULL; } + /* Allocate workqueue for this slot's interrupt handling */ + php_slot->wq = alloc_workqueue("pciehp-%s", 0, 0, php_slot->name); + if (!php_slot->wq) { + SLOT_WARN(php_slot, "Cannot alloc workqueue\n"); + kfree(php_slot->name); + kfree(php_slot); + return NULL; + } + if (dn->child && PCI_DN(dn->child)) php_slot->slot_no = PCI_SLOT(PCI_DN(dn->child)->devfn); else @@ -843,14 +909,6 @@ static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq) u16 sts, ctrl; int ret; - /* Allocate workqueue */ - php_slot->wq = alloc_workqueue("pciehp-%s", 0, 0, php_slot->name); - if (!php_slot->wq) { - SLOT_WARN(php_slot, "Cannot alloc workqueue\n"); - pnv_php_disable_irq(php_slot, true); - return; - } - /* Check PDC (Presence Detection Change) is broken or not */ ret = of_property_read_u32(php_slot->dn, "ibm,slot-broken-pdc", &broken_pdc); @@ -869,7 +927,7 @@ static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq) ret = request_irq(irq, pnv_php_interrupt, IRQF_SHARED, php_slot->name, php_slot); if (ret) { - pnv_php_disable_irq(php_slot, true); + pnv_php_disable_irq(php_slot, true, true); SLOT_WARN(php_slot, "Error %d enabling IRQ %d\n", ret, irq); return; } From 80f9fc2362797538ebd4fd70a1dfa838cc2c2cdb Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Tue, 15 Jul 2025 16:36:55 -0500 Subject: [PATCH 1117/2411] PCI: pnv_php: Work around switches with broken presence detection The Microsemi Switchtec PM8533 PFX 48xG3 [11f8:8533] PCIe switch system was observed to incorrectly assert the Presence Detect Set bit in its capabilities when tested on a Raptor Computing Systems Blackbird system, resulting in the hot insert path never attempting a rescan of the bus and any downstream devices not 
being re-detected. Work around this by additionally checking whether the PCIe data link is active or not when performing presence detection on downstream switches' ports, similar to the pciehp_hpc.c driver. Signed-off-by: Shawn Anastasio Signed-off-by: Timothy Pearson Signed-off-by: Bjorn Helgaas Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/505981576.1359853.1752615415117.JavaMail.zimbra@raptorengineeringinc.com --- drivers/pci/hotplug/pnv_php.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 1304329ca6f7..5476c9e7760d 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -391,6 +391,20 @@ static int pnv_php_get_power_state(struct hotplug_slot *slot, u8 *state) return 0; } +static int pcie_check_link_active(struct pci_dev *pdev) +{ + u16 lnk_status; + int ret; + + ret = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status); + if (ret == PCIBIOS_DEVICE_NOT_FOUND || PCI_POSSIBLE_ERROR(lnk_status)) + return -ENODEV; + + ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA); + + return ret; +} + static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state) { struct pnv_php_slot *php_slot = to_pnv_php_slot(slot); @@ -403,6 +417,19 @@ static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state) */ ret = pnv_pci_get_presence_state(php_slot->id, &presence); if (ret >= 0) { + if (pci_pcie_type(php_slot->pdev) == PCI_EXP_TYPE_DOWNSTREAM && + presence == OPAL_PCI_SLOT_EMPTY) { + /* + * Similar to pciehp_hpc, check whether the Link Active + * bit is set to account for broken downstream bridges + * that don't properly assert Presence Detect State, as + * was observed on the Microsemi Switchtec PM8533 PFX + * [11f8:8533]. 
+ */ + if (pcie_check_link_active(php_slot->pdev) > 0) + presence = OPAL_PCI_SLOT_PRESENT; + } + *state = presence; ret = 0; } else { From e82b34eed04b0ddcff4548b62633467235672fd3 Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Tue, 15 Jul 2025 16:37:34 -0500 Subject: [PATCH 1118/2411] powerpc/eeh: Export eeh_unfreeze_pe() The PowerNV hotplug driver needs to be able to clear any frozen PE(s) on the PHB after surprise removal of a downstream device. Export the eeh_unfreeze_pe() symbol to allow implementation of this functionality in the pnv_php module. Signed-off-by: Timothy Pearson Signed-off-by: Bjorn Helgaas Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/1778535414.1359858.1752615454618.JavaMail.zimbra@raptorengineeringinc.com --- arch/powerpc/kernel/eeh.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 13578f4db254..bb836f02101c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1139,6 +1139,7 @@ int eeh_unfreeze_pe(struct eeh_pe *pe) return ret; } +EXPORT_SYMBOL_GPL(eeh_unfreeze_pe); static struct pci_device_id eeh_reset_ids[] = { From 1010b4c012b0d78dfb9d3132b49aa2ef024a07a7 Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Tue, 15 Jul 2025 16:38:23 -0500 Subject: [PATCH 1119/2411] powerpc/eeh: Make EEH driver device hotplug safe Multiple race conditions existed between the PCIe hotplug driver and the EEH driver, leading to a variety of kernel oopses of the same general nature: A second class of oops is also seen when the underlying bus disappears during device recovery. Refactor the EEH module to be PCI rescan and remove safe. Also clean up a few minor formatting / readability issues.
Signed-off-by: Timothy Pearson Signed-off-by: Bjorn Helgaas Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/1334208367.1359861.1752615503144.JavaMail.zimbra@raptorengineeringinc.com --- arch/powerpc/kernel/eeh_driver.c | 48 +++++++++++++++++++++----------- arch/powerpc/kernel/eeh_pe.c | 10 ++++--- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 10ce6b3bd3b7..48ad0116f359 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -257,13 +257,12 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn, struct pci_driver *driver; enum pci_ers_result new_result; - pci_lock_rescan_remove(); pdev = edev->pdev; if (pdev) get_device(&pdev->dev); - pci_unlock_rescan_remove(); if (!pdev) { eeh_edev_info(edev, "no device"); + *result = PCI_ERS_RESULT_DISCONNECT; return; } device_lock(&pdev->dev); @@ -304,8 +303,9 @@ static void eeh_pe_report(const char *name, struct eeh_pe *root, struct eeh_dev *edev, *tmp; pr_info("EEH: Beginning: '%s'\n", name); - eeh_for_each_pe(root, pe) eeh_pe_for_each_dev(pe, edev, tmp) - eeh_pe_report_edev(edev, fn, result); + eeh_for_each_pe(root, pe) + eeh_pe_for_each_dev(pe, edev, tmp) + eeh_pe_report_edev(edev, fn, result); if (result) pr_info("EEH: Finished:'%s' with aggregate recovery state:'%s'\n", name, pci_ers_result_name(*result)); @@ -383,6 +383,8 @@ static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata) if (!edev) return; + pci_lock_rescan_remove(); + /* * The content in the config space isn't saved because * the blocked config space on some adapters. 
We have @@ -393,14 +395,19 @@ static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata) if (list_is_last(&edev->entry, &edev->pe->edevs)) eeh_pe_restore_bars(edev->pe); + pci_unlock_rescan_remove(); return; } pdev = eeh_dev_to_pci_dev(edev); - if (!pdev) + if (!pdev) { + pci_unlock_rescan_remove(); return; + } pci_restore_state(pdev); + + pci_unlock_rescan_remove(); } /** @@ -647,9 +654,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (any_passed || driver_eeh_aware || (pe->type & EEH_PE_VF)) { eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); } else { - pci_lock_rescan_remove(); pci_hp_remove_devices(bus); - pci_unlock_rescan_remove(); } /* @@ -665,8 +670,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (rc) return rc; - pci_lock_rescan_remove(); - /* Restore PE */ eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); @@ -674,7 +677,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, /* Clear frozen state */ rc = eeh_clear_pe_frozen_state(pe, false); if (rc) { - pci_unlock_rescan_remove(); return rc; } @@ -709,7 +711,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, pe->tstamp = tstamp; pe->freeze_count = cnt; - pci_unlock_rescan_remove(); return 0; } @@ -843,10 +844,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe) {LIST_HEAD_INIT(rmv_data.removed_vf_list), 0}; int devices = 0; + pci_lock_rescan_remove(); + bus = eeh_pe_bus_get(pe); if (!bus) { pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); + pci_unlock_rescan_remove(); return; } @@ -1094,10 +1098,15 @@ void eeh_handle_normal_event(struct eeh_pe *pe) eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - pci_lock_rescan_remove(); - pci_hp_remove_devices(bus); - pci_unlock_rescan_remove(); + bus = eeh_pe_bus_get(pe); + if (bus) + pci_hp_remove_devices(bus); + else + pr_err("%s: PCI bus for PHB#%x-PE#%x 
disappeared\n", + __func__, pe->phb->global_number, pe->addr); + /* The passed PE should no longer be used */ + pci_unlock_rescan_remove(); return; } @@ -1114,6 +1123,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe) eeh_clear_slot_attention(edev->pdev); eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true); + + pci_unlock_rescan_remove(); } /** @@ -1132,6 +1143,7 @@ void eeh_handle_special_event(void) unsigned long flags; int rc; + pci_lock_rescan_remove(); do { rc = eeh_ops->next_error(&pe); @@ -1171,10 +1183,12 @@ void eeh_handle_special_event(void) break; case EEH_NEXT_ERR_NONE: + pci_unlock_rescan_remove(); return; default: pr_warn("%s: Invalid value %d from next_error()\n", __func__, rc); + pci_unlock_rescan_remove(); return; } @@ -1186,7 +1200,9 @@ void eeh_handle_special_event(void) if (rc == EEH_NEXT_ERR_FROZEN_PE || rc == EEH_NEXT_ERR_FENCED_PHB) { eeh_pe_state_mark(pe, EEH_PE_RECOVERING); + pci_unlock_rescan_remove(); eeh_handle_normal_event(pe); + pci_lock_rescan_remove(); } else { eeh_for_each_pe(pe, tmp_pe) eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev) @@ -1199,7 +1215,6 @@ void eeh_handle_special_event(void) eeh_report_failure, NULL); eeh_set_channel_state(pe, pci_channel_io_perm_failure); - pci_lock_rescan_remove(); list_for_each_entry(hose, &hose_list, list_node) { phb_pe = eeh_phb_pe_get(hose); if (!phb_pe || @@ -1218,7 +1233,6 @@ void eeh_handle_special_event(void) } pci_hp_remove_devices(bus); } - pci_unlock_rescan_remove(); } /* @@ -1228,4 +1242,6 @@ void eeh_handle_special_event(void) if (rc == EEH_NEXT_ERR_DEAD_IOC) break; } while (rc != EEH_NEXT_ERR_NONE); + + pci_unlock_rescan_remove(); } diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index d283d281d28e..e740101fadf3 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -671,10 +671,12 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) eeh_ops->write_config(edev, cap + PCI_EXP_LNKCTL, 2, val); /* Check link */ - if 
(!edev->pdev->link_active_reporting) { - eeh_edev_dbg(edev, "No link reporting capability\n"); - msleep(1000); - return; + if (edev->pdev) { + if (!edev->pdev->link_active_reporting) { + eeh_edev_dbg(edev, "No link reporting capability\n"); + msleep(1000); + return; + } } /* Wait the link is up until timeout (5s) */ From a2a2a6fc2469524caa713036297c542746d148dc Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Tue, 15 Jul 2025 16:39:06 -0500 Subject: [PATCH 1120/2411] PCI: pnv_php: Fix surprise plug detection and recovery The existing PowerNV hotplug code did not handle surprise plug events correctly, leading to a complete failure of the hotplug system after device removal and a required reboot to detect new devices. This comes down to two issues: 1) When a device is surprise removed, often the bridge upstream port will cause a PE freeze on the PHB. If this freeze is not cleared, the MSI interrupts from the bridge hotplug notification logic will not be received by the kernel, stalling all plug events on all slots associated with the PE. 2) When a device is removed from a slot, regardless of surprise or programmatic removal, the associated PHB/PE is left frozen. If this freeze is not cleared via a fundamental reset, skiboot is unable to clear the freeze and cannot retrain / rescan the slot. This also requires a reboot to clear the freeze and redetect the device in the slot. Issue the appropriate unfreeze and rescan commands on hotplug events, and don't oops on hotplug if pci_bus_to_OF_node() returns NULL.
Signed-off-by: Timothy Pearson [bhelgaas: tidy comments] Signed-off-by: Bjorn Helgaas Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/171044224.1359864.1752615546988.JavaMail.zimbra@raptorengineeringinc.com --- arch/powerpc/kernel/pci-hotplug.c | 3 + drivers/pci/hotplug/pnv_php.c | 110 +++++++++++++++++++++++++++++- 2 files changed, 110 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 9ea74973d78d..6f444d0822d8 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -141,6 +141,9 @@ void pci_hp_add_devices(struct pci_bus *bus) struct pci_controller *phb; struct device_node *dn = pci_bus_to_OF_node(bus); + if (!dn) + return; + phb = pci_bus_to_host(bus); mode = PCI_PROBE_NORMAL; diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 5476c9e7760d..4f85e7fe29ec 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -4,12 +4,14 @@ * * Copyright Gavin Shan, IBM Corporation 2016. * Copyright (C) 2025 Raptor Engineering, LLC + * Copyright (C) 2025 Raptor Computing Systems, LLC */ #include #include #include #include +#include #include #include @@ -469,6 +471,61 @@ static int pnv_php_set_attention_state(struct hotplug_slot *slot, u8 state) return 0; } +static int pnv_php_activate_slot(struct pnv_php_slot *php_slot, + struct hotplug_slot *slot) +{ + int ret, i; + + /* + * Issue initial slot activation command to firmware + * + * Firmware will power slot on, attempt to train the link, and + * discover any downstream devices. If this process fails, firmware + * will return an error code and an invalid device tree. Failure + * can be caused for multiple reasons, including a faulty + * downstream device, poor connection to the downstream device, or + * a previously latched PHB fence. On failure, issue fundamental + * reset up to three times before aborting. 
+ */ + ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_ON); + if (ret) { + SLOT_WARN( + php_slot, + "PCI slot activation failed with error code %d, possible frozen PHB", + ret); + SLOT_WARN( + php_slot, + "Attempting complete PHB reset before retrying slot activation\n"); + for (i = 0; i < 3; i++) { + /* + * Slot activation failed, PHB may be fenced from a + * prior device failure. + * + * Use the OPAL fundamental reset call to both try a + * device reset and clear any potentially active PHB + * fence / freeze. + */ + SLOT_WARN(php_slot, "Try %d...\n", i + 1); + pci_set_pcie_reset_state(php_slot->pdev, + pcie_warm_reset); + msleep(250); + pci_set_pcie_reset_state(php_slot->pdev, + pcie_deassert_reset); + + ret = pnv_php_set_slot_power_state( + slot, OPAL_PCI_SLOT_POWER_ON); + if (!ret) + break; + } + + if (i >= 3) + SLOT_WARN(php_slot, + "Failed to bring slot online, aborting!\n"); + } + + return ret; +} + static int pnv_php_enable(struct pnv_php_slot *php_slot, bool rescan) { struct hotplug_slot *slot = &php_slot->slot; @@ -531,7 +588,7 @@ static int pnv_php_enable(struct pnv_php_slot *php_slot, bool rescan) goto scan; /* Power is off, turn it on and then scan the slot */ - ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_ON); + ret = pnv_php_activate_slot(php_slot, slot); if (ret) return ret; @@ -838,16 +895,63 @@ static int pnv_php_enable_msix(struct pnv_php_slot *php_slot) return entry.vector; } +static void +pnv_php_detect_clear_suprise_removal_freeze(struct pnv_php_slot *php_slot) +{ + struct pci_dev *pdev = php_slot->pdev; + struct eeh_dev *edev; + struct eeh_pe *pe; + int i, rc; + + /* + * When a device is surprise removed from a downstream bridge slot, + * the upstream bridge port can still end up frozen due to related EEH + * events, which will in turn block the MSI interrupts for slot hotplug + * detection. + * + * Detect and thaw any frozen upstream PE after slot deactivation. 
+ */ + edev = pci_dev_to_eeh_dev(pdev); + pe = edev ? edev->pe : NULL; + rc = eeh_pe_get_state(pe); + if ((rc == -ENODEV) || (rc == -ENOENT)) { + SLOT_WARN( + php_slot, + "Upstream bridge PE state unknown, hotplug detect may fail\n"); + } else { + if (pe->state & EEH_PE_ISOLATED) { + SLOT_WARN( + php_slot, + "Upstream bridge PE %02x frozen, thawing...\n", + pe->addr); + for (i = 0; i < 3; i++) + if (!eeh_unfreeze_pe(pe)) + break; + if (i >= 3) + SLOT_WARN( + php_slot, + "Unable to thaw PE %02x, hotplug detect will fail!\n", + pe->addr); + else + SLOT_WARN(php_slot, + "PE %02x thawed successfully\n", + pe->addr); + } + } +} + static void pnv_php_event_handler(struct work_struct *work) { struct pnv_php_event *event = container_of(work, struct pnv_php_event, work); struct pnv_php_slot *php_slot = event->php_slot; - if (event->added) + if (event->added) { pnv_php_enable_slot(&php_slot->slot); - else + } else { pnv_php_disable_slot(&php_slot->slot); + pnv_php_detect_clear_suprise_removal_freeze(php_slot); + } kfree(event); } From a8f2b96ca9ee87be8091fcc2746b811c173648a0 Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Tue, 15 Jul 2025 16:39:42 -0500 Subject: [PATCH 1121/2411] PCI: pnv_php: Enable third attention indicator state The PCIe specification allows three attention indicator states, on, off, and blink. Enable all three states instead of basic on / off control. This changes the userspace API (writes to the sysfs "attention" file) to match the behavior of pciehp. 
Here's the comparison of previous and new indicator behavior: Value Previous New Behavior ----- -------- ------------------------ 0 off (reserved, so undefined) 1 on on 2 on blink 3 on off Signed-off-by: Timothy Pearson [bhelgaas: add specifics of behavior change] Signed-off-by: Bjorn Helgaas Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/1210309411.1359866.1752615582001.JavaMail.zimbra@raptorengineeringinc.com --- drivers/pci/hotplug/pnv_php.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 4f85e7fe29ec..c5345bff9a55 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -441,10 +441,23 @@ static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state) return ret; } +static int pnv_php_get_raw_indicator_status(struct hotplug_slot *slot, u8 *state) +{ + struct pnv_php_slot *php_slot = to_pnv_php_slot(slot); + struct pci_dev *bridge = php_slot->pdev; + u16 status; + + pcie_capability_read_word(bridge, PCI_EXP_SLTCTL, &status); + *state = (status & (PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC)) >> 6; + return 0; +} + + static int pnv_php_get_attention_state(struct hotplug_slot *slot, u8 *state) { struct pnv_php_slot *php_slot = to_pnv_php_slot(slot); + pnv_php_get_raw_indicator_status(slot, &php_slot->attention_state); *state = php_slot->attention_state; return 0; } @@ -462,7 +475,7 @@ static int pnv_php_set_attention_state(struct hotplug_slot *slot, u8 state) mask = PCI_EXP_SLTCTL_AIC; if (state) - new = PCI_EXP_SLTCTL_ATTN_IND_ON; + new = FIELD_PREP(PCI_EXP_SLTCTL_AIC, state); else new = PCI_EXP_SLTCTL_ATTN_IND_OFF; From 6d4d44254e43157bb760aa16367a394c2ab299b8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 17 Jul 2025 08:24:08 +0900 Subject: [PATCH 1122/2411] kconfig: gconf: fix single view to display dependent symbols correctly In the following example, the symbol C was never displayed in Single 
view. Fix the recursion logic so that all symbols are shown. menu "menu" config A bool "A" config B bool "B" depends on A config C bool "C" depends on B endmenu Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 7725d2c9d92a..c67b35807e8e 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -914,9 +914,7 @@ static gboolean on_treeview1_button_press_event(GtkWidget *widget, static void _display_tree(GtkTreeStore *tree, struct menu *menu, GtkTreeIter *parent) { - struct property *prop; struct menu *child; - enum prop_type ptype; GtkTreeIter iter; for (child = menu->list; child; child = child->next) { @@ -929,9 +927,6 @@ static void _display_tree(GtkTreeStore *tree, struct menu *menu, if (child->type == M_IF) continue; - prop = child->prompt; - ptype = prop ? prop->type : P_UNKNOWN; - if ((view_mode == SPLIT_VIEW) && !(child->flags & MENU_ROOT) && (tree == tree1)) continue; @@ -943,16 +938,7 @@ static void _display_tree(GtkTreeStore *tree, struct menu *menu, gtk_tree_store_append(tree, &iter, parent); set_node(tree, &iter, child); - if ((view_mode == SINGLE_VIEW) && (ptype == P_MENU)) - continue; -/* - if (((menu != &rootmenu) && !(menu->flags & MENU_ROOT)) - || (view_mode == FULL_VIEW) - || (view_mode == SPLIT_VIEW))*/ - - if (((view_mode == SINGLE_VIEW) && (menu->flags & MENU_ROOT)) - || (view_mode == FULL_VIEW) - || (view_mode == SPLIT_VIEW)) + if (view_mode != SINGLE_VIEW || child->type != M_MENU) _display_tree(tree, child, &iter); } } From 15a5ae3b0976d1190728044920cf6337a218ae62 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 17 Jul 2025 08:24:09 +0900 Subject: [PATCH 1123/2411] kconfig: gconf: Fix Back button behavior Clicking the Back button may navigate to a non-menu hierarchy level. 
[Example] menu "menu1" config A bool "A" default y config B bool "B" depends on A default y menu "menu2" depends on B config C bool "C" default y endmenu endmenu After being re-parented by menu_finalize(), the menu tree is structured like follows: menu "menu1" \-- A \-- B \-- menu "menu2" \-- C In Single view, visit "menu2" and click the Back button. It should go up to "menu1" and show A, B and "menu2", but instead goes up to A and show only B and "menu2". This is a bug in on_back_clicked(). Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index c67b35807e8e..d9ea71664412 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -553,12 +553,8 @@ static void on_license1_activate(GtkMenuItem *menuitem, gpointer user_data) /* toolbar handlers */ static void on_back_clicked(GtkButton *button, gpointer user_data) { - enum prop_type ptype; + browsed = menu_get_parent_menu(browsed) ?: &rootmenu; - browsed = browsed->parent; - ptype = browsed->prompt ? browsed->prompt->type : P_UNKNOWN; - if (ptype != P_MENU) - browsed = browsed->parent; recreate_tree(); if (browsed == &rootmenu) From 2bc0148f78193865065035fe19095c78c3d8129f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 17 Jul 2025 08:24:10 +0900 Subject: [PATCH 1124/2411] kconfig: gconf: replace GtkImageMenuItem with GtkMenuItem GtkImageMenuItem is deprecated with GTK 3.10. [1] Use GtkMenuItem instead. 
[1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/3.10.0/gtk/deprecated/gtkimagemenuitem.c#L797 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.ui | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/kconfig/gconf.ui b/scripts/kconfig/gconf.ui index c37807e8b782..ab4431255fa7 100644 --- a/scripts/kconfig/gconf.ui +++ b/scripts/kconfig/gconf.ui @@ -39,7 +39,7 @@ - + True Load a config file _Load @@ -49,7 +49,7 @@ - + True Save the config in .config _Save @@ -59,7 +59,7 @@ - + True Save the config in a file Save _as @@ -74,7 +74,7 @@ - + True _Quit True @@ -178,7 +178,7 @@ - + True _Introduction True @@ -187,7 +187,7 @@ - + True _About True @@ -196,7 +196,7 @@ - + True _License True From e16f08062f91570aa225bc490e0a92d63ae13769 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 17 Jul 2025 08:24:11 +0900 Subject: [PATCH 1125/2411] kconfig: gconf: use hyphens in signals Using hyphens in signal names is the official convention, even though underscores also work. 
Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index d9ea71664412..e4f89270d19f 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -997,25 +997,25 @@ static void init_main_window(const gchar *glade_file) G_CALLBACK(on_window1_destroy), NULL); g_signal_connect(main_wnd, "configure-event", G_CALLBACK(on_window1_configure), NULL); - g_signal_connect(main_wnd, "delete_event", + g_signal_connect(main_wnd, "delete-event", G_CALLBACK(on_window1_delete_event), NULL); hpaned = GTK_WIDGET(gtk_builder_get_object(builder, "hpaned1")); vpaned = GTK_WIDGET(gtk_builder_get_object(builder, "vpaned1")); tree1_w = GTK_WIDGET(gtk_builder_get_object(builder, "treeview1")); - g_signal_connect(tree1_w, "cursor_changed", + g_signal_connect(tree1_w, "cursor-changed", G_CALLBACK(on_treeview2_cursor_changed), NULL); - g_signal_connect(tree1_w, "button_press_event", + g_signal_connect(tree1_w, "button-press-event", G_CALLBACK(on_treeview1_button_press_event), NULL); - g_signal_connect(tree1_w, "key_press_event", + g_signal_connect(tree1_w, "key-press-event", G_CALLBACK(on_treeview2_key_press_event), NULL); tree2_w = GTK_WIDGET(gtk_builder_get_object(builder, "treeview2")); - g_signal_connect(tree2_w, "cursor_changed", + g_signal_connect(tree2_w, "cursor-changed", G_CALLBACK(on_treeview2_cursor_changed), NULL); - g_signal_connect(tree2_w, "button_press_event", + g_signal_connect(tree2_w, "button-press-event", G_CALLBACK(on_treeview2_button_press_event), NULL); - g_signal_connect(tree2_w, "key_press_event", + g_signal_connect(tree2_w, "key-press-event", G_CALLBACK(on_treeview2_key_press_event), NULL); text_w = GTK_WIDGET(gtk_builder_get_object(builder, "textview3")); From 5ceb15fdc629aa3030e8f8987c561d36678f9559 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 17 Jul 2025 08:24:12 +0900 Subject: [PATCH 1126/2411] kconfig: 
gconf: remove unneeded variable in text_insert_msg The 'msg' and 'message' refer to the same pointer. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index e4f89270d19f..651140af7d13 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -90,11 +90,10 @@ static void text_insert_help(struct menu *menu) } -static void text_insert_msg(const char *title, const char *message) +static void text_insert_msg(const char *title, const char *msg) { GtkTextBuffer *buffer; GtkTextIter start, end; - const char *msg = message; buffer = gtk_text_view_get_buffer(GTK_TEXT_VIEW(text_w)); gtk_text_buffer_get_bounds(buffer, &start, &end); From eb549e194bf2d5c86b1b7a71fad54d610dd6c892 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 17 Jul 2025 08:24:13 +0900 Subject: [PATCH 1127/2411] kconfig: gconf: refactor text_insert_help() text_insert_help() and text_insert_msg() share similar code. Refactor text_insert_help() to eliminate the code duplication. 
Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 35 +++++++++-------------------------- 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 651140af7d13..8b164ccfa008 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -64,32 +64,6 @@ static void conf_changed(bool dirty) /* Utility Functions */ - -static void text_insert_help(struct menu *menu) -{ - GtkTextBuffer *buffer; - GtkTextIter start, end; - const char *prompt = menu_get_prompt(menu); - struct gstr help = str_new(); - - menu_get_ext_help(menu, &help); - - buffer = gtk_text_view_get_buffer(GTK_TEXT_VIEW(text_w)); - gtk_text_buffer_get_bounds(buffer, &start, &end); - gtk_text_buffer_delete(buffer, &start, &end); - gtk_text_view_set_left_margin(GTK_TEXT_VIEW(text_w), 15); - - gtk_text_buffer_get_end_iter(buffer, &end); - gtk_text_buffer_insert_with_tags(buffer, &end, prompt, -1, tag1, - NULL); - gtk_text_buffer_insert_at_cursor(buffer, "\n\n", 2); - gtk_text_buffer_get_end_iter(buffer, &end); - gtk_text_buffer_insert_with_tags(buffer, &end, str_get(&help), -1, tag2, - NULL); - str_free(&help); -} - - static void text_insert_msg(const char *title, const char *msg) { GtkTextBuffer *buffer; @@ -109,6 +83,15 @@ static void text_insert_msg(const char *title, const char *msg) NULL); } +static void text_insert_help(struct menu *menu) +{ + struct gstr help = str_new(); + + menu_get_ext_help(menu, &help); + text_insert_msg(menu_get_prompt(menu), str_get(&help)); + str_free(&help); +} + static void _select_menu(GtkTreeView *view, GtkTreeModel *model, GtkTreeIter *parent, struct menu *match) { From 95f610e36adc74f3972e31c28567d66777ce37f3 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 24 Jul 2025 11:04:19 +0200 Subject: [PATCH 1128/2411] rtc: pcf85063: scope pcf85063_config structures Fix possible warning: >> drivers/rtc/rtc-pcf85063.c:566:37: warning: unused variable 'config_rv8063' [-Wunused-const-variable] 566 
| static const struct pcf85063_config config_rv8063 = { | ^~~~~~~~~~~~~ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507241607.dmz2qrO5-lkp@intel.com/ Link: https://lore.kernel.org/r/20250724090420.917705-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf85063.c | 94 +++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index d9b67b959d18..e3b58cdb1eda 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -528,53 +528,6 @@ static struct clk *pcf85063_clkout_register_clk(struct pcf85063 *pcf85063) } #endif -static const struct pcf85063_config config_pcf85063 = { - .regmap = { - .reg_bits = 8, - .val_bits = 8, - .max_register = 0x0a, - }, -}; - -static const struct pcf85063_config config_pcf85063tp = { - .regmap = { - .reg_bits = 8, - .val_bits = 8, - .max_register = 0x0a, - }, -}; - -static const struct pcf85063_config config_pcf85063a = { - .regmap = { - .reg_bits = 8, - .val_bits = 8, - .max_register = 0x11, - }, - .has_alarms = 1, -}; - -static const struct pcf85063_config config_rv8263 = { - .regmap = { - .reg_bits = 8, - .val_bits = 8, - .max_register = 0x11, - }, - .has_alarms = 1, - .force_cap_7000 = 1, -}; - -static const struct pcf85063_config config_rv8063 = { - .regmap = { - .reg_bits = 8, - .val_bits = 8, - .max_register = 0x11, - .read_flag_mask = BIT(7) | BIT(5), - .write_flag_mask = BIT(5), - }, - .has_alarms = 1, - .force_cap_7000 = 1, -}; - static int pcf85063_probe(struct device *dev, struct regmap *regmap, int irq, const struct pcf85063_config *config) { @@ -671,6 +624,41 @@ static int pcf85063_probe(struct device *dev, struct regmap *regmap, int irq, #if IS_ENABLED(CONFIG_I2C) +static const struct pcf85063_config config_pcf85063 = { + .regmap = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0x0a, + }, +}; + +static const struct pcf85063_config 
config_pcf85063tp = { + .regmap = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0x0a, + }, +}; + +static const struct pcf85063_config config_pcf85063a = { + .regmap = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0x11, + }, + .has_alarms = 1, +}; + +static const struct pcf85063_config config_rv8263 = { + .regmap = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0x11, + }, + .has_alarms = 1, + .force_cap_7000 = 1, +}; + static const struct i2c_device_id pcf85063_ids[] = { { "pca85073a", .driver_data = (kernel_ulong_t)&config_pcf85063a }, { "pcf85063", .driver_data = (kernel_ulong_t)&config_pcf85063 }, @@ -743,6 +731,18 @@ static void pcf85063_unregister_driver(void) #if IS_ENABLED(CONFIG_SPI_MASTER) +static const struct pcf85063_config config_rv8063 = { + .regmap = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0x11, + .read_flag_mask = BIT(7) | BIT(5), + .write_flag_mask = BIT(5), + }, + .has_alarms = 1, + .force_cap_7000 = 1, +}; + static const struct spi_device_id rv8063_id[] = { { "rv8063" }, {} From d89c58068aa667295fa75d0613c869b612bd6249 Mon Sep 17 00:00:00 2001 From: Blake Jones Date: Fri, 25 Jul 2025 17:40:23 -0700 Subject: [PATCH 1129/2411] perf test: Fix comment ordering The previous commit that introduced this test overlooked a behavior of "perf test list", causing it to print "SPDX-License-Identifier: GPL-2.0" as a description for that test. This reorders the comments to fix that issue. 
Fixes: edf2cadf01e8 ("perf test: add test for BPF metadata collection") Signed-off-by: Blake Jones Reviewed-by: Ian Rogers Link: https://lore.kernel.org/r/20250726004023.3466563-1-blakejones@google.com [ update the commit message a little bit ] Signed-off-by: Namhyung Kim --- tools/perf/tests/shell/test_bpf_metadata.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/test_bpf_metadata.sh b/tools/perf/tests/shell/test_bpf_metadata.sh index bc9aef161664..69e3c2055134 100755 --- a/tools/perf/tests/shell/test_bpf_metadata.sh +++ b/tools/perf/tests/shell/test_bpf_metadata.sh @@ -1,7 +1,7 @@ #!/bin/bash -# SPDX-License-Identifier: GPL-2.0 +# BPF metadata collection test # -# BPF metadata collection test. +# SPDX-License-Identifier: GPL-2.0 set -e From af470fb532fc803c4c582d15b4bd394682a77a15 Mon Sep 17 00:00:00 2001 From: Chen Pei Date: Sat, 26 Jul 2025 19:15:32 +0800 Subject: [PATCH 1130/2411] perf tools: Remove libtraceevent in .gitignore The libtraceevent has been removed from the source tree, and .gitignore needs to be updated as well. Fixes: 4171925aa9f3f7bf ("tools lib traceevent: Remove libtraceevent") Signed-off-by: Chen Pei Link: https://lore.kernel.org/r/20250726111532.8031-1-cp0613@linux.alibaba.com Signed-off-by: Namhyung Kim --- tools/perf/.gitignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 5aaf73df6700..b64302a76144 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -48,8 +48,6 @@ libbpf/ libperf/ libsubcmd/ libsymbol/ -libtraceevent/ -libtraceevent_plugins/ fixdep Documentation/doc.dep python_ext_build/ From 9957d8c801fe0cb905a9443d7a88e6a051f81105 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 25 Jul 2025 11:51:48 -0700 Subject: [PATCH 1131/2411] perf jevents: Add common software event json Add json for software events so that in perf list the events can have a description. 
Common json exists for the tool PMU but it has no sysfs equivalent. Modify the map_for_pmu code to return the common map (rather than an architecture specific one) when a PMU with a common name is being looked for, this allows the events to be found. Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20250725185202.68671-3-irogers@google.com Signed-off-by: Namhyung Kim --- .../arch/common/common/software.json | 92 ++++++ tools/perf/pmu-events/empty-pmu-events.c | 266 +++++++++++------- tools/perf/pmu-events/jevents.py | 15 +- 3 files changed, 264 insertions(+), 109 deletions(-) create mode 100644 tools/perf/pmu-events/arch/common/common/software.json diff --git a/tools/perf/pmu-events/arch/common/common/software.json b/tools/perf/pmu-events/arch/common/common/software.json new file mode 100644 index 000000000000..f2551f1107fd --- /dev/null +++ b/tools/perf/pmu-events/arch/common/common/software.json @@ -0,0 +1,92 @@ +[ + { + "Unit": "software", + "EventName": "cpu-clock", + "BriefDescription": "Per-CPU high-resolution timer based event", + "ConfigCode": "0" + }, + { + "Unit": "software", + "EventName": "task-clock", + "BriefDescription": "Per-task high-resolution timer based event", + "ConfigCode": "1" + }, + { + "Unit": "software", + "EventName": "faults", + "BriefDescription": "Number of page faults [This event is an alias of page-faults]", + "ConfigCode": "2" + }, + { + "Unit": "software", + "EventName": "page-faults", + "BriefDescription": "Number of page faults [This event is an alias of faults]", + "ConfigCode": "2" + }, + { + "Unit": "software", + "EventName": "context-switches", + "BriefDescription": "Number of context switches [This event is an alias of cs]", + "ConfigCode": "3" + }, + { + "Unit": "software", + "EventName": "cs", + "BriefDescription": "Number of context switches [This event is an alias of context-switches]", + "ConfigCode": "3" + }, + { + "Unit": "software", + "EventName": "cpu-migrations", + 
"BriefDescription": "Number of times a process has migrated to a new CPU [This event is an alias of migrations]", + "ConfigCode": "4" + }, + { + "Unit": "software", + "EventName": "migrations", + "BriefDescription": "Number of times a process has migrated to a new CPU [This event is an alias of cpu-migrations]", + "ConfigCode": "4" + }, + { + "Unit": "software", + "EventName": "minor-faults", + "BriefDescription": "Number of minor page faults. Minor faults don't require I/O to handle", + "ConfigCode": "5" + }, + { + "Unit": "software", + "EventName": "major-faults", + "BriefDescription": "Number of major page faults. Major faults require I/O to handle", + "ConfigCode": "6" + }, + { + "Unit": "software", + "EventName": "alignment-faults", + "BriefDescription": "Number of kernel handled memory alignment faults", + "ConfigCode": "7" + }, + { + "Unit": "software", + "EventName": "emulation-faults", + "BriefDescription": "Number of kernel handled unimplemented instruction faults handled through emulation", + "ConfigCode": "8" + }, + { + "Unit": "software", + "EventName": "dummy", + "BriefDescription": "A placeholder event that doesn't count anything", + "ConfigCode": "9" + }, + { + "Unit": "software", + "EventName": "bpf-output", + "BriefDescription": "An event used by BPF programs to write to the perf ring buffer", + "ConfigCode": "10" + }, + { + "Unit": "software", + "EventName": "cgroup-switches", + "BriefDescription": "Number of context switches to a task in a different cgroup", + "ConfigCode": "11" + } +] diff --git a/tools/perf/pmu-events/empty-pmu-events.c b/tools/perf/pmu-events/empty-pmu-events.c index a4569a74db07..041c598b16d8 100644 --- a/tools/perf/pmu-events/empty-pmu-events.c +++ b/tools/perf/pmu-events/empty-pmu-events.c @@ -19,109 +19,147 @@ struct pmu_table_entry { }; static const char *const big_c_string = -/* offset=0 */ "tool\000" -/* offset=5 */ "duration_time\000tool\000Wall clock interval time in 
nanoseconds\000config=1\000\00000\000\000\000\000\000" -/* offset=81 */ "user_time\000tool\000User (non-kernel) time in nanoseconds\000config=2\000\00000\000\000\000\000\000" -/* offset=151 */ "system_time\000tool\000System/kernel time in nanoseconds\000config=3\000\00000\000\000\000\000\000" -/* offset=219 */ "has_pmem\000tool\0001 if persistent memory installed otherwise 0\000config=4\000\00000\000\000\000\000\000" -/* offset=295 */ "num_cores\000tool\000Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU\000config=5\000\00000\000\000\000\000\000" -/* offset=440 */ "num_cpus\000tool\000Number of logical Linux CPUs. There may be multiple such CPUs on a core\000config=6\000\00000\000\000\000\000\000" -/* offset=543 */ "num_cpus_online\000tool\000Number of online logical Linux CPUs. There may be multiple such CPUs on a core\000config=7\000\00000\000\000\000\000\000" -/* offset=660 */ "num_dies\000tool\000Number of dies. Each die has 1 or more cores\000config=8\000\00000\000\000\000\000\000" -/* offset=736 */ "num_packages\000tool\000Number of packages. 
Each package has 1 or more die\000config=9\000\00000\000\000\000\000\000" -/* offset=822 */ "slots\000tool\000Number of functional units that in parallel can execute parts of an instruction\000config=0xa\000\00000\000\000\000\000\000" -/* offset=932 */ "smt_on\000tool\0001 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0\000config=0xb\000\00000\000\000\000\000\000" -/* offset=1039 */ "system_tsc_freq\000tool\000The amount a Time Stamp Counter (TSC) increases per second\000config=0xc\000\00000\000\000\000\000\000" -/* offset=1138 */ "default_core\000" -/* offset=1151 */ "bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000\000\000\000" -/* offset=1213 */ "bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000\000\000\000" -/* offset=1275 */ "l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000\000\000\000Attributable Level 3 cache access, read\000" -/* offset=1373 */ "segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000\000\000\000" -/* offset=1475 */ "dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000\000\000\000" -/* offset=1608 */ "eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000\000\000\000" -/* offset=1726 */ "hisi_sccl,ddrc\000" -/* offset=1741 */ "uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000\000\000\000\000" -/* offset=1811 */ "uncore_cbox\000" -/* offset=1823 */ "unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000\000\000\000\000" -/* offset=1977 */ "event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000\000\000\000\000" -/* offset=2031 */ 
"event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000\000\000\000\000" -/* offset=2089 */ "hisi_sccl,l3c\000" -/* offset=2103 */ "uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000\000\000\000\000" -/* offset=2171 */ "uncore_imc_free_running\000" -/* offset=2195 */ "uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000\000\000\000\000" -/* offset=2275 */ "uncore_imc\000" -/* offset=2286 */ "uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000\000\000\000\000" -/* offset=2351 */ "uncore_sys_ddr_pmu\000" -/* offset=2370 */ "sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000\000\000\000" -/* offset=2446 */ "uncore_sys_ccn_pmu\000" -/* offset=2465 */ "sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000\000\000\000" -/* offset=2542 */ "uncore_sys_cmn_pmu\000" -/* offset=2561 */ "sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000\000\000\000" -/* offset=2704 */ "CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000" -/* offset=2726 */ "IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000" -/* offset=2789 */ "Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000" -/* offset=2955 */ "dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" -/* offset=3019 */ "icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" -/* offset=3086 */ "cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000" -/* offset=3157 */ 
"DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000" -/* offset=3251 */ "DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000" -/* offset=3385 */ "DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000" -/* offset=3449 */ "DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000" -/* offset=3517 */ "DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000" -/* offset=3587 */ "M1\000\000ipc + M2\000\000\000\000\000\000\000\00000" -/* offset=3609 */ "M2\000\000ipc + M1\000\000\000\000\000\000\000\00000" -/* offset=3631 */ "M3\000\0001 / M3\000\000\000\000\000\000\000\00000" -/* offset=3651 */ "L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000" +/* offset=0 */ "software\000" +/* offset=9 */ "cpu-clock\000software\000Per-CPU high-resolution timer based event\000config=0\000\00000\000\000\000\000\000" +/* offset=87 */ "task-clock\000software\000Per-task high-resolution timer based event\000config=1\000\00000\000\000\000\000\000" +/* offset=167 */ "faults\000software\000Number of page faults [This event is an alias of page-faults]\000config=2\000\00000\000\000\000\000\000" +/* offset=262 */ "page-faults\000software\000Number of page faults [This event is an alias of faults]\000config=2\000\00000\000\000\000\000\000" +/* offset=357 */ "context-switches\000software\000Number of context switches [This event is an alias of cs]\000config=3\000\00000\000\000\000\000\000" +/* offset=458 */ "cs\000software\000Number of context switches [This event is an alias of context-switches]\000config=3\000\00000\000\000\000\000\000" +/* offset=559 */ "cpu-migrations\000software\000Number of times a process has migrated to a new CPU 
[This event is an alias of migrations]\000config=4\000\00000\000\000\000\000\000" +/* offset=691 */ "migrations\000software\000Number of times a process has migrated to a new CPU [This event is an alias of cpu-migrations]\000config=4\000\00000\000\000\000\000\000" +/* offset=823 */ "minor-faults\000software\000Number of minor page faults. Minor faults don't require I/O to handle\000config=5\000\00000\000\000\000\000\000" +/* offset=932 */ "major-faults\000software\000Number of major page faults. Major faults require I/O to handle\000config=6\000\00000\000\000\000\000\000" +/* offset=1035 */ "alignment-faults\000software\000Number of kernel handled memory alignment faults\000config=7\000\00000\000\000\000\000\000" +/* offset=1127 */ "emulation-faults\000software\000Number of kernel handled unimplemented instruction faults handled through emulation\000config=8\000\00000\000\000\000\000\000" +/* offset=1254 */ "dummy\000software\000A placeholder event that doesn't count anything\000config=9\000\00000\000\000\000\000\000" +/* offset=1334 */ "bpf-output\000software\000An event used by BPF programs to write to the perf ring buffer\000config=0xa\000\00000\000\000\000\000\000" +/* offset=1436 */ "cgroup-switches\000software\000Number of context switches to a task in a different cgroup\000config=0xb\000\00000\000\000\000\000\000" +/* offset=1539 */ "tool\000" +/* offset=1544 */ "duration_time\000tool\000Wall clock interval time in nanoseconds\000config=1\000\00000\000\000\000\000\000" +/* offset=1620 */ "user_time\000tool\000User (non-kernel) time in nanoseconds\000config=2\000\00000\000\000\000\000\000" +/* offset=1690 */ "system_time\000tool\000System/kernel time in nanoseconds\000config=3\000\00000\000\000\000\000\000" +/* offset=1758 */ "has_pmem\000tool\0001 if persistent memory installed otherwise 0\000config=4\000\00000\000\000\000\000\000" +/* offset=1834 */ "num_cores\000tool\000Number of cores. 
A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU\000config=5\000\00000\000\000\000\000\000" +/* offset=1979 */ "num_cpus\000tool\000Number of logical Linux CPUs. There may be multiple such CPUs on a core\000config=6\000\00000\000\000\000\000\000" +/* offset=2082 */ "num_cpus_online\000tool\000Number of online logical Linux CPUs. There may be multiple such CPUs on a core\000config=7\000\00000\000\000\000\000\000" +/* offset=2199 */ "num_dies\000tool\000Number of dies. Each die has 1 or more cores\000config=8\000\00000\000\000\000\000\000" +/* offset=2275 */ "num_packages\000tool\000Number of packages. Each package has 1 or more die\000config=9\000\00000\000\000\000\000\000" +/* offset=2361 */ "slots\000tool\000Number of functional units that in parallel can execute parts of an instruction\000config=0xa\000\00000\000\000\000\000\000" +/* offset=2471 */ "smt_on\000tool\0001 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0\000config=0xb\000\00000\000\000\000\000\000" +/* offset=2578 */ "system_tsc_freq\000tool\000The amount a Time Stamp Counter (TSC) increases per second\000config=0xc\000\00000\000\000\000\000\000" +/* offset=2677 */ "default_core\000" +/* offset=2690 */ "bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000\000\000\000" +/* offset=2752 */ "bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000\000\000\000" +/* offset=2814 */ "l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000\000\000\000Attributable Level 3 cache access, read\000" +/* offset=2912 */ "segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000\000\000\000" +/* offset=3014 */ "dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000\000\000\000" +/* offset=3147 */ 
"eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000\000\000\000" +/* offset=3265 */ "hisi_sccl,ddrc\000" +/* offset=3280 */ "uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000\000\000\000\000" +/* offset=3350 */ "uncore_cbox\000" +/* offset=3362 */ "unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000\000\000\000\000" +/* offset=3516 */ "event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000\000\000\000\000" +/* offset=3570 */ "event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000\000\000\000\000" +/* offset=3628 */ "hisi_sccl,l3c\000" +/* offset=3642 */ "uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000\000\000\000\000" +/* offset=3710 */ "uncore_imc_free_running\000" +/* offset=3734 */ "uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000\000\000\000\000" +/* offset=3814 */ "uncore_imc\000" +/* offset=3825 */ "uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000\000\000\000\000" +/* offset=3890 */ "uncore_sys_ddr_pmu\000" +/* offset=3909 */ "sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000\000\000\000" +/* offset=3985 */ "uncore_sys_ccn_pmu\000" +/* offset=4004 */ "sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000\000\000\000" +/* offset=4081 */ "uncore_sys_cmn_pmu\000" +/* offset=4100 */ "sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000\000\000\000" +/* offset=4243 */ "CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000" +/* offset=4265 */ "IPC\000group1\000inst_retired.any / 
cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000" +/* offset=4328 */ "Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000" +/* offset=4494 */ "dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" +/* offset=4558 */ "icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" +/* offset=4625 */ "cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000" +/* offset=4696 */ "DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000" +/* offset=4790 */ "DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000" +/* offset=4924 */ "DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000" +/* offset=4988 */ "DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000" +/* offset=5056 */ "DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000" +/* offset=5126 */ "M1\000\000ipc + M2\000\000\000\000\000\000\000\00000" +/* offset=5148 */ "M2\000\000ipc + M1\000\000\000\000\000\000\000\00000" +/* offset=5170 */ "M3\000\0001 / M3\000\000\000\000\000\000\000\00000" +/* offset=5190 */ "L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000" ; +static const struct compact_pmu_event pmu_events__common_software[] = { +{ 1035 }, /* alignment-faults\000software\000Number of kernel handled memory alignment faults\000config=7\000\00000\000\000\000\000\000 */ +{ 1334 }, /* bpf-output\000software\000An event used by BPF programs to write to the perf ring 
buffer\000config=0xa\000\00000\000\000\000\000\000 */ +{ 1436 }, /* cgroup-switches\000software\000Number of context switches to a task in a different cgroup\000config=0xb\000\00000\000\000\000\000\000 */ +{ 357 }, /* context-switches\000software\000Number of context switches [This event is an alias of cs]\000config=3\000\00000\000\000\000\000\000 */ +{ 9 }, /* cpu-clock\000software\000Per-CPU high-resolution timer based event\000config=0\000\00000\000\000\000\000\000 */ +{ 559 }, /* cpu-migrations\000software\000Number of times a process has migrated to a new CPU [This event is an alias of migrations]\000config=4\000\00000\000\000\000\000\000 */ +{ 458 }, /* cs\000software\000Number of context switches [This event is an alias of context-switches]\000config=3\000\00000\000\000\000\000\000 */ +{ 1254 }, /* dummy\000software\000A placeholder event that doesn't count anything\000config=9\000\00000\000\000\000\000\000 */ +{ 1127 }, /* emulation-faults\000software\000Number of kernel handled unimplemented instruction faults handled through emulation\000config=8\000\00000\000\000\000\000\000 */ +{ 167 }, /* faults\000software\000Number of page faults [This event is an alias of page-faults]\000config=2\000\00000\000\000\000\000\000 */ +{ 932 }, /* major-faults\000software\000Number of major page faults. Major faults require I/O to handle\000config=6\000\00000\000\000\000\000\000 */ +{ 691 }, /* migrations\000software\000Number of times a process has migrated to a new CPU [This event is an alias of cpu-migrations]\000config=4\000\00000\000\000\000\000\000 */ +{ 823 }, /* minor-faults\000software\000Number of minor page faults. 
Minor faults don't require I/O to handle\000config=5\000\00000\000\000\000\000\000 */ +{ 262 }, /* page-faults\000software\000Number of page faults [This event is an alias of faults]\000config=2\000\00000\000\000\000\000\000 */ +{ 87 }, /* task-clock\000software\000Per-task high-resolution timer based event\000config=1\000\00000\000\000\000\000\000 */ +}; static const struct compact_pmu_event pmu_events__common_tool[] = { -{ 5 }, /* duration_time\000tool\000Wall clock interval time in nanoseconds\000config=1\000\00000\000\000\000\000\000 */ -{ 219 }, /* has_pmem\000tool\0001 if persistent memory installed otherwise 0\000config=4\000\00000\000\000\000\000\000 */ -{ 295 }, /* num_cores\000tool\000Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU\000config=5\000\00000\000\000\000\000\000 */ -{ 440 }, /* num_cpus\000tool\000Number of logical Linux CPUs. There may be multiple such CPUs on a core\000config=6\000\00000\000\000\000\000\000 */ -{ 543 }, /* num_cpus_online\000tool\000Number of online logical Linux CPUs. There may be multiple such CPUs on a core\000config=7\000\00000\000\000\000\000\000 */ -{ 660 }, /* num_dies\000tool\000Number of dies. Each die has 1 or more cores\000config=8\000\00000\000\000\000\000\000 */ -{ 736 }, /* num_packages\000tool\000Number of packages. 
Each package has 1 or more die\000config=9\000\00000\000\000\000\000\000 */ -{ 822 }, /* slots\000tool\000Number of functional units that in parallel can execute parts of an instruction\000config=0xa\000\00000\000\000\000\000\000 */ -{ 932 }, /* smt_on\000tool\0001 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0\000config=0xb\000\00000\000\000\000\000\000 */ -{ 151 }, /* system_time\000tool\000System/kernel time in nanoseconds\000config=3\000\00000\000\000\000\000\000 */ -{ 1039 }, /* system_tsc_freq\000tool\000The amount a Time Stamp Counter (TSC) increases per second\000config=0xc\000\00000\000\000\000\000\000 */ -{ 81 }, /* user_time\000tool\000User (non-kernel) time in nanoseconds\000config=2\000\00000\000\000\000\000\000 */ +{ 1544 }, /* duration_time\000tool\000Wall clock interval time in nanoseconds\000config=1\000\00000\000\000\000\000\000 */ +{ 1758 }, /* has_pmem\000tool\0001 if persistent memory installed otherwise 0\000config=4\000\00000\000\000\000\000\000 */ +{ 1834 }, /* num_cores\000tool\000Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU\000config=5\000\00000\000\000\000\000\000 */ +{ 1979 }, /* num_cpus\000tool\000Number of logical Linux CPUs. There may be multiple such CPUs on a core\000config=6\000\00000\000\000\000\000\000 */ +{ 2082 }, /* num_cpus_online\000tool\000Number of online logical Linux CPUs. There may be multiple such CPUs on a core\000config=7\000\00000\000\000\000\000\000 */ +{ 2199 }, /* num_dies\000tool\000Number of dies. Each die has 1 or more cores\000config=8\000\00000\000\000\000\000\000 */ +{ 2275 }, /* num_packages\000tool\000Number of packages. 
Each package has 1 or more die\000config=9\000\00000\000\000\000\000\000 */ +{ 2361 }, /* slots\000tool\000Number of functional units that in parallel can execute parts of an instruction\000config=0xa\000\00000\000\000\000\000\000 */ +{ 2471 }, /* smt_on\000tool\0001 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0\000config=0xb\000\00000\000\000\000\000\000 */ +{ 1690 }, /* system_time\000tool\000System/kernel time in nanoseconds\000config=3\000\00000\000\000\000\000\000 */ +{ 2578 }, /* system_tsc_freq\000tool\000The amount a Time Stamp Counter (TSC) increases per second\000config=0xc\000\00000\000\000\000\000\000 */ +{ 1620 }, /* user_time\000tool\000User (non-kernel) time in nanoseconds\000config=2\000\00000\000\000\000\000\000 */ }; const struct pmu_table_entry pmu_events__common[] = { +{ + .entries = pmu_events__common_software, + .num_entries = ARRAY_SIZE(pmu_events__common_software), + .pmu_name = { 0 /* software\000 */ }, +}, { .entries = pmu_events__common_tool, .num_entries = ARRAY_SIZE(pmu_events__common_tool), - .pmu_name = { 0 /* tool\000 */ }, + .pmu_name = { 1539 /* tool\000 */ }, }, }; static const struct compact_pmu_event pmu_events__test_soc_cpu_default_core[] = { -{ 1151 }, /* bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000\000\000\000 */ -{ 1213 }, /* bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000\000\000\000 */ -{ 1475 }, /* dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000\000\000\000 */ -{ 1608 }, /* eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000\000\000\000 */ -{ 1275 }, /* l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000\000\000\000Attributable Level 3 cache access, read\000 */ -{ 1373 }, /* segment_reg_loads.any\000other\000Number of 
segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000\000\000\000 */ +{ 2690 }, /* bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000\000\000\000 */ +{ 2752 }, /* bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000\000\000\000 */ +{ 3014 }, /* dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000\000\000\000 */ +{ 3147 }, /* eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000\000\000\000 */ +{ 2814 }, /* l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000\000\000\000Attributable Level 3 cache access, read\000 */ +{ 2912 }, /* segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000\000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_hisi_sccl_ddrc[] = { -{ 1741 }, /* uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000\000\000\000\000 */ +{ 3280 }, /* uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000\000\000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_hisi_sccl_l3c[] = { -{ 2103 }, /* uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000\000\000\000\000 */ +{ 3642 }, /* uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000\000\000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_cbox[] = { -{ 1977 }, /* event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000\000\000\000\000 */ -{ 2031 }, /* event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000\000\000\000\000 */ -{ 1823 }, /* unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction 
which misses in some processor core\000event=0x22,umask=0x81\000\00000\000\000\000\000\000 */ +{ 3516 }, /* event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000\000\000\000\000 */ +{ 3570 }, /* event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000\000\000\000\000 */ +{ 3362 }, /* unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000\000\000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_imc[] = { -{ 2286 }, /* uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000\000\000\000\000 */ +{ 3825 }, /* uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000\000\000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_imc_free_running[] = { -{ 2195 }, /* uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000\000\000\000\000 */ +{ 3734 }, /* uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000\000\000\000\000 */ }; @@ -129,51 +167,51 @@ const struct pmu_table_entry pmu_events__test_soc_cpu[] = { { .entries = pmu_events__test_soc_cpu_default_core, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_default_core), - .pmu_name = { 1138 /* default_core\000 */ }, + .pmu_name = { 2677 /* default_core\000 */ }, }, { .entries = pmu_events__test_soc_cpu_hisi_sccl_ddrc, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_hisi_sccl_ddrc), - .pmu_name = { 1726 /* hisi_sccl,ddrc\000 */ }, + .pmu_name = { 3265 /* hisi_sccl,ddrc\000 */ }, }, { .entries = pmu_events__test_soc_cpu_hisi_sccl_l3c, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_hisi_sccl_l3c), - .pmu_name = { 2089 /* hisi_sccl,l3c\000 */ }, + .pmu_name = { 3628 /* hisi_sccl,l3c\000 */ }, }, { .entries = pmu_events__test_soc_cpu_uncore_cbox, .num_entries = 
ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_cbox), - .pmu_name = { 1811 /* uncore_cbox\000 */ }, + .pmu_name = { 3350 /* uncore_cbox\000 */ }, }, { .entries = pmu_events__test_soc_cpu_uncore_imc, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_imc), - .pmu_name = { 2275 /* uncore_imc\000 */ }, + .pmu_name = { 3814 /* uncore_imc\000 */ }, }, { .entries = pmu_events__test_soc_cpu_uncore_imc_free_running, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_imc_free_running), - .pmu_name = { 2171 /* uncore_imc_free_running\000 */ }, + .pmu_name = { 3710 /* uncore_imc_free_running\000 */ }, }, }; static const struct compact_pmu_event pmu_metrics__test_soc_cpu_default_core[] = { -{ 2704 }, /* CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000 */ -{ 3385 }, /* DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000 */ -{ 3157 }, /* DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000 */ -{ 3251 }, /* DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000 */ -{ 3449 }, /* DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ -{ 3517 }, /* DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ -{ 2789 }, /* Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000 */ -{ 2726 }, /* IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000 */ -{ 3651 }, /* L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000 */ -{ 3587 }, /* M1\000\000ipc + M2\000\000\000\000\000\000\000\00000 */ -{ 3609 }, /* M2\000\000ipc + 
M1\000\000\000\000\000\000\000\00000 */ -{ 3631 }, /* M3\000\0001 / M3\000\000\000\000\000\000\000\00000 */ -{ 3086 }, /* cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000 */ -{ 2955 }, /* dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ -{ 3019 }, /* icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ +{ 4243 }, /* CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000 */ +{ 4924 }, /* DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000 */ +{ 4696 }, /* DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000 */ +{ 4790 }, /* DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000 */ +{ 4988 }, /* DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ +{ 5056 }, /* DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ +{ 4328 }, /* Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000 */ +{ 4265 }, /* IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000 */ +{ 5190 }, /* L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000 */ +{ 5126 }, /* M1\000\000ipc + M2\000\000\000\000\000\000\000\00000 */ +{ 5148 }, /* M2\000\000ipc + M1\000\000\000\000\000\000\000\00000 */ +{ 5170 }, /* M3\000\0001 / M3\000\000\000\000\000\000\000\00000 */ +{ 4625 }, /* cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000 */ +{ 4494 }, /* 
dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ +{ 4558 }, /* icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ }; @@ -181,18 +219,18 @@ const struct pmu_table_entry pmu_metrics__test_soc_cpu[] = { { .entries = pmu_metrics__test_soc_cpu_default_core, .num_entries = ARRAY_SIZE(pmu_metrics__test_soc_cpu_default_core), - .pmu_name = { 1138 /* default_core\000 */ }, + .pmu_name = { 2677 /* default_core\000 */ }, }, }; static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_ccn_pmu[] = { -{ 2465 }, /* sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000\000\000\000 */ +{ 4004 }, /* sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000\000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_cmn_pmu[] = { -{ 2561 }, /* sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000\000\000\000 */ +{ 4100 }, /* sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000\000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_ddr_pmu[] = { -{ 2370 }, /* sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000\000\000\000 */ +{ 3909 }, /* sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000\000\000\000 */ }; @@ -200,17 +238,17 @@ const struct pmu_table_entry pmu_events__test_soc_sys[] = { { .entries = pmu_events__test_soc_sys_uncore_sys_ccn_pmu, .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_ccn_pmu), - .pmu_name = { 2446 /* uncore_sys_ccn_pmu\000 */ }, + .pmu_name = { 3985 /* uncore_sys_ccn_pmu\000 */ }, }, { 
.entries = pmu_events__test_soc_sys_uncore_sys_cmn_pmu, .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_cmn_pmu), - .pmu_name = { 2542 /* uncore_sys_cmn_pmu\000 */ }, + .pmu_name = { 4081 /* uncore_sys_cmn_pmu\000 */ }, }, { .entries = pmu_events__test_soc_sys_uncore_sys_ddr_pmu, .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_ddr_pmu), - .pmu_name = { 2351 /* uncore_sys_ddr_pmu\000 */ }, + .pmu_name = { 3890 /* uncore_sys_ddr_pmu\000 */ }, }, }; @@ -632,8 +670,20 @@ static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) { struct perf_cpu cpu = {-1}; - if (pmu) + if (pmu) { + for (size_t i = 0; i < ARRAY_SIZE(pmu_events__common); i++) { + const char *pmu_name = &big_c_string[pmu_events__common[i].pmu_name.offset]; + + if (!strcmp(pmu_name, pmu->name)) { + const struct pmu_events_map *map = &pmu_events_map[0]; + + while (strcmp("common", map->arch)) + map++; + return map; + } + } cpu = perf_cpu_map__min(pmu->cpus); + } return map_for_cpu(cpu); } diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index 0abd3cfb15ea..168c044dd7cc 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -296,6 +296,7 @@ class JsonEvent: 'cpu_atom': 'cpu_atom', 'ali_drw': 'ali_drw', 'arm_cmn': 'arm_cmn', + 'software': 'software', 'tool': 'tool', } return table[unit] if unit in table else f'uncore_{unit.lower()}' @@ -1159,8 +1160,20 @@ static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) { struct perf_cpu cpu = {-1}; - if (pmu) + if (pmu) { + for (size_t i = 0; i < ARRAY_SIZE(pmu_events__common); i++) { + const char *pmu_name = &big_c_string[pmu_events__common[i].pmu_name.offset]; + + if (!strcmp(pmu_name, pmu->name)) { + const struct pmu_events_map *map = &pmu_events_map[0]; + + while (strcmp("common", map->arch)) + map++; + return map; + } + } cpu = perf_cpu_map__min(pmu->cpus); + } return map_for_cpu(cpu); } From 6e9fa4131abb0129b1153ba6d194bd294b9f9986 Mon Sep 17 
00:00:00 2001 From: Ian Rogers Date: Fri, 25 Jul 2025 11:51:49 -0700 Subject: [PATCH 1132/2411] perf parse-events: Remove non-json software events Remove the hard-coded encodings from parse-events. This has the consequence that software events are matched using the sysfs/json priority, will be case-insensitive and will be wildcarded across PMUs. As there were software and hardware types in the parsing code, the removal means software vs hardware logic can be removed and hardware assumed. Now that the perf json provides detailed descriptions of software events, remove the previous listing support that didn't contain event descriptions. When globbing is required for the "sw" option in perf list, use string PMU globbing as was done previously for the tool PMU. The output of the `perf list sw` command changes as follows. Before: List of pre-defined events (to be used in -e or -M): alignment-faults [Software event] bpf-output [Software event] cgroup-switches [Software event] context-switches OR cs [Software event] cpu-clock [Software event] cpu-migrations OR migrations [Software event] dummy [Software event] emulation-faults [Software event] major-faults [Software event] minor-faults [Software event] page-faults OR faults [Software event] task-clock [Software event] After: List of pre-defined events (to be used in -e or -M): software: alignment-faults [Number of kernel handled memory alignment faults. Unit: software] bpf-output [An event used by BPF programs to write to the perf ring buffer. Unit: software] cgroup-switches [Number of context switches to a task in a different cgroup. Unit: software] context-switches [Number of context switches [This event is an alias of cs]. Unit: software] cpu-clock [Per-CPU high-resolution timer based event. Unit: software] cpu-migrations [Number of times a process has migrated to a new CPU [This event is an alias of migrations]. Unit: software] cs [Number of context switches [This event is an alias of context-switches].
Unit: software] dummy [A placeholder event that doesn't count anything. Unit: software] ... Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20250725185202.68671-4-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-list.c | 19 ++++++------- tools/perf/util/parse-events.c | 51 ---------------------------------- tools/perf/util/parse-events.h | 1 - tools/perf/util/parse-events.l | 38 +++++++++---------------- tools/perf/util/parse-events.y | 29 ++++++++----------- tools/perf/util/print-events.c | 2 -- 6 files changed, 33 insertions(+), 107 deletions(-) diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index e9b595d75df2..674bb0afbf93 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -623,16 +623,17 @@ int cmd_list(int argc, const char **argv) else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0) { char *old_pmu_glob = default_ps.pmu_glob; + static const char * const sw_globs[] = { "software", "tool" }; - print_symbol_events(&print_cb, ps, PERF_TYPE_SOFTWARE, - event_symbols_sw, PERF_COUNT_SW_MAX); - default_ps.pmu_glob = strdup("tool"); - if (!default_ps.pmu_glob) { - ret = -1; - goto out; + for (size_t j = 0; j < ARRAY_SIZE(sw_globs); j++) { + default_ps.pmu_glob = strdup(sw_globs[j]); + if (!default_ps.pmu_glob) { + ret = -1; + goto out; + } + perf_pmus__print_pmu_events(&print_cb, ps); + zfree(&default_ps.pmu_glob); } - perf_pmus__print_pmu_events(&print_cb, ps); - zfree(&default_ps.pmu_glob); default_ps.pmu_glob = old_pmu_glob; } else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) @@ -679,8 +680,6 @@ int cmd_list(int argc, const char **argv) default_ps.event_glob = s; print_symbol_events(&print_cb, ps, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX); - print_symbol_events(&print_cb, ps, PERF_TYPE_SOFTWARE, - event_symbols_sw, PERF_COUNT_SW_MAX); print_hwcache_events(&print_cb, ps); 
perf_pmus__print_pmu_events(&print_cb, ps); print_tracepoint_events(&print_cb, ps); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 01fa8c80998b..74e0822ad82d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -84,57 +84,6 @@ const struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { }, }; -const struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { - [PERF_COUNT_SW_CPU_CLOCK] = { - .symbol = "cpu-clock", - .alias = "", - }, - [PERF_COUNT_SW_TASK_CLOCK] = { - .symbol = "task-clock", - .alias = "", - }, - [PERF_COUNT_SW_PAGE_FAULTS] = { - .symbol = "page-faults", - .alias = "faults", - }, - [PERF_COUNT_SW_CONTEXT_SWITCHES] = { - .symbol = "context-switches", - .alias = "cs", - }, - [PERF_COUNT_SW_CPU_MIGRATIONS] = { - .symbol = "cpu-migrations", - .alias = "migrations", - }, - [PERF_COUNT_SW_PAGE_FAULTS_MIN] = { - .symbol = "minor-faults", - .alias = "", - }, - [PERF_COUNT_SW_PAGE_FAULTS_MAJ] = { - .symbol = "major-faults", - .alias = "", - }, - [PERF_COUNT_SW_ALIGNMENT_FAULTS] = { - .symbol = "alignment-faults", - .alias = "", - }, - [PERF_COUNT_SW_EMULATION_FAULTS] = { - .symbol = "emulation-faults", - .alias = "", - }, - [PERF_COUNT_SW_DUMMY] = { - .symbol = "dummy", - .alias = "", - }, - [PERF_COUNT_SW_BPF_OUTPUT] = { - .symbol = "bpf-output", - .alias = "", - }, - [PERF_COUNT_SW_CGROUP_SWITCHES] = { - .symbol = "cgroup-switches", - .alias = "", - }, -}; - static const char *const event_types[] = { [PERF_TYPE_HARDWARE] = "hardware", [PERF_TYPE_SOFTWARE] = "software", diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index b47bf2810112..62dc7202e3ba 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -264,7 +264,6 @@ struct event_symbol { const char *alias; }; extern const struct event_symbol event_symbols_hw[]; -extern const struct event_symbol event_symbols_sw[]; char *parse_events_formats_error_string(char 
*additional_terms); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 4af7b9c1f44d..2034590eb789 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -117,12 +117,12 @@ do { \ yyless(0); \ } while (0) -static int sym(yyscan_t scanner, int type, int config) +static int sym(yyscan_t scanner, int config) { YYSTYPE *yylval = parse_events_get_lval(scanner); - yylval->num = (type << 16) + config; - return type == PERF_TYPE_HARDWARE ? PE_VALUE_SYM_HW : PE_VALUE_SYM_SW; + yylval->num = config; + return PE_VALUE_SYM_HW; } static int term(yyscan_t scanner, enum parse_events__term_type type) @@ -391,28 +391,16 @@ r0x{num_raw_hex} { return str(yyscanner, PE_RAW); } <> { BEGIN(INITIAL); } } -cpu-cycles|cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); } -stalled-cycles-frontend|idle-cycles-frontend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } -stalled-cycles-backend|idle-cycles-backend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } -instructions { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); } -cache-references { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); } -cache-misses { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); } -branch-instructions|branches { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); } -branch-misses { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); } -bus-cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES); } -ref-cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES); } -cpu-clock { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); } -task-clock { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK); } -page-faults|faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, 
PERF_COUNT_SW_PAGE_FAULTS); } -minor-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN); } -major-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ); } -context-switches|cs { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); } -cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS); } -alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } -emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } -dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } -bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } -cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); } +cpu-cycles|cycles { return sym(yyscanner, PERF_COUNT_HW_CPU_CYCLES); } +stalled-cycles-frontend|idle-cycles-frontend { return sym(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } +stalled-cycles-backend|idle-cycles-backend { return sym(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } +instructions { return sym(yyscanner, PERF_COUNT_HW_INSTRUCTIONS); } +cache-references { return sym(yyscanner, PERF_COUNT_HW_CACHE_REFERENCES); } +cache-misses { return sym(yyscanner, PERF_COUNT_HW_CACHE_MISSES); } +branch-instructions|branches { return sym(yyscanner, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); } +branch-misses { return sym(yyscanner, PERF_COUNT_HW_BRANCH_MISSES); } +bus-cycles { return sym(yyscanner, PERF_COUNT_HW_BUS_CYCLES); } +ref-cycles { return sym(yyscanner, PERF_COUNT_HW_REF_CPU_CYCLES); } {lc_type} { return str(yyscanner, PE_LEGACY_CACHE); } {lc_type}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index f888cbb076d6..a2361c0040d7 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -55,7 
+55,7 @@ static void free_list_evsel(struct list_head* list_evsel) %} %token PE_START_EVENTS PE_START_TERMS -%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_TERM +%token PE_VALUE PE_VALUE_SYM_HW PE_TERM %token PE_EVENT_NAME %token PE_RAW PE_NAME %token PE_MODIFIER_EVENT PE_MODIFIER_BP PE_BP_COLON PE_BP_SLASH @@ -66,10 +66,8 @@ static void free_list_evsel(struct list_head* list_evsel) %token PE_TERM_HW %type PE_VALUE %type PE_VALUE_SYM_HW -%type PE_VALUE_SYM_SW %type PE_MODIFIER_EVENT %type PE_TERM -%type value_sym %type PE_RAW %type PE_NAME %type PE_LEGACY_CACHE @@ -306,24 +304,19 @@ PE_NAME sep_dc $$ = list; } -value_sym: -PE_VALUE_SYM_HW -| -PE_VALUE_SYM_SW - event_legacy_symbol: -value_sym '/' event_config '/' +PE_VALUE_SYM_HW '/' event_config '/' { struct list_head *list; - int type = $1 >> 16; - int config = $1 & 255; int err; - bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE); list = alloc_list(); if (!list) YYNOMEM; - err = parse_events_add_numeric(_parse_state, list, type, config, $3, wildcard); + err = parse_events_add_numeric(_parse_state, list, + PERF_TYPE_HARDWARE, $1, + $3, + /*wildcard=*/true); parse_events_terms__delete($3); if (err) { free_list_evsel(list); @@ -332,18 +325,18 @@ value_sym '/' event_config '/' $$ = list; } | -value_sym sep_slash_slash_dc +PE_VALUE_SYM_HW sep_slash_slash_dc { struct list_head *list; - int type = $1 >> 16; - int config = $1 & 255; - bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE); int err; list = alloc_list(); if (!list) YYNOMEM; - err = parse_events_add_numeric(_parse_state, list, type, config, /*head_config=*/NULL, wildcard); + err = parse_events_add_numeric(_parse_state, list, + PERF_TYPE_HARDWARE, $1, + /*head_config=*/NULL, + /*wildcard=*/true); if (err) PE_ABORT(err); $$ = list; diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index e233bacaa641..c1a8708b55ab 100644 --- a/tools/perf/util/print-events.c +++ 
b/tools/perf/util/print-events.c @@ -521,8 +521,6 @@ void print_events(const struct print_callbacks *print_cb, void *print_state) { print_symbol_events(print_cb, print_state, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX); - print_symbol_events(print_cb, print_state, PERF_TYPE_SOFTWARE, - event_symbols_sw, PERF_COUNT_SW_MAX); print_hwcache_events(print_cb, print_state); From d002aab87de84b26c6f0a2b9549a589105d00d35 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 25 Jul 2025 11:51:50 -0700 Subject: [PATCH 1133/2411] perf tp_pmu: Factor existing tracepoint logic to new file Start the creation of a tracepoint PMU abstraction. Tracepoint events don't follow the regular sysfs perf conventions. Eventually the new PMU abstraction will bridge the gap so tracepoint events look more like regular perf ones. Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20250725185202.68671-5-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/Build | 1 + tools/perf/util/evsel.c | 21 +---- tools/perf/util/parse-events.c | 151 ++++++++++++++------------------- tools/perf/util/tp_pmu.c | 95 +++++++++++++++++++++ tools/perf/util/tp_pmu.h | 12 +++ 5 files changed, 172 insertions(+), 108 deletions(-) create mode 100644 tools/perf/util/tp_pmu.c create mode 100644 tools/perf/util/tp_pmu.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 12bc01c843b2..4959e7a990e4 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -88,6 +88,7 @@ perf-util-y += pmu-bison.o perf-util-y += drm_pmu.o perf-util-y += hwmon_pmu.o perf-util-y += tool_pmu.o +perf-util-y += tp_pmu.o perf-util-y += svghelper.o perf-util-y += trace-event-info.o perf-util-y += trace-event-scripting.o diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 3d27e9bdd66b..d264c143b592 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -60,6 +60,7 @@ #include "drm_pmu.h" #include "hwmon_pmu.h" #include 
"tool_pmu.h" +#include "tp_pmu.h" #include "rlimit.h" #include "../perf-sys.h" #include "util/parse-branch-options.h" @@ -572,24 +573,6 @@ struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig) return NULL; } -static int trace_event__id(const char *sys, const char *name) -{ - char *tp_dir = get_events_file(sys); - char path[PATH_MAX]; - int id, err; - - if (!tp_dir) - return -1; - - scnprintf(path, PATH_MAX, "%s/%s/id", tp_dir, name); - put_events_file(tp_dir); - err = filename__read_int(path, &id); - if (err) - return err; - - return id; -} - /* * Returns pointer with encoded error via interface. */ @@ -623,7 +606,7 @@ struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx, bool event_attr_init(&attr); if (format) { - id = trace_event__id(sys, name); + id = tp_pmu__id(sys, name); if (id < 0) { err = id; goto out_free; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 74e0822ad82d..8282ddf68b98 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -17,13 +17,12 @@ #include "string2.h" #include "strbuf.h" #include "debug.h" -#include -#include #include #include #include #include "pmu.h" #include "pmus.h" +#include "tp_pmu.h" #include "asm/bug.h" #include "ui/ui.h" #include "util/parse-branch-options.h" @@ -33,6 +32,7 @@ #include "util/stat.h" #include "util/util.h" #include "tracepoint.h" +#include #define MAX_NAME_LEN 100 @@ -599,105 +599,82 @@ static int add_tracepoint(struct parse_events_state *parse_state, return 0; } -static int add_tracepoint_multi_event(struct parse_events_state *parse_state, - struct list_head *list, - const char *sys_name, const char *evt_name, - struct parse_events_error *err, - struct parse_events_terms *head_config, YYLTYPE *loc) +struct add_tracepoint_multi_args { + struct parse_events_state *parse_state; + struct list_head *list; + const char *sys_glob; + const char *evt_glob; + struct parse_events_error *err; + struct parse_events_terms 
*head_config; + YYLTYPE *loc; + int found; +}; + +static int add_tracepoint_multi_event_cb(void *state, const char *sys_name, const char *evt_name) { - char *evt_path; - struct io_dirent64 *evt_ent; - struct io_dir evt_dir; - int ret = 0, found = 0; + struct add_tracepoint_multi_args *args = state; + int ret; - evt_path = get_events_file(sys_name); - if (!evt_path) { - tracepoint_error(err, errno, sys_name, evt_name, loc->first_column); - return -1; - } - io_dir__init(&evt_dir, open(evt_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); - if (evt_dir.dirfd < 0) { - put_events_file(evt_path); - tracepoint_error(err, errno, sys_name, evt_name, loc->first_column); - return -1; - } + if (!strglobmatch(evt_name, args->evt_glob)) + return 0; - while (!ret && (evt_ent = io_dir__readdir(&evt_dir))) { - if (!strcmp(evt_ent->d_name, ".") - || !strcmp(evt_ent->d_name, "..") - || !strcmp(evt_ent->d_name, "enable") - || !strcmp(evt_ent->d_name, "filter")) - continue; + args->found++; + ret = add_tracepoint(args->parse_state, args->list, sys_name, evt_name, + args->err, args->head_config, args->loc); - if (!strglobmatch(evt_ent->d_name, evt_name)) - continue; - - found++; - - ret = add_tracepoint(parse_state, list, sys_name, evt_ent->d_name, - err, head_config, loc); - } - - if (!found) { - tracepoint_error(err, ENOENT, sys_name, evt_name, loc->first_column); - ret = -1; - } - - put_events_file(evt_path); - close(evt_dir.dirfd); return ret; } -static int add_tracepoint_event(struct parse_events_state *parse_state, - struct list_head *list, - const char *sys_name, const char *evt_name, - struct parse_events_error *err, - struct parse_events_terms *head_config, YYLTYPE *loc) +static int add_tracepoint_multi_event(struct add_tracepoint_multi_args *args, const char *sys_name) { - return strpbrk(evt_name, "*?") ? 
- add_tracepoint_multi_event(parse_state, list, sys_name, evt_name, - err, head_config, loc) : - add_tracepoint(parse_state, list, sys_name, evt_name, - err, head_config, loc); + if (strpbrk(args->evt_glob, "*?") == NULL) { + /* Not a glob. */ + args->found++; + return add_tracepoint(args->parse_state, args->list, sys_name, args->evt_glob, + args->err, args->head_config, args->loc); + } + + return tp_pmu__for_each_tp_event(sys_name, args, add_tracepoint_multi_event_cb); +} + +static int add_tracepoint_multi_sys_cb(void *state, const char *sys_name) +{ + struct add_tracepoint_multi_args *args = state; + + if (!strglobmatch(sys_name, args->sys_glob)) + return 0; + + return add_tracepoint_multi_event(args, sys_name); } static int add_tracepoint_multi_sys(struct parse_events_state *parse_state, struct list_head *list, - const char *sys_name, const char *evt_name, + const char *sys_glob, const char *evt_glob, struct parse_events_error *err, struct parse_events_terms *head_config, YYLTYPE *loc) { - struct io_dirent64 *events_ent; - struct io_dir events_dir; - int ret = 0; - char *events_dir_path = get_tracing_file("events"); + struct add_tracepoint_multi_args args = { + .parse_state = parse_state, + .list = list, + .sys_glob = sys_glob, + .evt_glob = evt_glob, + .err = err, + .head_config = head_config, + .loc = loc, + .found = 0, + }; + int ret; - if (!events_dir_path) { - tracepoint_error(err, errno, sys_name, evt_name, loc->first_column); - return -1; + if (strpbrk(sys_glob, "*?") == NULL) { + /* Not a glob. 
*/ + ret = add_tracepoint_multi_event(&args, sys_glob); + } else { + ret = tp_pmu__for_each_tp_sys(&args, add_tracepoint_multi_sys_cb); } - io_dir__init(&events_dir, open(events_dir_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); - put_events_file(events_dir_path); - if (events_dir.dirfd < 0) { - tracepoint_error(err, errno, sys_name, evt_name, loc->first_column); - return -1; + if (args.found == 0) { + tracepoint_error(err, ENOENT, sys_glob, evt_glob, loc->first_column); + return -ENOENT; } - - while (!ret && (events_ent = io_dir__readdir(&events_dir))) { - if (!strcmp(events_ent->d_name, ".") - || !strcmp(events_ent->d_name, "..") - || !strcmp(events_ent->d_name, "enable") - || !strcmp(events_ent->d_name, "header_event") - || !strcmp(events_ent->d_name, "header_page")) - continue; - - if (!strglobmatch(events_ent->d_name, sys_name)) - continue; - - ret = add_tracepoint_event(parse_state, list, events_ent->d_name, - evt_name, err, head_config, loc); - } - close(events_dir.dirfd); return ret; } @@ -1406,12 +1383,8 @@ int parse_events_add_tracepoint(struct parse_events_state *parse_state, return -EINVAL; } - if (strpbrk(sys, "*?")) - return add_tracepoint_multi_sys(parse_state, list, sys, event, - err, head_config, loc); - else - return add_tracepoint_event(parse_state, list, sys, event, - err, head_config, loc); + return add_tracepoint_multi_sys(parse_state, list, sys, event, + err, head_config, loc); } static int __parse_events_add_numeric(struct parse_events_state *parse_state, diff --git a/tools/perf/util/tp_pmu.c b/tools/perf/util/tp_pmu.c new file mode 100644 index 000000000000..42bd967a4530 --- /dev/null +++ b/tools/perf/util/tp_pmu.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#include "tp_pmu.h" +#include +#include +#include +#include +#include +#include + +int tp_pmu__id(const char *sys, const char *name) +{ + char *tp_dir = get_events_file(sys); + char path[PATH_MAX]; + int id, err; + + if (!tp_dir) + return -1; + + 
scnprintf(path, PATH_MAX, "%s/%s/id", tp_dir, name); + put_events_file(tp_dir); + err = filename__read_int(path, &id); + if (err) + return err; + + return id; +} + + +int tp_pmu__for_each_tp_event(const char *sys, void *state, tp_event_callback cb) +{ + char *evt_path; + struct io_dirent64 *evt_ent; + struct io_dir evt_dir; + int ret = 0; + + evt_path = get_events_file(sys); + if (!evt_path) + return -errno; + + io_dir__init(&evt_dir, open(evt_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + if (evt_dir.dirfd < 0) { + ret = -errno; + put_events_file(evt_path); + return ret; + } + put_events_file(evt_path); + + while (!ret && (evt_ent = io_dir__readdir(&evt_dir))) { + if (!strcmp(evt_ent->d_name, ".") + || !strcmp(evt_ent->d_name, "..") + || !strcmp(evt_ent->d_name, "enable") + || !strcmp(evt_ent->d_name, "filter")) + continue; + + ret = cb(state, sys, evt_ent->d_name); + if (ret) + break; + } + close(evt_dir.dirfd); + return ret; +} + +int tp_pmu__for_each_tp_sys(void *state, tp_sys_callback cb) +{ + struct io_dirent64 *events_ent; + struct io_dir events_dir; + int ret = 0; + char *events_dir_path = get_tracing_file("events"); + + if (!events_dir_path) + return -errno; + + io_dir__init(&events_dir, open(events_dir_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + if (events_dir.dirfd < 0) { + ret = -errno; + put_events_file(events_dir_path); + return ret; + } + put_events_file(events_dir_path); + + while (!ret && (events_ent = io_dir__readdir(&events_dir))) { + if (!strcmp(events_ent->d_name, ".") || + !strcmp(events_ent->d_name, "..") || + !strcmp(events_ent->d_name, "enable") || + !strcmp(events_ent->d_name, "header_event") || + !strcmp(events_ent->d_name, "header_page")) + continue; + + ret = cb(state, events_ent->d_name); + if (ret) + break; + } + close(events_dir.dirfd); + return ret; +} diff --git a/tools/perf/util/tp_pmu.h b/tools/perf/util/tp_pmu.h new file mode 100644 index 000000000000..49537303bd73 --- /dev/null +++ b/tools/perf/util/tp_pmu.h @@ -0,0 +1,12 @@ 
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __TP_PMU_H +#define __TP_PMU_H + +typedef int (*tp_sys_callback)(void *state, const char *sys_name); +typedef int (*tp_event_callback)(void *state, const char *sys_name, const char *evt_name); + +int tp_pmu__id(const char *sys, const char *name); +int tp_pmu__for_each_tp_event(const char *sys, void *state, tp_event_callback cb); +int tp_pmu__for_each_tp_sys(void *state, tp_sys_callback cb); + +#endif /* __TP_PMU_H */ From 45b6e281cb0648acd04f896375de69481d29daa7 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 25 Jul 2025 11:51:51 -0700 Subject: [PATCH 1134/2411] perf tp_pmu: Add event APIs Add event APIs for the tracepoint PMU allowing things like perf list to function using it. For perf list add the tracepoint format in the long description (shown with -v). $ sudo perf list -v tracepoint List of pre-defined events (to be used in -e or -M): alarmtimer:alarmtimer_cancel [Tracepoint event] [name: alarmtimer_cancel ID: 416 format: field:unsigned short common_type; offset:0; size:2; signed:0; field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1; signed:0; field:int common_pid; offset:4; size:4; signed:1; field:void * alarm; offset:8; size:8; signed:0; field:unsigned char alarm_type; offset:16; size:1; signed:0; field:s64 expires; offset:24; size:8; signed:1; field:s64 now; offset:32; size:8; signed:1; print fmt: "alarmtimer:%p type:%s expires:%llu now:%llu",REC->alarm,__print_flags((1 << REC->alarm_type)," | ",{ 1 << 0, "REALTIME" },{ 1 << 1,"BOOTTIME" },{ 1 << 3,"REALTIME Freezer" },{ 1 << 4,"BOOTTIME Freezer" }),REC->expires,REC->now . Unit: tracepoint] alarmtimer:alarmtimer_fired [Tracepoint event] [name: alarmtimer_fired ID: 418 ... 
Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20250725185202.68671-6-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/pmu.c | 7 +++ tools/perf/util/tp_pmu.c | 115 +++++++++++++++++++++++++++++++++++++++ tools/perf/util/tp_pmu.h | 7 +++ 3 files changed, 129 insertions(+) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index f3da6e27bfcb..5a291f1380ed 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -24,6 +24,7 @@ #include "hwmon_pmu.h" #include "pmus.h" #include "tool_pmu.h" +#include "tp_pmu.h" #include #include #include "parse-events.h" @@ -1983,6 +1984,8 @@ bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name) return false; if (perf_pmu__is_tool(pmu) && tool_pmu__skip_event(name)) return false; + if (perf_pmu__is_tracepoint(pmu)) + return tp_pmu__have_event(pmu, name); if (perf_pmu__is_hwmon(pmu)) return hwmon_pmu__have_event(pmu, name); if (perf_pmu__is_drm(pmu)) @@ -1998,6 +2001,8 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu) { size_t nr; + if (perf_pmu__is_tracepoint(pmu)) + return tp_pmu__num_events(pmu); if (perf_pmu__is_hwmon(pmu)) return hwmon_pmu__num_events(pmu); if (perf_pmu__is_drm(pmu)) @@ -2068,6 +2073,8 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, struct hashmap_entry *entry; size_t bkt; + if (perf_pmu__is_tracepoint(pmu)) + return tp_pmu__for_each_event(pmu, state, cb); if (perf_pmu__is_hwmon(pmu)) return hwmon_pmu__for_each_event(pmu, state, cb); if (perf_pmu__is_drm(pmu)) diff --git a/tools/perf/util/tp_pmu.c b/tools/perf/util/tp_pmu.c index 42bd967a4530..e7534a973247 100644 --- a/tools/perf/util/tp_pmu.c +++ b/tools/perf/util/tp_pmu.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) #include "tp_pmu.h" +#include "pmus.h" #include #include #include @@ -93,3 +94,117 @@ int tp_pmu__for_each_tp_sys(void *state, tp_sys_callback cb) close(events_dir.dirfd); return ret; } + +bool 
perf_pmu__is_tracepoint(const struct perf_pmu *pmu) +{ + return pmu->type == PERF_TYPE_TRACEPOINT; +} + +struct for_each_event_args { + void *state; + pmu_event_callback cb; + const struct perf_pmu *pmu; +}; + +static int for_each_event_cb(void *state, const char *sys_name, const char *evt_name) +{ + struct for_each_event_args *args = state; + char name[2 * FILENAME_MAX + 2]; + /* 16 possible hex digits and 22 other characters and \0. */ + char encoding[16 + 22]; + char *format = NULL; + size_t format_size; + struct pmu_event_info info = { + .pmu = args->pmu, + .pmu_name = args->pmu->name, + .event_type_desc = "Tracepoint event", + }; + char *tp_dir = get_events_file(sys_name); + char path[PATH_MAX]; + int id, err; + + if (!tp_dir) + return -1; + + scnprintf(path, sizeof(path), "%s/%s/id", tp_dir, evt_name); + err = filename__read_int(path, &id); + if (err == 0) { + snprintf(encoding, sizeof(encoding), "tracepoint/config=0x%x/", id); + info.encoding_desc = encoding; + } + + scnprintf(path, sizeof(path), "%s/%s/format", tp_dir, evt_name); + put_events_file(tp_dir); + err = filename__read_str(path, &format, &format_size); + if (err == 0) { + info.long_desc = format; + for (size_t i = 0 ; i < format_size; i++) { + /* Swap tabs to spaces due to some rendering issues. 
*/ + if (format[i] == '\t') + format[i] = ' '; + } + } + snprintf(name, sizeof(name), "%s:%s", sys_name, evt_name); + info.name = name; + err = args->cb(args->state, &info); + free(format); + return err; +} + +static int for_each_event_sys_cb(void *state, const char *sys_name) +{ + return tp_pmu__for_each_tp_event(sys_name, state, for_each_event_cb); +} + +int tp_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb) +{ + struct for_each_event_args args = { + .state = state, + .cb = cb, + .pmu = pmu, + }; + + return tp_pmu__for_each_tp_sys(&args, for_each_event_sys_cb); +} + +static int num_events_cb(void *state, const char *sys_name __maybe_unused, + const char *evt_name __maybe_unused) +{ + size_t *count = state; + + (*count)++; + return 0; +} + +static int num_events_sys_cb(void *state, const char *sys_name) +{ + return tp_pmu__for_each_tp_event(sys_name, state, num_events_cb); +} + +size_t tp_pmu__num_events(struct perf_pmu *pmu __maybe_unused) +{ + size_t count = 0; + + tp_pmu__for_each_tp_sys(&count, num_events_sys_cb); + return count; +} + +bool tp_pmu__have_event(struct perf_pmu *pmu __maybe_unused, const char *name) +{ + char *dup_name, *colon; + int id; + + colon = strchr(name, ':'); + if (colon == NULL) + return false; + + dup_name = strdup(name); + if (!dup_name) + return false; + + colon = dup_name + (colon - name); + *colon = '\0'; + id = tp_pmu__id(dup_name, colon + 1); + free(dup_name); + return id >= 0; +} diff --git a/tools/perf/util/tp_pmu.h b/tools/perf/util/tp_pmu.h index 49537303bd73..30456bd6943d 100644 --- a/tools/perf/util/tp_pmu.h +++ b/tools/perf/util/tp_pmu.h @@ -2,6 +2,8 @@ #ifndef __TP_PMU_H #define __TP_PMU_H +#include "pmu.h" + typedef int (*tp_sys_callback)(void *state, const char *sys_name); typedef int (*tp_event_callback)(void *state, const char *sys_name, const char *evt_name); @@ -9,4 +11,9 @@ int tp_pmu__id(const char *sys, const char *name); int tp_pmu__for_each_tp_event(const char *sys, void *state, 
tp_event_callback cb); int tp_pmu__for_each_tp_sys(void *state, tp_sys_callback cb); +bool perf_pmu__is_tracepoint(const struct perf_pmu *pmu); +int tp_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb); +size_t tp_pmu__num_events(struct perf_pmu *pmu); +bool tp_pmu__have_event(struct perf_pmu *pmu, const char *name); + #endif /* __TP_PMU_H */ From 55c09681cc67d175bd62b787c8b6eeafbe1b5851 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 25 Jul 2025 11:51:52 -0700 Subject: [PATCH 1135/2411] perf list: Remove tracepoint printing code Now that the tp_pmu can iterate and describe events remove the custom tracepoint printing logic, this avoids perf list showing the tracepoint events twice. Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20250725185202.68671-7-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-list.c | 29 ++++++++--- tools/perf/util/print-events.c | 93 ---------------------------------- tools/perf/util/print-events.h | 2 - 3 files changed, 23 insertions(+), 101 deletions(-) diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 674bb0afbf93..3a4061d02f6c 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -614,9 +614,18 @@ int cmd_list(int argc, const char **argv) for (i = 0; i < argc; ++i) { char *sep, *s; - if (strcmp(argv[i], "tracepoint") == 0) - print_tracepoint_events(&print_cb, ps); - else if (strcmp(argv[i], "hw") == 0 || + if (strcmp(argv[i], "tracepoint") == 0) { + char *old_pmu_glob = default_ps.pmu_glob; + + default_ps.pmu_glob = strdup("tracepoint"); + if (!default_ps.pmu_glob) { + ret = -1; + goto out; + } + perf_pmus__print_pmu_events(&print_cb, ps); + zfree(&default_ps.pmu_glob); + default_ps.pmu_glob = old_pmu_glob; + } else if (strcmp(argv[i], "hw") == 0 || strcmp(argv[i], "hardware") == 0) print_symbol_events(&print_cb, ps, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX); @@ -658,6 +667,7 @@ 
int cmd_list(int argc, const char **argv) #endif else if ((sep = strchr(argv[i], ':')) != NULL) { char *old_pmu_glob = default_ps.pmu_glob; + char *old_event_glob = default_ps.event_glob; default_ps.event_glob = strdup(argv[i]); if (!default_ps.event_glob) { @@ -665,13 +675,21 @@ int cmd_list(int argc, const char **argv) goto out; } - print_tracepoint_events(&print_cb, ps); + default_ps.pmu_glob = strdup("tracepoint"); + if (!default_ps.pmu_glob) { + zfree(&default_ps.event_glob); + ret = -1; + goto out; + } + perf_pmus__print_pmu_events(&print_cb, ps); + zfree(&default_ps.pmu_glob); + default_ps.pmu_glob = old_pmu_glob; print_sdt_events(&print_cb, ps); default_ps.metrics = true; default_ps.metricgroups = true; metricgroup__print(&print_cb, ps); zfree(&default_ps.event_glob); - default_ps.pmu_glob = old_pmu_glob; + default_ps.event_glob = old_event_glob; } else { if (asprintf(&s, "*%s*", argv[i]) < 0) { printf("Critical: Not enough memory! Trying to continue...\n"); @@ -682,7 +700,6 @@ int cmd_list(int argc, const char **argv) event_symbols_hw, PERF_COUNT_HW_MAX); print_hwcache_events(&print_cb, ps); perf_pmus__print_pmu_events(&print_cb, ps); - print_tracepoint_events(&print_cb, ps); print_sdt_events(&print_cb, ps); default_ps.metrics = true; default_ps.metricgroups = true; diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index c1a8708b55ab..3a5e5e7bae13 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -44,97 +44,6 @@ static const char * const event_type_descriptors[] = { "Hardware breakpoint", }; -/* - * Print the events from /tracing/events - */ -void print_tracepoint_events(const struct print_callbacks *print_cb __maybe_unused, void *print_state __maybe_unused) -{ - char *events_path = get_tracing_file("events"); - int events_fd = open(events_path, O_PATH); - struct dirent **sys_namelist = NULL; - int sys_items; - - if (events_fd < 0) { - pr_err("Error: failed to open tracing events directory\n"); - 
pr_err("%s: %s\n", events_path, strerror(errno)); - return; - } - put_tracing_file(events_path); - - sys_items = tracing_events__scandir_alphasort(&sys_namelist); - - for (int i = 0; i < sys_items; i++) { - struct dirent *sys_dirent = sys_namelist[i]; - struct dirent **evt_namelist = NULL; - int dir_fd; - int evt_items; - - if (sys_dirent->d_type != DT_DIR || - !strcmp(sys_dirent->d_name, ".") || - !strcmp(sys_dirent->d_name, "..")) - goto next_sys; - - dir_fd = openat(events_fd, sys_dirent->d_name, O_PATH); - if (dir_fd < 0) - goto next_sys; - - evt_items = scandirat(events_fd, sys_dirent->d_name, &evt_namelist, NULL, alphasort); - for (int j = 0; j < evt_items; j++) { - /* - * Buffer sized at twice the max filename length + 1 - * separator + 1 \0 terminator. - */ - char buf[NAME_MAX * 2 + 2]; - /* 16 possible hex digits and 22 other characters and \0. */ - char encoding[16 + 22]; - struct dirent *evt_dirent = evt_namelist[j]; - struct io id; - __u64 config; - - if (evt_dirent->d_type != DT_DIR || - !strcmp(evt_dirent->d_name, ".") || - !strcmp(evt_dirent->d_name, "..")) - goto next_evt; - - snprintf(buf, sizeof(buf), "%s/id", evt_dirent->d_name); - io__init(&id, openat(dir_fd, buf, O_RDONLY), buf, sizeof(buf)); - - if (id.fd < 0) - goto next_evt; - - if (io__get_dec(&id, &config) < 0) { - close(id.fd); - goto next_evt; - } - close(id.fd); - - snprintf(buf, sizeof(buf), "%s:%s", - sys_dirent->d_name, evt_dirent->d_name); - snprintf(encoding, sizeof(encoding), "tracepoint/config=0x%llx/", config); - print_cb->print_event(print_state, - /*topic=*/NULL, - /*pmu_name=*/NULL, /* really "tracepoint" */ - /*event_name=*/buf, - /*event_alias=*/NULL, - /*scale_unit=*/NULL, - /*deprecated=*/false, - "Tracepoint event", - /*desc=*/NULL, - /*long_desc=*/NULL, - encoding); -next_evt: - free(evt_namelist[j]); - } - close(dir_fd); - free(evt_namelist); -next_sys: - free(sys_namelist[i]); - } - - free(sys_namelist); - close(events_fd); -} - void print_sdt_events(const struct 
print_callbacks *print_cb, void *print_state) { struct strlist *bidlist, *sdtlist; @@ -552,8 +461,6 @@ void print_events(const struct print_callbacks *print_cb, void *print_state) /*long_desc=*/NULL, /*encoding_desc=*/NULL); - print_tracepoint_events(print_cb, print_state); - print_sdt_events(print_cb, print_state); metricgroup__print(print_cb, print_state); diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h index 48682e2d166d..4d95b8257e23 100644 --- a/tools/perf/util/print-events.h +++ b/tools/perf/util/print-events.h @@ -37,8 +37,6 @@ void print_sdt_events(const struct print_callbacks *print_cb, void *print_state) void print_symbol_events(const struct print_callbacks *print_cb, void *print_state, unsigned int type, const struct event_symbol *syms, unsigned int max); - -void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state); void metricgroup__print(const struct print_callbacks *print_cb, void *print_state); bool is_event_supported(u8 type, u64 config); From b91a9abbf4734d411d304661fbb7e2878281eb51 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 25 Jul 2025 11:51:53 -0700 Subject: [PATCH 1136/2411] perf list: Skip ABI PMUs when printing pmu values Avoid printing tracepoint, legacy and software events when listing for the pmu option. Add the PMU type to the print_event callbacks to ease detection. 
Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20250725185202.68671-8-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-list.c | 17 +++++++++++++---- tools/perf/util/pfm.c | 2 ++ tools/perf/util/pmus.c | 2 ++ tools/perf/util/print-events.c | 5 +++++ tools/perf/util/print-events.h | 2 +- 5 files changed, 23 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 3a4061d02f6c..caf42276bd0f 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -58,6 +58,8 @@ struct print_state { bool metrics; /** @metricgroups: Controls printing of metric and metric groups. */ bool metricgroups; + /** @exclude_abi: Exclude PMUs with types less than PERF_TYPE_MAX except PERF_TYPE_RAW. */ + bool exclude_abi; /** @last_topic: The last printed event topic. */ char *last_topic; /** @last_metricgroups: The last printed metric group. */ @@ -113,7 +115,8 @@ static void wordwrap(FILE *fp, const char *s, int start, int max, int corr) } } -static void default_print_event(void *ps, const char *topic, const char *pmu_name, +static void default_print_event(void *ps, const char *topic, + const char *pmu_name, u32 pmu_type, const char *event_name, const char *event_alias, const char *scale_unit __maybe_unused, bool deprecated, const char *event_type_desc, @@ -130,6 +133,9 @@ static void default_print_event(void *ps, const char *topic, const char *pmu_nam if (print_state->pmu_glob && pmu_name && !strglobmatch(pmu_name, print_state->pmu_glob)) return; + if (print_state->exclude_abi && pmu_type < PERF_TYPE_MAX && pmu_type != PERF_TYPE_RAW) + return; + if (print_state->event_glob && (!event_name || !strglobmatch(event_name, print_state->event_glob)) && (!event_alias || !strglobmatch(event_alias, print_state->event_glob)) && @@ -354,7 +360,8 @@ static void fix_escape_fprintf(FILE *fp, struct strbuf *buf, const char *fmt, .. 
fputs(buf->buf, fp); } -static void json_print_event(void *ps, const char *topic, const char *pmu_name, +static void json_print_event(void *ps, const char *topic, + const char *pmu_name, u32 pmu_type __maybe_unused, const char *event_name, const char *event_alias, const char *scale_unit, bool deprecated, const char *event_type_desc, @@ -647,9 +654,11 @@ int cmd_list(int argc, const char **argv) } else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) print_hwcache_events(&print_cb, ps); - else if (strcmp(argv[i], "pmu") == 0) + else if (strcmp(argv[i], "pmu") == 0) { + default_ps.exclude_abi = true; perf_pmus__print_pmu_events(&print_cb, ps); - else if (strcmp(argv[i], "sdt") == 0) + default_ps.exclude_abi = false; + } else if (strcmp(argv[i], "sdt") == 0) print_sdt_events(&print_cb, ps); else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0) { default_ps.metricgroups = false; diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index e89395814e88..e5b3a2a5ddef 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -230,6 +230,7 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, if (is_libpfm_event_supported(name, cpus, threads)) { print_cb->print_event(print_state, topic, pinfo->name, + /*pmu_type=*/PERF_TYPE_RAW, name, info->equiv, /*scale_unit=*/NULL, /*deprecated=*/NULL, "PFM event", @@ -265,6 +266,7 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, print_cb->print_event(print_state, topic, pinfo->name, + /*pmu_type=*/PERF_TYPE_RAW, name, /*alias=*/NULL, /*scale_unit=*/NULL, /*deprecated=*/NULL, "PFM event", diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c index 9137bb9036ed..98be2eb8f1f0 100644 --- a/tools/perf/util/pmus.c +++ b/tools/perf/util/pmus.c @@ -645,6 +645,7 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p print_cb->print_event(print_state, aliases[j].topic, aliases[j].pmu_name, + 
aliases[j].pmu->type, aliases[j].name, aliases[j].alias, aliases[j].scale_unit, @@ -749,6 +750,7 @@ void perf_pmus__print_raw_pmu_events(const struct print_callbacks *print_cb, voi print_cb->print_event(print_state, /*topic=*/NULL, /*pmu_name=*/NULL, + pmu->type, format_args.short_string.buf, /*event_alias=*/NULL, /*scale_unit=*/NULL, diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index 3a5e5e7bae13..4153124a9948 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -121,6 +121,7 @@ void print_sdt_events(const struct print_callbacks *print_cb, void *print_state) print_cb->print_event(print_state, /*topic=*/NULL, /*pmu_name=*/NULL, + PERF_TYPE_TRACEPOINT, evt_name ?: sdt_name->s, /*event_alias=*/NULL, /*deprecated=*/false, @@ -222,6 +223,7 @@ int print_hwcache_events(const struct print_callbacks *print_cb, void *print_sta print_cb->print_event(print_state, "cache", pmu->name, + pmu->type, name, alias_name, /*scale_unit=*/NULL, @@ -278,6 +280,7 @@ void print_symbol_events(const struct print_callbacks *print_cb, void *print_sta print_cb->print_event(print_state, /*topic=*/NULL, /*pmu_name=*/NULL, + type, nd->s, alias, /*scale_unit=*/NULL, @@ -438,6 +441,7 @@ void print_events(const struct print_callbacks *print_cb, void *print_state) print_cb->print_event(print_state, /*topic=*/NULL, /*pmu_name=*/NULL, + PERF_TYPE_RAW, "rNNN", /*event_alias=*/NULL, /*scale_unit=*/NULL, @@ -452,6 +456,7 @@ void print_events(const struct print_callbacks *print_cb, void *print_state) print_cb->print_event(print_state, /*topic=*/NULL, /*pmu_name=*/NULL, + PERF_TYPE_BREAKPOINT, "mem:[/len][:access]", /*scale_unit=*/NULL, /*event_alias=*/NULL, diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h index 4d95b8257e23..d6ba384f0c66 100644 --- a/tools/perf/util/print-events.h +++ b/tools/perf/util/print-events.h @@ -12,7 +12,7 @@ struct print_callbacks { void (*print_start)(void *print_state); void 
(*print_end)(void *print_state); void (*print_event)(void *print_state, const char *topic, - const char *pmu_name, + const char *pmu_name, u32 pmu_type, const char *event_name, const char *event_alias, const char *scale_unit, bool deprecated, const char *event_type_desc, From 2662c7a9c3dcc9613a01c07a9118beb906aa455b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Jun 2025 11:02:19 +0200 Subject: [PATCH 1137/2411] fbdev: nvidiafb: fix build on 32-bit ARCH=um Now that ARCH=um no longer has IO port accesses, this driver can no longer build as-is. Make the IO port calls not just conditional on i386 but also !UML. Reported-by: Arnd Bergmann Signed-off-by: Johannes Berg Signed-off-by: Helge Deller --- drivers/video/fbdev/nvidia/nv_local.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/nvidia/nv_local.h b/drivers/video/fbdev/nvidia/nv_local.h index 68e508daa417..93aff35305a9 100644 --- a/drivers/video/fbdev/nvidia/nv_local.h +++ b/drivers/video/fbdev/nvidia/nv_local.h @@ -80,7 +80,7 @@ (par)->dmaFree -= ((size) + 1); \ } -#if defined(__i386__) +#if defined(__i386__) && !defined(CONFIG_UML) #define _NV_FENCE() outb(0, 0x3D0); #else #define _NV_FENCE() mb(); From ecdd7df997fd992f0ec70b788e3b12258008a2bf Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 15 Jun 2025 11:36:51 -0700 Subject: [PATCH 1138/2411] fbdev: nvidiafb: add depends on HAS_IOPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nvidiafb driver uses inb()/outb() without depending on HAS_IOPORT, which leads to build errors since kernel v6.13-rc1: commit 6f043e757445 ("asm-generic/io.h: Remove I/O port accessors for HAS_IOPORT=n") Add the HAS_IOPORT dependency to prevent the build errors. 
(Found in ARCH=um allmodconfig builds) drivers/video/fbdev/nvidia/nv_accel.c: In function ‘NVDmaWait’: include/asm-generic/io.h:596:15: error: call to ‘_outb’ declared with attribute error: outb() requires CONFIG_HAS_IOPORT 596 | #define _outb _outb Signed-off-by: Randy Dunlap Cc: Arnd Bergmann Cc: Niklas Schnelle Cc: Antonino Daplas Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: stable@vger.kernel.org # v6.13+ Signed-off-by: Helge Deller --- drivers/video/fbdev/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index 55c6686f091e..c21484d15f0c 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -660,7 +660,7 @@ config FB_ATMEL config FB_NVIDIA tristate "nVidia Framebuffer Support" - depends on FB && PCI + depends on FB && PCI && HAS_IOPORT select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT From 523b84dc7ccea9c4d79126d6ed1cf9033cf83b05 Mon Sep 17 00:00:00 2001 From: Yongzhen Zhang Date: Tue, 1 Jul 2025 17:07:04 +0800 Subject: [PATCH 1139/2411] fbdev: fix potential buffer overflow in do_register_framebuffer() The current implementation may lead to buffer overflow when: 1. Unregistration creates NULL gaps in registered_fb[] 2. All array slots become occupied despite num_registered_fb < FB_MAX 3. The registration loop exceeds array bounds Add boundary check to prevent registered_fb[FB_MAX] access. 
Signed-off-by: Yongzhen Zhang Signed-off-by: Helge Deller --- drivers/video/fbdev/core/fbmem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index dfcf5e4d1d4c..53f1719b1ae1 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -449,6 +449,9 @@ static int do_register_framebuffer(struct fb_info *fb_info) if (!registered_fb[i]) break; + if (i >= FB_MAX) + return -ENXIO; + if (!fb_info->modelist.prev || !fb_info->modelist.next) INIT_LIST_HEAD(&fb_info->modelist); From c80de50c192f135a78f6c924818b2f5cd6ca7524 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 3 Jul 2025 13:35:13 -0500 Subject: [PATCH 1140/2411] fbdev: simplefb: Use of_reserved_mem_region_to_resource() for "memory-region" Use the newly added of_reserved_mem_region_to_resource() function to handle "memory-region" properties. The error handling is a bit different. "memory-region" is optional, so failed lookup is not an error. But then an error in of_address_to_resource() is treated as an error. However, that distinction is not really important. Either the region is available and usable or it is not. So now, it is just of_reserved_mem_region_to_resource() which is checked for an error. 
Signed-off-by: Rob Herring (Arm) Signed-off-by: Helge Deller --- drivers/video/fbdev/simplefb.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/video/fbdev/simplefb.c b/drivers/video/fbdev/simplefb.c index be95fcddce4c..1893815dc67f 100644 --- a/drivers/video/fbdev/simplefb.c +++ b/drivers/video/fbdev/simplefb.c @@ -21,9 +21,9 @@ #include #include #include -#include #include #include +#include #include #include #include @@ -134,7 +134,7 @@ struct simplefb_params { static int simplefb_parse_dt(struct platform_device *pdev, struct simplefb_params *params) { - struct device_node *np = pdev->dev.of_node, *mem; + struct device_node *np = pdev->dev.of_node; int ret; const char *format; int i; @@ -174,19 +174,10 @@ static int simplefb_parse_dt(struct platform_device *pdev, return -EINVAL; } - mem = of_parse_phandle(np, "memory-region", 0); - if (mem) { - ret = of_address_to_resource(mem, 0, &params->memory); - if (ret < 0) { - dev_err(&pdev->dev, "failed to parse memory-region\n"); - of_node_put(mem); - return ret; - } - + ret = of_reserved_mem_region_to_resource(np, 0, &params->memory); + if (!ret) { if (of_property_present(np, "reg")) dev_warn(&pdev->dev, "preferring \"memory-region\" over \"reg\" property\n"); - - of_node_put(mem); } else { memset(&params->memory, 0, sizeof(params->memory)); } From b56f93f568dc0214963d9d9d2fd2c992cf241c76 Mon Sep 17 00:00:00 2001 From: Giovanni Di Santi Date: Wed, 9 Jul 2025 11:53:52 +0200 Subject: [PATCH 1141/2411] fbdev: kyro: Add missing PCI memory region request The kyro framebuffer driver did not request its PCI memory regions, which could lead to conflicts with other drivers. This change addresses the task "Request memory regions in all fbdev drivers" from the file Documentation/gpu/todo.rst. This is addressed by using the managed device functions pcim_enable_device() and pcim_request_all_regions().
This simplifies the code by making error handling and driver removal cleanup automatic for these resources. Signed-off-by: Giovanni Di Santi Signed-off-by: Helge Deller --- drivers/video/fbdev/kyro/fbdev.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/kyro/fbdev.c b/drivers/video/fbdev/kyro/fbdev.c index 08ee8baa79f8..86e5d60ed0ff 100644 --- a/drivers/video/fbdev/kyro/fbdev.c +++ b/drivers/video/fbdev/kyro/fbdev.c @@ -679,7 +679,8 @@ static int kyrofb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - if ((err = pci_enable_device(pdev))) { + err = pcim_enable_device(pdev); + if (err) { printk(KERN_WARNING "kyrofb: Can't enable pdev: %d\n", err); return err; } @@ -688,6 +689,10 @@ static int kyrofb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!info) return -ENOMEM; + err = pcim_request_all_regions(pdev, "kyrofb"); + if (err) + goto out_free_fb; + currentpar = info->par; kyro_fix.smem_start = pci_resource_start(pdev, 0); From e0bf12a43243e6afc5a03fc55c58ec48aba48088 Mon Sep 17 00:00:00 2001 From: Giovanni Di Santi Date: Wed, 9 Jul 2025 11:53:53 +0200 Subject: [PATCH 1142/2411] fbdev: kyro: Use devm_ioremap() for mmio registers Replace the manual ioremap() call for the MMIO registers with the device-managed devm_ioremap() variant. This simplifies the driver's resource management by ensuring the memory is automatically unmapped when the driver detaches from the device. 
Signed-off-by: Giovanni Di Santi Signed-off-by: Helge Deller --- drivers/video/fbdev/kyro/fbdev.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/video/fbdev/kyro/fbdev.c b/drivers/video/fbdev/kyro/fbdev.c index 86e5d60ed0ff..ddc241f508b1 100644 --- a/drivers/video/fbdev/kyro/fbdev.c +++ b/drivers/video/fbdev/kyro/fbdev.c @@ -701,13 +701,14 @@ static int kyrofb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) kyro_fix.mmio_len = pci_resource_len(pdev, 1); currentpar->regbase = deviceInfo.pSTGReg = - ioremap(kyro_fix.mmio_start, kyro_fix.mmio_len); + devm_ioremap(&pdev->dev, kyro_fix.mmio_start, + kyro_fix.mmio_len); if (!currentpar->regbase) goto out_free_fb; info->screen_base = pci_ioremap_wc_bar(pdev, 0); if (!info->screen_base) - goto out_unmap_regs; + goto out_free_fb; if (!nomtrr) currentpar->wc_cookie = arch_phys_wc_add(kyro_fix.smem_start, @@ -755,8 +756,6 @@ static int kyrofb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) out_unmap: iounmap(info->screen_base); -out_unmap_regs: - iounmap(currentpar->regbase); out_free_fb: framebuffer_release(info); @@ -779,7 +778,6 @@ static void kyrofb_remove(struct pci_dev *pdev) deviceInfo.ulOverlayOffset = 0; iounmap(info->screen_base); - iounmap(par->regbase); arch_phys_wc_del(par->wc_cookie); From 32dfb6112ea3ca143636832cd34234f2be4830bb Mon Sep 17 00:00:00 2001 From: Giovanni Di Santi Date: Wed, 9 Jul 2025 11:53:54 +0200 Subject: [PATCH 1143/2411] fbdev: kyro: Use devm_ioremap_wc() for screen mem Replace the manual pci_ioremap_wc() call for mapping screen memory with the device-managed devm_ioremap_wc() variant. This simplifies the driver's resource management by ensuring the memory is automatically unmapped when the driver detaches from the device. 
Signed-off-by: Giovanni Di Santi Signed-off-by: Helge Deller --- drivers/video/fbdev/kyro/fbdev.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/video/fbdev/kyro/fbdev.c b/drivers/video/fbdev/kyro/fbdev.c index ddc241f508b1..c8b1dfa456a3 100644 --- a/drivers/video/fbdev/kyro/fbdev.c +++ b/drivers/video/fbdev/kyro/fbdev.c @@ -706,7 +706,8 @@ static int kyrofb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!currentpar->regbase) goto out_free_fb; - info->screen_base = pci_ioremap_wc_bar(pdev, 0); + info->screen_base = devm_ioremap_wc(&pdev->dev, kyro_fix.smem_start, + kyro_fix.smem_len); if (!info->screen_base) goto out_free_fb; @@ -743,7 +744,7 @@ static int kyrofb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) fb_memset_io(info->screen_base, 0, size); if (register_framebuffer(info) < 0) - goto out_unmap; + goto out_free_fb; fb_info(info, "%s frame buffer device, at %dx%d@%d using %ldk/%ldk of VRAM\n", info->fix.id, @@ -754,8 +755,6 @@ static int kyrofb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; -out_unmap: - iounmap(info->screen_base); out_free_fb: framebuffer_release(info); @@ -777,8 +776,6 @@ static void kyrofb_remove(struct pci_dev *pdev) deviceInfo.ulNextFreeVidMem = 0; deviceInfo.ulOverlayOffset = 0; - iounmap(info->screen_base); - arch_phys_wc_del(par->wc_cookie); unregister_framebuffer(info); From 57ba4d5338a6b455d6b0bb9aa4ce9826897b9007 Mon Sep 17 00:00:00 2001 From: "Darshan R." Date: Mon, 21 Jul 2025 12:56:47 +0000 Subject: [PATCH 1144/2411] fbdev: svgalib: Clean up coding style This patch addresses various coding style issues in `svgalib.c` to improve readability and better align the code with the Linux kernel's formatting standards. The changes primarily consist of: - Adjusting whitespace around operators and after keywords. - Standardizing brace placement for control flow statements. - Removing unnecessary braces on single-statement if/else blocks. 
- Deleting extraneous blank lines throughout the file. These changes are purely stylistic and introduce no functional modifications. Signed-off-by: Darshan R. Signed-off-by: Helge Deller --- drivers/video/fbdev/core/svgalib.c | 95 +++++++++++++----------------- 1 file changed, 42 insertions(+), 53 deletions(-) diff --git a/drivers/video/fbdev/core/svgalib.c b/drivers/video/fbdev/core/svgalib.c index 821b89a0a645..5234ad109dfd 100644 --- a/drivers/video/fbdev/core/svgalib.c +++ b/drivers/video/fbdev/core/svgalib.c @@ -19,7 +19,6 @@ #include #include - /* Write a CRT register value spread across multiple registers */ void svga_wcrt_multi(void __iomem *regbase, const struct vga_regset *regset, u32 value) { @@ -31,12 +30,13 @@ void svga_wcrt_multi(void __iomem *regbase, const struct vga_regset *regset, u32 while (bitnum <= regset->highbit) { bitval = 1 << bitnum; regval = regval & ~bitval; - if (value & 1) regval = regval | bitval; - bitnum ++; + if (value & 1) + regval = regval | bitval; + bitnum++; value = value >> 1; } vga_wcrt(regbase, regset->regnum, regval); - regset ++; + regset++; } } @@ -51,12 +51,13 @@ void svga_wseq_multi(void __iomem *regbase, const struct vga_regset *regset, u32 while (bitnum <= regset->highbit) { bitval = 1 << bitnum; regval = regval & ~bitval; - if (value & 1) regval = regval | bitval; - bitnum ++; + if (value & 1) + regval = regval | bitval; + bitnum++; value = value >> 1; } vga_wseq(regbase, regset->regnum, regval); - regset ++; + regset++; } } @@ -66,15 +67,13 @@ static unsigned int svga_regset_size(const struct vga_regset *regset) while (regset->regnum != VGA_REGSET_END_VAL) { count += regset->highbit - regset->lowbit + 1; - regset ++; + regset++; } return 1 << count; } - /* ------------------------------------------------------------------------- */ - /* Set graphics controller registers to sane values */ void svga_set_default_gfx_regs(void __iomem *regbase) { @@ -102,7 +101,7 @@ void svga_set_default_atc_regs(void __iomem *regbase) 
vga_w(regbase, VGA_ATT_W, 0x00); /* All standard ATC registers (AR00 - AR14) */ - for (count = 0; count <= 0xF; count ++) + for (count = 0; count <= 0xF; count++) svga_wattr(regbase, count, count); svga_wattr(regbase, VGA_ATC_MODE, 0x01); @@ -187,10 +186,8 @@ void svga_dump_var(struct fb_var_screeninfo *var, int node) } #endif /* 0 */ - /* ------------------------------------------------------------------------- */ - void svga_settile(struct fb_info *info, struct fb_tilemap *map) { const u8 *font = map->data; @@ -229,7 +226,7 @@ void svga_tilecopy(struct fb_info *info, struct fb_tilearea *area) ((area->sy == area->dy) && (area->sx > area->dx))) { src = fb + area->sx * colstride + area->sy * rowstride; dst = fb + area->dx * colstride + area->dy * rowstride; - } else { + } else { src = fb + (area->sx + area->width - 1) * colstride + (area->sy + area->height - 1) * rowstride; dst = fb + (area->dx + area->width - 1) * colstride @@ -237,7 +234,7 @@ void svga_tilecopy(struct fb_info *info, struct fb_tilearea *area) colstride = -colstride; rowstride = -rowstride; - } + } for (dy = 0; dy < area->height; dy++) { u16 __iomem *src2 = src; @@ -284,19 +281,19 @@ void svga_tileblit(struct fb_info *info, struct fb_tileblit *blit) u8 __iomem *fb = (u8 __iomem *)info->screen_base; fb += blit->sx * colstride + blit->sy * rowstride; - i=0; - for (dy=0; dy < blit->height; dy ++) { + i = 0; + for (dy = 0; dy < blit->height; dy++) { u8 __iomem *fb2 = fb; - for (dx = 0; dx < blit->width; dx ++) { + for (dx = 0; dx < blit->width; dx++) { fb_writeb(blit->indices[i], fb2); fb_writeb(attr, fb2 + 1); fb2 += colstride; - i ++; - if (i == blit->length) return; + i++; + if (i == blit->length) + return; } fb += rowstride; } - } /* Set cursor in text (tileblit) mode */ @@ -308,15 +305,15 @@ void svga_tilecursor(void __iomem *regbase, struct fb_info *info, struct fb_tile + (cursor->sy + (info->var.yoffset / 16)) * (info->var.xres_virtual / 8); - if (! 
cursor -> mode) + if (!cursor->mode) return; svga_wcrt_mask(regbase, 0x0A, 0x20, 0x20); /* disable cursor */ - if (cursor -> shape == FB_TILE_CURSOR_NONE) + if (cursor->shape == FB_TILE_CURSOR_NONE) return; - switch (cursor -> shape) { + switch (cursor->shape) { case FB_TILE_CURSOR_UNDERLINE: cs = 0x0d; break; @@ -374,7 +371,6 @@ EXPORT_SYMBOL(svga_get_caps); /* ------------------------------------------------------------------------- */ - /* * Compute PLL settings (M, N, R) * F_VCO = (F_BASE * M) / N @@ -385,7 +381,7 @@ int svga_compute_pll(const struct svga_pll *pll, u32 f_wanted, u16 *m, u16 *n, u u16 am, an, ar; u32 f_vco, f_current, delta_current, delta_best; - pr_debug("fb%d: ideal frequency: %d kHz\n", node, (unsigned int) f_wanted); + pr_debug("fb%d: ideal frequency: %d kHz\n", node, (unsigned int)f_wanted); ar = pll->r_max; f_vco = f_wanted << ar; @@ -416,7 +412,7 @@ int svga_compute_pll(const struct svga_pll *pll, u32 f_wanted, u16 *m, u16 *n, u while ((am <= pll->m_max) && (an <= pll->n_max)) { f_current = (pll->f_base * am) / an; - delta_current = abs_diff (f_current, f_vco); + delta_current = abs_diff(f_current, f_vco); if (delta_current < delta_best) { delta_best = delta_current; @@ -424,58 +420,55 @@ int svga_compute_pll(const struct svga_pll *pll, u32 f_wanted, u16 *m, u16 *n, u *n = an; } - if (f_current <= f_vco) { - am ++; - } else { - an ++; - } + if (f_current <= f_vco) + am++; + else + an++; } f_current = (pll->f_base * *m) / *n; - pr_debug("fb%d: found frequency: %d kHz (VCO %d kHz)\n", node, (int) (f_current >> ar), (int) f_current); - pr_debug("fb%d: m = %d n = %d r = %d\n", node, (unsigned int) *m, (unsigned int) *n, (unsigned int) *r); + pr_debug("fb%d: found frequency: %d kHz (VCO %d kHz)\n", node, (int)(f_current >> ar), (int)f_current); + pr_debug("fb%d: m = %d n = %d r = %d\n", node, (unsigned int)*m, (unsigned int)*n, (unsigned int)*r); return 0; } - /* ------------------------------------------------------------------------- */ - /* 
Check CRT timing values */ int svga_check_timings(const struct svga_timing_regs *tm, struct fb_var_screeninfo *var, int node) { u32 value; - var->xres = (var->xres+7)&~7; - var->left_margin = (var->left_margin+7)&~7; - var->right_margin = (var->right_margin+7)&~7; - var->hsync_len = (var->hsync_len+7)&~7; + var->xres = (var->xres + 7) & ~7; + var->left_margin = (var->left_margin + 7) & ~7; + var->right_margin = (var->right_margin + 7) & ~7; + var->hsync_len = (var->hsync_len + 7) & ~7; /* Check horizontal total */ value = var->xres + var->left_margin + var->right_margin + var->hsync_len; - if (((value / 8) - 5) >= svga_regset_size (tm->h_total_regs)) + if (((value / 8) - 5) >= svga_regset_size(tm->h_total_regs)) return -EINVAL; /* Check horizontal display and blank start */ value = var->xres; - if (((value / 8) - 1) >= svga_regset_size (tm->h_display_regs)) + if (((value / 8) - 1) >= svga_regset_size(tm->h_display_regs)) return -EINVAL; - if (((value / 8) - 1) >= svga_regset_size (tm->h_blank_start_regs)) + if (((value / 8) - 1) >= svga_regset_size(tm->h_blank_start_regs)) return -EINVAL; /* Check horizontal sync start */ value = var->xres + var->right_margin; - if (((value / 8) - 1) >= svga_regset_size (tm->h_sync_start_regs)) + if (((value / 8) - 1) >= svga_regset_size(tm->h_sync_start_regs)) return -EINVAL; /* Check horizontal blank end (or length) */ value = var->left_margin + var->right_margin + var->hsync_len; - if ((value == 0) || ((value / 8) >= svga_regset_size (tm->h_blank_end_regs))) + if ((value == 0) || ((value / 8) >= svga_regset_size(tm->h_blank_end_regs))) return -EINVAL; /* Check horizontal sync end (or length) */ value = var->hsync_len; - if ((value == 0) || ((value / 8) >= svga_regset_size (tm->h_sync_end_regs))) + if ((value == 0) || ((value / 8) >= svga_regset_size(tm->h_sync_end_regs))) return -EINVAL; /* Check vertical total */ @@ -497,12 +490,12 @@ int svga_check_timings(const struct svga_timing_regs *tm, struct fb_var_screenin /* Check 
vertical blank end (or length) */ value = var->upper_margin + var->lower_margin + var->vsync_len; - if ((value == 0) || (value >= svga_regset_size (tm->v_blank_end_regs))) + if ((value == 0) || (value >= svga_regset_size(tm->v_blank_end_regs))) return -EINVAL; /* Check vertical sync end (or length) */ value = var->vsync_len; - if ((value == 0) || (value >= svga_regset_size (tm->v_sync_end_regs))) + if ((value == 0) || (value >= svga_regset_size(tm->v_sync_end_regs))) return -EINVAL; return 0; @@ -596,18 +589,15 @@ void svga_set_timings(void __iomem *regbase, const struct svga_timing_regs *tm, vga_w(regbase, VGA_MIS_W, regval); } - /* ------------------------------------------------------------------------- */ - static inline int match_format(const struct svga_fb_format *frm, struct fb_var_screeninfo *var) { int i = 0; int stored = -EINVAL; - while (frm->bits_per_pixel != SVGA_FORMAT_END_VAL) - { + while (frm->bits_per_pixel != SVGA_FORMAT_END_VAL) { if ((var->bits_per_pixel == frm->bits_per_pixel) && (var->red.length <= frm->red.length) && (var->green.length <= frm->green.length) && @@ -647,7 +637,6 @@ int svga_match_format(const struct svga_fb_format *frm, return i; } - EXPORT_SYMBOL(svga_wcrt_multi); EXPORT_SYMBOL(svga_wseq_multi); From da11e6a30e0bb8e911288bdc443b3dc8f6a7cac7 Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Wed, 23 Jul 2025 22:25:34 -0500 Subject: [PATCH 1145/2411] fbdev: imxfb: Check fb_add_videomode to prevent null-ptr-deref fb_add_videomode() can fail with -ENOMEM when its internal kmalloc() cannot allocate a struct fb_modelist. If that happens, the modelist stays empty but the driver continues to register. Add a check for its return value to prevent potential null-ptr-deref, which is similar to the commit 17186f1f90d3 ("fbdev: Fix do_register_framebuffer to prevent null-ptr-deref in fb_videomode_to_var").
Fixes: 1b6c79361ba5 ("video: imxfb: Add DT support") Signed-off-by: Chenyuan Yang Signed-off-by: Helge Deller --- drivers/video/fbdev/imxfb.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c index f30da32cdaed..a077bf346bdf 100644 --- a/drivers/video/fbdev/imxfb.c +++ b/drivers/video/fbdev/imxfb.c @@ -996,8 +996,13 @@ static int imxfb_probe(struct platform_device *pdev) info->fix.smem_start = fbi->map_dma; INIT_LIST_HEAD(&info->modelist); - for (i = 0; i < fbi->num_modes; i++) - fb_add_videomode(&fbi->mode[i].mode, &info->modelist); + for (i = 0; i < fbi->num_modes; i++) { + ret = fb_add_videomode(&fbi->mode[i].mode, &info->modelist); + if (ret) { + dev_err(&pdev->dev, "Failed to add videomode\n"); + goto failed_cmap; + } + } /* * This makes sure that our colour bitfield From a2a42f0c96d709d0cf5cc672acb352934ca95326 Mon Sep 17 00:00:00 2001 From: Daniel Palmer Date: Fri, 25 Jul 2025 14:30:57 +0900 Subject: [PATCH 1146/2411] fbdev: Fix typo in Kconfig text for FB_DEVICE Seems like someone hit 'c' when they meant to hit 'd'. Signed-off-by: Daniel Palmer Signed-off-by: Helge Deller --- drivers/video/fbdev/core/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/Kconfig b/drivers/video/fbdev/core/Kconfig index 4abe12db7594..413cbfac569d 100644 --- a/drivers/video/fbdev/core/Kconfig +++ b/drivers/video/fbdev/core/Kconfig @@ -31,7 +31,7 @@ config FB_DEVICE default FB help Say Y here if you want the legacy /dev/fb* device file and - interfaces within sysfs anc procfs. It is only required if you + interfaces within sysfs and procfs. It is only required if you have userspace programs that depend on fbdev for graphics output. This does not affect the framebuffer console. If unsure, say N. 
From 91a256467eed9e4449969163e3c93bc4bd990145 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 23 Sep 2024 23:48:53 +0300 Subject: [PATCH 1147/2411] fbcon: fbcon_cursor_noblink -> fbcon_cursor_blink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Invert fbcon_cursor_noblink into fbcon_cursor_blink so that: - it matches the sysfs attribute exactly - avoids having to do these NOT operations all over the place - use bool instead of int Signed-off-by: Ville Syrjälä Signed-off-by: Helge Deller --- drivers/video/fbdev/core/fbcon.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 2df48037688d..83b42f724559 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -171,7 +171,7 @@ static const struct consw fb_con; #define advance_row(p, delta) (unsigned short *)((unsigned long)(p) + (delta) * vc->vc_size_row) -static int fbcon_cursor_noblink; +static bool fbcon_cursor_blink = true; #define divides(a, b) ((!(a) || (b)%(a)) ? 
0 : 1) @@ -406,7 +406,7 @@ static void fbcon_add_cursor_work(struct fb_info *info) { struct fbcon_ops *ops = info->fbcon_par; - if (!fbcon_cursor_noblink) + if (fbcon_cursor_blink) queue_delayed_work(system_power_efficient_wq, &ops->cursor_work, ops->cur_blink_jiffies); } @@ -3273,10 +3273,10 @@ static ssize_t cursor_blink_store(struct device *device, blink = simple_strtoul(buf, last, 0); if (blink) { - fbcon_cursor_noblink = 0; + fbcon_cursor_blink = true; fbcon_add_cursor_work(info); } else { - fbcon_cursor_noblink = 1; + fbcon_cursor_blink = false; fbcon_del_cursor_work(info); } From ffc825a27f5503136196cb38f41641b58bf2df31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 23 Sep 2024 18:57:47 +0300 Subject: [PATCH 1148/2411] fbcon: fbcon_is_inactive() -> fbcon_is_active() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Invert fbcon_is_inactive() into fbcon_is_active(). Much easier on the poor brain when you don't have to do double negations all over the place.
Signed-off-by: Ville Syrjälä Acked-by: Helge Deller Reviewed-by: Thomas Zimmermann Signed-off-by: Helge Deller --- drivers/video/fbdev/core/fbcon.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 83b42f724559..0e06066aed95 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -288,12 +288,12 @@ static bool fbcon_skip_panic(struct fb_info *info) #endif } -static inline int fbcon_is_inactive(struct vc_data *vc, struct fb_info *info) +static inline int fbcon_is_active(struct vc_data *vc, struct fb_info *info) { struct fbcon_ops *ops = info->fbcon_par; - return (info->state != FBINFO_STATE_RUNNING || - vc->vc_mode != KD_TEXT || ops->graphics || fbcon_skip_panic(info)); + return info->state == FBINFO_STATE_RUNNING && + vc->vc_mode == KD_TEXT && !ops->graphics && !fbcon_skip_panic(info); } static int get_color(struct vc_data *vc, struct fb_info *info, @@ -1266,7 +1266,7 @@ static void __fbcon_clear(struct vc_data *vc, unsigned int sy, unsigned int sx, struct fbcon_display *p = &fb_display[vc->vc_num]; u_int y_break; - if (fbcon_is_inactive(vc, info)) + if (!fbcon_is_active(vc, info)) return; if (!height || !width) @@ -1310,7 +1310,7 @@ static void fbcon_putcs(struct vc_data *vc, const u16 *s, unsigned int count, struct fbcon_display *p = &fb_display[vc->vc_num]; struct fbcon_ops *ops = info->fbcon_par; - if (!fbcon_is_inactive(vc, info)) + if (fbcon_is_active(vc, info)) ops->putcs(vc, info, s, count, real_y(p, ypos), xpos, get_color(vc, info, scr_readw(s), 1), get_color(vc, info, scr_readw(s), 0)); @@ -1321,7 +1321,7 @@ static void fbcon_clear_margins(struct vc_data *vc, int bottom_only) struct fb_info *info = fbcon_info_from_console(vc->vc_num); struct fbcon_ops *ops = info->fbcon_par; - if (!fbcon_is_inactive(vc, info)) + if (fbcon_is_active(vc, info)) ops->clear_margins(vc, info, margin_color, bottom_only); } @@ 
-1333,7 +1333,7 @@ static void fbcon_cursor(struct vc_data *vc, bool enable) ops->cur_blink_jiffies = msecs_to_jiffies(vc->vc_cur_blink_ms); - if (fbcon_is_inactive(vc, info) || vc->vc_deccm != 1) + if (!fbcon_is_active(vc, info) || vc->vc_deccm != 1) return; if (vc->vc_cursor_type & CUR_SW) @@ -1739,7 +1739,7 @@ static void fbcon_bmove(struct vc_data *vc, int sy, int sx, int dy, int dx, struct fb_info *info = fbcon_info_from_console(vc->vc_num); struct fbcon_display *p = &fb_display[vc->vc_num]; - if (fbcon_is_inactive(vc, info)) + if (!fbcon_is_active(vc, info)) return; if (!width || !height) @@ -1763,7 +1763,7 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b, struct fbcon_display *p = &fb_display[vc->vc_num]; int scroll_partial = info->flags & FBINFO_PARTIAL_PAN_OK; - if (fbcon_is_inactive(vc, info)) + if (!fbcon_is_active(vc, info)) return true; fbcon_cursor(vc, false); @@ -2147,7 +2147,7 @@ static bool fbcon_switch(struct vc_data *vc) fbcon_del_cursor_work(old_info); } - if (fbcon_is_inactive(vc, info) || + if (!fbcon_is_active(vc, info) || ops->blank_state != FB_BLANK_UNBLANK) fbcon_del_cursor_work(info); else @@ -2187,7 +2187,7 @@ static bool fbcon_switch(struct vc_data *vc) scrollback_max = 0; scrollback_current = 0; - if (!fbcon_is_inactive(vc, info)) { + if (fbcon_is_active(vc, info)) { ops->var.xoffset = ops->var.yoffset = p->yscroll = 0; ops->update_start(info); } @@ -2243,7 +2243,7 @@ static bool fbcon_blank(struct vc_data *vc, enum vesa_blank_mode blank, } } - if (!fbcon_is_inactive(vc, info)) { + if (fbcon_is_active(vc, info)) { if (ops->blank_state != blank) { ops->blank_state = blank; fbcon_cursor(vc, !blank); @@ -2257,7 +2257,7 @@ static bool fbcon_blank(struct vc_data *vc, enum vesa_blank_mode blank, update_screen(vc); } - if (mode_switch || fbcon_is_inactive(vc, info) || + if (mode_switch || !fbcon_is_active(vc, info) || ops->blank_state != FB_BLANK_UNBLANK) fbcon_del_cursor_work(info); else @@ -2587,7 +2587,7 @@ 
static void fbcon_set_palette(struct vc_data *vc, const unsigned char *table) int i, j, k, depth; u8 val; - if (fbcon_is_inactive(vc, info)) + if (!fbcon_is_active(vc, info)) return; if (!con_is_visible(vc)) @@ -2687,7 +2687,7 @@ static void fbcon_modechanged(struct fb_info *info) scrollback_max = 0; scrollback_current = 0; - if (!fbcon_is_inactive(vc, info)) { + if (fbcon_is_active(vc, info)) { ops->var.xoffset = ops->var.yoffset = p->yscroll = 0; ops->update_start(info); } From 311b07842fb0bb69b5b266b3dfd6037260a3ec2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 23 Sep 2024 18:57:48 +0300 Subject: [PATCH 1149/2411] fbcon: Introduce get_{fg,bg}_color() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the code more legible by adding get_{fg,bg}_color() which hide the obscure 'is_fg' parameter of get_color() from the caller. Signed-off-by: Ville Syrjälä Acked-by: Helge Deller Reviewed-by: Thomas Zimmermann Signed-off-by: Helge Deller --- drivers/video/fbdev/core/fbcon.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 0e06066aed95..bc3cde4138bd 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -363,6 +363,16 @@ static int get_color(struct vc_data *vc, struct fb_info *info, return color; } +static int get_fg_color(struct vc_data *vc, struct fb_info *info, u16 c) +{ + return get_color(vc, info, c, 1); +} + +static int get_bg_color(struct vc_data *vc, struct fb_info *info, u16 c) +{ + return get_color(vc, info, c, 0); +} + static void fb_flashcursor(struct work_struct *work) { struct fbcon_ops *ops = container_of(work, struct fbcon_ops, cursor_work.work); @@ -394,8 +404,9 @@ static void fb_flashcursor(struct work_struct *work) c = scr_readw((u16 *) vc->vc_pos); enable = ops->cursor_flash && !ops->cursor_state.enable; - ops->cursor(vc, 
info, enable, get_color(vc, info, c, 1), - get_color(vc, info, c, 0)); + ops->cursor(vc, info, enable, + get_fg_color(vc, info, c), + get_bg_color(vc, info, c)); console_unlock(); queue_delayed_work(system_power_efficient_wq, &ops->cursor_work, @@ -1312,8 +1323,8 @@ static void fbcon_putcs(struct vc_data *vc, const u16 *s, unsigned int count, if (fbcon_is_active(vc, info)) ops->putcs(vc, info, s, count, real_y(p, ypos), xpos, - get_color(vc, info, scr_readw(s), 1), - get_color(vc, info, scr_readw(s), 0)); + get_fg_color(vc, info, scr_readw(s)), + get_bg_color(vc, info, scr_readw(s))); } static void fbcon_clear_margins(struct vc_data *vc, int bottom_only) @@ -1346,8 +1357,9 @@ static void fbcon_cursor(struct vc_data *vc, bool enable) if (!ops->cursor) return; - ops->cursor(vc, info, enable, get_color(vc, info, c, 1), - get_color(vc, info, c, 0)); + ops->cursor(vc, info, enable, + get_fg_color(vc, info, c), + get_bg_color(vc, info, c)); } static int scrollback_phys_max = 0; From 81b96e4aef9592493873507eec52eca68f0721ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 23 Sep 2024 23:50:16 +0300 Subject: [PATCH 1150/2411] fbcon: Use 'bool' where appropriate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use 'bool' type where it makes more sense than 'int'.
v2: Rebase due to corrected 'fbcon_cursor_blink' initial value Acked-by: Helge Deller Signed-off-by: Ville Syrjälä Reviewed-by: Thomas Zimmermann Signed-off-by: Helge Deller --- drivers/video/fbdev/core/fbcon.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index bc3cde4138bd..09343ab4ac51 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -134,9 +134,9 @@ static int logo_shown = FBCON_LOGO_CANSHOW; /* console mappings */ static unsigned int first_fb_vc; static unsigned int last_fb_vc = MAX_NR_CONSOLES - 1; -static int fbcon_is_default = 1; +static bool fbcon_is_default = true; static int primary_device = -1; -static int fbcon_has_console_bind; +static bool fbcon_has_console_bind; #ifdef CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY static int map_override; @@ -288,7 +288,7 @@ static bool fbcon_skip_panic(struct fb_info *info) #endif } -static inline int fbcon_is_active(struct vc_data *vc, struct fb_info *info) +static inline bool fbcon_is_active(struct vc_data *vc, struct fb_info *info) { struct fbcon_ops *ops = info->fbcon_par; @@ -297,7 +297,7 @@ static inline int fbcon_is_active(struct vc_data *vc, struct fb_info *info) } static int get_color(struct vc_data *vc, struct fb_info *info, - u16 c, int is_fg) + u16 c, bool is_fg) { int depth = fb_get_color_depth(&info->var, &info->fix); int color = 0; @@ -365,12 +365,12 @@ static int get_color(struct vc_data *vc, struct fb_info *info, static int get_fg_color(struct vc_data *vc, struct fb_info *info, u16 c) { - return get_color(vc, info, c, 1); + return get_color(vc, info, c, true); } static int get_bg_color(struct vc_data *vc, struct fb_info *info, u16 c) { - return get_color(vc, info, c, 0); + return get_color(vc, info, c, false); } static void fb_flashcursor(struct work_struct *work) @@ -474,7 +474,7 @@ static int __init fb_console_setup(char *this_opt) last_fb_vc = 
simple_strtoul(options, &options, 10) - 1; if (last_fb_vc < first_fb_vc || last_fb_vc >= MAX_NR_CONSOLES) last_fb_vc = MAX_NR_CONSOLES - 1; - fbcon_is_default = 0; + fbcon_is_default = false; continue; } @@ -569,7 +569,7 @@ static int do_fbcon_takeover(int show_logo) con2fb_map[i] = -1; info_idx = -1; } else { - fbcon_has_console_bind = 1; + fbcon_has_console_bind = true; } return err; @@ -2817,7 +2817,7 @@ static void fbcon_unbind(void) fbcon_is_default); if (!ret) - fbcon_has_console_bind = 0; + fbcon_has_console_bind = false; } #else static inline void fbcon_unbind(void) {} @@ -3268,8 +3268,9 @@ static ssize_t cursor_blink_store(struct device *device, const char *buf, size_t count) { struct fb_info *info; - int blink, idx; char **last = NULL; + bool blink; + int idx; console_lock(); idx = con2fb_map[fg_console]; From 19122a7c28ed119c6ec9adca710acecf633af16a Mon Sep 17 00:00:00 2001 From: Vishal Parmar Date: Sun, 27 Jul 2025 16:31:45 +0530 Subject: [PATCH 1151/2411] docs: powerpc: add htm.rst to toctree The file Documentation/arch/powerpc/htm.rst is not included in the index.rst toctree. This results in a warning when building the docs: WARNING: document isn't included in any toctree: htm.rst Add it to the index.rst file so that it is properly included in the PowerPC documentation TOC. 
Signed-off-by: Vishal Parmar Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250727110145.839906-1-vishistriker@gmail.com --- Documentation/arch/powerpc/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/arch/powerpc/index.rst b/Documentation/arch/powerpc/index.rst index 0560cbae5fa1..173a787b6cc3 100644 --- a/Documentation/arch/powerpc/index.rst +++ b/Documentation/arch/powerpc/index.rst @@ -36,6 +36,7 @@ powerpc vas-api vcpudispatch_stats vmemmap_dedup + htm features From cf2a6de32cabbf84a889e24a9ee7c51dee4a1f70 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 17 Jul 2025 20:29:17 +0000 Subject: [PATCH 1152/2411] powerpc64/bpf: Add jit support for load_acquire and store_release Add JIT support for the load_acquire and store_release instructions. The implementation is similar to the kernel where: load_acquire => plain load -> lwsync store_release => lwsync -> plain store To test the correctness of the implementation, following selftests were run: [fedora@linux-kernel bpf]$ sudo ./test_progs -a \ verifier_load_acquire,verifier_store_release,atomics #11/1 atomics/add:OK #11/2 atomics/sub:OK #11/3 atomics/and:OK #11/4 atomics/or:OK #11/5 atomics/xor:OK #11/6 atomics/cmpxchg:OK #11/7 atomics/xchg:OK #11 atomics:OK #519/1 verifier_load_acquire/load-acquire, 8-bit:OK #519/2 verifier_load_acquire/load-acquire, 8-bit @unpriv:OK #519/3 verifier_load_acquire/load-acquire, 16-bit:OK #519/4 verifier_load_acquire/load-acquire, 16-bit @unpriv:OK #519/5 verifier_load_acquire/load-acquire, 32-bit:OK #519/6 verifier_load_acquire/load-acquire, 32-bit @unpriv:OK #519/7 verifier_load_acquire/load-acquire, 64-bit:OK #519/8 verifier_load_acquire/load-acquire, 64-bit @unpriv:OK #519/9 verifier_load_acquire/load-acquire with uninitialized src_reg:OK #519/10 verifier_load_acquire/load-acquire with uninitialized src_reg @unpriv:OK #519/11 verifier_load_acquire/load-acquire with non-pointer src_reg:OK #519/12 
verifier_load_acquire/load-acquire with non-pointer src_reg @unpriv:OK #519/13 verifier_load_acquire/misaligned load-acquire:OK #519/14 verifier_load_acquire/misaligned load-acquire @unpriv:OK #519/15 verifier_load_acquire/load-acquire from ctx pointer:OK #519/16 verifier_load_acquire/load-acquire from ctx pointer @unpriv:OK #519/17 verifier_load_acquire/load-acquire with invalid register R15:OK #519/18 verifier_load_acquire/load-acquire with invalid register R15 @unpriv:OK #519/19 verifier_load_acquire/load-acquire from pkt pointer:OK #519/20 verifier_load_acquire/load-acquire from flow_keys pointer:OK #519/21 verifier_load_acquire/load-acquire from sock pointer:OK #519 verifier_load_acquire:OK #556/1 verifier_store_release/store-release, 8-bit:OK #556/2 verifier_store_release/store-release, 8-bit @unpriv:OK #556/3 verifier_store_release/store-release, 16-bit:OK #556/4 verifier_store_release/store-release, 16-bit @unpriv:OK #556/5 verifier_store_release/store-release, 32-bit:OK #556/6 verifier_store_release/store-release, 32-bit @unpriv:OK #556/7 verifier_store_release/store-release, 64-bit:OK #556/8 verifier_store_release/store-release, 64-bit @unpriv:OK #556/9 verifier_store_release/store-release with uninitialized src_reg:OK #556/10 verifier_store_release/store-release with uninitialized src_reg @unpriv:OK #556/11 verifier_store_release/store-release with uninitialized dst_reg:OK #556/12 verifier_store_release/store-release with uninitialized dst_reg @unpriv:OK #556/13 verifier_store_release/store-release with non-pointer dst_reg:OK #556/14 verifier_store_release/store-release with non-pointer dst_reg @unpriv:OK #556/15 verifier_store_release/misaligned store-release:OK #556/16 verifier_store_release/misaligned store-release @unpriv:OK #556/17 verifier_store_release/store-release to ctx pointer:OK #556/18 verifier_store_release/store-release to ctx pointer @unpriv:OK #556/19 verifier_store_release/store-release, leak pointer to stack:OK #556/20 
verifier_store_release/store-release, leak pointer to stack @unpriv:OK #556/21 verifier_store_release/store-release, leak pointer to map:OK #556/22 verifier_store_release/store-release, leak pointer to map @unpriv:OK #556/23 verifier_store_release/store-release with invalid register R15:OK #556/24 verifier_store_release/store-release with invalid register R15 @unpriv:OK #556/25 verifier_store_release/store-release to pkt pointer:OK #556/26 verifier_store_release/store-release to flow_keys pointer:OK #556/27 verifier_store_release/store-release to sock pointer:OK #556 verifier_store_release:OK Summary: 3/55 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Puranjay Mohan Tested-by: Saket Kumar Bhaskar Reviewed-by: Hari Bathini Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250717202935.29018-2-puranjay@kernel.org --- arch/powerpc/include/asm/ppc-opcode.h | 1 + arch/powerpc/net/bpf_jit_comp64.c | 82 ++++++++++++++++++++ tools/testing/selftests/bpf/progs/bpf_misc.h | 3 +- 3 files changed, 85 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 4312bcb913a4..8053b24afc39 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -425,6 +425,7 @@ #define PPC_RAW_SC() (0x44000002) #define PPC_RAW_SYNC() (0x7c0004ac) #define PPC_RAW_ISYNC() (0x4c00012c) +#define PPC_RAW_LWSYNC() (0x7c2004ac) /* * Define what the VSX XX1 form instructions will look like, then add diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 5daa77aee7f7..2039eb957f3f 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -392,6 +392,71 @@ asm ( " blr ;" ); +static int emit_atomic_ld_st(const struct bpf_insn insn, struct codegen_context *ctx, u32 *image) +{ + u32 code = insn.code; + u32 dst_reg = bpf_to_ppc(insn.dst_reg); + u32 src_reg = bpf_to_ppc(insn.src_reg); + u32 size = BPF_SIZE(code); + u32 tmp1_reg 
= bpf_to_ppc(TMP_REG_1); + u32 tmp2_reg = bpf_to_ppc(TMP_REG_2); + s16 off = insn.off; + s32 imm = insn.imm; + + switch (imm) { + case BPF_LOAD_ACQ: + switch (size) { + case BPF_B: + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); + break; + case BPF_H: + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); + break; + case BPF_W: + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); + break; + case BPF_DW: + if (off % 4) { + EMIT(PPC_RAW_LI(tmp1_reg, off)); + EMIT(PPC_RAW_LDX(dst_reg, src_reg, tmp1_reg)); + } else { + EMIT(PPC_RAW_LD(dst_reg, src_reg, off)); + } + break; + } + EMIT(PPC_RAW_LWSYNC()); + break; + case BPF_STORE_REL: + EMIT(PPC_RAW_LWSYNC()); + switch (size) { + case BPF_B: + EMIT(PPC_RAW_STB(src_reg, dst_reg, off)); + break; + case BPF_H: + EMIT(PPC_RAW_STH(src_reg, dst_reg, off)); + break; + case BPF_W: + EMIT(PPC_RAW_STW(src_reg, dst_reg, off)); + break; + case BPF_DW: + if (off % 4) { + EMIT(PPC_RAW_LI(tmp2_reg, off)); + EMIT(PPC_RAW_STDX(src_reg, dst_reg, tmp2_reg)); + } else { + EMIT(PPC_RAW_STD(src_reg, dst_reg, off)); + } + break; + } + break; + default: + pr_err_ratelimited("unexpected atomic load/store op code %02x\n", + imm); + return -EINVAL; + } + + return 0; +} + /* Assemble the body code between the prologue & epilogue */ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx, u32 *addrs, int pass, bool extra_pass) @@ -859,8 +924,25 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code /* * BPF_STX ATOMIC (atomic ops) */ + case BPF_STX | BPF_ATOMIC | BPF_B: + case BPF_STX | BPF_ATOMIC | BPF_H: case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: + if (bpf_atomic_is_load_store(&insn[i])) { + ret = emit_atomic_ld_st(insn[i], ctx, image); + if (ret) + return ret; + + if (size != BPF_DW && insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; + break; + } else if (size == BPF_B || size == BPF_H) { + pr_err_ratelimited( + "eBPF filter atomic op code %02x (@%d) unsupported\n", 
+ code, i); + return -EOPNOTSUPP; + } + save_reg = tmp2_reg; ret_reg = src_reg; diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 6e208e24ba3b..3bf3af5639fa 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -227,7 +227,8 @@ #if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) || \ + (defined(__TARGET_ARCH_powerpc)) #define CAN_USE_LOAD_ACQ_STORE_REL #endif From bd7814a4c0fd883894bdf9fe5eda24c9df826e4c Mon Sep 17 00:00:00 2001 From: Edip Hazuri Date: Fri, 25 Jul 2025 18:14:37 +0300 Subject: [PATCH 1153/2411] ALSA: hda/realtek - Fix mute LED for HP Victus 16-r1xxx The mute led on this laptop is using ALC245 but requires a quirk to work This patch enables the existing quirk for the device. Tested on Victus 16-r1xxx Laptop. The LED behaviour works as intended. 
Cc: Signed-off-by: Edip Hazuri Link: https://patch.msgid.link/20250725151436.51543-2-edip@medip.dev Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 05019fa73297..33ef08d251d6 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6580,6 +6580,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c91, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8c99, "HP Victus 16-r1xxx (MB 8C99)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8c9c, "HP Victus 16-s1xxx (MB 8C9C)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca2, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), From 8a15ca0ca51399b652b1bbb23b590b220cf03d62 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Mon, 28 Jul 2025 19:00:35 +0930 Subject: [PATCH 1154/2411] ALSA: scarlett2: Add retry on -EPROTO from scarlett2_usb_tx() During communication with Focusrite Scarlett Gen 2/3/4 USB audio interfaces, -EPROTO is sometimes returned from scarlett2_usb_tx(), snd_usb_ctl_msg() which can cause initialisation and control operations to fail intermittently. This patch adds up to 5 retries in scarlett2_usb(), with a delay starting at 5ms and doubling each time. This follows the same approach as the fix for usb_set_interface() in endpoint.c (commit f406005e162b ("ALSA: usb-audio: Add retry on -EPROTO from usb_set_interface()")), which resolved similar -EPROTO issues during device initialisation, and is the same approach as in fcp.c:fcp_usb(). 
Fixes: 9e4d5c1be21f ("ALSA: usb-audio: Scarlett Gen 2 mixer interface") Closes: https://github.com/geoffreybennett/linux-fcp/issues/41 Cc: stable@vger.kernel.org Signed-off-by: Geoffrey D. Bennett Link: https://patch.msgid.link/aIdDO6ld50WQwNim@m.b4.vu Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett2.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/usb/mixer_scarlett2.c b/sound/usb/mixer_scarlett2.c index 49eeb1444dce..15bbdafc4894 100644 --- a/sound/usb/mixer_scarlett2.c +++ b/sound/usb/mixer_scarlett2.c @@ -2351,6 +2351,8 @@ static int scarlett2_usb( struct scarlett2_usb_packet *req, *resp = NULL; size_t req_buf_size = struct_size(req, data, req_size); size_t resp_buf_size = struct_size(resp, data, resp_size); + int retries = 0; + const int max_retries = 5; int err; req = kmalloc(req_buf_size, GFP_KERNEL); @@ -2374,10 +2376,15 @@ static int scarlett2_usb( if (req_size) memcpy(req->data, req_data, req_size); +retry: err = scarlett2_usb_tx(dev, private->bInterfaceNumber, req, req_buf_size); if (err != req_buf_size) { + if (err == -EPROTO && ++retries <= max_retries) { + msleep(5 * (1 << (retries - 1))); + goto retry; + } usb_audio_err( mixer->chip, "%s USB request result cmd %x was %d\n", From 48defdf6b083f74a44e1f742db284960d3444aec Mon Sep 17 00:00:00 2001 From: Aaron Plattner Date: Mon, 21 Jul 2025 16:06:39 -0700 Subject: [PATCH 1155/2411] watchdog: sbsa: Adjust keepalive timeout to avoid MediaTek WS0 race condition The MediaTek implementation of the sbsa_gwdt watchdog has a race condition where a write to SBSA_GWDT_WRR is ignored if it occurs while the hardware is processing a timeout refresh that asserts WS0. Detect this based on the hardware implementer and adjust wdd->min_hw_heartbeat_ms to avoid the race by forcing the keepalive ping to be one second later. 
Signed-off-by: Aaron Plattner Acked-by: Timur Tabi Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20250721230640.2244915-1-aplattner@nvidia.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sbsa_gwdt.c | 50 +++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/drivers/watchdog/sbsa_gwdt.c b/drivers/watchdog/sbsa_gwdt.c index 5f23913ce3b4..6ce1bfb39064 100644 --- a/drivers/watchdog/sbsa_gwdt.c +++ b/drivers/watchdog/sbsa_gwdt.c @@ -75,11 +75,17 @@ #define SBSA_GWDT_VERSION_MASK 0xF #define SBSA_GWDT_VERSION_SHIFT 16 +#define SBSA_GWDT_IMPL_MASK 0x7FF +#define SBSA_GWDT_IMPL_SHIFT 0 +#define SBSA_GWDT_IMPL_MEDIATEK 0x426 + /** * struct sbsa_gwdt - Internal representation of the SBSA GWDT * @wdd: kernel watchdog_device structure * @clk: store the System Counter clock frequency, in Hz. * @version: store the architecture version + * @need_ws0_race_workaround: + * indicate whether to adjust wdd->timeout to avoid a race with WS0 * @refresh_base: Virtual address of the watchdog refresh frame * @control_base: Virtual address of the watchdog control frame */ @@ -87,6 +93,7 @@ struct sbsa_gwdt { struct watchdog_device wdd; u32 clk; int version; + bool need_ws0_race_workaround; void __iomem *refresh_base; void __iomem *control_base; }; @@ -161,6 +168,31 @@ static int sbsa_gwdt_set_timeout(struct watchdog_device *wdd, */ sbsa_gwdt_reg_write(((u64)gwdt->clk / 2) * timeout, gwdt); + /* + * Some watchdog hardware has a race condition where it will ignore + * sbsa_gwdt_keepalive() if it is called at the exact moment that a + * timeout occurs and WS0 is being asserted. Unfortunately, the default + * behavior of the watchdog core is very likely to trigger this race + * when action=0 because it programs WOR to be half of the desired + * timeout, and watchdog_next_keepalive() chooses the exact same time to + * send keepalive pings. 
+ * + * This triggers a race where sbsa_gwdt_keepalive() can be called right + * as WS0 is being asserted, and affected hardware will ignore that + * write and continue to assert WS0. After another (timeout / 2) + * seconds, the same race happens again. If the driver wins then the + * explicit refresh will reset WS0 to false but if the hardware wins, + * then WS1 is asserted and the system resets. + * + * Avoid the problem by scheduling keepalive heartbeats one second later + * than the WOR timeout. + * + * This workaround might not be needed in a future revision of the + * hardware. + */ + if (gwdt->need_ws0_race_workaround) + wdd->min_hw_heartbeat_ms = timeout * 500 + 1000; + return 0; } @@ -202,12 +234,15 @@ static int sbsa_gwdt_keepalive(struct watchdog_device *wdd) static void sbsa_gwdt_get_version(struct watchdog_device *wdd) { struct sbsa_gwdt *gwdt = watchdog_get_drvdata(wdd); - int ver; + int iidr, ver, impl; - ver = readl(gwdt->control_base + SBSA_GWDT_W_IIDR); - ver = (ver >> SBSA_GWDT_VERSION_SHIFT) & SBSA_GWDT_VERSION_MASK; + iidr = readl(gwdt->control_base + SBSA_GWDT_W_IIDR); + ver = (iidr >> SBSA_GWDT_VERSION_SHIFT) & SBSA_GWDT_VERSION_MASK; + impl = (iidr >> SBSA_GWDT_IMPL_SHIFT) & SBSA_GWDT_IMPL_MASK; gwdt->version = ver; + gwdt->need_ws0_race_workaround = + !action && (impl == SBSA_GWDT_IMPL_MEDIATEK); } static int sbsa_gwdt_start(struct watchdog_device *wdd) @@ -299,6 +334,15 @@ static int sbsa_gwdt_probe(struct platform_device *pdev) else wdd->max_hw_heartbeat_ms = GENMASK_ULL(47, 0) / gwdt->clk * 1000; + if (gwdt->need_ws0_race_workaround) { + /* + * A timeout of 3 seconds means that WOR will be set to 1.5 + * seconds and the heartbeat will be scheduled every 2.5 + * seconds. 
+ */ + wdd->min_timeout = 3; + } + status = readl(cf_base + SBSA_GWDT_WCS); if (status & SBSA_GWDT_WCS_WS1) { dev_warn(dev, "System reset by WDT.\n"); From d9e9aa3e971b37c6d6dfd15ad8dc65537a925725 Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Tue, 15 Jul 2025 03:25:03 +0530 Subject: [PATCH 1156/2411] drm/xe: Don't fail probe on unsupported mailbox command If the device is running older pcode firmware, it is possible that newer mailbox commands are not supported by it. The sysfs attributes aren't useful in that case, but we shouldn't fail driver probe because of it. As of now, it is unknown if we can distinguish unsupported commands before attempting them. But until we figure out a way to do that, fix the regressions. v2: Add debug message (Lucas) Fixes: cdc36b66cd41 ("drm/xe: Expose fan control and voltage regulator version") Signed-off-by: Raag Jadav Tested-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://lore.kernel.org/r/20250714215503.2897748-1-raag.jadav@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit ed5461daa150b037e36b8202381da1ef85d6b16b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_sysfs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index e5fd0cd537bc..bd9015761aa0 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -160,8 +160,13 @@ static int late_bind_create_files(struct device *dev) ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), &cap, NULL); - if (ret) + if (ret) { + if (ret == -ENXIO) { + drm_dbg(&xe->drm, "Late binding not supported by firmware\n"); + ret = 0; + } goto out; + } if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) { ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr); From 6aaceed7fe1a400082ec5990884b11ef7266a605 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Tue, 15 Jul 2025 11:14:22 
-0700 Subject: [PATCH 1157/2411] drm/xe/oa: Fix static checker warning about null gt There is a static checker warning that gt returned by xe_device_get_gt can be NULL and that is being dereferenced. Use xe_root_mmio_gt instead, which is equivalent and cannot return a NULL gt 0. Fixes: 10d42ef34bce ("drm/xe/oa: Assign hwe for OAM_SAG") Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Link: https://lore.kernel.org/r/20250715181422.2807624-1-ashutosh.dixit@intel.com (cherry picked from commit 308dc9b27874d0e8a0258869b9e681b0fdd2e579) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_oa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index d991fbd90f20..5729e7d3e335 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1941,7 +1941,7 @@ static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param) /* If not provided, OA unit defaults to OA unit 0 as per uapi */ if (!param->oa_unit) - param->oa_unit = &xe_device_get_gt(oa->xe, 0)->oa.oa_unit[0]; + param->oa_unit = &xe_root_mmio_gt(oa->xe)->oa.oa_unit[0]; /* When we have an exec_q, get hwe from the exec_q */ if (param->exec_q) { From 2bd986021c297ba675e831c3164bf9bdbbca3bc3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 15 Jul 2025 17:59:44 -0500 Subject: [PATCH 1158/2411] drm/xe: Fix a NULL vs IS_ERR() bug in xe_i2c_register_adapter() The fwnode_create_software_node() function returns error pointers. It never returns NULL. Update the checks to match. 
Fixes: f0e53aadd702 ("drm/xe: Support for I2C attached MCUs") Signed-off-by: Dan Carpenter Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/65825d00-81ab-4665-af51-4fff6786a250@sabinyo.mountain Signed-off-by: Rodrigo Vivi (cherry picked from commit 2f264d58cc805a3cefc6b98097f90fbc388136ef) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c index db9c0340be5c..1f19718db559 100644 --- a/drivers/gpu/drm/xe/xe_i2c.c +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -96,8 +96,8 @@ static int xe_i2c_register_adapter(struct xe_i2c *i2c) int ret; fwnode = fwnode_create_software_node(xe_i2c_adapter_properties, NULL); - if (!fwnode) - return -ENOMEM; + if (IS_ERR(fwnode)) + return PTR_ERR(fwnode); /* * Not using platform_device_register_full() here because we don't have From dc94168eaa6f6f2476c4e1a894bd8d031df6226d Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Mon, 21 Jul 2025 17:45:20 -0400 Subject: [PATCH 1159/2411] drm/xe/uc: Fix missing unwind goto Fix missing unwind goto on error handling. 
Fixes: b2c4ac219fa4 ("drm/xe/uc: Disable GuC communication on hardware initialization error") Signed-off-by: Zhanjun Dong Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://lore.kernel.org/r/20250721214520.954014-1-zhanjun.dong@intel.com (cherry picked from commit 176f44a5ec0b074aaf44852db77d0c183c36696d) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 3e0c3af235f2..465bda355443 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -164,7 +164,7 @@ static int vf_uc_load_hw(struct xe_uc *uc) err = xe_guc_opt_in_features_enable(&uc->guc); if (err) - return err; + goto err_out; err = xe_gt_record_default_lrcs(uc_to_gt(uc)); if (err) From cccb918e0231fefba059f049acced18760242136 Mon Sep 17 00:00:00 2001 From: Lukasz Laguna Date: Thu, 17 Jul 2025 17:54:20 +0200 Subject: [PATCH 1160/2411] drm/xe/vf: Don't register I2C devices if VF VF drivers can't access I2C devices, so skip their registration when running as VF. 
Signed-off-by: Lukasz Laguna Fixes: f0e53aadd702 ("drm/xe: Support for I2C attached MCUs") Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250717155420.25298-1-lukasz.laguna@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 9a220e065914b67b55d3d0ab91c3e215742fdd73) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_i2c.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c index 1f19718db559..bc7dc2099470 100644 --- a/drivers/gpu/drm/xe/xe_i2c.c +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -283,6 +283,9 @@ int xe_i2c_probe(struct xe_device *xe) if (xe->info.platform != XE_BATTLEMAGE) return 0; + if (IS_SRIOV_VF(xe)) + return 0; + xe_i2c_read_endpoint(xe_root_tile_mmio(xe), &ep); if (ep.cookie != XE_I2C_EP_COOKIE_DEVICE) return 0; From 892ae5f806af323ceb6073fc550d62c635d7271c Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Fri, 25 Jul 2025 15:38:16 +0200 Subject: [PATCH 1161/2411] rtla/tests: Add grep checks for base test cases Checking for patterns in rtla output with grep was added to test rtla actions. Add grep checks also for base tests where applicable. Also fix trace event histogram trigger check to use the correct syntax for the command-line option so that the test passes with the grep check. 
Cc: John Kacur Cc: Luis Goncalves Cc: Chang Yin Cc: Costa Shulyupin Link: https://lore.kernel.org/20250725133817.59237-2-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/tests/hwnoise.t | 11 ++++++----- tools/tracing/rtla/tests/osnoise.t | 6 +++--- tools/tracing/rtla/tests/timerlat.t | 12 +++++++----- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/tools/tracing/rtla/tests/hwnoise.t b/tools/tracing/rtla/tests/hwnoise.t index 448877564b8d..23ce250a6852 100644 --- a/tools/tracing/rtla/tests/hwnoise.t +++ b/tools/tracing/rtla/tests/hwnoise.t @@ -6,16 +6,17 @@ test_begin set_timeout 2m check "verify help page" \ - "hwnoise --help" + "hwnoise --help" 0 "summary of hardware-related noise" check "detect noise higher than one microsecond" \ - "hwnoise -c 0 -T 1 -d 5s -q" + "hwnoise -c 0 -T 1 -d 5s -q" 0 check "set the automatic trace mode" \ - "hwnoise -a 5 -d 10s" 2 + "hwnoise -a 5 -d 10s" 2 "osnoise hit stop tracing" check "set scheduling param to the osnoise tracer threads" \ "hwnoise -P F:1 -c 0 -r 900000 -d 10s -q" check "stop the trace if a single sample is higher than 1 us" \ - "hwnoise -s 1 -T 1 -t -d 10s" 2 + "hwnoise -s 1 -T 1 -t -d 10s" 2 "Saving trace to osnoise_trace.txt" check "enable a trace event trigger" \ - "hwnoise -t -e osnoise:irq_noise trigger=\"hist:key=desc,duration:sort=desc,duration:vals=hitcount\" -d 10s" + "hwnoise -t -e osnoise:irq_noise --trigger=\"hist:key=desc,duration:sort=desc,duration:vals=hitcount\" -d 10s" \ + 0 "Saving event osnoise:irq_noise hist to osnoise_irq_noise_hist.txt" test_end diff --git a/tools/tracing/rtla/tests/osnoise.t b/tools/tracing/rtla/tests/osnoise.t index 6a4dfa31dc55..7574ec6a5a53 100644 --- a/tools/tracing/rtla/tests/osnoise.t +++ b/tools/tracing/rtla/tests/osnoise.t @@ -6,13 +6,13 @@ test_begin set_timeout 2m check "verify help page" \ - "osnoise --help" + "osnoise --help" 0 "osnoise version" check "verify the --priority/-P param" \ 
"osnoise top -P F:1 -c 0 -r 900000 -d 10s -q" check "verify the --stop/-s param" \ - "osnoise top -s 30 -T 1 -t" 2 + "osnoise top -s 30 -T 1" 2 "osnoise hit stop tracing" check "verify the --trace param" \ - "osnoise hist -s 30 -T 1 -t" 2 + "osnoise hist -s 30 -T 1 -t" 2 "Saving trace to osnoise_trace.txt" check "verify the --entries/-E param" \ "osnoise hist -P F:1 -c 0 -r 900000 -d 10s -b 10 -E 25" diff --git a/tools/tracing/rtla/tests/timerlat.t b/tools/tracing/rtla/tests/timerlat.t index 2d59ee199c4d..db263dc90a2d 100644 --- a/tools/tracing/rtla/tests/timerlat.t +++ b/tools/tracing/rtla/tests/timerlat.t @@ -21,15 +21,17 @@ export RTLA_NO_BPF=$option # Basic tests check "verify help page" \ - "timerlat --help" + "timerlat --help" 0 "timerlat version" check "verify -s/--stack" \ - "timerlat top -s 3 -T 10 -t" 2 + "timerlat top -s 3 -T 10 -t" 2 "Blocking thread stack trace" check "verify -P/--priority" \ "timerlat top -P F:1 -c 0 -d 10s -q" check "test in nanoseconds" \ - "timerlat top -i 2 -c 0 -n -d 10s" 2 + "timerlat top -i 2 -c 0 -n -d 10s" 2 "ns" check "set the automatic trace mode" \ - "timerlat top -a 5 --dump-tasks" 2 + "timerlat top -a 5" 2 "analyzing it" +check "dump tasks" \ + "timerlat top -a 5 --dump-tasks" 2 "Printing CPU tasks" check "print the auto-analysis if hits the stop tracing condition" \ "timerlat top --aa-only 5" 2 check "disable auto-analysis" \ @@ -37,7 +39,7 @@ check "disable auto-analysis" \ check "verify -c/--cpus" \ "timerlat hist -c 0 -d 10s" check "hist test in nanoseconds" \ - "timerlat hist -i 2 -c 0 -n -d 10s" 2 + "timerlat hist -i 2 -c 0 -n -d 10s" 2 "ns" # Actions tests check "trace output through -t" \ From a2e1407eb8405e59c56b2325d910a73fd917eb3e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 23 Jul 2025 19:56:39 +0200 Subject: [PATCH 1162/2411] drm/xe/guc: Clear whole g2h_fence during initialization The struct g2h_fence must be explicitly initializated using the g2h_fence_init() function to avoid trash values in 
its members, but we failed to update this helper function with the new member. To fix that and avoid any future mistakes, memset the whole struct first, then update remaining non-zero members. Fixes: 94de94d24ea8 ("drm/xe/guc: Cancel ongoing H2G requests when stopping CT") Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Cc: Lukasz Laguna Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250723175639.206875-1-michal.wajdeczko@intel.com (cherry picked from commit 159afd92bae8153bdd8d8b34aea0d463fe19c978) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index b6acccfcd351..3f4e6a46ff16 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -95,12 +95,8 @@ struct g2h_fence { static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer) { + memset(g2h_fence, 0, sizeof(*g2h_fence)); g2h_fence->response_buffer = response_buffer; - g2h_fence->response_data = 0; - g2h_fence->response_len = 0; - g2h_fence->fail = false; - g2h_fence->retry = false; - g2h_fence->done = false; g2h_fence->seqno = ~0x0; } From a80db1f85774ae571b94077f65c5cd57467641d3 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Fri, 25 Jul 2025 15:38:17 +0200 Subject: [PATCH 1163/2411] rtla/tests: Test timerlat -P option using actions The -P option is used to set priority of osnoise and timerlat threads. Extend the test for -P with --on-threshold calling a script that looks for running timerlat threads and checks if their priority is set correctly. As --on-threshold is only supported by timerlat at the moment, this is only implemented there so far.
Cc: John Kacur Cc: Luis Goncalves Cc: Chang Yin Cc: Costa Shulyupin Link: https://lore.kernel.org/20250725133817.59237-3-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/tests/engine.sh | 2 +- tools/tracing/rtla/tests/scripts/check-priority.sh | 8 ++++++++ tools/tracing/rtla/tests/timerlat.t | 3 ++- 3 files changed, 11 insertions(+), 2 deletions(-) create mode 100755 tools/tracing/rtla/tests/scripts/check-priority.sh diff --git a/tools/tracing/rtla/tests/engine.sh b/tools/tracing/rtla/tests/engine.sh index 64c5be4313de..a97d644ead99 100644 --- a/tools/tracing/rtla/tests/engine.sh +++ b/tools/tracing/rtla/tests/engine.sh @@ -69,7 +69,7 @@ check() { # Add rtla output and exit code as comments in case of failure echo "$result" | col -b | while read line; do echo "# $line"; done printf "#\n# exit code %s\n" $exitcode - [ -n "$expected_output" ] && \ + [ -n "$expected_output" ] && [ $grep_result -ne 0 ] && \ printf "# Output match failed: \"%s\"\n" "$expected_output" fi fi diff --git a/tools/tracing/rtla/tests/scripts/check-priority.sh b/tools/tracing/rtla/tests/scripts/check-priority.sh new file mode 100755 index 000000000000..79b702a34a96 --- /dev/null +++ b/tools/tracing/rtla/tests/scripts/check-priority.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +pids="$(pgrep ^$1)" || exit 1 +for pid in $pids +do + chrt -p $pid | cut -d ':' -f 2 | head -n1 | grep "^ $2\$" >/dev/null + chrt -p $pid | cut -d ':' -f 2 | tail -n1 | grep "^ $3\$" >/dev/null +done && echo "Priorities are set correctly" diff --git a/tools/tracing/rtla/tests/timerlat.t b/tools/tracing/rtla/tests/timerlat.t index db263dc90a2d..c71aed5534bf 100644 --- a/tools/tracing/rtla/tests/timerlat.t +++ b/tools/tracing/rtla/tests/timerlat.t @@ -25,7 +25,8 @@ check "verify help page" \ check "verify -s/--stack" \ "timerlat top -s 3 -T 10 -t" 2 "Blocking thread stack trace" check "verify -P/--priority" \ - "timerlat top -P F:1 -c 0 -d 
10s -q" + "timerlat top -P F:1 -c 0 -d 10s -q -T 1 --on-threshold shell,command=\"tests/scripts/check-priority.sh timerlatu/ SCHED_FIFO 1\"" \ + 2 "Priorities are set correctly" check "test in nanoseconds" \ "timerlat top -i 2 -c 0 -n -d 10s" 2 "ns" check "set the automatic trace mode" \ From 4846856c3a4afa882b6d1b842ed2fad6f3781f4d Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 24 Jul 2025 19:38:55 +0000 Subject: [PATCH 1164/2411] drm/xe/hw_engine_group: Avoid call kfree() for drmm_kzalloc() Memory allocated with drmm_kzalloc() should not be freed using kfree(), as it is managed by the DRM subsystem. The memory will be automatically freed when the associated drm_device is released. These 3 group pointers are allocated using drmm_kzalloc() in hw_engine_group_alloc(), so they don't require manual deallocation. Fixes: 67979060740f ("drm/xe/hw_engine_group: Fix potential leak") Cc: Michal Wajdeczko Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20250724193854.1124510-2-shuicheng.lin@intel.com (cherry picked from commit f98de826b418885a21ece67f0f5b921ae759b7bf) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine_group.c | 28 ++++++------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 87a6dcb1b4b5..c926f840c87b 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -75,25 +75,18 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) enum xe_hw_engine_id id; struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs; struct xe_device *xe = gt_to_xe(gt); - int err; group_rcs_ccs = hw_engine_group_alloc(xe); - if (IS_ERR(group_rcs_ccs)) { - err = PTR_ERR(group_rcs_ccs); - goto err_group_rcs_ccs; - } + if (IS_ERR(group_rcs_ccs)) + return PTR_ERR(group_rcs_ccs); group_bcs = 
hw_engine_group_alloc(xe); - if (IS_ERR(group_bcs)) { - err = PTR_ERR(group_bcs); - goto err_group_bcs; - } + if (IS_ERR(group_bcs)) + return PTR_ERR(group_bcs); group_vcs_vecs = hw_engine_group_alloc(xe); - if (IS_ERR(group_vcs_vecs)) { - err = PTR_ERR(group_vcs_vecs); - goto err_group_vcs_vecs; - } + if (IS_ERR(group_vcs_vecs)) + return PTR_ERR(group_vcs_vecs); for_each_hw_engine(hwe, gt, id) { switch (hwe->class) { @@ -116,15 +109,6 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) } return 0; - -err_group_vcs_vecs: - kfree(group_vcs_vecs); -err_group_bcs: - kfree(group_bcs); -err_group_rcs_ccs: - kfree(group_rcs_ccs); - - return err; } /** From 942ac8da6388c25fe62b2792c78715e0ea6e649b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 22 Jul 2025 16:10:54 +0200 Subject: [PATCH 1165/2411] drm/xe/configfs: Fix pci_dev reference leak We are using pci_get_domain_bus_and_slot() function to verify if the given config directory name matches any existing PCI device, but we missed to call matching pci_dev_put() to release reference. While around, also change error code in case of no device match, to make it more specific than generic formatting error. 
Fixes: 16280ded45fb ("drm/xe: Add configfs to enable survivability mode") Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Reviewed-by: Jonathan Cavitt Link: https://lore.kernel.org/r/20250722141059.30707-2-michal.wajdeczko@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 0bdd05c2a82bbf2419415d012fd4f5faeca7f1af) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_configfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index 8ec1ff1e4e80..e9b46a2d0019 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -267,7 +267,8 @@ static struct config_group *xe_config_make_device_group(struct config_group *gro pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, function)); if (!pdev) - return ERR_PTR(-EINVAL); + return ERR_PTR(-ENODEV); + pci_dev_put(pdev); dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) From 9acb237deff7667b0f6b10fe6b1b70c4429ea049 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 22 Jul 2025 16:56:41 -0400 Subject: [PATCH 1166/2411] NFSv4.2: another fix for listxattr Currently, when the server supports NFS4.1 security labels, the security.selinux label is included twice. Instead, only add it when the server doesn't possess security label support.
Fixes: 243fea134633 ("NFSv4.2: fix listxattr to return selinux security label") Signed-off-by: Olga Kornievskaia Link: https://lore.kernel.org/r/20250722205641.79394-1-okorniev@redhat.com Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d8bebd757af3..d7dc669d84c5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10883,7 +10883,7 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size) { - ssize_t error, error2, error3, error4; + ssize_t error, error2, error3, error4 = 0; size_t left = size; error = generic_listxattr(dentry, list, left); @@ -10911,9 +10911,11 @@ static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size) left -= error3; } - error4 = security_inode_listsecurity(d_inode(dentry), list, left); - if (error4 < 0) - return error4; + if (!nfs_server_capable(d_inode(dentry), NFS_CAP_SECURITY_LABEL)) { + error4 = security_inode_listsecurity(d_inode(dentry), list, left); + if (error4 < 0) + return error4; + } error += error2 + error3 + error4; if (size && error > size) From 99765233ab42bf7a4950377ad7894dce8a5c0e60 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 9 Jul 2025 21:47:43 -0400 Subject: [PATCH 1167/2411] NFS: Fixup allocation flags for nfsiod's __GFP_NORETRY If the NFS client is doing writeback from a workqueue context, avoid using __GFP_NORETRY for allocations if the task has set PF_MEMALLOC_NOIO or PF_MEMALLOC_NOFS. The combination of these flags makes memory allocation failures much more likely. We've seen those allocation failures show up when the loopback driver is doing writeback from a workqueue to a file on NFS, where memory allocation failure results in errors or corruption within the loopback device's filesystem. 
Suggested-by: Trond Myklebust Fixes: 0bae835b63c5 ("NFS: Avoid writeback threads getting stuck in mempool_alloc()") Signed-off-by: Benjamin Coddington Reviewed-by: Laurence Oberman Tested-by: Laurence Oberman Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/f83ac1155a4bc670f2663959a7a068571e06afd9.1752111622.git.bcodding@redhat.com Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d55dce8bf043..0143e0794d32 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -670,9 +670,12 @@ nfs_write_match_verf(const struct nfs_writeverf *verf, static inline gfp_t nfs_io_gfp_mask(void) { - if (current->flags & PF_WQ_WORKER) - return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; - return GFP_KERNEL; + gfp_t ret = current_gfp_context(GFP_KERNEL); + + /* For workers __GFP_NORETRY only with __GFP_IO or __GFP_FS */ + if ((current->flags & PF_WQ_WORKER) && ret == GFP_KERNEL) + ret |= __GFP_NORETRY | __GFP_NOWARN; + return ret; } /* From 0349b7f95c806ea30d558c7fec9502f4470fb1b6 Mon Sep 17 00:00:00 2001 From: wangzijie Date: Mon, 28 Jul 2025 13:02:35 +0800 Subject: [PATCH 1168/2411] f2fs: avoid redundant clean nat entry move in lru list __lookup_nat_cache follows LRU manner to move clean nat entry, when nat entries are going to be dirty, no need to move them to tail of lru list. Introduce a parameter 'for_dirty' to avoid it. 
Signed-off-by: wangzijie Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 76aba1961b54..940b52d383ba 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -204,14 +204,17 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, return ne; } -static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) +static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n, bool for_dirty) { struct nat_entry *ne; ne = radix_tree_lookup(&nm_i->nat_root, n); - /* for recent accessed nat entry, move it to tail of lru list */ - if (ne && !get_nat_flag(ne, IS_DIRTY)) { + /* + * for recent accessed nat entry which will not be dirtied soon + * later, move it to tail of lru list. + */ + if (ne && !get_nat_flag(ne, IS_DIRTY) && !for_dirty) { spin_lock(&nm_i->nat_list_lock); if (!list_empty(&ne->list)) list_move_tail(&ne->list, &nm_i->nat_entries); @@ -383,7 +386,7 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) bool need = false; f2fs_down_read(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); + e = __lookup_nat_cache(nm_i, nid, false); if (e) { if (!get_nat_flag(e, IS_CHECKPOINTED) && !get_nat_flag(e, HAS_FSYNCED_INODE)) @@ -400,7 +403,7 @@ bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) bool is_cp = true; f2fs_down_read(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); + e = __lookup_nat_cache(nm_i, nid, false); if (e && !get_nat_flag(e, IS_CHECKPOINTED)) is_cp = false; f2fs_up_read(&nm_i->nat_tree_lock); @@ -414,7 +417,7 @@ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) bool need_update = true; f2fs_down_read(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, ino); + e = __lookup_nat_cache(nm_i, ino, false); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || 
get_nat_flag(e, HAS_FSYNCED_INODE))) @@ -439,7 +442,7 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, return; f2fs_down_write(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); + e = __lookup_nat_cache(nm_i, nid, false); if (!e) e = __init_nat_entry(nm_i, new, ne, false); else @@ -460,7 +463,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true); f2fs_down_write(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, ni->nid); + e = __lookup_nat_cache(nm_i, ni->nid, true); if (!e) { e = __init_nat_entry(nm_i, new, NULL, true); copy_node_info(&e->ni, ni); @@ -502,7 +505,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, /* update fsync_mark if its inode nat entry is still alive */ if (ni->nid != ni->ino) - e = __lookup_nat_cache(nm_i, ni->ino); + e = __lookup_nat_cache(nm_i, ni->ino, false); if (e) { if (fsync_done && ni->nid == ni->ino) set_nat_flag(e, HAS_FSYNCED_INODE, true); @@ -562,7 +565,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, retry: /* Check nat cache */ f2fs_down_read(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); + e = __lookup_nat_cache(nm_i, nid, false); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = nat_get_blkaddr(e); @@ -2371,7 +2374,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, * - __remove_nid_from_list(PREALLOC_NID) * - __insert_nid_to_list(FREE_NID) */ - ne = __lookup_nat_cache(nm_i, nid); + ne = __lookup_nat_cache(nm_i, nid, false); if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || nat_get_blkaddr(ne) != NULL_ADDR)) goto err_out; @@ -2936,7 +2939,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) raw_ne = nat_in_journal(journal, i); - ne = __lookup_nat_cache(nm_i, nid); + ne = __lookup_nat_cache(nm_i, nid, true); if (!ne) { ne = __alloc_nat_entry(sbi, nid, true); __init_nat_entry(nm_i, ne, &raw_ne, true); From 
40aa9e1223fd38e65ac72373e642c7638a3b4752 Mon Sep 17 00:00:00 2001 From: wangzijie Date: Mon, 28 Jul 2025 13:02:36 +0800 Subject: [PATCH 1169/2411] f2fs: directly add newly allocated pre-dirty nat entry to dirty set list When we need to alloc nat entry and set it dirty, we can directly add it to dirty set list(or initialize its list_head for new_ne) instead of adding it to clean list and make a move. Introduce init_dirty flag to do it. Signed-off-by: wangzijie Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 940b52d383ba..27743b93e186 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -185,7 +185,7 @@ static void __free_nat_entry(struct nat_entry *e) /* must be locked by nat_tree_lock */ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, - struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail) + struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail, bool init_dirty) { if (no_fail) f2fs_radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne); @@ -195,6 +195,12 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, if (raw_ne) node_info_from_raw_nat(&ne->ni, raw_ne); + if (init_dirty) { + INIT_LIST_HEAD(&ne->list); + nm_i->nat_cnt[TOTAL_NAT]++; + return ne; + } + spin_lock(&nm_i->nat_list_lock); list_add_tail(&ne->list, &nm_i->nat_entries); spin_unlock(&nm_i->nat_list_lock); @@ -259,7 +265,7 @@ static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i, } static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, - struct nat_entry *ne) + struct nat_entry *ne, bool init_dirty) { struct nat_entry_set *head; bool new_ne = nat_get_blkaddr(ne) == NEW_ADDR; @@ -282,7 +288,8 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, goto refresh_list; nm_i->nat_cnt[DIRTY_NAT]++; - nm_i->nat_cnt[RECLAIMABLE_NAT]--; + if (!init_dirty) + 
nm_i->nat_cnt[RECLAIMABLE_NAT]--; set_nat_flag(ne, IS_DIRTY, true); refresh_list: spin_lock(&nm_i->nat_list_lock); @@ -444,7 +451,7 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, f2fs_down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid, false); if (!e) - e = __init_nat_entry(nm_i, new, ne, false); + e = __init_nat_entry(nm_i, new, ne, false, false); else f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) || nat_get_blkaddr(e) != @@ -461,11 +468,13 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true); + bool init_dirty = false; f2fs_down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid, true); if (!e) { - e = __init_nat_entry(nm_i, new, NULL, true); + init_dirty = true; + e = __init_nat_entry(nm_i, new, NULL, true, true); copy_node_info(&e->ni, ni); f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { @@ -501,7 +510,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, nat_set_blkaddr(e, new_blkaddr); if (!__is_valid_data_blkaddr(new_blkaddr)) set_nat_flag(e, IS_CHECKPOINTED, false); - __set_nat_cache_dirty(nm_i, e); + __set_nat_cache_dirty(nm_i, e, init_dirty); /* update fsync_mark if its inode nat entry is still alive */ if (ni->nid != ni->ino) @@ -2927,6 +2936,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_journal *journal = curseg->journal; int i; + bool init_dirty; down_write(&curseg->journal_rwsem); for (i = 0; i < nats_in_cursum(journal); i++) { @@ -2937,12 +2947,15 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) if (f2fs_check_nid_range(sbi, nid)) continue; + init_dirty = false; + raw_ne = nat_in_journal(journal, i); ne = __lookup_nat_cache(nm_i, nid, true); if (!ne) { + init_dirty = true; ne = 
__alloc_nat_entry(sbi, nid, true); - __init_nat_entry(nm_i, ne, &raw_ne, true); + __init_nat_entry(nm_i, ne, &raw_ne, true, true); } /* @@ -2957,7 +2970,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->nid_list_lock); } - __set_nat_cache_dirty(nm_i, ne); + __set_nat_cache_dirty(nm_i, ne, init_dirty); } update_nats_in_cursum(journal, -i); up_write(&curseg->journal_rwsem); From 6840faddb65683b4e7bd8196f177b038a1e19faf Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 24 Jul 2025 16:01:42 +0800 Subject: [PATCH 1170/2411] f2fs: fix to update upper_p in __get_secs_required() correctly Commit 1acd73edbbfe ("f2fs: fix to account dirty data in __get_secs_required()") missed to calculate upper_p w/ data_secs, fix it. Fixes: 1acd73edbbfe ("f2fs: fix to account dirty data in __get_secs_required()") Cc: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index d2c73f641134..2123645cf175 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -678,7 +678,7 @@ static inline void __get_secs_required(struct f2fs_sb_info *sbi, if (lower_p) *lower_p = node_secs + dent_secs + data_secs; if (upper_p) - *upper_p = node_secs + dent_secs + + *upper_p = node_secs + dent_secs + data_secs + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0) + (data_blocks ? 1 : 0); if (curseg_p) From e194e140ab7de2ce2782e64b9e086a43ca6ff4f2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 24 Jul 2025 16:01:43 +0800 Subject: [PATCH 1171/2411] f2fs: fix to calculate dirty data during has_not_enough_free_secs() In lfs mode, dirty data needs OPU, we'd better calculate lower_p and upper_p w/ them during has_not_enough_free_secs(), otherwise we may encounter out-of-space issue due to we missed to reclaim enough free section w/ foreground gc. 
Fixes: 36abef4e796d ("f2fs: introduce mode=lfs mount option") Cc: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 2123645cf175..5e2ee5c686b1 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -668,8 +668,7 @@ static inline void __get_secs_required(struct f2fs_sb_info *sbi, unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi); unsigned int data_blocks = 0; - if (f2fs_lfs_mode(sbi) && - unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (f2fs_lfs_mode(sbi)) { total_data_blocks = get_pages(sbi, F2FS_DIRTY_DATA); data_secs = total_data_blocks / CAP_BLKS_PER_SEC(sbi); data_blocks = total_data_blocks % CAP_BLKS_PER_SEC(sbi); From 1005a3ca28e90c7a64fa43023f866b960a60f791 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 24 Jul 2025 16:01:44 +0800 Subject: [PATCH 1172/2411] f2fs: fix to trigger foreground gc during f2fs_map_blocks() in lfs mode w/ "mode=lfs" mount option, generic/299 will cause system panic as below: ------------[ cut here ]------------ kernel BUG at fs/f2fs/segment.c:2835! Call Trace: f2fs_allocate_data_block+0x6f4/0xc50 f2fs_map_blocks+0x970/0x1550 f2fs_iomap_begin+0xb2/0x1e0 iomap_iter+0x1d6/0x430 __iomap_dio_rw+0x208/0x9a0 f2fs_file_write_iter+0x6b3/0xfa0 aio_write+0x15d/0x2e0 io_submit_one+0x55e/0xab0 __x64_sys_io_submit+0xa5/0x230 do_syscall_64+0x84/0x2f0 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0010:new_curseg+0x70f/0x720 The root cause of we run out-of-space is: in f2fs_map_blocks(), f2fs may trigger foreground gc only if it allocates any physical block, it will be a little bit later when there is multiple threads writing data w/ aio/dio/bufio method in parallel, since we always use OPU in lfs mode, so f2fs_map_blocks() does block allocations aggressively. 
In order to fix this issue, let's give f2fs a chance to trigger foreground gc prior to block allocation in f2fs_map_blocks(). Fixes: 36abef4e796d ("f2fs: introduce mode=lfs mount option") Cc: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e11dd1431e5b..083d66b8ba07 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1573,8 +1573,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) end = pgofs + maxblocks; next_dnode: - if (map->m_may_create) + if (map->m_may_create) { + if (f2fs_lfs_mode(sbi)) + f2fs_balance_fs(sbi, true); f2fs_map_lock(sbi, flag); + } /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); From eb3bb145280b6c857a748731a229698e4a7cf37b Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Sat, 26 Jul 2025 00:02:54 +0500 Subject: [PATCH 1173/2411] ASoC: SOF: amd: acp-loader: Use GFP_KERNEL for DMA allocations in resume context Replace GFP_ATOMIC with GFP_KERNEL for dma_alloc_coherent() calls. This change improves memory allocation reliability during firmware loading, particularly during system resume when memory pressure is high. Because of using GFP_KERNEL, reclaim can happen which can reduce the probability of failure. Fixes memory allocation failures observed during system resume with fragmented memory conditions.
snd_sof_amd_vangogh 0000:04:00.5: error: failed to load DSP firmware after resume -12 Fixes: 145d7e5ae8f4e ("ASoC: SOF: amd: add option to use sram for data bin loading") Fixes: 7e51a9e38ab20 ("ASoC: SOF: amd: Add fw loader and renoir dsp ops to load firmware") Cc: stable@vger.kernel.org Signed-off-by: Muhammad Usama Anjum Link: https://patch.msgid.link/20250725190254.1081184-1-usama.anjum@collabora.com Signed-off-by: Mark Brown --- sound/soc/sof/amd/acp-loader.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/sof/amd/acp-loader.c b/sound/soc/sof/amd/acp-loader.c index ea105227227d..98324bbade15 100644 --- a/sound/soc/sof/amd/acp-loader.c +++ b/sound/soc/sof/amd/acp-loader.c @@ -65,7 +65,7 @@ int acp_dsp_block_write(struct snd_sof_dev *sdev, enum snd_sof_fw_blk_type blk_t dma_size = page_count * ACP_PAGE_SIZE; adata->bin_buf = dma_alloc_coherent(&pci->dev, dma_size, &adata->sha_dma_addr, - GFP_ATOMIC); + GFP_KERNEL); if (!adata->bin_buf) return -ENOMEM; } @@ -77,7 +77,7 @@ int acp_dsp_block_write(struct snd_sof_dev *sdev, enum snd_sof_fw_blk_type blk_t adata->data_buf = dma_alloc_coherent(&pci->dev, ACP_DEFAULT_DRAM_LENGTH, &adata->dma_addr, - GFP_ATOMIC); + GFP_KERNEL); if (!adata->data_buf) return -ENOMEM; } @@ -90,7 +90,7 @@ int acp_dsp_block_write(struct snd_sof_dev *sdev, enum snd_sof_fw_blk_type blk_t adata->sram_data_buf = dma_alloc_coherent(&pci->dev, ACP_DEFAULT_SRAM_LENGTH, &adata->sram_dma_addr, - GFP_ATOMIC); + GFP_KERNEL); if (!adata->sram_data_buf) return -ENOMEM; } From 3556dac8289456bc8b28670546b969f543967856 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 10 Jul 2025 20:57:37 -0400 Subject: [PATCH 1174/2411] drm/amd/display: Fix divide by zero when calculating min ODM factor [WHY&HOW] If the debug option is set to disable_dsc the max slice width and/or dispclk can be zero. This causes a divide by zero when calculating the min ODM combine factor. Add a check to ensure they are valid first. 
Reviewed-by: Wenjing Liu Signed-off-by: Dillon Varone Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 29 +++++++++++---------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index a454d16e6586..1f53a9f0c0ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -152,7 +152,7 @@ uint32_t dc_bandwidth_in_kbps_from_timing( } /* Forward Declerations */ -static unsigned int get_min_slice_count_for_odm( +static unsigned int get_min_dsc_slice_count_for_odm( const struct display_stream_compressor *dsc, const struct dsc_enc_caps *dsc_enc_caps, const struct dc_crtc_timing *timing); @@ -466,7 +466,7 @@ bool dc_dsc_compute_bandwidth_range( struct dc_dsc_bw_range *range) { bool is_dsc_possible = false; - unsigned int min_slice_count; + unsigned int min_dsc_slice_count; struct dsc_enc_caps dsc_enc_caps; struct dsc_enc_caps dsc_common_caps; struct dc_dsc_config config = {0}; @@ -478,14 +478,14 @@ bool dc_dsc_compute_bandwidth_range( get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz); - min_slice_count = get_min_slice_count_for_odm(dsc, &dsc_enc_caps, timing); + min_dsc_slice_count = get_min_dsc_slice_count_for_odm(dsc, &dsc_enc_caps, timing); is_dsc_possible = intersect_dsc_caps(dsc_sink_caps, &dsc_enc_caps, timing->pixel_encoding, &dsc_common_caps); if (is_dsc_possible) is_dsc_possible = setup_dsc_config(dsc_sink_caps, &dsc_enc_caps, 0, timing, - &options, link_encoding, min_slice_count, &config); + &options, link_encoding, min_dsc_slice_count, &config); if (is_dsc_possible) is_dsc_possible = decide_dsc_bandwidth_range(min_bpp_x16, max_bpp_x16, @@ -593,14 +593,12 @@ static void build_dsc_enc_caps( struct dc *dc; - memset(&single_dsc_enc_caps, 0, sizeof(struct dsc_enc_caps)); - if (!dsc || !dsc->ctx || 
!dsc->ctx->dc || !dsc->funcs->dsc_get_single_enc_caps) return; dc = dsc->ctx->dc; - if (!dc->clk_mgr || !dc->clk_mgr->funcs->get_max_clock_khz || !dc->res_pool) + if (!dc->clk_mgr || !dc->clk_mgr->funcs->get_max_clock_khz || !dc->res_pool || dc->debug.disable_dsc) return; /* get max DSCCLK from clk_mgr */ @@ -634,7 +632,7 @@ static inline uint32_t dsc_div_by_10_round_up(uint32_t value) return (value + 9) / 10; } -static unsigned int get_min_slice_count_for_odm( +static unsigned int get_min_dsc_slice_count_for_odm( const struct display_stream_compressor *dsc, const struct dsc_enc_caps *dsc_enc_caps, const struct dc_crtc_timing *timing) @@ -651,6 +649,10 @@ static unsigned int get_min_slice_count_for_odm( } } + /* validate parameters */ + if (max_dispclk_khz == 0 || dsc_enc_caps->max_slice_width == 0) + return 1; + /* consider minimum odm slices required due to * 1) display pipe throughput (dispclk) * 2) max image width per slice @@ -669,13 +671,12 @@ static void get_dsc_enc_caps( { memset(dsc_enc_caps, 0, sizeof(struct dsc_enc_caps)); - if (!dsc) + if (!dsc || !dsc->ctx || !dsc->ctx->dc || dsc->ctx->dc->debug.disable_dsc) return; /* check if reported cap global or only for a single DCN DSC enc */ if (dsc->funcs->dsc_get_enc_caps) { - if (!dsc->ctx->dc->debug.disable_dsc) - dsc->funcs->dsc_get_enc_caps(dsc_enc_caps, pixel_clock_100Hz); + dsc->funcs->dsc_get_enc_caps(dsc_enc_caps, pixel_clock_100Hz); } else { build_dsc_enc_caps(dsc, dsc_enc_caps); } @@ -1295,10 +1296,10 @@ bool dc_dsc_compute_config( { bool is_dsc_possible = false; struct dsc_enc_caps dsc_enc_caps; - unsigned int min_slice_count; + unsigned int min_dsc_slice_count; get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz); - min_slice_count = get_min_slice_count_for_odm(dsc, &dsc_enc_caps, timing); + min_dsc_slice_count = get_min_dsc_slice_count_for_odm(dsc, &dsc_enc_caps, timing); is_dsc_possible = setup_dsc_config(dsc_sink_caps, &dsc_enc_caps, @@ -1306,7 +1307,7 @@ bool dc_dsc_compute_config( 
timing, options, link_encoding, - min_slice_count, + min_dsc_slice_count, dsc_cfg); return is_dsc_possible; } From c90f2e1172c51fa25492471dc9910e2d7c1444b9 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 15 Jul 2025 16:50:22 -0700 Subject: [PATCH 1175/2411] drm/amdgpu: Initialize data to NULL in imu_v12_0_program_rlc_ram() After a recent change in clang to expose uninitialized warnings from const variables and pointers [1], there is a warning in imu_v12_0_program_rlc_ram() because data is passed uninitialized to program_imu_rlc_ram(): drivers/gpu/drm/amd/amdgpu/imu_v12_0.c:374:30: error: variable 'data' is uninitialized when used here [-Werror,-Wuninitialized] 374 | program_imu_rlc_ram(adev, data, (const u32)size); | ^~~~ As this warning happens early in clang's frontend, it does not realize that due to the assignment of r to -EINVAL, program_imu_rlc_ram() is never actually called, and even if it were, data would not be dereferenced because size is 0. Just initialize data to NULL to silence the warning, as the commit that added program_imu_rlc_ram() mentioned it would eventually be used over the old method, at which point data can be properly initialized and used. 
Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/2107 Fixes: 56159fffaab5 ("drm/amdgpu: use new method to program rlc ram") Link: https://github.com/llvm/llvm-project/commit/2464313eef01c5b1edf0eccf57a32cdee01472c7 [1] Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/imu_v12_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c index df898dbb746e..8cb6b1854d24 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c @@ -362,7 +362,7 @@ static void program_imu_rlc_ram(struct amdgpu_device *adev, static void imu_v12_0_program_rlc_ram(struct amdgpu_device *adev) { u32 reg_data, size = 0; - const u32 *data; + const u32 *data = NULL; int r = -EINVAL; WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, 0x2); From b4a69f7f29c8a459ad6b4d8a8b72450f1d9fd288 Mon Sep 17 00:00:00 2001 From: Peter Shkenev Date: Thu, 17 Jul 2025 23:48:17 +0300 Subject: [PATCH 1176/2411] drm/amdgpu: check if hubbub is NULL in debugfs/amdgpu_dm_capabilities HUBBUB structure is not initialized on DCE hardware, so check if it is NULL to avoid null dereference while accessing amdgpu_dm_capabilities file in debugfs. 
Signed-off-by: Peter Shkenev Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index c7d13e743e6c..b726bcd18e29 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -3988,7 +3988,7 @@ static int capabilities_show(struct seq_file *m, void *unused) struct hubbub *hubbub = dc->res_pool->hubbub; - if (hubbub->funcs->get_mall_en) + if (hubbub && hubbub->funcs->get_mall_en) hubbub->funcs->get_mall_en(hubbub, &mall_in_use); if (dc->cap_funcs.get_subvp_en) From 284d4dfe850e665f0e7d4dfaf4d3d3da76d11fb0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 24 Jun 2025 11:22:26 -0400 Subject: [PATCH 1177/2411] drm/amdgpu: track whether a queue is a kernel queue in amdgpu_mqd_prop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Used to set the MQD appropriately for each queue type. Kernel queues have additional privileges.
Acked-by: Christian König Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.16.x --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a1737556a77e..ef3af170dda4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -883,6 +883,7 @@ struct amdgpu_mqd_prop { uint64_t csa_addr; uint64_t fence_address; bool tmz_queue; + bool kernel_queue; }; struct amdgpu_mqd { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index a5c3f64cbce6..6379bb25bf5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -719,6 +719,7 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring, prop->eop_gpu_addr = ring->eop_gpu_addr; prop->use_doorbell = ring->use_doorbell; prop->doorbell_index = ring->doorbell_index; + prop->kernel_queue = true; /* map_queues packet doesn't need activate the queue, * so only kiq need set this field. From 1f02f2044bda1db1fd995bc35961ab075fa7b5a2 Mon Sep 17 00:00:00 2001 From: Gang Ba Date: Tue, 8 Jul 2025 14:36:13 -0400 Subject: [PATCH 1178/2411] drm/amdgpu: Avoid extra evict-restore process. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If vm belongs to another process, this is fclose after fork, wait may enable signaling KFD eviction fence and cause parent process queue evicted. 
[677852.634569] amdkfd_fence_enable_signaling+0x56/0x70 [amdgpu] [677852.634814] __dma_fence_enable_signaling+0x3e/0xe0 [677852.634820] dma_fence_wait_timeout+0x3a/0x140 [677852.634825] amddma_resv_wait_timeout+0x7f/0xf0 [amdkcl] [677852.634831] amdgpu_vm_wait_idle+0x2d/0x60 [amdgpu] [677852.635026] amdgpu_flush+0x34/0x50 [amdgpu] [677852.635208] filp_flush+0x38/0x90 [677852.635213] filp_close+0x14/0x30 [677852.635216] do_close_on_exec+0xdd/0x130 [677852.635221] begin_new_exec+0x1da/0x490 [677852.635225] load_elf_binary+0x307/0xea0 [677852.635231] ? srso_alias_return_thunk+0x5/0xfbef5 [677852.635235] ? ima_bprm_check+0xa2/0xd0 [677852.635240] search_binary_handler+0xda/0x260 [677852.635245] exec_binprm+0x58/0x1a0 [677852.635249] bprm_execve.part.0+0x16f/0x210 [677852.635254] bprm_execve+0x45/0x80 [677852.635257] do_execveat_common.isra.0+0x190/0x200 Suggested-by: Christian König Signed-off-by: Gang Ba Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d5c0637d7392..5cacf5717016 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2414,13 +2414,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, */ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { - timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, - DMA_RESV_USAGE_BOOKKEEP, - true, timeout); + timeout = drm_sched_entity_flush(&vm->immediate, timeout); if (timeout <= 0) return timeout; - return dma_fence_wait_timeout(vm->last_unlocked, true, timeout); + return drm_sched_entity_flush(&vm->delayed, timeout); } static void amdgpu_vm_destroy_task_info(struct kref *kref) From 9c2883057b3c861879b647f34e8bc448954e8729 Mon Sep 17 00:00:00 2001 From: Lauri Tirkkonen Date: Mon, 21 Jul 2025 
09:59:40 +0900 Subject: [PATCH 1179/2411] drm/amd/display: fix initial backlight brightness calculation DIV_ROUND_CLOSEST(x, 100) returns either 0 or 1 if 0 <= x <= 100, so the multiplication by (max - min) must be performed before the division, not after it, to scale the value properly. Cc: stable@vger.kernel.org Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/aH2Q_HJvxKbW74vU@hacktheplanet.fi Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 096b23ad4845..468c9c5a6773 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4983,9 +4983,9 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) caps = &dm->backlight_caps[aconnector->bl_idx]; if (get_brightness_range(caps, &min, &max)) { if (power_supply_is_system_supplied() > 0) - props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps->ac_level, 100); + props.brightness = DIV_ROUND_CLOSEST((max - min) * caps->ac_level, 100); else - props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps->dc_level, 100); + props.brightness = DIV_ROUND_CLOSEST((max - min) * caps->dc_level, 100); /* min is zero, so max needs to be adjusted */ props.max_brightness = max - min; drm_dbg(drm, "Backlight caps: min: %d, max: %d, ac %d, dc %d\n", min, max, From dfe9707c075a365ccd1f82cceabdf6ab55a77b5f Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 21 Jul 2025 18:52:36 +0530 Subject: [PATCH 1180/2411] drm/amd/display: Fix misuse of /** to /* in 'dce_i2c_hw.c' Fix the comment style before cntl_stuck_hw_workaround() by replacing '/**' with '/*' since it is not a kdoc comment. Fixes the below with gcc W=1: display/dc/dce/dce_i2c_hw.c:380: warning: This comment starts with '/**', but isn't a kernel-doc comment.
Refer Documentation/doc-guide/kernel-doc.rst * If we boot without an HDMI display, the I2C engine does not get initialized Fixes: 04d57f4462a6 ("drm/amd/display: Workaround for stuck I2C arbitrage") Cc: Alvin Lee Cc: Dominik Kaszewski Cc: Ivan Lipski Cc: Harry Wentland Cc: Tom Chung Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c index 4e06468a6284..0421b267a0b5 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c @@ -377,10 +377,16 @@ static bool setup_engine( } /** + * cntl_stuck_hw_workaround - Workaround for I2C engine stuck state + * @dce_i2c_hw: Pointer to dce_i2c_hw structure + * * If we boot without an HDMI display, the I2C engine does not get initialized * correctly. One of its symptoms is that SW_USE_I2C does not get cleared after - * acquire, so that after setting SW_DONE_USING_I2C on release, the engine gets + * acquire. After setting SW_DONE_USING_I2C on release, the engine gets * immediately reacquired by SW, preventing DMUB from using it. + * + * This function checks the I2C arbitration status and applies a release + * workaround if necessary. */ static void cntl_stuck_hw_workaround(struct dce_i2c_hw *dce_i2c_hw) { From 8e0d1edb5c16732b695eaf4bd7096b1569817cf0 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 24 Jul 2025 15:16:18 +0800 Subject: [PATCH 1181/2411] drm/amd/amdgpu: fix missing lock for cper.ring->rptr/wptr access Add lock protection for 'ring->wptr'/'ring->rptr' to ensure the correct execution. 
Fixes: 8652920d2c00 ("drm/amdgpu: add mutex lock for cper ring") Signed-off-by: Yang Wang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c index 15dde1f50328..25252231a68a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c @@ -459,7 +459,7 @@ static u32 amdgpu_cper_ring_get_ent_sz(struct amdgpu_ring *ring, u64 pos) void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count) { - u64 pos, wptr_old, rptr = *ring->rptr_cpu_addr & ring->ptr_mask; + u64 pos, wptr_old, rptr; int rec_cnt_dw = count >> 2; u32 chunk, ent_sz; u8 *s = (u8 *)src; @@ -472,9 +472,11 @@ void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count) return; } - wptr_old = ring->wptr; - mutex_lock(&ring->adev->cper.ring_lock); + + wptr_old = ring->wptr; + rptr = *ring->rptr_cpu_addr & ring->ptr_mask; + while (count) { ent_sz = amdgpu_cper_ring_get_ent_sz(ring, ring->wptr); chunk = umin(ent_sz, count); From 0395cde08e1f7eee810b5799466e41635a21e599 Mon Sep 17 00:00:00 2001 From: Frank Min Date: Wed, 4 Jun 2025 21:39:34 +0800 Subject: [PATCH 1182/2411] drm/amdgpu: add kicker fws loading for gfx12/smu14/psp14 1. Add kicker firmwares loading for gfx12/smu14/psp14 2. 
Register additional MODULE_FIRMWARE entries for kicker fws - gc_12_0_1_rlc_kicker.bin - gc_12_0_1_imu_kicker.bin - psp_14_0_3_sos_kicker.bin - psp_14_0_3_ta_kicker.bin - smu_14_0_3_kicker.bin Signed-off-by: Frank Min Reviewed-by: Gui Chengming Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 14 ++++++++++---- drivers/gpu/drm/amd/amdgpu/imu_v12_0.c | 11 ++++++++--- drivers/gpu/drm/amd/amdgpu/psp_v14_0.c | 2 ++ drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c | 11 ++++++++--- 5 files changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index a0b50a8ac9c4..e96f24e9ad57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -32,6 +32,7 @@ static const struct kicker_device kicker_device_list[] = { {0x744B, 0x00}, + {0x7551, 0xC8} }; static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header *hdr) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 09bf72237d1d..3e138527d534 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -79,6 +79,7 @@ MODULE_FIRMWARE("amdgpu/gc_12_0_1_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_me.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc_kicker.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_toc.bin"); static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = { @@ -586,7 +587,7 @@ static int gfx_v12_0_init_toc_microcode(struct amdgpu_device *adev, const char * static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) { - char ucode_prefix[15]; + char ucode_prefix[30]; int err; const struct rlc_firmware_header_v2_0 *rlc_hdr; uint16_t version_major; @@ -613,9 +614,14 @@ static int 
gfx_v12_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK); if (!amdgpu_sriov_vf(adev)) { - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, - AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_rlc.bin", ucode_prefix); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_rlc_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_rlc.bin", ucode_prefix); if (err) goto out; rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c index 8cb6b1854d24..58cd87db8061 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c @@ -34,12 +34,13 @@ MODULE_FIRMWARE("amdgpu/gc_12_0_0_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_1_imu_kicker.bin"); #define TRANSFER_RAM_MASK 0x001c0000 static int imu_v12_0_init_microcode(struct amdgpu_device *adev) { - char ucode_prefix[15]; + char ucode_prefix[30]; int err; const struct imu_firmware_header_v1_0 *imu_hdr; struct amdgpu_firmware_info *info = NULL; @@ -47,8 +48,12 @@ static int imu_v12_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_imu.bin", ucode_prefix); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu.bin", ucode_prefix); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c index 
36ef4a72ad1d..38dfc5c19f2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c @@ -34,7 +34,9 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_2_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_5_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_5_ta.bin"); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index 76c1adda83db..f9b0938c57ea 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -62,13 +62,14 @@ const int decoded_link_width[8] = {0, 1, 2, 4, 8, 12, 16, 32}; MODULE_FIRMWARE("amdgpu/smu_14_0_2.bin"); MODULE_FIRMWARE("amdgpu/smu_14_0_3.bin"); +MODULE_FIRMWARE("amdgpu/smu_14_0_3_kicker.bin"); #define ENABLE_IMU_ARG_GFXOFF_ENABLE 1 int smu_v14_0_init_microcode(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - char ucode_prefix[15]; + char ucode_prefix[30]; int err = 0; const struct smc_firmware_header_v1_0 *hdr; const struct common_firmware_header *header; @@ -79,8 +80,12 @@ int smu_v14_0_init_microcode(struct smu_context *smu) return 0; amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s.bin", ucode_prefix); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", ucode_prefix); if (err) goto out; From 02f3ec53177243d32ee8b6f8ba99136d7887ee3a Mon Sep 17 00:00:00 2001 From: Roman Li Date: Mon, 14 Jul 2025 14:37:33 -0400 Subject: [PATCH 1183/2411] drm/amd/display: 
Disable dsc_power_gate for dcn314 by default [Why] "REG_WAIT timeout 1us * 1000 tries - dcn314_dsc_pg_control line" warnings seen after resuming from s2idle. DCN314 has issues with DSC power gating that cause REG_WAIT timeouts when attempting to power down DSC blocks. [How] Disable dsc_power_gate for dcn314 by default. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Roman Li Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index de708fdc1e80..663c49cce4aa 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -926,6 +926,7 @@ static const struct dc_debug_options debug_defaults_drv = { .seamless_boot_odm_combine = true, .enable_legacy_fast_update = true, .using_dml2 = false, + .disable_dsc_power_gate = true, }; static const struct dc_panel_config panel_config_defaults = { From a5ce8695d6d1b40d6960d2d298b579042c158f25 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sun, 6 Jul 2025 08:38:05 -0500 Subject: [PATCH 1184/2411] drm/amd/display: Avoid configuring PSR granularity if PSR-SU not supported [Why] If PSR-SU is disabled on the link, then configuring su_y granularity in mod_power_calc_psr_configs() can lead to assertions in psr_su_set_dsc_slice_height(). [How] Check the PSR version in amdgpu_dm_link_setup_psr() to determine whether or not to configure granularity. 
Reviewed-by: Sun peng (Leo) Li Signed-off-by: Mario Limonciello Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index f984cb0cb889..ff7b867ae98b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -119,8 +119,10 @@ bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream) psr_config.allow_multi_disp_optimizations = (amdgpu_dc_feature_mask & DC_PSR_ALLOW_MULTI_DISP_OPT); - if (!psr_su_set_dsc_slice_height(dc, link, stream, &psr_config)) - return false; + if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) { + if (!psr_su_set_dsc_slice_height(dc, link, stream, &psr_config)) + return false; + } ret = dc_link_setup_psr(link, stream, &psr_config, &psr_context); From b174084b3fe15ad1acc69530e673c1535d2e4f85 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 15 Jul 2025 14:41:46 -0500 Subject: [PATCH 1185/2411] drm/amd/display: Only finalize atomic_obj if it was initialized [Why] If amdgpu_dm failed to initialize before amdgpu_dm_initialize_drm_device() completed then freeing atomic_obj will lead to list corruption. [How] Check if atomic_obj state is initialized before trying to free.
Reviewed-by: Harry Wentland Signed-off-by: Mario Limonciello Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 468c9c5a6773..5db0df2b0ecb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5410,7 +5410,8 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) static void amdgpu_dm_destroy_drm_device(struct amdgpu_display_manager *dm) { - drm_atomic_private_obj_fini(&dm->atomic_obj); + if (dm->atomic_obj.state) + drm_atomic_private_obj_fini(&dm->atomic_obj); } /****************************************************************************** From 2b6943df54136f40aff8a6d7ba7c26724d89a0bd Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 15 May 2025 15:16:17 -0500 Subject: [PATCH 1186/2411] drm/amd/display: Pass up errors for reset GPU that fails to init HW [Why] If a GPU is in reset and the hardware fails to initialize the rest of the resume sequence shouldn't be run. [How] Pass error code up to caller of dm_resume(). 
Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5db0df2b0ecb..2a175fc0399c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3398,8 +3398,10 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) link_enc_cfg_copy(adev->dm.dc->current_state, dc_state); r = dm_dmub_hw_init(adev); - if (r) + if (r) { drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r); + return r; + } dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0); dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); From 2d418e4fd9f1eca7dfce80de86dd702d36a06a25 Mon Sep 17 00:00:00 2001 From: Ivan Lipski Date: Thu, 17 Jul 2025 13:58:35 -0400 Subject: [PATCH 1187/2411] drm/amd/display: Allow DCN301 to clear update flags [Why & How] Not letting DCN301 clear its update flags after a surface/stream update results in artifacts when switching between active overlay planes. The issue is known and has been solved initially.
See below: (https://gitlab.freedesktop.org/drm/amd/-/issues/3441) Fixes: f354556e29f4 ("drm/amd/display: limit clear_update_flags t dcn32 and above") Reviewed-by: Mario Limonciello Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index c31f7f8e409f..28aca7017f0f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5443,7 +5443,8 @@ bool dc_update_planes_and_stream(struct dc *dc, else ret = update_planes_and_stream_v2(dc, srf_updates, surface_count, stream, stream_update); - if (ret && dc->ctx->dce_version >= DCN_VERSION_3_2) + if (ret && (dc->ctx->dce_version >= DCN_VERSION_3_2 || + dc->ctx->dce_version == DCN_VERSION_3_01)) clear_update_flags(srf_updates, surface_count, stream); return ret; From a0b34e4c8663b13e45c78267b4de3004b1a72490 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 18 Jul 2025 15:53:54 -0400 Subject: [PATCH 1188/2411] drm/amdgpu: update mmhub 4.1.0 client id mappings Update the client id mapping so the correct clients get printed when there is a mmhub page fault. 
Tested-by: David (Ming Qiang) Wu Reviewed-by: David (Ming Qiang) Wu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c | 34 +++++++++-------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c index f2ab5001b492..951998454b25 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c @@ -37,39 +37,31 @@ static const char *mmhub_client_ids_v4_1_0[][2] = { [0][0] = "VMC", [4][0] = "DCEDMC", - [5][0] = "DCEVGA", [6][0] = "MP0", [7][0] = "MP1", [8][0] = "MPIO", - [16][0] = "HDP", - [17][0] = "LSDMA", - [18][0] = "JPEG", - [19][0] = "VCNU0", - [21][0] = "VSCH", - [22][0] = "VCNU1", - [23][0] = "VCN1", - [32+20][0] = "VCN0", - [2][1] = "DBGUNBIO", + [16][0] = "LSDMA", + [17][0] = "JPEG", + [19][0] = "VCNU", + [22][0] = "VSCH", + [23][0] = "HDP", + [32+23][0] = "VCNRD", [3][1] = "DCEDWB", [4][1] = "DCEDMC", - [5][1] = "DCEVGA", [6][1] = "MP0", [7][1] = "MP1", [8][1] = "MPIO", [10][1] = "DBGU0", [11][1] = "DBGU1", - [12][1] = "DBGU2", - [13][1] = "DBGU3", + [12][1] = "DBGUNBIO", [14][1] = "XDP", [15][1] = "OSSSYS", - [16][1] = "HDP", - [17][1] = "LSDMA", - [18][1] = "JPEG", - [19][1] = "VCNU0", - [20][1] = "VCN0", - [21][1] = "VSCH", - [22][1] = "VCNU1", - [23][1] = "VCN1", + [16][1] = "LSDMA", + [17][1] = "JPEG", + [18][1] = "VCNWR", + [19][1] = "VCNU", + [22][1] = "VSCH", + [23][1] = "HDP", }; static uint32_t mmhub_v4_1_0_get_invalidate_req(unsigned int vmid, From a2f54ff15c3bdc0132e20aae041607e2320dbd73 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 28 Jul 2025 13:17:00 +0900 Subject: [PATCH 1189/2411] scsi: core: sysfs: Correct sysfs attributes access rights The SCSI sysfs attributes "supported_mode" and "active_mode" do not define a store method and thus cannot be modified. 
Correct the DEVICE_ATTR() call for these two attributes to not include S_IWUSR to allow write access as they are read-only. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20250728041700.76660-1-dlemoal@kernel.org Reviewed-by: John Garry Reviewed-by: Johannes Thumshin Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index d772258e29ad..e6464b998960 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -265,7 +265,7 @@ show_shost_supported_mode(struct device *dev, struct device_attribute *attr, return show_shost_mode(supported_mode, buf); } -static DEVICE_ATTR(supported_mode, S_IRUGO | S_IWUSR, show_shost_supported_mode, NULL); +static DEVICE_ATTR(supported_mode, S_IRUGO, show_shost_supported_mode, NULL); static ssize_t show_shost_active_mode(struct device *dev, @@ -279,7 +279,7 @@ show_shost_active_mode(struct device *dev, return show_shost_mode(shost->active_mode, buf); } -static DEVICE_ATTR(active_mode, S_IRUGO | S_IWUSR, show_shost_active_mode, NULL); +static DEVICE_ATTR(active_mode, S_IRUGO, show_shost_active_mode, NULL); static int check_reset_type(const char *str) { From a045246b684badf0545f252651bdc008b2e80835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Tue, 22 Jul 2025 15:56:15 +0300 Subject: [PATCH 1190/2411] drm/i915/display: Write PHY_CMN1_CONTROL only when using AUXLess ALPM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are seeing "dmesg-warn/abort - *ERROR* PHY * failed after 3 retries" since we started configuring LFPS sending. According to Bspec Configuring LFPS sending is needed only when using AUXLess ALPM. This patch avoids these failures by configuring LFPS sending only when using AUXLess ALPM. 
Bspec: 68849 Fixes: 9dc619680de4 ("drm/i915/display: Add function to configure LFPS sending") Signed-off-by: Jouni Högander Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20250722125618.1842615-2-jouni.hogander@intel.com (cherry picked from commit 8265ce0e0e15ba435eb2af72f2b821e203ebcdb9) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index ed8e640b96b0..2b0305bb04cd 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -3240,11 +3240,10 @@ void intel_lnl_mac_transmit_lfps(struct intel_encoder *encoder, { struct intel_display *display = to_intel_display(encoder); u8 owned_lane_mask = intel_cx0_get_owned_lane_mask(encoder); - bool enable = intel_alpm_is_alpm_aux_less(enc_to_intel_dp(encoder), - crtc_state); int i; - if (DISPLAY_VER(display) < 20) + if (DISPLAY_VER(display) < 20 || + !intel_alpm_is_alpm_aux_less(enc_to_intel_dp(encoder), crtc_state)) return; for (i = 0; i < 4; i++) { @@ -3256,8 +3255,7 @@ void intel_lnl_mac_transmit_lfps(struct intel_encoder *encoder, intel_cx0_rmw(encoder, lane_mask, PHY_CMN1_CONTROL(tx, 0), CONTROL0_MAC_TRANSMIT_LFPS, - enable ? CONTROL0_MAC_TRANSMIT_LFPS : 0, - MB_WRITE_COMMITTED); + CONTROL0_MAC_TRANSMIT_LFPS, MB_WRITE_COMMITTED); } } From c338923c4c8a89bb81f585732b9b49fcf9465cdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Tue, 22 Jul 2025 15:56:16 +0300 Subject: [PATCH 1191/2411] drm/i915/display: Avoid unnecessarily calling intel_cx0_get_owned_lane_mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we are always calling intel_cx0_get_owned_lane_mask when intel_lnl_mac_transmit_lfps is called. Avoid this in cases where it's not needed. 
Signed-off-by: Jouni Högander Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20250722125618.1842615-3-jouni.hogander@intel.com (cherry picked from commit d487ed7e2b4ab3126239ab93324405eb1e45ccf5) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index 2b0305bb04cd..2993dabdbee6 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -3239,13 +3239,14 @@ void intel_lnl_mac_transmit_lfps(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(encoder); - u8 owned_lane_mask = intel_cx0_get_owned_lane_mask(encoder); int i; + u8 owned_lane_mask; if (DISPLAY_VER(display) < 20 || !intel_alpm_is_alpm_aux_less(enc_to_intel_dp(encoder), crtc_state)) return; + owned_lane_mask = intel_cx0_get_owned_lane_mask(encoder); for (i = 0; i < 4; i++) { int tx = i % 2 + 1; u8 lane_mask = i < 2 ? INTEL_CX0_LANE0 : INTEL_CX0_LANE1; From 3eb63578d8d9bbaffc204a911cfae6763e895dfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Tue, 22 Jul 2025 15:56:17 +0300 Subject: [PATCH 1192/2411] drm/i915/display: Ensure phy is accessible on lfps configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure phy is accessible on lfps configuration by adding intel_cx0_phy_transaction_begin/end around it. 
Fixes: 9dc619680de4 ("drm/i915/display: Add function to configure LFPS sending") Suggested-by: Gustavo Sousa Signed-off-by: Jouni Högander Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20250722125618.1842615-4-jouni.hogander@intel.com (cherry picked from commit cf433f94f188782166598300c4c05274fd13c5a7) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index 2993dabdbee6..a203937d66db 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -3239,6 +3239,7 @@ void intel_lnl_mac_transmit_lfps(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(encoder); + intel_wakeref_t wakeref; int i; u8 owned_lane_mask; @@ -3247,6 +3248,9 @@ void intel_lnl_mac_transmit_lfps(struct intel_encoder *encoder, return; owned_lane_mask = intel_cx0_get_owned_lane_mask(encoder); + + wakeref = intel_cx0_phy_transaction_begin(encoder); + for (i = 0; i < 4; i++) { int tx = i % 2 + 1; u8 lane_mask = i < 2 ? 
INTEL_CX0_LANE0 : INTEL_CX0_LANE1; @@ -3258,6 +3262,8 @@ void intel_lnl_mac_transmit_lfps(struct intel_encoder *encoder, CONTROL0_MAC_TRANSMIT_LFPS, CONTROL0_MAC_TRANSMIT_LFPS, MB_WRITE_COMMITTED); } + + intel_cx0_phy_transaction_end(encoder, wakeref); } static u8 cx0_power_control_disable_val(struct intel_encoder *encoder) From 5a569ef4d4ab184a481dd8ecb58f464a89b44d2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Tue, 22 Jul 2025 15:56:18 +0300 Subject: [PATCH 1193/2411] drm/i915/display: Set C10_VDR_CTRL_MSGBUS_ACCESS before phy reg read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to C10 VDR Register programming sequence we need set C10_VDR_CTRL_MSGBUS_ACCESS before accessing PHY internal registers from MsgBus. v2: set C10_VDR_CTRL_MSGBUS_ACCESS once for all owned lanes Bspec: 68962 Fixes: 9dc619680de4 ("drm/i915/display: Add function to configure LFPS sending") Suggested-by: Gustavo Sousa Signed-off-by: Jouni Högander Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20250722125618.1842615-5-jouni.hogander@intel.com (cherry picked from commit 8921dce70d46e3156b5a0b21675f5ac90903d81d) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index a203937d66db..801235a5bc0a 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -3251,6 +3251,10 @@ void intel_lnl_mac_transmit_lfps(struct intel_encoder *encoder, wakeref = intel_cx0_phy_transaction_begin(encoder); + if (intel_encoder_is_c10phy(encoder)) + intel_cx0_rmw(encoder, owned_lane_mask, PHY_C10_VDR_CONTROL(1), 0, + C10_VDR_CTRL_MSGBUS_ACCESS, MB_WRITE_COMMITTED); + for (i = 0; i < 4; i++) { int tx = i % 2 + 1; u8 lane_mask = i < 2 ? 
INTEL_CX0_LANE0 : INTEL_CX0_LANE1; From 459779d04ae8dfd4083679a7bf9d72af165d1023 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 16 Jun 2025 15:28:56 +0900 Subject: [PATCH 1194/2411] block: Improve read ahead size for rotational devices For a device that does not advertise an optimal I/O size, the function blk_apply_bdi_limits() defaults to an initial setting of the ra_pages field of struct backing_dev_info to VM_READAHEAD_PAGES, that is, 128 KB. This low I/O size value is far from being optimal for hard-disk devices: when reading files from multiple contexts using buffered I/Os, the seek overhead between the small read commands generated to read-ahead multiple files will significantly limit the performance that can be achieved. This fact applies to all ATA devices as ATA does not define an optimal I/O size and the SCSI SAT specification does not define a default value to expose to the host. Modify blk_apply_bdi_limits() to use a device max_sectors limit to calculate the ra_pages field of struct backing_dev_info, when the device is a rotational one (BLK_FEAT_ROTATIONAL feature is set). For a SCSI disk, this defaults to 2560 KB, which significantly improves performance for buffered reads. Using XFS and sequentially reading randomly selected (large) files stored on a SATA HDD, the maximum throughput achieved with 8 readers reading files with 1MB buffered I/Os increases from 122 MB/s to 167 MB/s (+36%). The improvement is even larger when reading files using 128 KB buffered I/Os, with a throughput increasing from 57 MB/s to 165 MB/s (+189%). Signed-off-by: Damien Le Moal Reviewed-by: John Garry Reviewed-by: Christoph Hellwig Reviewed-by: Martin K.
Petersen Link: https://lore.kernel.org/r/20250616062856.1629897-1-dlemoal@kernel.org Signed-off-by: Jens Axboe --- block/blk-settings.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 91449147bae9..608df4674245 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -62,16 +62,24 @@ EXPORT_SYMBOL(blk_set_stacking_limits); void blk_apply_bdi_limits(struct backing_dev_info *bdi, struct queue_limits *lim) { + u64 io_opt = lim->io_opt; + /* * For read-ahead of large files to be effective, we need to read ahead - * at least twice the optimal I/O size. + * at least twice the optimal I/O size. For rotational devices that do + * not report an optimal I/O size (e.g. ATA HDDs), use the maximum I/O + * size to avoid falling back to the (rather inefficient) small default + * read-ahead size. * * There is no hardware limitation for the read-ahead size and the user * might have increased the read-ahead size through sysfs, so don't ever * decrease it. */ + if (!io_opt && (lim->features & BLK_FEAT_ROTATIONAL)) + io_opt = (u64)lim->max_sectors << SECTOR_SHIFT; + bdi->ra_pages = max3(bdi->ra_pages, - lim->io_opt * 2 / PAGE_SIZE, + io_opt * 2 >> PAGE_SHIFT, VM_READAHEAD_PAGES); bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT; } From 448dfecc7ff807822ecd47a5c052acedca7d09e8 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 29 Jul 2025 09:14:47 +0000 Subject: [PATCH 1195/2411] block: avoid possible overflow for chunk_sectors check in blk_stack_limits() In blk_stack_limits(), we check that the t->chunk_sectors value is a multiple of the t->physical_block_size value. However, by finding the chunk_sectors value in bytes, we may overflow the unsigned int which holds chunk_sectors, so change the check to be based on sectors. Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. 
Petersen Signed-off-by: John Garry Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20250729091448.1691334-2-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- block/blk-settings.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 608df4674245..bb192d776bdb 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -857,7 +857,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, } /* chunk_sectors a multiple of the physical block size? */ - if ((t->chunk_sectors << 9) & (t->physical_block_size - 1)) { + if (t->chunk_sectors % (t->physical_block_size >> SECTOR_SHIFT)) { t->chunk_sectors = 0; t->flags |= BLK_FLAG_MISALIGNED; ret = -1; From 1da67b5b1754713b8ea0c3dd847e04790cffd91f Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 29 Jul 2025 09:14:48 +0000 Subject: [PATCH 1196/2411] block: Enforce power-of-2 physical block size The merging/splitting code and other queue limits checking depends on the physical block size being a power-of-2, so enforce it. Reviewed-by: Martin K. 
Petersen Signed-off-by: John Garry Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20250729091448.1691334-3-john.g.garry@oracle.com [axboe: add missing braces] Signed-off-by: Jens Axboe --- block/blk-settings.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index bb192d776bdb..a7a794baba72 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -320,8 +320,12 @@ int blk_validate_limits(struct queue_limits *lim) pr_warn("Invalid logical block size (%d)\n", lim->logical_block_size); return -EINVAL; } - if (lim->physical_block_size < lim->logical_block_size) + if (lim->physical_block_size < lim->logical_block_size) { lim->physical_block_size = lim->logical_block_size; + } else if (!is_power_of_2(lim->physical_block_size)) { + pr_warn("Invalid physical block size (%d)\n", lim->physical_block_size); + return -EINVAL; + } /* * The minimum I/O size defaults to the physical block size unless From 5421681bc3ef13476bd9443379cd69381e8760fa Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 29 Jul 2025 10:32:29 +0800 Subject: [PATCH 1197/2411] blk-ioc: don't hold queue_lock for ioc_lookup_icq() Currently issue io can grab queue_lock three times from bfq_bio_merge(), bfq_limit_depth() and bfq_prepare_request(), the queue_lock is not necessary if icq is already created because both queue and ioc can't be freed before io issuing is done, hence remove the unnecessary queue_lock and use rcu to protect radix tree lookup. Noted this is also a prep patch to support request batch dispatching[1]. 
[1] https://lore.kernel.org/all/20250722072431.610354-1-yukuai1@huaweicloud.com/ Signed-off-by: Yu Kuai Reviewed-by: Damien Le Moal Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20250729023229.2944898-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 18 ++---------------- block/blk-ioc.c | 16 ++++++---------- 2 files changed, 8 insertions(+), 26 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 0cb1e9873aab..f71ec0887733 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -454,17 +454,10 @@ static struct bfq_io_cq *icq_to_bic(struct io_cq *icq) */ static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q) { - struct bfq_io_cq *icq; - unsigned long flags; - if (!current->io_context) return NULL; - spin_lock_irqsave(&q->queue_lock, flags); - icq = icq_to_bic(ioc_lookup_icq(q)); - spin_unlock_irqrestore(&q->queue_lock, flags); - - return icq; + return icq_to_bic(ioc_lookup_icq(q)); } /* @@ -2457,15 +2450,8 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { struct bfq_data *bfqd = q->elevator->elevator_data; - struct request *free = NULL; - /* - * bfq_bic_lookup grabs the queue_lock: invoke it now and - * store its return value for later use, to avoid nesting - * queue_lock inside the bfqd->lock. We assume that the bic - * returned by bfq_bic_lookup does not go away before - * bfqd->lock is taken. - */ struct bfq_io_cq *bic = bfq_bic_lookup(q); + struct request *free = NULL; bool ret; spin_lock_irq(&bfqd->lock); diff --git a/block/blk-ioc.c b/block/blk-ioc.c index ce82770c72ab..9fda3906e5f5 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -308,24 +308,23 @@ int __copy_io(unsigned long clone_flags, struct task_struct *tsk) #ifdef CONFIG_BLK_ICQ /** - * ioc_lookup_icq - lookup io_cq from ioc + * ioc_lookup_icq - lookup io_cq from ioc in io issue path * @q: the associated request_queue * * Look up io_cq associated with @ioc - @q pair from @ioc. 
Must be called - * with @q->queue_lock held. + * from io issue path, either return NULL if current issue io to @q for the + * first time, or return a valid icq. */ struct io_cq *ioc_lookup_icq(struct request_queue *q) { struct io_context *ioc = current->io_context; struct io_cq *icq; - lockdep_assert_held(&q->queue_lock); - /* * icq's are indexed from @ioc using radix tree and hint pointer, - * both of which are protected with RCU. All removals are done - * holding both q and ioc locks, and we're holding q lock - if we - * find a icq which points to us, it's guaranteed to be valid. + * both of which are protected with RCU, io issue path ensures that + * both request_queue and current task are valid, the found icq + * is guaranteed to be valid until the io is done. */ rcu_read_lock(); icq = rcu_dereference(ioc->icq_hint); @@ -419,10 +418,7 @@ struct io_cq *ioc_find_get_icq(struct request_queue *q) task_unlock(current); } else { get_io_context(ioc); - - spin_lock_irq(&q->queue_lock); icq = ioc_lookup_icq(q); - spin_unlock_irq(&q->queue_lock); } if (!icq) { From 10dfd36f078423c51602a9a21ed85e8e6c947a00 Mon Sep 17 00:00:00 2001 From: Romain Gantois Date: Tue, 29 Jul 2025 11:50:57 +0200 Subject: [PATCH 1198/2411] regulator: core: correct convergence check in regulator_set_voltage() The logic in regulator_set_voltage() which checks for a non-convergence condition on a stepped regulator is flawed. regulator_set_voltage() checks if the error in target voltage has increased or decreased, and returns -EWOULDBLOCK if the error has not decreased enough. The correct non-convergence condition is: new_delta - delta > -rdev->constraints->max_uV_step or equivalently: delta - new_delta < rdev->constraints->max_uV_step But the currently used condition is: new_delta - delta > rdev->constraints->max_uV_step Which may cause an infinite loop if the voltage error doesn't converge. Fix this by correcting the convergence condition. 
Suggested-by: Jon Hunter Fixes: d511206dc7443 ("regulator: core: repeat voltage setting request for stepped regulators") Signed-off-by: Romain Gantois Link: https://patch.msgid.link/20250729-b4-regulator-stepping-fix-v1-1-3f7b8c55d7d7@bootlin.com Tested-by: Jon Hunter Reviewed-by: Jon Hunter Signed-off-by: Mark Brown --- drivers/regulator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 8ed9b96518cf..554d83c4af0c 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -3884,7 +3884,7 @@ static int regulator_set_voltage_unlocked(struct regulator *regulator, new_delta = ret; /* check that voltage is converging quickly enough */ - if (new_delta - delta > rdev->constraints->max_uV_step) { + if (delta - new_delta < rdev->constraints->max_uV_step) { ret = -EWOULDBLOCK; goto out; } From 926406a85ad895fbe6ee4577cdbc4f55245a0742 Mon Sep 17 00:00:00 2001 From: Fabrizio Castro Date: Tue, 29 Jul 2025 15:51:10 +0100 Subject: [PATCH 1199/2411] MAINTAINERS: Add entries for the RZ/V2H(P) RSPI Add the MAINTAINERS entries for the Renesas RZ/V2H(P) RSPI driver. 
Signed-off-by: Fabrizio Castro Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20250729145110.37258-1-fabrizio.castro.jz@renesas.com Signed-off-by: Mark Brown --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index addb26082fa0..b1c081f9c567 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21208,6 +21208,14 @@ S: Maintained F: Documentation/devicetree/bindings/net/renesas,rzv2h-gbeth.yaml F: drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c +RENESAS RZ/V2H(P) RSPI DRIVER +M: Fabrizio Castro +L: linux-spi@vger.kernel.org +L: linux-renesas-soc@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/spi/renesas,rzv2h-rspi.yaml +F: drivers/spi/spi-rzv2h-rspi.c + RENESAS RZ/V2H(P) USB2PHY PORT RESET DRIVER M: Fabrizio Castro M: Lad Prabhakar From 1d4c5dbba1a53aeaf2c6cc84e7ba94c436d18852 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 28 Jul 2025 09:45:44 -0700 Subject: [PATCH 1200/2411] f2fs: add gc_boost_gc_multiple sysfs node Add a sysfs knob to set a multiplier for the background GC migration window when F2FS Garbage Collection is boosted. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 7 +++++++ fs/f2fs/gc.c | 3 ++- fs/f2fs/gc.h | 1 + fs/f2fs/sysfs.c | 9 +++++++++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index c2a233f2a085..b0026d42dbe8 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -870,3 +870,10 @@ Description: This threshold is used to control triggering garbage collection whi reserved section before preallocating on pinned file. By default, the value is ovp_sections, especially, for zoned ufs, the value is 1. 
+ +What: /sys/fs/f2fs/<disk>/gc_boost_gc_multiple +Date: June 2025 +Contact: "Daeho Jeong" +Description: Set a multiplier for the background GC migration window when F2FS GC is + boosted. The range should be from 1 to the segment count in a section. + Default: 5 diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 18b9db2e98ba..a9d606b11a3b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -197,6 +197,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO; + gc_th->boost_gc_multiple = BOOST_GC_MULTIPLE; if (f2fs_sb_has_blkzoned(sbi)) { gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED; @@ -1750,7 +1751,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, !has_enough_free_blocks(sbi, sbi->gc_thread->boost_zoned_gc_percent)) window_granularity *= - BOOST_GC_MULTIPLE; + sbi->gc_thread->boost_gc_multiple; end_segno = start_segno + window_granularity; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 11fba7636af7..d7573108bb68 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -68,6 +68,7 @@ struct f2fs_gc_kthread { unsigned int no_zoned_gc_percent; unsigned int boost_zoned_gc_percent; unsigned int valid_thresh_ratio; + unsigned int boost_gc_multiple; }; struct gc_inode_list { diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index bdef926b3377..d11e8af7306d 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -852,6 +852,13 @@ static ssize_t __sbi_store(struct f2fs_attr *a, return count; } + if (!strcmp(a->attr.name, "gc_boost_gc_multiple")) { + if (t < 1 || t > SEGS_PER_SEC(sbi)) + return -EINVAL; + sbi->gc_thread->boost_gc_multiple = (unsigned int)t; + return count; + } + *ui = (unsigned int)t; return count; @@ -1078,6 +1085,7 @@ GC_THREAD_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); GC_THREAD_RW_ATTR(gc_no_zoned_gc_percent, no_zoned_gc_percent); GC_THREAD_RW_ATTR(gc_boost_zoned_gc_percent, boost_zoned_gc_percent);
GC_THREAD_RW_ATTR(gc_valid_thresh_ratio, valid_thresh_ratio); +GC_THREAD_RW_ATTR(gc_boost_gc_multiple, boost_gc_multiple); /* SM_INFO ATTR */ SM_INFO_RW_ATTR(reclaim_segments, rec_prefree_segments); @@ -1249,6 +1257,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_no_zoned_gc_percent), ATTR_LIST(gc_boost_zoned_gc_percent), ATTR_LIST(gc_valid_thresh_ratio), + ATTR_LIST(gc_boost_gc_multiple), ATTR_LIST(gc_idle), ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), From c8705cefce44fbe85ca3b180dee0e0b5f3d51dc5 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 28 Jul 2025 10:04:30 -0700 Subject: [PATCH 1201/2411] f2fs: add gc_boost_gc_greedy sysfs node Add this to control GC algorithm for boost GC. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ fs/f2fs/gc.c | 3 ++- fs/f2fs/gc.h | 1 + fs/f2fs/sysfs.c | 9 +++++++++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index b0026d42dbe8..bc0e7fefc39d 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -877,3 +877,9 @@ Contact: "Daeho Jeong" Description: Set a multiplier for the background GC migration window when F2FS GC is boosted. The range should be from 1 to the segment count in a section. Default: 5 + +What: /sys/fs/f2fs/<disk>/gc_boost_gc_greedy +Date: June 2025 +Contact: "Daeho Jeong" +Description: Control GC algorithm for boost GC.
0: cost benefit, 1: greedy + Default: 1 diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index a9d606b11a3b..098e9f71421e 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -141,7 +141,7 @@ static int gc_thread_func(void *data) FOREGROUND : BACKGROUND); sync_mode = (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) || - gc_control.one_time; + (gc_control.one_time && gc_th->boost_gc_greedy); /* foreground GC was been triggered via f2fs_balance_fs() */ if (foreground && !f2fs_sb_has_blkzoned(sbi)) @@ -198,6 +198,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO; gc_th->boost_gc_multiple = BOOST_GC_MULTIPLE; + gc_th->boost_gc_greedy = GC_GREEDY; if (f2fs_sb_has_blkzoned(sbi)) { gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED; diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index d7573108bb68..24e8b1c27acc 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -69,6 +69,7 @@ struct f2fs_gc_kthread { unsigned int boost_zoned_gc_percent; unsigned int valid_thresh_ratio; unsigned int boost_gc_multiple; + unsigned int boost_gc_greedy; }; struct gc_inode_list { diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index d11e8af7306d..f736052dea50 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -859,6 +859,13 @@ static ssize_t __sbi_store(struct f2fs_attr *a, return count; } + if (!strcmp(a->attr.name, "gc_boost_gc_greedy")) { + if (t > GC_GREEDY) + return -EINVAL; + sbi->gc_thread->boost_gc_greedy = (unsigned int)t; + return count; + } + *ui = (unsigned int)t; return count; @@ -1086,6 +1093,7 @@ GC_THREAD_RW_ATTR(gc_no_zoned_gc_percent, no_zoned_gc_percent); GC_THREAD_RW_ATTR(gc_boost_zoned_gc_percent, boost_zoned_gc_percent); GC_THREAD_RW_ATTR(gc_valid_thresh_ratio, valid_thresh_ratio); GC_THREAD_RW_ATTR(gc_boost_gc_multiple, boost_gc_multiple); +GC_THREAD_RW_ATTR(gc_boost_gc_greedy, boost_gc_greedy); /* SM_INFO ATTR */ SM_INFO_RW_ATTR(reclaim_segments, 
rec_prefree_segments); @@ -1258,6 +1266,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_boost_zoned_gc_percent), ATTR_LIST(gc_valid_thresh_ratio), ATTR_LIST(gc_boost_gc_multiple), + ATTR_LIST(gc_boost_gc_greedy), ATTR_LIST(gc_idle), ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), From 5dc50b111b40003ed83f74324e8d4023f01bd93e Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Tue, 29 Jul 2025 22:58:49 +0800 Subject: [PATCH 1202/2411] ALSA: hda: Fix the wrong register was used for DVC of TAS2770 The wrong register was used for digital volume control of TAS2770, The definition was changed, and usage was also updated. Fixes: ab29b3460c5c ("ALSA: hda: Add TAS2770 support") Signed-off-by: Baojun Xu Link: https://patch.msgid.link/20250729145849.55057-1-baojun.xu@ti.com Signed-off-by: Takashi Iwai --- include/sound/tas2770-tlv.h | 4 ++-- sound/hda/codecs/side-codecs/tas2781_hda_i2c.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/sound/tas2770-tlv.h b/include/sound/tas2770-tlv.h index c0bd495b4a07..c7380925417a 100644 --- a/include/sound/tas2770-tlv.h +++ b/include/sound/tas2770-tlv.h @@ -14,10 +14,10 @@ #ifndef __TAS2770_TLV_H__ #define __TAS2770_TLV_H__ -#define TAS2770_DVC_LEVEL TASDEVICE_REG(0x0, 0x0, 0x17) +#define TAS2770_DVC_LEVEL TASDEVICE_REG(0x0, 0x0, 0x05) #define TAS2770_AMP_LEVEL TASDEVICE_REG(0x0, 0x0, 0x03) -static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2770_dvc_tlv, 1650, 50, 0); +static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2770_dvc_tlv, -10000, 50, 0); static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2770_amp_tlv, 1100, 50, 0); #endif diff --git a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c index a0b132681804..45ac5e41bd4f 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c +++ b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c @@ -260,7 +260,7 @@ static const struct snd_kcontrol_new tas2770_snd_controls[] = { 0, 0, 20, 0, tas2781_amp_getvol, 
tas2781_amp_putvol, tas2770_amp_tlv), ACARD_SINGLE_RANGE_EXT_TLV("Speaker Digital Volume", TAS2770_DVC_LEVEL, - 0, 0, 31, 0, tas2781_amp_getvol, + 0, 0, 200, 1, tas2781_amp_getvol, tas2781_amp_putvol, tas2770_dvc_tlv), }; From 1a967e92bf47cf5170336b88d748117c700edc47 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 29 Jul 2025 14:10:35 +0900 Subject: [PATCH 1203/2411] tracing: Remove "__attribute__()" from the type field of event format With CONFIG_DEBUG_INFO_BTF=y and PAHOLE_HAS_BTF_TAG=y, `__user` is converted to `__attribute__((btf_type_tag("user")))`. In this case, some syscall events have it for __user data, like below; /sys/kernel/tracing # cat events/syscalls/sys_enter_openat/format name: sys_enter_openat ID: 720 format: field:unsigned short common_type; offset:0; size:2; signed:0; field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1; signed:0; field:int common_pid; offset:4; size:4; signed:1; field:int __syscall_nr; offset:8; size:4; signed:1; field:int dfd; offset:16; size:8; signed:0; field:const char __attribute__((btf_type_tag("user"))) * filename; offset:24; size:8; signed:0; field:int flags; offset:32; size:8; signed:0; field:umode_t mode; offset:40; size:8; signed:0; Then the trace event filter fails to set the string acceptable flag (FILTER_PTR_STRING) to the field and rejects setting string filter; # echo 'filename.ustring ~ "*ftracetest-dir.wbx24v*"' \ >> events/syscalls/sys_enter_openat/filter sh: write error: Invalid argument # cat error_log [ 723.743637] event filter parse error: error: Expecting numeric field Command: filename.ustring ~ "*ftracetest-dir.wbx24v*" Since this __attribute__ makes format parsing complicated and not needed, remove the __attribute__(.*) from the type string. 
Cc: Mathieu Desnoyers Link: https://lore.kernel.org/175376583493.1688759.12333973498014733551.stgit@mhiramat.tok.corp.google.com Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 28 +++++--- kernel/trace/trace.h | 4 +- kernel/trace/trace_events.c | 130 ++++++++++++++++++++++++++++++------ 3 files changed, 128 insertions(+), 34 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 06ab5b7a8711..945a8ecf2c62 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5930,17 +5930,27 @@ static inline void trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, int len) { } #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */ -static void trace_insert_eval_map(struct module *mod, - struct trace_eval_map **start, int len) +static void +trace_event_update_with_eval_map(struct module *mod, + struct trace_eval_map **start, + int len) { struct trace_eval_map **map; - if (len <= 0) - return; + /* Always run sanitizer only if btf_type_tag attr exists. */ + if (len <= 0) { + if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) && + IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) && + __has_attribute(btf_type_tag))) + return; + } map = start; - trace_event_eval_update(map, len); + trace_event_update_all(map, len); + + if (len <= 0) + return; trace_insert_eval_map_file(mod, start, len); } @@ -10334,7 +10344,7 @@ static void __init eval_map_work_func(struct work_struct *work) int len; len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps; - trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len); + trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len); } static int __init trace_eval_init(void) @@ -10387,9 +10397,6 @@ bool module_exists(const char *module) static void trace_module_add_evals(struct module *mod) { - if (!mod->num_trace_evals) - return; - /* * Modules with bad taint do not have events created, do * not bother with enums either. 
@@ -10397,7 +10404,8 @@ static void trace_module_add_evals(struct module *mod) if (trace_module_has_bad_taint(mod)) return; - trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals); + /* Even if no trace_evals, this need to sanitize field types. */ + trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals); } #ifdef CONFIG_TRACE_EVAL_MAP_FILE diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index bd084953a98b..1dbf1d3cf2f1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -2125,13 +2125,13 @@ static inline const char *get_syscall_name(int syscall) #ifdef CONFIG_EVENT_TRACING void trace_event_init(void); -void trace_event_eval_update(struct trace_eval_map **map, int len); +void trace_event_update_all(struct trace_eval_map **map, int len); /* Used from boot time tracer */ extern int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); extern int trigger_process_regex(struct trace_event_file *file, char *buff); #else static inline void __init trace_event_init(void) { } -static inline void trace_event_eval_update(struct trace_eval_map **map, int len) { } +static inline void trace_event_update_all(struct trace_eval_map **map, int len) { } #endif #ifdef CONFIG_TRACER_SNAPSHOT diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 6c0783fc4c2c..05447b958a1a 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3264,43 +3264,120 @@ static void add_str_to_module(struct module *module, char *str) list_add(&modstr->next, &module_strings); } +#define ATTRIBUTE_STR "__attribute__(" +#define ATTRIBUTE_STR_LEN (sizeof(ATTRIBUTE_STR) - 1) + +/* Remove all __attribute__() from @type. Return allocated string or @type. */ +static char *sanitize_field_type(const char *type) +{ + char *attr, *tmp, *next, *ret = (char *)type; + int depth; + + next = (char *)type; + while ((attr = strstr(next, ATTRIBUTE_STR))) { + /* Retry if "__attribute__(" is a part of another word. 
*/ + if (attr != next && !isspace(attr[-1])) { + next = attr + ATTRIBUTE_STR_LEN; + continue; + } + + if (ret == type) { + ret = kstrdup(type, GFP_KERNEL); + if (WARN_ON_ONCE(!ret)) + return NULL; + attr = ret + (attr - type); + } + + /* the ATTRIBUTE_STR already has the first '(' */ + depth = 1; + next = attr + ATTRIBUTE_STR_LEN; + do { + tmp = strpbrk(next, "()"); + /* There is unbalanced parentheses */ + if (WARN_ON_ONCE(!tmp)) { + kfree(ret); + return (char *)type; + } + + if (*tmp == '(') + depth++; + else + depth--; + next = tmp + 1; + } while (depth > 0); + next = skip_spaces(next); + strcpy(attr, next); + next = attr; + } + return ret; +} + +static char *find_replacable_eval(const char *type, const char *eval_string, + int len) +{ + char *ptr; + + if (!eval_string) + return NULL; + + ptr = strchr(type, '['); + if (!ptr) + return NULL; + ptr++; + + if (!isalpha(*ptr) && *ptr != '_') + return NULL; + + if (strncmp(eval_string, ptr, len) != 0) + return NULL; + + return ptr; +} + static void update_event_fields(struct trace_event_call *call, struct trace_eval_map *map) { struct ftrace_event_field *field; + const char *eval_string = NULL; struct list_head *head; + int len = 0; char *ptr; char *str; - int len = strlen(map->eval_string); /* Dynamic events should never have field maps */ - if (WARN_ON_ONCE(call->flags & TRACE_EVENT_FL_DYNAMIC)) + if (call->flags & TRACE_EVENT_FL_DYNAMIC) return; + if (map) { + eval_string = map->eval_string; + len = strlen(map->eval_string); + } + head = trace_get_fields(call); list_for_each_entry(field, head, link) { - ptr = strchr(field->type, '['); - if (!ptr) - continue; - ptr++; - - if (!isalpha(*ptr) && *ptr != '_') - continue; - - if (strncmp(map->eval_string, ptr, len) != 0) - continue; - - str = kstrdup(field->type, GFP_KERNEL); - if (WARN_ON_ONCE(!str)) + str = sanitize_field_type(field->type); + if (!str) return; - ptr = str + (ptr - field->type); - ptr = eval_replace(ptr, map, len); - /* enum/sizeof string smaller than 
value */ - if (WARN_ON_ONCE(!ptr)) { - kfree(str); - continue; + + ptr = find_replacable_eval(str, eval_string, len); + if (ptr) { + if (str == field->type) { + str = kstrdup(field->type, GFP_KERNEL); + if (WARN_ON_ONCE(!str)) + return; + ptr = str + (ptr - field->type); + } + + ptr = eval_replace(ptr, map, len); + /* enum/sizeof string smaller than value */ + if (WARN_ON_ONCE(!ptr)) { + kfree(str); + continue; + } } + if (str == field->type) + continue; /* * If the event is part of a module, then we need to free the string * when the module is removed. Otherwise, it will stay allocated @@ -3310,14 +3387,18 @@ static void update_event_fields(struct trace_event_call *call, add_str_to_module(call->module, str); field->type = str; + if (field->filter_type == FILTER_OTHER) + field->filter_type = filter_assign_type(field->type); } } -void trace_event_eval_update(struct trace_eval_map **map, int len) +/* Update all events for replacing eval and sanitizing */ +void trace_event_update_all(struct trace_eval_map **map, int len) { struct trace_event_call *call, *p; const char *last_system = NULL; bool first = false; + bool updated; int last_i; int i; @@ -3330,6 +3411,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len) last_system = call->class->system; } + updated = false; /* * Since calls are grouped by systems, the likelihood that the * next call in the iteration belongs to the same system as the @@ -3349,8 +3431,12 @@ void trace_event_eval_update(struct trace_eval_map **map, int len) } update_event_printk(call, map[i]); update_event_fields(call, map[i]); + updated = true; } } + /* If not updated yet, update field for sanitizing. 
*/ + if (!updated) + update_event_fields(call, NULL); cond_resched(); } up_write(&trace_event_sem); From 6cff20ce3b92ffbf2fc5eb9e5a030b3672aa414a Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 13 Jul 2025 16:31:01 +0200 Subject: [PATCH 1204/2411] PCI/ACPI: Fix runtime PM ref imbalance on Hot-Plug Capable ports pci_bridge_d3_possible() is called from both pcie_portdrv_probe() and pcie_portdrv_remove() to determine whether runtime power management shall be enabled (on probe) or disabled (on remove) on a PCIe port. The underlying assumption is that pci_bridge_d3_possible() always returns the same value, else a runtime PM reference imbalance would occur. That assumption is not given if the PCIe port is inaccessible on remove due to hot-unplug: pci_bridge_d3_possible() calls pciehp_is_native(), which accesses Config Space to determine whether the port is Hot-Plug Capable. An inaccessible port returns "all ones", which is converted to "all zeroes" by pcie_capability_read_dword(). Hence the port no longer seems Hot-Plug Capable on remove even though it was on probe. The resulting runtime PM ref imbalance causes warning messages such as: pcieport 0000:02:04.0: Runtime PM usage count underflow! Avoid the Config Space access (and thus the runtime PM ref imbalance) by caching the Hot-Plug Capable bit in struct pci_dev. The struct already contains an "is_hotplug_bridge" flag, which however is not only set on Hot-Plug Capable PCIe ports, but also Conventional PCI Hot-Plug bridges and ACPI slots. The flag identifies bridges which are allocated additional MMIO and bus number resources to allow for hierarchy expansion. The kernel is somewhat sloppily using "is_hotplug_bridge" in a number of places to identify Hot-Plug Capable PCIe ports, even though the flag encompasses other devices. Subsequent commits replace these occurrences with the new flag to clearly delineate Hot-Plug Capable PCIe ports from other kinds of hotplug bridges. 
Document the existing "is_hotplug_bridge" and the new "is_pciehp" flag and document the (non-obvious) requirement that pci_bridge_d3_possible() always returns the same value across the entire lifetime of a bridge, including its hot-removal. Fixes: 5352a44a561d ("PCI: pciehp: Make pciehp_is_native() stricter") Reported-by: Laurent Bigonville Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220216 Reported-by: Mario Limonciello Closes: https://lore.kernel.org/r/20250609020223.269407-3-superm1@kernel.org/ Link: https://lore.kernel.org/all/20250620025535.3425049-3-superm1@kernel.org/T/#u Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki Cc: stable@vger.kernel.org # v4.18+ Link: https://patch.msgid.link/fe5dcc3b2e62ee1df7905d746bde161eb1b3291c.1752390101.git.lukas@wunner.de --- drivers/pci/pci-acpi.c | 4 +--- drivers/pci/pci.c | 6 +++++- drivers/pci/probe.c | 2 +- include/linux/pci.h | 6 ++++++ 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index b78e0e417324..efe478e5073e 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -816,13 +816,11 @@ int pci_acpi_program_hp_params(struct pci_dev *dev) bool pciehp_is_native(struct pci_dev *bridge) { const struct pci_host_bridge *host; - u32 slot_cap; if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) return false; - pcie_capability_read_dword(bridge, PCI_EXP_SLTCAP, &slot_cap); - if (!(slot_cap & PCI_EXP_SLTCAP_HPC)) + if (!bridge->is_pciehp) return false; if (pcie_ports_native) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e9448d55113b..23d8fe98ddf9 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3030,8 +3030,12 @@ static const struct dmi_system_id bridge_d3_blacklist[] = { * pci_bridge_d3_possible - Is it possible to put the bridge into D3 * @bridge: Bridge to check * - * This function checks if it is possible to move the bridge to D3. 
* Currently we only allow D3 for some PCIe ports and for Thunderbolt. + * + * Return: Whether it is possible to move the bridge to D3. + * + * The return value is guaranteed to be constant across the entire lifetime + * of the bridge, including its hot-removal. */ bool pci_bridge_d3_possible(struct pci_dev *bridge) { diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4b8693ec9e4c..cf50be63bf5f 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1678,7 +1678,7 @@ void set_pcie_hotplug_bridge(struct pci_dev *pdev) pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &reg32); if (reg32 & PCI_EXP_SLTCAP_HPC) - pdev->is_hotplug_bridge = 1; + pdev->is_hotplug_bridge = pdev->is_pciehp = 1; } static void set_pcie_thunderbolt(struct pci_dev *dev) diff --git a/include/linux/pci.h b/include/linux/pci.h index 05e68f35f392..d56d0dd80afb 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -328,6 +328,11 @@ struct rcec_ea; * determined (e.g., for Root Complex Integrated * Endpoints without the relevant Capability * Registers). + * @is_hotplug_bridge: Hotplug bridge of any kind (e.g. PCIe Hot-Plug Capable, + * Conventional PCI Hot-Plug, ACPI slot). + * Such bridges are allocated additional MMIO and bus + * number resources to allow for hierarchy expansion. + * @is_pciehp: PCIe Hot-Plug Capable bridge.
*/ struct pci_dev { struct list_head bus_list; /* Node in per-bus list */ @@ -451,6 +456,7 @@ struct pci_dev { unsigned int is_physfn:1; unsigned int is_virtfn:1; unsigned int is_hotplug_bridge:1; + unsigned int is_pciehp:1; unsigned int shpc_managed:1; /* SHPC owned by shpchp */ unsigned int is_thunderbolt:1; /* Thunderbolt controller */ /* From 1d60796a62f327cd9e0a6a0865ded7656d2c67f9 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 13 Jul 2025 16:31:02 +0200 Subject: [PATCH 1205/2411] PCI/portdrv: Use is_pciehp instead of is_hotplug_bridge The PCIe port driver erroneously creates a subdevice for hotplug on ACPI slots which are handled by the ACPI hotplug driver. Avoid by checking the is_pciehp flag instead of is_hotplug_bridge when deciding whether to create a subdevice. The latter encompasses ACPI slots whereas the former doesn't. The superfluous subdevice has no real negative impact, it occupies memory and interrupt resources but otherwise just sits there waiting for interrupts from the slot that are never signaled. 
Fixes: f8415222837b ("PCI: Use cached copy of PCI_EXP_SLTCAP_HPC bit") Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v4.7+ Link: https://patch.msgid.link/40d5a5fe8d40595d505949c620a067fa110ee85e.1752390102.git.lukas@wunner.de --- drivers/pci/pcie/portdrv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pcie/portdrv.c b/drivers/pci/pcie/portdrv.c index e8318fd5f6ed..d1b68c18444f 100644 --- a/drivers/pci/pcie/portdrv.c +++ b/drivers/pci/pcie/portdrv.c @@ -220,7 +220,7 @@ static int get_port_device_capability(struct pci_dev *dev) struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); int services = 0; - if (dev->is_hotplug_bridge && + if (dev->is_pciehp && (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) && (pcie_ports_native || host->native_pcie_hotplug)) { From c6036c33947d7ff7454b163ac24e565a445f4d46 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 13 Jul 2025 16:31:03 +0200 Subject: [PATCH 1206/2411] PCI: pciehp: Use is_pciehp instead of is_hotplug_bridge The PCIe hotplug driver calculates the depth of a nested hotplug port by looking at the is_hotplug_bridge flag. The depth is used as lockdep class to tell hotplug ports apart. The is_hotplug_bridge flag encompasses ACPI slots handled by the ACPI hotplug driver, hence the calculated depth may be too high. Avoid by checking the is_pciehp flag instead. This glitch likely has no user-visible impact: ACPI slots typically only exist at the Root Port level, not in nested hotplug hierarchies. Also, CONFIG_LOCKDEP is usually only used by developers. So this is just for the sake of correctness. 
Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/59a097376a2bb493da9efd66fb196ae4b66f8a09.1752390102.git.lukas@wunner.de --- drivers/pci/hotplug/pciehp_hpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index ebd342bda235..d783da1dbd24 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -995,7 +995,7 @@ static inline int pcie_hotplug_depth(struct pci_dev *dev) while (bus->parent) { bus = bus->parent; - if (bus->self && bus->self->is_hotplug_bridge) + if (bus->self && bus->self->is_pciehp) depth++; } From c2f9de5e2db29158a8caa86a37aa479488e4ba43 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 13 Jul 2025 16:31:04 +0200 Subject: [PATCH 1207/2411] PCI: Move is_pciehp check out of pciehp_is_native() pci_bridge_d3_possible() seeks to forbid runtime power management on: * Non Hot-Plug Capable PCIe ports which are nevertheless ACPI slots (recognizable as: bridge->is_hotplug_bridge && !bridge->is_pciehp) * Hot-Plug Capable PCIe ports for which platform firmware has not granted PCIe Native Hot-Plug control to the operating system (recognizable as: bridge->is_pciehp && !pciehp_is_native(bridge)) Somewhat confusingly, the check for is_hotplug_bridge is in pci_bridge_d3_possible(), whereas the one for is_pciehp is in pciehp_is_native(). For clarity, check is_pciehp directly in pci_bridge_d3_possible() (and in the other caller of pciehp_is_native(), hotplug_is_native()). Rephrase the code comment preceding these checks to no longer mention "System Management Mode", which is an x86 term inappropriate in generic PCI code. Likewise no longer mention "Thunderbolt on non-Macs", because there is nothing Thunderbolt-specific about these checks. 
It used to be the case that non-Macs relied on the platform for Thunderbolt tunnel management and hotplug, but they've since moved to OS-native tunnel management (as Macs always have), hence the code comment is no longer accurate. There is a subsequent check for is_hotplug_bridge further down in pci_bridge_d3_possible(). Change the check to is_pciehp because any ports matching "bridge->is_hotplug_bridge && !bridge->is_pciehp" are already filtered out at the top of the function. Do the same for another check in acpi_pci_bridge_d3(), which is called from pci_bridge_d3_possible() via platform_pci_bridge_d3(). No functional change intended. Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/18b2c2110ad0f27a34b189d793310b9c4f2f24a0.1752390102.git.lukas@wunner.de --- drivers/pci/pci-acpi.c | 5 +---- drivers/pci/pci.c | 12 ++++++++---- include/linux/pci_hotplug.h | 3 ++- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index efe478e5073e..ed7ed66a595b 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -820,9 +820,6 @@ bool pciehp_is_native(struct pci_dev *bridge) if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) return false; - if (!bridge->is_pciehp) - return false; - if (pcie_ports_native) return true; @@ -1000,7 +997,7 @@ bool acpi_pci_bridge_d3(struct pci_dev *dev) struct acpi_device *adev, *rpadev; const union acpi_object *obj; - if (acpi_pci_disabled || !dev->is_hotplug_bridge) + if (acpi_pci_disabled || !dev->is_pciehp) return false; adev = ACPI_COMPANION(&dev->dev); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 23d8fe98ddf9..749994dad9dc 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3050,10 +3050,14 @@ bool pci_bridge_d3_possible(struct pci_dev *bridge) return false; /* - * Hotplug ports handled by firmware in System Management Mode - * may not be put into D3 by the OS (Thunderbolt on non-Macs). 
+ * Hotplug ports handled by platform firmware may not be put + * into D3 by the OS, e.g. ACPI slots ... */ - if (bridge->is_hotplug_bridge && !pciehp_is_native(bridge)) + if (bridge->is_hotplug_bridge && !bridge->is_pciehp) + return false; + + /* ... or PCIe hotplug ports not handled natively by the OS. */ + if (bridge->is_pciehp && !pciehp_is_native(bridge)) return false; if (pci_bridge_d3_force) @@ -3072,7 +3076,7 @@ bool pci_bridge_d3_possible(struct pci_dev *bridge) * by vendors for runtime D3 at least until 2018 because there * was no OS support. */ - if (bridge->is_hotplug_bridge) + if (bridge->is_pciehp) return false; if (dmi_check_system(bridge_d3_blacklist)) diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index ec77ccf1fc4d..ddf79641917f 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -104,6 +104,7 @@ static inline bool shpchp_is_native(struct pci_dev *bridge) { return true; } static inline bool hotplug_is_native(struct pci_dev *bridge) { - return pciehp_is_native(bridge) || shpchp_is_native(bridge); + return (bridge->is_pciehp && pciehp_is_native(bridge)) || + shpchp_is_native(bridge); } #endif From 71753c6ed2bf2aee5be26c1bc06a94c9e3713ade Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 29 Jul 2025 14:23:05 -0400 Subject: [PATCH 1208/2411] unwind_user: Add user space unwinding API with frame pointer support Introduce a generic API for unwinding user stacks. In order to expand user space unwinding to be able to handle more complex scenarios, such as deferred unwinding and reading user space information, create a generic interface that all architectures can use that support the various unwinding methods. This is an alternative method for handling user space stack traces from the simple stack_trace_save_user() API. This does not replace that interface, but this interface will be used to expand the functionality of user space stack walking. 
None of the structures introduced will be exposed to user space tooling. Support for frame pointer unwinding is added. For an architecture to support frame pointer unwinding it needs to enable CONFIG_HAVE_UNWIND_USER_FP and define ARCH_INIT_USER_FP_FRAME. By encoding the frame offsets in struct unwind_user_frame, much of this code can also be reused for future unwinder implementations like sframe. Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. Marchesi" Cc: Beau Belgrave Cc: Linus Torvalds Cc: Andrew Morton Cc: Jens Axboe Cc: Florian Weimer Cc: Sam James Link: https://lore.kernel.org/20250729182404.975790139@kernel.org Reviewed-by: Jens Remus Signed-off-by: Josh Poimboeuf Co-developed-by: Mathieu Desnoyers Link: https://lore.kernel.org/all/20250710164301.3094-2-mathieu.desnoyers@efficios.com/ Signed-off-by: Mathieu Desnoyers Co-developed-by: Steven Rostedt (Google) Signed-off-by: Steven Rostedt (Google) --- MAINTAINERS | 8 ++ arch/Kconfig | 7 ++ include/asm-generic/Kbuild | 1 + include/asm-generic/unwind_user.h | 5 ++ include/linux/unwind_user.h | 14 ++++ include/linux/unwind_user_types.h | 44 ++++++++++ kernel/Makefile | 1 + kernel/unwind/Makefile | 1 + kernel/unwind/user.c | 128 ++++++++++++++++++++++++++++++ 9 files changed, 209 insertions(+) create mode 100644 include/asm-generic/unwind_user.h create mode 100644 include/linux/unwind_user.h create mode 100644 include/linux/unwind_user_types.h create mode 100644 kernel/unwind/Makefile create mode 100644 kernel/unwind/user.c diff --git a/MAINTAINERS b/MAINTAINERS index fad6cb025a19..370d780fd5f8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -25928,6 +25928,14 @@ F: Documentation/driver-api/uio-howto.rst F: drivers/uio/ F: include/linux/uio_driver.h +USERSPACE STACK UNWINDING +M: Josh Poimboeuf +M: Steven Rostedt +S: Maintained +F: include/linux/unwind*.h +F: kernel/unwind/ + + 
UTIL-LINUX PACKAGE M: Karel Zak L: util-linux@vger.kernel.org diff --git a/arch/Kconfig b/arch/Kconfig index a3308a220f86..8e3fd723bd74 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -435,6 +435,13 @@ config HAVE_HARDLOCKUP_DETECTOR_ARCH It uses the same command line parameters, and sysctl interface, as the generic hardlockup detectors. +config UNWIND_USER + bool + +config HAVE_UNWIND_USER_FP + bool + select UNWIND_USER + config HAVE_PERF_REGS bool help diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 8675b7b4ad23..295c94a3ccc1 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -59,6 +59,7 @@ mandatory-y += tlbflush.h mandatory-y += topology.h mandatory-y += trace_clock.h mandatory-y += uaccess.h +mandatory-y += unwind_user.h mandatory-y += vermagic.h mandatory-y += vga.h mandatory-y += video.h diff --git a/include/asm-generic/unwind_user.h b/include/asm-generic/unwind_user.h new file mode 100644 index 000000000000..b8882b909944 --- /dev/null +++ b/include/asm-generic/unwind_user.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_UNWIND_USER_H +#define _ASM_GENERIC_UNWIND_USER_H + +#endif /* _ASM_GENERIC_UNWIND_USER_H */ diff --git a/include/linux/unwind_user.h b/include/linux/unwind_user.h new file mode 100644 index 000000000000..7f7282516bf5 --- /dev/null +++ b/include/linux/unwind_user.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UNWIND_USER_H +#define _LINUX_UNWIND_USER_H + +#include +#include + +#ifndef ARCH_INIT_USER_FP_FRAME + #define ARCH_INIT_USER_FP_FRAME +#endif + +int unwind_user(struct unwind_stacktrace *trace, unsigned int max_entries); + +#endif /* _LINUX_UNWIND_USER_H */ diff --git a/include/linux/unwind_user_types.h b/include/linux/unwind_user_types.h new file mode 100644 index 000000000000..a449f15be890 --- /dev/null +++ b/include/linux/unwind_user_types.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef 
_LINUX_UNWIND_USER_TYPES_H +#define _LINUX_UNWIND_USER_TYPES_H + +#include + +/* + * Unwind types, listed in priority order: lower numbers are attempted first if + * available. + */ +enum unwind_user_type_bits { + UNWIND_USER_TYPE_FP_BIT = 0, + + NR_UNWIND_USER_TYPE_BITS, +}; + +enum unwind_user_type { + /* Type "none" for the start of stack walk iteration. */ + UNWIND_USER_TYPE_NONE = 0, + UNWIND_USER_TYPE_FP = BIT(UNWIND_USER_TYPE_FP_BIT), +}; + +struct unwind_stacktrace { + unsigned int nr; + unsigned long *entries; +}; + +struct unwind_user_frame { + s32 cfa_off; + s32 ra_off; + s32 fp_off; + bool use_fp; +}; + +struct unwind_user_state { + unsigned long ip; + unsigned long sp; + unsigned long fp; + enum unwind_user_type current_type; + unsigned int available_types; + bool done; +}; + +#endif /* _LINUX_UNWIND_USER_TYPES_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 32e80dd626af..541186050251 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -55,6 +55,7 @@ obj-y += rcu/ obj-y += livepatch/ obj-y += dma/ obj-y += entry/ +obj-y += unwind/ obj-$(CONFIG_MODULES) += module/ obj-$(CONFIG_KCMP) += kcmp.o diff --git a/kernel/unwind/Makefile b/kernel/unwind/Makefile new file mode 100644 index 000000000000..349ce3677526 --- /dev/null +++ b/kernel/unwind/Makefile @@ -0,0 +1 @@ + obj-$(CONFIG_UNWIND_USER) += user.o diff --git a/kernel/unwind/user.c b/kernel/unwind/user.c new file mode 100644 index 000000000000..97a8415e3216 --- /dev/null +++ b/kernel/unwind/user.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +/* +* Generic interfaces for unwinding user space +*/ +#include +#include +#include +#include +#include + +static const struct unwind_user_frame fp_frame = { + ARCH_INIT_USER_FP_FRAME +}; + +#define for_each_user_frame(state) \ + for (unwind_user_start(state); !(state)->done; unwind_user_next(state)) + +static int unwind_user_next_fp(struct unwind_user_state *state) +{ + const struct unwind_user_frame *frame = &fp_frame; + unsigned long cfa, 
fp, ra; + unsigned int shift; + + if (frame->use_fp) { + if (state->fp < state->sp) + return -EINVAL; + cfa = state->fp; + } else { + cfa = state->sp; + } + + /* Get the Canonical Frame Address (CFA) */ + cfa += frame->cfa_off; + + /* stack going in wrong direction? */ + if (cfa <= state->sp) + return -EINVAL; + + /* Make sure that the address is word aligned */ + shift = sizeof(long) == 4 ? 2 : 3; + if (cfa & ((1 << shift) - 1)) + return -EINVAL; + + /* Find the Return Address (RA) */ + if (get_user(ra, (unsigned long *)(cfa + frame->ra_off))) + return -EINVAL; + + if (frame->fp_off && get_user(fp, (unsigned long __user *)(cfa + frame->fp_off))) + return -EINVAL; + + state->ip = ra; + state->sp = cfa; + if (frame->fp_off) + state->fp = fp; + return 0; +} + +static int unwind_user_next(struct unwind_user_state *state) +{ + unsigned long iter_mask = state->available_types; + unsigned int bit; + + if (state->done) + return -EINVAL; + + for_each_set_bit(bit, &iter_mask, NR_UNWIND_USER_TYPE_BITS) { + enum unwind_user_type type = BIT(bit); + + state->current_type = type; + switch (type) { + case UNWIND_USER_TYPE_FP: + if (!unwind_user_next_fp(state)) + return 0; + continue; + default: + WARN_ONCE(1, "Undefined unwind bit %d", bit); + break; + } + break; + } + + /* No successful unwind method. 
*/ + state->current_type = UNWIND_USER_TYPE_NONE; + state->done = true; + return -EINVAL; +} + +static int unwind_user_start(struct unwind_user_state *state) +{ + struct pt_regs *regs = task_pt_regs(current); + + memset(state, 0, sizeof(*state)); + + if ((current->flags & PF_KTHREAD) || !user_mode(regs)) { + state->done = true; + return -EINVAL; + } + + if (IS_ENABLED(CONFIG_HAVE_UNWIND_USER_FP)) + state->available_types |= UNWIND_USER_TYPE_FP; + + state->ip = instruction_pointer(regs); + state->sp = user_stack_pointer(regs); + state->fp = frame_pointer(regs); + + return 0; +} + +int unwind_user(struct unwind_stacktrace *trace, unsigned int max_entries) +{ + struct unwind_user_state state; + + trace->nr = 0; + + if (!max_entries) + return -EINVAL; + + if (current->flags & PF_KTHREAD) + return 0; + + for_each_user_frame(&state) { + trace->entries[trace->nr++] = state.ip; + if (trace->nr >= max_entries) + break; + } + + return 0; +} From 5e32d0f15cc5c843a4115c4644d984d42524c794 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 29 Jul 2025 14:23:06 -0400 Subject: [PATCH 1209/2411] unwind_user/deferred: Add unwind_user_faultable() Add a new API to retrieve a user space callstack called unwind_user_faultable(). The difference between this user space stack tracer from the current user space stack tracer is that this must be called from faultable context as it may use routines to access user space data that needs to be faulted in. It can be safely called from entering or exiting a system call as the code can still be faulted in there. This code is based on work by Josh Poimboeuf's deferred unwinding code: Link: https://lore.kernel.org/all/6052e8487746603bdb29b65f4033e739092d9925.1737511963.git.jpoimboe@kernel.org/ Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. 
Marchesi" Cc: Beau Belgrave Cc: Linus Torvalds Cc: Andrew Morton Cc: Jens Axboe Cc: Florian Weimer Cc: Sam James Link: https://lore.kernel.org/20250729182405.147896868@kernel.org Reviewed-by: Jens Remus Signed-off-by: Steven Rostedt (Google) --- include/linux/sched.h | 5 +++ include/linux/unwind_deferred.h | 24 +++++++++++ include/linux/unwind_deferred_types.h | 9 ++++ kernel/fork.c | 4 ++ kernel/unwind/Makefile | 2 +- kernel/unwind/deferred.c | 60 +++++++++++++++++++++++++++ 6 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 include/linux/unwind_deferred.h create mode 100644 include/linux/unwind_deferred_types.h create mode 100644 kernel/unwind/deferred.c diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f78a64beb52..59fdf7d9bb1e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -46,6 +46,7 @@ #include #include #include +#include #include /* task_struct member predeclarations (sorted alphabetically): */ @@ -1654,6 +1655,10 @@ struct task_struct { struct user_event_mm *user_event_mm; #endif +#ifdef CONFIG_UNWIND_USER + struct unwind_task_info unwind_info; +#endif + /* CPU-specific state of this task: */ struct thread_struct thread; diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h new file mode 100644 index 000000000000..a5f6e8f8a1a2 --- /dev/null +++ b/include/linux/unwind_deferred.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UNWIND_USER_DEFERRED_H +#define _LINUX_UNWIND_USER_DEFERRED_H + +#include +#include + +#ifdef CONFIG_UNWIND_USER + +void unwind_task_init(struct task_struct *task); +void unwind_task_free(struct task_struct *task); + +int unwind_user_faultable(struct unwind_stacktrace *trace); + +#else /* !CONFIG_UNWIND_USER */ + +static inline void unwind_task_init(struct task_struct *task) {} +static inline void unwind_task_free(struct task_struct *task) {} + +static inline int unwind_user_faultable(struct unwind_stacktrace *trace) { return -ENOSYS; 
} + +#endif /* !CONFIG_UNWIND_USER */ + +#endif /* _LINUX_UNWIND_USER_DEFERRED_H */ diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h new file mode 100644 index 000000000000..aa32db574e43 --- /dev/null +++ b/include/linux/unwind_deferred_types.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UNWIND_USER_DEFERRED_TYPES_H +#define _LINUX_UNWIND_USER_DEFERRED_TYPES_H + +struct unwind_task_info { + unsigned long *entries; +}; + +#endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 1ee8eb11f38b..3341d50c61f2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -105,6 +105,7 @@ #include #include #include +#include #include #include @@ -732,6 +733,7 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(refcount_read(&tsk->usage)); WARN_ON(tsk == current); + unwind_task_free(tsk); sched_ext_free(tsk); io_uring_free(tsk); cgroup_free(tsk); @@ -2135,6 +2137,8 @@ __latent_entropy struct task_struct *copy_process( p->bpf_ctx = NULL; #endif + unwind_task_init(p); + /* Perform scheduler related setup. Assign this task to a CPU. 
*/ retval = sched_fork(clone_flags, p); if (retval) diff --git a/kernel/unwind/Makefile b/kernel/unwind/Makefile index 349ce3677526..eae37bea54fd 100644 --- a/kernel/unwind/Makefile +++ b/kernel/unwind/Makefile @@ -1 +1 @@ - obj-$(CONFIG_UNWIND_USER) += user.o + obj-$(CONFIG_UNWIND_USER) += user.o deferred.o diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c new file mode 100644 index 000000000000..a0badbeb3cc1 --- /dev/null +++ b/kernel/unwind/deferred.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Deferred user space unwinding + */ +#include +#include +#include +#include + +#define UNWIND_MAX_ENTRIES 512 + +/** + * unwind_user_faultable - Produce a user stacktrace in faultable context + * @trace: The descriptor that will store the user stacktrace + * + * This must be called in a known faultable context (usually when entering + * or exiting user space). Depending on the available implementations + * the @trace will be loaded with the addresses of the user space stacktrace + * if it can be found. 
+ * + * Return: 0 on success and negative on error + * On success @trace will contain the user space stacktrace + */ +int unwind_user_faultable(struct unwind_stacktrace *trace) +{ + struct unwind_task_info *info = &current->unwind_info; + + /* Should always be called from faultable context */ + might_fault(); + + if (current->flags & PF_EXITING) + return -EINVAL; + + if (!info->entries) { + info->entries = kmalloc_array(UNWIND_MAX_ENTRIES, sizeof(long), + GFP_KERNEL); + if (!info->entries) + return -ENOMEM; + } + + trace->nr = 0; + trace->entries = info->entries; + unwind_user(trace, UNWIND_MAX_ENTRIES); + + return 0; +} + +void unwind_task_init(struct task_struct *task) +{ + struct unwind_task_info *info = &task->unwind_info; + + memset(info, 0, sizeof(*info)); +} + +void unwind_task_free(struct task_struct *task) +{ + struct unwind_task_info *info = &task->unwind_info; + + kfree(info->entries); +} From ffdd20555cc6fcb15e8a57d442c458034d169c7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Le=20Goffic?= Date: Fri, 4 Jul 2025 10:39:16 +0200 Subject: [PATCH 1210/2411] i2c: stm32f7: support i2c_*_dma_safe_msg_buf APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `i2c_*_dma_safe_msg_buf` APIs operate on a `struct i2c_msg`. The get operation make sure the I2C buffer is DMA'able according to its buffer length, or if the memory use is DMA coherent for example and return a valid pointer for safe DMA access to be used. The put operation release the pointer. Prefer using generic API's than relying on private tests.
Acked-by: Alain Volmat Signed-off-by: Clément Le Goffic Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250704-i2c-upstream-v4-3-84a095a2c728@foss.st.com --- drivers/i2c/busses/i2c-stm32f7.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index c8b4b404f6c1..e6815f6cae78 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -742,11 +742,14 @@ static void stm32f7_i2c_dma_callback(void *arg) { struct stm32f7_i2c_dev *i2c_dev = arg; struct stm32_i2c_dma *dma = i2c_dev->dma; + struct stm32f7_i2c_msg *f7_msg = &i2c_dev->f7_msg; stm32f7_i2c_disable_dma_req(i2c_dev); dmaengine_terminate_async(dma->chan_using); dma_unmap_single(i2c_dev->dev, dma->dma_buf, dma->dma_len, dma->dma_data_dir); + if (!f7_msg->smbus) + i2c_put_dma_safe_msg_buf(f7_msg->buf, i2c_dev->msg, true); complete(&dma->dma_complete); } @@ -882,6 +885,7 @@ static void stm32f7_i2c_xfer_msg(struct stm32f7_i2c_dev *i2c_dev, { struct stm32f7_i2c_msg *f7_msg = &i2c_dev->f7_msg; void __iomem *base = i2c_dev->base; + u8 *dma_buf; u32 cr1, cr2; int ret; @@ -931,17 +935,23 @@ static void stm32f7_i2c_xfer_msg(struct stm32f7_i2c_dev *i2c_dev, /* Configure DMA or enable RX/TX interrupt */ i2c_dev->use_dma = false; - if (i2c_dev->dma && f7_msg->count >= STM32F7_I2C_DMA_LEN_MIN - && !i2c_dev->atomic) { - ret = stm32_i2c_prep_dma_xfer(i2c_dev->dev, i2c_dev->dma, - msg->flags & I2C_M_RD, - f7_msg->count, f7_msg->buf, - stm32f7_i2c_dma_callback, - i2c_dev); - if (!ret) - i2c_dev->use_dma = true; - else - dev_warn(i2c_dev->dev, "can't use DMA\n"); + if (i2c_dev->dma && !i2c_dev->atomic) { + dma_buf = i2c_get_dma_safe_msg_buf(msg, STM32F7_I2C_DMA_LEN_MIN); + if (dma_buf) { + f7_msg->buf = dma_buf; + ret = stm32_i2c_prep_dma_xfer(i2c_dev->dev, i2c_dev->dma, + msg->flags & I2C_M_RD, + f7_msg->count, f7_msg->buf, + stm32f7_i2c_dma_callback, + i2c_dev); + if (ret) { 
+ dev_warn(i2c_dev->dev, "can't use DMA\n"); + i2c_put_dma_safe_msg_buf(f7_msg->buf, msg, false); + f7_msg->buf = msg->buf; + } else { + i2c_dev->use_dma = true; + } + } } if (!i2c_dev->use_dma) { From 635bf3c8853359a987c5c909d424df92a0d3016a Mon Sep 17 00:00:00 2001 From: Akhil R Date: Thu, 10 Jul 2025 18:42:05 +0530 Subject: [PATCH 1211/2411] i2c: tegra: Use internal reset when reset property is not available For controllers that have an internal software reset, make the reset property optional. This provides an option to use I2C in systems that choose to restrict reset control from Linux or not to implement the ACPI _RST method. Internal reset was not required when the reset control was mandatory. But on platforms where the resets are outside the control of Linux, this had to be implemented by just returning success from BPMP or with an empty _RST method in the ACPI table, basically ignoring the reset. While the internal reset is not identical to the hard reset of the controller, this will reset all the internal state of the controller including FIFOs. This may slightly alter the behaviour in systems which were ignoring the reset but it should not cause any functional difference since all the required I2C registers are configured after this reset, just as in boot. Considering that this sequence is hit during the boot or during the I2C recovery path from an error, the internal reset provides a better alternative than just ignoring the reset.
Signed-off-by: Akhil R Reviewed-by: Andy Shevchenko Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250710131206.2316-3-akhilrajeev@nvidia.com --- drivers/i2c/busses/i2c-tegra.c | 44 +++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 4f05afab161f..6088510b25f6 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -134,6 +134,8 @@ #define I2C_MST_FIFO_STATUS_TX GENMASK(23, 16) #define I2C_MST_FIFO_STATUS_RX GENMASK(7, 0) +#define I2C_MASTER_RESET_CNTRL 0x0a8 + /* configuration load timeout in microseconds */ #define I2C_CONFIG_LOAD_TIMEOUT 1000000 @@ -184,6 +186,9 @@ enum msg_end_type { * @has_mst_fifo: The I2C controller contains the new MST FIFO interface that * provides additional features and allows for longer messages to * be transferred in one go. + * @has_mst_reset: The I2C controller contains MASTER_RESET_CTRL register which + * provides an alternative to controller reset when configured as + * I2C master * @quirks: I2C adapter quirks for limiting write/read transfer size and not * allowing 0 length transfers. * @supports_bus_clear: Bus Clear support to recover from bus hang during @@ -213,6 +218,7 @@ struct tegra_i2c_hw_feature { bool has_multi_master_mode; bool has_slcg_override_reg; bool has_mst_fifo; + bool has_mst_reset; const struct i2c_adapter_quirks *quirks; bool supports_bus_clear; bool has_apb_dma; @@ -605,12 +611,42 @@ static int tegra_i2c_wait_for_config_load(struct tegra_i2c_dev *i2c_dev) return 0; } +static int tegra_i2c_master_reset(struct tegra_i2c_dev *i2c_dev) +{ + if (!i2c_dev->hw->has_mst_reset) + return -EOPNOTSUPP; + + /* + * Writing 1 to I2C_MASTER_RESET_CNTRL will reset all internal state of + * Master logic including FIFOs. Clear this bit to 0 for normal operation. + * SW needs to wait for 2us after assertion and de-assertion of this soft + * reset. 
+ */ + i2c_writel(i2c_dev, 0x1, I2C_MASTER_RESET_CNTRL); + fsleep(2); + + i2c_writel(i2c_dev, 0x0, I2C_MASTER_RESET_CNTRL); + fsleep(2); + + return 0; +} + static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) { u32 val, clk_divisor, clk_multiplier, tsu_thd, tlow, thigh, non_hs_mode; struct i2c_timings *t = &i2c_dev->timings; int err; + /* + * Reset the controller before initializing it. + * In case if device_reset() returns -ENOENT, i.e. when the reset is + * not available, the internal software reset will be used if it is + * supported by the controller. + */ + err = device_reset(i2c_dev->dev); + if (err == -ENOENT) + err = tegra_i2c_master_reset(i2c_dev); + /* * The reset shouldn't ever fail in practice. The failure will be a * sign of a severe problem that needs to be resolved. Still we don't @@ -619,7 +655,6 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) * emit a noisy warning on error, which won't stay unnoticed and * won't hose machine entirely. */ - err = device_reset(i2c_dev->dev); WARN_ON_ONCE(err); if (IS_DVC(i2c_dev)) @@ -1468,6 +1503,7 @@ static const struct tegra_i2c_hw_feature tegra20_i2c_hw = { .has_multi_master_mode = false, .has_slcg_override_reg = false, .has_mst_fifo = false, + .has_mst_reset = false, .quirks = &tegra_i2c_quirks, .supports_bus_clear = false, .has_apb_dma = true, @@ -1492,6 +1528,7 @@ static const struct tegra_i2c_hw_feature tegra30_i2c_hw = { .has_multi_master_mode = false, .has_slcg_override_reg = false, .has_mst_fifo = false, + .has_mst_reset = false, .quirks = &tegra_i2c_quirks, .supports_bus_clear = false, .has_apb_dma = true, @@ -1516,6 +1553,7 @@ static const struct tegra_i2c_hw_feature tegra114_i2c_hw = { .has_multi_master_mode = false, .has_slcg_override_reg = false, .has_mst_fifo = false, + .has_mst_reset = false, .quirks = &tegra_i2c_quirks, .supports_bus_clear = true, .has_apb_dma = true, @@ -1540,6 +1578,7 @@ static const struct tegra_i2c_hw_feature tegra124_i2c_hw = { .has_multi_master_mode = false, 
.has_slcg_override_reg = true, .has_mst_fifo = false, + .has_mst_reset = false, .quirks = &tegra_i2c_quirks, .supports_bus_clear = true, .has_apb_dma = true, @@ -1564,6 +1603,7 @@ static const struct tegra_i2c_hw_feature tegra210_i2c_hw = { .has_multi_master_mode = false, .has_slcg_override_reg = true, .has_mst_fifo = false, + .has_mst_reset = false, .quirks = &tegra_i2c_quirks, .supports_bus_clear = true, .has_apb_dma = true, @@ -1588,6 +1628,7 @@ static const struct tegra_i2c_hw_feature tegra186_i2c_hw = { .has_multi_master_mode = false, .has_slcg_override_reg = true, .has_mst_fifo = false, + .has_mst_reset = false, .quirks = &tegra_i2c_quirks, .supports_bus_clear = true, .has_apb_dma = false, @@ -1612,6 +1653,7 @@ static const struct tegra_i2c_hw_feature tegra194_i2c_hw = { .has_multi_master_mode = true, .has_slcg_override_reg = true, .has_mst_fifo = true, + .has_mst_reset = true, .quirks = &tegra194_i2c_quirks, .supports_bus_clear = true, .has_apb_dma = false, From 315b40df66c8f1d7be8056d9d418bb6976747389 Mon Sep 17 00:00:00 2001 From: Akhil R Date: Thu, 10 Jul 2025 18:42:06 +0530 Subject: [PATCH 1212/2411] i2c: tegra: Remove dma_sync_*() calls Calling dma_sync_*() on a buffer from dma_alloc_coherent() is pointless. The driver should not be doing its own bounce-buffering if the buffer is allocated through dma_alloc_coherent(). 
Suggested-by: Robin Murphy Signed-off-by: Akhil R Reviewed-by: Thierry Reding Reviewed-by: Andy Shevchenko Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250710131206.2316-4-akhilrajeev@nvidia.com --- drivers/i2c/busses/i2c-tegra.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 6088510b25f6..4eb31b913c1a 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1301,17 +1301,9 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (i2c_dev->dma_mode) { if (i2c_dev->msg_read) { - dma_sync_single_for_device(i2c_dev->dma_dev, - i2c_dev->dma_phys, - xfer_size, DMA_FROM_DEVICE); - err = tegra_i2c_dma_submit(i2c_dev, xfer_size); if (err) return err; - } else { - dma_sync_single_for_cpu(i2c_dev->dma_dev, - i2c_dev->dma_phys, - xfer_size, DMA_TO_DEVICE); } } @@ -1321,11 +1313,6 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (i2c_dev->dma_mode) { memcpy(i2c_dev->dma_buf + I2C_PACKET_HEADER_SIZE, msg->buf, i2c_dev->msg_len); - - dma_sync_single_for_device(i2c_dev->dma_dev, - i2c_dev->dma_phys, - xfer_size, DMA_TO_DEVICE); - err = tegra_i2c_dma_submit(i2c_dev, xfer_size); if (err) return err; @@ -1366,13 +1353,8 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, return -ETIMEDOUT; } - if (i2c_dev->msg_read && i2c_dev->msg_err == I2C_ERR_NONE) { - dma_sync_single_for_cpu(i2c_dev->dma_dev, - i2c_dev->dma_phys, - xfer_size, DMA_FROM_DEVICE); - + if (i2c_dev->msg_read && i2c_dev->msg_err == I2C_ERR_NONE) memcpy(i2c_dev->msg_buf, i2c_dev->dma_buf, i2c_dev->msg_len); - } } time_left = tegra_i2c_wait_completion(i2c_dev, &i2c_dev->msg_complete, From f632472a2ab42536df720464098e52eb1c6b57ea Mon Sep 17 00:00:00 2001 From: Nick Chan Date: Tue, 10 Jun 2025 21:45:20 +0800 Subject: [PATCH 1213/2411] dt-bindings: i2c: apple,i2c: Document Apple A7-A11, T2 compatibles The I2C controllers found on 
Apple A7-A11, T2 SoCs are compatible with the existing driver so add their per-SoC compatibles. Signed-off-by: Nick Chan Reviewed-by: Sven Peter Acked-by: Conor Dooley Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250610-i2c-no-t2-v2-1-a5a71080fba9@gmail.com --- Documentation/devicetree/bindings/i2c/apple,i2c.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/i2c/apple,i2c.yaml b/Documentation/devicetree/bindings/i2c/apple,i2c.yaml index 077d2a539c83..fed3e1b8c43f 100644 --- a/Documentation/devicetree/bindings/i2c/apple,i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/apple,i2c.yaml @@ -22,6 +22,11 @@ properties: compatible: items: - enum: + - apple,s5l8960x-i2c + - apple,t7000-i2c + - apple,s8000-i2c + - apple,t8010-i2c + - apple,t8015-i2c - apple,t8103-i2c - apple,t8112-i2c - apple,t6000-i2c From 85c34532849dae0fdcf880900ac9d7718a73fd1b Mon Sep 17 00:00:00 2001 From: Kathiravan Thirumoorthy Date: Tue, 13 May 2025 16:38:33 +0530 Subject: [PATCH 1214/2411] i2c: qcom-geni: fix I2C frequency table to achieve accurate bus rates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the I2C frequency table to match the recommended values specified in the I2C hardware programming guide. In the current IPQ5424 configuration where 32MHz is the source clock, the I2C bus frequencies do not meet expectations—for instance, 363KHz is achieved instead of the expected 400KHz. 
Fixes: 506bb2ab0075 ("i2c: qcom-geni: Support systems with 32MHz serial engine clock") Signed-off-by: Kathiravan Thirumoorthy Cc: # v6.13+ Reviewed-by: Mukesh Kumar Savaliya Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250513-i2c-bus-freq-v1-1-9a333ad5757f@oss.qualcomm.com --- drivers/i2c/busses/i2c-qcom-geni.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 13889f52b6f7..ff2289b52c84 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -155,9 +155,9 @@ static const struct geni_i2c_clk_fld geni_i2c_clk_map_19p2mhz[] = { /* source_clock = 32 MHz */ static const struct geni_i2c_clk_fld geni_i2c_clk_map_32mhz[] = { - { I2C_MAX_STANDARD_MODE_FREQ, 8, 14, 18, 40 }, - { I2C_MAX_FAST_MODE_FREQ, 4, 3, 11, 20 }, - { I2C_MAX_FAST_MODE_PLUS_FREQ, 2, 3, 6, 15 }, + { I2C_MAX_STANDARD_MODE_FREQ, 8, 14, 18, 38 }, + { I2C_MAX_FAST_MODE_FREQ, 4, 3, 9, 19 }, + { I2C_MAX_FAST_MODE_PLUS_FREQ, 2, 3, 5, 15 }, {} }; From 956048a3cd9d2575032e2c7ca62803677357ae18 Mon Sep 17 00:00:00 2001 From: Edip Hazuri Date: Tue, 29 Jul 2025 21:18:48 +0300 Subject: [PATCH 1215/2411] ALSA: hda/realtek - Fix mute LED for HP Victus 16-s0xxx The mute LED on this laptop is using ALC245 but requires a quirk to work. This patch enables the existing quirk for the device. Tested on Victus 16-S0063NT Laptop. The LED behaviour works as intended.
Cc: Signed-off-by: Edip Hazuri Link: https://patch.msgid.link/20250729181848.24432-2-edip@medip.dev Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 33ef08d251d6..bc95caeec41a 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6528,6 +6528,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8bbe, "HP Victus 16-r0xxx (MB 8BBE)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8bc8, "HP Victus 15-fa1xxx", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8bcd, "HP Omen 16-xd0xxx", ALC245_FIXUP_HP_MUTE_LED_V1_COEFBIT), + SND_PCI_QUIRK(0x103c, 0x8bd4, "HP Victus 16-s0xxx (MB 8BD4)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8bdd, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8bde, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8bdf, "HP Envy 15", ALC287_FIXUP_CS35L41_I2C_2), From a9dec0963187d05725369156a5e0e14cd3487bfb Mon Sep 17 00:00:00 2001 From: Edip Hazuri Date: Tue, 29 Jul 2025 21:18:50 +0300 Subject: [PATCH 1216/2411] ALSA: hda/realtek - Fix mute LED for HP Victus 16-d1xxx (MB 8A26) My friend has a Victus 16-d1xxx with board ID 8A26; the existing quirk for Victus 16-d1xxx wasn't working because of the different board ID. Tested on Victus 16-d1015nt Laptop. The LED behaviour works as intended.
Cc: Signed-off-by: Edip Hazuri Link: https://patch.msgid.link/20250729181848.24432-4-edip@medip.dev Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index bc95caeec41a..2554b42eeb0f 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6470,6 +6470,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8a0f, "HP Pavilion 14-ec1xxx", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8a20, "HP Laptop 15s-fq5xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8a25, "HP Victus 16-d1xxx (MB 8A25)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), + SND_PCI_QUIRK(0x103c, 0x8a26, "HP Victus 16-d1xxx (MB 8A26)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8a28, "HP Envy 13", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8a29, "HP Envy 15", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8a2a, "HP Envy 15", ALC287_FIXUP_CS35L41_I2C_2), From 8d452accd1380e1cb0b15a9876bcd19b14c5fabb Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Wed, 30 Jul 2025 14:40:54 +0800 Subject: [PATCH 1217/2411] ASoC: wm8962: Clear master mode when enter runtime suspend The enabled master mode causes power consumption to increase in idle state. Clear the MSTR bit in runtime suspend and recover it in runtime resume to reduce power.
Signed-off-by: Shengjiu Wang Reviewed-by: Charles Keepax Link: https://patch.msgid.link/20250730064054.3006409-1-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm8962.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index d69aa8b15629..27b4326429a0 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -82,6 +82,7 @@ struct wm8962_priv { #endif int irq; + bool master_flag; }; /* We can't use the same notifier block for more than one supply and @@ -2715,6 +2716,7 @@ static int wm8962_set_dai_sysclk(struct snd_soc_dai *dai, int clk_id, static int wm8962_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) { struct snd_soc_component *component = dai->component; + struct wm8962_priv *wm8962 = snd_soc_component_get_drvdata(component); int aif0 = 0; switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { @@ -2761,9 +2763,11 @@ static int wm8962_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } + wm8962->master_flag = false; switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { case SND_SOC_DAIFMT_CBP_CFP: aif0 |= WM8962_MSTR; + wm8962->master_flag = true; break; case SND_SOC_DAIFMT_CBC_CFC: break; @@ -3903,6 +3907,9 @@ static int wm8962_runtime_resume(struct device *dev) WM8962_BIAS_ENA | WM8962_VMID_SEL_MASK, WM8962_BIAS_ENA | 0x180); + if (wm8962->master_flag) + regmap_update_bits(wm8962->regmap, WM8962_AUDIO_INTERFACE_0, + WM8962_MSTR, WM8962_MSTR); msleep(5); return 0; @@ -3916,6 +3923,10 @@ static int wm8962_runtime_suspend(struct device *dev) { struct wm8962_priv *wm8962 = dev_get_drvdata(dev); + if (wm8962->master_flag) + regmap_update_bits(wm8962->regmap, WM8962_AUDIO_INTERFACE_0, + WM8962_MSTR, 0); + regmap_update_bits(wm8962->regmap, WM8962_PWR_MGMT_1, WM8962_VMID_SEL_MASK | WM8962_BIAS_ENA, 0); From 8936125e232803e64cb29e107326a942981188d6 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 25 Jul 2025 17:52:52 +0800 Subject: [PATCH 1218/2411] 
apparmor: Remove the unused variable rules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Variable rules is not effectively used, so delete it. security/apparmor/lsm.c:182:23: warning: variable ‘rules’ set but not used. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=22942 Signed-off-by: Jiapeng Chong Signed-off-by: John Johansen --- security/apparmor/lsm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index cecbb985928f..9a64b2db0267 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -179,10 +179,8 @@ static int apparmor_capget(const struct task_struct *target, kernel_cap_t *effec struct label_it i; label_for_each_confined(i, label, profile) { - struct aa_ruleset *rules; kernel_cap_t allowed; - rules = profile->label.rules[0]; allowed = aa_profile_capget(profile); *effective = cap_intersect(*effective, allowed); *permitted = cap_intersect(*permitted, allowed); From f3c0675bb9e0a3a472dd519ec7ccde23bdcf180b Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 30 Jul 2025 03:08:29 -0700 Subject: [PATCH 1219/2411] apparmor: fix test error: WARNING in apparmor_unix_stream_connect commit 88fec3526e84 ("apparmor: make sure unix socket labeling is correctly updated.") added the use of security_sk_alloc() which ensures the sk label is initialized. This means that the AA_BUG in apparmor_unix_stream_connect() is no longer correct, because while the sk is still not being initialized by going through post_create, it is now initialized in sk_alloc(). Remove the now invalid check.
Reported-by: syzbot+cd38ee04bcb3866b0c6d@syzkaller.appspotmail.com Fixes: 88fec3526e84 ("apparmor: make sure unix socket labeling is correctly updated.") Signed-off-by: John Johansen --- security/apparmor/lsm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 9a64b2db0267..e4b2944431e4 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1205,8 +1205,9 @@ static int apparmor_unix_stream_connect(struct sock *sk, struct sock *peer_sk, if (error) return error; - /* newsk doesn't go through post_create */ - AA_BUG(rcu_access_pointer(new_ctx->label)); + /* newsk doesn't go through post_create, but does go through + * security_sk_alloc() + */ rcu_assign_pointer(new_ctx->label, aa_get_label(rcu_dereference_protected(peer_ctx->label, true))); From 43584e993293326cfc508e664fe81f56a65f6240 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 30 Jul 2025 03:47:07 -0700 Subject: [PATCH 1220/2411] apparmor: fix Regression on linux-next (next-20250721) sk lock initialization was incorrectly removed from apparmor_file_alloc_security() while testing changes to apparmor_sk_alloc_security() resulting in the following regression. [ 48.056654] INFO: trying to register non-static key. [ 48.057480] The code is fine but needs lockdep annotation, or maybe [ 48.058416] you didn't initialize this object before use? [ 48.059209] turning off the locking correctness validator. [ 48.060040] CPU: 0 UID: 0 PID: 648 Comm: chronyd Not tainted 6.16.0-rc7-test-next-20250721-11410-g1ee809985e11-dirty #577 NONE [ 48.060049] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 48.060055] Call Trace: [ 48.060059] [ 48.060063] dump_stack_lvl (lib/dump_stack.c:122) [ 48.060075] register_lock_class (kernel/locking/lockdep.c:988 kernel/locking/lockdep.c:1302) [ 48.060084] ?
path_name (security/apparmor/file.c:159) [ 48.060093] __lock_acquire (kernel/locking/lockdep.c:5116) [ 48.060103] lock_acquire (kernel/locking/lockdep.c:473 (discriminator 4) kernel/locking/lockdep.c:5873 (discriminator 4) kernel/locking/lockdep.c:5828 (discriminator 4)) [ 48.060109] ? update_file_ctx (security/apparmor/file.c:464) [ 48.060115] ? __pfx_profile_path_perm (security/apparmor/file.c:247) [ 48.060121] _raw_spin_lock (include/linux/spinlock_api_smp.h:134 kernel/locking/spinlock.c:154) [ 48.060130] ? update_file_ctx (security/apparmor/file.c:464) [ 48.060134] update_file_ctx (security/apparmor/file.c:464) [ 48.060140] aa_file_perm (security/apparmor/file.c:532 (discriminator 1) security/apparmor/file.c:642 (discriminator 1)) [ 48.060147] ? __pfx_aa_file_perm (security/apparmor/file.c:607) [ 48.060152] ? do_mmap (mm/mmap.c:558) [ 48.060160] ? __pfx_userfaultfd_unmap_complete (fs/userfaultfd.c:841) [ 48.060170] ? __lock_acquire (kernel/locking/lockdep.c:4677 (discriminator 1) kernel/locking/lockdep.c:5194 (discriminator 1)) [ 48.060176] ? common_file_perm (security/apparmor/lsm.c:535 (discriminator 1)) [ 48.060185] security_mmap_file (security/security.c:3012 (discriminator 2)) [ 48.060192] vm_mmap_pgoff (mm/util.c:574 (discriminator 1)) [ 48.060200] ? find_held_lock (kernel/locking/lockdep.c:5353 (discriminator 1)) [ 48.060206] ? __pfx_vm_mmap_pgoff (mm/util.c:568) [ 48.060212] ? lock_release (kernel/locking/lockdep.c:5539 kernel/locking/lockdep.c:5892 kernel/locking/lockdep.c:5878) [ 48.060219] ? 
__fget_files (arch/x86/include/asm/preempt.h:85 (discriminator 13) include/linux/rcupdate.h:100 (discriminator 13) include/linux/rcupdate.h:873 (discriminator 13) fs/file.c:1072 (discriminator 13)) [ 48.060229] ksys_mmap_pgoff (mm/mmap.c:604) [ 48.060239] do_syscall_64 (arch/x86/entry/syscall_64.c:63 (discriminator 1) arch/x86/entry/syscall_64.c:94 (discriminator 1)) [ 48.060248] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) [ 48.060254] RIP: 0033:0x7fb6920e30a2 [ 48.060265] Code: 08 00 04 00 00 eb e2 90 41 f7 c1 ff 0f 00 00 75 27 55 89 cd 53 48 89 fb 48 85 ff 74 33 41 89 ea 48 89 df b8 09 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 5e 5b 5d c3 0f 1f 00 c7 05 e6 41 01 00 16 00 All code ======== 0: 08 00 or %al,(%rax) 2: 04 00 add $0x0,%al 4: 00 eb add %ch,%bl 6: e2 90 loop 0xffffffffffffff98 8: 41 f7 c1 ff 0f 00 00 test $0xfff,%r9d f: 75 27 jne 0x38 11: 55 push %rbp 12: 89 cd mov %ecx,%ebp 14: 53 push %rbx 15: 48 89 fb mov %rdi,%rbx 18: 48 85 ff test %rdi,%rdi 1b: 74 33 je 0x50 1d: 41 89 ea mov %ebp,%r10d 20: 48 89 df mov %rbx,%rdi 23: b8 09 00 00 00 mov $0x9,%eax 28: 0f 05 syscall 2a:* 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax <-- trapping instruction 30: 77 5e ja 0x90 32: 5b pop %rbx 33: 5d pop %rbp 34: c3 ret 35: 0f 1f 00 nopl (%rax) 38: c7 .byte 0xc7 39: 05 e6 41 01 00 add $0x141e6,%eax 3e: 16 (bad) ... Code starting with the faulting instruction =========================================== 0: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 6: 77 5e ja 0x66 8: 5b pop %rbx 9: 5d pop %rbp a: c3 ret b: 0f 1f 00 nopl (%rax) e: c7 .byte 0xc7 f: 05 e6 41 01 00 add $0x141e6,%eax 14: 16 (bad) ... 
[ 48.060270] RSP: 002b:00007ffd2c0d3528 EFLAGS: 00000206 ORIG_RAX: 0000000000000009 [ 48.060279] RAX: ffffffffffffffda RBX: 00007fb691fc8000 RCX: 00007fb6920e30a2 [ 48.060283] RDX: 0000000000000005 RSI: 000000000007d000 RDI: 00007fb691fc8000 [ 48.060287] RBP: 0000000000000812 R08: 0000000000000003 R09: 0000000000011000 [ 48.060290] R10: 0000000000000812 R11: 0000000000000206 R12: 00007ffd2c0d3578 [ 48.060293] R13: 00007fb6920b6160 R14: 00007ffd2c0d39f0 R15: 00000fffa581a6a8 Fixes: 88fec3526e84 ("apparmor: make sure unix socket labeling is correctly updated.") Signed-off-by: John Johansen --- security/apparmor/lsm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index e4b2944431e4..f385913e7d0e 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -505,6 +505,7 @@ static int apparmor_file_alloc_security(struct file *file) struct aa_file_ctx *ctx = file_ctx(file); struct aa_label *label = begin_current_label_crit_section(); + spin_lock_init(&ctx->lock); rcu_assign_pointer(ctx->label, aa_get_label(label)); end_current_label_crit_section(label); return 0; From 49811586be373e26a3ab52f54e0dfa663c02fddd Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Wed, 30 Jul 2025 13:16:07 +0530 Subject: [PATCH 1221/2411] block: move elevator queue allocation logic into blk_mq_init_sched In preparation for allocating sched_tags before freezing the request queue and acquiring ->elevator_lock, move the elevator queue allocation logic from the elevator ops ->init_sched callback into blk_mq_init_sched. As elevator_alloc is now only invoked from block layer core, we don't need to export it, so unexport elevator_alloc function. This refactoring provides a centralized location for elevator queue initialization, which makes it easier to store pre-allocated sched_tags in the struct elevator_queue during later changes. 
Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Nilay Shroff Link: https://lore.kernel.org/r/20250730074614.2537382-2-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 13 +++---------- block/blk-mq-sched.c | 11 ++++++++--- block/elevator.c | 1 - block/elevator.h | 2 +- block/kyber-iosched.c | 11 ++--------- block/mq-deadline.c | 14 ++------------ 6 files changed, 16 insertions(+), 36 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index f71ec0887733..aca9886c9ee3 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -7218,22 +7218,16 @@ static void bfq_init_root_group(struct bfq_group *root_group, root_group->sched_data.bfq_class_idle_last_service = jiffies; } -static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) +static int bfq_init_queue(struct request_queue *q, struct elevator_queue *eq) { struct bfq_data *bfqd; - struct elevator_queue *eq; unsigned int i; struct blk_independent_access_ranges *ia_ranges = q->disk->ia_ranges; - eq = elevator_alloc(q, e); - if (!eq) + bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node); + if (!bfqd) return -ENOMEM; - bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node); - if (!bfqd) { - kobject_put(&eq->kobj); - return -ENOMEM; - } eq->elevator_data = bfqd; spin_lock_irq(&q->queue_lock); @@ -7391,7 +7385,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) out_free: kfree(bfqd); - kobject_put(&eq->kobj); return -ENOMEM; } diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 55a0fd105147..359e0704e09b 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -475,10 +475,14 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth, BLKDEV_DEFAULT_RQ); + eq = elevator_alloc(q, e); + if (!eq) + return -ENOMEM; + if (blk_mq_is_shared_tags(flags)) { ret = 
blk_mq_init_sched_shared_tags(q); if (ret) - return ret; + goto err_put_elevator; } queue_for_each_hw_ctx(q, hctx, i) { @@ -487,7 +491,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) goto err_free_map_and_rqs; } - ret = e->ops.init_sched(q, e); + ret = e->ops.init_sched(q, eq); if (ret) goto err_free_map_and_rqs; @@ -508,7 +512,8 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) err_free_map_and_rqs: blk_mq_sched_free_rqs(q); blk_mq_sched_tags_teardown(q, flags); - +err_put_elevator: + kobject_put(&eq->kobj); q->elevator = NULL; return ret; } diff --git a/block/elevator.c b/block/elevator.c index 88f8f36bed98..939b0c590fbe 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -148,7 +148,6 @@ struct elevator_queue *elevator_alloc(struct request_queue *q, return eq; } -EXPORT_SYMBOL(elevator_alloc); static void elevator_release(struct kobject *kobj) { diff --git a/block/elevator.h b/block/elevator.h index a07ce773a38f..a4de5f9ad790 100644 --- a/block/elevator.h +++ b/block/elevator.h @@ -24,7 +24,7 @@ struct blk_mq_alloc_data; struct blk_mq_hw_ctx; struct elevator_mq_ops { - int (*init_sched)(struct request_queue *, struct elevator_type *); + int (*init_sched)(struct request_queue *, struct elevator_queue *); void (*exit_sched)(struct elevator_queue *); int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int); void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int); diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 4dba8405bd01..7b6832cb3a8d 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -402,20 +402,13 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) return ERR_PTR(ret); } -static int kyber_init_sched(struct request_queue *q, struct elevator_type *e) +static int kyber_init_sched(struct request_queue *q, struct elevator_queue *eq) { struct kyber_queue_data *kqd; - struct elevator_queue *eq; - - eq = elevator_alloc(q, e); - if (!eq) - return 
-ENOMEM; kqd = kyber_queue_data_alloc(q); - if (IS_ERR(kqd)) { - kobject_put(&eq->kobj); + if (IS_ERR(kqd)) return PTR_ERR(kqd); - } blk_stat_enable_accounting(q); diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 2edf1cac06d5..7b6caf30e00a 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -568,20 +568,14 @@ static void dd_exit_sched(struct elevator_queue *e) /* * initialize elevator private data (deadline_data). */ -static int dd_init_sched(struct request_queue *q, struct elevator_type *e) +static int dd_init_sched(struct request_queue *q, struct elevator_queue *eq) { struct deadline_data *dd; - struct elevator_queue *eq; enum dd_prio prio; - int ret = -ENOMEM; - - eq = elevator_alloc(q, e); - if (!eq) - return ret; dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node); if (!dd) - goto put_eq; + return -ENOMEM; eq->elevator_data = dd; @@ -608,10 +602,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e) q->elevator = eq; return 0; - -put_eq: - kobject_put(&eq->kobj); - return ret; } /* From f5a6604f7a4405450e4a1f54e5430f47290c500f Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Wed, 30 Jul 2025 13:16:08 +0530 Subject: [PATCH 1222/2411] block: fix lockdep warning caused by lock dependency in elv_iosched_store Recent lockdep reports [1] have revealed a potential deadlock caused by a lock dependency between the percpu allocator lock and the elevator lock. This issue can be avoided by ensuring that the allocation and release of scheduler tags (sched_tags) are performed outside the elevator lock. Furthermore, the queue does not need to remain frozen during these operations. To address this, move all sched_tags allocations and deallocations outside of both the ->elevator_lock and the ->freeze_lock. Since the lifetime of the elevator queue and its associated sched_tags is closely tied, the allocated sched_tags are now stored in the elevator queue structure.
Then, during the actual elevator switch (which runs under ->freeze_lock and ->elevator_lock), the pre-allocated sched_tags are assigned to the appropriate q->hctx. Once the elevator switch is complete and the locks are released, the old elevator queue and its associated sched_tags are freed. This commit specifically addresses the allocation/deallocation of sched_ tags during elevator switching. Note that sched_tags may also be allocated in other contexts, such as during nr_hw_queues updates. Supporting that use case will require batch allocation/deallocation, which will be handled in a follow-up patch. This restructuring ensures that sched_tags memory management occurs entirely outside of the ->elevator_lock and ->freeze_lock context, eliminating the lock dependency problem seen during scheduler updates. [1] https://lore.kernel.org/all/0659ea8d-a463-47c8-9180-43c719e106eb@linux.ibm.com/ Reported-by: Stefan Haberland Closes: https://lore.kernel.org/all/0659ea8d-a463-47c8-9180-43c719e106eb@linux.ibm.com/ Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Nilay Shroff Link: https://lore.kernel.org/r/20250730074614.2537382-3-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 155 +++++++++++++++++++++++-------------------- block/blk-mq-sched.h | 8 ++- block/elevator.c | 40 +++++++++-- block/elevator.h | 14 +++- 4 files changed, 136 insertions(+), 81 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 359e0704e09b..2d6d1ebdd8fb 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -374,64 +374,17 @@ bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq, } EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge); -static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q, - struct blk_mq_hw_ctx *hctx, - unsigned int hctx_idx) -{ - if (blk_mq_is_shared_tags(q->tag_set->flags)) { - hctx->sched_tags = q->sched_shared_tags; - return 0; - } - - 
hctx->sched_tags = blk_mq_alloc_map_and_rqs(q->tag_set, hctx_idx, - q->nr_requests); - - if (!hctx->sched_tags) - return -ENOMEM; - return 0; -} - -static void blk_mq_exit_sched_shared_tags(struct request_queue *queue) -{ - blk_mq_free_rq_map(queue->sched_shared_tags); - queue->sched_shared_tags = NULL; -} - /* called in queue's release handler, tagset has gone away */ static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int flags) { struct blk_mq_hw_ctx *hctx; unsigned long i; - queue_for_each_hw_ctx(q, hctx, i) { - if (hctx->sched_tags) { - if (!blk_mq_is_shared_tags(flags)) - blk_mq_free_rq_map(hctx->sched_tags); - hctx->sched_tags = NULL; - } - } + queue_for_each_hw_ctx(q, hctx, i) + hctx->sched_tags = NULL; if (blk_mq_is_shared_tags(flags)) - blk_mq_exit_sched_shared_tags(q); -} - -static int blk_mq_init_sched_shared_tags(struct request_queue *queue) -{ - struct blk_mq_tag_set *set = queue->tag_set; - - /* - * Set initial depth at max so that we don't need to reallocate for - * updating nr_requests. - */ - queue->sched_shared_tags = blk_mq_alloc_map_and_rqs(set, - BLK_MQ_NO_HCTX_IDX, - MAX_SCHED_RQ); - if (!queue->sched_shared_tags) - return -ENOMEM; - - blk_mq_tag_update_sched_shared_tags(queue); - - return 0; + q->sched_shared_tags = NULL; } void blk_mq_sched_reg_debugfs(struct request_queue *q) @@ -458,8 +411,75 @@ void blk_mq_sched_unreg_debugfs(struct request_queue *q) mutex_unlock(&q->debugfs_mutex); } +void blk_mq_free_sched_tags(struct elevator_tags *et, + struct blk_mq_tag_set *set) +{ + unsigned long i; + + /* Shared tags are stored at index 0 in @tags. 
*/ + if (blk_mq_is_shared_tags(set->flags)) + blk_mq_free_map_and_rqs(set, et->tags[0], BLK_MQ_NO_HCTX_IDX); + else { + for (i = 0; i < et->nr_hw_queues; i++) + blk_mq_free_map_and_rqs(set, et->tags[i], i); + } + + kfree(et); +} + +struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set, + unsigned int nr_hw_queues) +{ + unsigned int nr_tags; + int i; + struct elevator_tags *et; + gfp_t gfp = GFP_NOIO | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; + + if (blk_mq_is_shared_tags(set->flags)) + nr_tags = 1; + else + nr_tags = nr_hw_queues; + + et = kmalloc(sizeof(struct elevator_tags) + + nr_tags * sizeof(struct blk_mq_tags *), gfp); + if (!et) + return NULL; + /* + * Default to double of smaller one between hw queue_depth and + * 128, since we don't split into sync/async like the old code + * did. Additionally, this is a per-hw queue depth. + */ + et->nr_requests = 2 * min_t(unsigned int, set->queue_depth, + BLKDEV_DEFAULT_RQ); + et->nr_hw_queues = nr_hw_queues; + + if (blk_mq_is_shared_tags(set->flags)) { + /* Shared tags are stored at index 0 in @tags. 
*/ + et->tags[0] = blk_mq_alloc_map_and_rqs(set, BLK_MQ_NO_HCTX_IDX, + MAX_SCHED_RQ); + if (!et->tags[0]) + goto out; + } else { + for (i = 0; i < et->nr_hw_queues; i++) { + et->tags[i] = blk_mq_alloc_map_and_rqs(set, i, + et->nr_requests); + if (!et->tags[i]) + goto out_unwind; + } + } + + return et; +out_unwind: + while (--i >= 0) + blk_mq_free_map_and_rqs(set, et->tags[i], i); +out: + kfree(et); + return NULL; +} + /* caller must have a reference to @e, will grab another one if successful */ -int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) +int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e, + struct elevator_tags *et) { unsigned int flags = q->tag_set->flags; struct blk_mq_hw_ctx *hctx; @@ -467,40 +487,33 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) unsigned long i; int ret; - /* - * Default to double of smaller one between hw queue_depth and 128, - * since we don't split into sync/async like the old code did. - * Additionally, this is a per-hw queue depth. - */ - q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth, - BLKDEV_DEFAULT_RQ); - - eq = elevator_alloc(q, e); + eq = elevator_alloc(q, e, et); if (!eq) return -ENOMEM; + q->nr_requests = et->nr_requests; + if (blk_mq_is_shared_tags(flags)) { - ret = blk_mq_init_sched_shared_tags(q); - if (ret) - goto err_put_elevator; + /* Shared tags are stored at index 0 in @et->tags. 
*/ + q->sched_shared_tags = et->tags[0]; + blk_mq_tag_update_sched_shared_tags(q); } queue_for_each_hw_ctx(q, hctx, i) { - ret = blk_mq_sched_alloc_map_and_rqs(q, hctx, i); - if (ret) - goto err_free_map_and_rqs; + if (blk_mq_is_shared_tags(flags)) + hctx->sched_tags = q->sched_shared_tags; + else + hctx->sched_tags = et->tags[i]; } ret = e->ops.init_sched(q, eq); if (ret) - goto err_free_map_and_rqs; + goto out; queue_for_each_hw_ctx(q, hctx, i) { if (e->ops.init_hctx) { ret = e->ops.init_hctx(hctx, i); if (ret) { - eq = q->elevator; - blk_mq_sched_free_rqs(q); blk_mq_exit_sched(q, eq); kobject_put(&eq->kobj); return ret; @@ -509,10 +522,8 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) } return 0; -err_free_map_and_rqs: - blk_mq_sched_free_rqs(q); +out: blk_mq_sched_tags_teardown(q, flags); -err_put_elevator: kobject_put(&eq->kobj); q->elevator = NULL; return ret; diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 1326526bb733..0cde00cd1c47 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -18,10 +18,16 @@ void __blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx); void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx); -int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e); +int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e, + struct elevator_tags *et); void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e); void blk_mq_sched_free_rqs(struct request_queue *q); +struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set, + unsigned int nr_hw_queues); +void blk_mq_free_sched_tags(struct elevator_tags *et, + struct blk_mq_tag_set *set); + static inline void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx) { if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) diff --git a/block/elevator.c b/block/elevator.c index 939b0c590fbe..e9dc837b7b70 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -54,6 +54,8 @@ struct elv_change_ctx { 
struct elevator_queue *old; /* for registering new elevator */ struct elevator_queue *new; + /* holds sched tags data */ + struct elevator_tags *et; }; static DEFINE_SPINLOCK(elv_list_lock); @@ -132,7 +134,7 @@ static struct elevator_type *elevator_find_get(const char *name) static const struct kobj_type elv_ktype; struct elevator_queue *elevator_alloc(struct request_queue *q, - struct elevator_type *e) + struct elevator_type *e, struct elevator_tags *et) { struct elevator_queue *eq; @@ -145,6 +147,7 @@ struct elevator_queue *elevator_alloc(struct request_queue *q, kobject_init(&eq->kobj, &elv_ktype); mutex_init(&eq->sysfs_lock); hash_init(eq->hash); + eq->et = et; return eq; } @@ -165,7 +168,6 @@ static void elevator_exit(struct request_queue *q) lockdep_assert_held(&q->elevator_lock); ioc_clear_queue(q); - blk_mq_sched_free_rqs(q); mutex_lock(&e->sysfs_lock); blk_mq_exit_sched(q, e); @@ -591,7 +593,7 @@ static int elevator_switch(struct request_queue *q, struct elv_change_ctx *ctx) } if (new_e) { - ret = blk_mq_init_sched(q, new_e); + ret = blk_mq_init_sched(q, new_e, ctx->et); if (ret) goto out_unfreeze; ctx->new = q->elevator; @@ -626,8 +628,10 @@ static void elv_exit_and_release(struct request_queue *q) elevator_exit(q); mutex_unlock(&q->elevator_lock); blk_mq_unfreeze_queue(q, memflags); - if (e) + if (e) { + blk_mq_free_sched_tags(e->et, q->tag_set); kobject_put(&e->kobj); + } } static int elevator_change_done(struct request_queue *q, @@ -640,6 +644,7 @@ static int elevator_change_done(struct request_queue *q, &ctx->old->flags); elv_unregister_queue(q, ctx->old); + blk_mq_free_sched_tags(ctx->old->et, q->tag_set); kobject_put(&ctx->old->kobj); if (enable_wbt) wbt_enable_default(q->disk); @@ -658,9 +663,16 @@ static int elevator_change_done(struct request_queue *q, static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx) { unsigned int memflags; + struct blk_mq_tag_set *set = q->tag_set; int ret = 0; - 
lockdep_assert_held(&q->tag_set->update_nr_hwq_lock); + lockdep_assert_held(&set->update_nr_hwq_lock); + + if (strncmp(ctx->name, "none", 4)) { + ctx->et = blk_mq_alloc_sched_tags(set, set->nr_hw_queues); + if (!ctx->et) + return -ENOMEM; + } memflags = blk_mq_freeze_queue(q); /* @@ -680,6 +692,11 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx) blk_mq_unfreeze_queue(q, memflags); if (!ret) ret = elevator_change_done(q, ctx); + /* + * Free sched tags if it's allocated but we couldn't switch elevator. + */ + if (ctx->et && !ctx->new) + blk_mq_free_sched_tags(ctx->et, set); return ret; } @@ -690,6 +707,7 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx) */ void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e) { + struct blk_mq_tag_set *set = q->tag_set; struct elv_change_ctx ctx = {}; int ret = -ENODEV; @@ -697,15 +715,25 @@ void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e) if (e && !blk_queue_dying(q) && blk_queue_registered(q)) { ctx.name = e->elevator_name; - + ctx.et = blk_mq_alloc_sched_tags(set, set->nr_hw_queues); + if (!ctx.et) { + WARN_ON_ONCE(1); + goto unfreeze; + } mutex_lock(&q->elevator_lock); /* force to reattach elevator after nr_hw_queue is updated */ ret = elevator_switch(q, &ctx); mutex_unlock(&q->elevator_lock); } +unfreeze: blk_mq_unfreeze_queue_nomemrestore(q); if (!ret) WARN_ON_ONCE(elevator_change_done(q, &ctx)); + /* + * Free sched tags if it's allocated but we couldn't switch elevator. + */ + if (ctx.et && !ctx.new) + blk_mq_free_sched_tags(ctx.et, set); } /* diff --git a/block/elevator.h b/block/elevator.h index a4de5f9ad790..adc5c157e17e 100644 --- a/block/elevator.h +++ b/block/elevator.h @@ -23,6 +23,15 @@ enum elv_merge { struct blk_mq_alloc_data; struct blk_mq_hw_ctx; +struct elevator_tags { + /* num. 
of hardware queues for which tags are allocated */ + unsigned int nr_hw_queues; + /* depth used while allocating tags */ + unsigned int nr_requests; + /* shared tag is stored at index 0 */ + struct blk_mq_tags *tags[]; +}; + struct elevator_mq_ops { int (*init_sched)(struct request_queue *, struct elevator_queue *); void (*exit_sched)(struct elevator_queue *); @@ -113,6 +122,7 @@ struct request *elv_rqhash_find(struct request_queue *q, sector_t offset); struct elevator_queue { struct elevator_type *type; + struct elevator_tags *et; void *elevator_data; struct kobject kobj; struct mutex sysfs_lock; @@ -152,8 +162,8 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *page); ssize_t elv_iosched_store(struct gendisk *disk, const char *page, size_t count); extern bool elv_bio_merge_ok(struct request *, struct bio *); -extern struct elevator_queue *elevator_alloc(struct request_queue *, - struct elevator_type *); +struct elevator_queue *elevator_alloc(struct request_queue *, + struct elevator_type *, struct elevator_tags *); /* * Helper functions. From 04225d13aef11b2a539014def5e47d8c21fd74a5 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Wed, 30 Jul 2025 13:16:09 +0530 Subject: [PATCH 1223/2411] block: fix potential deadlock while running nr_hw_queue update Move scheduler tags (sched_tags) allocation and deallocation outside both the ->elevator_lock and ->freeze_lock when updating nr_hw_queues. This change breaks the dependency chain from the percpu allocator lock to the elevator lock, helping to prevent potential deadlocks, as observed in the reported lockdep splat[1]. This commit introduces batch allocation and deallocation helpers for sched_tags, which are now used from within __blk_mq_update_nr_hw_queues routine while iterating through the tagset. 
With this change, all sched_tags memory management is handled entirely outside the ->elevator_lock and the ->freeze_lock context, thereby eliminating the lock dependency that could otherwise manifest during nr_hw_queues updates. [1] https://lore.kernel.org/all/0659ea8d-a463-47c8-9180-43c719e106eb@linux.ibm.com/ Reported-by: Stefan Haberland Closes: https://lore.kernel.org/all/0659ea8d-a463-47c8-9180-43c719e106eb@linux.ibm.com/ Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Nilay Shroff Link: https://lore.kernel.org/r/20250730074614.2537382-4-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 65 ++++++++++++++++++++++++++++++++++++++++++++ block/blk-mq-sched.h | 4 +++ block/blk-mq.c | 16 +++++++---- block/blk.h | 4 ++- block/elevator.c | 15 ++++------ 5 files changed, 89 insertions(+), 15 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 2d6d1ebdd8fb..e2ce4a28e6c9 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -427,6 +427,32 @@ void blk_mq_free_sched_tags(struct elevator_tags *et, kfree(et); } +void blk_mq_free_sched_tags_batch(struct xarray *et_table, + struct blk_mq_tag_set *set) +{ + struct request_queue *q; + struct elevator_tags *et; + + lockdep_assert_held_write(&set->update_nr_hwq_lock); + + list_for_each_entry(q, &set->tag_list, tag_set_list) { + /* + * Accessing q->elevator without holding q->elevator_lock is + * safe because we're holding here set->update_nr_hwq_lock in + * the writer context. So, scheduler update/switch code (which + * acquires the same lock but in the reader context) can't run + * concurrently. 
+ */ + if (q->elevator) { + et = xa_load(et_table, q->id); + if (unlikely(!et)) + WARN_ON_ONCE(1); + else + blk_mq_free_sched_tags(et, set); + } + } +} + struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set, unsigned int nr_hw_queues) { @@ -477,6 +503,45 @@ struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set, return NULL; } +int blk_mq_alloc_sched_tags_batch(struct xarray *et_table, + struct blk_mq_tag_set *set, unsigned int nr_hw_queues) +{ + struct request_queue *q; + struct elevator_tags *et; + gfp_t gfp = GFP_NOIO | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; + + lockdep_assert_held_write(&set->update_nr_hwq_lock); + + list_for_each_entry(q, &set->tag_list, tag_set_list) { + /* + * Accessing q->elevator without holding q->elevator_lock is + * safe because we're holding here set->update_nr_hwq_lock in + * the writer context. So, scheduler update/switch code (which + * acquires the same lock but in the reader context) can't run + * concurrently. 
+ */ + if (q->elevator) { + et = blk_mq_alloc_sched_tags(set, nr_hw_queues); + if (!et) + goto out_unwind; + if (xa_insert(et_table, q->id, et, gfp)) + goto out_free_tags; + } + } + return 0; +out_free_tags: + blk_mq_free_sched_tags(et, set); +out_unwind: + list_for_each_entry_continue_reverse(q, &set->tag_list, tag_set_list) { + if (q->elevator) { + et = xa_load(et_table, q->id); + if (et) + blk_mq_free_sched_tags(et, set); + } + } + return -ENOMEM; +} + /* caller must have a reference to @e, will grab another one if successful */ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e, struct elevator_tags *et) diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 0cde00cd1c47..b554e1d55950 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -25,8 +25,12 @@ void blk_mq_sched_free_rqs(struct request_queue *q); struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set, unsigned int nr_hw_queues); +int blk_mq_alloc_sched_tags_batch(struct xarray *et_table, + struct blk_mq_tag_set *set, unsigned int nr_hw_queues); void blk_mq_free_sched_tags(struct elevator_tags *et, struct blk_mq_tag_set *set); +void blk_mq_free_sched_tags_batch(struct xarray *et_table, + struct blk_mq_tag_set *set); static inline void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx) { diff --git a/block/blk-mq.c b/block/blk-mq.c index 9692fa4c3ef2..b67d6c02eceb 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4974,12 +4974,13 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) * Switch back to the elevator type stored in the xarray. */ static void blk_mq_elv_switch_back(struct request_queue *q, - struct xarray *elv_tbl) + struct xarray *elv_tbl, struct xarray *et_tbl) { struct elevator_type *e = xa_load(elv_tbl, q->id); + struct elevator_tags *t = xa_load(et_tbl, q->id); /* The elv_update_nr_hw_queues unfreezes the queue. 
*/ - elv_update_nr_hw_queues(q, e); + elv_update_nr_hw_queues(q, e, t); /* Drop the reference acquired in blk_mq_elv_switch_none. */ if (e) @@ -5031,7 +5032,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int prev_nr_hw_queues = set->nr_hw_queues; unsigned int memflags; int i; - struct xarray elv_tbl; + struct xarray elv_tbl, et_tbl; lockdep_assert_held(&set->tag_list_lock); @@ -5044,6 +5045,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, memflags = memalloc_noio_save(); + xa_init(&et_tbl); + if (blk_mq_alloc_sched_tags_batch(&et_tbl, set, nr_hw_queues) < 0) + goto out_memalloc_restore; + xa_init(&elv_tbl); list_for_each_entry(q, &set->tag_list, tag_set_list) { @@ -5087,7 +5092,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, switch_back: /* The blk_mq_elv_switch_back unfreezes queue for us. */ list_for_each_entry(q, &set->tag_list, tag_set_list) - blk_mq_elv_switch_back(q, &elv_tbl); + blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl); list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_sysfs_register_hctxs(q); @@ -5098,7 +5103,8 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, } xa_destroy(&elv_tbl); - + xa_destroy(&et_tbl); +out_memalloc_restore: memalloc_noio_restore(memflags); /* Free the excess tags when nr_hw_queues shrink. */ diff --git a/block/blk.h b/block/blk.h index 76901a39997f..0a2eccf28ca4 100644 --- a/block/blk.h +++ b/block/blk.h @@ -12,6 +12,7 @@ #include "blk-crypto-internal.h" struct elevator_type; +struct elevator_tags; /* * Default upper limit for the software max_sectors limit used for regular I/Os. 
@@ -330,7 +331,8 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list, bool blk_insert_flush(struct request *rq); -void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e); +void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e, + struct elevator_tags *t); void elevator_set_default(struct request_queue *q); void elevator_set_none(struct request_queue *q); diff --git a/block/elevator.c b/block/elevator.c index e9dc837b7b70..fe96c6f4753c 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -705,7 +705,8 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx) * The I/O scheduler depends on the number of hardware queues, this forces a * reattachment when nr_hw_queues changes. */ -void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e) +void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e, + struct elevator_tags *t) { struct blk_mq_tag_set *set = q->tag_set; struct elv_change_ctx ctx = {}; @@ -715,25 +716,21 @@ void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e) if (e && !blk_queue_dying(q) && blk_queue_registered(q)) { ctx.name = e->elevator_name; - ctx.et = blk_mq_alloc_sched_tags(set, set->nr_hw_queues); - if (!ctx.et) { - WARN_ON_ONCE(1); - goto unfreeze; - } + ctx.et = t; + mutex_lock(&q->elevator_lock); /* force to reattach elevator after nr_hw_queue is updated */ ret = elevator_switch(q, &ctx); mutex_unlock(&q->elevator_lock); } -unfreeze: blk_mq_unfreeze_queue_nomemrestore(q); if (!ret) WARN_ON_ONCE(elevator_change_done(q, &ctx)); /* * Free sched tags if it's allocated but we couldn't switch elevator. 
*/ - if (ctx.et && !ctx.new) - blk_mq_free_sched_tags(ctx.et, set); + if (t && !ctx.new) + blk_mq_free_sched_tags(t, set); } /* From 11f74f48c14c1f4fe16541900ea5944c42e30ccf Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Wed, 30 Jul 2025 14:49:06 +0200 Subject: [PATCH 1224/2411] ASoC: Intel: avs: Fix uninitialized pointer error in probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If pcim_request_all_regions() fails, error path operates on uninitialized 'bus' pointer. Found out by Coverity static analyzer. Reviewed-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Link: https://patch.msgid.link/20250730124906.351798-1-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/avs/core.c b/sound/soc/intel/avs/core.c index 7af324753673..5ebadba07ecc 100644 --- a/sound/soc/intel/avs/core.c +++ b/sound/soc/intel/avs/core.c @@ -445,6 +445,8 @@ static int avs_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) adev = devm_kzalloc(dev, sizeof(*adev), GFP_KERNEL); if (!adev) return -ENOMEM; + bus = &adev->base.core; + ret = avs_bus_init(adev, pci, id); if (ret < 0) { dev_err(dev, "failed to init avs bus: %d\n", ret); @@ -455,7 +457,6 @@ static int avs_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) if (ret < 0) return ret; - bus = &adev->base.core; bus->addr = pci_resource_start(pci, 0); bus->remap_addr = pci_ioremap_bar(pci, 0); if (!bus->remap_addr) { From 1d8dd982c409d89b4ffabdbe10b569b3deb80a64 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Jul 2025 08:46:33 +0200 Subject: [PATCH 1225/2411] ALSA: hda/realtek: Enable drivers as default The recent split of Realtek HD-audio driver forced users to choose the right Kconfigs, but most users have no idea which ones to enable. 
Although the distros tend to enable all of them, individual users may have their own favorites and miss something needed via the version upgrade. For smoother upgrade path from the previous kernel configuration, now we take the following changes: - CONFIG_SND_HDA_CODEC_REALTEK (which is a menuconfig) is changed from bool to tristate again, so that it can take over from the previous config gracefully. - CONFIG_SND_HDA_CODEC_ALC* receive "default y", so that they are enabled as default as long as CONFIG_SND_HDA_CODEC_REALTEK is set. Those can be still disabled if users want to reduce the size, too. At least this allows users to run "make oldconfig" and push RETURN blindly. Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250730064639.25617-2-tiwai@suse.de --- sound/hda/codecs/realtek/Kconfig | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sound/hda/codecs/realtek/Kconfig b/sound/hda/codecs/realtek/Kconfig index 4b3ab28203b4..20899f3fc051 100644 --- a/sound/hda/codecs/realtek/Kconfig +++ b/sound/hda/codecs/realtek/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only menuconfig SND_HDA_CODEC_REALTEK - bool "Realtek HD-audio codec support" + tristate "Realtek HD-audio codec support" if SND_HDA_CODEC_REALTEK @@ -15,6 +15,7 @@ config SND_HDA_CODEC_ALC260 tristate "Build Realtek ALC260 HD-audio codec support" depends on INPUT select SND_HDA_CODEC_REALTEK_LIB + default y help Say Y or M here to include Realtek ALC260 HD-audio codec support @@ -22,6 +23,7 @@ config SND_HDA_CODEC_ALC262 tristate "Build Realtek ALC262 HD-audio codec support" depends on INPUT select SND_HDA_CODEC_REALTEK_LIB + default y help Say Y or M here to include Realtek ALC262 HD-audio codec support @@ -29,6 +31,7 @@ config SND_HDA_CODEC_ALC268 tristate "Build Realtek ALC268 HD-audio codec support" depends on INPUT select SND_HDA_CODEC_REALTEK_LIB + default y help Say Y or M here to include Realtek ALC268 and compatible HD-audio codec support @@ -37,6 
+40,7 @@ config SND_HDA_CODEC_ALC269 tristate "Build Realtek ALC269 HD-audio codecs support" depends on INPUT select SND_HDA_CODEC_REALTEK_LIB + default y help Say Y or M here to include Realtek ALC269 and compatible HD-audio codec support @@ -45,6 +49,7 @@ config SND_HDA_CODEC_ALC662 tristate "Build Realtek ALC662 HD-audio codecs support" depends on INPUT select SND_HDA_CODEC_REALTEK_LIB + default y help Say Y or M here to include Realtek ALC662 and compatible HD-audio codec support @@ -53,6 +58,7 @@ config SND_HDA_CODEC_ALC680 tristate "Build Realtek ALC680 HD-audio codecs support" depends on INPUT select SND_HDA_CODEC_REALTEK_LIB + default y help Say Y or M here to include Realtek ALC680 HD-audio codec support @@ -60,6 +66,7 @@ config SND_HDA_CODEC_ALC861 tristate "Build Realtek ALC861 HD-audio codecs support" depends on INPUT select SND_HDA_CODEC_REALTEK_LIB + default y help Say Y or M here to include Realtek ALC861 HD-audio codec support @@ -67,6 +74,7 @@ config SND_HDA_CODEC_ALC861VD tristate "Build Realtek ALC861-VD HD-audio codecs support" depends on INPUT select SND_HDA_CODEC_REALTEK_LIB + default y help Say Y or M here to include Realtek ALC861-VD HD-audio codec support @@ -74,6 +82,7 @@ config SND_HDA_CODEC_ALC880 tristate "Build Realtek ALC880 HD-audio codecs support" depends on INPUT select SND_HDA_CODEC_REALTEK_LIB + default y help Say Y or M here to include Realtek ALC880 HD-audio codec support @@ -81,6 +90,7 @@ config SND_HDA_CODEC_ALC882 tristate "Build Realtek ALC882 HD-audio codecs support" depends on INPUT select SND_HDA_CODEC_REALTEK_LIB + default y help Say Y or M here to include Realtek ALC882 and compatible HD-audio codec support From fc2792a4000e9587080fa7f5b8a868cf393aa62e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Jul 2025 08:46:34 +0200 Subject: [PATCH 1226/2411] ALSA: hda/cirrus: Enable drivers as default Like HD-audio Realtek drivers, Cirrus Logic HD-audio codec driver was split to multiple drivers, too, and now users 
are forced to choose the right kconfig items. For smoother upgrade path, keep the previous CONFIG_SND_HDA_CODEC_CIRRUS as the menuconfig. The new kconfig CONFIG_SND_HDA_CODEC_CS42* are enabled as default, as long as CONFIG_SND_HDA_CODEC_CIRRUS is set, so that the system with Cirrus codec can keep working. This is only about the default config, and each driver can be still disabled if user wants to reduce the size, too. Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250730064639.25617-3-tiwai@suse.de --- sound/hda/codecs/cirrus/Kconfig | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/hda/codecs/cirrus/Kconfig b/sound/hda/codecs/cirrus/Kconfig index b3a5968e9a02..33cfe52713bc 100644 --- a/sound/hda/codecs/cirrus/Kconfig +++ b/sound/hda/codecs/cirrus/Kconfig @@ -1,8 +1,14 @@ # SPDX-License-Identifier: GPL-2.0-only +menuconfig SND_HDA_CODEC_CIRRUS + tristate "Cirrus Logic HD-audio codec support" + +if SND_HDA_CODEC_CIRRUS + config SND_HDA_CODEC_CS420X tristate "Build Cirrus Logic CS420x codec support" select SND_HDA_GENERIC + default y help Say Y or M here to include Cirrus Logic CS420x codec support in snd-hda-intel driver @@ -13,6 +19,7 @@ comment "Set to Y if you want auto-loading the codec driver" config SND_HDA_CODEC_CS421X tristate "Build Cirrus Logic CS421x codec support" select SND_HDA_GENERIC + default y help Say Y or M here to include Cirrus Logic CS421x codec support in snd-hda-intel driver @@ -29,3 +36,5 @@ config SND_HDA_CODEC_CS8409 comment "Set to Y if you want auto-loading the codec driver" depends on SND_HDA=y && SND_HDA_CODEC_CS8409=m + +endif From 81231ad173d840693f8d5f34ad9ada75aa8ad79f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Jul 2025 08:46:35 +0200 Subject: [PATCH 1227/2411] ALSA: hda/hdmi: Enable drivers as default Like other HD-audio codec drivers, HD-audio HDMI codec driver was split to multiple drivers, and now users are forced to choose the right kconfig items. 
For a smoother upgrade path, keep the previous CONFIG_SND_HDA_CODEC_HDMI as the menuconfig, so that the kconfig can be taken over from the previous config. All HDMI codec drivers belonging to it are enabled by default as long as CONFIG_SND_HDA_CODEC_HDMI is set. This is only about the default config, and each driver can still be disabled if the user wants to reduce the size, too. The kconfig for the generic HDMI driver is changed to CONFIG_SND_HDA_CODEC_HDMI_GENERIC along with this action. Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250730064639.25617-4-tiwai@suse.de --- sound/hda/codecs/hdmi/Kconfig | 24 +++++++++++++++++++----- sound/hda/codecs/hdmi/Makefile | 2 +- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/sound/hda/codecs/hdmi/Kconfig b/sound/hda/codecs/hdmi/Kconfig index 498000d2c6ae..973ca4ca077b 100644 --- a/sound/hda/codecs/hdmi/Kconfig +++ b/sound/hda/codecs/hdmi/Kconfig @@ -1,9 +1,15 @@ # SPDX-License-Identifier: GPL-2.0-only -config SND_HDA_CODEC_HDMI +menuconfig SND_HDA_CODEC_HDMI + tristate "HD-audio HDMI codec support" + +if SND_HDA_CODEC_HDMI + +config SND_HDA_CODEC_HDMI_GENERIC tristate "Generic HDMI/DisplayPort HD-audio codec support" select SND_DYNAMIC_MINORS select SND_PCM_ELD + default y help Say Y or M here to include Generic HDMI and DisplayPort HD-audio codec support. @@ -13,13 +19,15 @@ config SND_HDA_CODEC_HDMI config SND_HDA_CODEC_HDMI_SIMPLE tristate "Simple HDMI/DisplayPort HD-audio codec support" + default y help Say Y or M here to include Simple HDMI and DisplayPort HD-audio codec support for VIA and other codecs. config SND_HDA_CODEC_HDMI_INTEL tristate "Intel HDMI/DisplayPort HD-audio codec support" - select SND_HDA_CODEC_HDMI + select SND_HDA_CODEC_HDMI_GENERIC + default y help Say Y or M here to include Intel graphics HDMI and DisplayPort HD-audio codec support. 
@@ -41,14 +49,16 @@ config SND_HDA_INTEL_HDMI_SILENT_STREAM config SND_HDA_CODEC_HDMI_ATI tristate "AMD/ATI HDMI/DisplayPort HD-audio codec support" - select SND_HDA_CODEC_HDMI + select SND_HDA_CODEC_HDMI_GENERIC + default y help Say Y or M here to include AMD/ATI graphics HDMI and DisplayPort HD-audio codec support. config SND_HDA_CODEC_HDMI_NVIDIA tristate "Nvidia HDMI/DisplayPort HD-audio codec support" - select SND_HDA_CODEC_HDMI + select SND_HDA_CODEC_HDMI_GENERIC + default y help Say Y or M here to include HDMI and DisplayPort HD-audio codec support for the recent Nvidia graphics cards. @@ -56,13 +66,17 @@ config SND_HDA_CODEC_HDMI_NVIDIA config SND_HDA_CODEC_HDMI_NVIDIA_MCP tristate "Legacy Nvidia HDMI/DisplayPort HD-audio codec support" select SND_HDA_CODEC_HDMI_SIMPLE + default y help Say Y or M here to include HDMI and DisplayPort HD-audio codec support for the legacy Nvidia graphics like MCP73, MCP67, MCP77/78. config SND_HDA_CODEC_HDMI_TEGRA tristate "Nvidia Tegra HDMI/DisplayPort HD-audio codec support" - select SND_HDA_CODEC_HDMI + select SND_HDA_CODEC_HDMI_GENERIC + default y help Say Y or M here to include HDMI and DisplayPort HD-audio codec support for Nvidia Tegra. 
+ +endif diff --git a/sound/hda/codecs/hdmi/Makefile b/sound/hda/codecs/hdmi/Makefile index c07a0a71b64f..0e49a9421e3b 100644 --- a/sound/hda/codecs/hdmi/Makefile +++ b/sound/hda/codecs/hdmi/Makefile @@ -9,7 +9,7 @@ snd-hda-codec-nvhdmi-y := nvhdmi.o snd-hda-codec-nvhdmi-mcp-y := nvhdmi-mcp.o snd-hda-codec-tegrahdmi-y := tegrahdmi.o -obj-$(CONFIG_SND_HDA_CODEC_HDMI) += snd-hda-codec-hdmi.o +obj-$(CONFIG_SND_HDA_CODEC_HDMI_GENERIC) += snd-hda-codec-hdmi.o obj-$(CONFIG_SND_HDA_CODEC_HDMI_SIMPLE) += snd-hda-codec-simplehdmi.o obj-$(CONFIG_SND_HDA_CODEC_HDMI_INTEL) += snd-hda-codec-intelhdmi.o obj-$(CONFIG_SND_HDA_CODEC_HDMI_ATI) += snd-hda-codec-atihdmi.o From 6f02527729bd31ca4e473bff19fda4ccd5889148 Mon Sep 17 00:00:00 2001 From: Norman Maurer Date: Mon, 28 Jul 2025 20:59:53 -1000 Subject: [PATCH 1228/2411] io_uring/net: Allow to do vectorized send At the moment you have to use sendmsg for vectorized send. While this works it's suboptimal as it also means you need to allocate a struct msghdr that needs to be kept alive until a submission happens. We can remove this limitation by just allowing to use send directly. Signed-off-by: Norman Maurer Link: https://lore.kernel.org/r/20250729065952.26646-1-norman_maurer@apple.com [axboe: remove -EINVAL return for SENDMSG and SEND_VECTORIZED] [axboe: allow send_zc to set SEND_VECTORIZED too] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 4 ++++ io_uring/net.c | 9 +++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index b8a0e70ee2fd..6957dc539d83 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -392,12 +392,16 @@ enum io_uring_op { * the starting buffer ID in cqe->flags as per * usual for provided buffer usage. The buffers * will be contiguous from the starting buffer ID. + * + * IORING_SEND_VECTORIZED If set, SEND[_ZC] will take a pointer to a io_vec + * to allow vectorized send operations. 
*/ #define IORING_RECVSEND_POLL_FIRST (1U << 0) #define IORING_RECV_MULTISHOT (1U << 1) #define IORING_RECVSEND_FIXED_BUF (1U << 2) #define IORING_SEND_ZC_REPORT_USAGE (1U << 3) #define IORING_RECVSEND_BUNDLE (1U << 4) +#define IORING_SEND_VECTORIZED (1U << 5) /* * cqe.res for IORING_CQE_F_NOTIF if diff --git a/io_uring/net.c b/io_uring/net.c index 35585bdc59f3..dd96e355982f 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -382,6 +382,10 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) } if (req->flags & REQ_F_BUFFER_SELECT) return 0; + + if (sr->flags & IORING_SEND_VECTORIZED) + return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE); + return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter); } @@ -409,7 +413,7 @@ static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE); } -#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE) +#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE | IORING_SEND_VECTORIZED) int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { @@ -1318,7 +1322,8 @@ void io_send_zc_cleanup(struct io_kiocb *req) } #define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF) -#define IO_ZC_FLAGS_VALID (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE) +#define IO_ZC_FLAGS_VALID (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE | \ + IORING_SEND_VECTORIZED) int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { From 0dd1274a053f9ede97e3f3269b5012372567e521 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 30 Jul 2025 10:07:54 -0400 Subject: [PATCH 1229/2411] tracing: Have eprobes have their own config option Eprobes were added in 5.15 and were selected whenever any of the other probe events were selected. 
If kprobe events were enabled (which they are by default if kprobes are enabled), eprobe events would be enabled as well. The same was true for uprobe and fprobe events. Give eprobes their own config option, which is enabled by default if tracing is enabled. Link: https://lore.kernel.org/all/20250729102636.b7cce553e7cc263722b12365@kernel.org/ Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Namhyung Kim Cc: Jonathan Corbet Cc: Randy Dunlap Link: https://lore.kernel.org/20250730140945.360286733@kernel.org Suggested-by: Masami Hiramatsu (Google) Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/Kconfig | 14 ++++++++++++++ kernel/trace/Makefile | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 93e8e7fc11c0..f80298e6aa16 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -792,6 +792,20 @@ config UPROBE_EVENTS This option is required if you plan to use perf-probe subcommand of perf tools on user space applications. +config EPROBE_EVENTS + bool "Enable event-based dynamic events" + depends on TRACING + depends on HAVE_REGS_AND_STACK_ACCESS_API + select PROBE_EVENTS + select DYNAMIC_EVENTS + default y + help + Eprobes are dynamic events that can be placed on other existing + events. It can be used to limit what fields are recorded in + an event or even dereference a field of an event. It can + convert the type of an event field. For example, turn an + address into a string. 
+ config BPF_EVENTS depends on BPF_SYSCALL depends on (KPROBE_EVENTS || UPROBE_EVENTS) && PERF_EVENTS diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 057cd975d014..dcb4e02afc5f 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -82,7 +82,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o -obj-$(CONFIG_PROBE_EVENTS) += trace_eprobe.o +obj-$(CONFIG_EPROBE_EVENTS) += trace_eprobe.o obj-$(CONFIG_TRACE_EVENT_INJECT) += trace_events_inject.o obj-$(CONFIG_SYNTH_EVENTS) += trace_events_synth.o obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o From 623526ba8984cafdffa0eba7ee424f2e40c8a219 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 30 Jul 2025 10:07:55 -0400 Subject: [PATCH 1230/2411] Documentation: tracing: Add documentation about eprobes Eprobes was added back in 5.15, but was never documented. It became a "secret" interface even though it has been a topic of several presentations. For some reason, when eprobes was added, documenting it never became a priority, until now. Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Namhyung Kim Cc: Jonathan Corbet Link: https://lore.kernel.org/20250730140945.528135548@kernel.org Reviewed-by: Randy Dunlap Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- Documentation/trace/eprobetrace.rst | 269 ++++++++++++++++++++++++++++ Documentation/trace/index.rst | 1 + 2 files changed, 270 insertions(+) create mode 100644 Documentation/trace/eprobetrace.rst diff --git a/Documentation/trace/eprobetrace.rst b/Documentation/trace/eprobetrace.rst new file mode 100644 index 000000000000..89b5157cfab8 --- /dev/null +++ b/Documentation/trace/eprobetrace.rst @@ -0,0 +1,269 @@ +.. 
SPDX-License-Identifier: GPL-2.0 + +================================== +Eprobe - Event-based Probe Tracing +================================== + +:Author: Steven Rostedt + +- Written for v6.17 + +Overview +======== + +Eprobes are dynamic events that are placed on existing events to either +dereference a field that is a pointer, or simply to limit what fields are +recorded in the trace event. + +Eprobes depend on kprobe events so to enable this feature, build your kernel +with CONFIG_EPROBE_EVENTS=y. + +Eprobes are created via the /sys/kernel/tracing/dynamic_events file. + +Synopsis of eprobe_events +------------------------- +:: + + e[:[EGRP/][EEVENT]] GRP.EVENT [FETCHARGS] : Set a probe + -:[EGRP/][EEVENT] : Clear a probe + + EGRP : Group name of the new event. If omitted, use "eprobes" for it. + EEVENT : Event name. If omitted, the event name is generated and will + be the same event name as the event it attached to. + GRP : Group name of the event to attach to. + EVENT : Event name of the event to attach to. + + FETCHARGS : Arguments. Each probe can have up to 128 args. + $FIELD : Fetch the value of the event field called FIELD. + @ADDR : Fetch memory at ADDR (ADDR should be in kernel) + @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol) + $comm : Fetch current task comm. + +|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*3)(\*4) + \IMM : Store an immediate value to the argument. + NAME=FETCHARG : Set NAME as the argument name of FETCHARG. + FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types + (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types + (x8/x16/x32/x64), VFS layer common type(%pd/%pD), "char", + "string", "ustring", "symbol", "symstr" and "bitfield" are + supported. + +Types +----- +The FETCHARGS above is very similar to the kprobe events as described in +Documentation/trace/kprobetrace.rst. 
+ +The difference between eprobes and kprobes FETCHARGS is that eprobes have a +$FIELD command that returns the content of the event field of the event +that is attached. Eprobes do not have access to registers, stacks and function +arguments that kprobes have. + +If a field argument is a pointer, it may be dereferenced just like a memory +address using the FETCHARGS syntax. + + +Attaching to dynamic events +--------------------------- + +Eprobes may attach to dynamic events as well as to normal events. They may +attach to a kprobe event, a synthetic event or a fprobe event. This is useful +if the type of a field needs to be changed. See Example 2 below. + +Usage examples +============== + +Example 1 +--------- + +The basic usage of eprobes is to limit the data that is being recorded into +the tracing buffer. For example, a common event to trace is the sched_switch +trace event. That has a format of:: + + field:unsigned short common_type; offset:0; size:2; signed:0; + field:unsigned char common_flags; offset:2; size:1; signed:0; + field:unsigned char common_preempt_count; offset:3; size:1; signed:0; + field:int common_pid; offset:4; size:4; signed:1; + + field:char prev_comm[16]; offset:8; size:16; signed:0; + field:pid_t prev_pid; offset:24; size:4; signed:1; + field:int prev_prio; offset:28; size:4; signed:1; + field:long prev_state; offset:32; size:8; signed:1; + field:char next_comm[16]; offset:40; size:16; signed:0; + field:pid_t next_pid; offset:56; size:4; signed:1; + field:int next_prio; offset:60; size:4; signed:1; + +The first four fields are common to all events and cannot be limited. But the +rest of the event has 56 bytes of information. It records the names of the +previous and next tasks being scheduled out and in, as well as their pids and +priorities. It also records the state of the previous task. If only the pids +of the tasks are of interest, why waste the ring buffer with all the other +fields? + +An eprobe can limit what gets recorded. 
Note, it does not help performance, +as all the fields are recorded in a temporary buffer to process the eprobe. +:: + + # echo 'e:sched/switch sched.sched_switch prev=$prev_pid:u32 next=$next_pid:u32' >> /sys/kernel/tracing/dynamic_events + # echo 1 > /sys/kernel/tracing/events/sched/switch/enable + # cat /sys/kernel/tracing/trace + + # tracer: nop + # + # entries-in-buffer/entries-written: 2721/2721 #P:8 + # + # _-----=> irqs-off/BH-disabled + # / _----=> need-resched + # | / _---=> hardirq/softirq + # || / _--=> preempt-depth + # ||| / _-=> migrate-disable + # |||| / delay + # TASK-PID CPU# ||||| TIMESTAMP FUNCTION + # | | | ||||| | | + sshd-session-1082 [004] d..4. 5041.239906: switch: (sched.sched_switch) prev=1082 next=0 + bash-1085 [001] d..4. 5041.240198: switch: (sched.sched_switch) prev=1085 next=141 + kworker/u34:5-141 [001] d..4. 5041.240259: switch: (sched.sched_switch) prev=141 next=1085 + <idle>-0 [004] d..4. 5041.240354: switch: (sched.sched_switch) prev=0 next=1082 + bash-1085 [001] d..4. 5041.240385: switch: (sched.sched_switch) prev=1085 next=141 + kworker/u34:5-141 [001] d..4. 5041.240410: switch: (sched.sched_switch) prev=141 next=1085 + bash-1085 [001] d..4. 5041.240478: switch: (sched.sched_switch) prev=1085 next=0 + sshd-session-1082 [004] d..4. 5041.240526: switch: (sched.sched_switch) prev=1082 next=0 + <idle>-0 [001] d..4. 5041.247524: switch: (sched.sched_switch) prev=0 next=90 + <idle>-0 [002] d..4. 5041.247545: switch: (sched.sched_switch) prev=0 next=16 + kworker/1:1-90 [001] d..4. 5041.247580: switch: (sched.sched_switch) prev=90 next=0 + rcu_sched-16 [002] d..4. 5041.247591: switch: (sched.sched_switch) prev=16 next=0 + <idle>-0 [002] d..4. 5041.257536: switch: (sched.sched_switch) prev=0 next=16 + rcu_sched-16 [002] d..4. 5041.257573: switch: (sched.sched_switch) prev=16 next=0 + +Note, without adding the "u32" after the prev_pid and next_pid, the values +would be shown in hexadecimal by default. 
+ +Example 2 +--------- + +If a specific system call is to be recorded but the syscall events are not +enabled, the raw_syscalls can still be used (system call +events are not normal events, but are created from the raw_syscalls events +within the kernel). In order to trace the openat system call, one can create +an event probe on top of the raw_syscalls event: +:: + + # cd /sys/kernel/tracing + # cat events/raw_syscalls/sys_enter/format + name: sys_enter + ID: 395 + format: + field:unsigned short common_type; offset:0; size:2; signed:0; + field:unsigned char common_flags; offset:2; size:1; signed:0; + field:unsigned char common_preempt_count; offset:3; size:1; signed:0; + field:int common_pid; offset:4; size:4; signed:1; + + field:long id; offset:8; size:8; signed:1; + field:unsigned long args[6]; offset:16; size:48; signed:0; + + print fmt: "NR %ld (%lx, %lx, %lx, %lx, %lx, %lx)", REC->id, REC->args[0], REC->args[1], REC->args[2], REC->args[3], REC->args[4], REC->args[5] + +From the source code, the sys_openat() has: +:: + + int sys_openat(int dirfd, const char *path, int flags, mode_t mode) + { + return my_syscall4(__NR_openat, dirfd, path, flags, mode); + } + +The path is the second parameter, and that is what is wanted. +:: + + # echo 'e:openat raw_syscalls.sys_enter nr=$id filename=+8($args):ustring' >> dynamic_events + +This is being run on x86_64 where the word size is 8 bytes and the openat +system call __NR_openat is set at 257. +:: + + # echo 'nr == 257' > events/eprobes/openat/filter + +Now enable the event and look at the trace. +:: + + # echo 1 > events/eprobes/openat/enable + # cat trace + + # tracer: nop + # + # entries-in-buffer/entries-written: 4/4 #P:8 + # + # _-----=> irqs-off/BH-disabled + # / _----=> need-resched + # | / _---=> hardirq/softirq + # || / _--=> preempt-depth + # ||| / _-=> migrate-disable + # |||| / delay + # TASK-PID CPU# ||||| TIMESTAMP FUNCTION + # | | | ||||| | | + cat-1298 [003] ...2. 
2060.875970: openat: (raw_syscalls.sys_enter) nr=0x101 filename=(fault) + cat-1298 [003] ...2. 2060.876197: openat: (raw_syscalls.sys_enter) nr=0x101 filename=(fault) + cat-1298 [003] ...2. 2060.879126: openat: (raw_syscalls.sys_enter) nr=0x101 filename=(fault) + cat-1298 [003] ...2. 2060.879639: openat: (raw_syscalls.sys_enter) nr=0x101 filename=(fault) + +The filename shows "(fault)". This is likely because the filename has not been +pulled into memory yet and currently trace events cannot fault in memory that +is not present. When an eprobe tries to read memory that has not been faulted +in yet, it will show the "(fault)" text. + +To get around this, as the kernel will likely pull in this filename and make +it present, a synthetic event can be used to pass the address of the +filename from the entry of the system call to its exit, where it can be read +to show the filename when the system call returns. + +Disable and remove the old eprobe:: + + # echo 0 > events/eprobes/openat/enable + # echo '-:openat' >> dynamic_events + +This time make an eprobe where the address of the filename is saved:: + + # echo 'e:openat_start raw_syscalls.sys_enter nr=$id filename=+8($args):x64' >> dynamic_events + +Create a synthetic event that passes the address of the filename to the +end of the event:: + + # echo 's:filename u64 file' >> dynamic_events + # echo 'hist:keys=common_pid:f=filename if nr == 257' > events/eprobes/openat_start/trigger + # echo 'hist:keys=common_pid:file=$f:onmatch(eprobes.openat_start).trace(filename,$file) if id == 257' > events/raw_syscalls/sys_exit/trigger + +Now that the address of the filename has been passed to the end of the +system call, create another eprobe to attach to the exit event to show the +string:: + + # echo 'e:openat synthetic.filename filename=+0($file):ustring' >> dynamic_events + # echo 1 > events/eprobes/openat/enable + # cat trace + + # tracer: nop + # + # entries-in-buffer/entries-written: 4/4 #P:8 + # + # _-----=> 
irqs-off/BH-disabled + # / _----=> need-resched + # | / _---=> hardirq/softirq + # || / _--=> preempt-depth + # ||| / _-=> migrate-disable + # |||| / delay + # TASK-PID CPU# ||||| TIMESTAMP FUNCTION + # | | | ||||| | | + cat-1331 [001] ...5. 2944.787977: openat: (synthetic.filename) filename="/etc/ld.so.cache" + cat-1331 [001] ...5. 2944.788480: openat: (synthetic.filename) filename="/lib/x86_64-linux-gnu/libc.so.6" + cat-1331 [001] ...5. 2944.793426: openat: (synthetic.filename) filename="/usr/lib/locale/locale-archive" + cat-1331 [001] ...5. 2944.831362: openat: (synthetic.filename) filename="trace" + +Example 3 +--------- + +If syscall trace events are available, the above would not need the first +eprobe, but it would still need the last one:: + + # echo 's:filename u64 file' >> dynamic_events + # echo 'hist:keys=common_pid:f=filename' > events/syscalls/sys_enter_openat/trigger + # echo 'hist:keys=common_pid:file=$f:onmatch(syscalls.sys_enter_openat).trace(filename,$file)' > events/syscalls/sys_exit_openat/trigger + # echo 'e:openat synthetic.filename filename=+0($file):ustring' >> dynamic_events + # echo 1 > events/eprobes/openat/enable + +And this would produce the same result as Example 2. diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index cc1dc5a087e8..b4a429dc4f7a 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -36,6 +36,7 @@ the Linux kernel. kprobes kprobetrace fprobetrace + eprobetrace fprobe ring-buffer-design From 3dca3d51b933beb3f35a60472ed2110d1bd7046a Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 30 Jul 2025 09:14:43 +0200 Subject: [PATCH 1231/2411] ARM: s3c/gpio: complete the conversion to new GPIO value setters Commit fb52f3226cab ("ARM: s3c/gpio: use new line value setter callbacks") correctly changed the assignment of the callback but missed the check one liner higher. Change it now too to using the recommended callback as the legacy one is going away soon. 
Fixes: fb52f3226cab ("ARM: s3c/gpio: use new line value setter callbacks") Signed-off-by: Bartosz Golaszewski Signed-off-by: Arnd Bergmann --- arch/arm/mach-s3c/gpio-samsung.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-s3c/gpio-samsung.c b/arch/arm/mach-s3c/gpio-samsung.c index 206a492fbaf5..3ee4ad969cc2 100644 --- a/arch/arm/mach-s3c/gpio-samsung.c +++ b/arch/arm/mach-s3c/gpio-samsung.c @@ -516,7 +516,7 @@ static void __init samsung_gpiolib_add(struct samsung_gpio_chip *chip) gc->direction_input = samsung_gpiolib_2bit_input; if (!gc->direction_output) gc->direction_output = samsung_gpiolib_2bit_output; - if (!gc->set) + if (!gc->set_rv) gc->set_rv = samsung_gpiolib_set; if (!gc->get) gc->get = samsung_gpiolib_get; From 078cad8212ce4f4ebbafcc0936475b8215e1ca2a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 28 Jul 2025 21:37:26 +0000 Subject: [PATCH 1232/2411] f2fs: drop inode from the donation list when the last file is closed Let's drop the inode from the donation list when there is no other open file. 
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/file.c | 8 +++++++- fs/f2fs/inode.c | 2 +- fs/f2fs/super.c | 1 + 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 97c1a2a3fbd7..7029aa8b430e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -876,6 +876,7 @@ struct f2fs_inode_info { /* linked in global inode list for cache donation */ struct list_head gdonate_list; pgoff_t donate_start, donate_end; /* inclusive */ + atomic_t open_count; /* # of open files */ struct task_struct *atomic_write_task; /* store atomic write task */ struct extent_tree *extent_tree[NR_EXTENT_CACHES]; @@ -3652,6 +3653,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink); void f2fs_update_inode(struct inode *inode, struct folio *node_folio); void f2fs_update_inode_page(struct inode *inode); int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc); +void f2fs_remove_donate_inode(struct inode *inode); void f2fs_evict_inode(struct inode *inode); void f2fs_handle_failed_inode(struct inode *inode); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c1641c693655..84b0fcb454dd 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -628,7 +628,10 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) if (err) return err; - return finish_preallocate_blocks(inode); + err = finish_preallocate_blocks(inode); + if (!err) + atomic_inc(&F2FS_I(inode)->open_count); + return err; } void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) @@ -2037,6 +2040,9 @@ static long f2fs_fallocate(struct file *file, int mode, static int f2fs_release_file(struct inode *inode, struct file *filp) { + if (atomic_dec_and_test(&F2FS_I(inode)->open_count)) + f2fs_remove_donate_inode(inode); + /* * f2fs_release_file is called at every close calls. So we should * not drop any inmemory pages by close called by other process. 
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 154106aa350b..8c4eafe9ffac 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -821,7 +821,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) return 0; } -static void f2fs_remove_donate_inode(struct inode *inode) +void f2fs_remove_donate_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 30c038413040..e16c4e2830c2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1701,6 +1701,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Initialize f2fs-specific inode info */ atomic_set(&fi->dirty_pages, 0); atomic_set(&fi->i_compr_blocks, 0); + atomic_set(&fi->open_count, 0); init_f2fs_rwsem(&fi->i_sem); spin_lock_init(&fi->i_size_lock); INIT_LIST_HEAD(&fi->dirty_list); From 59edbec7a5c70af6c0058e32eb3750bfb8928d7b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 30 Jul 2025 10:34:20 -0300 Subject: [PATCH 1233/2411] perf python: Stop using deprecated PyUnicode_AsString() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As noticed while building for Fedora 43: GEN /tmp/build/perf/python/perf.cpython-314-x86_64-linux-gnu.so /git/perf-6.16.0-rc3/tools/perf/util/python.c: In function ‘get_tracepoint_field’: /git/perf-6.16.0-rc3/tools/perf/util/python.c:340:9: error: ‘_PyUnicode_AsString’ is deprecated [-Werror=deprecated-declarations] 340 | const char *str = _PyUnicode_AsString(PyObject_Str(attr_name)); | ^~~~~ In file included from /usr/include/python3.14/unicodeobject.h:1022, from /usr/include/python3.14/Python.h:89, from /git/perf-6.16.0-rc3/tools/perf/util/python.c:2: /usr/include/python3.14/cpython/unicodeobject.h:648:1: note: declared here 648 | _PyUnicode_AsString(PyObject *unicode) | ^~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors error: command '/usr/bin/gcc' failed with exit code 1 Use PyUnicode_AsUTF8() instead and 
also check if PyObject_Str() fails before doing so. Signed-off-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/aIofXNK8QLtLIaI3@x1 Signed-off-by: Namhyung Kim --- tools/perf/util/python.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 2f28f71325a8..ea77bea0306f 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -337,7 +337,6 @@ tracepoint_field(const struct pyrf_event *pe, struct tep_format_field *field) static PyObject* get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name) { - const char *str = _PyUnicode_AsString(PyObject_Str(attr_name)); struct evsel *evsel = pevent->evsel; struct tep_event *tp_format = evsel__tp_format(evsel); struct tep_format_field *field; @@ -345,7 +344,18 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name) if (IS_ERR_OR_NULL(tp_format)) return NULL; + PyObject *obj = PyObject_Str(attr_name); + if (obj == NULL) + return NULL; + + const char *str = PyUnicode_AsUTF8(obj); + if (str == NULL) { + Py_DECREF(obj); + return NULL; + } + field = tep_find_any_field(tp_format, str); + Py_DECREF(obj); return field ? 
tracepoint_field(pevent, field) : NULL; } #endif /* HAVE_LIBTRACEEVENT */ From 1df1fc845d221eb646539836dbf509eb96b41afd Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 30 Jul 2025 15:33:21 +0800 Subject: [PATCH 1234/2411] md: fix create on open mddev lifetime regression Commit 9e59d609763f ("md: call del_gendisk in control path") moves setting MD_DELETED from __mddev_put() to do_md_stop(), however, for the case create on open, mddev can be freed without do_md_stop(): 1) open md_probe md_alloc_and_put md_alloc mddev_alloc atomic_set(&mddev->active, 1); mddev->hold_active = UNTIL_IOCTL mddev_put atomic_dec_and_test(&mddev->active) if (mddev->hold_active) -> active is 0, hold_active is set md_open mddev_get atomic_inc(&mddev->active); 2) ioctl that is not STOP_ARRAY, for example, GET_ARRAY_INFO: md_ioctl mddev->hold_active = 0 3) close md_release mddev_put(mddev); atomic_dec_and_lock(&mddev->active, &all_mddevs_lock) __mddev_put -> hold_active is cleared, mddev will be freed queue_work(md_misc_wq, &mddev->del_work) Now that MD_DELETED is not set, before mddev is freed by mddev_delayed_delete(), md_open can still succeed and break mddev lifetime, causing mddev->kobj refcount underflow or mddev uaf problem. Fix this problem by setting MD_DELETED before queuing del_work. 
Reported-by: syzbot+9921e319bd6168140b40@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68894408.a00a0220.26d0e1.0012.GAE@google.com/ Reported-by: syzbot+fa3a12519f0d3fd4ec16@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68894408.a00a0220.26d0e1.0013.GAE@google.com/ Fixes: 9e59d609763f ("md: call del_gendisk in control path") Link: https://lore.kernel.org/linux-raid/20250730073321.2583158-1-yukuai1@huaweicloud.com Signed-off-by: Yu Kuai Reviewed-by: Paul Menzel Reviewed-by: Xiao Ni --- drivers/md/md.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 046fe85c76fe..2716d5c59517 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -636,6 +636,12 @@ static void __mddev_put(struct mddev *mddev) mddev->ctime || mddev->hold_active) return; + /* + * If array is freed by stopping array, MD_DELETED is set by + * do_md_stop(), MD_DELETED is still set here in case mddev is freed + * directly by closing a mddev that is created by create_on_open. + */ + set_bit(MD_DELETED, &mddev->flags); /* * Call queue_work inside the spinlock so that flush_workqueue() after * mddev_find will succeed in waiting for the work to be done. From 948b1fe12005d39e2b49087b50e5ee55c9a8f76f Mon Sep 17 00:00:00 2001 From: Heming Zhao Date: Mon, 28 Jul 2025 12:21:40 +0800 Subject: [PATCH 1235/2411] md/md-cluster: handle REMOVE message earlier Commit a1fd37f97808 ("md: Don't wait for MD_RECOVERY_NEEDED for HOT_REMOVE_DISK ioctl") introduced a regression in the md_cluster module. (Failed cases 02r1_Manage_re-add & 02r10_Manage_re-add) Consider a 2-node cluster: - node1 set faulty & remove command on a disk. - node2 must correctly update the array metadata. Before a1fd37f97808, on node1, the delay between msg:METADATA_UPDATED (triggered by faulty) and msg:REMOVE was sufficient for node2 to reload the disk info (written by node1). 
After a1fd37f97808, node1 no longer waits between faulty and remove, causing it to send msg:REMOVE while node2 is still reloading disk info. This often results in node2 failing to remove the faulty disk. == how to trigger == set up a 2-node cluster (node1 & node2) with disks vdc & vdd. on node1: mdadm -CR /dev/md0 -l1 -b clustered -n2 /dev/vdc /dev/vdd --assume-clean ssh node2-ip mdadm -A /dev/md0 /dev/vdc /dev/vdd mdadm --manage /dev/md0 --fail /dev/vdc --remove /dev/vdc check array status on both nodes with "mdadm -D /dev/md0". node1 output: Number Major Minor RaidDevice State - 0 0 0 removed 1 254 48 1 active sync /dev/vdd node2 output: Number Major Minor RaidDevice State - 0 0 0 removed 1 254 48 1 active sync /dev/vdd 0 254 32 - faulty /dev/vdc Fixes: a1fd37f97808 ("md: Don't wait for MD_RECOVERY_NEEDED for HOT_REMOVE_DISK ioctl") Signed-off-by: Heming Zhao Reviewed-by: Su Yue Link: https://lore.kernel.org/linux-raid/20250728042145.9989-1-heming.zhao@suse.com Signed-off-by: Yu Kuai --- drivers/md/md.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 2716d5c59517..8af97ef80ec5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9777,8 +9777,8 @@ void md_check_recovery(struct mddev *mddev) * remove disk. 
*/ rdev_for_each_safe(rdev, tmp, mddev) { - if (test_and_clear_bit(ClusterRemove, &rdev->flags) && - rdev->raid_disk < 0) + if (rdev->raid_disk < 0 && + test_and_clear_bit(ClusterRemove, &rdev->flags)) md_kick_rdev_from_array(rdev); } } @@ -10084,8 +10084,11 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev) /* Check for change of roles in the active devices */ rdev_for_each_safe(rdev2, tmp, mddev) { - if (test_bit(Faulty, &rdev2->flags)) + if (test_bit(Faulty, &rdev2->flags)) { + if (test_bit(ClusterRemove, &rdev2->flags)) + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); continue; + } /* Check if the roles changed */ role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]); From 907a99c314a5a695e35acff78ac61f4ec950a6d3 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Tue, 22 Jul 2025 11:33:40 +0800 Subject: [PATCH 1236/2411] md: rename recovery_cp to resync_offset 'recovery_cp' was used to represent the progress of sync, but its name contains recovery, which can cause confusion. Replaces 'recovery_cp' with 'resync_offset' for clarity. 
Signed-off-by: Li Nan Link: https://lore.kernel.org/linux-raid/20250722033340.1933388-1-linan666@huaweicloud.com Signed-off-by: Yu Kuai --- drivers/md/dm-raid.c | 42 ++++++++++++++-------------- drivers/md/md-bitmap.c | 8 +++--- drivers/md/md-cluster.c | 16 +++++------ drivers/md/md.c | 50 +++++++++++++++++----------------- drivers/md/md.h | 2 +- drivers/md/raid0.c | 6 ++-- drivers/md/raid1-10.c | 2 +- drivers/md/raid1.c | 10 +++---- drivers/md/raid10.c | 16 +++++------ drivers/md/raid5-ppl.c | 6 ++-- drivers/md/raid5.c | 30 ++++++++++---------- include/uapi/linux/raid/md_p.h | 2 +- 12 files changed, 95 insertions(+), 95 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index e8c0a8c6fb51..9835f2fe26e9 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -439,7 +439,7 @@ static bool rs_is_reshapable(struct raid_set *rs) /* Return true, if raid set in @rs is recovering */ static bool rs_is_recovering(struct raid_set *rs) { - return rs->md.recovery_cp < rs->md.dev_sectors; + return rs->md.resync_offset < rs->md.dev_sectors; } /* Return true, if raid set in @rs is reshaping */ @@ -769,7 +769,7 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r rs->md.layout = raid_type->algorithm; rs->md.new_layout = rs->md.layout; rs->md.delta_disks = 0; - rs->md.recovery_cp = MaxSector; + rs->md.resync_offset = MaxSector; for (i = 0; i < raid_devs; i++) md_rdev_init(&rs->dev[i].rdev); @@ -913,7 +913,7 @@ static int parse_dev_params(struct raid_set *rs, struct dm_arg_set *as) rs->md.external = 0; rs->md.persistent = 1; rs->md.major_version = 2; - } else if (rebuild && !rs->md.recovery_cp) { + } else if (rebuild && !rs->md.resync_offset) { /* * Without metadata, we will not be able to tell if the array * is in-sync or not - we must assume it is not. 
Therefore, @@ -1696,20 +1696,20 @@ static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors) { /* raid0 does not recover */ if (rs_is_raid0(rs)) - rs->md.recovery_cp = MaxSector; + rs->md.resync_offset = MaxSector; /* * A raid6 set has to be recovered either * completely or for the grown part to * ensure proper parity and Q-Syndrome */ else if (rs_is_raid6(rs)) - rs->md.recovery_cp = dev_sectors; + rs->md.resync_offset = dev_sectors; /* * Other raid set types may skip recovery * depending on the 'nosync' flag. */ else - rs->md.recovery_cp = test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags) + rs->md.resync_offset = test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags) ? MaxSector : dev_sectors; } @@ -2144,7 +2144,7 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev) sb->events = cpu_to_le64(mddev->events); sb->disk_recovery_offset = cpu_to_le64(rdev->recovery_offset); - sb->array_resync_offset = cpu_to_le64(mddev->recovery_cp); + sb->array_resync_offset = cpu_to_le64(mddev->resync_offset); sb->level = cpu_to_le32(mddev->level); sb->layout = cpu_to_le32(mddev->layout); @@ -2335,18 +2335,18 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) } if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) - mddev->recovery_cp = le64_to_cpu(sb->array_resync_offset); + mddev->resync_offset = le64_to_cpu(sb->array_resync_offset); /* * During load, we set FirstUse if a new superblock was written. * There are two reasons we might not have a superblock: * 1) The raid set is brand new - in which case, all of the * devices must have their In_sync bit set. Also, - * recovery_cp must be 0, unless forced. + * resync_offset must be 0, unless forced. * 2) This is a new device being added to an old raid set * and the new device needs to be rebuilt - in which * case the In_sync bit will /not/ be set and - * recovery_cp must be MaxSector. + * resync_offset must be MaxSector. 
* 3) This is/are a new device(s) being added to an old * raid set during takeover to a higher raid level * to provide capacity for redundancy or during reshape @@ -2391,8 +2391,8 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) new_devs > 1 ? "s" : ""); return -EINVAL; } else if (!test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags) && rs_is_recovering(rs)) { - DMERR("'rebuild' specified while raid set is not in-sync (recovery_cp=%llu)", - (unsigned long long) mddev->recovery_cp); + DMERR("'rebuild' specified while raid set is not in-sync (resync_offset=%llu)", + (unsigned long long) mddev->resync_offset); return -EINVAL; } else if (rs_is_reshaping(rs)) { DMERR("'rebuild' specified while raid set is being reshaped (reshape_position=%llu)", @@ -2697,11 +2697,11 @@ static int rs_adjust_data_offsets(struct raid_set *rs) } out: /* - * Raise recovery_cp in case data_offset != 0 to + * Raise resync_offset in case data_offset != 0 to * avoid false recovery positives in the constructor. 
*/ - if (rs->md.recovery_cp < rs->md.dev_sectors) - rs->md.recovery_cp += rs->dev[0].rdev.data_offset; + if (rs->md.resync_offset < rs->md.dev_sectors) + rs->md.resync_offset += rs->dev[0].rdev.data_offset; /* Adjust data offsets on all rdevs but on any raid4/5/6 journal device */ rdev_for_each(rdev, &rs->md) { @@ -2756,7 +2756,7 @@ static int rs_setup_takeover(struct raid_set *rs) } clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags); - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; while (d--) { rdev = &rs->dev[d].rdev; @@ -2764,7 +2764,7 @@ static int rs_setup_takeover(struct raid_set *rs) if (test_bit(d, (void *) rs->rebuild_disks)) { clear_bit(In_sync, &rdev->flags); clear_bit(Faulty, &rdev->flags); - mddev->recovery_cp = rdev->recovery_offset = 0; + mddev->resync_offset = rdev->recovery_offset = 0; /* Bitmap has to be created when we do an "up" takeover */ set_bit(MD_ARRAY_FIRST_USE, &mddev->flags); } @@ -3222,7 +3222,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (r) goto bad; - rs_setup_recovery(rs, rs->md.recovery_cp < rs->md.dev_sectors ? rs->md.recovery_cp : rs->md.dev_sectors); + rs_setup_recovery(rs, rs->md.resync_offset < rs->md.dev_sectors ? 
rs->md.resync_offset : rs->md.dev_sectors); } else { /* This is no size change or it is shrinking, update size and record in superblocks */ r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false); @@ -3446,7 +3446,7 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, } else { if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery)) - r = mddev->recovery_cp; + r = mddev->resync_offset; else r = mddev->curr_resync_completed; @@ -4074,9 +4074,9 @@ static int raid_preresume(struct dm_target *ti) } /* Check for any resize/reshape on @rs and adjust/initiate */ - if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) { + if (mddev->resync_offset && mddev->resync_offset < MaxSector) { set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); - mddev->resync_min = mddev->recovery_cp; + mddev->resync_min = mddev->resync_offset; if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) mddev->resync_max_sectors = mddev->dev_sectors; } diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 7f524a26cebc..334b71404930 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -1987,12 +1987,12 @@ static void bitmap_dirty_bits(struct mddev *mddev, unsigned long s, md_bitmap_set_memory_bits(bitmap, sec, 1); md_bitmap_file_set_bit(bitmap, sec); - if (sec < bitmap->mddev->recovery_cp) + if (sec < bitmap->mddev->resync_offset) /* We are asserting that the array is dirty, - * so move the recovery_cp address back so + * so move the resync_offset address back so * that it is obvious that it is dirty */ - bitmap->mddev->recovery_cp = sec; + bitmap->mddev->resync_offset = sec; } } @@ -2258,7 +2258,7 @@ static int bitmap_load(struct mddev *mddev) || bitmap->events_cleared == mddev->events) /* no need to keep dirty bits to optimise a * re-add of a missing device */ - start = mddev->recovery_cp; + start = mddev->resync_offset; mutex_lock(&mddev->bitmap_info.mutex); err = md_bitmap_init_from_disk(bitmap, start); diff --git a/drivers/md/md-cluster.c 
b/drivers/md/md-cluster.c index 94221d964d4f..5497eaee96e7 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -337,11 +337,11 @@ static void recover_bitmaps(struct md_thread *thread) md_wakeup_thread(mddev->sync_thread); if (hi > 0) { - if (lo < mddev->recovery_cp) - mddev->recovery_cp = lo; + if (lo < mddev->resync_offset) + mddev->resync_offset = lo; /* wake up thread to continue resync in case resync * is not finished */ - if (mddev->recovery_cp != MaxSector) { + if (mddev->resync_offset != MaxSector) { /* * clear the REMOTE flag since we will launch * resync thread in current node. @@ -863,9 +863,9 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots) lockres_free(bm_lockres); continue; } - if ((hi > 0) && (lo < mddev->recovery_cp)) { + if ((hi > 0) && (lo < mddev->resync_offset)) { set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - mddev->recovery_cp = lo; + mddev->resync_offset = lo; md_check_recovery(mddev); } @@ -1027,7 +1027,7 @@ static int leave(struct mddev *mddev) * Also, we should send BITMAP_NEEDS_SYNC message in * case reshaping is interrupted. 
*/ - if ((cinfo->slot_number > 0 && mddev->recovery_cp != MaxSector) || + if ((cinfo->slot_number > 0 && mddev->resync_offset != MaxSector) || (mddev->reshape_position != MaxSector && test_bit(MD_CLOSING, &mddev->flags))) resync_bitmap(mddev); @@ -1605,8 +1605,8 @@ static int gather_bitmaps(struct md_rdev *rdev) pr_warn("md-cluster: Could not gather bitmaps from slot %d", sn); goto out; } - if ((hi > 0) && (lo < mddev->recovery_cp)) - mddev->recovery_cp = lo; + if ((hi > 0) && (lo < mddev->resync_offset)) + mddev->resync_offset = lo; } out: return err; diff --git a/drivers/md/md.c b/drivers/md/md.c index 8af97ef80ec5..9c7ed23c45ad 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1415,13 +1415,13 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, stru mddev->layout = -1; if (sb->state & (1<recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; else { if (sb->events_hi == sb->cp_events_hi && sb->events_lo == sb->cp_events_lo) { - mddev->recovery_cp = sb->recovery_cp; + mddev->resync_offset = sb->resync_offset; } else - mddev->recovery_cp = 0; + mddev->resync_offset = 0; } memcpy(mddev->uuid+0, &sb->set_uuid0, 4); @@ -1547,13 +1547,13 @@ static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev) mddev->minor_version = sb->minor_version; if (mddev->in_sync) { - sb->recovery_cp = mddev->recovery_cp; + sb->resync_offset = mddev->resync_offset; sb->cp_events_hi = (mddev->events>>32); sb->cp_events_lo = (u32)mddev->events; - if (mddev->recovery_cp == MaxSector) + if (mddev->resync_offset == MaxSector) sb->state = (1<< MD_SB_CLEAN); } else - sb->recovery_cp = 0; + sb->resync_offset = 0; sb->layout = mddev->layout; sb->chunk_size = mddev->chunk_sectors << 9; @@ -1901,7 +1901,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struc mddev->bitmap_info.default_space = (4096-1024) >> 9; mddev->reshape_backwards = 0; - mddev->recovery_cp = le64_to_cpu(sb->resync_offset); + mddev->resync_offset = 
le64_to_cpu(sb->resync_offset); memcpy(mddev->uuid, sb->set_uuid, 16); mddev->max_disks = (4096-256)/2; @@ -2087,7 +2087,7 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) sb->utime = cpu_to_le64((__u64)mddev->utime); sb->events = cpu_to_le64(mddev->events); if (mddev->in_sync) - sb->resync_offset = cpu_to_le64(mddev->recovery_cp); + sb->resync_offset = cpu_to_le64(mddev->resync_offset); else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags)) sb->resync_offset = cpu_to_le64(MaxSector); else @@ -2767,7 +2767,7 @@ void md_update_sb(struct mddev *mddev, int force_change) /* If this is just a dirty<->clean transition, and the array is clean * and 'events' is odd, we can roll back to the previous clean state */ if (nospares - && (mddev->in_sync && mddev->recovery_cp == MaxSector) + && (mddev->in_sync && mddev->resync_offset == MaxSector) && mddev->can_decrease_events && mddev->events != 1) { mddev->events--; @@ -4303,9 +4303,9 @@ __ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store); static ssize_t resync_start_show(struct mddev *mddev, char *page) { - if (mddev->recovery_cp == MaxSector) + if (mddev->resync_offset == MaxSector) return sprintf(page, "none\n"); - return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp); + return sprintf(page, "%llu\n", (unsigned long long)mddev->resync_offset); } static ssize_t @@ -4331,7 +4331,7 @@ resync_start_store(struct mddev *mddev, const char *buf, size_t len) err = -EBUSY; if (!err) { - mddev->recovery_cp = n; + mddev->resync_offset = n; if (mddev->pers) set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); } @@ -6423,7 +6423,7 @@ static void md_clean(struct mddev *mddev) mddev->external_size = 0; mddev->dev_sectors = 0; mddev->raid_disks = 0; - mddev->recovery_cp = 0; + mddev->resync_offset = 0; mddev->resync_min = 0; mddev->resync_max = MaxSector; mddev->reshape_position = MaxSector; @@ -7368,9 +7368,9 @@ int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info) * 
openned */ if (info->state & (1<<MD_SB_CLEAN)) - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; else - mddev->recovery_cp = 0; + mddev->resync_offset = 0; mddev->persistent = ! info->not_persistent; mddev->external = 0; @@ -8309,7 +8309,7 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev) seq_printf(seq, "\tresync=REMOTE"); return 1; } - if (mddev->recovery_cp < MaxSector) { + if (mddev->resync_offset < MaxSector) { seq_printf(seq, "\tresync=PENDING"); return 1; } @@ -8952,7 +8952,7 @@ static sector_t md_sync_position(struct mddev *mddev, enum sync_action action) return mddev->resync_min; case ACTION_RESYNC: if (!mddev->bitmap) - return mddev->recovery_cp; + return mddev->resync_offset; return 0; case ACTION_RESHAPE: /* @@ -9190,8 +9190,8 @@ void md_do_sync(struct md_thread *thread) atomic_read(&mddev->recovery_active) == 0); mddev->curr_resync_completed = j; if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && - j > mddev->recovery_cp) - mddev->recovery_cp = j; + j > mddev->resync_offset) + mddev->resync_offset = j; update_time = jiffies; set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); sysfs_notify_dirent_safe(mddev->sysfs_completed); @@ -9311,19 +9311,19 @@ void md_do_sync(struct md_thread *thread) mddev->curr_resync > MD_RESYNC_ACTIVE) { if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { - if (mddev->curr_resync >= mddev->recovery_cp) { + if (mddev->curr_resync >= mddev->resync_offset) { pr_debug("md: checkpointing %s of %s.\n", desc, mdname(mddev)); if (test_bit(MD_RECOVERY_ERROR, &mddev->recovery)) - mddev->recovery_cp = + mddev->resync_offset = mddev->curr_resync_completed; else - mddev->recovery_cp = + mddev->resync_offset = mddev->curr_resync; } } else - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; } else { if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) mddev->curr_resync = MaxSector; @@ -9539,7 +9539,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int
*spares) } /* Check if resync is in progress. */ - if (mddev->recovery_cp < MaxSector) { + if (mddev->resync_offset < MaxSector) { remove_spares(mddev, NULL); set_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); @@ -9720,7 +9720,7 @@ void md_check_recovery(struct mddev *mddev) test_bit(MD_RECOVERY_DONE, &mddev->recovery) || (mddev->external == 0 && mddev->safemode == 1) || (mddev->safemode == 2 - && !mddev->in_sync && mddev->recovery_cp == MaxSector) + && !mddev->in_sync && mddev->resync_offset == MaxSector) )) return; diff --git a/drivers/md/md.h b/drivers/md/md.h index 67b365621507..51af29a03079 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -523,7 +523,7 @@ struct mddev { unsigned long normal_io_events; /* IO event timestamp */ atomic_t recovery_active; /* blocks scheduled, but not written */ wait_queue_head_t recovery_wait; - sector_t recovery_cp; + sector_t resync_offset; sector_t resync_min; /* user requested sync * starts here */ sector_t resync_max; /* resync should pause diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index cbe2a9054cb9..f1d8811a542a 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -674,7 +674,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev) mddev->raid_disks--; mddev->delta_disks = -1; /* make sure it will be not marked as dirty */ - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS); create_strip_zones(mddev, &priv_conf); @@ -717,7 +717,7 @@ static void *raid0_takeover_raid10(struct mddev *mddev) mddev->raid_disks += mddev->delta_disks; mddev->degraded = 0; /* make sure it will be not marked as dirty */ - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS); create_strip_zones(mddev, &priv_conf); @@ -760,7 +760,7 @@ static void *raid0_takeover_raid1(struct mddev *mddev) mddev->delta_disks = 1 - 
mddev->raid_disks; mddev->raid_disks = 1; /* make sure it will be not marked as dirty */ - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS); create_strip_zones(mddev, &priv_conf); diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c index b8b3a9069701..52881e6032da 100644 --- a/drivers/md/raid1-10.c +++ b/drivers/md/raid1-10.c @@ -283,7 +283,7 @@ static inline int raid1_check_read_range(struct md_rdev *rdev, static inline bool raid1_should_read_first(struct mddev *mddev, sector_t this_sector, int len) { - if ((mddev->recovery_cp < this_sector + len)) + if ((mddev->resync_offset < this_sector + len)) return true; if (mddev_is_clustered(mddev) && diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 64b8176907a9..6cee738a645f 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2822,7 +2822,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, } if (mddev->bitmap == NULL && - mddev->recovery_cp == MaxSector && + mddev->resync_offset == MaxSector && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && conf->fullsync == 0) { *skipped = 1; @@ -3282,9 +3282,9 @@ static int raid1_run(struct mddev *mddev) } if (conf->raid_disks - mddev->degraded == 1) - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; - if (mddev->recovery_cp != MaxSector) + if (mddev->resync_offset != MaxSector) pr_info("md/raid1:%s: not clean -- starting background reconstruction\n", mdname(mddev)); pr_info("md/raid1:%s: active with %d out of %d mirrors\n", @@ -3345,8 +3345,8 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors) md_set_array_sectors(mddev, newsize); if (sectors > mddev->dev_sectors && - mddev->recovery_cp > mddev->dev_sectors) { - mddev->recovery_cp = mddev->dev_sectors; + mddev->resync_offset > mddev->dev_sectors) { + mddev->resync_offset = mddev->dev_sectors; set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } mddev->dev_sectors = 
sectors; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 95dc354a86a0..b60c30bfb6c7 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2117,7 +2117,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) int last = conf->geo.raid_disks - 1; struct raid10_info *p; - if (mddev->recovery_cp < MaxSector) + if (mddev->resync_offset < MaxSector) /* only hot-add to in-sync arrays, as recovery is * very different from resync */ @@ -3185,7 +3185,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, * of a clean array, like RAID1 does. */ if (mddev->bitmap == NULL && - mddev->recovery_cp == MaxSector && + mddev->resync_offset == MaxSector && mddev->reshape_position == MaxSector && !test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && @@ -4145,7 +4145,7 @@ static int raid10_run(struct mddev *mddev) disk->recovery_disabled = mddev->recovery_disabled - 1; } - if (mddev->recovery_cp != MaxSector) + if (mddev->resync_offset != MaxSector) pr_notice("md/raid10:%s: not clean -- starting background reconstruction\n", mdname(mddev)); pr_info("md/raid10:%s: active with %d out of %d devices\n", @@ -4245,8 +4245,8 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors) md_set_array_sectors(mddev, size); if (sectors > mddev->dev_sectors && - mddev->recovery_cp > oldsize) { - mddev->recovery_cp = oldsize; + mddev->resync_offset > oldsize) { + mddev->resync_offset = oldsize; set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } calc_sectors(conf, sectors); @@ -4275,7 +4275,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs) mddev->delta_disks = mddev->raid_disks; mddev->raid_disks *= 2; /* make sure it will be not marked as dirty */ - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; mddev->dev_sectors = size; conf = setup_conf(mddev); @@ -5087,8 +5087,8 @@ static void raid10_finish_reshape(struct mddev *mddev) 
return; if (mddev->delta_disks > 0) { - if (mddev->recovery_cp > mddev->resync_max_sectors) { - mddev->recovery_cp = mddev->resync_max_sectors; + if (mddev->resync_offset > mddev->resync_max_sectors) { + mddev->resync_offset = mddev->resync_max_sectors; set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } mddev->resync_max_sectors = mddev->array_sectors; diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index c0fb335311aa..56b234683ee6 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -1163,7 +1163,7 @@ static int ppl_load_distributed(struct ppl_log *log) le64_to_cpu(pplhdr->generation)); /* attempt to recover from log if we are starting a dirty array */ - if (pplhdr && !mddev->pers && mddev->recovery_cp != MaxSector) + if (pplhdr && !mddev->pers && mddev->resync_offset != MaxSector) ret = ppl_recover(log, pplhdr, pplhdr_offset); /* write empty header if we are starting the array */ @@ -1422,14 +1422,14 @@ int ppl_init_log(struct r5conf *conf) if (ret) { goto err; - } else if (!mddev->pers && mddev->recovery_cp == 0 && + } else if (!mddev->pers && mddev->resync_offset == 0 && ppl_conf->recovered_entries > 0 && ppl_conf->mismatch_count == 0) { /* * If we are starting a dirty array and the recovery succeeds * without any issues, set the array as clean. 
*/ - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); } else if (mddev->pers && ppl_conf->mismatch_count > 0) { /* no mismatch allowed when enabling PPL for a running array */ diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7ec61ee7b218..023649fe2476 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3740,7 +3740,7 @@ static int want_replace(struct stripe_head *sh, int disk_idx) && !test_bit(Faulty, &rdev->flags) && !test_bit(In_sync, &rdev->flags) && (rdev->recovery_offset <= sh->sector - || rdev->mddev->recovery_cp <= sh->sector)) + || rdev->mddev->resync_offset <= sh->sector)) rv = 1; return rv; } @@ -3832,7 +3832,7 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s, * is missing/faulty, then we need to read everything we can. */ if (!force_rcw && - sh->sector < sh->raid_conf->mddev->recovery_cp) + sh->sector < sh->raid_conf->mddev->resync_offset) /* reconstruct-write isn't being forced */ return 0; for (i = 0; i < s->failed && i < 2; i++) { @@ -4097,7 +4097,7 @@ static int handle_stripe_dirtying(struct r5conf *conf, int disks) { int rmw = 0, rcw = 0, i; - sector_t recovery_cp = conf->mddev->recovery_cp; + sector_t resync_offset = conf->mddev->resync_offset; /* Check whether resync is now happening or should start. * If yes, then the array is dirty (after unclean shutdown or @@ -4107,14 +4107,14 @@ static int handle_stripe_dirtying(struct r5conf *conf, * generate correct data from the parity. 
*/ if (conf->rmw_level == PARITY_DISABLE_RMW || - (recovery_cp < MaxSector && sh->sector >= recovery_cp && + (resync_offset < MaxSector && sh->sector >= resync_offset && s->failed == 0)) { /* Calculate the real rcw later - for now make it * look like rcw is cheaper */ rcw = 1; rmw = 2; - pr_debug("force RCW rmw_level=%u, recovery_cp=%llu sh->sector=%llu\n", - conf->rmw_level, (unsigned long long)recovery_cp, + pr_debug("force RCW rmw_level=%u, resync_offset=%llu sh->sector=%llu\n", + conf->rmw_level, (unsigned long long)resync_offset, (unsigned long long)sh->sector); } else for (i = disks; i--; ) { /* would I have to read this buffer for read_modify_write */ @@ -4770,14 +4770,14 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) if (test_bit(STRIPE_SYNCING, &sh->state)) { /* If there is a failed device being replaced, * we must be recovering. - * else if we are after recovery_cp, we must be syncing + * else if we are after resync_offset, we must be syncing * else if MD_RECOVERY_REQUESTED is set, we also are syncing. * else we can only be replacing * sync and recovery both need to read all devices, and so * use the same flag. 
*/ if (do_recovery || - sh->sector >= conf->mddev->recovery_cp || + sh->sector >= conf->mddev->resync_offset || test_bit(MD_RECOVERY_REQUESTED, &(conf->mddev->recovery))) s->syncing = 1; else @@ -7780,7 +7780,7 @@ static int raid5_run(struct mddev *mddev) int first = 1; int ret = -EIO; - if (mddev->recovery_cp != MaxSector) + if (mddev->resync_offset != MaxSector) pr_notice("md/raid:%s: not clean -- starting background reconstruction\n", mdname(mddev)); @@ -7921,7 +7921,7 @@ static int raid5_run(struct mddev *mddev) mdname(mddev)); mddev->ro = 1; set_disk_ro(mddev->gendisk, 1); - } else if (mddev->recovery_cp == MaxSector) + } else if (mddev->resync_offset == MaxSector) set_bit(MD_JOURNAL_CLEAN, &mddev->flags); } @@ -7988,7 +7988,7 @@ static int raid5_run(struct mddev *mddev) mddev->resync_max_sectors = mddev->dev_sectors; if (mddev->degraded > dirty_parity_disks && - mddev->recovery_cp != MaxSector) { + mddev->resync_offset != MaxSector) { if (test_bit(MD_HAS_PPL, &mddev->flags)) pr_crit("md/raid:%s: starting dirty degraded array with PPL.\n", mdname(mddev)); @@ -8328,8 +8328,8 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors) md_set_array_sectors(mddev, newsize); if (sectors > mddev->dev_sectors && - mddev->recovery_cp > mddev->dev_sectors) { - mddev->recovery_cp = mddev->dev_sectors; + mddev->resync_offset > mddev->dev_sectors) { + mddev->resync_offset = mddev->dev_sectors; set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } mddev->dev_sectors = sectors; @@ -8423,7 +8423,7 @@ static int raid5_start_reshape(struct mddev *mddev) return -EINVAL; /* raid5 can't handle concurrent reshape and recovery */ - if (mddev->recovery_cp < MaxSector) + if (mddev->resync_offset < MaxSector) return -EBUSY; for (i = 0; i < conf->raid_disks; i++) if (conf->disks[i].replacement) @@ -8648,7 +8648,7 @@ static void *raid45_takeover_raid0(struct mddev *mddev, int level) mddev->raid_disks += 1; mddev->delta_disks = 1; /* make sure it will be not marked as dirty */ - 
mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; return setup_conf(mddev); } diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index ff47b6f0ba0f..b13946287277 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -173,7 +173,7 @@ typedef struct mdp_superblock_s { #else #error unspecified endianness #endif - __u32 recovery_cp; /* 11 recovery checkpoint sector count */ + __u32 resync_offset; /* 11 resync checkpoint sector count */ /* There are only valid for minor_version > 90 */ __u64 reshape_position; /* 12,13 next address in array-space for reshape */ __u32 new_level; /* 14 new level we are reshaping to */ From f62408efc8669b82541295a4611494c8c8c52684 Mon Sep 17 00:00:00 2001 From: Lukasz Laguna Date: Tue, 29 Jul 2025 14:34:37 +0200 Subject: [PATCH 1237/2411] drm/xe/vf: Disable CSC support on VF CSC is not accessible by VF drivers, so disable its support flag on VF to prevent further initialization attempts. Fixes: e02cea83d32d ("drm/xe/gsc: add Battlemage support") Signed-off-by: Lukasz Laguna Cc: Alexander Usyskin Cc: Michal Wajdeczko Reviewed-by: Michal Wajdeczko Signed-off-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20250729123437.5933-1-lukasz.laguna@intel.com (cherry picked from commit 552dbba1caaf0cb40ce961806d757615e26ec668) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 6dc84e4ed281..5bd2f7d7b4ea 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -681,6 +681,7 @@ static void sriov_update_device_info(struct xe_device *xe) /* disable features that are not available/applicable to VFs */ if (IS_SRIOV_VF(xe)) { xe->info.probe_display = 0; + xe->info.has_heci_cscfi = 0; xe->info.has_heci_gscfi = 0; xe->info.skip_guc_pc = 1; xe->info.skip_pcode = 1; From 022245067f07ab913d27054ee9e1fab45256acd5 Mon Sep 17 
00:00:00 2001 From: Jan Polensky Date: Fri, 25 Jul 2025 19:08:01 +0200 Subject: [PATCH 1238/2411] perf test: Ensure lock contention using pipe mode The 'kernel lock contention analysis test' requires reliable triggering of lock contention. On some systems, previous benchmark calls failed to generate sufficient contention due to low system activity or resource limits. This patch adds the -p (pipe) option to all calls of perf bench sched messaging, ensuring consistent lock contention without relying on socket-based communication. Suggested-by: Thomas Richter Signed-off-by: Jan Polensky Link: https://lore.kernel.org/r/20250725170801.3176678-1-japo@linux.ibm.com Signed-off-by: Namhyung Kim --- tools/perf/tests/shell/lock_contention.sh | 26 +++++++++++------------ 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh index dde5bc737eb2..d33d9e4392b0 100755 --- a/tools/perf/tests/shell/lock_contention.sh +++ b/tools/perf/tests/shell/lock_contention.sh @@ -44,7 +44,7 @@ check() { test_record() { echo "Testing perf lock record and perf lock contention" - perf lock record -o ${perfdata} -- perf bench sched messaging > /dev/null 2>&1 + perf lock record -o ${perfdata} -- perf bench sched messaging -p > /dev/null 2>&1 # the output goes to the stderr and we expect only 1 output (-E 1) perf lock contention -i ${perfdata} -E 1 -q 2> ${result} if [ "$(cat "${result}" | wc -l)" != "1" ]; then @@ -64,7 +64,7 @@ test_bpf() fi # the perf lock contention output goes to the stderr - perf lock con -a -b -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result} + perf lock con -a -b -E 1 -q -- perf bench sched messaging -p > /dev/null 2> ${result} if [ "$(cat "${result}" | wc -l)" != "1" ]; then echo "[Fail] BPF result count is not 1:" "$(cat "${result}" | wc -l)" err=1 @@ -75,7 +75,7 @@ test_bpf() test_record_concurrent() { echo "Testing perf lock record and perf lock contention at the 
same time" - perf lock record -o- -- perf bench sched messaging 2> /dev/null | \ + perf lock record -o- -- perf bench sched messaging -p 2> /dev/null | \ perf lock contention -i- -E 1 -q 2> ${result} if [ "$(cat "${result}" | wc -l)" != "1" ]; then echo "[Fail] Recorded result count is not 1:" "$(cat "${result}" | wc -l)" @@ -99,7 +99,7 @@ test_aggr_task() fi # the perf lock contention output goes to the stderr - perf lock con -a -b -t -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result} + perf lock con -a -b -t -E 1 -q -- perf bench sched messaging -p > /dev/null 2> ${result} if [ "$(cat "${result}" | wc -l)" != "1" ]; then echo "[Fail] BPF result count is not 1:" "$(cat "${result}" | wc -l)" err=1 @@ -122,7 +122,7 @@ test_aggr_addr() fi # the perf lock contention output goes to the stderr - perf lock con -a -b -l -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result} + perf lock con -a -b -l -E 1 -q -- perf bench sched messaging -p > /dev/null 2> ${result} if [ "$(cat "${result}" | wc -l)" != "1" ]; then echo "[Fail] BPF result count is not 1:" "$(cat "${result}" | wc -l)" err=1 @@ -140,7 +140,7 @@ test_aggr_cgroup() fi # the perf lock contention output goes to the stderr - perf lock con -a -b -g -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result} + perf lock con -a -b -g -E 1 -q -- perf bench sched messaging -p > /dev/null 2> ${result} if [ "$(cat "${result}" | wc -l)" != "1" ]; then echo "[Fail] BPF result count is not 1:" "$(cat "${result}" | wc -l)" err=1 @@ -162,7 +162,7 @@ test_type_filter() return fi - perf lock con -a -b -Y spinlock -q -- perf bench sched messaging > /dev/null 2> ${result} + perf lock con -a -b -Y spinlock -q -- perf bench sched messaging -p > /dev/null 2> ${result} if [ "$(grep -c -v spinlock "${result}")" != "0" ]; then echo "[Fail] BPF result should not have non-spinlocks:" "$(cat "${result}")" err=1 @@ -194,7 +194,7 @@ test_lock_filter() return fi - perf lock con -a -b -L tasklist_lock -q -- perf bench 
sched messaging > /dev/null 2> ${result} + perf lock con -a -b -L tasklist_lock -q -- perf bench sched messaging -p > /dev/null 2> ${result} if [ "$(grep -c -v "${test_lock_filter_type}" "${result}")" != "0" ]; then echo "[Fail] BPF result should not have non-${test_lock_filter_type} locks:" "$(cat "${result}")" err=1 @@ -222,7 +222,7 @@ test_stack_filter() return fi - perf lock con -a -b -S unix_stream -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result} + perf lock con -a -b -S unix_stream -E 1 -q -- perf bench sched messaging -p > /dev/null 2> ${result} if [ "$(cat "${result}" | wc -l)" != "1" ]; then echo "[Fail] BPF result should have a lock from unix_stream:" "$(cat "${result}")" err=1 @@ -250,7 +250,7 @@ test_aggr_task_stack_filter() return fi - perf lock con -a -b -t -S unix_stream -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result} + perf lock con -a -b -t -S unix_stream -E 1 -q -- perf bench sched messaging -p > /dev/null 2> ${result} if [ "$(cat "${result}" | wc -l)" != "1" ]; then echo "[Fail] BPF result should have a task from unix_stream:" "$(cat "${result}")" err=1 @@ -266,7 +266,7 @@ test_cgroup_filter() return fi - perf lock con -a -b -g -E 1 -F wait_total -q -- perf bench sched messaging > /dev/null 2> ${result} + perf lock con -a -b -g -E 1 -F wait_total -q -- perf bench sched messaging -p > /dev/null 2> ${result} if [ "$(cat "${result}" | wc -l)" != "1" ]; then echo "[Fail] BPF result should have a cgroup result:" "$(cat "${result}")" err=1 @@ -274,7 +274,7 @@ test_cgroup_filter() fi cgroup=$(cat "${result}" | awk '{ print $3 }') - perf lock con -a -b -g -E 1 -G "${cgroup}" -q -- perf bench sched messaging > /dev/null 2> ${result} + perf lock con -a -b -g -E 1 -G "${cgroup}" -q -- perf bench sched messaging -p > /dev/null 2> ${result} if [ "$(cat "${result}" | wc -l)" != "1" ]; then echo "[Fail] BPF result should have a result with cgroup filter:" "$(cat "${cgroup}")" err=1 @@ -309,7 +309,7 @@ test_csv_output() fi # the perf 
lock contention output goes to the stderr - perf lock con -a -b -E 1 -x , --output ${result} -- perf bench sched messaging > /dev/null 2>&1 + perf lock con -a -b -E 1 -x , --output ${result} -- perf bench sched messaging -p > /dev/null 2>&1 output=$(grep -v "^#" ${result} | tr -d -c , | wc -c) if [ "${header}" != "${output}" ]; then echo "[Fail] BPF result does not match the number of commas: ${header} != ${output}" From 79aef1a3705bbc95b36dad892af1f313490bd65c Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Wed, 30 Jul 2025 08:31:13 +0700 Subject: [PATCH 1239/2411] of: Clarify OF device context in of_match_device() comment Open Firmware abbreviation (OF) in of_match_device() comment is written in lowercase instead, which is mistaken for prepositional word "of" ([1], [2], [3], [4]) duplicate. Clarify the context. Link: https://lore.kernel.org/all/CAL_JsqLypcBCOVZ8yYWK0J_xc2Vcr+ANrX_3v4vN55Srp4RknQ@mail.gmail.com/ [1] Link: https://lore.kernel.org/all/20220926185852.GA2581083-robh@kernel.org/ [2] Link: https://lore.kernel.org/all/CAL_JsqL4GvgFYzGUfhW5pvm4wYGrFaj6gHOYZjnOMuk2zCz67w@mail.gmail.com/ [3] Link: https://lore.kernel.org/all/20220627173825.GA2637590-robh@kernel.org/ [4] Signed-off-by: Bagas Sanjaya Reviewed-by: Randy Dunlap Link: https://lore.kernel.org/r/20250730013113.11264-1-bagasdotme@gmail.com Signed-off-by: Rob Herring (Arm) --- drivers/of/device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/of/device.c b/drivers/of/device.c index c80426510ec2..f7e75e527667 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -17,8 +17,8 @@ /** * of_match_device - Tell if a struct device matches an of_device_id list - * @matches: array of of device match structures to search in - * @dev: the of device structure to match against + * @matches: array of of_device_id match structures to search in + * @dev: the OF device structure to match against * * Used by a driver to check whether an platform_device present in the * system 
is in its list of supported devices. From 733b439375b494e8a6950ab47d18a4b615b73cb3 Mon Sep 17 00:00:00 2001 From: Jorge Marques Date: Tue, 24 Jun 2025 11:06:04 +0200 Subject: [PATCH 1240/2411] i3c: master: Add inline i3c_readl_fifo() and i3c_writel_fifo() The I3C abstraction expects u8 buffers, but some controllers operate with a 32-bit bus width FIFO and cannot flag valid bytes individually. To avoid reading or writing outside the buffer bounds, use 32-bit accesses where possible and apply memcpy for any remaining bytes Signed-off-by: Jorge Marques Suggested-by: Wolfram Sang Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250624-i3c-writesl-readsl-v3-1-63ccf0870f01@analog.com Signed-off-by: Alexandre Belloni --- drivers/i3c/internals.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/i3c/internals.h b/drivers/i3c/internals.h index 433f6088b7ce..6a11437fee47 100644 --- a/drivers/i3c/internals.h +++ b/drivers/i3c/internals.h @@ -22,4 +22,41 @@ int i3c_dev_enable_ibi_locked(struct i3c_dev_desc *dev); int i3c_dev_request_ibi_locked(struct i3c_dev_desc *dev, const struct i3c_ibi_setup *req); void i3c_dev_free_ibi_locked(struct i3c_dev_desc *dev); + +/** + * i3c_writel_fifo - Write data buffer to 32bit FIFO + * @addr: FIFO Address to write to + * @buf: Pointer to the data bytes to write + * @nbytes: Number of bytes to write + */ +static inline void i3c_writel_fifo(void __iomem *addr, const void *buf, + int nbytes) +{ + writesl(addr, buf, nbytes / 4); + if (nbytes & 3) { + u32 tmp = 0; + + memcpy(&tmp, buf + (nbytes & ~3), nbytes & 3); + writel(tmp, addr); + } +} + +/** + * i3c_readl_fifo - Read data buffer from 32bit FIFO + * @addr: FIFO Address to read from + * @buf: Pointer to the buffer to store read bytes + * @nbytes: Number of bytes to read + */ +static inline void i3c_readl_fifo(const void __iomem *addr, void *buf, + int nbytes) +{ + readsl(addr, buf, nbytes / 
4); + if (nbytes & 3) { + u32 tmp; + + tmp = readl(addr); + memcpy(buf + (nbytes & ~3), &tmp, nbytes & 3); + } +} + #endif /* I3C_INTERNAL_H */ From c20d3fa7049144f519b21616e6020e6939822145 Mon Sep 17 00:00:00 2001 From: Jorge Marques Date: Tue, 24 Jun 2025 11:06:05 +0200 Subject: [PATCH 1241/2411] i3c: master: cdns: Use i3c_writel_fifo() and i3c_readl_fifo() Use common inline i3c_writel_fifo()/i3c_readl_fifo() methods to simplify code since the FIFO of controller is a 32bit width. Signed-off-by: Jorge Marques Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250624-i3c-writesl-readsl-v3-2-63ccf0870f01@analog.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/i3c-master-cdns.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c index 449e85d7ba87..63135e75868d 100644 --- a/drivers/i3c/master/i3c-master-cdns.c +++ b/drivers/i3c/master/i3c-master-cdns.c @@ -23,6 +23,8 @@ #include #include +#include "../internals.h" + #define DEV_ID 0x0 #define DEV_ID_I3C_MASTER 0x5034 @@ -427,25 +429,13 @@ to_cdns_i3c_master(struct i3c_master_controller *master) static void cdns_i3c_master_wr_to_tx_fifo(struct cdns_i3c_master *master, const u8 *bytes, int nbytes) { - writesl(master->regs + TX_FIFO, bytes, nbytes / 4); - if (nbytes & 3) { - u32 tmp = 0; - - memcpy(&tmp, bytes + (nbytes & ~3), nbytes & 3); - writesl(master->regs + TX_FIFO, &tmp, 1); - } + i3c_writel_fifo(master->regs + TX_FIFO, bytes, nbytes); } static void cdns_i3c_master_rd_from_rx_fifo(struct cdns_i3c_master *master, u8 *bytes, int nbytes) { - readsl(master->regs + RX_FIFO, bytes, nbytes / 4); - if (nbytes & 3) { - u32 tmp; - - readsl(master->regs + RX_FIFO, &tmp, 1); - memcpy(bytes + (nbytes & ~3), &tmp, nbytes & 3); - } + i3c_readl_fifo(master->regs + RX_FIFO, bytes, nbytes); } static bool cdns_i3c_master_supports_ccc_cmd(struct i3c_master_controller *m, @@ -1330,12 +1320,7 @@ static 
void cdns_i3c_master_handle_ibi(struct cdns_i3c_master *master, buf = slot->data; nbytes = IBIR_XFER_BYTES(ibir); - readsl(master->regs + IBI_DATA_FIFO, buf, nbytes / 4); - if (nbytes % 3) { - u32 tmp = __raw_readl(master->regs + IBI_DATA_FIFO); - - memcpy(buf + (nbytes & ~3), &tmp, nbytes & 3); - } + i3c_readl_fifo(master->regs + IBI_DATA_FIFO, buf, nbytes); slot->len = min_t(unsigned int, IBIR_XFER_BYTES(ibir), dev->ibi->max_payload_len); From 6e055b1fb2fc72ad937fc75ac109fe904ce56003 Mon Sep 17 00:00:00 2001 From: Jorge Marques Date: Tue, 24 Jun 2025 11:06:06 +0200 Subject: [PATCH 1242/2411] i3c: master: dw: Use i3c_writel_fifo() and i3c_readl_fifo() Use common inline i3c_writel_fifo()/i3c_readl_fifo() methods to simplify code since the FIFO of controller is a 32bit width. Signed-off-by: Jorge Marques Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250624-i3c-writesl-readsl-v3-3-63ccf0870f01@analog.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/dw-i3c-master.c | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index ae1992665673..cc872b481691 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -23,6 +23,7 @@ #include #include +#include "../internals.h" #include "dw-i3c-master.h" #define DEVICE_CTRL 0x0 @@ -336,37 +337,19 @@ static int dw_i3c_master_get_free_pos(struct dw_i3c_master *master) static void dw_i3c_master_wr_tx_fifo(struct dw_i3c_master *master, const u8 *bytes, int nbytes) { - writesl(master->regs + RX_TX_DATA_PORT, bytes, nbytes / 4); - if (nbytes & 3) { - u32 tmp = 0; - - memcpy(&tmp, bytes + (nbytes & ~3), nbytes & 3); - writesl(master->regs + RX_TX_DATA_PORT, &tmp, 1); - } -} - -static void dw_i3c_master_read_fifo(struct dw_i3c_master *master, - int reg, u8 *bytes, int nbytes) -{ - readsl(master->regs + reg, bytes, nbytes / 4); - if (nbytes & 3) { - u32 tmp; - - 
readsl(master->regs + reg, &tmp, 1); - memcpy(bytes + (nbytes & ~3), &tmp, nbytes & 3); - } + i3c_writel_fifo(master->regs + RX_TX_DATA_PORT, bytes, nbytes); } static void dw_i3c_master_read_rx_fifo(struct dw_i3c_master *master, u8 *bytes, int nbytes) { - return dw_i3c_master_read_fifo(master, RX_TX_DATA_PORT, bytes, nbytes); + i3c_readl_fifo(master->regs + RX_TX_DATA_PORT, bytes, nbytes); } static void dw_i3c_master_read_ibi_fifo(struct dw_i3c_master *master, u8 *bytes, int nbytes) { - return dw_i3c_master_read_fifo(master, IBI_QUEUE_STATUS, bytes, nbytes); + i3c_readl_fifo(master->regs + IBI_QUEUE_STATUS, bytes, nbytes); } static struct dw_i3c_xfer * From ba12d5f11d52510e804480c14da850f8c3561b69 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Wed, 2 Jul 2025 11:04:24 +0700 Subject: [PATCH 1243/2411] i3c: Fix i3c_device_do_priv_xfers() kernel-doc indentation Sphinx reports indentation warning on i3c_device_do_priv_xfers() return value list: Documentation/driver-api/i3c/device-driver-api:9: ./drivers/i3c/device.c:31: ERROR: Unexpected indentation. [docutils] Format the list as bullet list to fix the warning. Signed-off-by: Bagas Sanjaya Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250702040424.18577-1-bagasdotme@gmail.com Signed-off-by: Alexandre Belloni --- drivers/i3c/device.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/i3c/device.c b/drivers/i3c/device.c index e80e48756914..2396545763ff 100644 --- a/drivers/i3c/device.c +++ b/drivers/i3c/device.c @@ -26,11 +26,12 @@ * * This function can sleep and thus cannot be called in atomic context. * - * Return: 0 in case of success, a negative error core otherwise. - * -EAGAIN: controller lost address arbitration. Target - * (IBI, HJ or controller role request) win the bus. Client - * driver needs to resend the 'xfers' some time later. - * See I3C spec ver 1.1.1 09-Jun-2021. Section: 5.1.2.2.3. 
+ * Return: + * * 0 in case of success, a negative error code otherwise. + * * -EAGAIN: controller lost address arbitration. Target (IBI, HJ or + * controller role request) win the bus. Client driver needs to resend the + * 'xfers' some time later. See I3C spec ver 1.1.1 09-Jun-2021. Section: + * 5.1.2.2.3. */ int i3c_device_do_priv_xfers(struct i3c_device *dev, struct i3c_priv_xfer *xfers, From da9b54708ddf0e76974365854cbec7fd9f1d4709 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 13 Jul 2025 17:24:12 +0200 Subject: [PATCH 1244/2411] i3c: master: cdns: Simplify handling clocks in probe() The two clocks, driver is getting, are not being disabled/re-enabled during runtime of the device. Eliminate one variable in state struct, all error paths and a lot of code from probe() and remove() by using devm_clk_get_enabled(). Signed-off-by: Krzysztof Kozlowski Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250713152411.74917-2-krzysztof.kozlowski@linaro.org Signed-off-by: Alexandre Belloni --- drivers/i3c/master/i3c-master-cdns.c | 51 +++++++--------------------- 1 file changed, 12 insertions(+), 39 deletions(-) diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c index 63135e75868d..97b151564d3d 100644 --- a/drivers/i3c/master/i3c-master-cdns.c +++ b/drivers/i3c/master/i3c-master-cdns.c @@ -414,7 +414,6 @@ struct cdns_i3c_master { } xferqueue; void __iomem *regs; struct clk *sysclk; - struct clk *pclk; struct cdns_i3c_master_caps caps; unsigned long i3c_scl_lim; const struct cdns_i3c_data *devdata; @@ -1551,6 +1550,7 @@ MODULE_DEVICE_TABLE(of, cdns_i3c_master_of_ids); static int cdns_i3c_master_probe(struct platform_device *pdev) { struct cdns_i3c_master *master; + struct clk *pclk; int ret, irq; u32 val; @@ -1566,11 +1566,11 @@ static int cdns_i3c_master_probe(struct platform_device *pdev) if (IS_ERR(master->regs)) return PTR_ERR(master->regs); - master->pclk = devm_clk_get(&pdev->dev, "pclk"); - if
(IS_ERR(master->pclk)) - return PTR_ERR(master->pclk); + pclk = devm_clk_get_enabled(&pdev->dev, "pclk"); + if (IS_ERR(pclk)) + return PTR_ERR(pclk); - master->sysclk = devm_clk_get(&pdev->dev, "sysclk"); + master->sysclk = devm_clk_get_enabled(&pdev->dev, "sysclk"); if (IS_ERR(master->sysclk)) return PTR_ERR(master->sysclk); @@ -1578,18 +1578,8 @@ static int cdns_i3c_master_probe(struct platform_device *pdev) if (irq < 0) return irq; - ret = clk_prepare_enable(master->pclk); - if (ret) - return ret; - - ret = clk_prepare_enable(master->sysclk); - if (ret) - goto err_disable_pclk; - - if (readl(master->regs + DEV_ID) != DEV_ID_I3C_MASTER) { - ret = -EINVAL; - goto err_disable_sysclk; - } + if (readl(master->regs + DEV_ID) != DEV_ID_I3C_MASTER) + return -EINVAL; spin_lock_init(&master->xferqueue.lock); INIT_LIST_HEAD(&master->xferqueue.list); @@ -1600,7 +1590,7 @@ static int cdns_i3c_master_probe(struct platform_device *pdev) ret = devm_request_irq(&pdev->dev, irq, cdns_i3c_master_interrupt, 0, dev_name(&pdev->dev), master); if (ret) - goto err_disable_sysclk; + return ret; platform_set_drvdata(pdev, master); @@ -1622,29 +1612,15 @@ static int cdns_i3c_master_probe(struct platform_device *pdev) master->ibi.slots = devm_kcalloc(&pdev->dev, master->ibi.num_slots, sizeof(*master->ibi.slots), GFP_KERNEL); - if (!master->ibi.slots) { - ret = -ENOMEM; - goto err_disable_sysclk; - } + if (!master->ibi.slots) + return -ENOMEM; writel(IBIR_THR(1), master->regs + CMD_IBI_THR_CTRL); writel(MST_INT_IBIR_THR, master->regs + MST_IER); writel(DEVS_CTRL_DEV_CLR_ALL, master->regs + DEVS_CTRL); - ret = i3c_master_register(&master->base, &pdev->dev, - &cdns_i3c_master_ops, false); - if (ret) - goto err_disable_sysclk; - - return 0; - -err_disable_sysclk: - clk_disable_unprepare(master->sysclk); - -err_disable_pclk: - clk_disable_unprepare(master->pclk); - - return ret; + return i3c_master_register(&master->base, &pdev->dev, + &cdns_i3c_master_ops, false); } static void 
cdns_i3c_master_remove(struct platform_device *pdev) @@ -1653,9 +1629,6 @@ static void cdns_i3c_master_remove(struct platform_device *pdev) cancel_work_sync(&master->hj_work); i3c_master_unregister(&master->base); - - clk_disable_unprepare(master->sysclk); - clk_disable_unprepare(master->pclk); } static struct platform_driver cdns_i3c_master = { From 5523a466e905b6287b94654ddb364536f2f948cf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 25 Jul 2025 11:06:03 +0200 Subject: [PATCH 1245/2411] i3c: fix module_i3c_i2c_driver() with I3C=n When CONFIG_I3C is disabled and the i3c_i2c_driver_register() happens to not be inlined, any driver calling it still references the i3c_driver instance, which then causes a link failure: x86_64-linux-ld: drivers/hwmon/lm75.o: in function `lm75_i3c_reg_read': lm75.c:(.text+0xc61): undefined reference to `i3cdev_to_dev' x86_64-linux-ld: lm75.c:(.text+0xd25): undefined reference to `i3c_device_do_priv_xfers' x86_64-linux-ld: lm75.c:(.text+0xdd8): undefined reference to `i3c_device_do_priv_xfers' This issue was part of the original i3c code, but only now caused problems when i3c support got added to lm75. Change the 'inline' annotations in the header to '__always_inline' to ensure that the dead-code-elimination pass in the compiler can optimize it out as intended. 
Fixes: 6071d10413ff ("hwmon: (lm75) add I3C support for P3T1755") Fixes: 3a379bbcea0a ("i3c: Add core I3C infrastructure") Signed-off-by: Arnd Bergmann Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Guenter Roeck Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250725090609.2456262-1-arnd@kernel.org Signed-off-by: Alexandre Belloni --- include/linux/i3c/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index b674f64d0822..7f136de4b73e 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -245,7 +245,7 @@ void i3c_driver_unregister(struct i3c_driver *drv); * * Return: 0 if both registrations succeeds, a negative error code otherwise. */ -static inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, +static __always_inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, struct i2c_driver *i2cdrv) { int ret; @@ -270,7 +270,7 @@ static inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, * Note that when CONFIG_I3C is not enabled, this function only unregisters the * @i2cdrv. */ -static inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv, +static __always_inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv, struct i2c_driver *i2cdrv) { if (IS_ENABLED(CONFIG_I3C)) From 9c0609d685b27a0bb392390680207baa820ed118 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 24 Jul 2025 11:41:40 +0200 Subject: [PATCH 1246/2411] i3c: Standardize defines for specification parameters Align existing defines to follow the consistent pattern: I3C_BUS___. Prepare the codebase for adding new parameters and help avoid duplication. 
Signed-off-by: Wolfram Sang Tested-by: Tommaso Merciai Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250724094146.6443-2-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master.c | 12 ++++++------ drivers/i3c/master/dw-i3c-master.c | 4 ++-- include/linux/i3c/master.h | 9 +++++---- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index e00991444f31..2ef898a8fd80 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -727,12 +727,12 @@ static int i3c_bus_set_mode(struct i3c_bus *i3cbus, enum i3c_bus_mode mode, switch (i3cbus->mode) { case I3C_BUS_MODE_PURE: if (!i3cbus->scl_rate.i3c) - i3cbus->scl_rate.i3c = I3C_BUS_TYP_I3C_SCL_RATE; + i3cbus->scl_rate.i3c = I3C_BUS_I3C_SCL_TYP_RATE; break; case I3C_BUS_MODE_MIXED_FAST: case I3C_BUS_MODE_MIXED_LIMITED: if (!i3cbus->scl_rate.i3c) - i3cbus->scl_rate.i3c = I3C_BUS_TYP_I3C_SCL_RATE; + i3cbus->scl_rate.i3c = I3C_BUS_I3C_SCL_TYP_RATE; if (!i3cbus->scl_rate.i2c) i3cbus->scl_rate.i2c = max_i2c_scl_rate; break; @@ -754,8 +754,8 @@ static int i3c_bus_set_mode(struct i3c_bus *i3cbus, enum i3c_bus_mode mode, * I3C/I2C frequency may have been overridden, check that user-provided * values are not exceeding max possible frequency. 
*/ - if (i3cbus->scl_rate.i3c > I3C_BUS_MAX_I3C_SCL_RATE || - i3cbus->scl_rate.i2c > I3C_BUS_I2C_FM_PLUS_SCL_RATE) + if (i3cbus->scl_rate.i3c > I3C_BUS_I3C_SCL_MAX_RATE || + i3cbus->scl_rate.i2c > I3C_BUS_I2C_FM_PLUS_SCL_MAX_RATE) return -EINVAL; return 0; @@ -2787,7 +2787,7 @@ int i3c_master_register(struct i3c_master_controller *master, const struct i3c_master_controller_ops *ops, bool secondary) { - unsigned long i2c_scl_rate = I3C_BUS_I2C_FM_PLUS_SCL_RATE; + unsigned long i2c_scl_rate = I3C_BUS_I2C_FM_PLUS_SCL_MAX_RATE; struct i3c_bus *i3cbus = i3c_master_get_bus(master); enum i3c_bus_mode mode = I3C_BUS_MODE_PURE; struct i2c_dev_boardinfo *i2cbi; @@ -2846,7 +2846,7 @@ int i3c_master_register(struct i3c_master_controller *master, } if (i2cbi->lvr & I3C_LVR_I2C_FM_MODE) - i2c_scl_rate = I3C_BUS_I2C_FM_SCL_RATE; + i2c_scl_rate = I3C_BUS_I2C_FM_SCL_MAX_RATE; } ret = i3c_bus_set_mode(i3cbus, mode, i2c_scl_rate); diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index cc872b481691..e61be28cd1e3 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -605,14 +605,14 @@ static int dw_i2c_clk_cfg(struct dw_i3c_master *master) core_period = DIV_ROUND_UP(1000000000, core_rate); lcnt = DIV_ROUND_UP(I3C_BUS_I2C_FMP_TLOW_MIN_NS, core_period); - hcnt = DIV_ROUND_UP(core_rate, I3C_BUS_I2C_FM_PLUS_SCL_RATE) - lcnt; + hcnt = DIV_ROUND_UP(core_rate, I3C_BUS_I2C_FM_PLUS_SCL_MAX_RATE) - lcnt; scl_timing = SCL_I2C_FMP_TIMING_HCNT(hcnt) | SCL_I2C_FMP_TIMING_LCNT(lcnt); writel(scl_timing, master->regs + SCL_I2C_FMP_TIMING); master->i2c_fmp_timing = scl_timing; lcnt = DIV_ROUND_UP(I3C_BUS_I2C_FM_TLOW_MIN_NS, core_period); - hcnt = DIV_ROUND_UP(core_rate, I3C_BUS_I2C_FM_SCL_RATE) - lcnt; + hcnt = DIV_ROUND_UP(core_rate, I3C_BUS_I2C_FM_SCL_MAX_RATE) - lcnt; scl_timing = SCL_I2C_FM_TIMING_HCNT(hcnt) | SCL_I2C_FM_TIMING_LCNT(lcnt); writel(scl_timing, master->regs + SCL_I2C_FM_TIMING); diff --git 
a/include/linux/i3c/master.h b/include/linux/i3c/master.h index c67922ece617..7dfcbe530515 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -249,10 +249,11 @@ struct i3c_device { */ #define I3C_BUS_MAX_DEVS 11 -#define I3C_BUS_MAX_I3C_SCL_RATE 12900000 -#define I3C_BUS_TYP_I3C_SCL_RATE 12500000 -#define I3C_BUS_I2C_FM_PLUS_SCL_RATE 1000000 -#define I3C_BUS_I2C_FM_SCL_RATE 400000 +/* Taken from the I3C Spec V1.1.1, chapter 6.2. "Timing specification" */ +#define I3C_BUS_I2C_FM_PLUS_SCL_MAX_RATE 1000000 +#define I3C_BUS_I2C_FM_SCL_MAX_RATE 400000 +#define I3C_BUS_I3C_SCL_MAX_RATE 12900000 +#define I3C_BUS_I3C_SCL_TYP_RATE 12500000 #define I3C_BUS_TLOW_OD_MIN_NS 200 /** From 8acf1f3bae1ea48949458b67d68a72a95c3244a4 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 24 Jul 2025 11:41:41 +0200 Subject: [PATCH 1247/2411] i3c: Add more parameters for controllers to the header Add standard timing value definition from specification. Signed-off-by: Wolfram Sang Tested-by: Tommaso Merciai Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250724094146.6443-3-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- include/linux/i3c/master.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 7dfcbe530515..043f5c7ff398 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -254,6 +254,10 @@ struct i3c_device { #define I3C_BUS_I2C_FM_SCL_MAX_RATE 400000 #define I3C_BUS_I3C_SCL_MAX_RATE 12900000 #define I3C_BUS_I3C_SCL_TYP_RATE 12500000 +#define I3C_BUS_TAVAL_MIN_NS 1000 +#define I3C_BUS_TBUF_MIXED_FM_MIN_NS 1300 +#define I3C_BUS_THIGH_MIXED_MAX_NS 41 +#define I3C_BUS_TIDLE_MIN_NS 200000 #define I3C_BUS_TLOW_OD_MIN_NS 200 /** From 94e611b5b9ef3a1d9ba77f41343e95155a5091d2 Mon Sep 17 00:00:00 2001 From: Tommaso Merciai Date: Thu, 24 Jul 2025 11:41:42 +0200 Subject: [PATCH 1248/2411] dt-bindings: i3c: Add Renesas I3C controller Add Renesas I3C 
controller which is available in R9A08G045 (RZ/G3S) and R9A09G047 (RZ/G3E) SoCs. Signed-off-by: Tommaso Merciai Signed-off-by: Wolfram Sang Reviewed-by: Rob Herring (Arm) Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250724094146.6443-4-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- .../devicetree/bindings/i3c/renesas,i3c.yaml | 179 ++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 Documentation/devicetree/bindings/i3c/renesas,i3c.yaml diff --git a/Documentation/devicetree/bindings/i3c/renesas,i3c.yaml b/Documentation/devicetree/bindings/i3c/renesas,i3c.yaml new file mode 100644 index 000000000000..fe2e9633c46f --- /dev/null +++ b/Documentation/devicetree/bindings/i3c/renesas,i3c.yaml @@ -0,0 +1,179 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/i3c/renesas,i3c.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Renesas RZ/G3S and RZ/G3E I3C Bus Interface + +maintainers: + - Wolfram Sang + - Tommaso Merciai + +properties: + compatible: + items: + - enum: + - renesas,r9a08g045-i3c # RZ/G3S + - renesas,r9a09g047-i3c # RZ/G3E + + reg: + maxItems: 1 + + interrupts: + items: + - description: Non-recoverable internal error interrupt + - description: Normal transfer error interrupt + - description: Normal transfer abort interrupt + - description: Normal response status buffer full interrupt + - description: Normal command buffer empty interrupt + - description: Normal IBI status buffer full interrupt + - description: Normal Rx data buffer full interrupt + - description: Normal Tx data buffer empty interrupt + - description: Normal receive status buffer full interrupt + - description: START condition detection interrupt + - description: STOP condition detection interrupt + - description: Transmit end interrupt + - description: NACK detection interrupt + - description: Arbitration lost interrupt + - description: Timeout detection 
interrupt + - description: Wake-up condition detection interrupt + - description: HDR Exit Pattern detection interrupt + minItems: 16 + + interrupt-names: + items: + - const: ierr + - const: terr + - const: abort + - const: resp + - const: cmd + - const: ibi + - const: rx + - const: tx + - const: rcv + - const: st + - const: sp + - const: tend + - const: nack + - const: al + - const: tmo + - const: wu + - const: exit + minItems: 16 + + clocks: + items: + - description: APB bus clock + - description: transfer clock + - description: SFRs clock + minItems: 2 + + clock-names: + items: + - const: pclk + - const: tclk + - const: pclkrw + minItems: 2 + + power-domains: + maxItems: 1 + + resets: + items: + - description: Reset signal + - description: APB interface reset signal/SCAN reset signal + + reset-names: + items: + - const: presetn + - const: tresetn + +required: + - compatible + - reg + - interrupts + - interrupt-names + - clock-names + - clocks + - power-domains + - resets + - reset-names + +allOf: + - $ref: i3c.yaml# + + - if: + properties: + compatible: + contains: + const: renesas,r9a08g045-i3c + then: + properties: + clocks: + maxItems: 2 + clock-names: + maxItems: 2 + interrupts: + minItems: 17 + interrupt-names: + minItems: 17 + + - if: + properties: + compatible: + contains: + const: renesas,r9a09g047-i3c + then: + properties: + clocks: + minItems: 3 + clock-names: + minItems: 3 + interrupts: + maxItems: 16 + interrupt-names: + maxItems: 16 + +unevaluatedProperties: false + +examples: + - | + #include + #include + + i3c@1005b000 { + compatible = "renesas,r9a08g045-i3c"; + reg = <0x1005b000 0x1000>; + clocks = <&cpg CPG_MOD R9A08G045_I3C_PCLK>, + <&cpg CPG_MOD R9A08G045_I3C_TCLK>; + clock-names = "pclk", "tclk"; + interrupts = , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + ; + interrupt-names = "ierr", "terr", "abort", "resp", + "cmd", "ibi", "rx", "tx", "rcv", + "st", "sp", "tend", "nack", + "al", "tmo", "wu", "exit"; + resets = <&cpg 
R9A08G045_I3C_PRESETN>, + <&cpg R9A08G045_I3C_TRESETN>; + reset-names = "presetn", "tresetn"; + power-domains = <&cpg>; + #address-cells = <3>; + #size-cells = <0>; + }; +... From d028219a9f1485914492bf373406f6a0e665ace2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 24 Jul 2025 11:41:43 +0200 Subject: [PATCH 1249/2411] i3c: master: Add basic driver for the Renesas I3C controller Add a basic driver for the I3C controller found in Renesas RZ/G3S and G3E SoCs. Support I3C pure busses (tested with two targets) and mixed busses (two I3C devices plus various I2C targets). DAA and communication with temperature sensors worked reliably at various speeds. Missing features such as IBI, HotJoin, and target mode will be added incrementally. Signed-off-by: Wolfram Sang Tested-by: Tommaso Merciai Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250724094146.6443-5-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- MAINTAINERS | 7 + drivers/i3c/master/Kconfig | 10 + drivers/i3c/master/Makefile | 1 + drivers/i3c/master/renesas-i3c.c | 1404 ++++++++++++++++++++++++++++++ 4 files changed, 1422 insertions(+) create mode 100644 drivers/i3c/master/renesas-i3c.c diff --git a/MAINTAINERS b/MAINTAINERS index d5a173e987c0..35ed8498ab1e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11458,6 +11458,13 @@ S: Maintained F: Documentation/devicetree/bindings/i3c/cdns,i3c-master.yaml F: drivers/i3c/master/i3c-master-cdns.c +I3C DRIVER FOR RENESAS +M: Wolfram Sang +M: Tommaso Merciai +S: Supported +F: Documentation/devicetree/bindings/i3c/renesas,i3c.yaml +F: drivers/i3c/master/renesas-i3c.c + I3C DRIVER FOR SYNOPSYS DESIGNWARE S: Orphan F: Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml diff --git a/drivers/i3c/master/Kconfig b/drivers/i3c/master/Kconfig index 7b30db3253af..13df2944f2ec 100644 --- a/drivers/i3c/master/Kconfig +++ b/drivers/i3c/master/Kconfig @@ -64,3 +64,13 @@ config MIPI_I3C_HCI_PCI This driver can also be built as a module. 
If so, the module will be called mipi-i3c-hci-pci. + +config RENESAS_I3C + tristate "Renesas I3C controller driver" + depends on HAS_IOMEM + depends on ARCH_RENESAS || COMPILE_TEST + help + Support the Renesas I3C controller as found in some RZ variants. + + This driver can also be built as a module. If so, the module will be + called renesas-i3c. diff --git a/drivers/i3c/master/Makefile b/drivers/i3c/master/Makefile index 3e97960160bc..aac74f3e3851 100644 --- a/drivers/i3c/master/Makefile +++ b/drivers/i3c/master/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_DW_I3C_MASTER) += dw-i3c-master.o obj-$(CONFIG_AST2600_I3C_MASTER) += ast2600-i3c-master.o obj-$(CONFIG_SVC_I3C_MASTER) += svc-i3c-master.o obj-$(CONFIG_MIPI_I3C_HCI) += mipi-i3c-hci/ +obj-$(CONFIG_RENESAS_I3C) += renesas-i3c.o diff --git a/drivers/i3c/master/renesas-i3c.c b/drivers/i3c/master/renesas-i3c.c new file mode 100644 index 000000000000..174d3dc5d276 --- /dev/null +++ b/drivers/i3c/master/renesas-i3c.c @@ -0,0 +1,1404 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Renesas I3C Controller driver + * Copyright (C) 2023-25 Renesas Electronics Corp. 
+ * + * TODO: IBI support, HotJoin support, Target support + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../internals.h" + +#define PRTS 0x00 +#define PRTS_PRTMD BIT(0) + +#define BCTL 0x14 +#define BCTL_INCBA BIT(0) +#define BCTL_HJACKCTL BIT(8) +#define BCTL_ABT BIT(29) +#define BCTL_BUSE BIT(31) + +#define MSDVAD 0x18 +#define MSDVAD_MDYAD(x) FIELD_PREP(GENMASK(21, 16), x) +#define MSDVAD_MDYADV BIT(31) + +#define RSTCTL 0x20 +#define RSTCTL_RI3CRST BIT(0) +#define RSTCTL_INTLRST BIT(16) + +#define INST 0x30 + +#define IBINCTL 0x58 +#define IBINCTL_NRHJCTL BIT(0) +#define IBINCTL_NRMRCTL BIT(1) +#define IBINCTL_NRSIRCTL BIT(3) + +#define SVCTL 0x64 + +#define REFCKCTL 0x70 +#define REFCKCTL_IREFCKS(x) FIELD_PREP(GENMASK(2, 0), x) + +#define STDBR 0x74 +#define STDBR_SBRLO(cond, x) FIELD_PREP(GENMASK(7, 0), (x) >> (cond)) +#define STDBR_SBRHO(cond, x) FIELD_PREP(GENMASK(15, 8), (x) >> (cond)) +#define STDBR_SBRLP(x) FIELD_PREP(GENMASK(21, 16), x) +#define STDBR_SBRHP(x) FIELD_PREP(GENMASK(29, 24), x) +#define STDBR_DSBRPO BIT(31) + +#define EXTBR 0x78 +#define EXTBR_EBRLO(x) FIELD_PREP(GENMASK(7, 0), x) +#define EXTBR_EBRHO(x) FIELD_PREP(GENMASK(15, 8), x) +#define EXTBR_EBRLP(x) FIELD_PREP(GENMASK(21, 16), x) +#define EXTBR_EBRHP(x) FIELD_PREP(GENMASK(29, 24), x) + +#define BFRECDT 0x7c +#define BFRECDT_FRECYC(x) FIELD_PREP(GENMASK(8, 0), x) + +#define BAVLCDT 0x80 +#define BAVLCDT_AVLCYC(x) FIELD_PREP(GENMASK(8, 0), x) + +#define BIDLCDT 0x84 +#define BIDLCDT_IDLCYC(x) FIELD_PREP(GENMASK(17, 0), x) + +#define ACKCTL 0xa0 +#define ACKCTL_ACKT BIT(1) +#define ACKCTL_ACKTWP BIT(2) + +#define SCSTRCTL 0xa4 +#define SCSTRCTL_ACKTWE BIT(0) +#define SCSTRCTL_RWE BIT(1) + +#define SCSTLCTL 0xb0 + +#define CNDCTL 0x140 +#define CNDCTL_STCND BIT(0) +#define CNDCTL_SRCND BIT(1) +#define CNDCTL_SPCND BIT(2) + +#define NCMDQP 0x150 /* 
Normal Command Queue */ +#define NCMDQP_CMD_ATTR(x) FIELD_PREP(GENMASK(2, 0), x) +#define NCMDQP_IMMED_XFER 0x01 +#define NCMDQP_ADDR_ASSGN 0x02 +#define NCMDQP_TID(x) FIELD_PREP(GENMASK(6, 3), x) +#define NCMDQP_CMD(x) FIELD_PREP(GENMASK(14, 7), x) +#define NCMDQP_CP BIT(15) +#define NCMDQP_DEV_INDEX(x) FIELD_PREP(GENMASK(20, 16), x) +#define NCMDQP_BYTE_CNT(x) FIELD_PREP(GENMASK(25, 23), x) +#define NCMDQP_DEV_COUNT(x) FIELD_PREP(GENMASK(29, 26), x) +#define NCMDQP_MODE(x) FIELD_PREP(GENMASK(28, 26), x) +#define NCMDQP_RNW(x) FIELD_PREP(GENMASK(29, 29), x) +#define NCMDQP_ROC BIT(30) +#define NCMDQP_TOC BIT(31) +#define NCMDQP_DATA_LENGTH(x) FIELD_PREP(GENMASK(31, 16), x) + +#define NRSPQP 0x154 /* Normal Response Queue */ +#define NRSPQP_NO_ERROR 0 +#define NRSPQP_ERROR_CRC 1 +#define NRSPQP_ERROR_PARITY 2 +#define NRSPQP_ERROR_FRAME 3 +#define NRSPQP_ERROR_IBA_NACK 4 +#define NRSPQP_ERROR_ADDRESS_NACK 5 +#define NRSPQP_ERROR_OVER_UNDER_FLOW 6 +#define NRSPQP_ERROR_TRANSF_ABORT 8 +#define NRSPQP_ERROR_I2C_W_NACK_ERR 9 +#define NRSPQP_ERROR_UNSUPPORTED 10 +#define NRSPQP_DATA_LEN(x) FIELD_GET(GENMASK(15, 0), x) +#define NRSPQP_ERR_STATUS(x) FIELD_GET(GENMASK(31, 28), x) + +#define NTDTBP0 0x158 /* Normal Transfer Data Buffer */ +#define NTDTBP0_DEPTH 16 + +#define NQTHCTL 0x190 +#define NQTHCTL_CMDQTH(x) FIELD_PREP(GENMASK(1, 0), x) +#define NQTHCTL_IBIDSSZ(x) FIELD_PREP(GENMASK(23, 16), x) + +#define NTBTHCTL0 0x194 + +#define NRQTHCTL 0x1c0 + +#define BST 0x1d0 +#define BST_STCNDDF BIT(0) +#define BST_SPCNDDF BIT(1) +#define BST_NACKDF BIT(4) +#define BST_TENDF BIT(8) + +#define BSTE 0x1d4 +#define BSTE_STCNDDE BIT(0) +#define BSTE_SPCNDDE BIT(1) +#define BSTE_NACKDE BIT(4) +#define BSTE_TENDE BIT(8) +#define BSTE_ALE BIT(16) +#define BSTE_TODE BIT(20) +#define BSTE_WUCNDDE BIT(24) +#define BSTE_ALL_FLAG (BSTE_STCNDDE | BSTE_SPCNDDE |\ + BSTE_NACKDE | BSTE_TENDE |\ + BSTE_ALE | BSTE_TODE | BSTE_WUCNDDE) + +#define BIE 0x1d8 +#define BIE_STCNDDIE BIT(0) +#define
BIE_SPCNDDIE BIT(1) +#define BIE_NACKDIE BIT(4) +#define BIE_TENDIE BIT(8) + +#define NTST 0x1e0 +#define NTST_TDBEF0 BIT(0) +#define NTST_RDBFF0 BIT(1) +#define NTST_CMDQEF BIT(3) +#define NTST_RSPQFF BIT(4) +#define NTST_TABTF BIT(5) +#define NTST_TEF BIT(9) + +#define NTSTE 0x1e4 +#define NTSTE_TDBEE0 BIT(0) +#define NTSTE_RDBFE0 BIT(1) +#define NTSTE_IBIQEFE BIT(2) +#define NTSTE_CMDQEE BIT(3) +#define NTSTE_RSPQFE BIT(4) +#define NTSTE_TABTE BIT(5) +#define NTSTE_TEE BIT(9) +#define NTSTE_RSQFE BIT(20) +#define NTSTE_ALL_FLAG (NTSTE_TDBEE0 | NTSTE_RDBFE0 |\ + NTSTE_IBIQEFE | NTSTE_CMDQEE |\ + NTSTE_RSPQFE | NTSTE_TABTE |\ + NTSTE_TEE | NTSTE_RSQFE) + +#define NTIE 0x1e8 +#define NTIE_TDBEIE0 BIT(0) +#define NTIE_RDBFIE0 BIT(1) +#define NTIE_IBIQEFIE BIT(2) +#define NTIE_RSPQFIE BIT(4) +#define NTIE_RSQFIE BIT(20) + +#define BCST 0x210 +#define BCST_BFREF BIT(0) + +#define DATBAS(x) (0x224 + 0x8 * (x)) +#define DATBAS_DVSTAD(x) FIELD_PREP(GENMASK(6, 0), x) +#define DATBAS_DVDYAD(x) FIELD_PREP(GENMASK(23, 16), x) + +#define NDBSTLV0 0x398 +#define NDBSTLV0_RDBLV(x) FIELD_GET(GENMASK(15, 8), x) + +#define RENESAS_I3C_MAX_DEVS 8 +#define I2C_INIT_MSG -1 + +enum i3c_internal_state { + I3C_INTERNAL_STATE_DISABLED, + I3C_INTERNAL_STATE_CONTROLLER_IDLE, + I3C_INTERNAL_STATE_CONTROLLER_ENTDAA, + I3C_INTERNAL_STATE_CONTROLLER_SETDASA, + I3C_INTERNAL_STATE_CONTROLLER_WRITE, + I3C_INTERNAL_STATE_CONTROLLER_READ, + I3C_INTERNAL_STATE_CONTROLLER_COMMAND_WRITE, + I3C_INTERNAL_STATE_CONTROLLER_COMMAND_READ, +}; + +enum renesas_i3c_event { + I3C_COMMAND_ADDRESS_ASSIGNMENT, + I3C_WRITE, + I3C_READ, + I3C_COMMAND_WRITE, + I3C_COMMAND_READ, +}; + +struct renesas_i3c_cmd { + u32 cmd0; + u32 len; + const void *tx_buf; + u32 tx_count; + void *rx_buf; + u32 rx_count; + u32 err; + u8 rnw; + /* i2c xfer */ + int i2c_bytes_left; + int i2c_is_last; + u8 *i2c_buf; + const struct i2c_msg *msg; +}; + +struct renesas_i3c_xfer { + struct list_head node; + struct completion comp; + int ret; + 
bool is_i2c_xfer; + unsigned int ncmds; + struct renesas_i3c_cmd cmds[] __counted_by(ncmds); +}; + +struct renesas_i3c_xferqueue { + struct list_head list; + struct renesas_i3c_xfer *cur; + /* Lock for accessing the xfer queue */ + spinlock_t lock; +}; + +struct renesas_i3c { + struct i3c_master_controller base; + enum i3c_internal_state internal_state; + u16 maxdevs; + u32 free_pos; + u32 i2c_STDBR; + u32 i3c_STDBR; + u8 addrs[RENESAS_I3C_MAX_DEVS]; + struct renesas_i3c_xferqueue xferqueue; + void __iomem *regs; + struct clk *tclk; +}; + +struct renesas_i3c_i2c_dev_data { + u8 index; +}; + +struct renesas_i3c_irq_desc { + const char *name; + irq_handler_t isr; + const char *desc; +}; + +struct renesas_i3c_config { + unsigned int has_pclkrw:1; +}; + +static inline void renesas_i3c_reg_update(void __iomem *reg, u32 mask, u32 val) +{ + u32 data = readl(reg); + + data &= ~mask; + data |= (val & mask); + writel(data, reg); +} + +static inline u32 renesas_readl(void __iomem *base, u32 reg) +{ + return readl(base + reg); +} + +static inline void renesas_writel(void __iomem *base, u32 reg, u32 val) +{ + writel(val, base + reg); +} + +static void renesas_set_bit(void __iomem *base, u32 reg, u32 val) +{ + renesas_i3c_reg_update(base + reg, val, val); +} + +static void renesas_clear_bit(void __iomem *base, u32 reg, u32 val) +{ + renesas_i3c_reg_update(base + reg, val, 0); +} + +static inline struct renesas_i3c *to_renesas_i3c(struct i3c_master_controller *m) +{ + return container_of(m, struct renesas_i3c, base); +} + +static inline u32 datbas_dvdyad_with_parity(u8 addr) +{ + return DATBAS_DVDYAD(addr | (parity8(addr) ? 
0 : BIT(7))); +} + +static int renesas_i3c_get_free_pos(struct renesas_i3c *i3c) +{ + if (!(i3c->free_pos & GENMASK(i3c->maxdevs - 1, 0))) + return -ENOSPC; + + return ffs(i3c->free_pos) - 1; +} + +static int renesas_i3c_get_addr_pos(struct renesas_i3c *i3c, u8 addr) +{ + int pos; + + for (pos = 0; pos < i3c->maxdevs; pos++) { + if (addr == i3c->addrs[pos]) + return pos; + } + + return -EINVAL; +} + +static struct renesas_i3c_xfer *renesas_i3c_alloc_xfer(struct renesas_i3c *i3c, + unsigned int ncmds) +{ + struct renesas_i3c_xfer *xfer; + + xfer = kzalloc(struct_size(xfer, cmds, ncmds), GFP_KERNEL); + if (!xfer) + return NULL; + + INIT_LIST_HEAD(&xfer->node); + xfer->ncmds = ncmds; + xfer->ret = -ETIMEDOUT; + + return xfer; +} + +static void renesas_i3c_start_xfer_locked(struct renesas_i3c *i3c) +{ + struct renesas_i3c_xfer *xfer = i3c->xferqueue.cur; + struct renesas_i3c_cmd *cmd; + u32 cmd1; + + if (!xfer) + return; + + cmd = xfer->cmds; + + switch (i3c->internal_state) { + case I3C_INTERNAL_STATE_CONTROLLER_ENTDAA: + case I3C_INTERNAL_STATE_CONTROLLER_SETDASA: + renesas_set_bit(i3c->regs, NTIE, NTIE_RSPQFIE); + renesas_writel(i3c->regs, NCMDQP, cmd->cmd0); + renesas_writel(i3c->regs, NCMDQP, 0); + break; + case I3C_INTERNAL_STATE_CONTROLLER_WRITE: + case I3C_INTERNAL_STATE_CONTROLLER_COMMAND_WRITE: + renesas_set_bit(i3c->regs, NTIE, NTIE_RSPQFIE); + if (cmd->len <= 4) { + cmd->cmd0 |= NCMDQP_CMD_ATTR(NCMDQP_IMMED_XFER); + cmd->cmd0 |= NCMDQP_BYTE_CNT(cmd->len); + cmd->tx_count = cmd->len; + cmd1 = cmd->len == 0 ? 
0 : *(u32 *)cmd->tx_buf; + } else { + cmd1 = NCMDQP_DATA_LENGTH(cmd->len); + } + renesas_writel(i3c->regs, NCMDQP, cmd->cmd0); + renesas_writel(i3c->regs, NCMDQP, cmd1); + break; + case I3C_INTERNAL_STATE_CONTROLLER_READ: + case I3C_INTERNAL_STATE_CONTROLLER_COMMAND_READ: + renesas_set_bit(i3c->regs, NTIE, NTIE_RDBFIE0); + cmd1 = NCMDQP_DATA_LENGTH(cmd->len); + renesas_writel(i3c->regs, NCMDQP, cmd->cmd0); + renesas_writel(i3c->regs, NCMDQP, cmd1); + break; + default: + break; + } + + /* Clear the command queue empty flag */ + renesas_clear_bit(i3c->regs, NTST, NTST_CMDQEF); +} + +static void renesas_i3c_dequeue_xfer_locked(struct renesas_i3c *i3c, + struct renesas_i3c_xfer *xfer) +{ + if (i3c->xferqueue.cur == xfer) + i3c->xferqueue.cur = NULL; + else + list_del_init(&xfer->node); +} + +static void renesas_i3c_dequeue_xfer(struct renesas_i3c *i3c, struct renesas_i3c_xfer *xfer) +{ + scoped_guard(spinlock_irqsave, &i3c->xferqueue.lock) + renesas_i3c_dequeue_xfer_locked(i3c, xfer); +} + +static void renesas_i3c_enqueue_xfer(struct renesas_i3c *i3c, struct renesas_i3c_xfer *xfer) +{ + reinit_completion(&xfer->comp); + scoped_guard(spinlock_irqsave, &i3c->xferqueue.lock) { + if (i3c->xferqueue.cur) { + list_add_tail(&xfer->node, &i3c->xferqueue.list); + } else { + i3c->xferqueue.cur = xfer; + if (!xfer->is_i2c_xfer) + renesas_i3c_start_xfer_locked(i3c); + } + } +} + +static void renesas_i3c_wait_xfer(struct renesas_i3c *i3c, struct renesas_i3c_xfer *xfer) +{ + unsigned long time_left; + + renesas_i3c_enqueue_xfer(i3c, xfer); + + time_left = wait_for_completion_timeout(&xfer->comp, msecs_to_jiffies(1000)); + if (!time_left) + renesas_i3c_dequeue_xfer(i3c, xfer); +} + +static void renesas_i3c_set_prts(struct renesas_i3c *i3c, u32 val) +{ + /* Required sequence according to tnrza0140ae */ + renesas_set_bit(i3c->regs, RSTCTL, RSTCTL_INTLRST); + renesas_writel(i3c->regs, PRTS, val); + renesas_clear_bit(i3c->regs, RSTCTL, RSTCTL_INTLRST); +} + +static void 
renesas_i3c_bus_enable(struct i3c_master_controller *m, bool i3c_mode) +{ + struct renesas_i3c *i3c = to_renesas_i3c(m); + + /* Setup either I3C or I2C protocol */ + if (i3c_mode) { + renesas_i3c_set_prts(i3c, 0); + /* Revisit: INCBA handling, especially after I2C transfers */ + renesas_set_bit(i3c->regs, BCTL, BCTL_HJACKCTL | BCTL_INCBA); + renesas_set_bit(i3c->regs, MSDVAD, MSDVAD_MDYADV); + renesas_writel(i3c->regs, STDBR, i3c->i3c_STDBR); + } else { + renesas_i3c_set_prts(i3c, PRTS_PRTMD); + renesas_writel(i3c->regs, STDBR, i3c->i2c_STDBR); + } + + /* Enable I3C bus */ + renesas_set_bit(i3c->regs, BCTL, BCTL_BUSE); +} + +static int renesas_i3c_reset(struct renesas_i3c *i3c) +{ + u32 val; + + renesas_writel(i3c->regs, BCTL, 0); + renesas_set_bit(i3c->regs, RSTCTL, RSTCTL_RI3CRST); + + return read_poll_timeout(renesas_readl, val, !(val & RSTCTL_RI3CRST), + 0, 1000, false, i3c->regs, RSTCTL); +} + +static int renesas_i3c_bus_init(struct i3c_master_controller *m) +{ + struct renesas_i3c *i3c = to_renesas_i3c(m); + struct i3c_bus *bus = i3c_master_get_bus(m); + struct i3c_device_info info = {}; + struct i2c_timings t; + unsigned long rate; + u32 double_SBR, val; + int cks, pp_high_ticks, pp_low_ticks, i3c_total_ticks; + int od_high_ticks, od_low_ticks, i2c_total_ticks; + int ret; + + rate = clk_get_rate(i3c->tclk); + if (!rate) + return -EINVAL; + + ret = renesas_i3c_reset(i3c); + if (ret) + return ret; + + i2c_total_ticks = DIV_ROUND_UP(rate, bus->scl_rate.i2c); + i3c_total_ticks = DIV_ROUND_UP(rate, bus->scl_rate.i3c); + + i2c_parse_fw_timings(&m->dev, &t, true); + + for (cks = 0; cks < 7; cks++) { + /* SCL low-period calculation in Open-drain mode */ + od_low_ticks = ((i2c_total_ticks * 6) / 10); + + /* SCL clock calculation in Push-Pull mode */ + if (bus->mode == I3C_BUS_MODE_PURE) + pp_high_ticks = ((i3c_total_ticks * 5) / 10); + else + pp_high_ticks = DIV_ROUND_UP(I3C_BUS_THIGH_MIXED_MAX_NS, + NSEC_PER_SEC / rate); + pp_low_ticks = i3c_total_ticks - 
pp_high_ticks; + + if ((od_low_ticks / 2) <= 0xFF && pp_low_ticks < 0x3F) + break; + + i2c_total_ticks /= 2; + i3c_total_ticks /= 2; + rate /= 2; + } + + /* SCL clock period calculation in Open-drain mode */ + if ((od_low_ticks / 2) > 0xFF || pp_low_ticks > 0x3F) { + dev_err(&m->dev, "invalid speed (i2c-scl = %lu Hz, i3c-scl = %lu Hz). Too slow.\n", + (unsigned long)bus->scl_rate.i2c, (unsigned long)bus->scl_rate.i3c); + return -EINVAL; + } + + /* SCL high-period calculation in Open-drain mode */ + od_high_ticks = i2c_total_ticks - od_low_ticks; + + /* Standard Bit Rate setting */ + double_SBR = od_low_ticks > 0xFF ? 1 : 0; + i3c->i3c_STDBR = (double_SBR ? STDBR_DSBRPO : 0) | + STDBR_SBRLO(double_SBR, od_low_ticks) | + STDBR_SBRHO(double_SBR, od_high_ticks) | + STDBR_SBRLP(pp_low_ticks) | + STDBR_SBRHP(pp_high_ticks); + + od_low_ticks -= t.scl_fall_ns / (NSEC_PER_SEC / rate) + 1; + od_high_ticks -= t.scl_rise_ns / (NSEC_PER_SEC / rate) + 1; + i3c->i2c_STDBR = (double_SBR ? STDBR_DSBRPO : 0) | + STDBR_SBRLO(double_SBR, od_low_ticks) | + STDBR_SBRHO(double_SBR, od_high_ticks) | + STDBR_SBRLP(pp_low_ticks) | + STDBR_SBRHP(pp_high_ticks); + renesas_writel(i3c->regs, STDBR, i3c->i3c_STDBR); + + /* Extended Bit Rate setting */ + renesas_writel(i3c->regs, EXTBR, EXTBR_EBRLO(od_low_ticks) | + EXTBR_EBRHO(od_high_ticks) | + EXTBR_EBRLP(pp_low_ticks) | + EXTBR_EBRHP(pp_high_ticks)); + + renesas_writel(i3c->regs, REFCKCTL, REFCKCTL_IREFCKS(cks)); + + /* Disable Slave Mode */ + renesas_writel(i3c->regs, SVCTL, 0); + + /* Initialize Queue/Buffer threshold */ + renesas_writel(i3c->regs, NQTHCTL, NQTHCTL_IBIDSSZ(6) | + NQTHCTL_CMDQTH(1)); + + /* The only supported configuration is two entries*/ + renesas_writel(i3c->regs, NTBTHCTL0, 0); + /* Interrupt when there is one entry in the queue */ + renesas_writel(i3c->regs, NRQTHCTL, 0); + + /* Enable all Bus/Transfer Status Flags */ + renesas_writel(i3c->regs, BSTE, BSTE_ALL_FLAG); + renesas_writel(i3c->regs, NTSTE, NTSTE_ALL_FLAG); + 
+ /* Interrupt enable settings */ + renesas_writel(i3c->regs, BIE, BIE_NACKDIE | BIE_TENDIE); + renesas_writel(i3c->regs, NTIE, 0); + + /* Clear Status register */ + renesas_writel(i3c->regs, NTST, 0); + renesas_writel(i3c->regs, INST, 0); + renesas_writel(i3c->regs, BST, 0); + + /* Hot-Join Acknowledge setting. */ + renesas_set_bit(i3c->regs, BCTL, BCTL_HJACKCTL); + + renesas_writel(i3c->regs, IBINCTL, IBINCTL_NRHJCTL | IBINCTL_NRMRCTL | + IBINCTL_NRSIRCTL); + + renesas_writel(i3c->regs, SCSTLCTL, 0); + renesas_set_bit(i3c->regs, SCSTRCTL, SCSTRCTL_ACKTWE); + + /* Bus condition timing */ + val = DIV_ROUND_UP(I3C_BUS_TBUF_MIXED_FM_MIN_NS, NSEC_PER_SEC / rate); + renesas_writel(i3c->regs, BFRECDT, BFRECDT_FRECYC(val)); + + val = DIV_ROUND_UP(I3C_BUS_TAVAL_MIN_NS, NSEC_PER_SEC / rate); + renesas_writel(i3c->regs, BAVLCDT, BAVLCDT_AVLCYC(val)); + + val = DIV_ROUND_UP(I3C_BUS_TIDLE_MIN_NS, NSEC_PER_SEC / rate); + renesas_writel(i3c->regs, BIDLCDT, BIDLCDT_IDLCYC(val)); + + ret = i3c_master_get_free_addr(m, 0); + if (ret < 0) + return ret; + + renesas_writel(i3c->regs, MSDVAD, MSDVAD_MDYAD(ret) | MSDVAD_MDYADV); + + memset(&info, 0, sizeof(info)); + info.dyn_addr = ret; + return i3c_master_set_info(&i3c->base, &info); +} + +static void renesas_i3c_bus_cleanup(struct i3c_master_controller *m) +{ + struct renesas_i3c *i3c = to_renesas_i3c(m); + + renesas_i3c_reset(i3c); +} + +static int renesas_i3c_daa(struct i3c_master_controller *m) +{ + struct renesas_i3c *i3c = to_renesas_i3c(m); + struct renesas_i3c_cmd *cmd; + u32 olddevs, newdevs; + u8 last_addr = 0, pos; + int ret; + + struct renesas_i3c_xfer *xfer __free(kfree) = renesas_i3c_alloc_xfer(i3c, 1); + if (!xfer) + return -ENOMEM; + + /* Enable I3C bus. */ + renesas_i3c_bus_enable(m, true); + + olddevs = ~(i3c->free_pos); + i3c->internal_state = I3C_INTERNAL_STATE_CONTROLLER_ENTDAA; + + /* Setting DATBASn registers for target devices.
*/ + for (pos = 0; pos < i3c->maxdevs; pos++) { + if (olddevs & BIT(pos)) + continue; + + ret = i3c_master_get_free_addr(m, last_addr + 1); + if (ret < 0) + return -ENOSPC; + + i3c->addrs[pos] = ret; + last_addr = ret; + + renesas_writel(i3c->regs, DATBAS(pos), datbas_dvdyad_with_parity(ret)); + } + + init_completion(&xfer->comp); + cmd = xfer->cmds; + cmd->rx_count = 0; + + ret = renesas_i3c_get_free_pos(i3c); + if (ret < 0) + return ret; + + /* + * Setup the command descriptor to start the ENTDAA command + * and starting at the selected device index. + */ + cmd->cmd0 = NCMDQP_CMD_ATTR(NCMDQP_ADDR_ASSGN) | NCMDQP_ROC | + NCMDQP_TID(I3C_COMMAND_ADDRESS_ASSIGNMENT) | + NCMDQP_CMD(I3C_CCC_ENTDAA) | NCMDQP_DEV_INDEX(ret) | + NCMDQP_DEV_COUNT(i3c->maxdevs - ret) | NCMDQP_TOC; + + renesas_i3c_wait_xfer(i3c, xfer); + + newdevs = GENMASK(i3c->maxdevs - cmd->rx_count - 1, 0); + newdevs &= ~olddevs; + + for (pos = 0; pos < i3c->maxdevs; pos++) { + if (newdevs & BIT(pos)) + i3c_master_add_i3c_dev_locked(m, i3c->addrs[pos]); + } + + return ret < 0 ? 
ret : 0; +} + +static bool renesas_i3c_supports_ccc_cmd(struct i3c_master_controller *m, + const struct i3c_ccc_cmd *cmd) +{ + if (cmd->ndests > 1) + return false; + + switch (cmd->id) { + case I3C_CCC_ENEC(true): + case I3C_CCC_ENEC(false): + case I3C_CCC_DISEC(true): + case I3C_CCC_DISEC(false): + case I3C_CCC_ENTAS(0, true): + case I3C_CCC_ENTAS(1, true): + case I3C_CCC_ENTAS(2, true): + case I3C_CCC_ENTAS(3, true): + case I3C_CCC_ENTAS(0, false): + case I3C_CCC_ENTAS(1, false): + case I3C_CCC_ENTAS(2, false): + case I3C_CCC_ENTAS(3, false): + case I3C_CCC_RSTDAA(true): + case I3C_CCC_RSTDAA(false): + case I3C_CCC_ENTDAA: + case I3C_CCC_DEFSLVS: + case I3C_CCC_SETMWL(true): + case I3C_CCC_SETMWL(false): + case I3C_CCC_SETMRL(true): + case I3C_CCC_SETMRL(false): + case I3C_CCC_ENTTM: + case I3C_CCC_SETDASA: + case I3C_CCC_SETNEWDA: + case I3C_CCC_GETMWL: + case I3C_CCC_GETMRL: + case I3C_CCC_GETPID: + case I3C_CCC_GETBCR: + case I3C_CCC_GETDCR: + case I3C_CCC_GETSTATUS: + case I3C_CCC_GETACCMST: + case I3C_CCC_GETMXDS: + return true; + default: + return false; + } +} + +static int renesas_i3c_send_ccc_cmd(struct i3c_master_controller *m, + struct i3c_ccc_cmd *ccc) +{ + struct renesas_i3c *i3c = to_renesas_i3c(m); + struct renesas_i3c_xfer *xfer; + struct renesas_i3c_cmd *cmd; + int ret, pos = 0; + + if (ccc->id & I3C_CCC_DIRECT) { + pos = renesas_i3c_get_addr_pos(i3c, ccc->dests[0].addr); + if (pos < 0) + return pos; + } + + xfer = renesas_i3c_alloc_xfer(i3c, 1); + if (!xfer) + return -ENOMEM; + + renesas_i3c_bus_enable(m, true); + + init_completion(&xfer->comp); + cmd = xfer->cmds; + cmd->rnw = ccc->rnw; + cmd->cmd0 = 0; + + /* Calculate the command descriptor. 
*/ + switch (ccc->id) { + case I3C_CCC_SETDASA: + renesas_writel(i3c->regs, DATBAS(pos), + DATBAS_DVSTAD(ccc->dests[0].addr) | + DATBAS_DVDYAD(*(u8 *)ccc->dests[0].payload.data >> 1)); + cmd->cmd0 = NCMDQP_CMD_ATTR(NCMDQP_ADDR_ASSGN) | NCMDQP_ROC | + NCMDQP_TID(I3C_COMMAND_ADDRESS_ASSIGNMENT) | + NCMDQP_CMD(I3C_CCC_SETDASA) | NCMDQP_DEV_INDEX(pos) | + NCMDQP_DEV_COUNT(0) | NCMDQP_TOC; + i3c->internal_state = I3C_INTERNAL_STATE_CONTROLLER_SETDASA; + break; + default: + /* Calculate the command descriptor. */ + cmd->cmd0 = NCMDQP_TID(I3C_COMMAND_WRITE) | NCMDQP_MODE(0) | + NCMDQP_RNW(ccc->rnw) | NCMDQP_CMD(ccc->id) | + NCMDQP_ROC | NCMDQP_TOC | NCMDQP_CP | + NCMDQP_DEV_INDEX(pos); + + if (ccc->rnw) { + cmd->rx_buf = ccc->dests[0].payload.data; + cmd->len = ccc->dests[0].payload.len; + cmd->rx_count = 0; + i3c->internal_state = I3C_INTERNAL_STATE_CONTROLLER_COMMAND_READ; + } else { + cmd->tx_buf = ccc->dests[0].payload.data; + cmd->len = ccc->dests[0].payload.len; + cmd->tx_count = 0; + i3c->internal_state = I3C_INTERNAL_STATE_CONTROLLER_COMMAND_WRITE; + } + } + + renesas_i3c_wait_xfer(i3c, xfer); + + ret = xfer->ret; + if (ret) + ccc->err = I3C_ERROR_M2; + + kfree(xfer); + + return ret; +} + +static int renesas_i3c_priv_xfers(struct i3c_dev_desc *dev, struct i3c_priv_xfer *i3c_xfers, + int i3c_nxfers) +{ + struct i3c_master_controller *m = i3c_dev_get_master(dev); + struct renesas_i3c *i3c = to_renesas_i3c(m); + struct renesas_i3c_i2c_dev_data *data = i3c_dev_get_master_data(dev); + struct renesas_i3c_xfer *xfer; + int i; + + /* Enable I3C bus. 
*/ + renesas_i3c_bus_enable(m, true); + + xfer = renesas_i3c_alloc_xfer(i3c, 1); + if (!xfer) + return -ENOMEM; + + init_completion(&xfer->comp); + + for (i = 0; i < i3c_nxfers; i++) { + struct renesas_i3c_cmd *cmd = xfer->cmds; + + /* Calculate the Transfer Command Descriptor */ + cmd->rnw = i3c_xfers[i].rnw; + cmd->cmd0 = NCMDQP_DEV_INDEX(data->index) | NCMDQP_MODE(0) | + NCMDQP_RNW(cmd->rnw) | NCMDQP_ROC | NCMDQP_TOC; + + if (i3c_xfers[i].rnw) { + cmd->rx_count = 0; + cmd->cmd0 |= NCMDQP_TID(I3C_READ); + cmd->rx_buf = i3c_xfers[i].data.in; + cmd->len = i3c_xfers[i].len; + i3c->internal_state = I3C_INTERNAL_STATE_CONTROLLER_READ; + } else { + cmd->tx_count = 0; + cmd->cmd0 |= NCMDQP_TID(I3C_WRITE); + cmd->tx_buf = i3c_xfers[i].data.out; + cmd->len = i3c_xfers[i].len; + i3c->internal_state = I3C_INTERNAL_STATE_CONTROLLER_WRITE; + } + + if (!i3c_xfers[i].rnw && i3c_xfers[i].len > 4) { + i3c_writel_fifo(i3c->regs + NTDTBP0, cmd->tx_buf, cmd->len); + if (cmd->len > NTDTBP0_DEPTH * sizeof(u32)) + renesas_set_bit(i3c->regs, NTIE, NTIE_TDBEIE0); + } + + renesas_i3c_wait_xfer(i3c, xfer); + } + + return 0; +} + +static int renesas_i3c_attach_i3c_dev(struct i3c_dev_desc *dev) +{ + struct i3c_master_controller *m = i3c_dev_get_master(dev); + struct renesas_i3c *i3c = to_renesas_i3c(m); + struct renesas_i3c_i2c_dev_data *data; + int pos; + + pos = renesas_i3c_get_free_pos(i3c); + if (pos < 0) + return pos; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->index = pos; + i3c->addrs[pos] = dev->info.dyn_addr ? 
: dev->info.static_addr; + i3c->free_pos &= ~BIT(pos); + + renesas_writel(i3c->regs, DATBAS(pos), DATBAS_DVSTAD(dev->info.static_addr) | + datbas_dvdyad_with_parity(i3c->addrs[pos])); + i3c_dev_set_master_data(dev, data); + + return 0; +} + +static int renesas_i3c_reattach_i3c_dev(struct i3c_dev_desc *dev, + u8 old_dyn_addr) +{ + struct i3c_master_controller *m = i3c_dev_get_master(dev); + struct renesas_i3c *i3c = to_renesas_i3c(m); + struct renesas_i3c_i2c_dev_data *data = i3c_dev_get_master_data(dev); + + i3c->addrs[data->index] = dev->info.dyn_addr ? dev->info.dyn_addr : + dev->info.static_addr; + + return 0; +} + +static void renesas_i3c_detach_i3c_dev(struct i3c_dev_desc *dev) +{ + struct renesas_i3c_i2c_dev_data *data = i3c_dev_get_master_data(dev); + struct i3c_master_controller *m = i3c_dev_get_master(dev); + struct renesas_i3c *i3c = to_renesas_i3c(m); + + i3c_dev_set_master_data(dev, NULL); + i3c->addrs[data->index] = 0; + i3c->free_pos |= BIT(data->index); + kfree(data); +} + +static int renesas_i3c_i2c_xfers(struct i2c_dev_desc *dev, + struct i2c_msg *i2c_xfers, + int i2c_nxfers) +{ + struct i3c_master_controller *m = i2c_dev_get_master(dev); + struct renesas_i3c *i3c = to_renesas_i3c(m); + struct renesas_i3c_cmd *cmd; + u8 start_bit = CNDCTL_STCND; + int i; + + struct renesas_i3c_xfer *xfer __free(kfree) = renesas_i3c_alloc_xfer(i3c, 1); + if (!xfer) + return -ENOMEM; + + if (!i2c_nxfers) + return 0; + + renesas_i3c_bus_enable(m, false); + + init_completion(&xfer->comp); + xfer->is_i2c_xfer = true; + cmd = xfer->cmds; + + if (!(renesas_readl(i3c->regs, BCST) & BCST_BFREF)) { + cmd->err = -EBUSY; + return cmd->err; + } + + renesas_writel(i3c->regs, BST, 0); + + renesas_i3c_enqueue_xfer(i3c, xfer); + + for (i = 0; i < i2c_nxfers; i++) { + cmd->i2c_bytes_left = I2C_INIT_MSG; + cmd->i2c_buf = i2c_xfers[i].buf; + cmd->msg = &i2c_xfers[i]; + cmd->i2c_is_last = (i == i2c_nxfers - 1); + + renesas_set_bit(i3c->regs, BIE, BIE_NACKDIE); + 
renesas_set_bit(i3c->regs, NTIE, NTIE_TDBEIE0); + renesas_set_bit(i3c->regs, BIE, BIE_STCNDDIE); + + /* Issue Start condition */ + renesas_set_bit(i3c->regs, CNDCTL, start_bit); + + renesas_set_bit(i3c->regs, NTSTE, NTSTE_TDBEE0); + + wait_for_completion_timeout(&xfer->comp, m->i2c.timeout); + + if (cmd->err) + break; + + start_bit = CNDCTL_SRCND; + } + + renesas_i3c_dequeue_xfer(i3c, xfer); + return cmd->err; +} + +static int renesas_i3c_attach_i2c_dev(struct i2c_dev_desc *dev) +{ + struct i3c_master_controller *m = i2c_dev_get_master(dev); + struct renesas_i3c *i3c = to_renesas_i3c(m); + struct renesas_i3c_i2c_dev_data *data; + int pos; + + pos = renesas_i3c_get_free_pos(i3c); + if (pos < 0) + return pos; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->index = pos; + i3c->addrs[pos] = dev->addr; + i3c->free_pos &= ~BIT(pos); + i2c_dev_set_master_data(dev, data); + + return 0; +} + +static void renesas_i3c_detach_i2c_dev(struct i2c_dev_desc *dev) +{ + struct renesas_i3c_i2c_dev_data *data = i2c_dev_get_master_data(dev); + struct i3c_master_controller *m = i2c_dev_get_master(dev); + struct renesas_i3c *i3c = to_renesas_i3c(m); + + i2c_dev_set_master_data(dev, NULL); + i3c->addrs[data->index] = 0; + i3c->free_pos |= BIT(data->index); + kfree(data); +} + +static irqreturn_t renesas_i3c_tx_isr(int irq, void *data) +{ + struct renesas_i3c *i3c = data; + struct renesas_i3c_xfer *xfer; + struct renesas_i3c_cmd *cmd; + u8 val; + + scoped_guard(spinlock, &i3c->xferqueue.lock) { + xfer = i3c->xferqueue.cur; + cmd = xfer->cmds; + + if (xfer->is_i2c_xfer) { + if (!cmd->i2c_bytes_left) + return IRQ_NONE; + + if (cmd->i2c_bytes_left != I2C_INIT_MSG) { + val = *cmd->i2c_buf; + cmd->i2c_buf++; + cmd->i2c_bytes_left--; + renesas_writel(i3c->regs, NTDTBP0, val); + } + + if (cmd->i2c_bytes_left == 0) { + renesas_clear_bit(i3c->regs, NTIE, NTIE_TDBEIE0); + renesas_set_bit(i3c->regs, BIE, BIE_TENDIE); + } + + /* Clear the Transmit Buffer Empty 
status flag. */ + renesas_clear_bit(i3c->regs, NTST, NTST_TDBEF0); + } else { + i3c_writel_fifo(i3c->regs + NTDTBP0, cmd->tx_buf, cmd->len); + } + } + + return IRQ_HANDLED; +} + +static irqreturn_t renesas_i3c_resp_isr(int irq, void *data) +{ + struct renesas_i3c *i3c = data; + struct renesas_i3c_xfer *xfer; + struct renesas_i3c_cmd *cmd; + u32 resp_descriptor = renesas_readl(i3c->regs, NRSPQP); + u32 bytes_remaining = 0; + u32 ntst, data_len; + int ret = 0; + + scoped_guard(spinlock, &i3c->xferqueue.lock) { + xfer = i3c->xferqueue.cur; + cmd = xfer->cmds; + + /* Clear the Response Queue Full status flag */ + renesas_clear_bit(i3c->regs, NTST, NTST_RSPQFF); + + data_len = NRSPQP_DATA_LEN(resp_descriptor); + + switch (i3c->internal_state) { + case I3C_INTERNAL_STATE_CONTROLLER_ENTDAA: + cmd->rx_count = data_len; + break; + case I3C_INTERNAL_STATE_CONTROLLER_WRITE: + case I3C_INTERNAL_STATE_CONTROLLER_COMMAND_WRITE: + /* Disable the transmit IRQ if it hasn't been disabled already. */ + renesas_clear_bit(i3c->regs, NTIE, NTIE_TDBEIE0); + break; + case I3C_INTERNAL_STATE_CONTROLLER_READ: + case I3C_INTERNAL_STATE_CONTROLLER_COMMAND_READ: + if (NDBSTLV0_RDBLV(renesas_readl(i3c->regs, NDBSTLV0)) && !cmd->err) + bytes_remaining = data_len - cmd->rx_count; + + i3c_readl_fifo(i3c->regs + NTDTBP0, cmd->rx_buf, bytes_remaining); + renesas_clear_bit(i3c->regs, NTIE, NTIE_RDBFIE0); + break; + default: + break; + } + + switch (NRSPQP_ERR_STATUS(resp_descriptor)) { + case NRSPQP_NO_ERROR: + break; + case NRSPQP_ERROR_PARITY: + case NRSPQP_ERROR_IBA_NACK: + case NRSPQP_ERROR_TRANSF_ABORT: + case NRSPQP_ERROR_CRC: + case NRSPQP_ERROR_FRAME: + ret = -EIO; + break; + case NRSPQP_ERROR_OVER_UNDER_FLOW: + ret = -ENOSPC; + break; + case NRSPQP_ERROR_UNSUPPORTED: + ret = -EOPNOTSUPP; + break; + case NRSPQP_ERROR_I2C_W_NACK_ERR: + case NRSPQP_ERROR_ADDRESS_NACK: + default: + ret = -EINVAL; + break; + } + + /* + * If the transfer was aborted, then the abort flag must be cleared + * before
notifying the application that a transfer has completed. + */ + ntst = renesas_readl(i3c->regs, NTST); + if (ntst & NTST_TABTF) + renesas_clear_bit(i3c->regs, BCTL, BCTL_ABT); + + /* Clear error status flags. */ + renesas_clear_bit(i3c->regs, NTST, NTST_TEF | NTST_TABTF); + + xfer->ret = ret; + complete(&xfer->comp); + + xfer = list_first_entry_or_null(&i3c->xferqueue.list, + struct renesas_i3c_xfer, node); + if (xfer) + list_del_init(&xfer->node); + + i3c->xferqueue.cur = xfer; + } + + return IRQ_HANDLED; +} + +static irqreturn_t renesas_i3c_tend_isr(int irq, void *data) +{ + struct renesas_i3c *i3c = data; + struct renesas_i3c_xfer *xfer; + struct renesas_i3c_cmd *cmd; + + scoped_guard(spinlock, &i3c->xferqueue.lock) { + xfer = i3c->xferqueue.cur; + cmd = xfer->cmds; + + if (xfer->is_i2c_xfer) { + if (renesas_readl(i3c->regs, BST) & BST_NACKDF) { + /* We got a NACKIE */ + renesas_readl(i3c->regs, NTDTBP0); /* dummy read */ + renesas_clear_bit(i3c->regs, BST, BST_NACKDF); + cmd->err = -ENXIO; + } else if (cmd->i2c_bytes_left) { + renesas_set_bit(i3c->regs, NTIE, NTIE_TDBEIE0); + return IRQ_NONE; + } + + if (cmd->i2c_is_last || cmd->err) { + renesas_clear_bit(i3c->regs, BIE, BIE_TENDIE); + renesas_set_bit(i3c->regs, BIE, BIE_SPCNDDIE); + renesas_set_bit(i3c->regs, CNDCTL, CNDCTL_SPCND); + } else { + /* Transfer is complete, but do not send STOP */ + renesas_clear_bit(i3c->regs, NTSTE, NTSTE_TDBEE0); + renesas_clear_bit(i3c->regs, BIE, BIE_TENDIE); + xfer->ret = 0; + complete(&xfer->comp); + } + } + + /* Clear the Transmit Buffer Empty status flag. 
*/ + renesas_clear_bit(i3c->regs, BST, BST_TENDF); + } + + return IRQ_HANDLED; +} + +static irqreturn_t renesas_i3c_rx_isr(int irq, void *data) +{ + struct renesas_i3c *i3c = data; + struct renesas_i3c_xfer *xfer; + struct renesas_i3c_cmd *cmd; + int read_bytes; + + /* If resp_isr already read the data and updated 'xfer', we can just leave */ + if (!(renesas_readl(i3c->regs, NTIE) & NTIE_RDBFIE0)) + return IRQ_NONE; + + scoped_guard(spinlock, &i3c->xferqueue.lock) { + xfer = i3c->xferqueue.cur; + cmd = xfer->cmds; + + if (xfer->is_i2c_xfer) { + if (!cmd->i2c_bytes_left) + return IRQ_NONE; + + if (cmd->i2c_bytes_left == I2C_INIT_MSG) { + cmd->i2c_bytes_left = cmd->msg->len; + renesas_set_bit(i3c->regs, SCSTRCTL, SCSTRCTL_RWE); + renesas_readl(i3c->regs, NTDTBP0); /* dummy read */ + if (cmd->i2c_bytes_left == 1) + renesas_writel(i3c->regs, ACKCTL, ACKCTL_ACKT | ACKCTL_ACKTWP); + return IRQ_HANDLED; + } + + if (cmd->i2c_bytes_left == 1) { + /* STOP must come before we set ACKCTL! */ + if (cmd->i2c_is_last) { + renesas_set_bit(i3c->regs, BIE, BIE_SPCNDDIE); + renesas_clear_bit(i3c->regs, BST, BST_SPCNDDF); + renesas_set_bit(i3c->regs, CNDCTL, CNDCTL_SPCND); + } + renesas_writel(i3c->regs, ACKCTL, ACKCTL_ACKT | ACKCTL_ACKTWP); + } else { + renesas_writel(i3c->regs, ACKCTL, ACKCTL_ACKTWP); + } + + /* Reading acks the RIE interrupt */ + *cmd->i2c_buf = renesas_readl(i3c->regs, NTDTBP0); + cmd->i2c_buf++; + cmd->i2c_bytes_left--; + } else { + read_bytes = NDBSTLV0_RDBLV(renesas_readl(i3c->regs, NDBSTLV0)) * sizeof(u32); + i3c_readl_fifo(i3c->regs + NTDTBP0, cmd->rx_buf, read_bytes); + cmd->rx_count = read_bytes; + } + + /* Clear the Read Buffer Full status flag. 
*/ + renesas_clear_bit(i3c->regs, NTST, NTST_RDBFF0); + } + + return IRQ_HANDLED; +} + +static irqreturn_t renesas_i3c_stop_isr(int irq, void *data) +{ + struct renesas_i3c *i3c = data; + struct renesas_i3c_xfer *xfer; + + scoped_guard(spinlock, &i3c->xferqueue.lock) { + xfer = i3c->xferqueue.cur; + + /* read back registers to confirm writes have fully propagated */ + renesas_writel(i3c->regs, BST, 0); + renesas_readl(i3c->regs, BST); + renesas_writel(i3c->regs, BIE, 0); + renesas_clear_bit(i3c->regs, NTST, NTST_TDBEF0 | NTST_RDBFF0); + renesas_clear_bit(i3c->regs, SCSTRCTL, SCSTRCTL_RWE); + + xfer->ret = 0; + complete(&xfer->comp); + } + + return IRQ_HANDLED; +} + +static irqreturn_t renesas_i3c_start_isr(int irq, void *data) +{ + struct renesas_i3c *i3c = data; + struct renesas_i3c_xfer *xfer; + struct renesas_i3c_cmd *cmd; + u8 val; + + scoped_guard(spinlock, &i3c->xferqueue.lock) { + xfer = i3c->xferqueue.cur; + cmd = xfer->cmds; + + if (xfer->is_i2c_xfer) { + if (!cmd->i2c_bytes_left) + return IRQ_NONE; + + if (cmd->i2c_bytes_left == I2C_INIT_MSG) { + if (cmd->msg->flags & I2C_M_RD) { + /* On read, switch over to receive interrupt */ + renesas_clear_bit(i3c->regs, NTIE, NTIE_TDBEIE0); + renesas_set_bit(i3c->regs, NTIE, NTIE_RDBFIE0); + } else { + /* On write, initialize length */ + cmd->i2c_bytes_left = cmd->msg->len; + } + + val = i2c_8bit_addr_from_msg(cmd->msg); + renesas_writel(i3c->regs, NTDTBP0, val); + } + } + + renesas_clear_bit(i3c->regs, BIE, BIE_STCNDDIE); + renesas_clear_bit(i3c->regs, BST, BST_STCNDDF); + } + + return IRQ_HANDLED; +} + +static const struct i3c_master_controller_ops renesas_i3c_ops = { + .bus_init = renesas_i3c_bus_init, + .bus_cleanup = renesas_i3c_bus_cleanup, + .attach_i3c_dev = renesas_i3c_attach_i3c_dev, + .reattach_i3c_dev = renesas_i3c_reattach_i3c_dev, + .detach_i3c_dev = renesas_i3c_detach_i3c_dev, + .do_daa = renesas_i3c_daa, + .supports_ccc_cmd = renesas_i3c_supports_ccc_cmd, + .send_ccc_cmd = renesas_i3c_send_ccc_cmd, + 
.priv_xfers = renesas_i3c_priv_xfers, + .attach_i2c_dev = renesas_i3c_attach_i2c_dev, + .detach_i2c_dev = renesas_i3c_detach_i2c_dev, + .i2c_xfers = renesas_i3c_i2c_xfers, +}; + +static const struct renesas_i3c_irq_desc renesas_i3c_irqs[] = { + { .name = "resp", .isr = renesas_i3c_resp_isr, .desc = "i3c-resp" }, + { .name = "rx", .isr = renesas_i3c_rx_isr, .desc = "i3c-rx" }, + { .name = "tx", .isr = renesas_i3c_tx_isr, .desc = "i3c-tx" }, + { .name = "st", .isr = renesas_i3c_start_isr, .desc = "i3c-start" }, + { .name = "sp", .isr = renesas_i3c_stop_isr, .desc = "i3c-stop" }, + { .name = "tend", .isr = renesas_i3c_tend_isr, .desc = "i3c-tend" }, + { .name = "nack", .isr = renesas_i3c_tend_isr, .desc = "i3c-nack" }, +}; + +static int renesas_i3c_probe(struct platform_device *pdev) +{ + struct renesas_i3c *i3c; + struct reset_control *reset; + struct clk *clk; + const struct renesas_i3c_config *config = of_device_get_match_data(&pdev->dev); + int ret, i; + + if (!config) + return -ENODATA; + + i3c = devm_kzalloc(&pdev->dev, sizeof(*i3c), GFP_KERNEL); + if (!i3c) + return -ENOMEM; + + i3c->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(i3c->regs)) + return PTR_ERR(i3c->regs); + + clk = devm_clk_get_enabled(&pdev->dev, "pclk"); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + if (config->has_pclkrw) { + clk = devm_clk_get_enabled(&pdev->dev, "pclkrw"); + if (IS_ERR(clk)) + return PTR_ERR(clk); + } + + i3c->tclk = devm_clk_get_enabled(&pdev->dev, "tclk"); + if (IS_ERR(i3c->tclk)) + return PTR_ERR(i3c->tclk); + + reset = devm_reset_control_get_optional_exclusive_deasserted(&pdev->dev, "tresetn"); + if (IS_ERR(reset)) + return dev_err_probe(&pdev->dev, PTR_ERR(reset), + "Error: missing tresetn ctrl\n"); + + reset = devm_reset_control_get_optional_exclusive_deasserted(&pdev->dev, "presetn"); + if (IS_ERR(reset)) + return dev_err_probe(&pdev->dev, PTR_ERR(reset), + "Error: missing presetn ctrl\n"); + + spin_lock_init(&i3c->xferqueue.lock); + 
INIT_LIST_HEAD(&i3c->xferqueue.list); + + ret = renesas_i3c_reset(i3c); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(renesas_i3c_irqs); i++) { + ret = platform_get_irq_byname(pdev, renesas_i3c_irqs[i].name); + if (ret < 0) + return ret; + + ret = devm_request_irq(&pdev->dev, ret, renesas_i3c_irqs[i].isr, + 0, renesas_i3c_irqs[i].desc, i3c); + if (ret) + return ret; + } + + platform_set_drvdata(pdev, i3c); + + i3c->maxdevs = RENESAS_I3C_MAX_DEVS; + i3c->free_pos = GENMASK(i3c->maxdevs - 1, 0); + + return i3c_master_register(&i3c->base, &pdev->dev, &renesas_i3c_ops, false); +} + +static void renesas_i3c_remove(struct platform_device *pdev) +{ + struct renesas_i3c *i3c = platform_get_drvdata(pdev); + + i3c_master_unregister(&i3c->base); +} + +static const struct renesas_i3c_config empty_i3c_config = { +}; + +static const struct renesas_i3c_config r9a09g047_i3c_config = { + .has_pclkrw = 1, +}; + +static const struct of_device_id renesas_i3c_of_ids[] = { + { .compatible = "renesas,r9a08g045-i3c", .data = &empty_i3c_config }, + { .compatible = "renesas,r9a09g047-i3c", .data = &r9a09g047_i3c_config }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, renesas_i3c_of_ids); + +static struct platform_driver renesas_i3c = { + .probe = renesas_i3c_probe, + .remove = renesas_i3c_remove, + .driver = { + .name = "renesas-i3c", + .of_match_table = renesas_i3c_of_ids, + }, +}; +module_platform_driver(renesas_i3c); + +MODULE_AUTHOR("Wolfram Sang "); +MODULE_AUTHOR("Renesas BSP teams"); +MODULE_DESCRIPTION("Renesas I3C controller driver"); +MODULE_LICENSE("GPL"); From bc4a09d8e79cadccdd505f47b01903a80bc666e7 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Wed, 30 Jul 2025 08:37:19 +0800 Subject: [PATCH 1250/2411] i3c: master: svc: Fix npcm845 FIFO_EMPTY quirk In a private write transfer, the driver pre-fills the FIFO to work around the FIFO_EMPTY quirk. However, if an IBIWON event occurs, the hardware emits a NACK and the driver initiates a retry. 
During the retry, driver attempts to pre-fill the FIFO again if there is remaining data, but since the FIFO is already full, this leads to data loss. Check available space in FIFO to prevent overflow. Fixes: 4008a74e0f9b ("i3c: master: svc: Fix npcm845 FIFO empty issue") Signed-off-by: Stanley Chu Link: https://lore.kernel.org/r/20250730003719.1825593-1-yschu@nuvoton.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/svc-i3c-master.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 6d0eea80ea34..b2b5db3ed5bb 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -104,6 +104,7 @@ #define SVC_I3C_MDATACTRL_TXTRIG_FIFO_NOT_FULL GENMASK(5, 4) #define SVC_I3C_MDATACTRL_RXTRIG_FIFO_NOT_EMPTY 0 #define SVC_I3C_MDATACTRL_RXCOUNT(x) FIELD_GET(GENMASK(28, 24), (x)) +#define SVC_I3C_MDATACTRL_TXCOUNT(x) FIELD_GET(GENMASK(20, 16), (x)) #define SVC_I3C_MDATACTRL_TXFULL BIT(30) #define SVC_I3C_MDATACTRL_RXEMPTY BIT(31) @@ -1304,14 +1305,19 @@ static int svc_i3c_master_xfer(struct svc_i3c_master *master, * FIFO start filling as soon as possible after EmitStartAddr. */ if (svc_has_quirk(master, SVC_I3C_QUIRK_FIFO_EMPTY) && !rnw && xfer_len) { - u32 end = xfer_len > SVC_I3C_FIFO_SIZE ? 0 : SVC_I3C_MWDATAB_END; - u32 len = min_t(u32, xfer_len, SVC_I3C_FIFO_SIZE); + u32 space, end, len; - writesb(master->regs + SVC_I3C_MWDATAB1, out, len - 1); - /* Mark END bit if this is the last byte */ - writel(out[len - 1] | end, master->regs + SVC_I3C_MWDATAB); - xfer_len -= len; - out += len; + reg = readl(master->regs + SVC_I3C_MDATACTRL); + space = SVC_I3C_FIFO_SIZE - SVC_I3C_MDATACTRL_TXCOUNT(reg); + if (space) { + end = xfer_len > space ? 
0 : SVC_I3C_MWDATAB_END; + len = min_t(u32, xfer_len, space); + writesb(master->regs + SVC_I3C_MWDATAB1, out, len - 1); + /* Mark END bit if this is the last byte */ + writel(out[len - 1] | end, master->regs + SVC_I3C_MWDATAB); + xfer_len -= len; + out += len; + } } ret = readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg, From 0c2ce4fba48c3d3f5a2e7c8d1f9bb176969e5268 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 4 Jul 2025 10:54:16 +0300 Subject: [PATCH 1251/2411] i3c: master: svc: Remove redundant pm_runtime_mark_last_busy() calls pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-redundant explicit call to pm_runtime_mark_last_busy(). Signed-off-by: Sakari Ailus Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250704075416.3218647-1-sakari.ailus@linux.intel.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/svc-i3c-master.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index b2b5db3ed5bb..701ae165b25b 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -665,7 +665,6 @@ static int svc_i3c_master_set_speed(struct i3c_master_controller *m, } rpm_out: - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return ret; @@ -780,7 +779,6 @@ static int svc_i3c_master_bus_init(struct i3c_master_controller *m) goto rpm_out; rpm_out: - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return ret; @@ -802,7 +800,6 @@ static void svc_i3c_master_bus_cleanup(struct i3c_master_controller *m) /* Disable master */ writel(0, master->regs + SVC_I3C_MCONFIG); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); } @@ -1208,7 +1205,6 @@ static int svc_i3c_master_do_daa(struct i3c_master_controller *m)
dev_err(master->dev, "Cannot handle such a list of devices"); rpm_out: - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return ret; @@ -1517,7 +1513,6 @@ static void svc_i3c_master_enqueue_xfer(struct svc_i3c_master *master, } spin_unlock_irqrestore(&master->xferqueue.lock, flags); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); } @@ -1807,7 +1802,6 @@ static int svc_i3c_master_disable_ibi(struct i3c_dev_desc *dev) ret = i3c_master_disec_locked(m, dev->info.dyn_addr, I3C_CCC_EVENT_SIR); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return ret; @@ -1840,7 +1834,6 @@ static int svc_i3c_master_disable_hotjoin(struct i3c_master_controller *m) if (!master->enabled_events) svc_i3c_master_disable_interrupts(master); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return 0; @@ -1960,7 +1953,6 @@ static int svc_i3c_master_probe(struct platform_device *pdev) if (ret) goto rpm_disable; - pm_runtime_mark_last_busy(&pdev->dev); pm_runtime_put_autosuspend(&pdev->dev); return 0; From 5fa62d4ec49a26c5ce747d6b0c205d6b30396bbc Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 4 Jul 2025 10:54:17 +0300 Subject: [PATCH 1252/2411] i3c: dw: Remove redundant pm_runtime_mark_last_busy() calls pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-redundant explicit call to pm_runtime_mark_last_busy().
Signed-off-by: Sakari Ailus Link: https://lore.kernel.org/r/20250704075417.3218742-1-sakari.ailus@linux.intel.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/dw-i3c-master.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index e61be28cd1e3..974122b2d20e 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -682,7 +682,6 @@ static int dw_i3c_master_bus_init(struct i3c_master_controller *m) dw_i3c_master_enable(master); rpm_out: - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return ret; } @@ -812,7 +811,6 @@ static int dw_i3c_master_send_ccc_cmd(struct i3c_master_controller *m, else ret = dw_i3c_ccc_set(master, ccc); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return ret; } @@ -895,7 +893,6 @@ static int dw_i3c_master_daa(struct i3c_master_controller *m) dw_i3c_master_free_xfer(xfer); rpm_out: - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return ret; } @@ -981,7 +978,6 @@ static int dw_i3c_master_priv_xfers(struct i3c_dev_desc *dev, ret = xfer->ret; dw_i3c_master_free_xfer(xfer); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return ret; } @@ -1131,7 +1127,6 @@ static int dw_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, ret = xfer->ret; dw_i3c_master_free_xfer(xfer); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return ret; } @@ -1299,7 +1294,6 @@ static int dw_i3c_master_disable_hotjoin(struct i3c_master_controller *m) writel(readl(master->regs + DEVICE_CTRL) | DEV_CTRL_HOT_JOIN_NACK, master->regs + DEVICE_CTRL); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return 0; } @@ -1325,7 +1319,6 @@ static int dw_i3c_master_enable_ibi(struct i3c_dev_desc *dev) if (rc) { dw_i3c_master_set_sir_enabled(master, dev, data->index, 
false); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); } @@ -1345,7 +1338,6 @@ static int dw_i3c_master_disable_ibi(struct i3c_dev_desc *dev) dw_i3c_master_set_sir_enabled(master, dev, data->index, false); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return 0; } From 3b661ca549b9e5bb11d0bc97ada6110aac3282d2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 17 Jul 2025 14:00:47 +0200 Subject: [PATCH 1253/2411] i3c: add missing include to internal header LKP found a random config which failed to build because IO accessors were not defined: In file included from drivers/i3c/master.c:21: drivers/i3c/internals.h: In function 'i3c_writel_fifo': >> drivers/i3c/internals.h:35:9: error: implicit declaration of function 'writesl' [-Werror=implicit-function-declaration] Add the proper header to where the IO accessors are used. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507150208.BZDzzJ5E-lkp@intel.com/ Signed-off-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250717120046.9022-2-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/internals.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i3c/internals.h b/drivers/i3c/internals.h index 6a11437fee47..0d857cc68cc5 100644 --- a/drivers/i3c/internals.h +++ b/drivers/i3c/internals.h @@ -9,6 +9,7 @@ #define I3C_INTERNALS_H #include +#include void i3c_bus_normaluse_lock(struct i3c_bus *bus); void i3c_bus_normaluse_unlock(struct i3c_bus *bus); From 9063de636cee235bd736ab3e4895e2826e606dea Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Fri, 25 Jul 2025 12:33:04 +0200 Subject: [PATCH 1254/2411] kcm: Fix splice support Flags passed in for splice() syscall should not end up in skb_recv_datagram(). As SPLICE_F_NONBLOCK == MSG_PEEK, kernel gets confused: skb isn't unlinked from a receive queue, while strp_msg::offset and strp_msg::full_len are updated. 
Unbreak the logic a bit more by mapping both O_NONBLOCK and SPLICE_F_NONBLOCK to MSG_DONTWAIT. This way we align with man splice(2) in regard to errno EAGAIN: SPLICE_F_NONBLOCK was specified in flags or one of the file descriptors had been marked as nonblocking (O_NONBLOCK), and the operation would block. Fixes: 5121197ecc5d ("kcm: close race conditions on sk_receive_queue") Fixes: 91687355b927 ("kcm: Splice support") Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20250725-kcm-splice-v1-1-9a725ad2ee71@rbox.co Signed-off-by: Jakub Kicinski --- net/kcm/kcmsock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index a0be3896a934..a4971e6fa943 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1029,6 +1030,11 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, ssize_t copied; struct sk_buff *skb; + if (sock->file->f_flags & O_NONBLOCK || flags & SPLICE_F_NONBLOCK) + flags = MSG_DONTWAIT; + else + flags = 0; + /* Only support splice for SOCKSEQPACKET */ skb = skb_recv_datagram(sk, flags, &err); From 3fa840230f534385b34a4f39c8dd313fbe723f05 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 25 Jul 2025 19:12:09 +0200 Subject: [PATCH 1255/2411] net: dpaa: fix device leak when querying time stamp info Make sure to drop the reference to the ptp device taken by of_find_device_by_node() when querying the time stamping capabilities. Note that holding a reference to the ptp device does not prevent its driver data from going away. 
Fixes: 17ae0b0ee9db ("dpaa_eth: add the get_ts_info interface for ethtool") Cc: stable@vger.kernel.org # 4.19 Cc: Yangbo Lu Signed-off-by: Johan Hovold Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250725171213.880-2-johan@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c index 0c588e03b15e..d09e456f14c0 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -371,8 +371,10 @@ static int dpaa_get_ts_info(struct net_device *net_dev, of_node_put(ptp_node); } - if (ptp_dev) + if (ptp_dev) { ptp = platform_get_drvdata(ptp_dev); + put_device(&ptp_dev->dev); + } if (ptp) info->phc_index = ptp->phc_index; From 70458f8a6b44daf3ad39f0d9b6d1097c8a7780ed Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 25 Jul 2025 19:12:10 +0200 Subject: [PATCH 1256/2411] net: enetc: fix device and OF node leak at probe Make sure to drop the references to the IERB OF node and platform device taken by of_parse_phandle() and of_find_device_by_node() during probe. 
Fixes: e7d48e5fbf30 ("net: enetc: add a mini driver for the Integrated Endpoint Register Block") Cc: stable@vger.kernel.org # 5.13 Cc: Vladimir Oltean Signed-off-by: Johan Hovold Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250725171213.880-3-johan@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_pf.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index f63a29e2e031..de0fb272c847 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -829,19 +829,29 @@ static int enetc_pf_register_with_ierb(struct pci_dev *pdev) { struct platform_device *ierb_pdev; struct device_node *ierb_node; + int ret; ierb_node = of_find_compatible_node(NULL, NULL, "fsl,ls1028a-enetc-ierb"); - if (!ierb_node || !of_device_is_available(ierb_node)) + if (!ierb_node) return -ENODEV; + if (!of_device_is_available(ierb_node)) { + of_node_put(ierb_node); + return -ENODEV; + } + ierb_pdev = of_find_device_by_node(ierb_node); of_node_put(ierb_node); if (!ierb_pdev) return -EPROBE_DEFER; - return enetc_ierb_register_pf(ierb_pdev, pdev); + ret = enetc_ierb_register_pf(ierb_pdev, pdev); + + put_device(&ierb_pdev->dev); + + return ret; } static const struct enetc_si_ops enetc_psi_ops = { From da717540acd34e5056e3fa35791d50f6b3303f55 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 25 Jul 2025 19:12:11 +0200 Subject: [PATCH 1257/2411] net: gianfar: fix device leak when querying time stamp info Make sure to drop the reference to the ptp device taken by of_find_device_by_node() when querying the time stamping capabilities. Note that holding a reference to the ptp device does not prevent its driver data from going away. 
Fixes: 7349a74ea75c ("net: ethernet: gianfar_ethtool: get phc index through drvdata") Cc: stable@vger.kernel.org # 4.18 Cc: Yangbo Lu Signed-off-by: Johan Hovold Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250725171213.880-4-johan@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/gianfar_ethtool.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index 28f53cf2a174..5fd1f7327680 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -1475,8 +1475,10 @@ static int gfar_get_ts_info(struct net_device *dev, if (ptp_node) { ptp_dev = of_find_device_by_node(ptp_node); of_node_put(ptp_node); - if (ptp_dev) + if (ptp_dev) { ptp = platform_get_drvdata(ptp_dev); + put_device(&ptp_dev->dev); + } } if (ptp) From 3e13274ca8750823e8b68181bdf185d238febe0d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 25 Jul 2025 19:12:12 +0200 Subject: [PATCH 1258/2411] net: mtk_eth_soc: fix device leak at probe The reference count to the WED devices has already been incremented when looking them up using of_find_device_by_node() so drop the bogus additional reference taken during probe. 
Fixes: 804775dfc288 ("net: ethernet: mtk_eth_soc: add support for Wireless Ethernet Dispatch (WED)") Cc: stable@vger.kernel.org # 5.19 Cc: Felix Fietkau Signed-off-by: Johan Hovold Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250725171213.880-5-johan@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_wed.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 73c26fcfd85e..0a80d8f8cff7 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -2782,7 +2782,6 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth, if (!pdev) goto err_of_node_put; - get_device(&pdev->dev); irq = platform_get_irq(pdev, 0); if (irq < 0) goto err_put_device; From e05c54974a05ab19658433545d6ced88d9075cf0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 25 Jul 2025 19:12:13 +0200 Subject: [PATCH 1259/2411] net: ti: icss-iep: fix device and OF node leaks at probe Make sure to drop the references to the IEP OF node and device taken by of_parse_phandle() and of_find_device_by_node() when looking up IEP devices during probe. Drop the bogus additional reference taken on successful lookup so that the device is released correctly by icss_iep_put(). 
Fixes: c1e0230eeaab ("net: ti: icss-iep: Add IEP driver") Cc: stable@vger.kernel.org # 6.6 Cc: Roger Quadros Signed-off-by: Johan Hovold Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250725171213.880-6-johan@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icss_iep.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c index 2a1c43316f46..50bfbc2779e4 100644 --- a/drivers/net/ethernet/ti/icssg/icss_iep.c +++ b/drivers/net/ethernet/ti/icssg/icss_iep.c @@ -685,11 +685,17 @@ struct icss_iep *icss_iep_get_idx(struct device_node *np, int idx) struct platform_device *pdev; struct device_node *iep_np; struct icss_iep *iep; + int ret; iep_np = of_parse_phandle(np, "ti,iep", idx); - if (!iep_np || !of_device_is_available(iep_np)) + if (!iep_np) return ERR_PTR(-ENODEV); + if (!of_device_is_available(iep_np)) { + of_node_put(iep_np); + return ERR_PTR(-ENODEV); + } + pdev = of_find_device_by_node(iep_np); of_node_put(iep_np); @@ -698,21 +704,28 @@ struct icss_iep *icss_iep_get_idx(struct device_node *np, int idx) return ERR_PTR(-EPROBE_DEFER); iep = platform_get_drvdata(pdev); - if (!iep) - return ERR_PTR(-EPROBE_DEFER); + if (!iep) { + ret = -EPROBE_DEFER; + goto err_put_pdev; + } device_lock(iep->dev); if (iep->client_np) { device_unlock(iep->dev); dev_err(iep->dev, "IEP is already acquired by %s", iep->client_np->name); - return ERR_PTR(-EBUSY); + ret = -EBUSY; + goto err_put_pdev; } iep->client_np = np; device_unlock(iep->dev); - get_device(iep->dev); return iep; + +err_put_pdev: + put_device(&pdev->dev); + + return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(icss_iep_get_idx); From 2da4def0f487f24bbb0cece3bb2bcdcb918a0b72 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 25 Jul 2025 18:08:46 -0700 Subject: [PATCH 1260/2411] netpoll: prevent hanging NAPI when netcons gets enabled Paolo spotted hangs in NIPA running 
driver tests against virtio. The tests hang in virtnet_close() -> virtnet_napi_tx_disable(). The problem is only reproducible if running multiple of our tests in sequence (I used TEST_PROGS="xdp.py ping.py netcons_basic.sh \ netpoll_basic.py stats.py"). Initial suspicion was that this is a simple case of double-disable of NAPI, but instrumenting the code reveals: Deadlocked on NAPI ffff888007cd82c0 (virtnet_poll_tx): state: 0x37, disabled: false, owner: 0, listed: false, weight: 64 The NAPI was not in fact disabled, owner is 0 (rather than -1), so the NAPI "thinks" it's scheduled for CPU 0 but it's not listed (!list_empty(&n->poll_list) => false). It seems odd that normal NAPI processing would wedge itself like this. Better suspicion is that netpoll gets enabled while NAPI is polling, and also grabs the NAPI instance. This confuses napi_complete_done(): [netpoll] [normal NAPI] napi_poll() have = netpoll_poll_lock() rcu_access_pointer(dev->npinfo) return NULL # no netpoll __napi_poll() ->poll(->weight) poll_napi() cmpxchg(->poll_owner, -1, cpu) poll_one_napi() set_bit(NAPI_STATE_NPSVC, ->state) napi_complete_done() if (NAPIF_STATE_NPSVC) return false # exit without clearing SCHED This feels very unlikely, but perhaps virtio has some interactions with the hypervisor in the NAPI ->poll that makes the race window larger? Best I could do to prove the theory was to add and trigger this warning in napi_poll (just before netpoll_poll_unlock()): WARN_ONCE(!have && rcu_access_pointer(n->dev->npinfo) && napi_is_scheduled(n) && list_empty(&n->poll_list), "NAPI race with netpoll %px", n); If this warning hits the next virtio_close() will hang. This patch survived 30 test iterations without a hang (without it the longest clean run was around 10). Credit for triggering this goes to Breno's recent netconsole tests.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Paolo Abeni Link: https://lore.kernel.org/c5a93ed1-9abe-4880-a3bb-8d1678018b1d@redhat.com Acked-by: Jason Wang Reviewed-by: Xuan Zhuo Link: https://patch.msgid.link/20250726010846.1105875-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/netpoll.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a1da97b5b30b..5f65b62346d4 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -768,6 +768,13 @@ int netpoll_setup(struct netpoll *np) if (err) goto flush; rtnl_unlock(); + + /* Make sure all NAPI polls which started before dev->npinfo + * was visible have exited before we start calling NAPI poll. + * NAPI skips locking if dev->npinfo is NULL. + */ + synchronize_rcu(); + return 0; flush: From 6fb5ff63b35b7e849cc8510957f25753f87f63d2 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sat, 26 Jul 2025 16:03:07 +0200 Subject: [PATCH 1261/2411] phy: mscc: Fix parsing of unicast frames According to the 1588 standard, it is possible to use both unicast and multicast frames to send the PTP information. It was noticed that if the frames were unicast they were not processed by the analyzer meaning that they were not timestamped. Therefore fix this to match also these unicast frames. 
Fixes: ab2bf9339357 ("net: phy: mscc: 1588 block initialization") Signed-off-by: Horatiu Vultur Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250726140307.3039694-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mscc/mscc_ptp.c | 1 + drivers/net/phy/mscc/mscc_ptp.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index 6b800081eed5..275706de5847 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -900,6 +900,7 @@ static int vsc85xx_eth1_conf(struct phy_device *phydev, enum ts_blk blk, get_unaligned_be32(ptp_multicast)); } else { val |= ANA_ETH1_FLOW_ADDR_MATCH2_ANY_MULTICAST; + val |= ANA_ETH1_FLOW_ADDR_MATCH2_ANY_UNICAST; vsc85xx_ts_write_csr(phydev, blk, MSCC_ANA_ETH1_FLOW_ADDR_MATCH2(0), val); vsc85xx_ts_write_csr(phydev, blk, diff --git a/drivers/net/phy/mscc/mscc_ptp.h b/drivers/net/phy/mscc/mscc_ptp.h index da3465360e90..ae9ad925bfa8 100644 --- a/drivers/net/phy/mscc/mscc_ptp.h +++ b/drivers/net/phy/mscc/mscc_ptp.h @@ -98,6 +98,7 @@ #define MSCC_ANA_ETH1_FLOW_ADDR_MATCH2(x) (MSCC_ANA_ETH1_FLOW_ENA(x) + 3) #define ANA_ETH1_FLOW_ADDR_MATCH2_MASK_MASK GENMASK(22, 20) #define ANA_ETH1_FLOW_ADDR_MATCH2_ANY_MULTICAST 0x400000 +#define ANA_ETH1_FLOW_ADDR_MATCH2_ANY_UNICAST 0x200000 #define ANA_ETH1_FLOW_ADDR_MATCH2_FULL_ADDR 0x100000 #define ANA_ETH1_FLOW_ADDR_MATCH2_SRC_DEST_MASK GENMASK(17, 16) #define ANA_ETH1_FLOW_ADDR_MATCH2_SRC_DEST 0x020000 From f2aa00e4f65efcf25ff6bc8198e21f031e7b9b1b Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Mon, 28 Jul 2025 10:35:24 +0200 Subject: [PATCH 1262/2411] net: ipa: add IPA v5.1 and v5.5 to ipa_version_string() Handle the case for v5.1 and v5.5 instead of returning "0.0". Also reword the comment below since I don't see any evidence of such a check happening, and - since 5.5 has been missing - can happen. 
Fixes: 3aac8ec1c028 ("net: ipa: add some new IPA versions") Signed-off-by: Luca Weiss Reviewed-by: Dawid Osuchowski Reviewed-by: Alex Elder Link: https://patch.msgid.link/20250728-ipa-5-1-5-5-version_string-v1-1-d7a5623d7ece@fairphone.com Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_sysfs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ipa/ipa_sysfs.c b/drivers/net/ipa/ipa_sysfs.c index a59bd215494c..a53e9e6f6cdf 100644 --- a/drivers/net/ipa/ipa_sysfs.c +++ b/drivers/net/ipa/ipa_sysfs.c @@ -37,8 +37,12 @@ static const char *ipa_version_string(struct ipa *ipa) return "4.11"; case IPA_VERSION_5_0: return "5.0"; + case IPA_VERSION_5_1: + return "5.1"; + case IPA_VERSION_5_5: + return "5.5"; default: - return "0.0"; /* Won't happen (checked at probe time) */ + return "0.0"; /* Should not happen */ } } From 57ec5a8735dc5dccd1ee68afdb1114956a3fce0d Mon Sep 17 00:00:00 2001 From: Buday Csaba Date: Mon, 28 Jul 2025 17:29:16 +0200 Subject: [PATCH 1263/2411] net: phy: smsc: add proper reset flags for LAN8710A MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the LAN8710A datasheet (Rev. B, section 3.8.5.1), a hardware reset is required after power-on, and the reference clock (REF_CLK) must be established before asserting reset. 
Signed-off-by: Buday Csaba Cc: Csókás Bence Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250728152916.46249-2-csokas.bence@prolan.hu Signed-off-by: Jakub Kicinski --- drivers/net/phy/smsc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index b6489da5cfcd..48487149c225 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -785,6 +785,7 @@ static struct phy_driver smsc_phy_driver[] = { /* PHY_BASIC_FEATURES */ + .flags = PHY_RST_AFTER_CLK_EN, .probe = smsc_phy_probe, /* basic functions */ From 3b98c9352511db627b606477fc7944b2fa53a165 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bence=20Cs=C3=B3k=C3=A1s?= Date: Mon, 28 Jul 2025 17:34:55 +0200 Subject: [PATCH 1264/2411] net: mdio_bus: Use devm for getting reset GPIO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit bafbdd527d56 ("phylib: Add device reset GPIO support") removed devm_gpiod_get_optional() in favor of the non-devres managed fwnode_get_named_gpiod(). When it was kind-of reverted by commit 40ba6a12a548 ("net: mdio: switch to using gpiod_get_optional()"), the devm functionality was not reinstated. Nor was the GPIO unclaimed on device remove. This leads to the GPIO being claimed indefinitely, even when the device and/or the driver gets removed. 
Fixes: bafbdd527d56 ("phylib: Add device reset GPIO support") Fixes: 40ba6a12a548 ("net: mdio: switch to using gpiod_get_optional()") Cc: Csaba Buday Signed-off-by: Bence Csókás Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250728153455.47190-2-csokas.bence@prolan.hu Signed-off-by: Jakub Kicinski --- drivers/net/phy/mdio_bus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index fda2e27c1810..24bdab5bdd24 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -36,8 +36,8 @@ static int mdiobus_register_gpiod(struct mdio_device *mdiodev) { /* Deassert the optional reset signal */ - mdiodev->reset_gpio = gpiod_get_optional(&mdiodev->dev, - "reset", GPIOD_OUT_LOW); + mdiodev->reset_gpio = devm_gpiod_get_optional(&mdiodev->dev, + "reset", GPIOD_OUT_LOW); if (IS_ERR(mdiodev->reset_gpio)) return PTR_ERR(mdiodev->reset_gpio); From de9c4861fb42f0cd72da844c3c34f692d5895b7b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Jul 2025 08:02:07 +0000 Subject: [PATCH 1265/2411] pptp: ensure minimal skb length in pptp_xmit() Commit aabc6596ffb3 ("net: ppp: Add bound checking for skb data on ppp_sync_txmung") fixed ppp_sync_txmunge() We need a similar fix in pptp_xmit(), otherwise we might read uninit data as reported by syzbot. 
BUG: KMSAN: uninit-value in pptp_xmit+0xc34/0x2720 drivers/net/ppp/pptp.c:193 pptp_xmit+0xc34/0x2720 drivers/net/ppp/pptp.c:193 ppp_channel_bridge_input drivers/net/ppp/ppp_generic.c:2290 [inline] ppp_input+0x1d6/0xe60 drivers/net/ppp/ppp_generic.c:2314 pppoe_rcv_core+0x1e8/0x760 drivers/net/ppp/pppoe.c:379 sk_backlog_rcv+0x142/0x420 include/net/sock.h:1148 __release_sock+0x1d3/0x330 net/core/sock.c:3213 release_sock+0x6b/0x270 net/core/sock.c:3767 pppoe_sendmsg+0x15d/0xcb0 drivers/net/ppp/pppoe.c:904 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x330/0x3d0 net/socket.c:727 ____sys_sendmsg+0x893/0xd80 net/socket.c:2566 ___sys_sendmsg+0x271/0x3b0 net/socket.c:2620 __sys_sendmmsg+0x2d9/0x7c0 net/socket.c:2709 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+afad90ffc8645324afe5@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68887d86.a00a0220.b12ec.00cd.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Reviewed-by: Dawid Osuchowski Link: https://patch.msgid.link/20250729080207.1863408-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ppp/pptp.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 5feaa70b5f47..4cd6f67bd5d3 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -159,9 +159,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) int len; unsigned char *data; __u32 seq_recv; - - - struct rtable *rt; + struct rtable *rt = NULL; struct net_device *tdev; struct iphdr *iph; int max_headroom; @@ -179,16 +177,20 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); - if (!new_skb) { - ip_rt_put(rt); + + if (!new_skb) goto tx_error; - } + if (skb->sk) skb_set_owner_w(new_skb, skb->sk); consume_skb(skb); skb = new_skb; } + /* Ensure 
we can safely access protocol field and LCP code */ + if (!pskb_may_pull(skb, 3)) + goto tx_error; + data = skb->data; islcp = ((data[0] << 8) + data[1]) == PPP_LCP && 1 <= data[2] && data[2] <= 7; @@ -262,6 +264,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) return 1; tx_error: + ip_rt_put(rt); kfree_skb(skb); return 1; } From 759dfc7d04bab1b0b86113f1164dc1fec192b859 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Mon, 28 Jul 2025 11:06:47 +0300 Subject: [PATCH 1266/2411] netlink: avoid infinite retry looping in netlink_unicast() netlink_attachskb() checks for the socket's read memory allocation constraints. Firstly, it has: rmem < READ_ONCE(sk->sk_rcvbuf) to check if the just increased rmem value fits into the socket's receive buffer. If not, it proceeds and tries to wait for the memory under: rmem + skb->truesize > READ_ONCE(sk->sk_rcvbuf) The checks don't cover the case when skb->truesize + sk->sk_rmem_alloc is equal to sk->sk_rcvbuf. Thus the function neither successfully accepts these conditions, nor manages to reschedule the task - and is called in retry loop for indefinite time which is caught as: rcu: INFO: rcu_sched self-detected stall on CPU rcu: 0-....: (25999 ticks this GP) idle=ef2/1/0x4000000000000000 softirq=262269/262269 fqs=6212 (t=26000 jiffies g=230833 q=259957) NMI backtrace for cpu 0 CPU: 0 PID: 22 Comm: kauditd Not tainted 5.10.240 #68 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-4.fc42 04/01/2014 Call Trace: dump_stack lib/dump_stack.c:120 nmi_cpu_backtrace.cold lib/nmi_backtrace.c:105 nmi_trigger_cpumask_backtrace lib/nmi_backtrace.c:62 rcu_dump_cpu_stacks kernel/rcu/tree_stall.h:335 rcu_sched_clock_irq.cold kernel/rcu/tree.c:2590 update_process_times kernel/time/timer.c:1953 tick_sched_handle kernel/time/tick-sched.c:227 tick_sched_timer kernel/time/tick-sched.c:1399 __hrtimer_run_queues kernel/time/hrtimer.c:1652 hrtimer_interrupt kernel/time/hrtimer.c:1717 __sysvec_apic_timer_interrupt 
arch/x86/kernel/apic/apic.c:1113 asm_call_irq_on_stack arch/x86/entry/entry_64.S:808 netlink_attachskb net/netlink/af_netlink.c:1234 netlink_unicast net/netlink/af_netlink.c:1349 kauditd_send_queue kernel/audit.c:776 kauditd_thread kernel/audit.c:897 kthread kernel/kthread.c:328 ret_from_fork arch/x86/entry/entry_64.S:304 Restore the original behavior of the check which commit in Fixes accidentally missed when restructuring the code. Found by Linux Verification Center (linuxtesting.org). Fixes: ae8f160e7eb2 ("netlink: Fix wraparounds of sk->sk_rmem_alloc.") Cc: stable@vger.kernel.org Signed-off-by: Fedor Pchelkin Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250728080727.255138-1-pchelkin@ispras.ru Signed-off-by: Jakub Kicinski --- net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5949855fa29e..e2f7080dd5d7 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1218,7 +1218,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, nlk = nlk_sk(sk); rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); - if ((rmem == skb->truesize || rmem < READ_ONCE(sk->sk_rcvbuf)) && + if ((rmem == skb->truesize || rmem <= READ_ONCE(sk->sk_rcvbuf)) && !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { netlink_skb_set_owner_r(skb, sk); return 0; From 383cd6d879a18acdaa84c29330b25c49cbc0b490 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 29 Jul 2025 07:49:30 +0100 Subject: [PATCH 1267/2411] scsi: scsi_debug: Make read-only arrays static const Don't populate the read-only arrays on the stack at run time, instead make them static const. Also reduces overall size. 
before: text data bss dec hex filename 367439 89582 5952 462973 7107d drivers/scsi/scsi_debug.o after: text data bss dec hex filename 365847 90702 5952 462501 70ea5 drivers/scsi/scsi_debug.o (gcc 14.2.0, x86-64) Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20250729064930.1659007-1-colin.i.king@gmail.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 91 ++++++++++++++++++++++++--------------- 1 file changed, 57 insertions(+), 34 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 0847767d4d43..353cb60e1abe 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -2674,8 +2674,10 @@ static int resp_rsup_tmfs(struct scsi_cmnd *scp, static int resp_err_recov_pg(unsigned char *p, int pcontrol, int target) { /* Read-Write Error Recovery page for mode_sense */ - unsigned char err_recov_pg[] = {0x1, 0xa, 0xc0, 11, 240, 0, 0, 0, - 5, 0, 0xff, 0xff}; + static const unsigned char err_recov_pg[] = { + 0x1, 0xa, 0xc0, 11, 240, 0, 0, 0, + 5, 0, 0xff, 0xff + }; memcpy(p, err_recov_pg, sizeof(err_recov_pg)); if (1 == pcontrol) @@ -2685,8 +2687,10 @@ static int resp_err_recov_pg(unsigned char *p, int pcontrol, int target) static int resp_disconnect_pg(unsigned char *p, int pcontrol, int target) { /* Disconnect-Reconnect page for mode_sense */ - unsigned char disconnect_pg[] = {0x2, 0xe, 128, 128, 0, 10, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}; + static const unsigned char disconnect_pg[] = { + 0x2, 0xe, 128, 128, 0, 10, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + }; memcpy(p, disconnect_pg, sizeof(disconnect_pg)); if (1 == pcontrol) @@ -2696,9 +2700,11 @@ static int resp_disconnect_pg(unsigned char *p, int pcontrol, int target) static int resp_format_pg(unsigned char *p, int pcontrol, int target) { /* Format device page for mode_sense */ - unsigned char format_pg[] = {0x3, 0x16, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0x40, 0, 0, 0}; + static const unsigned char 
format_pg[] = { + 0x3, 0x16, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0x40, 0, 0, 0 + }; memcpy(p, format_pg, sizeof(format_pg)); put_unaligned_be16(sdebug_sectors_per, p + 10); @@ -2716,10 +2722,14 @@ static unsigned char caching_pg[] = {0x8, 18, 0x14, 0, 0xff, 0xff, 0, 0, static int resp_caching_pg(unsigned char *p, int pcontrol, int target) { /* Caching page for mode_sense */ - unsigned char ch_caching_pg[] = {/* 0x8, 18, */ 0x4, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - unsigned char d_caching_pg[] = {0x8, 18, 0x14, 0, 0xff, 0xff, 0, 0, - 0xff, 0xff, 0xff, 0xff, 0x80, 0x14, 0, 0, 0, 0, 0, 0}; + static const unsigned char ch_caching_pg[] = { + /* 0x8, 18, */ 0x4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + static const unsigned char d_caching_pg[] = { + 0x8, 18, 0x14, 0, 0xff, 0xff, 0, 0, + 0xff, 0xff, 0xff, 0xff, 0x80, 0x14, 0, 0, 0, 0, 0, 0 + }; if (SDEBUG_OPT_N_WCE & sdebug_opts) caching_pg[2] &= ~0x4; /* set WCE=0 (default WCE=1) */ @@ -2738,8 +2748,10 @@ static int resp_ctrl_m_pg(unsigned char *p, int pcontrol, int target) { /* Control mode page for mode_sense */ unsigned char ch_ctrl_m_pg[] = {/* 0xa, 10, */ 0x6, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - unsigned char d_ctrl_m_pg[] = {0xa, 10, 2, 0, 0, 0, 0, 0, - 0, 0, 0x2, 0x4b}; + static const unsigned char d_ctrl_m_pg[] = { + 0xa, 10, 2, 0, 0, 0, 0, 0, + 0, 0, 0x2, 0x4b + }; if (sdebug_dsense) ctrl_m_pg[2] |= 0x4; @@ -2794,10 +2806,14 @@ static int resp_grouping_m_pg(unsigned char *p, int pcontrol, int target) static int resp_iec_m_pg(unsigned char *p, int pcontrol, int target) { /* Informational Exceptions control mode page for mode_sense */ - unsigned char ch_iec_m_pg[] = {/* 0x1c, 0xa, */ 0x4, 0xf, 0, 0, 0, 0, - 0, 0, 0x0, 0x0}; - unsigned char d_iec_m_pg[] = {0x1c, 0xa, 0x08, 0, 0, 0, 0, 0, - 0, 0, 0x0, 0x0}; + static const unsigned char ch_iec_m_pg[] = { + /* 0x1c, 0xa, */ 0x4, 0xf, 0, 0, 0, 0, + 0, 0, 0x0, 0x0 + }; + static const unsigned char d_iec_m_pg[] = { + 
0x1c, 0xa, 0x08, 0, 0, 0, 0, 0, + 0, 0, 0x0, 0x0 + }; memcpy(p, iec_m_pg, sizeof(iec_m_pg)); if (1 == pcontrol) @@ -2809,8 +2825,9 @@ static int resp_iec_m_pg(unsigned char *p, int pcontrol, int target) static int resp_sas_sf_m_pg(unsigned char *p, int pcontrol, int target) { /* SAS SSP mode page - short format for mode_sense */ - unsigned char sas_sf_m_pg[] = {0x19, 0x6, - 0x6, 0x0, 0x7, 0xd0, 0x0, 0x0}; + static const unsigned char sas_sf_m_pg[] = { + 0x19, 0x6, 0x6, 0x0, 0x7, 0xd0, 0x0, 0x0 + }; memcpy(p, sas_sf_m_pg, sizeof(sas_sf_m_pg)); if (1 == pcontrol) @@ -2854,9 +2871,10 @@ static int resp_sas_pcd_m_spg(unsigned char *p, int pcontrol, int target, static int resp_sas_sha_m_spg(unsigned char *p, int pcontrol) { /* SAS SSP shared protocol specific port mode subpage */ - unsigned char sas_sha_m_pg[] = {0x59, 0x2, 0, 0xc, 0, 0x6, 0x10, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - }; + static const unsigned char sas_sha_m_pg[] = { + 0x59, 0x2, 0, 0xc, 0, 0x6, 0x10, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; memcpy(p, sas_sha_m_pg, sizeof(sas_sha_m_pg)); if (1 == pcontrol) @@ -2923,8 +2941,10 @@ static int process_medium_part_m_pg(struct sdebug_dev_info *devip, static int resp_compression_m_pg(unsigned char *p, int pcontrol, int target, unsigned char dce) { /* Compression page for mode_sense (tape) */ - unsigned char compression_pg[] = {0x0f, 14, 0x40, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 00, 00}; + static const unsigned char compression_pg[] = { + 0x0f, 14, 0x40, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 + }; memcpy(p, compression_pg, sizeof(compression_pg)); if (dce) @@ -3282,9 +3302,10 @@ static int resp_mode_select(struct scsi_cmnd *scp, static int resp_temp_l_pg(unsigned char *arr) { - unsigned char temp_l_pg[] = {0x0, 0x0, 0x3, 0x2, 0x0, 38, - 0x0, 0x1, 0x3, 0x2, 0x0, 65, - }; + static const unsigned char temp_l_pg[] = { + 0x0, 0x0, 0x3, 0x2, 0x0, 38, + 0x0, 0x1, 0x3, 0x2, 0x0, 65, + }; memcpy(arr, temp_l_pg, sizeof(temp_l_pg)); return sizeof(temp_l_pg); @@ -3292,8 +3313,9 @@ static int 
resp_temp_l_pg(unsigned char *arr) static int resp_ie_l_pg(unsigned char *arr) { - unsigned char ie_l_pg[] = {0x0, 0x0, 0x3, 0x3, 0x0, 0x0, 38, - }; + static const unsigned char ie_l_pg[] = { + 0x0, 0x0, 0x3, 0x3, 0x0, 0x0, 38, + }; memcpy(arr, ie_l_pg, sizeof(ie_l_pg)); if (iec_m_pg[2] & 0x4) { /* TEST bit set */ @@ -3305,11 +3327,12 @@ static int resp_ie_l_pg(unsigned char *arr) static int resp_env_rep_l_spg(unsigned char *arr) { - unsigned char env_rep_l_spg[] = {0x0, 0x0, 0x23, 0x8, - 0x0, 40, 72, 0xff, 45, 18, 0, 0, - 0x1, 0x0, 0x23, 0x8, - 0x0, 55, 72, 35, 55, 45, 0, 0, - }; + static const unsigned char env_rep_l_spg[] = { + 0x0, 0x0, 0x23, 0x8, + 0x0, 40, 72, 0xff, 45, 18, 0, 0, + 0x1, 0x0, 0x23, 0x8, + 0x0, 55, 72, 35, 55, 45, 0, 0, + }; memcpy(arr, env_rep_l_spg, sizeof(env_rep_l_spg)); return sizeof(env_rep_l_spg); From cf3fc037623c54de48d2ec1a1ee686e2d1de2d45 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 29 Jul 2025 18:28:07 +0900 Subject: [PATCH 1268/2411] ata: libata-scsi: Fix ata_to_sense_error() status handling Commit 8ae720449fca ("libata: whitespace fixes in ata_to_sense_error()") inadvertently added the entry 0x40 (ATA_DRDY) to the stat_table array in the function ata_to_sense_error(). This entry ties a failed qc which has a status field equal to ATA_DRDY to the sense key ILLEGAL REQUEST with the additional sense code UNALIGNED WRITE COMMAND. This entry will be used to generate a failed qc sense key and sense code when the qc is missing sense data and there is no match for the qc error field in the sense_table array of ata_to_sense_error(). As a result, for a failed qc for which we failed to get sense data (e.g. read log 10h failed if qc is an NCQ command, or REQUEST SENSE EXT command failed for the non-ncq case), the user very often ends up seeing the completely misleading "unaligned write command" error, even if qc was not a write command.
E.g.: sd 0:0:0:0: [sda] tag#12 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s sd 0:0:0:0: [sda] tag#12 Sense Key : Illegal Request [current] sd 0:0:0:0: [sda] tag#12 Add. Sense: Unaligned write command sd 0:0:0:0: [sda] tag#12 CDB: Read(10) 28 00 00 00 10 00 00 00 08 00 I/O error, dev sda, sector 4096 op 0x0:(READ) flags 0x80700 phys_seg 1 prio class 0 Fix this by removing the ATA_DRDY entry from the stat_table array so that we default to always returning ABORTED COMMAND without any additional sense code, since we do not know any better. The entry 0x08 (ATA_DRQ) is also removed since signaling ABORTED COMMAND with a parity error is also misleading (as a parity error would likely be signaled through a bus error). So for this case, also default to returning ABORTED COMMAND without any additional sense code. With this, the previous example error case becomes: sd 0:0:0:0: [sda] tag#17 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s sd 0:0:0:0: [sda] tag#17 Sense Key : Aborted Command [current] sd 0:0:0:0: [sda] tag#17 Add. Sense: No additional sense information sd 0:0:0:0: [sda] tag#17 CDB: Read(10) 28 00 00 00 10 00 00 00 08 00 I/O error, dev sda, sector 4096 op 0x0:(READ) flags 0x80700 phys_seg 1 prio class 0 Together with these fixes, refactor stat_table to make it more readable by putting the entries comments in front of the entries and using the defined status bits macros instead of hardcoded values. Reported-by: Lorenz Brun Reported-by: Brandon Schwartz Fixes: 8ae720449fca ("libata: whitespace fixes in ata_to_sense_error()") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. 
Petersen --- drivers/ata/libata-scsi.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 27b15176db56..9b16c0f553e0 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -859,18 +859,14 @@ static void ata_to_sense_error(u8 drv_stat, u8 drv_err, u8 *sk, u8 *asc, {0xFF, 0xFF, 0xFF, 0xFF}, // END mark }; static const unsigned char stat_table[][4] = { - /* Must be first because BUSY means no other bits valid */ - {0x80, ABORTED_COMMAND, 0x47, 0x00}, - // Busy, fake parity for now - {0x40, ILLEGAL_REQUEST, 0x21, 0x04}, - // Device ready, unaligned write command - {0x20, HARDWARE_ERROR, 0x44, 0x00}, - // Device fault, internal target failure - {0x08, ABORTED_COMMAND, 0x47, 0x00}, - // Timed out in xfer, fake parity for now - {0x04, RECOVERED_ERROR, 0x11, 0x00}, - // Recovered ECC error Medium error, recovered - {0xFF, 0xFF, 0xFF, 0xFF}, // END mark + /* Busy: must be first because BUSY means no other bits valid */ + { ATA_BUSY, ABORTED_COMMAND, 0x00, 0x00 }, + /* Device fault: INTERNAL TARGET FAILURE */ + { ATA_DF, HARDWARE_ERROR, 0x44, 0x00 }, + /* Corrected data error */ + { ATA_CORR, RECOVERED_ERROR, 0x00, 0x00 }, + + { 0xFF, 0xFF, 0xFF, 0xFF }, /* END mark */ }; /* From d2be9ea9a75550a35c5127a6c2633658bc38c76b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 29 Jul 2025 19:37:12 +0900 Subject: [PATCH 1269/2411] ata: libata-scsi: Return aborted command when missing sense and result TF ata_gen_ata_sense() is always called for a failed qc missing sense data so that a sense key, code and code qualifier can be generated using ata_to_sense_error() from the qc status and error fields of its result task file. However, if the qc does not have its result task file filled, ata_gen_ata_sense() returns early without setting a sense key. 
Improve this by defaulting to returning ABORTED COMMAND without any additional sense code, since we do not know the reason for the failure. The same fix is also applied in ata_gen_passthru_sense() with the additional check that the qc failed (qc->err_mask is set). Fixes: 816be86c7993 ("ata: libata-scsi: Check ATA_QCFLAG_RTF_FILLED before using result_tf") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen --- drivers/ata/libata-scsi.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 9b16c0f553e0..57f674f51b0c 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -938,6 +938,8 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc) if (!(qc->flags & ATA_QCFLAG_RTF_FILLED)) { ata_dev_dbg(dev, "missing result TF: can't generate ATA PT sense data\n"); + if (qc->err_mask) + ata_scsi_set_sense(dev, cmd, ABORTED_COMMAND, 0, 0); return; } @@ -992,8 +994,8 @@ static void ata_gen_ata_sense(struct ata_queued_cmd *qc) if (!(qc->flags & ATA_QCFLAG_RTF_FILLED)) { ata_dev_dbg(dev, - "missing result TF: can't generate sense data\n"); - return; + "Missing result TF: reporting aborted command\n"); + goto aborted; } /* Use ata_to_sense_error() to map status register bits @@ -1004,13 +1006,15 @@ static void ata_gen_ata_sense(struct ata_queued_cmd *qc) ata_to_sense_error(tf->status, tf->error, &sense_key, &asc, &ascq); ata_scsi_set_sense(dev, cmd, sense_key, asc, ascq); - } else { - /* Could not decode error */ - ata_dev_warn(dev, "could not decode error status 0x%x err_mask 0x%x\n", - tf->status, qc->err_mask); - ata_scsi_set_sense(dev, cmd, ABORTED_COMMAND, 0, 0); return; } + + /* Could not decode error */ + ata_dev_warn(dev, + "Could not decode error 0x%x, status 0x%x (err_mask=0x%x)\n", + tf->error, tf->status, qc->err_mask); +aborted: + ata_scsi_set_sense(dev, cmd, ABORTED_COMMAND, 0, 0); } 
void ata_scsi_sdev_config(struct scsi_device *sdev) From 0060beec0bfa647c4b510df188b1c4673a197839 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 28 Jul 2025 13:04:29 +0900 Subject: [PATCH 1270/2411] ata: libata-sata: Add link_power_management_supported sysfs attribute A port link power management (LPM) policy can be controlled using the link_power_management_policy sysfs host attribute. However, this attribute exists also for hosts that do not support LPM and in such case, attempting to change the LPM policy for the host (port) will fail with -EOPNOTSUPP. Introduce the new sysfs link_power_management_supported host attribute to indicate to the user if the port and the devices connected to the port for the host support LPM, which implies that the link_power_management_policy attribute can be used. Since checking that a port and its devices support LPM is common between the new ata_scsi_lpm_supported_show() function and the existing ata_scsi_lpm_store() function, the new helper ata_scsi_lpm_supported() is introduced. Fixes: 413e800cadbf ("ata: libata-sata: Disallow changing LPM state if not supported") Reported-by: Borah, Chaitanya Kumar Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202507251014.a5becc3b-lkp@intel.com Signed-off-by: Damien Le Moal Reviewed-by: Martin K.
Petersen --- drivers/ata/ata_piix.c | 1 + drivers/ata/libahci.c | 1 + drivers/ata/libata-sata.c | 53 ++++++++++++++++++++++++++++++--------- include/linux/libata.h | 1 + 4 files changed, 44 insertions(+), 12 deletions(-) diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index 229429ba5027..495fa096dd65 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -1089,6 +1089,7 @@ static struct ata_port_operations ich_pata_ops = { }; static struct attribute *piix_sidpr_shost_attrs[] = { + &dev_attr_link_power_management_supported.attr, &dev_attr_link_power_management_policy.attr, NULL }; diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index b335fb7e5cb4..c79abdfcd7a9 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -111,6 +111,7 @@ static DEVICE_ATTR(em_buffer, S_IWUSR | S_IRUGO, static DEVICE_ATTR(em_message_supported, S_IRUGO, ahci_show_em_supported, NULL); static struct attribute *ahci_shost_attrs[] = { + &dev_attr_link_power_management_supported.attr, &dev_attr_link_power_management_policy.attr, &dev_attr_em_message_type.attr, &dev_attr_em_message.attr, diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index 4734465d3b1e..b2817a2995d6 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -900,14 +900,52 @@ static const char *ata_lpm_policy_names[] = { [ATA_LPM_MIN_POWER] = "min_power", }; +/* + * Check if a port supports link power management. + * Must be called with the port locked. 
+ */ +static bool ata_scsi_lpm_supported(struct ata_port *ap) +{ + struct ata_link *link; + struct ata_device *dev; + + if (ap->flags & ATA_FLAG_NO_LPM) + return false; + + ata_for_each_link(link, ap, EDGE) { + ata_for_each_dev(dev, &ap->link, ENABLED) { + if (dev->quirks & ATA_QUIRK_NOLPM) + return false; + } + } + + return true; +} + +static ssize_t ata_scsi_lpm_supported_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct ata_port *ap = ata_shost_to_port(shost); + unsigned long flags; + bool supported; + + spin_lock_irqsave(ap->lock, flags); + supported = ata_scsi_lpm_supported(ap); + spin_unlock_irqrestore(ap->lock, flags); + + return sysfs_emit(buf, "%d\n", supported); +} +DEVICE_ATTR(link_power_management_supported, S_IRUGO, + ata_scsi_lpm_supported_show, NULL); +EXPORT_SYMBOL_GPL(dev_attr_link_power_management_supported); + static ssize_t ata_scsi_lpm_store(struct device *device, struct device_attribute *attr, const char *buf, size_t count) { struct Scsi_Host *shost = class_to_shost(device); struct ata_port *ap = ata_shost_to_port(shost); - struct ata_link *link; - struct ata_device *dev; enum ata_lpm_policy policy; unsigned long flags; @@ -924,20 +962,11 @@ static ssize_t ata_scsi_lpm_store(struct device *device, spin_lock_irqsave(ap->lock, flags); - if (ap->flags & ATA_FLAG_NO_LPM) { + if (!ata_scsi_lpm_supported(ap)) { count = -EOPNOTSUPP; goto out_unlock; } - ata_for_each_link(link, ap, EDGE) { - ata_for_each_dev(dev, &ap->link, ENABLED) { - if (dev->quirks & ATA_QUIRK_NOLPM) { - count = -EOPNOTSUPP; - goto out_unlock; - } - } - } - ap->target_lpm_policy = policy; ata_port_schedule_eh(ap); out_unlock: diff --git a/include/linux/libata.h b/include/linux/libata.h index 912ace523880..0620dd67369f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -545,6 +545,7 @@ typedef void (*ata_postreset_fn_t)(struct ata_link *link, unsigned int *classes) extern struct 
device_attribute dev_attr_unload_heads; #ifdef CONFIG_SATA_HOST +extern struct device_attribute dev_attr_link_power_management_supported; extern struct device_attribute dev_attr_link_power_management_policy; extern struct device_attribute dev_attr_ncq_prio_supported; extern struct device_attribute dev_attr_ncq_prio_enable; From 199d9ffb31650f948dd342ade1c1b920e157630f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 11 Jul 2025 15:31:36 +0200 Subject: [PATCH 1271/2411] module: move 'struct module_use' to internal.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The struct was moved to the public header file in commit c8e21ced08b3 ("module: fix kdb's illicit use of struct module_use."). Back then the structure was used outside of the module core. Nowadays this is not true anymore, so the structure can be made internal. Signed-off-by: Thomas Weißschuh Reviewed-by: Daniel Gomez Reviewed-by: Petr Pavlu Link: https://lore.kernel.org/r/20250711-kunit-ifdef-modules-v2-1-39443decb1f8@linutronix.de Signed-off-by: Daniel Gomez --- include/linux/module.h | 7 ------- kernel/module/internal.h | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index a7cac01d95e7..97c38e1cd377 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -313,13 +313,6 @@ void *__symbol_get_gpl(const char *symbol); __used __section(".no_trim_symbol") = __stringify(x); \ (typeof(&x))(__symbol_get(__stringify(x))); }) -/* modules using other modules: kdb wants to see this. */ -struct module_use { - struct list_head source_list; - struct list_head target_list; - struct module *source, *target; -}; - enum module_state { MODULE_STATE_LIVE, /* Normal state. */ MODULE_STATE_COMING, /* Full formed, running module_init. 
*/ diff --git a/kernel/module/internal.h b/kernel/module/internal.h index 51ddd8866ef3..618202578b42 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -112,6 +112,13 @@ struct find_symbol_arg { enum mod_license license; }; +/* modules using other modules */ +struct module_use { + struct list_head source_list; + struct list_head target_list; + struct module *source, *target; +}; + int mod_verify_sig(const void *mod, struct load_info *info); int try_to_force_load(struct module *mod, const char *reason); bool find_symbol(struct find_symbol_arg *fsa); From 818783c804bc051f7faf0ac226b5597f8259c6f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 11 Jul 2025 15:31:37 +0200 Subject: [PATCH 1272/2411] module: make structure definitions always visible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To write code that works with both CONFIG_MODULES=y and CONFIG_MODULES=n it is convenient to use "if (IS_ENABLED(CONFIG_MODULES))" over raw #ifdef. The code will still be fully typechecked but the unreachable parts are discarded by the compiler. This prevents accidental breakage when a certain kconfig combination was not specifically tested by the developer. This pattern is already supported to some extent by module.h defining empty stub functions if CONFIG_MODULES=n. However some users of module.h work on the structures defined by module.h. Therefore these structure definitions need to be visible, too. Many structure members are still gated by specific configuration settings. The assumption for those is that the code using them will be gated behind the same configuration setting anyways.
Signed-off-by: Thomas Weißschuh Reviewed-by: Daniel Gomez Link: https://lore.kernel.org/r/20250711-kunit-ifdef-modules-v2-2-39443decb1f8@linutronix.de Signed-off-by: Daniel Gomez --- include/linux/module.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 97c38e1cd377..5fe812de2d84 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -303,16 +303,6 @@ static typeof(name) __mod_device_table__##type##__##name \ struct notifier_block; -#ifdef CONFIG_MODULES - -/* Get/put a kernel symbol (calls must be symmetric) */ -void *__symbol_get(const char *symbol); -void *__symbol_get_gpl(const char *symbol); -#define symbol_get(x) ({ \ - static const char __notrim[] \ - __used __section(".no_trim_symbol") = __stringify(x); \ - (typeof(&x))(__symbol_get(__stringify(x))); }) - enum module_state { MODULE_STATE_LIVE, /* Normal state. */ MODULE_STATE_COMING, /* Full formed, running module_init. */ @@ -597,6 +587,16 @@ struct module { #define MODULE_ARCH_INIT {} #endif +#ifdef CONFIG_MODULES + +/* Get/put a kernel symbol (calls must be symmetric) */ +void *__symbol_get(const char *symbol); +void *__symbol_get_gpl(const char *symbol); +#define symbol_get(x) ({ \ + static const char __notrim[] \ + __used __section(".no_trim_symbol") = __stringify(x); \ + (typeof(&x))(__symbol_get(__stringify(x))); }) + #ifndef HAVE_ARCH_KALLSYMS_SYMBOL_VALUE static inline unsigned long kallsyms_symbol_value(const Elf_Sym *sym) { From 768da2eae8662ca51102794c32d37c17410acbf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 11 Jul 2025 15:31:38 +0200 Subject: [PATCH 1273/2411] kunit: test: Drop CONFIG_MODULE ifdeffery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function stubs exposed by module.h allow the code to compile properly without the ifdeffery. 
The generated object code stays the same, as the compiler can optimize away all the dead code. As the code is still typechecked developer errors can be detected faster. Signed-off-by: Thomas Weißschuh Acked-by: David Gow Reviewed-by: Daniel Gomez Link: https://lore.kernel.org/r/20250711-kunit-ifdef-modules-v2-3-39443decb1f8@linutronix.de Signed-off-by: Daniel Gomez --- lib/kunit/test.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/lib/kunit/test.c b/lib/kunit/test.c index f3c6b11f12b8..d2bfa331a2b1 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -802,7 +802,6 @@ void __kunit_test_suites_exit(struct kunit_suite **suites, int num_suites) } EXPORT_SYMBOL_GPL(__kunit_test_suites_exit); -#ifdef CONFIG_MODULES static void kunit_module_init(struct module *mod) { struct kunit_suite_set suite_set, filtered_set; @@ -890,7 +889,6 @@ static struct notifier_block kunit_mod_nb = { .notifier_call = kunit_module_notify, .priority = 0, }; -#endif KUNIT_DEFINE_ACTION_WRAPPER(kfree_action_wrapper, kfree, const void *) @@ -981,20 +979,14 @@ static int __init kunit_init(void) kunit_debugfs_init(); kunit_bus_init(); -#ifdef CONFIG_MODULES return register_module_notifier(&kunit_mod_nb); -#else - return 0; -#endif } late_initcall(kunit_init); static void __exit kunit_exit(void) { memset(&kunit_hooks, 0, sizeof(kunit_hooks)); -#ifdef CONFIG_MODULES unregister_module_notifier(&kunit_mod_nb); -#endif kunit_bus_shutdown(); From a6323bd4e611567913e23df5b58f2d4e4da06789 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 30 Jun 2025 16:32:32 +0200 Subject: [PATCH 1274/2411] module: Prevent silent truncation of module name in delete_module(2) Passing a module name longer than MODULE_NAME_LEN to the delete_module syscall results in its silent truncation. This really isn't much of a problem in practice, but it could theoretically lead to the removal of an incorrect module. It is more sensible to return ENAMETOOLONG or ENOENT in such a case. 
Update the syscall to return ENOENT, as documented in the delete_module(2) man page to mean "No module by that name exists." This is appropriate because a module with a name longer than MODULE_NAME_LEN cannot be loaded in the first place. Signed-off-by: Petr Pavlu Reviewed-by: Daniel Gomez Link: https://lore.kernel.org/r/20250630143535.267745-2-petr.pavlu@suse.com Signed-off-by: Daniel Gomez --- kernel/module/main.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/module/main.c b/kernel/module/main.c index 81f9df8859dc..120e51550a88 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -779,14 +779,16 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, struct module *mod; char name[MODULE_NAME_LEN]; char buf[MODULE_FLAGS_BUF_SIZE]; - int ret, forced = 0; + int ret, len, forced = 0; if (!capable(CAP_SYS_MODULE) || modules_disabled) return -EPERM; - if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0) - return -EFAULT; - name[MODULE_NAME_LEN-1] = '\0'; + len = strncpy_from_user(name, name_user, MODULE_NAME_LEN); + if (len == 0 || len == MODULE_NAME_LEN) + return -ENOENT; + if (len < 0) + return len; audit_log_kern_module(name); From 6c171b2ccfe677ca97fc5334f853807959f26589 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 30 Jun 2025 16:32:33 +0200 Subject: [PATCH 1275/2411] module: Remove unnecessary +1 from last_unloaded_module::name size The variable last_unloaded_module::name tracks the name of the last unloaded module. It is a string copy of module::name, which is MODULE_NAME_LEN bytes in size and includes the NUL terminator. Therefore, the size of last_unloaded_module::name can also be just MODULE_NAME_LEN, without the need for an extra byte. 
Fixes: e14af7eeb47e ("debug: track and print last unloaded module in the oops trace") Signed-off-by: Petr Pavlu Reviewed-by: Daniel Gomez Link: https://lore.kernel.org/r/20250630143535.267745-3-petr.pavlu@suse.com Signed-off-by: Daniel Gomez --- kernel/module/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/module/main.c b/kernel/module/main.c index 120e51550a88..7f8bb51aedd4 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -608,7 +608,7 @@ MODINFO_ATTR(version); MODINFO_ATTR(srcversion); static struct { - char name[MODULE_NAME_LEN + 1]; + char name[MODULE_NAME_LEN]; char taints[MODULE_FLAGS_BUF_SIZE]; } last_unloaded_module; From bdc877ba6b7ff1b6d2ebeff11e63da4a50a54854 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 30 Jun 2025 16:32:34 +0200 Subject: [PATCH 1276/2411] module: Restore the moduleparam prefix length check The moduleparam code allows modules to provide their own definition of MODULE_PARAM_PREFIX, instead of using the default KBUILD_MODNAME ".". Commit 730b69d22525 ("module: check kernel param length at compile time, not runtime") added a check to ensure the prefix doesn't exceed MODULE_NAME_LEN, as this is what param_sysfs_builtin() expects. Later, commit 58f86cc89c33 ("VERIFY_OCTAL_PERMISSIONS: stricter checking for sysfs perms.") removed this check, but there is no indication this was intentional. Since the check is still useful for param_sysfs_builtin() to function properly, reintroduce it in __module_param_call(), but in a modernized form using static_assert(). While here, clean up the __module_param_call() comments. In particular, remove the comment "Default value instead of permissions?", which comes from commit 9774a1f54f17 ("[PATCH] Compile-time check re world-writeable module params"). This comment was related to the test variable __param_perm_check_##name, which was removed in the previously mentioned commit 58f86cc89c33. 
Fixes: 58f86cc89c33 ("VERIFY_OCTAL_PERMISSIONS: stricter checking for sysfs perms.") Signed-off-by: Petr Pavlu Reviewed-by: Daniel Gomez Link: https://lore.kernel.org/r/20250630143535.267745-4-petr.pavlu@suse.com Signed-off-by: Daniel Gomez --- include/linux/moduleparam.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index bfb85fd13e1f..110e9d09de24 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -282,10 +282,9 @@ struct kparam_array #define __moduleparam_const const #endif -/* This is the fundamental function for registering boot/module - parameters. */ +/* This is the fundamental function for registering boot/module parameters. */ #define __module_param_call(prefix, name, ops, arg, perm, level, flags) \ - /* Default value instead of permissions? */ \ + static_assert(sizeof(""prefix) - 1 <= MAX_PARAM_PREFIX_LEN); \ static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used __section("__param") \ From a7c54b2b41dd1f6ec780e7fbfb13f70c64c9731d Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 30 Jun 2025 16:32:35 +0200 Subject: [PATCH 1277/2411] tracing: Replace MAX_PARAM_PREFIX_LEN with MODULE_NAME_LEN Use the MODULE_NAME_LEN definition in module_exists() to obtain the maximum size of a module name, instead of using MAX_PARAM_PREFIX_LEN. The values are the same but MODULE_NAME_LEN is more appropriate in this context. MAX_PARAM_PREFIX_LEN was added in commit 730b69d22525 ("module: check kernel param length at compile time, not runtime") only to break a circular dependency between module.h and moduleparam.h, and should mostly be limited to use in moduleparam.h. 
Signed-off-by: Petr Pavlu Cc: Steven Rostedt Cc: Masami Hiramatsu Reviewed-by: Daniel Gomez Acked-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20250630143535.267745-5-petr.pavlu@suse.com Signed-off-by: Daniel Gomez --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7996f26c3f46..3112ac128145 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -10367,7 +10367,7 @@ bool module_exists(const char *module) { /* All modules have the symbol __this_module */ static const char this_mod[] = "__this_module"; - char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2]; + char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2]; unsigned long val; int n; From 40a826bd6c82ae45cfd3a19cd2a60a10f56b74c0 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 30 Jun 2025 16:32:36 +0200 Subject: [PATCH 1278/2411] module: Rename MAX_PARAM_PREFIX_LEN to __MODULE_NAME_LEN The maximum module name length (MODULE_NAME_LEN) is somewhat confusingly defined in terms of the maximum parameter prefix length (MAX_PARAM_PREFIX_LEN), when in fact the dependency is in the opposite direction. This split originates from commit 730b69d22525 ("module: check kernel param length at compile time, not runtime"). The code needed to use MODULE_NAME_LEN in moduleparam.h, but because module.h requires moduleparam.h, this created a circular dependency. It was resolved by introducing MAX_PARAM_PREFIX_LEN in moduleparam.h and defining MODULE_NAME_LEN in module.h in terms of MAX_PARAM_PREFIX_LEN. Rename MAX_PARAM_PREFIX_LEN to __MODULE_NAME_LEN for clarity. This matches the similar approach of defining MODULE_INFO in module.h and __MODULE_INFO in moduleparam.h. 
Signed-off-by: Petr Pavlu Reviewed-by: Daniel Gomez Link: https://lore.kernel.org/r/20250630143535.267745-6-petr.pavlu@suse.com Signed-off-by: Daniel Gomez --- include/linux/module.h | 2 +- include/linux/moduleparam.h | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 5fe812de2d84..313ecb8e5181 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -33,7 +33,7 @@ #include #include -#define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN +#define MODULE_NAME_LEN __MODULE_NAME_LEN struct modversion_info { unsigned long crc; diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 110e9d09de24..a04a2bc4f51e 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -6,6 +6,13 @@ #include #include +/* + * The maximum module name length, including the NUL byte. + * Chosen so that structs with an unsigned long line up, specifically + * modversion_info. + */ +#define __MODULE_NAME_LEN (64 - sizeof(unsigned long)) + /* You can override this manually, but generally this should match the module name. */ #ifdef MODULE @@ -17,9 +24,6 @@ #define __MODULE_INFO_PREFIX KBUILD_MODNAME "." #endif -/* Chosen so that structs with an unsigned long line up. */ -#define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long)) - #define __MODULE_INFO(tag, name, info) \ static const char __UNIQUE_ID(name)[] \ __used __section(".modinfo") __aligned(1) \ @@ -284,7 +288,7 @@ struct kparam_array /* This is the fundamental function for registering boot/module parameters. 
*/ #define __module_param_call(prefix, name, ops, arg, perm, level, flags) \ - static_assert(sizeof(""prefix) - 1 <= MAX_PARAM_PREFIX_LEN); \ + static_assert(sizeof(""prefix) - 1 <= __MODULE_NAME_LEN); \ static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used __section("__param") \ From c71fc0f457ca1c2cd4dff2d974df724beb14f67e Mon Sep 17 00:00:00 2001 From: Nitesh Shetty Date: Wed, 16 Jul 2025 19:09:44 +0530 Subject: [PATCH 1279/2411] nvmet: add support for FDP in fabrics passthru path Add support for admin_get_feature FDP(0x1d) feature id, thus enabling FDP at the initiator side for the target controller and namespaces attached to it. Signed-off-by: Nitesh Shetty Signed-off-by: Christoph Hellwig --- drivers/nvme/target/passthru.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 3b4b0df8f879..0c361b1e3566 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -533,6 +533,8 @@ u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req) case NVME_FEAT_HOST_ID: req->execute = nvmet_execute_get_features; return NVME_SC_SUCCESS; + case NVME_FEAT_FDP: + return nvmet_setup_passthru_command(req); default: return nvmet_passthru_get_set_features(req); } From e715b8733df60aa3280ab3e0de0560c8a72c5c1d Mon Sep 17 00:00:00 2001 From: Kamaljit Singh Date: Mon, 21 Jul 2025 10:36:59 -0700 Subject: [PATCH 1280/2411] nvme: add capability to connect to an administrative controller Add capability to connect to an administrative controller by preventing ioq creation for admin-controllers. Add a nvme_admin_ctrl() to check if a controller's CNTRLTYPE indicates that it is an administrative controller and override ctrl->queue_count to 1 for admin controllers, so that only the admin queue and no I/O queues are created for an administrative controller. 
This override is done in nvme_init_ctrl_finish() after ctrl->cntrltype has been initialized in nvme_init_identify() so nvme_admin_ctrl() will work correctly. Doing this override in generic code (nvme_init_ctrl_finish) makes it transport agnostic and will work properly for nvme/tcp as well as for nvme/rdma. Suggested-by: Niklas Cassel Reviewed-by: Damien Le Moal Reviewed-by: Niklas Cassel Signed-off-by: Kamaljit Singh Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9d988f4cb87a..812c1565114f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3158,6 +3158,11 @@ static inline bool nvme_discovery_ctrl(struct nvme_ctrl *ctrl) return ctrl->opts && ctrl->opts->discovery_nqn; } +static inline bool nvme_admin_ctrl(struct nvme_ctrl *ctrl) +{ + return ctrl->cntrltype == NVME_CTRL_ADMIN; +} + static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) { @@ -3670,6 +3675,17 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended) if (ret) return ret; + if (nvme_admin_ctrl(ctrl)) { + /* + * An admin controller has one admin queue, but no I/O queues. + * Override queue_count so it only creates an admin queue. + */ + dev_dbg(ctrl->device, + "Subsystem %s is an administrative controller", + ctrl->subsys->subnqn); + ctrl->queue_count = 1; + } + ret = nvme_configure_apst(ctrl); if (ret < 0) return ret; From 528589947c1802b9357c2a9b96d88cc4a11cd88b Mon Sep 17 00:00:00 2001 From: Mohamed Khalfella Date: Fri, 25 Jul 2025 13:50:05 -0700 Subject: [PATCH 1281/2411] nvmet: initialize discovery subsys after debugfs is initialized During nvme target initialization discovery subsystem is initialized before "nvmet" debugfs directory is created. This results in discovery subsystem debugfs directory to be created in debugfs root directory. 
nvmet_init() -> nvmet_init_discovery() -> nvmet_subsys_alloc() -> nvmet_debugfs_subsys_setup() In other words, the codepath above is executed before nvmet_debugfs is created. We get /sys/kernel/debug/nqn.2014-08.org.nvmexpress.discovery instead of /sys/kernel/debug/nvmet/nqn.2014-08.org.nvmexpress.discovery. Move nvmet_init_discovery() call after nvmet_init_debugfs() to fix it. Fixes: 649fd41420a8 ("nvmet: add debugfs support") Signed-off-by: Mohamed Khalfella Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Reviewed-by: Daniel Wagner Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 884286f90688..83f3d2f8ef2d 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1960,24 +1960,24 @@ static int __init nvmet_init(void) if (!nvmet_wq) goto out_free_buffered_work_queue; - error = nvmet_init_discovery(); + error = nvmet_init_debugfs(); if (error) goto out_free_nvmet_work_queue; - error = nvmet_init_debugfs(); - if (error) - goto out_exit_discovery; - - error = nvmet_init_configfs(); + error = nvmet_init_discovery(); if (error) goto out_exit_debugfs; + error = nvmet_init_configfs(); + if (error) + goto out_exit_discovery; + return 0; -out_exit_debugfs: - nvmet_exit_debugfs(); out_exit_discovery: nvmet_exit_discovery(); +out_exit_debugfs: + nvmet_exit_debugfs(); out_free_nvmet_work_queue: destroy_workqueue(nvmet_wq); out_free_buffered_work_queue: From 4e6e151cf92bbaa0622a4da351ff444e4fd9b865 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 29 Jul 2025 11:12:47 -0700 Subject: [PATCH 1282/2411] nvme-pci: fix leak on sgl setup error We need to free the descriptor that was allocated. We also don't necessarily need to unmap each sgl entry, which was previously being attempted unconditionally.
Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 071efec25346..2c6d9506b172 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -935,7 +935,7 @@ static blk_status_t nvme_pci_setup_data_sgl(struct request *req, nvme_pci_sgl_set_seg(&iod->cmd.common.dptr.sgl, sgl_dma, mapped); if (unlikely(iter->status)) - nvme_free_sgls(req); + nvme_unmap_data(req); return iter->status; } From b6160cd2c45c38d01405d8ee3758e9b8a6f8e595 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 25 Jul 2025 15:57:22 +0800 Subject: [PATCH 1283/2411] nvme-auth: remove unneeded semicolon No functional modification involved. ./drivers/nvme/host/auth.c:745:2-3: Unneeded semicolon. ./drivers/nvme/host/auth.c:755:2-3: Unneeded semicolon. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=22937 Signed-off-by: Jiapeng Chong Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/auth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index f6ddbe553289..201fc8809a62 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -742,7 +742,7 @@ static int nvme_auth_secure_concat(struct nvme_ctrl *ctrl, "%s: qid %d failed to generate digest, error %d\n", __func__, chap->qid, ret); goto out_free_psk; - }; + } dev_dbg(ctrl->device, "%s: generated digest %s\n", __func__, digest); ret = nvme_auth_derive_tls_psk(chap->hash_id, psk, psk_len, @@ -752,7 +752,7 @@ static int nvme_auth_secure_concat(struct nvme_ctrl *ctrl, "%s: qid %d failed to derive TLS psk, error %d\n", __func__, chap->qid, ret); goto out_free_digest; - }; + } tls_key = nvme_tls_psk_refresh(ctrl->opts->keyring, ctrl->opts->host->nqn, From 367c240b0a99c7ada700a44345dd3144a02b6164 Mon Sep 17 
00:00:00 2001 From: Bjorn Helgaas Date: Wed, 30 Jul 2025 15:32:45 -0500 Subject: [PATCH 1284/2411] nvme: fix various comment typos Fix typos in comments. Signed-off-by: Bjorn Helgaas Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 4 ++-- drivers/nvme/host/tcp.c | 2 +- drivers/nvme/target/fc.c | 6 +++--- drivers/nvme/target/rdma.c | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 08a5ea3e9383..3e12d4683ac7 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1363,7 +1363,7 @@ nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) * down, and the related FC-NVME Association ID and Connection IDs * become invalid. * - * The behavior of the fc-nvme initiator is such that it's + * The behavior of the fc-nvme initiator is such that its * understanding of the association and connections will implicitly * be torn down. The action is implicit as it may be due to a loss of * connectivity with the fc-nvme target, so you may never get a @@ -2777,7 +2777,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, * as WRITE ZEROES will return a non-zero rq payload_bytes yet * there is no actual payload to be transferred. * To get it right, key data transmission on there being 1 or - * more physical segments in the sg list. If there is no + * more physical segments in the sg list. If there are no * physical segments, there is no payload. */ if (blk_rq_nr_phys_segments(rq)) { diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 9233f088fac8..c0fe8cfb7229 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2179,7 +2179,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) /* * Only start IO queues for which we have allocated the tagset - * and limitted it to the available queues. On reconnects, the + * and limited it to the available queues. 
On reconnects, the * queue number might have changed. */ nr_queues = min(ctrl->tagset->nr_hw_queues + 1, ctrl->queue_count); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 25598a46bf0d..a9b18c051f5b 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -459,7 +459,7 @@ nvmet_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) * down, and the related FC-NVME Association ID and Connection IDs * become invalid. * - * The behavior of the fc-nvme target is such that it's + * The behavior of the fc-nvme target is such that its * understanding of the association and connections will implicitly * be torn down. The action is implicit as it may be due to a loss of * connectivity with the fc-nvme host, so the target may never get a @@ -2313,7 +2313,7 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, ret = tgtport->ops->fcp_op(&tgtport->fc_target_port, fod->fcpreq); if (ret) { /* - * should be ok to set w/o lock as its in the thread of + * should be ok to set w/o lock as it's in the thread of * execution (not an async timer routine) and doesn't * contend with any clearing action */ @@ -2629,7 +2629,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, * and the api of the FC LLDD which may issue a hw command to send the * response, but the LLDD may not get the hw completion for that command * and upcall the nvmet_fc layer before a new command may be - * asynchronously received - its possible for a command to be received + * asynchronously received - it's possible for a command to be received * before the LLDD and nvmet_fc have recycled the job structure. It gives * the appearance of more commands received than fits in the sq. 
* To alleviate this scenario, a temporary queue is maintained in the diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 67f61c67c167..0485e25ab797 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1731,7 +1731,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, * We registered an ib_client to handle device removal for queues, * so we only need to handle the listening port cm_ids. In this case * we nullify the priv to prevent double cm_id destruction and destroying - * the cm_id implicitely by returning a non-zero rc to the callout. + * the cm_id implicitly by returning a non-zero rc to the callout. */ static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, struct nvmet_rdma_queue *queue) @@ -1742,7 +1742,7 @@ static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, /* * This is a queue cm_id. we have registered * an ib_client to handle queues removal - * so don't interfear and just return. + * so don't interfere and just return. */ return 0; } @@ -1760,7 +1760,7 @@ static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, /* * We need to return 1 so that the core will destroy - * it's own ID. What a great API design.. + * its own ID. What a great API design.. */ return 1; } From bdf253d580d7d30e7620844c63a5013fe7ba3f87 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 9 Jul 2025 12:09:02 -0700 Subject: [PATCH 1285/2411] dm-verity: remove support for asynchronous hashes The support for asynchronous hashes in dm-verity has outlived its usefulness. It adds significant code complexity and opportunity for bugs. I don't know of anyone using it in practice. (The original submitter of the code possibly was, but that was 8 years ago.) 
Data I recently collected for en/decryption shows that using off-CPU crypto "accelerators" is consistently much slower than the CPU (https://lore.kernel.org/r/20250704070322.20692-1-ebiggers@kernel.org/), even on CPUs that lack dedicated cryptographic instructions. Similar results are likely to be seen for hashing. I already removed support for asynchronous hashes from fsverity two years ago, and no one ever complained. Moreover, neither dm-verity, fsverity, nor fscrypt has ever actually used the asynchronous crypto algorithms in a truly asynchronous manner. The lack of interest in such optimizations provides further evidence that it's only the CPU-based crypto that actually matters. Historically, it's also been common for people to forget to enable the optimized SHA-256 code, which could contribute to an off-CPU crypto engine being perceived as more useful than it really is. In 6.16 I fixed that: the optimized SHA-256 code is now enabled by default. Therefore, let's drop the support for asynchronous hashes in dm-verity. Tested with verity-compat-test. 
Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Mikulas Patocka --- drivers/md/dm-verity-fec.c | 4 +- drivers/md/dm-verity-target.c | 185 ++++++---------------------------- drivers/md/dm-verity.h | 22 ++-- 3 files changed, 38 insertions(+), 173 deletions(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 631a887b487c..d382a390d39a 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -191,7 +191,7 @@ static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io, u8 *want_digest, u8 *data) { if (unlikely(verity_hash(v, io, data, 1 << v->data_dev_block_bits, - verity_io_real_digest(v, io), true))) + verity_io_real_digest(v, io)))) return 0; return memcmp(verity_io_real_digest(v, io), want_digest, @@ -392,7 +392,7 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io, /* Always re-validate the corrected block against the expected hash */ r = verity_hash(v, io, fio->output, 1 << v->data_dev_block_bits, - verity_io_real_digest(v, io), true); + verity_io_real_digest(v, io)); if (unlikely(r < 0)) return r; diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 81186bded1ce..66a00a8ccb39 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -19,7 +19,6 @@ #include "dm-audit.h" #include #include -#include #include #include #include @@ -61,9 +60,6 @@ module_param_array_named(use_bh_bytes, dm_verity_use_bh_bytes, uint, NULL, 0644) static DEFINE_STATIC_KEY_FALSE(use_bh_wq_enabled); -/* Is at least one dm-verity instance using ahash_tfm instead of shash_tfm? 
*/ -static DEFINE_STATIC_KEY_FALSE(ahash_enabled); - struct dm_verity_prefetch_work { struct work_struct work; struct dm_verity *v; @@ -118,100 +114,21 @@ static sector_t verity_position_at_level(struct dm_verity *v, sector_t block, return block >> (level * v->hash_per_block_bits); } -static int verity_ahash_update(struct dm_verity *v, struct ahash_request *req, - const u8 *data, size_t len, - struct crypto_wait *wait) -{ - struct scatterlist sg; - - if (likely(!is_vmalloc_addr(data))) { - sg_init_one(&sg, data, len); - ahash_request_set_crypt(req, &sg, NULL, len); - return crypto_wait_req(crypto_ahash_update(req), wait); - } - - do { - int r; - size_t this_step = min_t(size_t, len, PAGE_SIZE - offset_in_page(data)); - - flush_kernel_vmap_range((void *)data, this_step); - sg_init_table(&sg, 1); - sg_set_page(&sg, vmalloc_to_page(data), this_step, offset_in_page(data)); - ahash_request_set_crypt(req, &sg, NULL, this_step); - r = crypto_wait_req(crypto_ahash_update(req), wait); - if (unlikely(r)) - return r; - data += this_step; - len -= this_step; - } while (len); - - return 0; -} - -/* - * Wrapper for crypto_ahash_init, which handles verity salting. - */ -static int verity_ahash_init(struct dm_verity *v, struct ahash_request *req, - struct crypto_wait *wait, bool may_sleep) -{ - int r; - - ahash_request_set_tfm(req, v->ahash_tfm); - ahash_request_set_callback(req, - may_sleep ? 
CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG : 0, - crypto_req_done, (void *)wait); - crypto_init_wait(wait); - - r = crypto_wait_req(crypto_ahash_init(req), wait); - - if (unlikely(r < 0)) { - if (r != -ENOMEM) - DMERR("crypto_ahash_init failed: %d", r); - return r; - } - - if (likely(v->salt_size && (v->version >= 1))) - r = verity_ahash_update(v, req, v->salt, v->salt_size, wait); - - return r; -} - -static int verity_ahash_final(struct dm_verity *v, struct ahash_request *req, - u8 *digest, struct crypto_wait *wait) -{ - int r; - - if (unlikely(v->salt_size && (!v->version))) { - r = verity_ahash_update(v, req, v->salt, v->salt_size, wait); - - if (r < 0) { - DMERR("%s failed updating salt: %d", __func__, r); - goto out; - } - } - - ahash_request_set_crypt(req, NULL, digest, 0); - r = crypto_wait_req(crypto_ahash_final(req), wait); -out: - return r; -} - int verity_hash(struct dm_verity *v, struct dm_verity_io *io, - const u8 *data, size_t len, u8 *digest, bool may_sleep) + const u8 *data, size_t len, u8 *digest) { + struct shash_desc *desc = &io->hash_desc; int r; - if (static_branch_unlikely(&ahash_enabled) && !v->shash_tfm) { - struct ahash_request *req = verity_io_hash_req(v, io); - struct crypto_wait wait; - - r = verity_ahash_init(v, req, &wait, may_sleep) ?: - verity_ahash_update(v, req, data, len, &wait) ?: - verity_ahash_final(v, req, digest, &wait); + desc->tfm = v->shash_tfm; + if (unlikely(v->initial_hashstate == NULL)) { + /* Version 0: salt at end */ + r = crypto_shash_init(desc) ?: + crypto_shash_update(desc, data, len) ?: + crypto_shash_update(desc, v->salt, v->salt_size) ?: + crypto_shash_final(desc, digest); } else { - struct shash_desc *desc = verity_io_hash_req(v, io); - - desc->tfm = v->shash_tfm; + /* Version 1: salt at beginning */ r = crypto_shash_import(desc, v->initial_hashstate) ?: crypto_shash_finup(desc, data, len, digest); } @@ -362,7 +279,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, } r 
= verity_hash(v, io, data, 1 << v->hash_dev_block_bits, - verity_io_real_digest(v, io), !io->in_bh); + verity_io_real_digest(v, io)); if (unlikely(r < 0)) goto release_ret_r; @@ -465,7 +382,7 @@ static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, goto free_ret; r = verity_hash(v, io, buffer, 1 << v->data_dev_block_bits, - verity_io_real_digest(v, io), true); + verity_io_real_digest(v, io)); if (unlikely(r)) goto free_ret; @@ -581,7 +498,7 @@ static int verity_verify_io(struct dm_verity_io *io) } r = verity_hash(v, io, data, block_size, - verity_io_real_digest(v, io), !io->in_bh); + verity_io_real_digest(v, io)); if (unlikely(r < 0)) { kunmap_local(data); return r; @@ -1092,12 +1009,7 @@ static void verity_dtr(struct dm_target *ti) kfree(v->zero_digest); verity_free_sig(v); - if (v->ahash_tfm) { - static_branch_dec(&ahash_enabled); - crypto_free_ahash(v->ahash_tfm); - } else { - crypto_free_shash(v->shash_tfm); - } + crypto_free_shash(v->shash_tfm); kfree(v->alg_name); @@ -1157,7 +1069,8 @@ static int verity_alloc_zero_digest(struct dm_verity *v) if (!v->zero_digest) return r; - io = kmalloc(sizeof(*io) + v->hash_reqsize, GFP_KERNEL); + io = kmalloc(sizeof(*io) + crypto_shash_descsize(v->shash_tfm), + GFP_KERNEL); if (!io) return r; /* verity_dtr will free zero_digest */ @@ -1168,7 +1081,7 @@ static int verity_alloc_zero_digest(struct dm_verity *v) goto out; r = verity_hash(v, io, zero_data, 1 << v->data_dev_block_bits, - v->zero_digest, true); + v->zero_digest); out: kfree(io); @@ -1324,9 +1237,7 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v, static int verity_setup_hash_alg(struct dm_verity *v, const char *alg_name) { struct dm_target *ti = v->ti; - struct crypto_ahash *ahash; - struct crypto_shash *shash = NULL; - const char *driver_name; + struct crypto_shash *shash; v->alg_name = kstrdup(alg_name, GFP_KERNEL); if (!v->alg_name) { @@ -1334,50 +1245,14 @@ static int verity_setup_hash_alg(struct 
dm_verity *v, const char *alg_name) return -ENOMEM; } - /* - * Allocate the hash transformation object that this dm-verity instance - * will use. The vast majority of dm-verity users use CPU-based - * hashing, so when possible use the shash API to minimize the crypto - * API overhead. If the ahash API resolves to a different driver - * (likely an off-CPU hardware offload), use ahash instead. Also use - * ahash if the obsolete dm-verity format with the appended salt is - * being used, so that quirk only needs to be handled in one place. - */ - ahash = crypto_alloc_ahash(alg_name, 0, - v->use_bh_wq ? CRYPTO_ALG_ASYNC : 0); - if (IS_ERR(ahash)) { + shash = crypto_alloc_shash(alg_name, 0, 0); + if (IS_ERR(shash)) { ti->error = "Cannot initialize hash function"; - return PTR_ERR(ahash); - } - driver_name = crypto_ahash_driver_name(ahash); - if (v->version >= 1 /* salt prepended, not appended? */) { - shash = crypto_alloc_shash(alg_name, 0, 0); - if (!IS_ERR(shash) && - strcmp(crypto_shash_driver_name(shash), driver_name) != 0) { - /* - * ahash gave a different driver than shash, so probably - * this is a case of real hardware offload. Use ahash. 
- */ - crypto_free_shash(shash); - shash = NULL; - } - } - if (!IS_ERR_OR_NULL(shash)) { - crypto_free_ahash(ahash); - ahash = NULL; - v->shash_tfm = shash; - v->digest_size = crypto_shash_digestsize(shash); - v->hash_reqsize = sizeof(struct shash_desc) + - crypto_shash_descsize(shash); - DMINFO("%s using shash \"%s\"", alg_name, driver_name); - } else { - v->ahash_tfm = ahash; - static_branch_inc(&ahash_enabled); - v->digest_size = crypto_ahash_digestsize(ahash); - v->hash_reqsize = sizeof(struct ahash_request) + - crypto_ahash_reqsize(ahash); - DMINFO("%s using ahash \"%s\"", alg_name, driver_name); + return PTR_ERR(shash); } + v->shash_tfm = shash; + v->digest_size = crypto_shash_digestsize(shash); + DMINFO("%s using \"%s\"", alg_name, crypto_shash_driver_name(shash)); if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) { ti->error = "Digest size too big"; return -EINVAL; @@ -1402,7 +1277,7 @@ static int verity_setup_salt_and_hashstate(struct dm_verity *v, const char *arg) return -EINVAL; } } - if (v->shash_tfm) { + if (v->version) { /* Version 1: salt at beginning */ SHASH_DESC_ON_STACK(desc, v->shash_tfm); int r; @@ -1681,7 +1556,8 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ti->per_io_data_size = sizeof(struct dm_verity_io) + v->hash_reqsize; + ti->per_io_data_size = sizeof(struct dm_verity_io) + + crypto_shash_descsize(v->shash_tfm); r = verity_fec_ctr(v); if (r) @@ -1788,10 +1664,7 @@ static int verity_preresume(struct dm_target *ti) bdev = dm_disk(dm_table_get_md(ti->table))->part0; root_digest.digest = v->root_digest; root_digest.digest_len = v->digest_size; - if (static_branch_unlikely(&ahash_enabled) && !v->shash_tfm) - root_digest.alg = crypto_ahash_alg_name(v->ahash_tfm); - else - root_digest.alg = crypto_shash_alg_name(v->shash_tfm); + root_digest.alg = crypto_shash_alg_name(v->shash_tfm); r = security_bdev_setintegrity(bdev, LSM_INT_DMVERITY_ROOTHASH, &root_digest, sizeof(root_digest)); @@ -1817,7 
+1690,7 @@ static struct target_type verity_target = { .name = "verity", /* Note: the LSMs depend on the singleton and immutable features */ .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE, - .version = {1, 11, 0}, + .version = {1, 12, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 8cbb57862ae1..6d141abd965c 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -39,11 +39,10 @@ struct dm_verity { struct dm_target *ti; struct dm_bufio_client *bufio; char *alg_name; - struct crypto_ahash *ahash_tfm; /* either this or shash_tfm is set */ - struct crypto_shash *shash_tfm; /* either this or ahash_tfm is set */ + struct crypto_shash *shash_tfm; u8 *root_digest; /* digest of the root block */ u8 *salt; /* salt: its size is salt_size */ - u8 *initial_hashstate; /* salted initial state, if shash_tfm is set */ + u8 *initial_hashstate; /* salted initial state, if version >= 1 */ u8 *zero_digest; /* digest for a zero block */ #ifdef CONFIG_SECURITY u8 *root_digest_sig; /* signature of the root digest */ @@ -61,7 +60,6 @@ struct dm_verity { bool hash_failed:1; /* set if hash of any block failed */ bool use_bh_wq:1; /* try to verify in BH wq before normal work-queue */ unsigned int digest_size; /* digest size for the current hash algorithm */ - unsigned int hash_reqsize; /* the size of temporary space for crypto */ enum verity_mode mode; /* mode for handling verification errors */ enum verity_mode error_mode;/* mode for handling I/O errors */ unsigned int corrupted_errs;/* Number of errors for corrupted blocks */ @@ -100,19 +98,13 @@ struct dm_verity_io { u8 want_digest[HASH_MAX_DIGESTSIZE]; /* - * This struct is followed by a variable-sized hash request of size - * v->hash_reqsize, either a struct ahash_request or a struct shash_desc - * (depending on whether ahash_tfm or shash_tfm is being used). To - * access it, use verity_io_hash_req(). + * Temporary space for hashing. 
This is variable-length and must be at + * the end of the struct. struct shash_desc is just the fixed part; + * it's followed by a context of size crypto_shash_descsize(shash_tfm). */ + struct shash_desc hash_desc; }; -static inline void *verity_io_hash_req(struct dm_verity *v, - struct dm_verity_io *io) -{ - return io + 1; -} - static inline u8 *verity_io_real_digest(struct dm_verity *v, struct dm_verity_io *io) { @@ -126,7 +118,7 @@ static inline u8 *verity_io_want_digest(struct dm_verity *v, } extern int verity_hash(struct dm_verity *v, struct dm_verity_io *io, - const u8 *data, size_t len, u8 *digest, bool may_sleep); + const u8 *data, size_t len, u8 *digest); extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, sector_t block, u8 *digest, bool *is_zero); From 487767bff572d46f7c37ad846c4078f6d6c9cc55 Mon Sep 17 00:00:00 2001 From: Purva Yeshi Date: Thu, 10 Jul 2025 13:11:57 +0530 Subject: [PATCH 1286/2411] md: dm-zoned-target: Initialize return variable r to avoid uninitialized use Fix Smatch-detected error: drivers/md/dm-zoned-target.c:1073 dmz_iterate_devices() error: uninitialized symbol 'r'. Smatch detects a possible use of the uninitialized variable 'r' in dmz_iterate_devices() because if dmz->nr_ddevs is zero, the loop is skipped and 'r' is returned without being set, leading to undefined behavior. Initialize 'r' to 0 before the loop. This ensures that if there are no devices to iterate over, the function still returns a defined value. 
Signed-off-by: Purva Yeshi Signed-off-by: Mikulas Patocka --- drivers/md/dm-zoned-target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 5da3db06da10..9da329078ea4 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -1062,7 +1062,7 @@ static int dmz_iterate_devices(struct dm_target *ti, struct dmz_target *dmz = ti->private; unsigned int zone_nr_sectors = dmz_zone_nr_sectors(dmz->metadata); sector_t capacity; - int i, r; + int i, r = 0; for (i = 0; i < dmz->nr_ddevs; i++) { capacity = dmz->dev[i].capacity & ~(zone_nr_sectors - 1); From 225b2cb640d7ddbb2df38130f3f34f4a84497426 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 14 Jul 2025 18:27:46 +0200 Subject: [PATCH 1287/2411] vdo: omit need_resched() before cond_resched() There's no need to call need_resched() because cond_resched() will do nothing if need_resched() returns false. Reviewed-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/funnel-workqueue.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/md/dm-vdo/funnel-workqueue.c b/drivers/md/dm-vdo/funnel-workqueue.c index ae11941c90a9..0613c82bbe8e 100644 --- a/drivers/md/dm-vdo/funnel-workqueue.c +++ b/drivers/md/dm-vdo/funnel-workqueue.c @@ -252,8 +252,7 @@ static void service_work_queue(struct simple_work_queue *queue) * This speeds up some performance tests; that "other work" might include other VDO * threads. */ - if (need_resched()) - cond_resched(); + cond_resched(); } run_finish_hook(queue); From 8d05316d79d8afd20ba767efea8706d8238a9d46 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Mon, 21 Jul 2025 11:49:13 +0800 Subject: [PATCH 1288/2411] dm-raid: do not include dm-core.h In commit 4cc96131afce ("dm: move request-based code out to dm-rq.[hc]") we have a note: "DM targets should _never_ include dm-core.h!". 
And it is not used in any DM targets except dm-raid now, so let's remove it from dm-raid for consistency; also use special helpers instead of accessing dm_table and mapped_device fields directly. This change is merely a cleanup and should not affect functionality. Signed-off-by: Pavel Tikhomirov Reviewed-by: Yu Kuai Signed-off-by: Mikulas Patocka --- drivers/md/dm-raid.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index c4fa8e0e76d2..7257bf430037 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -14,7 +14,6 @@ #include "raid5.h" #include "raid10.h" #include "md-bitmap.h" -#include "dm-core.h" #include @@ -3309,7 +3308,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) /* Disable/enable discard support on raid set. */ configure_discard_support(rs); - rs->md.dm_gendisk = ti->table->md->disk; + rs->md.dm_gendisk = dm_disk(dm_table_get_md(ti->table)); mddev_unlock(&rs->md); return 0; From 9576e1aecf627ac99c369fc8cd265b4847ed0c50 Mon Sep 17 00:00:00 2001 From: LongPing Wei Date: Thu, 31 Jul 2025 16:53:27 +0800 Subject: [PATCH 1289/2411] dm-thin: update the documentation 1. convert KB/MB/GB to KiB/MiB/GiB; 2. change the number of sectors for 128MiB from 256000 to 262144 as 256000 sectors is neither 128 MB nor 128 MiB. Signed-off-by: LongPing Wei Signed-off-by: Mikulas Patocka --- .../device-mapper/thin-provisioning.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/admin-guide/device-mapper/thin-provisioning.rst b/Documentation/admin-guide/device-mapper/thin-provisioning.rst index bafebf79da4b..b2fa49a5608a 100644 --- a/Documentation/admin-guide/device-mapper/thin-provisioning.rst +++ b/Documentation/admin-guide/device-mapper/thin-provisioning.rst @@ -80,11 +80,11 @@ less sharing than average you'll need a larger-than-average metadata device.
As a guide, we suggest you calculate the number of bytes to use in the metadata device as 48 * $data_dev_size / $data_block_size but round it up -to 2MB if the answer is smaller. If you're creating large numbers of +to 2MiB if the answer is smaller. If you're creating large numbers of snapshots which are recording large amounts of change, you may find you need to increase this. -The largest size supported is 16GB: If the device is larger, +The largest size supported is 16GiB: If the device is larger, a warning will be issued and the excess space will not be used. Reloading a pool table @@ -107,13 +107,13 @@ Using an existing pool device $data_block_size gives the smallest unit of disk space that can be allocated at a time expressed in units of 512-byte sectors. -$data_block_size must be between 128 (64KB) and 2097152 (1GB) and a -multiple of 128 (64KB). $data_block_size cannot be changed after the +$data_block_size must be between 128 (64KiB) and 2097152 (1GiB) and a +multiple of 128 (64KiB). $data_block_size cannot be changed after the thin-pool is created. People primarily interested in thin provisioning -may want to use a value such as 1024 (512KB). People doing lots of -snapshotting may want a smaller value such as 128 (64KB). If you are +may want to use a value such as 1024 (512KiB). People doing lots of +snapshotting may want a smaller value such as 128 (64KiB). If you are not zeroing newly-allocated data, a larger $data_block_size in the -region of 256000 (128MB) is suggested. +region of 262144 (128MiB) is suggested. $low_water_mark is expressed in blocks of size $data_block_size. If free space on the data device drops below this level then a dm event @@ -291,7 +291,7 @@ i) Constructor error_if_no_space: Error IOs, instead of queueing, if no space. - Data block size must be between 64KB (128 sectors) and 1GB + Data block size must be between 64KiB (128 sectors) and 1GiB (2097152 sectors) inclusive. 
From b9c73524106e1c0c857006fb9ff2e5a510dc4021 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 29 Jul 2025 14:23:07 -0400 Subject: [PATCH 1290/2411] unwind_user/deferred: Add unwind cache Cache the results of the unwind to ensure the unwind is only performed once, even when called by multiple tracers. The cache nr_entries gets cleared every time the task exits the kernel. When a stacktrace is requested, nr_entries gets set to the number of entries in the stacktrace. If another stacktrace is requested, if nr_entries is not zero, then it contains the same stacktrace that would be retrieved so it is not processed again and the entries is given to the caller. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. Marchesi" Cc: Beau Belgrave Cc: Linus Torvalds Cc: Andrew Morton Cc: Jens Axboe Cc: Florian Weimer Cc: Sam James Link: https://lore.kernel.org/20250729182405.319691167@kernel.org Reviewed-by: Jens Remus Reviewed-By: Indu Bhagat Co-developed-by: Steven Rostedt (Google) Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) --- include/linux/entry-common.h | 2 ++ include/linux/unwind_deferred.h | 8 +++++++ include/linux/unwind_deferred_types.h | 7 +++++- kernel/unwind/deferred.c | 31 +++++++++++++++++++++------ 4 files changed, 40 insertions(+), 8 deletions(-) diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index f94f3fdf15fc..8908b8eeb99b 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -362,6 +363,7 @@ static __always_inline void exit_to_user_mode(void) lockdep_hardirqs_on_prepare(); instrumentation_end(); + unwind_reset_info(); user_enter_irqoff(); arch_exit_to_user_mode(); lockdep_hardirqs_on(CALLER_ADDR0); diff --git a/include/linux/unwind_deferred.h 
b/include/linux/unwind_deferred.h index a5f6e8f8a1a2..baacf4a1eb4c 100644 --- a/include/linux/unwind_deferred.h +++ b/include/linux/unwind_deferred.h @@ -12,6 +12,12 @@ void unwind_task_free(struct task_struct *task); int unwind_user_faultable(struct unwind_stacktrace *trace); +static __always_inline void unwind_reset_info(void) +{ + if (unlikely(current->unwind_info.cache)) + current->unwind_info.cache->nr_entries = 0; +} + #else /* !CONFIG_UNWIND_USER */ static inline void unwind_task_init(struct task_struct *task) {} @@ -19,6 +25,8 @@ static inline void unwind_task_free(struct task_struct *task) {} static inline int unwind_user_faultable(struct unwind_stacktrace *trace) { return -ENOSYS; } +static inline void unwind_reset_info(void) {} + #endif /* !CONFIG_UNWIND_USER */ #endif /* _LINUX_UNWIND_USER_DEFERRED_H */ diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h index aa32db574e43..db5b54b18828 100644 --- a/include/linux/unwind_deferred_types.h +++ b/include/linux/unwind_deferred_types.h @@ -2,8 +2,13 @@ #ifndef _LINUX_UNWIND_USER_DEFERRED_TYPES_H #define _LINUX_UNWIND_USER_DEFERRED_TYPES_H +struct unwind_cache { + unsigned int nr_entries; + unsigned long entries[]; +}; + struct unwind_task_info { - unsigned long *entries; + struct unwind_cache *cache; }; #endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */ diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c index a0badbeb3cc1..96368a5aa522 100644 --- a/kernel/unwind/deferred.c +++ b/kernel/unwind/deferred.c @@ -4,10 +4,13 @@ */ #include #include +#include #include #include -#define UNWIND_MAX_ENTRIES 512 +/* Make the cache fit in a 4K page */ +#define UNWIND_MAX_ENTRIES \ + ((SZ_4K - sizeof(struct unwind_cache)) / sizeof(long)) /** * unwind_user_faultable - Produce a user stacktrace in faultable context @@ -24,6 +27,7 @@ int unwind_user_faultable(struct unwind_stacktrace *trace) { struct unwind_task_info *info = &current->unwind_info; + struct unwind_cache *cache; /*
Should always be called from faultable context */ might_fault(); @@ -31,17 +35,30 @@ int unwind_user_faultable(struct unwind_stacktrace *trace) if (current->flags & PF_EXITING) return -EINVAL; - if (!info->entries) { - info->entries = kmalloc_array(UNWIND_MAX_ENTRIES, sizeof(long), - GFP_KERNEL); - if (!info->entries) + if (!info->cache) { + info->cache = kzalloc(struct_size(cache, entries, UNWIND_MAX_ENTRIES), + GFP_KERNEL); + if (!info->cache) return -ENOMEM; } + cache = info->cache; + trace->entries = cache->entries; + + if (cache->nr_entries) { + /* + * The user stack has already been previously unwound in this + * entry context. Skip the unwind and use the cache. + */ + trace->nr = cache->nr_entries; + return 0; + } + trace->nr = 0; - trace->entries = info->entries; unwind_user(trace, UNWIND_MAX_ENTRIES); + cache->nr_entries = trace->nr; + return 0; } @@ -56,5 +73,5 @@ void unwind_task_free(struct task_struct *task) { struct unwind_task_info *info = &task->unwind_info; - kfree(info->entries); + kfree(info->cache); } From 2dffa355f6c279e7d2e574abf9446c41a631c9e5 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 29 Jul 2025 14:23:08 -0400 Subject: [PATCH 1291/2411] unwind_user/deferred: Add deferred unwinding interface Add an interface for scheduling task work to unwind the user space stack before returning to user space. This solves several problems for its callers: - Ensure the unwind happens in task context even if the caller may be running in interrupt context. - Avoid duplicate unwinds, whether called multiple times by the same caller or by different callers. - Create a "context cookie" which allows trace post-processing to correlate kernel unwinds/traces with the user unwind. A concept of a "cookie" is created to detect when the stacktrace is the same. A cookie is generated the first time a user space stacktrace is requested after the task enters the kernel. 
As the stacktrace is saved on the task_struct while the task is in the kernel, if another request comes in and the cookie is still the same, it will use the saved stacktrace, and not have to regenerate one. The cookie is passed to the caller on request, and when the stacktrace is generated upon returning to user space, it calls the requester's callback with the cookie as well as the stacktrace. The cookie is cleared when it goes back to user space. Note, this currently adds another conditional to the unwind_reset_info() path that is always called returning to user space, but future changes will put this back to a single conditional. A global list is created and protected by a global mutex that holds tracers that register with the unwind infrastructure. The number of registered tracers will be limited in future changes. Each perf program or ftrace instance will register its own descriptor to use for deferred unwind stack traces. Note, in the function unwind_deferred_task_work() that gets called when returning to user space, it uses a global mutex for synchronization which will cause a big bottleneck. This will be replaced by SRCU, but that change adds some complex synchronization that deserves its own commit. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E.
Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Jens Axboe Cc: Florian Weimer Cc: Sam James Link: https://lore.kernel.org/20250729182405.488066537@kernel.org Co-developed-by: Steven Rostedt (Google) Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) --- include/linux/unwind_deferred.h | 24 ++++ include/linux/unwind_deferred_types.h | 24 ++++ kernel/unwind/deferred.c | 156 +++++++++++++++++++++++++- 3 files changed, 203 insertions(+), 1 deletion(-) diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h index baacf4a1eb4c..14efd8c027aa 100644 --- a/include/linux/unwind_deferred.h +++ b/include/linux/unwind_deferred.h @@ -2,9 +2,19 @@ #ifndef _LINUX_UNWIND_USER_DEFERRED_H #define _LINUX_UNWIND_USER_DEFERRED_H +#include #include #include +struct unwind_work; + +typedef void (*unwind_callback_t)(struct unwind_work *work, struct unwind_stacktrace *trace, u64 cookie); + +struct unwind_work { + struct list_head list; + unwind_callback_t func; +}; + #ifdef CONFIG_UNWIND_USER void unwind_task_init(struct task_struct *task); @@ -12,8 +22,19 @@ void unwind_task_free(struct task_struct *task); int unwind_user_faultable(struct unwind_stacktrace *trace); +int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func); +int unwind_deferred_request(struct unwind_work *work, u64 *cookie); +void unwind_deferred_cancel(struct unwind_work *work); + static __always_inline void unwind_reset_info(void) { + if (unlikely(current->unwind_info.id.id)) + current->unwind_info.id.id = 0; + /* + * As unwind_user_faultable() can be called directly and + * depends on nr_entries being cleared on exit to user, + * this needs to be a separate conditional. 
+ */ if (unlikely(current->unwind_info.cache)) current->unwind_info.cache->nr_entries = 0; } @@ -24,6 +45,9 @@ static inline void unwind_task_init(struct task_struct *task) {} static inline void unwind_task_free(struct task_struct *task) {} static inline int unwind_user_faultable(struct unwind_stacktrace *trace) { return -ENOSYS; } +static inline int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func) { return -ENOSYS; } +static inline int unwind_deferred_request(struct unwind_work *work, u64 *timestamp) { return -ENOSYS; } +static inline void unwind_deferred_cancel(struct unwind_work *work) {} static inline void unwind_reset_info(void) {} diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h index db5b54b18828..104c477d5609 100644 --- a/include/linux/unwind_deferred_types.h +++ b/include/linux/unwind_deferred_types.h @@ -7,8 +7,32 @@ struct unwind_cache { unsigned long entries[]; }; +/* + * The unwind_task_id is a unique identifier that maps to a user space + * stacktrace. It is generated the first time a deferred user space + * stacktrace is requested after a task has entered the kerenl and + * is cleared to zero when it exits. The mapped id will be a non-zero + * number. + * + * To simplify the generation of the 64 bit number, 32 bits will be + * the CPU it was generated on, and the other 32 bits will be a per + * cpu counter that gets incremented by two every time a new identifier + * is generated. The LSB will always be set to keep the value + * from being zero. 
+ */ +union unwind_task_id { + struct { + u32 cpu; + u32 cnt; + }; + u64 id; +}; + struct unwind_task_info { struct unwind_cache *cache; + struct callback_head work; + union unwind_task_id id; + int pending; }; #endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */ diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c index 96368a5aa522..2cbae2ada309 100644 --- a/kernel/unwind/deferred.c +++ b/kernel/unwind/deferred.c @@ -2,16 +2,63 @@ /* * Deferred user space unwinding */ +#include +#include +#include +#include #include #include #include #include -#include +#include /* Make the cache fit in a 4K page */ #define UNWIND_MAX_ENTRIES \ ((SZ_4K - sizeof(struct unwind_cache)) / sizeof(long)) +/* Guards adding to and reading the list of callbacks */ +static DEFINE_MUTEX(callback_mutex); +static LIST_HEAD(callbacks); + +/* + * This is a unique percpu identifier for a given task entry context. + * Conceptually, it's incremented every time the CPU enters the kernel from + * user space, so that each "entry context" on the CPU gets a unique ID. In + * reality, as an optimization, it's only incremented on demand for the first + * deferred unwind request after a given entry-from-user. + * + * It's combined with the CPU id to make a systemwide-unique "context cookie". + */ +static DEFINE_PER_CPU(u32, unwind_ctx_ctr); + +/* + * The context cookie is a unique identifier that is assigned to a user + * space stacktrace. As the user space stacktrace remains the same while + * the task is in the kernel, the cookie is an identifier for the stacktrace. + * Although it is possible for the stacktrace to get another cookie if another + * request is made after the cookie was cleared and before reentering user + * space. 
+ */ +static u64 get_cookie(struct unwind_task_info *info) +{ + u32 cnt = 1; + u32 old = 0; + + if (info->id.cpu) + return info->id.id; + + /* LSB is always set to ensure 0 is an invalid value */ + cnt |= __this_cpu_read(unwind_ctx_ctr) + 2; + if (try_cmpxchg(&info->id.cnt, &old, cnt)) { + /* Update the per cpu counter */ + __this_cpu_write(unwind_ctx_ctr, cnt); + } + /* Interrupts are disabled, the CPU will always be same */ + info->id.cpu = smp_processor_id() + 1; /* Must be non zero */ + + return info->id.id; +} + /** * unwind_user_faultable - Produce a user stacktrace in faultable context * @trace: The descriptor that will store the user stacktrace @@ -62,11 +109,117 @@ int unwind_user_faultable(struct unwind_stacktrace *trace) return 0; } +static void unwind_deferred_task_work(struct callback_head *head) +{ + struct unwind_task_info *info = container_of(head, struct unwind_task_info, work); + struct unwind_stacktrace trace; + struct unwind_work *work; + u64 cookie; + + if (WARN_ON_ONCE(!info->pending)) + return; + + /* Allow work to come in again */ + WRITE_ONCE(info->pending, 0); + + /* + * From here on out, the callback must always be called, even if it's + * just an empty trace. + */ + trace.nr = 0; + trace.entries = NULL; + + unwind_user_faultable(&trace); + + cookie = info->id.id; + + guard(mutex)(&callback_mutex); + list_for_each_entry(work, &callbacks, list) { + work->func(work, &trace, cookie); + } +} + +/** + * unwind_deferred_request - Request a user stacktrace on task kernel exit + * @work: Unwind descriptor requesting the trace + * @cookie: The cookie of the first request made for this task + * + * Schedule a user space unwind to be done in task work before exiting the + * kernel. + * + * The returned @cookie output is the generated cookie of the very first + * request for a user space stacktrace for this task since it entered the + * kernel. It can be from a request by any caller of this infrastructure. 
+ * Its value will also be passed to the callback function. It can be + * used to stitch kernel and user stack traces together in post-processing. + * + * It's valid to call this function multiple times for the same @work within + * the same task entry context. Each call will return the same cookie + * while the task hasn't left the kernel. If the callback is not pending + * because it has already been previously called for the same entry context, + * it will be called again with the same stack trace and cookie. + * + * Return: 1 if the the callback was already queued. + * 0 if the callback successfully was queued. + * Negative if there's an error. + * @cookie holds the cookie of the first request by any user + */ +int unwind_deferred_request(struct unwind_work *work, u64 *cookie) +{ + struct unwind_task_info *info = &current->unwind_info; + int ret; + + *cookie = 0; + + if (WARN_ON_ONCE(in_nmi())) + return -EINVAL; + + if ((current->flags & (PF_KTHREAD | PF_EXITING)) || + !user_mode(task_pt_regs(current))) + return -EINVAL; + + guard(irqsave)(); + + *cookie = get_cookie(info); + + /* callback already pending? */ + if (info->pending) + return 1; + + /* The work has been claimed, now schedule it.
*/ + ret = task_work_add(current, &info->work, TWA_RESUME); + if (WARN_ON_ONCE(ret)) + return ret; + + info->pending = 1; + return 0; +} + +void unwind_deferred_cancel(struct unwind_work *work) +{ + if (!work) + return; + + guard(mutex)(&callback_mutex); + list_del(&work->list); +} + +int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func) +{ + memset(work, 0, sizeof(*work)); + + guard(mutex)(&callback_mutex); + list_add(&work->list, &callbacks); + work->func = func; + return 0; +} + void unwind_task_init(struct task_struct *task) { struct unwind_task_info *info = &task->unwind_info; memset(info, 0, sizeof(*info)); + init_task_work(&info->work, unwind_deferred_task_work); } void unwind_task_free(struct task_struct *task) @@ -74,4 +227,5 @@ void unwind_task_free(struct task_struct *task) struct unwind_task_info *info = &task->unwind_info; kfree(info->cache); + task_work_cancel(task, &info->work); } From 055c7060e7ca71bb86da616158fc74254730ae2a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 29 Jul 2025 14:23:09 -0400 Subject: [PATCH 1292/2411] unwind_user/deferred: Make unwind deferral requests NMI-safe Make unwind_deferred_request() NMI-safe so tracers in NMI context can call it and safely request a user space stacktrace when the task exits. Note, this is only allowed for architectures that implement a safe cmpxchg. If an architecture requests a deferred stack trace from NMI context that does not support a safe NMI cmpxchg, it will get an -EINVAL and trigger a warning. For those architectures, they would need another method (perhaps an irqwork), to request a deferred user space stack trace. That can be dealt with later if one of these architectures requires this feature. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Josh Poimboeuf Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E.
Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Jens Axboe Cc: Florian Weimer Cc: Sam James Link: https://lore.kernel.org/20250729182405.657072238@kernel.org Suggested-by: Peter Zijlstra Signed-off-by: Steven Rostedt (Google) --- kernel/unwind/deferred.c | 52 +++++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c index 2cbae2ada309..c5ac087d2396 100644 --- a/kernel/unwind/deferred.c +++ b/kernel/unwind/deferred.c @@ -12,6 +12,31 @@ #include #include +/* + * For requesting a deferred user space stack trace from NMI context + * the architecture must support a safe cmpxchg in NMI context. + * For those architectures that do not have that, then it cannot ask + * for a deferred user space stack trace from an NMI context. If it + * does, then it will get -EINVAL. + */ +#if defined(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) +# define CAN_USE_IN_NMI 1 +static inline bool try_assign_cnt(struct unwind_task_info *info, u32 cnt) +{ + u32 old = 0; + + return try_cmpxchg(&info->id.cnt, &old, cnt); +} +#else +# define CAN_USE_IN_NMI 0 +/* When NMIs are not allowed, this always succeeds */ +static inline bool try_assign_cnt(struct unwind_task_info *info, u32 cnt) +{ + info->id.cnt = cnt; + return true; +} +#endif + /* Make the cache fit in a 4K page */ #define UNWIND_MAX_ENTRIES \ ((SZ_4K - sizeof(struct unwind_cache)) / sizeof(long)) @@ -42,14 +67,13 @@ static DEFINE_PER_CPU(u32, unwind_ctx_ctr); static u64 get_cookie(struct unwind_task_info *info) { u32 cnt = 1; - u32 old = 0; if (info->id.cpu) return info->id.id; /* LSB is always set to ensure 0 is an invalid value */ cnt |= __this_cpu_read(unwind_ctx_ctr) + 2; - if (try_cmpxchg(&info->id.cnt, &old, cnt)) { + if (try_assign_cnt(info, cnt)) { /* Update the per cpu counter */ __this_cpu_write(unwind_ctx_ctr, cnt); } @@ -167,31 +191,43 @@ static void unwind_deferred_task_work(struct callback_head *head) 
int unwind_deferred_request(struct unwind_work *work, u64 *cookie) { struct unwind_task_info *info = &current->unwind_info; + long pending; int ret; *cookie = 0; - if (WARN_ON_ONCE(in_nmi())) - return -EINVAL; - if ((current->flags & (PF_KTHREAD | PF_EXITING)) || !user_mode(task_pt_regs(current))) return -EINVAL; + /* + * NMI requires having safe cmpxchg operations. + * Trigger a warning to make it obvious that an architecture + * is using this in NMI when it should not be. + */ + if (WARN_ON_ONCE(!CAN_USE_IN_NMI && in_nmi())) + return -EINVAL; + guard(irqsave)(); *cookie = get_cookie(info); /* callback already pending? */ - if (info->pending) + pending = READ_ONCE(info->pending); + if (pending) + return 1; + + /* Claim the work unless an NMI just now swooped in to do so. */ + if (!try_cmpxchg(&info->pending, &pending, 1)) return 1; /* The work has been claimed, now schedule it. */ ret = task_work_add(current, &info->work, TWA_RESUME); - if (WARN_ON_ONCE(ret)) + if (WARN_ON_ONCE(ret)) { + WRITE_ONCE(info->pending, 0); return ret; + } - info->pending = 1; return 0; } From be3d526a5b34109cecf3bc23b96f0081ad600a5b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 29 Jul 2025 14:23:10 -0400 Subject: [PATCH 1293/2411] unwind deferred: Use bitmask to determine which callbacks to call In order to know which registered callback requested a stacktrace for when the task goes back to user space, add a bitmask to keep track of all registered tracers. The bitmask is the size of long, which means that on a 32 bit machine, it can have at most 32 registered tracers, and on 64 bit, it can have at most 64 registered tracers. This should not be an issue as there should not be more than 10 (unless BPF can abuse this?). When a tracer registers with unwind_deferred_init() it will get a bit number assigned to it. When a tracer requests a stacktrace, it will have its bit set within the task_struct.
When the task returns back to user space, it will call the callbacks for all the registered tracers where their bits are set in the task's mask. When a tracer is removed by the unwind_deferred_cancel() all current tasks will clear the associated bit, just in case another tracer gets registered immediately afterward and then gets their callback called unexpectedly. To prevent live locks from happening if an event that happens between the task_work and when the task goes back to user space triggers the deferred unwind, have the unwind_mask get cleared on exit to user space and not after the callback is made. Move the pending bit from a value on the task_struct to bit zero of the unwind_mask (saves space on the task_struct). This will allow modifying the pending bit along with the work bits atomically. Instead of clearing a work's bit after its callback is called, it is delayed until exit. If the work is requested again, the task_work is not queued again and the request will be notified that the task has already been called by returning a positive number (the same as if it was already pending). The pending bit is cleared before calling the callback functions but the current work bits remain. If one of the called works registers again, it will not trigger a task_work if its bit is still present in the task's unwind_mask. If a new work requests a deferred unwind, then it will set both the pending bit and its own bit. Note this will also cause any work that was previously queued and had their callback already executed to be executed again. Future work will remove these spurious callbacks. The use of atomic_long bit operations was suggested by Peter Zijlstra: Link: https://lore.kernel.org/all/20250715102912.GQ1613200@noisy.programming.kicks-ass.net/ The unwind_mask could not be converted to atomic_long_t due to atomic_long not having all the bit operations needed by unwind_mask.
Instead it follows other use cases in the kernel and just typecasts the unwind_mask to atomic_long_t when using the two atomic_long functions. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Jens Axboe Cc: Florian Weimer Cc: Sam James Link: https://lore.kernel.org/20250729182405.822789300@kernel.org Signed-off-by: Steven Rostedt (Google) --- include/linux/unwind_deferred.h | 26 +++++++- include/linux/unwind_deferred_types.h | 2 +- kernel/unwind/deferred.c | 87 +++++++++++++++++++++------ 3 files changed, 92 insertions(+), 23 deletions(-) diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h index 14efd8c027aa..337ead927d4d 100644 --- a/include/linux/unwind_deferred.h +++ b/include/linux/unwind_deferred.h @@ -13,10 +13,19 @@ typedef void (*unwind_callback_t)(struct unwind_work *work, struct unwind_stackt struct unwind_work { struct list_head list; unwind_callback_t func; + int bit; }; #ifdef CONFIG_UNWIND_USER +enum { + UNWIND_PENDING_BIT = 0, +}; + +enum { + UNWIND_PENDING = BIT(UNWIND_PENDING_BIT), +}; + void unwind_task_init(struct task_struct *task); void unwind_task_free(struct task_struct *task); @@ -28,15 +37,26 @@ void unwind_deferred_cancel(struct unwind_work *work); static __always_inline void unwind_reset_info(void) { - if (unlikely(current->unwind_info.id.id)) + struct unwind_task_info *info = &current->unwind_info; + unsigned long bits; + + /* Was there any unwinding?
*/ + if (unlikely(info->unwind_mask)) { + bits = info->unwind_mask; + do { + /* Is a task_work going to run again before going back */ + if (bits & UNWIND_PENDING) + return; + } while (!try_cmpxchg(&info->unwind_mask, &bits, 0UL)); current->unwind_info.id.id = 0; + } /* * As unwind_user_faultable() can be called directly and * depends on nr_entries being cleared on exit to user, * this needs to be a separate conditional. */ - if (unlikely(current->unwind_info.cache)) - current->unwind_info.cache->nr_entries = 0; + if (unlikely(info->cache)) + info->cache->nr_entries = 0; } #else /* !CONFIG_UNWIND_USER */ diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h index 104c477d5609..5dc9cda141ff 100644 --- a/include/linux/unwind_deferred_types.h +++ b/include/linux/unwind_deferred_types.h @@ -29,10 +29,10 @@ union unwind_task_id { }; struct unwind_task_info { + unsigned long unwind_mask; struct unwind_cache *cache; struct callback_head work; union unwind_task_id id; - int pending; }; #endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */ diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c index c5ac087d2396..e19f02ef416d 100644 --- a/kernel/unwind/deferred.c +++ b/kernel/unwind/deferred.c @@ -45,6 +45,16 @@ static inline bool try_assign_cnt(struct unwind_task_info *info, u32 cnt) static DEFINE_MUTEX(callback_mutex); static LIST_HEAD(callbacks); +#define RESERVED_BITS (UNWIND_PENDING) + +/* Zero'd bits are available for assigning callback users */ +static unsigned long unwind_mask = RESERVED_BITS; + +static inline bool unwind_pending(struct unwind_task_info *info) +{ + return test_bit(UNWIND_PENDING_BIT, &info->unwind_mask); +} + /* * This is a unique percpu identifier for a given task entry context. 
* Conceptually, it's incremented every time the CPU enters the kernel from @@ -138,14 +148,15 @@ static void unwind_deferred_task_work(struct callback_head *head) struct unwind_task_info *info = container_of(head, struct unwind_task_info, work); struct unwind_stacktrace trace; struct unwind_work *work; + unsigned long bits; u64 cookie; - if (WARN_ON_ONCE(!info->pending)) + if (WARN_ON_ONCE(!unwind_pending(info))) return; - /* Allow work to come in again */ - WRITE_ONCE(info->pending, 0); - + /* Clear pending bit but make sure to have the current bits */ + bits = atomic_long_fetch_andnot(UNWIND_PENDING, + (atomic_long_t *)&info->unwind_mask); /* * From here on out, the callback must always be called, even if it's * just an empty trace. @@ -159,7 +170,8 @@ static void unwind_deferred_task_work(struct callback_head *head) guard(mutex)(&callback_mutex); list_for_each_entry(work, &callbacks, list) { - work->func(work, &trace, cookie); + if (test_bit(work->bit, &bits)) + work->func(work, &trace, cookie); } } @@ -183,15 +195,16 @@ static void unwind_deferred_task_work(struct callback_head *head) * because it has already been previously called for the same entry context, * it will be called again with the same stack trace and cookie. * - * Return: 1 if the the callback was already queued. - * 0 if the callback successfully was queued. + * Return: 0 if the callback successfully was queued. + * 1 if the callback is pending or was already executed. * Negative if there's an error. * @cookie holds the cookie of the first request by any user */ int unwind_deferred_request(struct unwind_work *work, u64 *cookie) { struct unwind_task_info *info = &current->unwind_info; - long pending; + unsigned long old, bits; + unsigned long bit = BIT(work->bit); int ret; *cookie = 0; @@ -212,32 +225,59 @@ int unwind_deferred_request(struct unwind_work *work, u64 *cookie) *cookie = get_cookie(info); - /* callback already pending?
*/ - pending = READ_ONCE(info->pending); - if (pending) + old = READ_ONCE(info->unwind_mask); + + /* Is this already queued or executed */ + if (old & bit) return 1; - /* Claim the work unless an NMI just now swooped in to do so. */ - if (!try_cmpxchg(&info->pending, &pending, 1)) - return 1; + /* + * This work's bit hasn't been set yet. Now set it with the PENDING + * bit and fetch the current value of unwind_mask. If ether the + * work's bit or PENDING was already set, then this is already queued + * to have a callback. + */ + bits = UNWIND_PENDING | bit; + old = atomic_long_fetch_or(bits, (atomic_long_t *)&info->unwind_mask); + if (old & bits) { + /* + * If the work's bit was set, whatever set it had better + * have also set pending and queued a callback. + */ + WARN_ON_ONCE(!(old & UNWIND_PENDING)); + return old & bit; + } /* The work has been claimed, now schedule it. */ ret = task_work_add(current, &info->work, TWA_RESUME); - if (WARN_ON_ONCE(ret)) { - WRITE_ONCE(info->pending, 0); - return ret; - } - return 0; + if (WARN_ON_ONCE(ret)) + WRITE_ONCE(info->unwind_mask, 0); + + return ret; } void unwind_deferred_cancel(struct unwind_work *work) { + struct task_struct *g, *t; + if (!work) return; + /* No work should be using a reserved bit */ + if (WARN_ON_ONCE(BIT(work->bit) & RESERVED_BITS)) + return; + guard(mutex)(&callback_mutex); list_del(&work->list); + + __clear_bit(work->bit, &unwind_mask); + + guard(rcu)(); + /* Clear this bit from all threads */ + for_each_process_thread(g, t) { + clear_bit(work->bit, &t->unwind_info.unwind_mask); + } } int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func) @@ -245,6 +285,14 @@ int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func) memset(work, 0, sizeof(*work)); guard(mutex)(&callback_mutex); + + /* See if there's a bit in the mask available */ + if (unwind_mask == ~0UL) + return -EBUSY; + + work->bit = ffz(unwind_mask); + __set_bit(work->bit, &unwind_mask); + 
list_add(&work->list, &callbacks); work->func = func; return 0; @@ -256,6 +304,7 @@ void unwind_task_init(struct task_struct *task) memset(info, 0, sizeof(*info)); init_task_work(&info->work, unwind_deferred_task_work); + info->unwind_mask = 0; } void unwind_task_free(struct task_struct *task) From 4c75133e745aa95636c9ccbab1603ed363dabcd4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 29 Jul 2025 14:23:11 -0400 Subject: [PATCH 1294/2411] unwind deferred: Add unwind_completed mask to stop spurious callbacks If there's more than one registered tracer to the unwind deferred infrastructure, it is currently possible that one tracer could cause extra callbacks to happen for another tracer if the former requests a deferred stacktrace after the latter's callback was executed and before the task went back to user space. Here's an example of how this could occur: [Task enters kernel] tracer 1 request -> add cookie to its buffer tracer 1 request -> add cookie to its buffer <..> [ task work executes ] tracer 1 callback -> add trace + cookie to its buffer [tracer 2 requests and triggers the task work again] [ task work executes again ] tracer 1 callback -> add trace + cookie to its buffer tracer 2 callback -> add trace + cookie to its buffer [Task exits back to user space] This is because the bit for tracer 1 gets set in the task's unwind_mask when it did its request and does not get cleared until the task returns back to user space. But if another tracer were to request another deferred stacktrace, then the next task work will executed all tracer's callbacks that have their bits set in the task's unwind_mask. To fix this issue, add another mask called unwind_completed and place it into the task's info->cache structure. The cache structure is allocated on the first occurrence of a deferred stacktrace and this unwind_completed mask is not needed until then. It's better to have it in the cache than to permanently waste space in the task_struct. 
After a tracer's callback is executed, its bit gets set in this unwind_completed mask. When the task_work enters, it will AND the task's unwind_mask with the inverse of the unwind_completed which will eliminate any work that already had its callback executed since the task entered the kernel. When the task leaves the kernel, it will reset this unwind_completed mask just like it resets the other values as it enters user space. Link: https://lore.kernel.org/all/20250716142609.47f0e4a5@batman.local.home/ Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Jens Axboe Cc: Florian Weimer Cc: Sam James Link: https://lore.kernel.org/20250729182405.989222722@kernel.org Signed-off-by: Steven Rostedt (Google) --- include/linux/unwind_deferred.h | 4 +++- include/linux/unwind_deferred_types.h | 1 + kernel/unwind/deferred.c | 19 +++++++++++++++---- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h index 337ead927d4d..b9ec4c8515c7 100644 --- a/include/linux/unwind_deferred.h +++ b/include/linux/unwind_deferred.h @@ -55,8 +55,10 @@ static __always_inline void unwind_reset_info(void) * depends on nr_entries being cleared on exit to user, * this needs to be a separate conditional.
*/ - if (unlikely(info->cache)) + if (unlikely(info->cache)) { info->cache->nr_entries = 0; + info->cache->unwind_completed = 0; + } } #else /* !CONFIG_UNWIND_USER */ diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h index 5dc9cda141ff..33b62ac25c86 100644 --- a/include/linux/unwind_deferred_types.h +++ b/include/linux/unwind_deferred_types.h @@ -3,6 +3,7 @@ #define _LINUX_UNWIND_USER_DEFERRED_TYPES_H struct unwind_cache { + unsigned long unwind_completed; unsigned int nr_entries; unsigned long entries[]; }; diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c index e19f02ef416d..a3d26014a2e6 100644 --- a/kernel/unwind/deferred.c +++ b/kernel/unwind/deferred.c @@ -166,12 +166,18 @@ static void unwind_deferred_task_work(struct callback_head *head) unwind_user_faultable(&trace); + if (info->cache) + bits &= ~(info->cache->unwind_completed); + cookie = info->id.id; guard(mutex)(&callback_mutex); list_for_each_entry(work, &callbacks, list) { - if (test_bit(work->bit, &bits)) + if (test_bit(work->bit, &bits)) { work->func(work, &trace, cookie); + if (info->cache) + info->cache->unwind_completed |= BIT(work->bit); + } } } @@ -260,23 +266,28 @@ int unwind_deferred_request(struct unwind_work *work, u64 *cookie) void unwind_deferred_cancel(struct unwind_work *work) { struct task_struct *g, *t; + int bit; if (!work) return; + bit = work->bit; + /* No work should be using a reserved bit */ - if (WARN_ON_ONCE(BIT(work->bit) & RESERVED_BITS)) + if (WARN_ON_ONCE(BIT(bit) & RESERVED_BITS)) return; guard(mutex)(&callback_mutex); list_del(&work->list); - __clear_bit(work->bit, &unwind_mask); + __clear_bit(bit, &unwind_mask); guard(rcu)(); /* Clear this bit from all threads */ for_each_process_thread(g, t) { - clear_bit(work->bit, &t->unwind_info.unwind_mask); + clear_bit(bit, &t->unwind_info.unwind_mask); + if (t->unwind_info.cache) + clear_bit(bit, &t->unwind_info.cache->unwind_completed); } } From 
858fa8a3b083e862114bb6483b9fb50b3e2bc4c3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 29 Jul 2025 14:23:12 -0400 Subject: [PATCH 1295/2411] unwind: Add USED bit to only have one conditional on way back to user space On the way back to user space, the function unwind_reset_info() is called unconditionally (but always inlined). It currently has two conditionals. One that checks the unwind_mask which is set whenever a deferred trace is called and is used to know that the mask needs to be cleared. The other checks if the cache has been allocated, and if so, it resets the nr_entries so that the unwinder knows it needs to do the work to get a new user space stack trace again (it only does it once per entering the kernel). Use one of the bits in the unwind mask as a "USED" bit that gets set whenever a trace is created. This will make it possible to only check the unwind_mask in the unwind_reset_info() to know if it needs to do work or not and eliminates a conditional that happens every time the task goes back to user space. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. 
Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Jens Axboe Cc: Florian Weimer Cc: Sam James Link: https://lore.kernel.org/20250729182406.155422551@kernel.org Signed-off-by: Steven Rostedt (Google) --- include/linux/unwind_deferred.h | 18 +++++++++--------- kernel/unwind/deferred.c | 5 ++++- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h index b9ec4c8515c7..2efbda01e959 100644 --- a/include/linux/unwind_deferred.h +++ b/include/linux/unwind_deferred.h @@ -20,10 +20,14 @@ struct unwind_work { enum { UNWIND_PENDING_BIT = 0, + UNWIND_USED_BIT, }; enum { UNWIND_PENDING = BIT(UNWIND_PENDING_BIT), + + /* Set if the unwinding was used (directly or deferred) */ + UNWIND_USED = BIT(UNWIND_USED_BIT) }; void unwind_task_init(struct task_struct *task); @@ -49,15 +53,11 @@ static __always_inline void unwind_reset_info(void) return; } while (!try_cmpxchg(&info->unwind_mask, &bits, 0UL)); current->unwind_info.id.id = 0; - } - /* - * As unwind_user_faultable() can be called directly and - * depends on nr_entries being cleared on exit to user, - * this needs to be a separate conditional. 
- */ - if (unlikely(info->cache)) { - info->cache->nr_entries = 0; - info->cache->unwind_completed = 0; + + if (unlikely(info->cache)) { + info->cache->nr_entries = 0; + info->cache->unwind_completed = 0; + } } } diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c index a3d26014a2e6..2311b725d691 100644 --- a/kernel/unwind/deferred.c +++ b/kernel/unwind/deferred.c @@ -45,7 +45,7 @@ static inline bool try_assign_cnt(struct unwind_task_info *info, u32 cnt) static DEFINE_MUTEX(callback_mutex); static LIST_HEAD(callbacks); -#define RESERVED_BITS (UNWIND_PENDING) +#define RESERVED_BITS (UNWIND_PENDING | UNWIND_USED) /* Zero'd bits are available for assigning callback users */ static unsigned long unwind_mask = RESERVED_BITS; @@ -140,6 +140,9 @@ int unwind_user_faultable(struct unwind_stacktrace *trace) cache->nr_entries = trace->nr; + /* Clear nr_entries on way back to user space */ + set_bit(UNWIND_USED_BIT, &info->unwind_mask); + return 0; } From 357eda2d745054eb737397368bc9b0f84814b0a5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 29 Jul 2025 14:23:13 -0400 Subject: [PATCH 1296/2411] unwind deferred: Use SRCU unwind_deferred_task_work() Instead of using the callback_mutex to protect the link list of callbacks in unwind_deferred_task_work(), use SRCU instead. This gets called every time a task exits that has to record a stack trace that was requested. This can happen for many tasks on several CPUs at the same time. A mutex is a bottleneck and can cause a bit of contention and slow down performance. As the callbacks themselves are allowed to sleep, regular RCU cannot be used to protect the list. Instead use SRCU, as that still allows the callbacks to sleep and the list can be read without needing to hold the callback_mutex. Link: https://lore.kernel.org/all/ca9bd83a-6c80-4ee0-a83c-224b9d60b755@efficios.com/ Cc: "Paul E. 
McKenney" Cc: Masami Hiramatsu Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Jens Axboe Cc: Florian Weimer Cc: Sam James Link: https://lore.kernel.org/20250729182406.331548065@kernel.org Suggested-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- kernel/unwind/deferred.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c index 2311b725d691..a5ef1c1f915e 100644 --- a/kernel/unwind/deferred.c +++ b/kernel/unwind/deferred.c @@ -41,7 +41,7 @@ static inline bool try_assign_cnt(struct unwind_task_info *info, u32 cnt) #define UNWIND_MAX_ENTRIES \ ((SZ_4K - sizeof(struct unwind_cache)) / sizeof(long)) -/* Guards adding to and reading the list of callbacks */ +/* Guards adding to or removing from the list of callbacks */ static DEFINE_MUTEX(callback_mutex); static LIST_HEAD(callbacks); @@ -49,6 +49,7 @@ static LIST_HEAD(callbacks); /* Zero'd bits are available for assigning callback users */ static unsigned long unwind_mask = RESERVED_BITS; +DEFINE_STATIC_SRCU(unwind_srcu); static inline bool unwind_pending(struct unwind_task_info *info) { @@ -174,8 +175,9 @@ static void unwind_deferred_task_work(struct callback_head *head) cookie = info->id.id; - guard(mutex)(&callback_mutex); - list_for_each_entry(work, &callbacks, list) { + guard(srcu)(&unwind_srcu); + list_for_each_entry_srcu(work, &callbacks, list, + srcu_read_lock_held(&unwind_srcu)) { if (test_bit(work->bit, &bits)) { work->func(work, &trace, cookie); if (info->cache) @@ -213,7 +215,7 @@ int unwind_deferred_request(struct unwind_work *work, u64 *cookie) { struct unwind_task_info *info = &current->unwind_info; unsigned long old, bits; - unsigned long bit = BIT(work->bit); + unsigned long bit; int
ret; *cookie = 0; @@ -230,6 +232,14 @@ int unwind_deferred_request(struct unwind_work *work, u64 *cookie) if (WARN_ON_ONCE(!CAN_USE_IN_NMI && in_nmi())) return -EINVAL; + /* Do not allow cancelled works to request again */ + bit = READ_ONCE(work->bit); + if (WARN_ON_ONCE(bit < 0)) + return -EINVAL; + + /* Only need the mask now */ + bit = BIT(bit); + guard(irqsave)(); *cookie = get_cookie(info); @@ -281,10 +291,15 @@ void unwind_deferred_cancel(struct unwind_work *work) return; guard(mutex)(&callback_mutex); - list_del(&work->list); + list_del_rcu(&work->list); + + /* Do not allow any more requests and prevent callbacks */ + work->bit = -1; __clear_bit(bit, &unwind_mask); + synchronize_srcu(&unwind_srcu); + guard(rcu)(); /* Clear this bit from all threads */ for_each_process_thread(g, t) { @@ -307,7 +322,7 @@ int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func) work->bit = ffz(unwind_mask); __set_bit(work->bit, &unwind_mask); - list_add(&work->list, &callbacks); + list_add_rcu(&work->list, &callbacks); work->func = func; return 0; } From b3b9cb11aa034cfa9eb880bb9bb3d5aaf732e479 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 29 Jul 2025 14:23:14 -0400 Subject: [PATCH 1297/2411] unwind: Finish up unwind when a task exits On do_exit() when a task is exiting, if a unwind is requested and the deferred user stacktrace is deferred via the task_work, the task_work callback is called after exit_mm() is called in do_exit(). This means that the user stack trace will not be retrieved and an empty stack is created. Instead, add a function unwind_deferred_task_exit() and call it just before exit_mm() so that the unwinder can call the requested callbacks with the user space stack. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. 
Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Jens Axboe Cc: Florian Weimer Cc: Sam James Link: https://lore.kernel.org/20250729182406.504259474@kernel.org Signed-off-by: Steven Rostedt (Google) --- include/linux/unwind_deferred.h | 3 +++ kernel/exit.c | 2 ++ kernel/unwind/deferred.c | 23 ++++++++++++++++++++--- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h index 2efbda01e959..26122d00708a 100644 --- a/include/linux/unwind_deferred.h +++ b/include/linux/unwind_deferred.h @@ -39,6 +39,8 @@ int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func); int unwind_deferred_request(struct unwind_work *work, u64 *cookie); void unwind_deferred_cancel(struct unwind_work *work); +void unwind_deferred_task_exit(struct task_struct *task); + static __always_inline void unwind_reset_info(void) { struct unwind_task_info *info = &current->unwind_info; @@ -71,6 +73,7 @@ static inline int unwind_deferred_init(struct unwind_work *work, unwind_callback static inline int unwind_deferred_request(struct unwind_work *work, u64 *timestamp) { return -ENOSYS; } static inline void unwind_deferred_cancel(struct unwind_work *work) {} +static inline void unwind_deferred_task_exit(struct task_struct *task) {} static inline void unwind_reset_info(void) {} #endif /* !CONFIG_UNWIND_USER */ diff --git a/kernel/exit.c b/kernel/exit.c index bb184a67ac73..1d8c8ac33c4f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -68,6 +68,7 @@ #include #include #include +#include #include #include @@ -938,6 +939,7 @@ void __noreturn do_exit(long code) tsk->exit_code = code; taskstats_exit(tsk, group_dead); + unwind_deferred_task_exit(tsk); trace_sched_process_exit(tsk, group_dead); /* diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c index a5ef1c1f915e..dc6040aae3ee 100644 --- a/kernel/unwind/deferred.c +++ b/kernel/unwind/deferred.c @@ -114,7 +114,7 @@ int
unwind_user_faultable(struct unwind_stacktrace *trace) /* Should always be called from faultable context */ might_fault(); - if (current->flags & PF_EXITING) + if (!current->mm) return -EINVAL; if (!info->cache) { @@ -147,9 +147,9 @@ int unwind_user_faultable(struct unwind_stacktrace *trace) return 0; } -static void unwind_deferred_task_work(struct callback_head *head) +static void process_unwind_deferred(struct task_struct *task) { - struct unwind_task_info *info = container_of(head, struct unwind_task_info, work); + struct unwind_task_info *info = &task->unwind_info; struct unwind_stacktrace trace; struct unwind_work *work; unsigned long bits; @@ -186,6 +186,23 @@ static void unwind_deferred_task_work(struct callback_head *head) } } +static void unwind_deferred_task_work(struct callback_head *head) +{ + process_unwind_deferred(current); +} + +void unwind_deferred_task_exit(struct task_struct *task) +{ + struct unwind_task_info *info = &current->unwind_info; + + if (!unwind_pending(info)) + return; + + process_unwind_deferred(task); + + task_work_cancel(task, &info->work); +} + /** * unwind_deferred_request - Request a user stacktrace on task kernel exit * @work: Unwind descriptor requesting the trace From 55a0fbd2ac3fe8f61a30ea697b2eb3034f6778c8 Mon Sep 17 00:00:00 2001 From: LongPing Wei Date: Wed, 30 Jul 2025 14:17:19 +0800 Subject: [PATCH 1298/2411] dm: set DM_TARGET_PASSES_CRYPTO feature for dm-thin dm-thin obviously can pass through inline crypto support.
Signed-off-by: LongPing Wei Signed-off-by: Mikulas Patocka --- drivers/md/dm-thin.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 05cf4e3f2bbe..007bb93e5fca 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -4111,8 +4111,8 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | - DM_TARGET_IMMUTABLE, - .version = {1, 23, 0}, + DM_TARGET_IMMUTABLE | DM_TARGET_PASSES_CRYPTO, + .version = {1, 24, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -4497,7 +4497,8 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type thin_target = { .name = "thin", - .version = {1, 23, 0}, + .features = DM_TARGET_PASSES_CRYPTO, + .version = {1, 24, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, From e9df1755485dd90a89656e8a21ec4d71c909fa30 Mon Sep 17 00:00:00 2001 From: "noble.yang" Date: Thu, 31 Jul 2025 19:06:14 +0800 Subject: [PATCH 1299/2411] ALSA: usb-audio: Add DSD support for Comtrue USB Audio device The vendor Comtrue Inc. (0x2fc6) produces USB audio chipsets like the CT7601 which are capable of Native DSD playback. This patch adds QUIRK_FLAG_DSD_RAW for Comtrue (VID 0x2fc6), which enables native DSD playback (DSD_U32_LE) on their USB Audio device. This has been verified under Ubuntu 25.04 with JRiver. 
Signed-off-by: noble.yang Link: https://patch.msgid.link/20250731110614.4070-1-noble228@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index bd24f3a78ea9..e75b0b1df6eb 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2408,6 +2408,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_DSD_RAW), VENDOR_FLG(0x2d87, /* Cayin device */ QUIRK_FLAG_DSD_RAW), + VENDOR_FLG(0x2fc6, /* Comture-inc devices */ + QUIRK_FLAG_DSD_RAW), VENDOR_FLG(0x3336, /* HEM devices */ QUIRK_FLAG_DSD_RAW), VENDOR_FLG(0x3353, /* Khadas devices */ From 6260da046819b7bda828bacae148fc8856fdebd7 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Thu, 31 Jul 2025 18:02:22 +0800 Subject: [PATCH 1300/2411] selftests: ALSA: fix memory leak in utimer test Free the malloc'd buffer in TEST_F(timer_f, utimer) to prevent memory leak. Fixes: 1026392d10af ("selftests: ALSA: Cover userspace-driven timers with test") Reported-by: Jun Zhan Signed-off-by: WangYuli Link: https://patch.msgid.link/DE4D931FCF54F3DB+20250731100222.65748-1-wangyuli@uniontech.com Signed-off-by: Takashi Iwai --- tools/testing/selftests/alsa/utimer-test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/alsa/utimer-test.c b/tools/testing/selftests/alsa/utimer-test.c index 32ee3ce57721..37964f311a33 100644 --- a/tools/testing/selftests/alsa/utimer-test.c +++ b/tools/testing/selftests/alsa/utimer-test.c @@ -135,6 +135,7 @@ TEST_F(timer_f, utimer) { pthread_join(ticking_thread, NULL); ASSERT_EQ(total_ticks, TICKS_COUNT); pclose(rfp); + free(buf); } TEST(wrong_timers_test) { From 80d2a9eb9af399fe60a6d0dddab10d75364698b8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 31 Jul 2025 11:11:05 +0200 Subject: [PATCH 1301/2411] arm: Update HD-audio configs again The Realtek and HDMI HD-audio codec configs have been slightly updated again since the previous change. 
Follow the new kconfig changes for multi_v7_defconfig and tegra_defconfig, and add a few other configs for HDMI codecs, too. Fixes: 1d8dd982c409 ("ALSA: hda/realtek: Enable drivers as default") Fixes: 81231ad173d8 ("ALSA: hda/hdmi: Enable drivers as default") Cc: linux-arm-kernel@lists.infradead.org Cc: linux-tegra@vger.kernel.org Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250731091109.16901-2-tiwai@suse.de --- arch/arm/configs/multi_v7_defconfig | 4 +++- arch/arm/configs/tegra_defconfig | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 02ddd7ce9e3e..3df90c4b30b1 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -791,10 +791,12 @@ CONFIG_SND=m CONFIG_SND_HDA_TEGRA=m CONFIG_SND_HDA_INPUT_BEEP=y CONFIG_SND_HDA_PATCH_LOADER=y -CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SND_HDA_CODEC_REALTEK=m CONFIG_SND_HDA_CODEC_REALTEK_LIB=m CONFIG_SND_HDA_CODEC_ALC269=m CONFIG_SND_HDA_CODEC_HDMI=m +CONFIG_SND_HDA_CODEC_HDMI_GENERIC=m +CONFIG_SND_HDA_CODEC_HDMI_NVIDIA=m CONFIG_SND_HDA_CODEC_HDMI_TEGRA=m CONFIG_SND_USB_AUDIO=m CONFIG_SND_SOC=m diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index 3a9bda2bf422..ba863b445417 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -225,7 +225,12 @@ CONFIG_SND_HDA_TEGRA=y CONFIG_SND_HDA_INPUT_BEEP=y CONFIG_SND_HDA_PATCH_LOADER=y CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SND_HDA_CODEC_REALTEK_LIB=y +CONFIG_SND_HDA_CODEC_ALC269=y CONFIG_SND_HDA_CODEC_HDMI=y +CONFIG_SND_HDA_CODEC_HDMI_GENERIC=y +CONFIG_SND_HDA_CODEC_HDMI_NVIDIA=y +CONFIG_SND_HDA_CODEC_HDMI_TEGRA=y # CONFIG_SND_ARM is not set # CONFIG_SND_SPI is not set # CONFIG_SND_USB is not set From 1e7e0a2df77d919a3c1a58b8a4efd818a1895bd2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 31 Jul 2025 11:11:06 +0200 Subject: [PATCH 1302/2411] LoongArch: Update 
HD-audio codec configs The HD-audio codec driver configs have been updated again the drivers got split with different kconfigs. Enable all Realtek HD-audio codecs and HDMI codecs (except for NVIDIA_MCP and TEGRA) per request. Fixes: 1d8dd982c409 ("ALSA: hda/realtek: Enable drivers as default") Fixes: 81231ad173d8 ("ALSA: hda/hdmi: Enable drivers as default") Cc: loongarch@lists.linux.dev Reviewed-by: Huacai Chen Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250731091109.16901-3-tiwai@suse.de --- arch/loongarch/configs/loongson3_defconfig | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 0d59af6007b7..7993020ffebb 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -784,8 +784,23 @@ CONFIG_SND_HDA_HWDEP=y CONFIG_SND_HDA_INPUT_BEEP=y CONFIG_SND_HDA_PATCH_LOADER=y CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SND_HDA_CODEC_REALTEK_LIB=y +CONFIG_SND_HDA_CODEC_ALC260=y +CONFIG_SND_HDA_CODEC_ALC262=y +CONFIG_SND_HDA_CODEC_ALC268=y +CONFIG_SND_HDA_CODEC_ALC269=y +CONFIG_SND_HDA_CODEC_ALC662=y +CONFIG_SND_HDA_CODEC_ALC680=y +CONFIG_SND_HDA_CODEC_ALC861=y +CONFIG_SND_HDA_CODEC_ALC861VD=y +CONFIG_SND_HDA_CODEC_ALC880=y +CONFIG_SND_HDA_CODEC_ALC882=y CONFIG_SND_HDA_CODEC_SIGMATEL=y CONFIG_SND_HDA_CODEC_HDMI=y +CONFIG_SND_HDA_CODEC_HDMI_GENERIC=y +CONFIG_SND_HDA_CODEC_HDMI_INTEL=y +CONFIG_SND_HDA_CODEC_HDMI_ATI=y +CONFIG_SND_HDA_CODEC_HDMI_NVIDIA=y CONFIG_SND_HDA_CODEC_CONEXANT=y CONFIG_SND_USB_AUDIO=m CONFIG_SND_SOC=m From 5e0753df9623559542404e167172ba97e412f45e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 31 Jul 2025 11:11:07 +0200 Subject: [PATCH 1303/2411] mips: Update HD-audio configs again The HD-audio codec driver configs have been updated again since the previous change. Correct the types and enable all Realtek HD-audio codecs for loongson, per request. 
Fixes: 1d8dd982c409 ("ALSA: hda/realtek: Enable drivers as default") Fixes: 81231ad173d8 ("ALSA: hda/hdmi: Enable drivers as default") Cc: linux-mips@vger.kernel.org Reviewed-by: Huacai Chen Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250731091109.16901-4-tiwai@suse.de --- arch/mips/configs/loongson2k_defconfig | 11 +++++++++++ arch/mips/configs/loongson3_defconfig | 15 ++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/arch/mips/configs/loongson2k_defconfig b/arch/mips/configs/loongson2k_defconfig index 4b7f914d01d0..7a86632b87e9 100644 --- a/arch/mips/configs/loongson2k_defconfig +++ b/arch/mips/configs/loongson2k_defconfig @@ -257,6 +257,17 @@ CONFIG_SND_HDA_INTEL=y CONFIG_SND_HDA_HWDEP=y CONFIG_SND_HDA_PATCH_LOADER=y CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SND_HDA_CODEC_REALTEK_LIB=y +CONFIG_SND_HDA_CODEC_ALC260=y +CONFIG_SND_HDA_CODEC_ALC262=y +CONFIG_SND_HDA_CODEC_ALC268=y +CONFIG_SND_HDA_CODEC_ALC269=y +CONFIG_SND_HDA_CODEC_ALC662=y +CONFIG_SND_HDA_CODEC_ALC680=y +CONFIG_SND_HDA_CODEC_ALC861=y +CONFIG_SND_HDA_CODEC_ALC861VD=y +CONFIG_SND_HDA_CODEC_ALC880=y +CONFIG_SND_HDA_CODEC_ALC882=y CONFIG_SND_HDA_CODEC_ANALOG=y CONFIG_SND_HDA_CODEC_SIGMATEL=y CONFIG_SND_HDA_CODEC_VIA=y diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index 5ff0c1554168..315ab7ec17b6 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -292,11 +292,24 @@ CONFIG_SND_SEQ_DUMMY=m # CONFIG_SND_ISA is not set CONFIG_SND_HDA_INTEL=m CONFIG_SND_HDA_PATCH_LOADER=y -CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SND_HDA_CODEC_REALTEK=m CONFIG_SND_HDA_CODEC_REALTEK_LIB=m +CONFIG_SND_HDA_CODEC_ALC260=m +CONFIG_SND_HDA_CODEC_ALC262=m +CONFIG_SND_HDA_CODEC_ALC268=m CONFIG_SND_HDA_CODEC_ALC269=m +CONFIG_SND_HDA_CODEC_ALC662=m +CONFIG_SND_HDA_CODEC_ALC680=m +CONFIG_SND_HDA_CODEC_ALC861=m +CONFIG_SND_HDA_CODEC_ALC861VD=m +CONFIG_SND_HDA_CODEC_ALC880=m +CONFIG_SND_HDA_CODEC_ALC882=m 
CONFIG_SND_HDA_CODEC_SIGMATEL=m CONFIG_SND_HDA_CODEC_HDMI=m +CONFIG_SND_HDA_CODEC_HDMI_GENERIC=m +CONFIG_SND_HDA_CODEC_HDMI_INTEL=m +CONFIG_SND_HDA_CODEC_HDMI_ATI=m +CONFIG_SND_HDA_CODEC_HDMI_NVIDIA=m CONFIG_SND_HDA_CODEC_CONEXANT=m # CONFIG_SND_USB is not set CONFIG_HIDRAW=y From df485a4b2b3ee5b35c80f990beb554e38a8a5fb1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 31 Jul 2025 07:37:08 +0200 Subject: [PATCH 1304/2411] ALSA: usb: scarlett2: Fix missing NULL check scarlett2_input_select_ctl_info() sets up the string arrays allocated via kasprintf(), but it misses NULL checks, which may lead to NULL dereference Oops. Let's add the proper NULL check. Fixes: 8eba063b5b2b ("ALSA: scarlett2: Simplify linked channel handling") Link: https://patch.msgid.link/20250731053714.29414-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett2.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/usb/mixer_scarlett2.c b/sound/usb/mixer_scarlett2.c index 15bbdafc4894..1ec203cbbd94 100644 --- a/sound/usb/mixer_scarlett2.c +++ b/sound/usb/mixer_scarlett2.c @@ -3978,8 +3978,13 @@ static int scarlett2_input_select_ctl_info( goto unlock; /* Loop through each input */ - for (i = 0; i < inputs; i++) + for (i = 0; i < inputs; i++) { values[i] = kasprintf(GFP_KERNEL, "Input %d", i + 1); + if (!values[i]) { + err = -ENOMEM; + goto unlock; + } + } err = snd_ctl_enum_info(uinfo, 1, i, (const char * const *)values); From c36049da6c903b732f238eb6fd13c2051fac96cd Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 31 Jul 2025 18:18:02 +0200 Subject: [PATCH 1305/2411] arm64: tegra: Remove numa-node-id properties These were initially added because some software was checking for their presence. However, the device is not NUMA, so adding these is wrong and hence they should be removed. 
Signed-off-by: Thierry Reding --- arch/arm64/boot/dts/nvidia/tegra264.dtsi | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra264.dtsi b/arch/arm64/boot/dts/nvidia/tegra264.dtsi index 62c87a387b14..e02659efa233 100644 --- a/arch/arm64/boot/dts/nvidia/tegra264.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra264.dtsi @@ -11,7 +11,6 @@ / { interrupt-parent = <&gic>; #address-cells = <2>; #size-cells = <2>; - numa-node-id = <0>; reserved-memory { #address-cells = <2>; @@ -341,7 +340,6 @@ cpu0: cpu@0 { status = "okay"; enable-method = "psci"; - numa-node-id = <0>; i-cache-size = <65536>; i-cache-line-size = <64>; @@ -358,7 +356,6 @@ cpu1: cpu@1 { status = "okay"; enable-method = "psci"; - numa-node-id = <0>; i-cache-size = <65536>; i-cache-line-size = <64>; From 123b7c7c2ba725daf3bfa5ce421d65b92cb5c075 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 23 Jul 2025 15:39:12 +0200 Subject: [PATCH 1306/2411] s390/ap: Unmask SLCF bit in card and queue ap functions sysfs The SLCF bit ("stateless command filtering") introduced with CEX8 cards was because of the function mask's default value suppressed when user space read the ap function for an AP card or queue. Unmask this bit so that user space applications like lszcrypt can evaluate and list this feature. 
Fixes: d4c53ae8e494 ("s390/ap: store TAPQ hwinfo in struct ap_card") Signed-off-by: Harald Freudenberger Reviewed-by: Holger Dengler Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/ap.h | 2 +- drivers/s390/crypto/ap_bus.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index 395b02d6a133..352108727d7e 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -103,7 +103,7 @@ struct ap_tapq_hwinfo { unsigned int accel : 1; /* A */ unsigned int ep11 : 1; /* X */ unsigned int apxa : 1; /* APXA */ - unsigned int : 1; + unsigned int slcf : 1; /* Cmd filtering avail. */ unsigned int class : 8; unsigned int bs : 2; /* SE bind/assoc */ unsigned int : 14; diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 88b625ba1978..4b7ffa840563 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -180,7 +180,7 @@ struct ap_card { atomic64_t total_request_count; /* # requests ever for this AP device.*/ }; -#define TAPQ_CARD_HWINFO_MASK 0xFEFF0000FFFF0F0FUL +#define TAPQ_CARD_HWINFO_MASK 0xFFFF0000FFFF0F0FUL #define ASSOC_IDX_INVALID 0x10000 #define to_ap_card(x) container_of((x), struct ap_card, ap_dev.device) From 56f4cfab1c93b14da422cdcd23898eb008033696 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 29 Jul 2025 14:24:36 +0200 Subject: [PATCH 1307/2411] s390/mm: Set high_memory at the end of the identity mapping The value of high_memory variable is set by set_high_memory() function to a value returned by memblock_end_of_DRAM(). The latter function returns by default the upper bound of the last online memory block, not the upper bound of the directly mapped memory region. 
As a result, in case the end of memory happens to be offline, high_memory variable is set to a value that is short on the last offline memory blocks size: RANGE SIZE STATE REMOVABLE BLOCK 0x0000000000000000-0x000000ffffffffff 1T online yes 0-511 0x0000010000000000-0x0000011fffffffff 128G offline 512-575 Memory block size: 2G Total online memory: 1T Total offline memory: 128G crash> p/x vm_layout $1 = { kaslr_offset = 0x3453e918000, kaslr_offset_phys = 0xa534218000, identity_base = 0x0, identity_size = 0x12000000000 } crash> p/x high_memory $2 = 0x10000000000 In the past the value of high_memory was derived from max_low_pfn, which in turn was derived from the identity_size. Since identity_size accommodates the whole memory size - including trailing offline blocks, the offlined blocks did not pose any problem. But since commit e120d1bc12da ("arch, mm: set high_memory in free_area_init()") the value of high_memory is derived from the last memblock online region, and that is where the problem comes from. The value of high_memory is used by several drivers and by external tools (e.g. crash tool aborts while loading a dump). Similarly to ARM, use the override path provided by set_high_memory() function and set the value of high_memory at the end of the identity mapping early. That forces set_high_memory() to leave in high_memory the correct value, even when the end of available memory is offline.
Fixes: e120d1bc12da ("arch, mm: set high_memory in free_area_init()") Tested-by: Mikhail Zaslonko Reviewed-by: Heiko Carstens Reviewed-by: Gerald Schaefer Signed-off-by: Alexander Gordeev --- arch/s390/kernel/setup.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f244c5560e7f..5c9789804120 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -719,6 +719,11 @@ static void __init memblock_add_physmem_info(void) memblock_set_node(0, ULONG_MAX, &memblock.memory, 0); } +static void __init setup_high_memory(void) +{ + high_memory = __va(ident_map_size); +} + /* * Reserve memory used for lowcore. */ @@ -951,6 +956,7 @@ void __init setup_arch(char **cmdline_p) free_physmem_info(); setup_memory_end(); + setup_high_memory(); memblock_dump_all(); setup_memory(); From ccb0aa03d046ff84492396d2e90b408fa5b24a00 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Thu, 10 Jul 2025 17:05:22 +0200 Subject: [PATCH 1308/2411] s390: Support CONFIG_TRACE_MMIO_ACCESS Enable the functionality of commits d593d64f043a ("lib: Add register read/write tracing support") 210031971cdd ("asm-generic/io: Add logging support for MMIO accessors"). It only depends on explicit function calls for the tracing. 
Signed-off-by: Steffen Maier Reviewed-by: Niklas Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 25a773e6596e..ac162e62da86 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -102,6 +102,7 @@ config S390 select ARCH_HAS_UBSAN select ARCH_HAS_VDSO_TIME_DATA select ARCH_HAVE_NMI_SAFE_CMPXCHG + select ARCH_HAVE_TRACE_MMIO_ACCESS select ARCH_INLINE_READ_LOCK select ARCH_INLINE_READ_LOCK_BH select ARCH_INLINE_READ_LOCK_IRQ From 10dd5a0009898ba35eafeb6087e5c83b84742ff1 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 29 Jul 2025 13:16:19 +0200 Subject: [PATCH 1309/2411] s390/mm: Enable THP_SWAP and THP_MIGRATION After hugetlbfs PTE_MARKER support for s390 introduced region-third and segment table swap entries, it is now possible to also enable THP_SWAP and THP_MIGRATION for s390. s390 has different layout for PTE and region / segment table entries (RSTE). This is also true for swap entries, and their swap type and offset encoding. For hugetlbfs PTE_MARKER support, s390 has internal __swp_type_rste() and __swp_offset_rste() helpers to correctly handle RSTE swap entries. But common swap code does not know about this difference, and only uses __swp_type(), __swp_offset() and __swp_entry() helpers for conversion between arch-dependent and arch-independent representation of swp_entry_t for all pagetable levels. On s390, those helpers only work for PTE swap entries. Therefore, implement __pmd_to_swp_entry() to build a fake PTE swap entry and return the arch-dependent representation of that. Correspondingly, implement __swp_entry_to_pmd() to convert that into a proper PMD swap entry again. With this, the arch-dependent swp_entry_t representation will always look like a PTE swap entry in common code. 
This is somewhat similar to fake PTEs in hugetlbfs code for s390, but only requires conversion of the swap type and offset, and not all the possible PTE bits. For PMD swap entry SOFT_DIRTY handling, use the same helpers as for normal PMDs. Similar to PTEs, the SOFT_DIRTY bit location is the same for swap and normal entries. Reviewed-by: Heiko Carstens Signed-off-by: Gerald Schaefer Signed-off-by: Alexander Gordeev --- arch/s390/Kconfig | 2 ++ arch/s390/include/asm/pgtable.h | 45 +++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ac162e62da86..f41f604f0297 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -74,6 +74,7 @@ config S390 select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 + select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL @@ -150,6 +151,7 @@ config S390 select ARCH_WANT_KERNEL_PMD_MKWRITE select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP + select ARCH_WANTS_THP_SWAP select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 select DCACHE_WORD_ACCESS if !KMSAN diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 6d8bc27a366e..c1a7a92f0575 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -963,6 +963,12 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) return clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_SOFT_DIRTY)); } +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +#define pmd_swp_soft_dirty(pmd) pmd_soft_dirty(pmd) +#define pmd_swp_mksoft_dirty(pmd) pmd_mksoft_dirty(pmd) +#define pmd_swp_clear_soft_dirty(pmd) pmd_clear_soft_dirty(pmd) +#endif + /* * query functions pte_write/pte_dirty/pte_young only work if * pte_present() is true. Undefined behaviour if not.. 
@@ -1979,6 +1985,45 @@ static inline unsigned long __swp_offset_rste(swp_entry_t entry) #define __rste_to_swp_entry(rste) ((swp_entry_t) { rste }) +/* + * s390 has different layout for PTE and region / segment table entries (RSTE). + * This is also true for swap entries, and their swap type and offset encoding. + * For hugetlbfs PTE_MARKER support, s390 has internal __swp_type_rste() and + * __swp_offset_rste() helpers to correctly handle RSTE swap entries. + * + * But common swap code does not know about this difference, and only uses + * __swp_type(), __swp_offset() and __swp_entry() helpers for conversion between + * arch-dependent and arch-independent representation of swp_entry_t for all + * pagetable levels. On s390, those helpers only work for PTE swap entries. + * + * Therefore, implement __pmd_to_swp_entry() to build a fake PTE swap entry + * and return the arch-dependent representation of that. Correspondingly, + * implement __swp_entry_to_pmd() to convert that into a proper PMD swap + * entry again. With this, the arch-dependent swp_entry_t representation will + * always look like a PTE swap entry in common code. + * + * This is somewhat similar to fake PTEs in hugetlbfs code for s390, but only + * requires conversion of the swap type and offset, and not all the possible + * PTE bits. 
+ */ +static inline swp_entry_t __pmd_to_swp_entry(pmd_t pmd) +{ + swp_entry_t arch_entry; + pte_t pte; + + arch_entry = __rste_to_swp_entry(pmd_val(pmd)); + pte = mk_swap_pte(__swp_type_rste(arch_entry), __swp_offset_rste(arch_entry)); + return __pte_to_swp_entry(pte); +} + +static inline pmd_t __swp_entry_to_pmd(swp_entry_t arch_entry) +{ + pmd_t pmd; + + pmd = __pmd(mk_swap_rste(__swp_type(arch_entry), __swp_offset(arch_entry))); + return pmd; +} + extern int vmem_add_mapping(unsigned long start, unsigned long size); extern void vmem_remove_mapping(unsigned long start, unsigned long size); extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc); From 6235ce77749f45cac27f630337e2fdf04e8a6c73 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 31 Jul 2025 00:03:30 -0700 Subject: [PATCH 1310/2411] perf record: Cache build-ID of hit DSOs only It post-processes samples to find which DSO has samples. Based on that info, it can save used DSOs in the build-ID cache directory. But for some reason, it saves all DSOs without checking the hit mark. Skipping unused DSOs can give some speedup especially with --buildid-mmap being default. On my idle machine, `time perf record -a sleep 1` goes down from 3 sec to 1.5 sec with this change. 
Fixes: e29386c8f7d71fa5 ("perf record: Add --buildid-mmap option to enable PERF_RECORD_MMAP2's build id") Reviewed-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20250731070330.57116-1-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/util/build-id.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index e2b295fe4d2f..a7018a3b0437 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -872,7 +872,7 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine, char *allocated_name = NULL; int ret = 0; - if (!dso__has_build_id(dso)) + if (!dso__has_build_id(dso) || !dso__hit(dso)) return 0; if (dso__is_kcore(dso)) { From 01051012887329ea78eaca19b1d2eac4c9f601b5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 30 Jul 2025 10:21:37 -0700 Subject: [PATCH 1311/2411] netlink: specs: ethtool: fix module EEPROM input/output arguments Module (SFP) eeprom GET has a lot of input params, they are all mistakenly listed as output in the spec. Looks like kernel doesn't output them at all. Correct what are the inputs and what the outputs. 
Reported-by: Duo Yi Fixes: a353318ebf24 ("tools: ynl: populate most of the ethtool spec") Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250730172137.1322351-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 1063d5d32fea..1bc1bd7d33c2 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -2342,9 +2342,6 @@ operations: do: &module-eeprom-get-op request: - attributes: - - header - reply: attributes: - header - offset @@ -2352,6 +2349,9 @@ operations: - page - bank - i2c-address + reply: + attributes: + - header - data dump: *module-eeprom-get-op - From 12df58ad294253ac1d8df0c9bb9cf726397a671d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 31 Jul 2025 01:47:30 +0200 Subject: [PATCH 1312/2411] bpf: Add cookie object to bpf maps Add a cookie to BPF maps to uniquely identify BPF maps for the timespan when the node is up. This is different to comparing a pointer or BPF map id which could get rolled over and reused. 
Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20250730234733.530041-1-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + kernel/bpf/syscall.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f9cd2164ed23..308530c8326b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -310,6 +310,7 @@ struct bpf_map { bool free_after_rcu_gp; atomic64_t sleepable_refcnt; s64 __percpu *elem_count; + u64 cookie; /* write-once */ }; static inline const char *btf_field_type_name(enum btf_field_type type) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e63039817af3..7a814e98d5f5 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -53,6 +54,7 @@ #define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY) DEFINE_PER_CPU(int, bpf_prog_active); +DEFINE_COOKIE(bpf_map_cookie); static DEFINE_IDR(prog_idr); static DEFINE_SPINLOCK(prog_idr_lock); static DEFINE_IDR(map_idr); @@ -1487,6 +1489,10 @@ static int map_create(union bpf_attr *attr, bool kernel) if (err < 0) goto free_map; + preempt_disable(); + map->cookie = gen_cookie_next(&bpf_map_cookie); + preempt_enable(); + atomic64_set(&map->refcnt, 1); atomic64_set(&map->usercnt, 1); mutex_init(&map->freeze_mutex); From fd1c98f0ef5cbcec842209776505d9e70d8fcd53 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 31 Jul 2025 01:47:31 +0200 Subject: [PATCH 1313/2411] bpf: Move bpf map owner out of common struct Given this is only relevant for BPF tail call maps, it is adding up space and penalizing other map types. We also need to extend this with further objects to track / compare to. Therefore, lets move this out into a separate structure and dynamically allocate it only for BPF tail call maps. 
Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20250730234733.530041-2-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 36 ++++++++++++++++++++++++------------ kernel/bpf/core.c | 35 ++++++++++++++++++----------------- kernel/bpf/syscall.c | 13 +++++++------ 3 files changed, 49 insertions(+), 35 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 308530c8326b..a87646cc5398 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -260,6 +260,18 @@ struct bpf_list_node_kern { void *owner; } __attribute__((aligned(8))); +/* 'Ownership' of program-containing map is claimed by the first program + * that is going to use this map or by the first program which FD is + * stored in the map to make sure that all callers and callees have the + * same prog type, JITed flag and xdp_has_frags flag. + */ +struct bpf_map_owner { + enum bpf_prog_type type; + bool jited; + bool xdp_has_frags; + const struct btf_type *attach_func_proto; +}; + struct bpf_map { const struct bpf_map_ops *ops; struct bpf_map *inner_map_meta; @@ -292,18 +304,8 @@ struct bpf_map { struct rcu_head rcu; }; atomic64_t writecnt; - /* 'Ownership' of program-containing map is claimed by the first program - * that is going to use this map or by the first program which FD is - * stored in the map to make sure that all callers and callees have the - * same prog type, JITed flag and xdp_has_frags flag. 
- */ - struct { - const struct btf_type *attach_func_proto; - spinlock_t lock; - enum bpf_prog_type type; - bool jited; - bool xdp_has_frags; - } owner; + spinlock_t owner_lock; + struct bpf_map_owner *owner; bool bypass_spec_v1; bool frozen; /* write-once; write-protected by freeze_mutex */ bool free_after_mult_rcu_gp; @@ -2109,6 +2111,16 @@ static inline bool bpf_map_flags_access_ok(u32 access_flags) (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); } +static inline struct bpf_map_owner *bpf_map_owner_alloc(struct bpf_map *map) +{ + return kzalloc(sizeof(*map->owner), GFP_ATOMIC); +} + +static inline void bpf_map_owner_free(struct bpf_map *map) +{ + kfree(map->owner); +} + struct bpf_event_entry { struct perf_event *event; struct file *perf_file; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 09dde5b00d0c..6e5b3a67e87f 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2377,28 +2377,29 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp) { enum bpf_prog_type prog_type = resolve_prog_type(fp); - bool ret; struct bpf_prog_aux *aux = fp->aux; + bool ret = false; if (fp->kprobe_override) - return false; + return ret; - spin_lock(&map->owner.lock); - if (!map->owner.type) { - /* There's no owner yet where we could check for - * compatibility. - */ - map->owner.type = prog_type; - map->owner.jited = fp->jited; - map->owner.xdp_has_frags = aux->xdp_has_frags; - map->owner.attach_func_proto = aux->attach_func_proto; + spin_lock(&map->owner_lock); + /* There's no owner yet where we could check for compatibility. 
*/ + if (!map->owner) { + map->owner = bpf_map_owner_alloc(map); + if (!map->owner) + goto err; + map->owner->type = prog_type; + map->owner->jited = fp->jited; + map->owner->xdp_has_frags = aux->xdp_has_frags; + map->owner->attach_func_proto = aux->attach_func_proto; ret = true; } else { - ret = map->owner.type == prog_type && - map->owner.jited == fp->jited && - map->owner.xdp_has_frags == aux->xdp_has_frags; + ret = map->owner->type == prog_type && + map->owner->jited == fp->jited && + map->owner->xdp_has_frags == aux->xdp_has_frags; if (ret && - map->owner.attach_func_proto != aux->attach_func_proto) { + map->owner->attach_func_proto != aux->attach_func_proto) { switch (prog_type) { case BPF_PROG_TYPE_TRACING: case BPF_PROG_TYPE_LSM: @@ -2411,8 +2412,8 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map, } } } - spin_unlock(&map->owner.lock); - +err: + spin_unlock(&map->owner_lock); return ret; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7a814e98d5f5..0fbfa8532c39 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -887,6 +887,7 @@ static void bpf_map_free_deferred(struct work_struct *work) security_bpf_map_free(map); bpf_map_release_memcg(map); + bpf_map_owner_free(map); bpf_map_free(map); } @@ -981,12 +982,12 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) struct bpf_map *map = filp->private_data; u32 type = 0, jited = 0; - if (map_type_contains_progs(map)) { - spin_lock(&map->owner.lock); - type = map->owner.type; - jited = map->owner.jited; - spin_unlock(&map->owner.lock); + spin_lock(&map->owner_lock); + if (map->owner) { + type = map->owner->type; + jited = map->owner->jited; } + spin_unlock(&map->owner_lock); seq_printf(m, "map_type:\t%u\n" @@ -1496,7 +1497,7 @@ static int map_create(union bpf_attr *attr, bool kernel) atomic64_set(&map->refcnt, 1); atomic64_set(&map->usercnt, 1); mutex_init(&map->freeze_mutex); - spin_lock_init(&map->owner.lock); + spin_lock_init(&map->owner_lock); if 
(attr->btf_key_type_id || attr->btf_value_type_id || /* Even the map's value is a kernel's struct, From 9621e60f59eae87eb9ffe88d90f24f391a1ef0f0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 31 Jul 2025 01:47:32 +0200 Subject: [PATCH 1314/2411] bpf: Move cgroup iterator helpers to bpf.h Move them into bpf.h given we also need them in core code. Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20250730234733.530041-3-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- include/linux/bpf-cgroup.h | 5 ----- include/linux/bpf.h | 22 ++++++++++++++-------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 082ccd8ad96b..aedf573bdb42 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -77,9 +77,6 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) extern struct static_key_false cgroup_bpf_enabled_key[MAX_CGROUP_BPF_ATTACH_TYPE]; #define cgroup_bpf_enabled(atype) static_branch_unlikely(&cgroup_bpf_enabled_key[atype]) -#define for_each_cgroup_storage_type(stype) \ - for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) - struct bpf_cgroup_storage_map; struct bpf_storage_buffer { @@ -510,8 +507,6 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ kernel_optval) ({ 0; }) -#define for_each_cgroup_storage_type(stype) for (; false; ) - #endif /* CONFIG_CGROUP_BPF */ #endif /* _BPF_CGROUP_H */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a87646cc5398..02aa41e301a5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -208,6 +208,20 @@ enum btf_field_type { BPF_RES_SPIN_LOCK = (1 << 12), }; +enum bpf_cgroup_storage_type { + BPF_CGROUP_STORAGE_SHARED, + BPF_CGROUP_STORAGE_PERCPU, + __BPF_CGROUP_STORAGE_MAX +#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX +}; + +#ifdef CONFIG_CGROUP_BPF +# define 
for_each_cgroup_storage_type(stype) \ + for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) +#else +# define for_each_cgroup_storage_type(stype) for (; false; ) +#endif /* CONFIG_CGROUP_BPF */ + typedef void (*btf_dtor_kfunc_t)(void *); struct btf_field_kptr { @@ -1085,14 +1099,6 @@ struct bpf_prog_offload { u32 jited_len; }; -enum bpf_cgroup_storage_type { - BPF_CGROUP_STORAGE_SHARED, - BPF_CGROUP_STORAGE_PERCPU, - __BPF_CGROUP_STORAGE_MAX -}; - -#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX - /* The longest tracepoint has 12 args. * See include/trace/bpf_probe.h */ From abad3d0bad72a52137e0c350c59542d75ae4f513 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 31 Jul 2025 01:47:33 +0200 Subject: [PATCH 1315/2411] bpf: Fix oob access in cgroup local storage Lonial reported that an out-of-bounds access in cgroup local storage can be crafted via tail calls. Given two programs each utilizing a cgroup local storage with a different value size, and one program doing a tail call into the other. The verifier will validate each of the individual programs just fine. However, in the runtime context the bpf_cg_run_ctx holds a bpf_prog_array_item which contains the BPF program as well as any cgroup local storage flavor the program uses. Helpers such as bpf_get_local_storage() pick this up from the runtime context: ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx); storage = ctx->prog_item->cgroup_storage[stype]; if (stype == BPF_CGROUP_STORAGE_SHARED) ptr = &READ_ONCE(storage->buf)->data[0]; else ptr = this_cpu_ptr(storage->percpu_buf); For the second program which was called from the originally attached one, this means bpf_get_local_storage() will pick up the former program's map, not its own. With mismatching sizes, this can result in an unintended out-of-bounds access.
To fix this issue, we need to extend bpf_map_owner with an array of storage_cookie[] to match on i) the exact maps from the original program if the second program was using bpf_get_local_storage(), or ii) allow the tail call combination if the second program was not using any of the cgroup local storage maps. Fixes: 7d9c3427894f ("bpf: Make cgroup storages shared between programs on the same cgroup") Reported-by: Lonial Con Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20250730234733.530041-4-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + kernel/bpf/core.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 02aa41e301a5..cc700925b802 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -283,6 +283,7 @@ struct bpf_map_owner { enum bpf_prog_type type; bool jited; bool xdp_has_frags; + u64 storage_cookie[MAX_BPF_CGROUP_STORAGE_TYPE]; const struct btf_type *attach_func_proto; }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 6e5b3a67e87f..5d1650af899d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2378,7 +2378,9 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map, { enum bpf_prog_type prog_type = resolve_prog_type(fp); struct bpf_prog_aux *aux = fp->aux; + enum bpf_cgroup_storage_type i; bool ret = false; + u64 cookie; if (fp->kprobe_override) return ret; @@ -2393,11 +2395,24 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map, map->owner->jited = fp->jited; map->owner->xdp_has_frags = aux->xdp_has_frags; map->owner->attach_func_proto = aux->attach_func_proto; + for_each_cgroup_storage_type(i) { + map->owner->storage_cookie[i] = + aux->cgroup_storage[i] ? 
+ aux->cgroup_storage[i]->cookie : 0; + } ret = true; } else { ret = map->owner->type == prog_type && map->owner->jited == fp->jited && map->owner->xdp_has_frags == aux->xdp_has_frags; + for_each_cgroup_storage_type(i) { + if (!ret) + break; + cookie = aux->cgroup_storage[i] ? + aux->cgroup_storage[i]->cookie : 0; + ret = map->owner->storage_cookie[i] == cookie || + !cookie; + } if (ret && map->owner->attach_func_proto != aux->attach_func_proto) { switch (prog_type) { From 13cb75730b7a8b2dc8fe32874e159b2c7b75efde Mon Sep 17 00:00:00 2001 From: Achill Gilgenast Date: Tue, 29 Jul 2025 11:45:53 +0200 Subject: [PATCH 1316/2411] libbpf: Avoid possible use of uninitialized mod_len Though mod_len is only read when mod_name != NULL and both are initialized together, gcc15 produces a warning with -Werror=maybe-uninitialized: libbpf.c: In function 'find_kernel_btf_id.constprop': libbpf.c:10100:33: error: 'mod_len' may be used uninitialized [-Werror=maybe-uninitialized] 10100 | if (mod_name && strncmp(mod->name, mod_name, mod_len) != 0) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ libbpf.c:10070:21: note: 'mod_len' was declared here 10070 | int ret, i, mod_len; | ^~~~~~~ Silence the false positive. 
Signed-off-by: Achill Gilgenast Acked-by: Yonghong Song Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250729094611.2065713-1-fossdd@pwned.life Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e067cb5776bd..fb4d92c5c339 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10096,7 +10096,7 @@ static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name, enum bpf_attach_type attach_type, int *btf_obj_fd, int *btf_type_id) { - int ret, i, mod_len; + int ret, i, mod_len = 0; const char *fn_name, *mod_name = NULL; fn_name = strchr(attach_name, ':'); From e2ba58ccc9099514380c3300cbc0750b5055fc1c Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 30 Jul 2025 21:49:53 -0700 Subject: [PATCH 1317/2411] block: Fix default IO priority if there is no IO context Upstream commit 53889bcaf536 ("block: make __get_task_ioprio() easier to read") changes the IO priority returned to the caller if no IO context is defined for the task. Prior to this commit, the returned IO priority was determined by task_nice_ioclass() and task_nice_ioprio(). Now it is always IOPRIO_DEFAULT, which translates to IOPRIO_CLASS_NONE with priority 0. However, task_nice_ioclass() returns IOPRIO_CLASS_IDLE, IOPRIO_CLASS_RT, or IOPRIO_CLASS_BE depending on the task scheduling policy, and task_nice_ioprio() returns a value determined by task_nice(). This causes regressions in test code checking the IO priority and class of IO operations on tasks with no IO context. Fix the problem by returning the IO priority calculated from task_nice_ioclass() and task_nice_ioprio() if no IO context is defined to match earlier behavior. 
Fixes: 53889bcaf536 ("block: make __get_task_ioprio() easier to read") Cc: Jens Axboe Cc: Bart Van Assche Signed-off-by: Guenter Roeck Reviewed-by: Yu Kuai Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20250731044953.1852690-1-linux@roeck-us.net Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index b25377b6ea98..5210e8371238 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -60,7 +60,8 @@ static inline int __get_task_ioprio(struct task_struct *p) int prio; if (!ioc) - return IOPRIO_DEFAULT; + return IOPRIO_PRIO_VALUE(task_nice_ioclass(p), + task_nice_ioprio(p)); if (p != current) lockdep_assert_held(&p->alloc_lock); From 765761851d89c772f482494d452e266795460278 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Thu, 31 Jul 2025 20:07:45 +0900 Subject: [PATCH 1318/2411] zloop: fix KASAN use-after-free of tag set When a zoned loop device, or zloop device, is removed, KASAN enabled kernel reports "BUG KASAN use-after-free" in blk_mq_free_tag_set(). The BUG happens because zloop_ctl_remove() calls put_disk(), which invokes zloop_free_disk(). The zloop_free_disk() frees the memory allocated for the zlo pointer. However, after the memory is freed, zloop_ctl_remove() calls blk_mq_free_tag_set(&zlo->tag_set), which accesses the freed zlo. Hence the KASAN use-after-free. zloop_ctl_remove() put_disk(zlo->disk) put_device() kobject_put() ... zloop_free_disk() kvfree(zlo) blk_mq_free_tag_set(&zlo->tag_set) To avoid the BUG, move the call to blk_mq_free_tag_set(&zlo->tag_set) from zloop_ctl_remove() into zloop_free_disk(). This ensures that the tag_set is freed before the call to kvfree(zlo). 
Fixes: eb0570c7df23 ("block: new zoned loop block device driver") CC: stable@vger.kernel.org Signed-off-by: Shin'ichiro Kawasaki Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250731110745.165751-1-shinichiro.kawasaki@wdc.com Signed-off-by: Jens Axboe --- drivers/block/zloop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c index 553b1a713ab9..a423228e201b 100644 --- a/drivers/block/zloop.c +++ b/drivers/block/zloop.c @@ -700,6 +700,8 @@ static void zloop_free_disk(struct gendisk *disk) struct zloop_device *zlo = disk->private_data; unsigned int i; + blk_mq_free_tag_set(&zlo->tag_set); + for (i = 0; i < zlo->nr_zones; i++) { struct zloop_zone *zone = &zlo->zones[i]; @@ -1080,7 +1082,6 @@ static int zloop_ctl_remove(struct zloop_options *opts) del_gendisk(zlo->disk); put_disk(zlo->disk); - blk_mq_free_tag_set(&zlo->tag_set); pr_info("Removed device %d\n", opts->id); From fad6551fcf537375702b9af012508156a16a1ff7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 31 Jul 2025 08:22:28 -0700 Subject: [PATCH 1319/2411] block: ensure discard_granularity is zero when discard is not supported Documentation/ABI/stable/sysfs-block states: What: /sys/block//queue/discard_granularity [...] A discard_granularity of 0 means that the device does not support discard functionality. but this got broken when sorting out the block limits updates. Fix this by setting the discard_granularity limit to zero when the combined max_discard_sectors is zero. Fixes: 3c407dc723bb ("block: default the discard granularity to sector size") Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Link: https://lore.kernel.org/r/20250731152228.873923-1-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-settings.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index a7a794baba72..07874e9b609f 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -400,12 +400,19 @@ int blk_validate_limits(struct queue_limits *lim) lim->max_discard_sectors = min(lim->max_hw_discard_sectors, lim->max_user_discard_sectors); + /* + * When discard is not supported, discard_granularity should be reported + * as 0 to userspace. + */ + if (lim->max_discard_sectors) + lim->discard_granularity = + max(lim->discard_granularity, lim->physical_block_size); + else + lim->discard_granularity = 0; + if (!lim->max_discard_segments) lim->max_discard_segments = 1; - if (lim->discard_granularity < lim->physical_block_size) - lim->discard_granularity = lim->physical_block_size; - /* * By default there is no limit on the segment boundary alignment, * but if there is one it can't be smaller than the page size as From 709580086fbba45c1796c28bab8b4a27887ee32d Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 10 Jul 2025 13:07:30 -0500 Subject: [PATCH 1320/2411] dt-bindings: PCI: Convert st,spear1340-pcie to DT schema Convert the ST SPEAr1340 PCIe binding to DT schema format. It's a straight forward conversion. 
Signed-off-by: Rob Herring (Arm) [mani: added the license] Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250710180731.2969879-1-robh@kernel.org --- .../bindings/pci/spear13xx-pcie.txt | 14 ------ .../bindings/pci/st,spear1340-pcie.yaml | 45 +++++++++++++++++++ 2 files changed, 45 insertions(+), 14 deletions(-) delete mode 100644 Documentation/devicetree/bindings/pci/spear13xx-pcie.txt create mode 100644 Documentation/devicetree/bindings/pci/st,spear1340-pcie.yaml diff --git a/Documentation/devicetree/bindings/pci/spear13xx-pcie.txt b/Documentation/devicetree/bindings/pci/spear13xx-pcie.txt deleted file mode 100644 index d5a14f5dad46..000000000000 --- a/Documentation/devicetree/bindings/pci/spear13xx-pcie.txt +++ /dev/null @@ -1,14 +0,0 @@ -SPEAr13XX PCIe DT detail: -================================ - -SPEAr13XX uses the Synopsys DesignWare PCIe controller and ST MiPHY as PHY -controller. - -Required properties: -- compatible : should be "st,spear1340-pcie", "snps,dw-pcie". -- phys : phandle to PHY node associated with PCIe controller -- phy-names : must be "pcie-phy" -- All other definitions as per generic PCI bindings - - Optional properties: -- st,pcie-is-gen1 indicates that forced gen1 initialization is needed. diff --git a/Documentation/devicetree/bindings/pci/st,spear1340-pcie.yaml b/Documentation/devicetree/bindings/pci/st,spear1340-pcie.yaml new file mode 100644 index 000000000000..784f97b3cb7a --- /dev/null +++ b/Documentation/devicetree/bindings/pci/st,spear1340-pcie.yaml @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pci/st,spear1340-pcie.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ST SPEAr1340 PCIe controller + +maintainers: + - Pratyush Anand + +description: + SPEAr13XX uses the Synopsys DesignWare PCIe controller and ST MiPHY as PHY + controller. 
+ +select: + properties: + compatible: + contains: + const: st,spear1340-pcie + required: + - compatible + +properties: + compatible: + items: + - const: st,spear1340-pcie + - const: snps,dw-pcie + + phys: + maxItems: 1 + + st,pcie-is-gen1: + type: boolean + description: Indicates forced gen1 initialization is needed. + +required: + - compatible + - phys + - phy-names + +allOf: + - $ref: snps,dw-pcie.yaml# + +unevaluatedProperties: false From 5c2796adb127c6569e14afab8f18f8bc8db58fb1 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 10 Jul 2025 13:07:40 -0500 Subject: [PATCH 1321/2411] dt-bindings: PCI: Convert axis,artpec6-pcie to DT schema Convert the Axis ARTPEC-6/7 PCIe binding to DT schema format. It's a straightforward conversion. Signed-off-by: Rob Herring (Arm) Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250710180741.2970148-1-robh@kernel.org --- .../bindings/pci/axis,artpec6-pcie.txt | 50 -------- .../bindings/pci/axis,artpec6-pcie.yaml | 118 ++++++++++++++++++ 2 files changed, 118 insertions(+), 50 deletions(-) delete mode 100644 Documentation/devicetree/bindings/pci/axis,artpec6-pcie.txt create mode 100644 Documentation/devicetree/bindings/pci/axis,artpec6-pcie.yaml diff --git a/Documentation/devicetree/bindings/pci/axis,artpec6-pcie.txt b/Documentation/devicetree/bindings/pci/axis,artpec6-pcie.txt deleted file mode 100644 index cc6dcdb676b9..000000000000 --- a/Documentation/devicetree/bindings/pci/axis,artpec6-pcie.txt +++ /dev/null @@ -1,50 +0,0 @@ -* Axis ARTPEC-6 PCIe interface - -This PCIe host controller is based on the Synopsys DesignWare PCIe IP -and thus inherits all the common properties defined in snps,dw-pcie.yaml.
- -Required properties: -- compatible: "axis,artpec6-pcie", "snps,dw-pcie" for ARTPEC-6 in RC mode; - "axis,artpec6-pcie-ep", "snps,dw-pcie" for ARTPEC-6 in EP mode; - "axis,artpec7-pcie", "snps,dw-pcie" for ARTPEC-7 in RC mode; - "axis,artpec7-pcie-ep", "snps,dw-pcie" for ARTPEC-7 in EP mode; -- reg: base addresses and lengths of the PCIe controller (DBI), - the PHY controller, and configuration address space. -- reg-names: Must include the following entries: - - "dbi" - - "phy" - - "config" -- interrupts: A list of interrupt outputs of the controller. Must contain an - entry for each entry in the interrupt-names property. -- interrupt-names: Must include the following entries: - - "msi": The interrupt that is asserted when an MSI is received -- axis,syscon-pcie: A phandle pointing to the ARTPEC-6 system controller, - used to enable and control the Synopsys IP. - -Example: - - pcie@f8050000 { - compatible = "axis,artpec6-pcie", "snps,dw-pcie"; - reg = <0xf8050000 0x2000 - 0xf8040000 0x1000 - 0xc0000000 0x2000>; - reg-names = "dbi", "phy", "config"; - #address-cells = <3>; - #size-cells = <2>; - device_type = "pci"; - /* downstream I/O */ - ranges = <0x81000000 0 0 0xc0002000 0 0x00010000 - /* non-prefetchable memory */ - 0x82000000 0 0xc0012000 0xc0012000 0 0x1ffee000>; - num-lanes = <2>; - bus-range = <0x00 0xff>; - interrupts = ; - interrupt-names = "msi"; - #interrupt-cells = <1>; - interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 2 &intc GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 3 &intc GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 4 &intc GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>; - axis,syscon-pcie = <&syscon>; - }; diff --git a/Documentation/devicetree/bindings/pci/axis,artpec6-pcie.yaml b/Documentation/devicetree/bindings/pci/axis,artpec6-pcie.yaml new file mode 100644 index 000000000000..dcc5661aa004 --- /dev/null +++ b/Documentation/devicetree/bindings/pci/axis,artpec6-pcie.yaml @@ -0,0 +1,118 @@ +# 
SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright 2025 Axis AB +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pci/axis,artpec6-pcie.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Axis ARTPEC-6 PCIe host controller + +maintainers: + - Jesper Nilsson + +description: + This PCIe host controller is based on the Synopsys DesignWare PCIe IP. + +select: + properties: + compatible: + contains: + enum: + - axis,artpec6-pcie + - axis,artpec6-pcie-ep + - axis,artpec7-pcie + - axis,artpec7-pcie-ep + required: + - compatible + +properties: + compatible: + items: + - enum: + - axis,artpec6-pcie + - axis,artpec6-pcie-ep + - axis,artpec7-pcie + - axis,artpec7-pcie-ep + - const: snps,dw-pcie + + reg: + minItems: 3 + maxItems: 4 + + reg-names: + minItems: 3 + maxItems: 4 + + interrupts: + maxItems: 1 + + interrupt-names: + items: + - const: msi + + axis,syscon-pcie: + $ref: /schemas/types.yaml#/definitions/phandle + description: + System controller phandle used to enable and control the Synopsys IP. 
+ +required: + - compatible + - reg + - reg-names + - interrupts + - interrupt-names + - axis,syscon-pcie + +oneOf: + - $ref: snps,dw-pcie.yaml# + properties: + reg: + maxItems: 3 + + reg-names: + items: + - const: dbi + - const: phy + - const: config + + - $ref: snps,dw-pcie-ep.yaml# + properties: + reg: + minItems: 4 + + reg-names: + items: + - const: dbi + - const: dbi2 + - const: phy + - const: addr_space + +unevaluatedProperties: false + +examples: + - | + #include + + pcie@f8050000 { + compatible = "axis,artpec6-pcie", "snps,dw-pcie"; + device_type = "pci"; + reg = <0xf8050000 0x2000 + 0xf8040000 0x1000 + 0xc0000000 0x2000>; + reg-names = "dbi", "phy", "config"; + #address-cells = <3>; + #size-cells = <2>; + ranges = <0x81000000 0 0 0xc0002000 0 0x00010000>, + <0x82000000 0 0xc0012000 0xc0012000 0 0x1ffee000>; + num-lanes = <2>; + bus-range = <0x00 0xff>; + interrupts = ; + interrupt-names = "msi"; + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 0 0x7>; + interrupt-map = <0 0 0 1 &intc GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 2 &intc GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 3 &intc GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 4 &intc GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>; + axis,syscon-pcie = <&syscon>; + }; From f6b5ad2c6c10bea11b8e22cfb9732238f921579a Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 10 Jul 2025 13:07:48 -0500 Subject: [PATCH 1322/2411] dt-bindings: PCI: Convert apm,xgene-pcie to DT schema Convert the Applied Micro X-Gene PCIe binding to DT schema format. It's a straightforward conversion.
Signed-off-by: Rob Herring (Arm) Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250710180749.2970379-1-robh@kernel.org --- .../bindings/pci/apm,xgene-pcie.yaml | 84 +++++++++++++++++++ .../devicetree/bindings/pci/xgene-pci.txt | 50 ----------- MAINTAINERS | 2 +- 3 files changed, 85 insertions(+), 51 deletions(-) create mode 100644 Documentation/devicetree/bindings/pci/apm,xgene-pcie.yaml delete mode 100644 Documentation/devicetree/bindings/pci/xgene-pci.txt diff --git a/Documentation/devicetree/bindings/pci/apm,xgene-pcie.yaml b/Documentation/devicetree/bindings/pci/apm,xgene-pcie.yaml new file mode 100644 index 000000000000..2504b8235889 --- /dev/null +++ b/Documentation/devicetree/bindings/pci/apm,xgene-pcie.yaml @@ -0,0 +1,84 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pci/apm,xgene-pcie.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: AppliedMicro X-Gene PCIe interface + +maintainers: + - Toan Le + +allOf: + - $ref: /schemas/pci/pci-host-bridge.yaml# + +properties: + compatible: + oneOf: + - items: + - const: apm,xgene-storm-pcie + - const: apm,xgene-pcie + - items: + - const: apm,xgene-pcie + + reg: + items: + - description: Controller configuration registers + - description: PCI configuration space registers + + reg-names: + items: + - const: csr + - const: cfg + + clocks: + maxItems: 1 + + clock-names: + items: + - const: pcie + + dma-coherent: true + + msi-parent: + maxItems: 1 + +required: + - compatible + - reg + - reg-names + - '#interrupt-cells' + - interrupt-map-mask + - interrupt-map + - clocks + +unevaluatedProperties: false + +examples: + - | + bus { + #address-cells = <2>; + #size-cells = <2>; + + pcie@1f2b0000 { + compatible = "apm,xgene-storm-pcie", "apm,xgene-pcie"; + device_type = "pci"; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <0x00 0x1f2b0000 0x0 0x00010000>, /* 
Controller registers */ + <0xe0 0xd0000000 0x0 0x00040000>; /* PCI config space */ + reg-names = "csr", "cfg"; + ranges = <0x01000000 0x00 0x00000000 0xe0 0x10000000 0x00 0x00010000>, /* io */ + <0x02000000 0x00 0x80000000 0xe1 0x80000000 0x00 0x80000000>; /* mem */ + dma-ranges = <0x42000000 0x80 0x00000000 0x80 0x00000000 0x00 0x80000000>, + <0x42000000 0x00 0x00000000 0x00 0x00000000 0x80 0x00000000>; + interrupt-map-mask = <0x0 0x0 0x0 0x7>; + interrupt-map = <0x0 0x0 0x0 0x1 &gic 0x0 0xc2 0x1>, + <0x0 0x0 0x0 0x2 &gic 0x0 0xc3 0x1>, + <0x0 0x0 0x0 0x3 &gic 0x0 0xc4 0x1>, + <0x0 0x0 0x0 0x4 &gic 0x0 0xc5 0x1>; + dma-coherent; + clocks = <&pcie0clk 0>; + }; + }; diff --git a/Documentation/devicetree/bindings/pci/xgene-pci.txt b/Documentation/devicetree/bindings/pci/xgene-pci.txt deleted file mode 100644 index 92490330dc1c..000000000000 --- a/Documentation/devicetree/bindings/pci/xgene-pci.txt +++ /dev/null @@ -1,50 +0,0 @@ -* AppliedMicro X-Gene PCIe interface - -Required properties: -- device_type: set to "pci" -- compatible: should contain "apm,xgene-pcie" to identify the core. -- reg: A list of physical base address and length for each set of controller - registers. Must contain an entry for each entry in the reg-names - property. -- reg-names: Must include the following entries: - "csr": controller configuration registers. - "cfg": PCIe configuration space registers. -- #address-cells: set to <3> -- #size-cells: set to <2> -- ranges: ranges for the outbound memory, I/O regions. -- dma-ranges: ranges for the inbound memory regions. -- #interrupt-cells: set to <1> -- interrupt-map-mask and interrupt-map: standard PCI properties - to define the mapping of the PCIe interface to interrupt - numbers. -- clocks: from common clock binding: handle to pci clock. - -Optional properties: -- status: Either "ok" or "disabled". 
-- dma-coherent: Present if DMA operations are coherent - -Example: - - pcie0: pcie@1f2b0000 { - status = "disabled"; - device_type = "pci"; - compatible = "apm,xgene-storm-pcie", "apm,xgene-pcie"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = < 0x00 0x1f2b0000 0x0 0x00010000 /* Controller registers */ - 0xe0 0xd0000000 0x0 0x00040000>; /* PCI config space */ - reg-names = "csr", "cfg"; - ranges = <0x01000000 0x00 0x00000000 0xe0 0x10000000 0x00 0x00010000 /* io */ - 0x02000000 0x00 0x80000000 0xe1 0x80000000 0x00 0x80000000>; /* mem */ - dma-ranges = <0x42000000 0x80 0x00000000 0x80 0x00000000 0x00 0x80000000 - 0x42000000 0x00 0x00000000 0x00 0x00000000 0x80 0x00000000>; - interrupt-map-mask = <0x0 0x0 0x0 0x7>; - interrupt-map = <0x0 0x0 0x0 0x1 &gic 0x0 0xc2 0x1 - 0x0 0x0 0x0 0x2 &gic 0x0 0xc3 0x1 - 0x0 0x0 0x0 0x3 &gic 0x0 0xc4 0x1 - 0x0 0x0 0x0 0x4 &gic 0x0 0xc5 0x1>; - dma-coherent; - clocks = <&pcie0clk 0>; - }; - diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..41cda6564aa4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18913,7 +18913,7 @@ M: Toan Le L: linux-pci@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -F: Documentation/devicetree/bindings/pci/xgene-pci.txt +F: Documentation/devicetree/bindings/pci/apm,xgene-pcie.yaml F: drivers/pci/controller/pci-xgene.c PCI DRIVER FOR ARM VERSATILE PLATFORM From 9e71c41469391c14f8dc5e6242f0d0ed89ce8978 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 10 Jul 2025 13:08:05 -0500 Subject: [PATCH 1323/2411] dt-bindings: PCI: Convert marvell,armada-3700-pcie to DT schema Convert the Marvell Armada 3700 PCIe binding to DT schema format. The 'clocks' property was missing and has been added. 
Signed-off-by: Rob Herring (Arm) Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250710180811.2970846-1-robh@kernel.org --- .../devicetree/bindings/pci/aardvark-pci.txt | 59 ----------- .../pci/marvell,armada-3700-pcie.yaml | 99 +++++++++++++++++++ MAINTAINERS | 2 +- 3 files changed, 100 insertions(+), 60 deletions(-) delete mode 100644 Documentation/devicetree/bindings/pci/aardvark-pci.txt create mode 100644 Documentation/devicetree/bindings/pci/marvell,armada-3700-pcie.yaml diff --git a/Documentation/devicetree/bindings/pci/aardvark-pci.txt b/Documentation/devicetree/bindings/pci/aardvark-pci.txt deleted file mode 100644 index 2b8ca920a7fa..000000000000 --- a/Documentation/devicetree/bindings/pci/aardvark-pci.txt +++ /dev/null @@ -1,59 +0,0 @@ -Aardvark PCIe controller - -This PCIe controller is used on the Marvell Armada 3700 ARM64 SoC. - -The Device Tree node describing an Aardvark PCIe controller must -contain the following properties: - - - compatible: Should be "marvell,armada-3700-pcie" - - reg: range of registers for the PCIe controller - - interrupts: the interrupt line of the PCIe controller - - #address-cells: set to <3> - - #size-cells: set to <2> - - device_type: set to "pci" - - ranges: ranges for the PCI memory and I/O regions - - #interrupt-cells: set to <1> - - msi-controller: indicates that the PCIe controller can itself - handle MSI interrupts - - msi-parent: pointer to the MSI controller to be used - - interrupt-map-mask and interrupt-map: standard PCI properties to - define the mapping of the PCIe interface to interrupt numbers. - - bus-range: PCI bus numbers covered - - phys: the PCIe PHY handle - - max-link-speed: see pci.txt - - reset-gpios: see pci.txt - -In addition, the Device Tree describing an Aardvark PCIe controller -must include a sub-node that describes the legacy interrupt controller -built into the PCIe controller. 
This sub-node must have the following -properties: - - - interrupt-controller - - #interrupt-cells: set to <1> - -Example: - - pcie0: pcie@d0070000 { - compatible = "marvell,armada-3700-pcie"; - device_type = "pci"; - reg = <0 0xd0070000 0 0x20000>; - #address-cells = <3>; - #size-cells = <2>; - bus-range = <0x00 0xff>; - interrupts = ; - #interrupt-cells = <1>; - msi-controller; - msi-parent = <&pcie0>; - ranges = <0x82000000 0 0xe8000000 0 0xe8000000 0 0x1000000 /* Port 0 MEM */ - 0x81000000 0 0xe9000000 0 0xe9000000 0 0x10000>; /* Port 0 IO*/ - interrupt-map-mask = <0 0 0 7>; - interrupt-map = <0 0 0 1 &pcie_intc 0>, - <0 0 0 2 &pcie_intc 1>, - <0 0 0 3 &pcie_intc 2>, - <0 0 0 4 &pcie_intc 3>; - phys = <&comphy1 0>; - pcie_intc: interrupt-controller { - interrupt-controller; - #interrupt-cells = <1>; - }; - }; diff --git a/Documentation/devicetree/bindings/pci/marvell,armada-3700-pcie.yaml b/Documentation/devicetree/bindings/pci/marvell,armada-3700-pcie.yaml new file mode 100644 index 000000000000..68090b3ca419 --- /dev/null +++ b/Documentation/devicetree/bindings/pci/marvell,armada-3700-pcie.yaml @@ -0,0 +1,99 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pci/marvell,armada-3700-pcie.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Marvell Armada 3700 (Aardvark) PCIe Controller + +maintainers: + - Thomas Petazzoni + - Pali Rohár + +allOf: + - $ref: /schemas/pci/pci-host-bridge.yaml# + +properties: + compatible: + const: marvell,armada-3700-pcie + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + interrupts: + maxItems: 1 + + msi-controller: true + + msi-parent: + maxItems: 1 + + phys: + maxItems: 1 + + reset-gpios: + description: PCIe reset GPIO signals. 
+ + interrupt-controller: + type: object + additionalProperties: false + + properties: + interrupt-controller: true + + '#interrupt-cells': + const: 1 + + required: + - interrupt-controller + - '#interrupt-cells' + +required: + - compatible + - reg + - interrupts + - '#interrupt-cells' + +unevaluatedProperties: false + +examples: + - | + #include + #include + + bus { + #address-cells = <2>; + #size-cells = <2>; + + pcie@d0070000 { + compatible = "marvell,armada-3700-pcie"; + device_type = "pci"; + reg = <0 0xd0070000 0 0x20000>; + #address-cells = <3>; + #size-cells = <2>; + bus-range = <0x00 0xff>; + interrupts = ; + msi-controller; + msi-parent = <&pcie0>; + ranges = <0x82000000 0 0xe8000000 0 0xe8000000 0 0x1000000>, + <0x81000000 0 0xe9000000 0 0xe9000000 0 0x10000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 0 7>; + interrupt-map = <0 0 0 1 &pcie_intc 0>, + <0 0 0 2 &pcie_intc 1>, + <0 0 0 3 &pcie_intc 2>, + <0 0 0 4 &pcie_intc 3>; + phys = <&comphy1 0>; + max-link-speed = <2>; + reset-gpios = <&gpio1 15 GPIO_ACTIVE_LOW>; + + pcie_intc: interrupt-controller { + interrupt-controller; + #interrupt-cells = <1>; + }; + }; + }; diff --git a/MAINTAINERS b/MAINTAINERS index 41cda6564aa4..e1b062761dee 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18898,7 +18898,7 @@ M: Pali Rohár L: linux-pci@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -F: Documentation/devicetree/bindings/pci/aardvark-pci.txt +F: Documentation/devicetree/bindings/pci/marvell,armada-3700-pcie.yaml F: drivers/pci/controller/pci-aardvark.c PCI DRIVER FOR ALTERA PCIE IP From bf9d32f203a23950917d2949c812b89bcb8a0340 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 10 Jul 2025 13:08:23 -0500 Subject: [PATCH 1324/2411] dt-bindings: PCI: Convert amazon,al-alpine-v[23]-pcie to DT schema Convert the Amazon Alpine PCIe binding to DT schema format. It's a straight forward conversion. 
Signed-off-by: Rob Herring (Arm) Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250710180825.2971248-1-robh@kernel.org --- .../pci/amazon,al-alpine-v3-pcie.yaml | 71 +++++++++++++++++++ .../devicetree/bindings/pci/pcie-al.txt | 46 ------------ .../devicetree/bindings/pci/snps,dw-pcie.yaml | 2 +- MAINTAINERS | 2 +- 4 files changed, 73 insertions(+), 48 deletions(-) create mode 100644 Documentation/devicetree/bindings/pci/amazon,al-alpine-v3-pcie.yaml delete mode 100644 Documentation/devicetree/bindings/pci/pcie-al.txt diff --git a/Documentation/devicetree/bindings/pci/amazon,al-alpine-v3-pcie.yaml b/Documentation/devicetree/bindings/pci/amazon,al-alpine-v3-pcie.yaml new file mode 100644 index 000000000000..45244cad5f30 --- /dev/null +++ b/Documentation/devicetree/bindings/pci/amazon,al-alpine-v3-pcie.yaml @@ -0,0 +1,71 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pci/amazon,al-alpine-v3-pcie.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Amazon Annapurna Labs Alpine v3 PCIe Host Bridge + +maintainers: + - Jonathan Chocron + +description: + Amazon's Annapurna Labs PCIe Host Controller is based on the Synopsys + DesignWare PCI controller. 
+ +allOf: + - $ref: snps,dw-pcie.yaml# + +properties: + compatible: + enum: + - amazon,al-alpine-v2-pcie + - amazon,al-alpine-v3-pcie + + reg: + items: + - description: PCIe ECAM space + - description: AL proprietary registers + - description: Designware PCIe registers + + reg-names: + items: + - const: config + - const: controller + - const: dbi + + interrupts: + maxItems: 1 + +unevaluatedProperties: false + +required: + - compatible + - reg + - reg-names + +examples: + - | + #include + + bus { + #address-cells = <2>; + #size-cells = <2>; + + pcie@fb600000 { + compatible = "amazon,al-alpine-v3-pcie"; + reg = <0x0 0xfb600000 0x0 0x00100000 + 0x0 0xfd800000 0x0 0x00010000 + 0x0 0xfd810000 0x0 0x00001000>; + reg-names = "config", "controller", "dbi"; + bus-range = <0 255>; + device_type = "pci"; + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + interrupts = ; + interrupt-map-mask = <0x00 0 0 7>; + interrupt-map = <0x0000 0 0 1 &gic GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>; /* INTa */ + ranges = <0x02000000 0x0 0xc0010000 0x0 0xc0010000 0x0 0x07ff0000>; + }; + }; diff --git a/Documentation/devicetree/bindings/pci/pcie-al.txt b/Documentation/devicetree/bindings/pci/pcie-al.txt deleted file mode 100644 index 2ad1fe466eab..000000000000 --- a/Documentation/devicetree/bindings/pci/pcie-al.txt +++ /dev/null @@ -1,46 +0,0 @@ -* Amazon Annapurna Labs PCIe host bridge - -Amazon's Annapurna Labs PCIe Host Controller is based on the Synopsys DesignWare -PCI core. It inherits common properties defined in -Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml. 
- -Properties of the host controller node that differ from it are: - -- compatible: - Usage: required - Value type: - Definition: Value should contain - - "amazon,al-alpine-v2-pcie" for alpine_v2 - - "amazon,al-alpine-v3-pcie" for alpine_v3 - -- reg: - Usage: required - Value type: - Definition: Register ranges as listed in the reg-names property - -- reg-names: - Usage: required - Value type: - Definition: Must include the following entries - - "config" PCIe ECAM space - - "controller" AL proprietary registers - - "dbi" Designware PCIe registers - -Example: - - pcie-external0: pcie@fb600000 { - compatible = "amazon,al-alpine-v3-pcie"; - reg = <0x0 0xfb600000 0x0 0x00100000 - 0x0 0xfd800000 0x0 0x00010000 - 0x0 0xfd810000 0x0 0x00001000>; - reg-names = "config", "controller", "dbi"; - bus-range = <0 255>; - device_type = "pci"; - #address-cells = <3>; - #size-cells = <2>; - #interrupt-cells = <1>; - interrupts = ; - interrupt-map-mask = <0x00 0 0 7>; - interrupt-map = <0x0000 0 0 1 &gic GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>; /* INTa */ - ranges = <0x02000000 0x0 0xc0010000 0x0 0xc0010000 0x0 0x07ff0000>; - }; diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml index 69e82f438f58..b3216141881c 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml @@ -108,7 +108,7 @@ properties: - description: See native 'dbi' CSR region for details. enum: [ ctrl ] - description: See native 'elbi/app' CSR region for details. - enum: [ apb, mgmt, link, ulreg, appl ] + enum: [ apb, mgmt, link, ulreg, appl, controller ] - description: See native 'atu' CSR region for details. enum: [ atu_dma ] - description: Syscon-related CSR regions. 
diff --git a/MAINTAINERS b/MAINTAINERS index e1b062761dee..0de111385acc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19232,7 +19232,7 @@ PCIE DRIVER FOR AMAZON ANNAPURNA LABS M: Jonathan Chocron L: linux-pci@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/pci/pcie-al.txt +F: Documentation/devicetree/bindings/pci/amazon,al-alpine-v3-pcie.yaml F: drivers/pci/controller/dwc/pcie-al.c PCIE DRIVER FOR AMLOGIC MESON From 51e78d97e7bf553c03f47d2c5e9650a1e18f78e7 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 10 Jul 2025 13:08:42 -0500 Subject: [PATCH 1325/2411] dt-bindings: PCI: Remove 83xx-512x-pci.txt This binding is already covered by fsl,mpc8xxx-pci.yaml schema. While the MPC512x is mentioned here, its compatible strings aren't actually documented and remain that way. Signed-off-by: Rob Herring (Arm) Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250710180843.2971667-1-robh@kernel.org --- .../devicetree/bindings/pci/83xx-512x-pci.txt | 39 ------------------- 1 file changed, 39 deletions(-) delete mode 100644 Documentation/devicetree/bindings/pci/83xx-512x-pci.txt diff --git a/Documentation/devicetree/bindings/pci/83xx-512x-pci.txt b/Documentation/devicetree/bindings/pci/83xx-512x-pci.txt deleted file mode 100644 index 3abeecf4983f..000000000000 --- a/Documentation/devicetree/bindings/pci/83xx-512x-pci.txt +++ /dev/null @@ -1,39 +0,0 @@ -* Freescale 83xx and 512x PCI bridges - -Freescale 83xx and 512x SOCs include the same PCI bridge core. 
- -83xx/512x specific notes: -- reg: should contain two address length tuples - The first is for the internal PCI bridge registers - The second is for the PCI config space access registers - -Example (MPC8313ERDB) - pci0: pci@e0008500 { - interrupt-map-mask = <0xf800 0x0 0x0 0x7>; - interrupt-map = < - /* IDSEL 0x0E -mini PCI */ - 0x7000 0x0 0x0 0x1 &ipic 18 0x8 - 0x7000 0x0 0x0 0x2 &ipic 18 0x8 - 0x7000 0x0 0x0 0x3 &ipic 18 0x8 - 0x7000 0x0 0x0 0x4 &ipic 18 0x8 - - /* IDSEL 0x0F - PCI slot */ - 0x7800 0x0 0x0 0x1 &ipic 17 0x8 - 0x7800 0x0 0x0 0x2 &ipic 18 0x8 - 0x7800 0x0 0x0 0x3 &ipic 17 0x8 - 0x7800 0x0 0x0 0x4 &ipic 18 0x8>; - interrupt-parent = <&ipic>; - interrupts = <66 0x8>; - bus-range = <0x0 0x0>; - ranges = <0x02000000 0x0 0x90000000 0x90000000 0x0 0x10000000 - 0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000 - 0x01000000 0x0 0x00000000 0xe2000000 0x0 0x00100000>; - clock-frequency = <66666666>; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0xe0008500 0x100 /* internal registers */ - 0xe0008300 0x8>; /* config space access registers */ - compatible = "fsl,mpc8349-pci"; - device_type = "pci"; - }; From fbcbd66fddd2e9ad295d6e3707e2421f062727d5 Mon Sep 17 00:00:00 2001 From: Ziyue Zhang Date: Fri, 18 Jul 2025 16:17:16 +0800 Subject: [PATCH 1326/2411] dt-bindings: PCI: qcom,pcie-sa8775p: Document 'link_down' reset Each PCIe controller on SA8775P includes a 'link_down' reset line in hardware. This patch documents the reset in the device tree binding. The 'link_down' reset is used to forcefully bring down the PCIe link layer, which is useful in scenarios such as link recovery after errors, power management transitions, and hotplug events. Including this reset line improves robustness and provides finer control over PCIe controller behavior. As the 'link_down' reset was omitted in the initial submission, it is now being documented. 
While this reset is not required for most of the block's basic functionality, and device trees lacking it will continue to function correctly in most cases, it is necessary to ensure maximum robustness when shutting down or recovering the PCIe core. Therefore, its inclusion is justified despite the minor ABI change. Signed-off-by: Ziyue Zhang Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Reviewed-by: Rob Herring (Arm) Reviewed-by: Johan Hovold Link: https://patch.msgid.link/20250718081718.390790-3-ziyue.zhang@oss.qualcomm.com --- .../devicetree/bindings/pci/qcom,pcie-sa8775p.yaml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie-sa8775p.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie-sa8775p.yaml index 4b91b5608013..19afe2a03409 100644 --- a/Documentation/devicetree/bindings/pci/qcom,pcie-sa8775p.yaml +++ b/Documentation/devicetree/bindings/pci/qcom,pcie-sa8775p.yaml @@ -66,11 +66,14 @@ properties: - const: global resets: - maxItems: 1 + items: + - description: PCIe controller reset + - description: PCIe link down reset reset-names: items: - const: pci + - const: link_down required: - interconnects @@ -166,8 +169,10 @@ examples: power-domains = <&gcc PCIE_0_GDSC>; - resets = <&gcc GCC_PCIE_0_BCR>; - reset-names = "pci"; + resets = <&gcc GCC_PCIE_0_BCR>, + <&gcc GCC_PCIE_0_LINK_DOWN_BCR>; + reset-names = "pci", + "link_down"; perst-gpios = <&tlmm 2 GPIO_ACTIVE_LOW>; wake-gpios = <&tlmm 0 GPIO_ACTIVE_HIGH>; From 2f2d42a17b5a6711378d39df74f1f69a831c5d4e Mon Sep 17 00:00:00 2001 From: Zhengxu Zhang Date: Thu, 19 Jun 2025 09:33:31 +0800 Subject: [PATCH 1327/2411] exfat: fdatasync flag should be same like generic_write_sync() Test: androbench by default setting, use 64GB sdcard. the random write speed: without this patch 3.5MB/s with this patch 7MB/s After patch "11a347fb6cef", the random write speed decreased significantly. 
That patch modified the .write_iter() interface. Compared with generic_file_write_iter(), which calls generic_write_sync(), exfat_file_write_iter() calls vfs_fsync_range() with the wrong fdatasync flag, so the fdatasync mode is not used and random write speed decreases. So use generic_write_sync() instead of vfs_fsync_range(). Fixes: 11a347fb6cef ("exfat: change to get file size from DataLength") Signed-off-by: Zhengxu Zhang Acked-by: Yuezhang Mo Signed-off-by: Namjae Jeon --- fs/exfat/file.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 6b82497572b4..538d2b6ac2ec 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -622,9 +622,8 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) if (pos > valid_size) pos = valid_size; - if (iocb_is_dsync(iocb) && iocb->ki_pos > pos) { - ssize_t err = vfs_fsync_range(file, pos, iocb->ki_pos - 1, - iocb->ki_flags & IOCB_SYNC); + if (iocb->ki_pos > pos) { + ssize_t err = generic_write_sync(iocb, iocb->ki_pos - pos); if (err < 0) return err; } From 99f9a97dce39ad413c39b92c90393bbd6778f3fd Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Tue, 18 Mar 2025 17:00:49 +0800 Subject: [PATCH 1328/2411] exfat: add cluster chain loop check for dir An infinite loop may occur if the following conditions occur due to file system corruption. (1) Condition for exfat_count_dir_entries() to loop infinitely. - The cluster chain includes a loop. - There is no UNUSED entry in the cluster chain. (2) Condition for exfat_create_upcase_table() to loop infinitely. - The cluster chain of the root directory includes a loop. - There are no UNUSED entry and up-case table entry in the cluster chain of the root directory. (3) Condition for exfat_load_bitmap() to loop infinitely. - The cluster chain of the root directory includes a loop. - There are no UNUSED entry and bitmap entry in the cluster chain of the root directory.
(4) Condition for exfat_find_dir_entry() to loop infinitely. - The cluster chain includes a loop. - The unused directory entries were exhausted by some operation. (5) Condition for exfat_check_dir_empty() to loop infinitely. - The cluster chain includes a loop. - The unused directory entries were exhausted by some operation. - All files and sub-directories under the directory are deleted. This commit adds checks to break the above infinite loop. Signed-off-by: Yuezhang Mo Signed-off-by: Namjae Jeon --- fs/exfat/dir.c | 12 ++++++++++++ fs/exfat/fatent.c | 10 ++++++++++ fs/exfat/namei.c | 5 +++++ fs/exfat/super.c | 32 +++++++++++++++++++++----------- 4 files changed, 48 insertions(+), 11 deletions(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 3103b932b674..ee060e26f51d 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -996,6 +996,7 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, struct exfat_hint_femp candi_empty; struct exfat_sb_info *sbi = EXFAT_SB(sb); int num_entries = exfat_calc_num_entries(p_uniname); + unsigned int clu_count = 0; if (num_entries < 0) return num_entries; @@ -1133,6 +1134,10 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, } else { if (exfat_get_next_cluster(sb, &clu.dir)) return -EIO; + + /* break if the cluster chain includes a loop */ + if (unlikely(++clu_count > EXFAT_DATA_CLUSTER_COUNT(sbi))) + goto not_found; } } @@ -1195,6 +1200,7 @@ int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir) int i, count = 0; int dentries_per_clu; unsigned int entry_type; + unsigned int clu_count = 0; struct exfat_chain clu; struct exfat_dentry *ep; struct exfat_sb_info *sbi = EXFAT_SB(sb); @@ -1227,6 +1233,12 @@ int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir) } else { if (exfat_get_next_cluster(sb, &(clu.dir))) return -EIO; + + if (unlikely(++clu_count > sbi->used_clusters)) { + exfat_fs_error(sb, "FAT or bitmap is corrupted"); + 
return -EIO; + } + } } diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c index 23065f948ae7..232cc7f8ab92 100644 --- a/fs/exfat/fatent.c +++ b/fs/exfat/fatent.c @@ -490,5 +490,15 @@ int exfat_count_num_clusters(struct super_block *sb, } *ret_count = count; + + /* + * since exfat_count_used_clusters() is not called, sbi->used_clusters + * cannot be used here. + */ + if (unlikely(i == sbi->num_clusters && clu != EXFAT_EOF_CLUSTER)) { + exfat_fs_error(sb, "The cluster chain has a loop"); + return -EIO; + } + return 0; } diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index fede0283d6e2..f5f1c4e8a29f 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -890,6 +890,7 @@ static int exfat_check_dir_empty(struct super_block *sb, { int i, dentries_per_clu; unsigned int type; + unsigned int clu_count = 0; struct exfat_chain clu; struct exfat_dentry *ep; struct exfat_sb_info *sbi = EXFAT_SB(sb); @@ -926,6 +927,10 @@ static int exfat_check_dir_empty(struct super_block *sb, } else { if (exfat_get_next_cluster(sb, &(clu.dir))) return -EIO; + + /* break if the cluster chain includes a loop */ + if (unlikely(++clu_count > EXFAT_DATA_CLUSTER_COUNT(sbi))) + break; } } diff --git a/fs/exfat/super.c b/fs/exfat/super.c index ea5c1334a214..8926e63f5bb7 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -341,13 +341,12 @@ static void exfat_hash_init(struct super_block *sb) INIT_HLIST_HEAD(&sbi->inode_hashtable[i]); } -static int exfat_read_root(struct inode *inode) +static int exfat_read_root(struct inode *inode, struct exfat_chain *root_clu) { struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_inode_info *ei = EXFAT_I(inode); - struct exfat_chain cdir; - int num_subdirs, num_clu = 0; + int num_subdirs; exfat_chain_set(&ei->dir, sbi->root_dir, 0, ALLOC_FAT_CHAIN); ei->entry = -1; @@ -360,12 +359,9 @@ static int exfat_read_root(struct inode *inode) ei->hint_stat.clu = sbi->root_dir; ei->hint_femp.eidx = EXFAT_HINT_NONE; - 
exfat_chain_set(&cdir, sbi->root_dir, 0, ALLOC_FAT_CHAIN); - if (exfat_count_num_clusters(sb, &cdir, &num_clu)) - return -EIO; - i_size_write(inode, num_clu << sbi->cluster_size_bits); + i_size_write(inode, EXFAT_CLU_TO_B(root_clu->size, sbi)); - num_subdirs = exfat_count_dir_entries(sb, &cdir); + num_subdirs = exfat_count_dir_entries(sb, root_clu); if (num_subdirs < 0) return -EIO; set_nlink(inode, num_subdirs + EXFAT_MIN_SUBDIR); @@ -578,7 +574,8 @@ static int exfat_verify_boot_region(struct super_block *sb) } /* mount the file system volume */ -static int __exfat_fill_super(struct super_block *sb) +static int __exfat_fill_super(struct super_block *sb, + struct exfat_chain *root_clu) { int ret; struct exfat_sb_info *sbi = EXFAT_SB(sb); @@ -595,6 +592,18 @@ static int __exfat_fill_super(struct super_block *sb) goto free_bh; } + /* + * Call exfat_count_num_cluster() before searching for up-case and + * bitmap directory entries to avoid infinite loop if they are missing + * and the cluster chain includes a loop. 
+ */ + exfat_chain_set(root_clu, sbi->root_dir, 0, ALLOC_FAT_CHAIN); + ret = exfat_count_num_clusters(sb, root_clu, &root_clu->size); + if (ret) { + exfat_err(sb, "failed to count the number of clusters in root"); + goto free_bh; + } + ret = exfat_create_upcase_table(sb); if (ret) { exfat_err(sb, "failed to load upcase table"); @@ -627,6 +636,7 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc) struct exfat_sb_info *sbi = sb->s_fs_info; struct exfat_mount_options *opts = &sbi->options; struct inode *root_inode; + struct exfat_chain root_clu; int err; if (opts->allow_utime == (unsigned short)-1) @@ -645,7 +655,7 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_time_min = EXFAT_MIN_TIMESTAMP_SECS; sb->s_time_max = EXFAT_MAX_TIMESTAMP_SECS; - err = __exfat_fill_super(sb); + err = __exfat_fill_super(sb, &root_clu); if (err) { exfat_err(sb, "failed to recognize exfat type"); goto check_nls_io; @@ -680,7 +690,7 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc) root_inode->i_ino = EXFAT_ROOT_INO; inode_set_iversion(root_inode, 1); - err = exfat_read_root(root_inode); + err = exfat_read_root(root_inode, &root_clu); if (err) { exfat_err(sb, "failed to initialize root inode"); goto put_inode; From c9edbb6aecc532c7d0a9bee990beedb27be33851 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 30 Jul 2025 11:59:59 +0100 Subject: [PATCH 1329/2411] ata: libata-core: Remove space before newline There is an extraneous space before a newline in an ata_dev_dbg message. Remove the space. 
Signed-off-by: Colin Ian King Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 97d9f0488cc1..ff53f5f029b4 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4602,7 +4602,7 @@ static unsigned int ata_dev_init_params(struct ata_device *dev, return AC_ERR_INVALID; /* set up init dev params taskfile */ - ata_dev_dbg(dev, "init dev params \n"); + ata_dev_dbg(dev, "init dev params\n"); ata_tf_init(dev, &tf); tf.command = ATA_CMD_INIT_DEV_PARAMS; From 64c7cac9d64eb3ed1062a59fa77b36bfa293fe4e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 30 Jul 2025 12:04:42 +0100 Subject: [PATCH 1330/2411] ata: pata_macio: Remove space before newline There is an extraneous space before a newline in a dev_dbg message. Remove the space. Signed-off-by: Colin Ian King Signed-off-by: Damien Le Moal --- drivers/ata/pata_macio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index f7a933eefe05..9eefdc5df5df 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -758,7 +758,7 @@ static void pata_macio_irq_clear(struct ata_port *ap) static void pata_macio_reset_hw(struct pata_macio_priv *priv, int resume) { - dev_dbg(priv->dev, "Enabling & resetting... \n"); + dev_dbg(priv->dev, "Enabling & resetting...\n"); if (priv->mediabay) return; From 6cb43739b93c64c4a2148222bd606e6920257752 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 31 Jul 2025 09:02:03 +0100 Subject: [PATCH 1331/2411] ata: pata_pdc2027x: Remove space before newline and abbreviations There is an extraneous space before a newline in a handful of ata_port_dbg messages. Remove the spaces. Capitalize pio, udma, mdma. 
Signed-off-by: Colin Ian King Signed-off-by: Damien Le Moal --- drivers/ata/pata_pdc2027x.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c index d792ce6d97bf..ae914dcb0c83 100644 --- a/drivers/ata/pata_pdc2027x.c +++ b/drivers/ata/pata_pdc2027x.c @@ -295,7 +295,7 @@ static void pdc2027x_set_piomode(struct ata_port *ap, struct ata_device *adev) } /* Set the PIO timing registers using value table for 133MHz */ - ata_port_dbg(ap, "Set pio regs... \n"); + ata_port_dbg(ap, "Set PIO regs...\n"); ctcr0 = ioread32(dev_mmio(ap, adev, PDC_CTCR0)); ctcr0 &= 0xffff0000; @@ -308,7 +308,7 @@ static void pdc2027x_set_piomode(struct ata_port *ap, struct ata_device *adev) ctcr1 |= (pdc2027x_pio_timing_tbl[pio].value2 << 24); iowrite32(ctcr1, dev_mmio(ap, adev, PDC_CTCR1)); - ata_port_dbg(ap, "Set to pio mode[%u] \n", pio); + ata_port_dbg(ap, "Set to PIO mode[%u]\n", pio); } /** @@ -341,7 +341,7 @@ static void pdc2027x_set_dmamode(struct ata_port *ap, struct ata_device *adev) iowrite32(ctcr1 & ~(1 << 7), dev_mmio(ap, adev, PDC_CTCR1)); } - ata_port_dbg(ap, "Set udma regs... \n"); + ata_port_dbg(ap, "Set UDMA regs...\n"); ctcr1 = ioread32(dev_mmio(ap, adev, PDC_CTCR1)); ctcr1 &= 0xff000000; @@ -350,14 +350,14 @@ static void pdc2027x_set_dmamode(struct ata_port *ap, struct ata_device *adev) (pdc2027x_udma_timing_tbl[udma_mode].value2 << 16); iowrite32(ctcr1, dev_mmio(ap, adev, PDC_CTCR1)); - ata_port_dbg(ap, "Set to udma mode[%u] \n", udma_mode); + ata_port_dbg(ap, "Set to UDMA mode[%u]\n", udma_mode); } else if ((dma_mode >= XFER_MW_DMA_0) && (dma_mode <= XFER_MW_DMA_2)) { /* Set the MDMA timing registers with value table for 133MHz */ unsigned int mdma_mode = dma_mode & 0x07; - ata_port_dbg(ap, "Set mdma regs... 
\n"); + ata_port_dbg(ap, "Set MDMA regs...\n"); ctcr0 = ioread32(dev_mmio(ap, adev, PDC_CTCR0)); ctcr0 &= 0x0000ffff; @@ -366,7 +366,7 @@ static void pdc2027x_set_dmamode(struct ata_port *ap, struct ata_device *adev) iowrite32(ctcr0, dev_mmio(ap, adev, PDC_CTCR0)); - ata_port_dbg(ap, "Set to mdma mode[%u] \n", mdma_mode); + ata_port_dbg(ap, "Set to MDMA mode[%u]\n", mdma_mode); } else { ata_port_err(ap, "Unknown dma mode [%u] ignored\n", dma_mode); } From 5ccaeedb489b41ce6cb857d0de488992746be282 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 1 Aug 2025 00:10:06 +0000 Subject: [PATCH 1332/2411] cfi: add C CFI type macro Currently x86 and riscv open-code 4 instances of the same logic to define a u32 variable with the KCFI typeid of a given function. Replace the duplicate logic with a common macro. Signed-off-by: Mark Rutland Co-developed-by: Maxwell Bland Signed-off-by: Maxwell Bland Co-developed-by: Sami Tolvanen Signed-off-by: Sami Tolvanen Tested-by: Dao Huang Acked-by: Will Deacon Link: https://lore.kernel.org/r/20250801001004.1859976-6-samitolvanen@google.com Signed-off-by: Alexei Starovoitov --- arch/riscv/kernel/cfi.c | 35 +++-------------------------------- arch/x86/kernel/alternative.c | 31 +++---------------------------- include/linux/cfi_types.h | 23 +++++++++++++++++++++++ 3 files changed, 29 insertions(+), 60 deletions(-) diff --git a/arch/riscv/kernel/cfi.c b/arch/riscv/kernel/cfi.c index 64bdd3e1ab8c..e7aec5f36dd5 100644 --- a/arch/riscv/kernel/cfi.c +++ b/arch/riscv/kernel/cfi.c @@ -4,6 +4,7 @@ * * Copyright (C) 2023 Google LLC */ +#include #include #include @@ -82,41 +83,11 @@ struct bpf_insn; /* Must match bpf_func_t / DEFINE_BPF_PROG_RUN() */ extern unsigned int __bpf_prog_runX(const void *ctx, const struct bpf_insn *insn); - -/* - * Force a reference to the external symbol so the compiler generates - * __kcfi_typid. 
- */ -__ADDRESSABLE(__bpf_prog_runX); - -/* u32 __ro_after_init cfi_bpf_hash = __kcfi_typeid___bpf_prog_runX; */ -asm ( -" .pushsection .data..ro_after_init,\"aw\",@progbits \n" -" .type cfi_bpf_hash,@object \n" -" .globl cfi_bpf_hash \n" -" .p2align 2, 0x0 \n" -"cfi_bpf_hash: \n" -" .word __kcfi_typeid___bpf_prog_runX \n" -" .size cfi_bpf_hash, 4 \n" -" .popsection \n" -); +DEFINE_CFI_TYPE(cfi_bpf_hash, __bpf_prog_runX); /* Must match bpf_callback_t */ extern u64 __bpf_callback_fn(u64, u64, u64, u64, u64); - -__ADDRESSABLE(__bpf_callback_fn); - -/* u32 __ro_after_init cfi_bpf_subprog_hash = __kcfi_typeid___bpf_callback_fn; */ -asm ( -" .pushsection .data..ro_after_init,\"aw\",@progbits \n" -" .type cfi_bpf_subprog_hash,@object \n" -" .globl cfi_bpf_subprog_hash \n" -" .p2align 2, 0x0 \n" -"cfi_bpf_subprog_hash: \n" -" .word __kcfi_typeid___bpf_callback_fn \n" -" .size cfi_bpf_subprog_hash, 4 \n" -" .popsection \n" -); +DEFINE_CFI_TYPE(cfi_bpf_subprog_hash, __bpf_callback_fn); u32 cfi_get_func_hash(void *func) { diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ea1d984166cd..a555665b4d9c 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -2,6 +2,7 @@ #define pr_fmt(fmt) "SMP alternatives: " fmt #include +#include #include #include #include @@ -1189,37 +1190,11 @@ struct bpf_insn; /* Must match bpf_func_t / DEFINE_BPF_PROG_RUN() */ extern unsigned int __bpf_prog_runX(const void *ctx, const struct bpf_insn *insn); - -KCFI_REFERENCE(__bpf_prog_runX); - -/* u32 __ro_after_init cfi_bpf_hash = __kcfi_typeid___bpf_prog_runX; */ -asm ( -" .pushsection .data..ro_after_init,\"aw\",@progbits \n" -" .type cfi_bpf_hash,@object \n" -" .globl cfi_bpf_hash \n" -" .p2align 2, 0x0 \n" -"cfi_bpf_hash: \n" -" .long __kcfi_typeid___bpf_prog_runX \n" -" .size cfi_bpf_hash, 4 \n" -" .popsection \n" -); +DEFINE_CFI_TYPE(cfi_bpf_hash, __bpf_prog_runX); /* Must match bpf_callback_t */ extern u64 __bpf_callback_fn(u64, u64, u64, 
u64, u64); - -KCFI_REFERENCE(__bpf_callback_fn); - -/* u32 __ro_after_init cfi_bpf_subprog_hash = __kcfi_typeid___bpf_callback_fn; */ -asm ( -" .pushsection .data..ro_after_init,\"aw\",@progbits \n" -" .type cfi_bpf_subprog_hash,@object \n" -" .globl cfi_bpf_subprog_hash \n" -" .p2align 2, 0x0 \n" -"cfi_bpf_subprog_hash: \n" -" .long __kcfi_typeid___bpf_callback_fn \n" -" .size cfi_bpf_subprog_hash, 4 \n" -" .popsection \n" -); +DEFINE_CFI_TYPE(cfi_bpf_subprog_hash, __bpf_callback_fn); u32 cfi_get_func_hash(void *func) { diff --git a/include/linux/cfi_types.h b/include/linux/cfi_types.h index 6b8713675765..685f7181780f 100644 --- a/include/linux/cfi_types.h +++ b/include/linux/cfi_types.h @@ -41,5 +41,28 @@ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_CFI_CLANG +#define DEFINE_CFI_TYPE(name, func) \ + /* \ + * Force a reference to the function so the compiler generates \ + * __kcfi_typeid_. \ + */ \ + __ADDRESSABLE(func); \ + /* u32 name __ro_after_init = __kcfi_typeid_ */ \ + extern u32 name; \ + asm ( \ + " .pushsection .data..ro_after_init,\"aw\",\%progbits \n" \ + " .type " #name ",\%object \n" \ + " .globl " #name " \n" \ + " .p2align 2, 0x0 \n" \ + #name ": \n" \ + " .4byte __kcfi_typeid_" #func " \n" \ + " .size " #name ", 4 \n" \ + " .popsection \n" \ + ); +#endif + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_CFI_TYPES_H */ From f1befc82addda926c8301436123d041bf3249505 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 1 Aug 2025 00:10:07 +0000 Subject: [PATCH 1333/2411] cfi: Move BPF CFI types and helpers to generic code Instead of duplicating the same code for each architecture, move the CFI type hash variables for BPF function types and related helper functions to generic CFI code, and allow architectures to override the function definitions if needed. 
Signed-off-by: Sami Tolvanen Link: https://lore.kernel.org/r/20250801001004.1859976-7-samitolvanen@google.com Signed-off-by: Alexei Starovoitov --- arch/riscv/include/asm/cfi.h | 16 ------------ arch/riscv/kernel/cfi.c | 24 ------------------ arch/x86/include/asm/cfi.h | 10 ++------ arch/x86/kernel/alternative.c | 12 --------- include/linux/cfi.h | 47 +++++++++++++++++++++++++++++------ kernel/cfi.c | 15 +++++++++++ 6 files changed, 56 insertions(+), 68 deletions(-) diff --git a/arch/riscv/include/asm/cfi.h b/arch/riscv/include/asm/cfi.h index fb9696d7a3f2..4508aaa7a2fd 100644 --- a/arch/riscv/include/asm/cfi.h +++ b/arch/riscv/include/asm/cfi.h @@ -14,27 +14,11 @@ struct pt_regs; #ifdef CONFIG_CFI_CLANG enum bug_trap_type handle_cfi_failure(struct pt_regs *regs); #define __bpfcall -static inline int cfi_get_offset(void) -{ - return 4; -} - -#define cfi_get_offset cfi_get_offset -extern u32 cfi_bpf_hash; -extern u32 cfi_bpf_subprog_hash; -extern u32 cfi_get_func_hash(void *func); #else static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs) { return BUG_TRAP_TYPE_NONE; } - -#define cfi_bpf_hash 0U -#define cfi_bpf_subprog_hash 0U -static inline u32 cfi_get_func_hash(void *func) -{ - return 0; -} #endif /* CONFIG_CFI_CLANG */ #endif /* _ASM_RISCV_CFI_H */ diff --git a/arch/riscv/kernel/cfi.c b/arch/riscv/kernel/cfi.c index e7aec5f36dd5..6ec9dbd7292e 100644 --- a/arch/riscv/kernel/cfi.c +++ b/arch/riscv/kernel/cfi.c @@ -4,7 +4,6 @@ * * Copyright (C) 2023 Google LLC */ -#include #include #include @@ -76,26 +75,3 @@ enum bug_trap_type handle_cfi_failure(struct pt_regs *regs) return report_cfi_failure(regs, regs->epc, &target, type); } - -#ifdef CONFIG_CFI_CLANG -struct bpf_insn; - -/* Must match bpf_func_t / DEFINE_BPF_PROG_RUN() */ -extern unsigned int __bpf_prog_runX(const void *ctx, - const struct bpf_insn *insn); -DEFINE_CFI_TYPE(cfi_bpf_hash, __bpf_prog_runX); - -/* Must match bpf_callback_t */ -extern u64 __bpf_callback_fn(u64, u64, u64, u64, 
u64); -DEFINE_CFI_TYPE(cfi_bpf_subprog_hash, __bpf_callback_fn); - -u32 cfi_get_func_hash(void *func) -{ - u32 hash; - - if (get_kernel_nofault(hash, func - cfi_get_offset())) - return 0; - - return hash; -} -#endif diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h index 3e51ba459154..1751f1eb95ef 100644 --- a/arch/x86/include/asm/cfi.h +++ b/arch/x86/include/asm/cfi.h @@ -116,8 +116,6 @@ struct pt_regs; #ifdef CONFIG_CFI_CLANG enum bug_trap_type handle_cfi_failure(struct pt_regs *regs); #define __bpfcall -extern u32 cfi_bpf_hash; -extern u32 cfi_bpf_subprog_hash; static inline int cfi_get_offset(void) { @@ -135,6 +133,8 @@ static inline int cfi_get_offset(void) #define cfi_get_offset cfi_get_offset extern u32 cfi_get_func_hash(void *func); +#define cfi_get_func_hash cfi_get_func_hash + extern int cfi_get_func_arity(void *func); #ifdef CONFIG_FINEIBT @@ -153,12 +153,6 @@ static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs) { return BUG_TRAP_TYPE_NONE; } -#define cfi_bpf_hash 0U -#define cfi_bpf_subprog_hash 0U -static inline u32 cfi_get_func_hash(void *func) -{ - return 0; -} static inline int cfi_get_func_arity(void *func) { return 0; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a555665b4d9c..9f6b7dab2d9a 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -2,7 +2,6 @@ #define pr_fmt(fmt) "SMP alternatives: " fmt #include -#include #include #include #include @@ -1185,17 +1184,6 @@ bool cfi_bhi __ro_after_init = false; #endif #ifdef CONFIG_CFI_CLANG -struct bpf_insn; - -/* Must match bpf_func_t / DEFINE_BPF_PROG_RUN() */ -extern unsigned int __bpf_prog_runX(const void *ctx, - const struct bpf_insn *insn); -DEFINE_CFI_TYPE(cfi_bpf_hash, __bpf_prog_runX); - -/* Must match bpf_callback_t */ -extern u64 __bpf_callback_fn(u64, u64, u64, u64, u64); -DEFINE_CFI_TYPE(cfi_bpf_subprog_hash, __bpf_callback_fn); - u32 cfi_get_func_hash(void *func) { u32 hash; diff --git 
a/include/linux/cfi.h b/include/linux/cfi.h index 1db17ecbb86c..52a98886a455 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -11,16 +11,9 @@ #include #include +#ifdef CONFIG_CFI_CLANG extern bool cfi_warn; -#ifndef cfi_get_offset -static inline int cfi_get_offset(void) -{ - return 0; -} -#endif - -#ifdef CONFIG_CFI_CLANG enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, unsigned long *target, u32 type); @@ -29,6 +22,44 @@ static inline enum bug_trap_type report_cfi_failure_noaddr(struct pt_regs *regs, { return report_cfi_failure(regs, addr, NULL, 0); } + +#ifndef cfi_get_offset +/* + * Returns the CFI prefix offset. By default, the compiler emits only + * a 4-byte CFI type hash before the function. If an architecture + * uses -fpatchable-function-entry=N,M where M>0 to change the prefix + * offset, they must override this function. + */ +static inline int cfi_get_offset(void) +{ + return 4; +} +#endif + +#ifndef cfi_get_func_hash +static inline u32 cfi_get_func_hash(void *func) +{ + u32 hash; + + if (get_kernel_nofault(hash, func - cfi_get_offset())) + return 0; + + return hash; +} +#endif + +/* CFI type hashes for BPF function types */ +extern u32 cfi_bpf_hash; +extern u32 cfi_bpf_subprog_hash; + +#else /* CONFIG_CFI_CLANG */ + +static inline int cfi_get_offset(void) { return 0; } +static inline u32 cfi_get_func_hash(void *func) { return 0; } + +#define cfi_bpf_hash 0U +#define cfi_bpf_subprog_hash 0U + #endif /* CONFIG_CFI_CLANG */ #ifdef CONFIG_ARCH_USES_CFI_TRAPS diff --git a/kernel/cfi.c b/kernel/cfi.c index 422fa4f958ae..4dad04ead06c 100644 --- a/kernel/cfi.c +++ b/kernel/cfi.c @@ -5,6 +5,8 @@ * Copyright (C) 2022 Google LLC */ +#include +#include #include bool cfi_warn __ro_after_init = IS_ENABLED(CONFIG_CFI_PERMISSIVE); @@ -27,6 +29,19 @@ enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, return BUG_TRAP_TYPE_BUG; } +/* + * Declare two non-existent functions with types that match 
bpf_func_t and + * bpf_callback_t pointers, and use DEFINE_CFI_TYPE to define type hash + * variables for each function type. The cfi_bpf_* variables are used by + * arch-specific BPF JIT implementations to ensure indirectly callable JIT + * code has matching CFI type hashes. + */ +extern typeof(*(bpf_func_t)0) __bpf_prog_runX; +DEFINE_CFI_TYPE(cfi_bpf_hash, __bpf_prog_runX); + +extern typeof(*(bpf_callback_t)0) __bpf_callback_fn; +DEFINE_CFI_TYPE(cfi_bpf_subprog_hash, __bpf_callback_fn); + #ifdef CONFIG_ARCH_USES_CFI_TRAPS static inline unsigned long trap_address(s32 *p) { From 710618c760c0a3267221517d78f4cfb65ca7b882 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Fri, 1 Aug 2025 00:10:08 +0000 Subject: [PATCH 1334/2411] arm64/cfi,bpf: Support kCFI + BPF on arm64 Currently, bpf_dispatcher_*_func() is marked with `__nocfi` therefore calling BPF programs from this interface doesn't cause CFI warnings. When BPF programs are called directly from C: from BPF helpers or struct_ops, CFI warnings are generated. Implement proper CFI prologues for the BPF programs and callbacks and drop __nocfi for arm64. Fix the trampoline generation code to emit kCFI prologue when a struct_ops trampoline is being prepared. 
Signed-off-by: Puranjay Mohan Co-developed-by: Maxwell Bland Signed-off-by: Maxwell Bland Co-developed-by: Sami Tolvanen Signed-off-by: Sami Tolvanen Tested-by: Dao Huang Acked-by: Will Deacon Link: https://lore.kernel.org/r/20250801001004.1859976-8-samitolvanen@google.com Signed-off-by: Alexei Starovoitov --- arch/arm64/include/asm/cfi.h | 7 +++++++ arch/arm64/net/bpf_jit_comp.c | 30 +++++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 3 deletions(-) create mode 100644 arch/arm64/include/asm/cfi.h diff --git a/arch/arm64/include/asm/cfi.h b/arch/arm64/include/asm/cfi.h new file mode 100644 index 000000000000..ab90f0351b7a --- /dev/null +++ b/arch/arm64/include/asm/cfi.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ARM64_CFI_H +#define _ASM_ARM64_CFI_H + +#define __bpfcall + +#endif /* _ASM_ARM64_CFI_H */ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 97dfd5432809..52ffe115a8c4 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -114,6 +115,14 @@ static inline void emit(const u32 insn, struct jit_ctx *ctx) ctx->idx++; } +static inline void emit_u32_data(const u32 data, struct jit_ctx *ctx) +{ + if (ctx->image != NULL && ctx->write) + ctx->image[ctx->idx] = data; + + ctx->idx++; +} + static inline void emit_a64_mov_i(const int is64, const int reg, const s32 val, struct jit_ctx *ctx) { @@ -174,6 +183,12 @@ static inline void emit_bti(u32 insn, struct jit_ctx *ctx) emit(insn, ctx); } +static inline void emit_kcfi(u32 hash, struct jit_ctx *ctx) +{ + if (IS_ENABLED(CONFIG_CFI_CLANG)) + emit_u32_data(hash, ctx); +} + /* * Kernel addresses in the vmalloc space use at most 48 bits, and the * remaining bits are guaranteed to be 0x1. 
So we can compose the address @@ -503,7 +518,6 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; const u8 priv_sp = bpf2a64[PRIVATE_SP]; void __percpu *priv_stack_ptr; - const int idx0 = ctx->idx; int cur_offset; /* @@ -529,6 +543,9 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) * */ + emit_kcfi(is_main_prog ? cfi_bpf_hash : cfi_bpf_subprog_hash, ctx); + const int idx0 = ctx->idx; + /* bpf function may be invoked by 3 instruction types: * 1. bl, attached via freplace to bpf prog via short jump * 2. br, attached via freplace to bpf prog via long jump @@ -2146,9 +2163,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) jit_data->ro_header = ro_header; } - prog->bpf_func = (void *)ctx.ro_image; + prog->bpf_func = (void *)ctx.ro_image + cfi_get_offset(); prog->jited = 1; - prog->jited_len = prog_size; + prog->jited_len = prog_size - cfi_get_offset(); if (!prog->is_func || extra_pass) { int i; @@ -2527,6 +2544,12 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, /* return address locates above FP */ retaddr_off = stack_size + 8; + if (flags & BPF_TRAMP_F_INDIRECT) { + /* + * Indirect call for bpf_struct_ops + */ + emit_kcfi(cfi_get_func_hash(func_addr), ctx); + } /* bpf trampoline may be invoked by 3 instruction types: * 1. bl, attached to bpf prog or kernel function via short jump * 2. 
br, attached to bpf prog or kernel function via long jump @@ -3045,6 +3068,7 @@ void bpf_jit_free(struct bpf_prog *prog) sizeof(jit_data->header->size)); kfree(jit_data); } + prog->bpf_func -= cfi_get_offset(); hdr = bpf_jit_binary_pack_hdr(prog); bpf_jit_binary_pack_free(hdr, NULL); priv_stack_ptr = prog->aux->priv_stack_ptr; From a967e758f8e9d8ce5ef096743393df5e6e51644b Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 31 Jul 2025 20:46:41 -0300 Subject: [PATCH 1335/2411] smb: client: set symlink type as native for POSIX mounts SMB3.1.1 POSIX mounts require symlinks to be created natively with IO_REPARSE_TAG_SYMLINK reparse point. Cc: linux-cifs@vger.kernel.org Cc: Ralph Boehme Cc: David Howells Cc: Reported-by: Matthew Richardson Closes: https://marc.info/?i=1124e7cd-6a46-40a6-9f44-b7664a66654b@ed.ac.uk Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/cifsfs.c | 2 +- fs/smb/client/fs_context.c | 18 ------------------ fs/smb/client/fs_context.h | 18 +++++++++++++++++- fs/smb/client/link.c | 11 +++-------- fs/smb/client/reparse.c | 2 +- 5 files changed, 22 insertions(+), 29 deletions(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 0fdadd668a81..31930b7266db 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -723,7 +723,7 @@ cifs_show_options(struct seq_file *s, struct dentry *root) else seq_puts(s, ",nativesocket"); seq_show_option(s, "symlink", - cifs_symlink_type_str(get_cifs_symlink_type(cifs_sb))); + cifs_symlink_type_str(cifs_symlink_type(cifs_sb))); seq_printf(s, ",rsize=%u", cifs_sb->ctx->rsize); seq_printf(s, ",wsize=%u", cifs_sb->ctx->wsize); diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 3f34bb07997b..cc8bd79ebca9 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1829,24 +1829,6 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, return -EINVAL; } -enum cifs_symlink_type get_cifs_symlink_type(struct 
cifs_sb_info *cifs_sb) -{ - if (cifs_sb->ctx->symlink_type == CIFS_SYMLINK_TYPE_DEFAULT) { - if (cifs_sb->ctx->mfsymlinks) - return CIFS_SYMLINK_TYPE_MFSYMLINKS; - else if (cifs_sb->ctx->sfu_emul) - return CIFS_SYMLINK_TYPE_SFU; - else if (cifs_sb->ctx->linux_ext && !cifs_sb->ctx->no_linux_ext) - return CIFS_SYMLINK_TYPE_UNIX; - else if (cifs_sb->ctx->reparse_type != CIFS_REPARSE_TYPE_NONE) - return CIFS_SYMLINK_TYPE_NATIVE; - else - return CIFS_SYMLINK_TYPE_NONE; - } else { - return cifs_sb->ctx->symlink_type; - } -} - int smb3_init_fs_context(struct fs_context *fc) { struct smb3_fs_context *ctx; diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index 9e83302ce4b8..b0fec6b9a23b 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -341,7 +341,23 @@ struct smb3_fs_context { extern const struct fs_parameter_spec smb3_fs_parameters[]; -extern enum cifs_symlink_type get_cifs_symlink_type(struct cifs_sb_info *cifs_sb); +static inline enum cifs_symlink_type cifs_symlink_type(struct cifs_sb_info *cifs_sb) +{ + bool posix = cifs_sb_master_tcon(cifs_sb)->posix_extensions; + + if (cifs_sb->ctx->symlink_type != CIFS_SYMLINK_TYPE_DEFAULT) + return cifs_sb->ctx->symlink_type; + + if (cifs_sb->ctx->mfsymlinks) + return CIFS_SYMLINK_TYPE_MFSYMLINKS; + else if (cifs_sb->ctx->sfu_emul) + return CIFS_SYMLINK_TYPE_SFU; + else if (cifs_sb->ctx->linux_ext && !cifs_sb->ctx->no_linux_ext) + return posix ? 
CIFS_SYMLINK_TYPE_NATIVE : CIFS_SYMLINK_TYPE_UNIX; + else if (cifs_sb->ctx->reparse_type != CIFS_REPARSE_TYPE_NONE) + return CIFS_SYMLINK_TYPE_NATIVE; + return CIFS_SYMLINK_TYPE_NONE; +} extern int smb3_init_fs_context(struct fs_context *fc); extern void smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx); diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c index 2ecd705e9e8c..afe76367d2c8 100644 --- a/fs/smb/client/link.c +++ b/fs/smb/client/link.c @@ -605,14 +605,7 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, /* BB what if DFS and this volume is on different share? BB */ rc = -EOPNOTSUPP; - switch (get_cifs_symlink_type(cifs_sb)) { - case CIFS_SYMLINK_TYPE_DEFAULT: - /* should not happen, get_cifs_symlink_type() resolves the default */ - break; - - case CIFS_SYMLINK_TYPE_NONE: - break; - + switch (cifs_symlink_type(cifs_sb)) { case CIFS_SYMLINK_TYPE_UNIX: #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY if (pTcon->unix_ext) { @@ -648,6 +641,8 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, goto symlink_exit; } break; + default: + break; } if (rc == 0) { diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index 33c1d970747c..7869cec58f52 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -38,7 +38,7 @@ int create_reparse_symlink(const unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, const char *symname) { - switch (get_cifs_symlink_type(CIFS_SB(inode->i_sb))) { + switch (cifs_symlink_type(CIFS_SB(inode->i_sb))) { case CIFS_SYMLINK_TYPE_NATIVE: return create_native_symlink(xid, inode, dentry, tcon, full_path, symname); case CIFS_SYMLINK_TYPE_NFS: From 55a984928bfa30c7877e28f16910e6de1c170f1f Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Fri, 1 Aug 2025 10:26:13 +0200 Subject: [PATCH 1336/2411] Revert "tty: vt: use _IO() to define ioctl numbers" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit This reverts commit f1180ca37abe3d117e4a19be12142fe722612a7c. Since the commit, the vt ioctl numbers are defined differently on platforms where _IOC_NONE is non-zero: alpha, mips, powerpc, sparc. Signed-off-by: "Jiri Slaby (SUSE)" Reported-by: Christophe Leroy Link: https://lore.kernel.org/all/436489B9-E67B-4630-909F-386C30A2AAC9@xenosoft.de/ Link: https://lore.kernel.org/all/97ec2636-915a-498c-903b-d66957420d21@csgroup.eu/ Cc: Nicolas Pitre Cc: Ilpo Järvinen Link: https://lore.kernel.org/r/20250801082613.2564584-1-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/vt.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/include/uapi/linux/vt.h b/include/uapi/linux/vt.h index b60fcdfb2746..714483d68c69 100644 --- a/include/uapi/linux/vt.h +++ b/include/uapi/linux/vt.h @@ -14,9 +14,9 @@ /* Note: the ioctl VT_GETSTATE does not work for consoles 16 and higher (since it returns a short) */ -/* 'V' to avoid collision with termios and kd */ +/* 0x56 is 'V', to avoid collision with termios and kd */ -#define VT_OPENQRY _IO('V', 0x00) /* find available vt */ +#define VT_OPENQRY 0x5600 /* find available vt */ struct vt_mode { __u8 mode; /* vt mode */ @@ -25,8 +25,8 @@ struct vt_mode { __s16 acqsig; /* signal to raise on acquisition */ __s16 frsig; /* unused (set to 0) */ }; -#define VT_GETMODE _IO('V', 0x01) /* get mode of active vt */ -#define VT_SETMODE _IO('V', 0x02) /* set mode of active vt */ +#define VT_GETMODE 0x5601 /* get mode of active vt */ +#define VT_SETMODE 0x5602 /* set mode of active vt */ #define VT_AUTO 0x00 /* auto vt switching */ #define VT_PROCESS 0x01 /* process controls switching */ #define VT_ACKACQ 0x02 /* acknowledge switch */ @@ -36,21 +36,21 @@ struct vt_stat { __u16 v_signal; /* signal to send */ __u16 v_state; /* vt bitmask */ }; -#define VT_GETSTATE _IO('V', 0x03) /* get global vt state info */ -#define VT_SENDSIG _IO('V', 0x04) /* signal to send to bitmask of 
vts */ +#define VT_GETSTATE 0x5603 /* get global vt state info */ +#define VT_SENDSIG 0x5604 /* signal to send to bitmask of vts */ -#define VT_RELDISP _IO('V', 0x05) /* release display */ +#define VT_RELDISP 0x5605 /* release display */ -#define VT_ACTIVATE _IO('V', 0x06) /* make vt active */ -#define VT_WAITACTIVE _IO('V', 0x07) /* wait for vt active */ -#define VT_DISALLOCATE _IO('V', 0x08) /* free memory associated to vt */ +#define VT_ACTIVATE 0x5606 /* make vt active */ +#define VT_WAITACTIVE 0x5607 /* wait for vt active */ +#define VT_DISALLOCATE 0x5608 /* free memory associated to vt */ struct vt_sizes { __u16 v_rows; /* number of rows */ __u16 v_cols; /* number of columns */ __u16 v_scrollsize; /* number of lines of scrollback */ }; -#define VT_RESIZE _IO('V', 0x09) /* set kernel's idea of screensize */ +#define VT_RESIZE 0x5609 /* set kernel's idea of screensize */ struct vt_consize { __u16 v_rows; /* number of rows */ @@ -60,10 +60,10 @@ struct vt_consize { __u16 v_vcol; /* number of pixel columns on screen */ __u16 v_ccol; /* number of pixel columns per character */ }; -#define VT_RESIZEX _IO('V', 0x0A) /* set kernel's idea of screensize + more */ -#define VT_LOCKSWITCH _IO('V', 0x0B) /* disallow vt switching */ -#define VT_UNLOCKSWITCH _IO('V', 0x0C) /* allow vt switching */ -#define VT_GETHIFONTMASK _IO('V', 0x0D) /* return hi font mask */ +#define VT_RESIZEX 0x560A /* set kernel's idea of screensize + more */ +#define VT_LOCKSWITCH 0x560B /* disallow vt switching */ +#define VT_UNLOCKSWITCH 0x560C /* allow vt switching */ +#define VT_GETHIFONTMASK 0x560D /* return hi font mask */ struct vt_event { __u32 event; @@ -77,14 +77,14 @@ struct vt_event { __u32 pad[4]; /* Padding for expansion */ }; -#define VT_WAITEVENT _IO('V', 0x0E) /* Wait for an event */ +#define VT_WAITEVENT 0x560E /* Wait for an event */ struct vt_setactivate { __u32 console; struct vt_mode mode; }; -#define VT_SETACTIVATE _IO('V', 0x0F) /* Activate and set the mode of a console */ 
+#define VT_SETACTIVATE 0x560F /* Activate and set the mode of a console */ /* get console size and cursor position */ struct vt_consizecsrpos { From 9d9b193ed73a65ec47cf1fd39925b09da8216461 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 31 Jul 2025 09:41:47 +0800 Subject: [PATCH 1337/2411] crypto: hash - Increase HASH_MAX_DESCSIZE for hmac(sha3-224-s390) The value of HASH_MAX_DESCSIZE is off by one for hmac(sha3-224-s390). Fix this so that hmac(sha3-224-s390) can be registered. Reported-by: Ingo Franzki Reported-by: Eric Biggers Fixes: 6f90ba706551 ("crypto: s390/sha3 - Use API partial block handling") Cc: Signed-off-by: Herbert Xu --- include/crypto/hash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 6f6b9de12cd3..ed63b904837d 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -184,7 +184,7 @@ struct shash_desc { * Worst case is hmac(sha3-224-s390). Its context is a nested 'shash_desc' * containing a 'struct s390_sha_ctx'. */ -#define HASH_MAX_DESCSIZE (sizeof(struct shash_desc) + 360) +#define HASH_MAX_DESCSIZE (sizeof(struct shash_desc) + 361) #define MAX_SYNC_HASH_REQSIZE (sizeof(struct ahash_request) + \ HASH_MAX_DESCSIZE) From 1da33858af6250184d2ef907494d698af03283de Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 31 Jul 2025 21:38:18 +0100 Subject: [PATCH 1338/2411] regmap: irq: Free the regmap-irq mutex We do not currently free the mutex allocated by regmap-irq, do so. 
Tested-by: Russell King (Oracle) Reviewed-by: Russell King (Oracle) Signed-off-by: Mark Brown Link: https://patch.msgid.link/20250731-regmap-irq-nesting-v1-1-98b4d1bf20f0@kernel.org Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-irq.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index d1585f073776..4aac12d38215 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -816,7 +816,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, d->mask_buf[i], chip->irq_drv_data); if (ret) - goto err_alloc; + goto err_mutex; } if (chip->mask_base && !chip->handle_mask_sync) { @@ -827,7 +827,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (ret) { dev_err(map->dev, "Failed to set masks in 0x%x: %d\n", reg, ret); - goto err_alloc; + goto err_mutex; } } @@ -838,7 +838,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (ret) { dev_err(map->dev, "Failed to set masks in 0x%x: %d\n", reg, ret); - goto err_alloc; + goto err_mutex; } } @@ -855,7 +855,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (ret != 0) { dev_err(map->dev, "Failed to read IRQ status: %d\n", ret); - goto err_alloc; + goto err_mutex; } } @@ -879,7 +879,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (ret != 0) { dev_err(map->dev, "Failed to ack 0x%x: %d\n", reg, ret); - goto err_alloc; + goto err_mutex; } } } @@ -901,7 +901,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (ret != 0) { dev_err(map->dev, "Failed to set masks in 0x%x: %d\n", reg, ret); - goto err_alloc; + goto err_mutex; } } } @@ -910,7 +910,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (chip->status_is_level) { ret = read_irq_data(d); if (ret < 0) - goto err_alloc; + goto err_mutex; memcpy(d->prev_status_buf, d->status_buf, array_size(d->chip->num_regs, 
sizeof(d->prev_status_buf[0]))); @@ -918,7 +918,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, ret = regmap_irq_create_domain(fwnode, irq_base, chip, d); if (ret) - goto err_alloc; + goto err_mutex; ret = request_threaded_irq(irq, NULL, regmap_irq_thread, irq_flags | IRQF_ONESHOT, @@ -935,6 +935,8 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, err_domain: /* Should really dispose of the domain but... */ +err_mutex: + mutex_destroy(&d->lock); err_alloc: kfree(d->type_buf); kfree(d->type_buf_def); @@ -1027,6 +1029,7 @@ void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *d) kfree(d->config_buf[i]); kfree(d->config_buf); } + mutex_destroy(&d->lock); kfree(d); } EXPORT_SYMBOL_GPL(regmap_del_irq_chip); From 76b6e14aa7b081337d118a82397d919b5e072bb4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 31 Jul 2025 21:38:19 +0100 Subject: [PATCH 1339/2411] regmap: irq: Avoid lockdep warnings with nested regmap-irq chips While handling interrupts through regmap-irq we use a mutex to protect the updates we are caching while genirq runs in atomic context. Russell King reported that while running on the nVidia Jetson Xavier NX this generates lockdep warnings since that platform has a regmap-irq for the max77686 RTC which is a child of a max77620 which also uses regmap-irq. [ 46.723127] rtcwake/3984 is trying to acquire lock: [ 46.723235] ffff0000813b2c68 (&d->lock){+.+.}-{4:4}, at: regmap_irq_lock+0x18/0x24 [ 46.723452] but task is already holding lock: [ 46.723556] ffff00008504dc68 (&d->lock){+.+.}-{4:4}, at: regmap_irq_lock+0x18/0x24 This happens because by default lockdep uses a single lockdep class for all mutexes initialised from a single mutex_init() call and is unable to tell that two distinct mutex are being taken and verify that the ordering of operations is safe. This should be a very rare situation since normally anything using regmap-irq will be a leaf interrupt controller due to being on a slow bus like I2C. 
We can avoid these warnings by providing the lockdep key for the regmap-irq explicitly, allocating one for each chip so that lockdep can distinguish between them. Thanks to Russell for the report and analysis. Reported-by: Russell King (Oracle) Tested-by: Russell King (Oracle) Reviewed-by: Russell King (Oracle) Signed-off-by: Mark Brown Link: https://patch.msgid.link/20250731-regmap-irq-nesting-v1-2-98b4d1bf20f0@kernel.org Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-irq.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 4aac12d38215..6112d942499b 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -21,6 +21,7 @@ struct regmap_irq_chip_data { struct mutex lock; + struct lock_class_key lock_key; struct irq_chip irq_chip; struct regmap *map; @@ -801,7 +802,13 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, goto err_alloc; } - mutex_init(&d->lock); + /* + * If one regmap-irq is the parent of another then we'll try + * to lock the child with the parent locked, use an explicit + * lock_key so lockdep can figure out what's going on. + */ + lockdep_register_key(&d->lock_key); + mutex_init_with_key(&d->lock, &d->lock_key); for (i = 0; i < chip->num_irqs; i++) d->mask_buf_def[chip->irqs[i].reg_offset / map->reg_stride] @@ -937,6 +944,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, /* Should really dispose of the domain but... */ err_mutex: mutex_destroy(&d->lock); + lockdep_unregister_key(&d->lock_key); err_alloc: kfree(d->type_buf); kfree(d->type_buf_def); @@ -1030,6 +1038,7 @@ void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *d) kfree(d->config_buf); } mutex_destroy(&d->lock); + lockdep_unregister_key(&d->lock_key); kfree(d); } EXPORT_SYMBOL_GPL(regmap_del_irq_chip); From 89a216ed973e49d6f39a6976bcead3b631171b64 Mon Sep 17 00:00:00 2001 From: "Michael S. 
Tsirkin" Date: Wed, 9 Jul 2025 16:55:31 -0400 Subject: [PATCH 1340/2411] virtio: fix comments, readability Fix a couple of comments to match reality. Initialize config_driver_disabled to be consistent with other fields (note: the structure is already zero initialized, so this is not a bugfix as such). Signed-off-by: Michael S. Tsirkin Message-Id: <7b74a55a5f3dc066d954472f5b68c29022f11b43.1752094439.git.mst@redhat.com> Acked-by: Jason Wang --- drivers/virtio/virtio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 95d5d7993e5b..c441c8cc71ef 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -147,7 +147,7 @@ EXPORT_SYMBOL_GPL(virtio_config_changed); /** * virtio_config_driver_disable - disable config change reporting by drivers - * @dev: the device to reset + * @dev: the device to disable * * This is only allowed to be called by a driver and disabling can't * be nested. @@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(virtio_config_driver_disable); /** * virtio_config_driver_enable - enable config change reporting by drivers - * @dev: the device to reset + * @dev: the device to enable * * This is only allowed to be called by a driver and enabling can't * be nested. @@ -530,6 +530,7 @@ int register_virtio_device(struct virtio_device *dev) goto out_ida_remove; spin_lock_init(&dev->config_lock); + dev->config_driver_disabled = false; dev->config_core_enabled = false; dev->config_change_pending = false; From 2507789a724d607fa9e162dcadeb9f51b071fc49 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 7 May 2025 10:28:21 +0200 Subject: [PATCH 1341/2411] drm/virtio: implement virtio_gpu_shutdown Calling drm_dev_unplug() is the drm way to say the device is gone and can not be accessed any more. Cc: Michael S. Tsirkin Signed-off-by: Gerd Hoffmann Reviewed-by: Eric Auger Tested-by: Eric Auger Message-Id: <20250507082821.2710706-1-kraxel@redhat.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/gpu/drm/virtio/virtgpu_drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index e32e680c7197..71c6ccad4b99 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -130,10 +130,10 @@ static void virtio_gpu_remove(struct virtio_device *vdev) static void virtio_gpu_shutdown(struct virtio_device *vdev) { - /* - * drm does its own synchronization on shutdown. - * Do nothing here, opt out of device reset. - */ + struct drm_device *dev = vdev->priv; + + /* stop talking to the device */ + drm_dev_unplug(dev); } static void virtio_gpu_config_changed(struct virtio_device *vdev) From 482bd84f1fab20ac6c4d112945ae2d1bdb36839f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 27 May 2025 10:26:29 -0400 Subject: [PATCH 1342/2411] virtio: document ENOSPC drivers handle ENOSPC specially since it's an error one can get from a working VQ. Document the semantics. Message-Id: <2e6ec46b8d5e6755be291cec8e2ec57ef286e97b.1748356035.git.mst@redhat.com> Reported-by: Parav Pandit Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi Reviewed-by: Parav Pandit --- drivers/virtio/virtio_ring.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 4397392bfef0..f5062061c408 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2296,6 +2296,10 @@ static inline int virtqueue_add(struct virtqueue *_vq, * at the same time (except where noted). * * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + * + * NB: ENOSPC is a special code that is only returned on an attempt to add a + * buffer to a full VQ. It indicates that some buffers are outstanding and that + * the operation can be retried after some buffers have been used. 
*/ int virtqueue_add_sgs(struct virtqueue *_vq, struct scatterlist *sgs[], From 564a69ad90d15c782176e1a8c9e1c95661e1aed0 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 21 May 2025 17:03:46 +0530 Subject: [PATCH 1343/2411] virtio-mmio: Remove virtqueue list from mmio device The MMIO transport implementation creates a list of virtqueues for a virtio device, while the same is already available in the struct virtio_device. Don't create a duplicate list, and use the other one instead. While at it, fix the virtio_device_for_each_vq() macro to accept an argument like "&vm_dev->vdev" (which currently fails to build). Signed-off-by: Viresh Kumar Message-Id: <3e56c6f74002987e22f364d883cbad177cd9ad9c.1747827066.git.viresh.kumar@linaro.org> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/virtio/virtio_mmio.c | 52 +++--------------------------------- include/linux/virtio.h | 2 +- 2 files changed, 4 insertions(+), 50 deletions(-) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 5d78c2d572ab..b152a1eca05a 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -65,7 +65,6 @@ #include #include #include -#include #include #include #include @@ -88,22 +87,8 @@ struct virtio_mmio_device { void __iomem *base; unsigned long version; - - /* a list of queues so we can dispatch IRQs */ - spinlock_t lock; - struct list_head virtqueues; }; -struct virtio_mmio_vq_info { - /* the actual virtqueue */ - struct virtqueue *vq; - - /* the list node for the virtqueues list */ - struct list_head node; -}; - - - /* Configuration interface */ static u64 vm_get_features(struct virtio_device *vdev) @@ -300,9 +285,8 @@ static bool vm_notify_with_data(struct virtqueue *vq) static irqreturn_t vm_interrupt(int irq, void *opaque) { struct virtio_mmio_device *vm_dev = opaque; - struct virtio_mmio_vq_info *info; + struct virtqueue *vq; unsigned long status; - unsigned long flags; irqreturn_t ret = IRQ_NONE; /* Read and acknowledge 
interrupts */ @@ -315,10 +299,8 @@ static irqreturn_t vm_interrupt(int irq, void *opaque) } if (likely(status & VIRTIO_MMIO_INT_VRING)) { - spin_lock_irqsave(&vm_dev->lock, flags); - list_for_each_entry(info, &vm_dev->virtqueues, node) - ret |= vring_interrupt(irq, info->vq); - spin_unlock_irqrestore(&vm_dev->lock, flags); + virtio_device_for_each_vq(&vm_dev->vdev, vq) + ret |= vring_interrupt(irq, vq); } return ret; @@ -329,14 +311,8 @@ static irqreturn_t vm_interrupt(int irq, void *opaque) static void vm_del_vq(struct virtqueue *vq) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev); - struct virtio_mmio_vq_info *info = vq->priv; - unsigned long flags; unsigned int index = vq->index; - spin_lock_irqsave(&vm_dev->lock, flags); - list_del(&info->node); - spin_unlock_irqrestore(&vm_dev->lock, flags); - /* Select and deactivate the queue */ writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL); if (vm_dev->version == 1) { @@ -347,8 +323,6 @@ static void vm_del_vq(struct virtqueue *vq) } vring_del_virtqueue(vq); - - kfree(info); } static void vm_del_vqs(struct virtio_device *vdev) @@ -375,9 +349,7 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int in { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); bool (*notify)(struct virtqueue *vq); - struct virtio_mmio_vq_info *info; struct virtqueue *vq; - unsigned long flags; unsigned int num; int err; @@ -399,13 +371,6 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int in goto error_available; } - /* Allocate and fill out our active queue description */ - info = kmalloc(sizeof(*info), GFP_KERNEL); - if (!info) { - err = -ENOMEM; - goto error_kmalloc; - } - num = readl(vm_dev->base + VIRTIO_MMIO_QUEUE_NUM_MAX); if (num == 0) { err = -ENOENT; @@ -463,13 +428,6 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int in writel(1, vm_dev->base + VIRTIO_MMIO_QUEUE_READY); } - vq->priv = info; - info->vq = vq; - - 
spin_lock_irqsave(&vm_dev->lock, flags); - list_add(&info->node, &vm_dev->virtqueues); - spin_unlock_irqrestore(&vm_dev->lock, flags); - return vq; error_bad_pfn: @@ -481,8 +439,6 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int in writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_READY); WARN_ON(readl(vm_dev->base + VIRTIO_MMIO_QUEUE_READY)); } - kfree(info); -error_kmalloc: error_available: return ERR_PTR(err); } @@ -627,8 +583,6 @@ static int virtio_mmio_probe(struct platform_device *pdev) vm_dev->vdev.dev.release = virtio_mmio_release_dev; vm_dev->vdev.config = &virtio_mmio_config_ops; vm_dev->pdev = pdev; - INIT_LIST_HEAD(&vm_dev->virtqueues); - spin_lock_init(&vm_dev->lock); vm_dev->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(vm_dev->base)) { diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 64cb4b04be7a..8b745ce0cf5f 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -196,7 +196,7 @@ int virtio_device_reset_done(struct virtio_device *dev); size_t virtio_max_dma_size(const struct virtio_device *vdev); #define virtio_device_for_each_vq(vdev, vq) \ - list_for_each_entry(vq, &vdev->vqs, list) + list_for_each_entry(vq, &(vdev)->vqs, list) /** * struct virtio_driver - operations for a virtio I/O driver From 4d0efa600ecf30aa61c14681164290f75c328f8a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 29 May 2025 13:00:27 +0530 Subject: [PATCH 1344/2411] virtio-vdpa: Remove virtqueue list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The virtio vdpa implementation creates a list of virtqueues, while the same is already available in the struct virtio_device. This list is never traversed though, and only the pointer to the struct virtio_vdpa_vq_info is used in the callback, where the virtqueue pointer could be directly used. Remove the unwanted code to simplify the driver. 
Signed-off-by: Viresh Kumar Message-Id: <7808f2f7e484987b95f172fffb6c71a5da20ed1e.1748503784.git.viresh.kumar@linaro.org> Signed-off-by: Michael S. Tsirkin Acked-by: Eugenio Pérez --- drivers/virtio/virtio_vdpa.c | 44 +++--------------------------------- 1 file changed, 3 insertions(+), 41 deletions(-) diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 1f60c9d5cb18..e25610e3393a 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -28,19 +28,6 @@ struct virtio_vdpa_device { struct virtio_device vdev; struct vdpa_device *vdpa; u64 features; - - /* The lock to protect virtqueue list */ - spinlock_t lock; - /* List of virtio_vdpa_vq_info */ - struct list_head virtqueues; -}; - -struct virtio_vdpa_vq_info { - /* the actual virtqueue */ - struct virtqueue *vq; - - /* the list node for the virtqueues list */ - struct list_head node; }; static inline struct virtio_vdpa_device * @@ -135,9 +122,9 @@ static irqreturn_t virtio_vdpa_config_cb(void *private) static irqreturn_t virtio_vdpa_virtqueue_cb(void *private) { - struct virtio_vdpa_vq_info *info = private; + struct virtqueue *vq = private; - return vring_interrupt(0, info->vq); + return vring_interrupt(0, vq); } static struct virtqueue * @@ -145,18 +132,15 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, void (*callback)(struct virtqueue *vq), const char *name, bool ctx) { - struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev); struct vdpa_device *vdpa = vd_get_vdpa(vdev); struct device *dma_dev; const struct vdpa_config_ops *ops = vdpa->config; - struct virtio_vdpa_vq_info *info; bool (*notify)(struct virtqueue *vq) = virtio_vdpa_notify; struct vdpa_callback cb; struct virtqueue *vq; u64 desc_addr, driver_addr, device_addr; /* Assume split virtqueue, switch to packed if necessary */ struct vdpa_vq_state state = {0}; - unsigned long flags; u32 align, max_num, min_num = 1; bool may_reduce_num = true; int err; @@ -179,10 +163,6 @@ 
virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, if (ops->get_vq_ready(vdpa, index)) return ERR_PTR(-ENOENT); - /* Allocate and fill out our active queue description */ - info = kmalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return ERR_PTR(-ENOMEM); if (ops->get_vq_size) max_num = ops->get_vq_size(vdpa, index); else @@ -217,7 +197,7 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, /* Setup virtqueue callback */ cb.callback = callback ? virtio_vdpa_virtqueue_cb : NULL; - cb.private = info; + cb.private = vq; cb.trigger = NULL; ops->set_vq_cb(vdpa, index, &cb); ops->set_vq_num(vdpa, index, virtqueue_get_vring_size(vq)); @@ -248,13 +228,6 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, ops->set_vq_ready(vdpa, index, 1); - vq->priv = info; - info->vq = vq; - - spin_lock_irqsave(&vd_dev->lock, flags); - list_add(&info->node, &vd_dev->virtqueues); - spin_unlock_irqrestore(&vd_dev->lock, flags); - return vq; err_vq: @@ -263,7 +236,6 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, ops->set_vq_ready(vdpa, index, 0); /* VDPA driver should make sure vq is stopeed here */ WARN_ON(ops->get_vq_ready(vdpa, index)); - kfree(info); return ERR_PTR(err); } @@ -272,20 +244,12 @@ static void virtio_vdpa_del_vq(struct virtqueue *vq) struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vq->vdev); struct vdpa_device *vdpa = vd_dev->vdpa; const struct vdpa_config_ops *ops = vdpa->config; - struct virtio_vdpa_vq_info *info = vq->priv; unsigned int index = vq->index; - unsigned long flags; - - spin_lock_irqsave(&vd_dev->lock, flags); - list_del(&info->node); - spin_unlock_irqrestore(&vd_dev->lock, flags); /* Select and deactivate the queue (best effort) */ ops->set_vq_ready(vdpa, index, 0); vring_del_virtqueue(vq); - - kfree(info); } static void virtio_vdpa_del_vqs(struct virtio_device *vdev) @@ -501,8 +465,6 @@ static int virtio_vdpa_probe(struct vdpa_device *vdpa) vd_dev->vdev.dev.release = 
virtio_vdpa_release_dev; vd_dev->vdev.config = &virtio_vdpa_config_ops; vd_dev->vdpa = vdpa; - INIT_LIST_HEAD(&vd_dev->virtqueues); - spin_lock_init(&vd_dev->lock); vd_dev->vdev.id.device = ops->get_device_id(vdpa); if (vd_dev->vdev.id.device == 0) From c0883c1af14c5d351201ace00b1a46df2b157329 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Thu, 29 May 2025 01:42:39 -0700 Subject: [PATCH 1345/2411] virtio: Fix typo in register_virtio_device() doc comment Corrected "suceess" to "success" in the function documentation for clarity. Signed-off-by: Alok Tiwari Acked-by: Jason Wang Message-Id: <20250529084350.3145699-1-alok.a.tiwari@oracle.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo --- drivers/virtio/virtio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index c441c8cc71ef..b25eadf59477 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -506,7 +506,7 @@ static int virtio_device_of_init(struct virtio_device *dev) * On error, the caller must call put_device on &@dev->dev (and not kfree), * as another code path may have obtained a reference to @dev. * - * Returns: 0 on suceess, -error on failure + * Returns: 0 on success, -error on failure */ int register_virtio_device(struct virtio_device *dev) { From 32d89a405adc204d82bea6ae2ba27a62d35568b4 Mon Sep 17 00:00:00 2001 From: Pei Xiao Date: Wed, 4 Jun 2025 14:55:48 +0800 Subject: [PATCH 1346/2411] vhost: Use ERR_CAST inlined function instead of ERR_PTR(PTR_ERR(...)) cocci warning: ./kernel/vhost_task.c:148:9-16: WARNING: ERR_CAST can be used with tsk Use ERR_CAST inlined function instead of ERR_PTR(PTR_ERR(...)). Signed-off-by: Pei Xiao Message-Id: <1a8499a5da53e4f72cf21aca044ae4b26db8b2ad.1749020055.git.xiaopei01@kylinos.cn> Signed-off-by: Michael S. 
Tsirkin --- kernel/vhost_task.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c index 2f844c279a3e..bc738fa90c1d 100644 --- a/kernel/vhost_task.c +++ b/kernel/vhost_task.c @@ -145,7 +145,7 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *), tsk = copy_process(NULL, 0, NUMA_NO_NODE, &args); if (IS_ERR(tsk)) { kfree(vtsk); - return ERR_PTR(PTR_ERR(tsk)); + return ERR_CAST(tsk); } vtsk->task = tsk; From 6f0f3d7fc4e05797b801ded4910a64d16db230e9 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Wed, 4 Jun 2025 21:48:01 +0300 Subject: [PATCH 1347/2411] vdpa/mlx5: Fix needs_teardown flag calculation needs_teardown is a device flag that indicates when virtual queues need to be recreated. This happens for certain configuration changes: queue size and some specific features. Currently, the needs_teardown state can be incorrectly reset by subsequent .set_vq_num() calls. For example, for 1 rx VQ with size 512 and 1 tx VQ with size 256: .set_vq_num(0, 512) -> sets needs_teardown to true (rx queue has a non-default size) .set_vq_num(1, 256) -> sets needs_teardown to false (tx queue has a default size) This change takes into account the previous value of the needs_teardown flag when re-calculating it during VQ size configuration. Fixes: 0fe963d6fc16 ("vdpa/mlx5: Re-create HW VQs under certain conditions") Signed-off-by: Dragos Tatulea Reviewed-by: Shahar Shitrit Reviewed-by: Si-Wei Liu Tested-by: Si-Wei Liu Message-Id: <20250604184802.2625300-1-dtatulea@nvidia.com> Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index cccc49a08a1a..efb5fa694f1e 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2491,7 +2491,7 @@ static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) } mvq = &ndev->vqs[idx]; - ndev->needs_teardown = num != mvq->num_ent; + ndev->needs_teardown |= num != mvq->num_ent; mvq->num_ent = num; } From 652abad08571a067b16c5bb47ad5ea7478517e7d Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Wed, 11 Jun 2025 07:39:21 -0700 Subject: [PATCH 1348/2411] vhost-scsi: Fix typos and formatting in comments and logs This patch corrects several minor typos and formatting issues. Changes include: Fixing misspellings like in comments - "explict" -> "explicit" - "infight" -> "inflight", - "with generate" -> "will generate" formatting in logs - Correcting log formatting specifier from "%dd" to "%d" - Adding a missing space in the sysfs emit string to prevent misinterpreted output like "X86_64on ". changing to "X86_64 on " - Cleaning up stray semicolons in struct definition endings These changes improve code readability and consistency. no functionality changes. Signed-off-by: Alok Tiwari Message-Id: <20250611143932.2443796-1-alok.a.tiwari@oracle.com> Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Stefan Hajnoczi Reviewed-by: Mike Christie --- drivers/vhost/scsi.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index c12a0d4e6386..508ff3b29f39 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -152,7 +152,7 @@ struct vhost_scsi_nexus { struct vhost_scsi_tpg { /* Vhost port target portal group tag for TCM */ u16 tport_tpgt; - /* Used to track number of TPG Port/Lun Links wrt to explict I_T Nexus shutdown */ + /* Used to track number of TPG Port/Lun Links wrt to explicit I_T Nexus shutdown */ int tv_tpg_port_count; /* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */ int tv_tpg_vhost_count; @@ -311,12 +311,12 @@ static void vhost_scsi_init_inflight(struct vhost_scsi *vs, mutex_lock(&vq->mutex); - /* store old infight */ + /* store old inflight */ idx = vs->vqs[i].inflight_idx; if (old_inflight) old_inflight[i] = &vs->vqs[i].inflights[idx]; - /* setup new infight */ + /* setup new inflight */ vs->vqs[i].inflight_idx = idx ^ 1; new_inflight = &vs->vqs[i].inflights[idx ^ 1]; kref_init(&new_inflight->kref); @@ -1249,7 +1249,7 @@ vhost_scsi_setup_resp_iovs(struct vhost_scsi_cmd *cmd, struct iovec *in_iovs, if (!in_iovs_cnt) return 0; /* - * Initiator's normally just put the virtio_scsi_cmd_resp in the first + * Initiators normally just put the virtio_scsi_cmd_resp in the first * iov, but just in case they wedged in some data with it we check for * greater than or equal to the response struct. 
*/ @@ -1457,7 +1457,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) cmd = vhost_scsi_get_cmd(vq, tag); if (IS_ERR(cmd)) { ret = PTR_ERR(cmd); - vq_err(vq, "vhost_scsi_get_tag failed %dd\n", ret); + vq_err(vq, "vhost_scsi_get_tag failed %d\n", ret); goto err; } cmd->tvc_vq = vq; @@ -2609,7 +2609,7 @@ static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg, return -ENOMEM; } /* - * Since we are running in 'demo mode' this call with generate a + * Since we are running in 'demo mode' this call will generate a * struct se_node_acl for the vhost_scsi struct se_portal_group with * the SCSI Initiator port name of the passed configfs group 'name'. */ @@ -2915,7 +2915,7 @@ static ssize_t vhost_scsi_wwn_version_show(struct config_item *item, char *page) { return sysfs_emit(page, "TCM_VHOST fabric module %s on %s/%s" - "on "UTS_RELEASE"\n", VHOST_SCSI_VERSION, utsname()->sysname, + " on "UTS_RELEASE"\n", VHOST_SCSI_VERSION, utsname()->sysname, utsname()->machine); } @@ -2983,13 +2983,13 @@ static int __init vhost_scsi_init(void) vhost_scsi_deregister(); out: return ret; -}; +} static void vhost_scsi_exit(void) { target_unregister_template(&vhost_scsi_ops); vhost_scsi_deregister(); -}; +} MODULE_DESCRIPTION("VHOST_SCSI series fabric driver"); MODULE_ALIAS("tcm_vhost"); From 69cd720a8a5e9ef0f05ce5dd8c9ea6e018245c82 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 11 Jun 2025 16:01:13 -0500 Subject: [PATCH 1349/2411] vhost-scsi: Fix log flooding with target does not exist errors As part of the normal initiator side scanning the guest's scsi layer will loop over all possible targets and send an inquiry. Since the max number of targets for virtio-scsi is 256, this can result in 255 error messages about targets not existing if you only have a single target. When there's more than 1 vhost-scsi device each with a single target, then you get N * 255 log messages. 
It looks like the log message was added by accident in: commit 3f8ca2e115e5 ("vhost/scsi: Extract common handling code from control queue handler") when we added common helpers. Then in: commit 09d7583294aa ("vhost/scsi: Use common handling code in request queue handler") we converted the scsi command processing path to use the new helpers so we started to see the extra log messages during scanning. The patches were just making some code common but added the vq_err call and I'm guessing the patch author forgot to enable the vq_err call (vq_err is implemented by pr_debug which defaults to off). So this patch removes the call since it's expected to hit this path during device discovery. Fixes: 09d7583294aa ("vhost/scsi: Use common handling code in request queue handler") Signed-off-by: Mike Christie Reviewed-by: Stefan Hajnoczi Reviewed-by: Stefano Garzarella Message-Id: <20250611210113.10912-1-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 508ff3b29f39..fd9a517dfe13 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1226,10 +1226,8 @@ vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc, /* validated at handler entry */ vs_tpg = vhost_vq_get_backend(vq); tpg = READ_ONCE(vs_tpg[*vc->target]); - if (unlikely(!tpg)) { - vq_err(vq, "Target 0x%x does not exist\n", *vc->target); + if (unlikely(!tpg)) goto out; - } } if (tpgp) From 569c392e191361cd05fba1fd87ed02ef0d130ef7 Mon Sep 17 00:00:00 2001 From: "Dr. 
David Alan Gilbert" Date: Tue, 17 Jun 2025 01:18:36 +0100 Subject: [PATCH 1350/2411] vhost: vringh: Remove unused iotlb functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The functions: vringh_abandon_iotlb() vringh_notify_disable_iotlb() and vringh_notify_enable_iotlb() were added in 2020 by commit 9ad9c49cfe97 ("vringh: IOTLB support") but have remained unused. Remove them. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Simon Horman Message-Id: <20250617001838.114457-2-linux@treblig.org> Signed-off-by: Michael S. Tsirkin Acked-by: Eugenio Pérez Tested-by: Lei Yang --- drivers/vhost/vringh.c | 43 ------------------------------------------ include/linux/vringh.h | 5 ----- 2 files changed, 48 deletions(-) diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index bbce65452701..67a028d6fb5f 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -1534,23 +1534,6 @@ ssize_t vringh_iov_push_iotlb(struct vringh *vrh, } EXPORT_SYMBOL(vringh_iov_push_iotlb); -/** - * vringh_abandon_iotlb - we've decided not to handle the descriptor(s). - * @vrh: the vring. - * @num: the number of descriptors to put back (ie. num - * vringh_get_iotlb() to undo). - * - * The next vringh_get_iotlb() will return the old descriptor(s) again. - */ -void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num) -{ - /* We only update vring_avail_event(vr) when we want to be notified, - * so we haven't changed that yet. - */ - vrh->last_avail_idx -= num; -} -EXPORT_SYMBOL(vringh_abandon_iotlb); - /** * vringh_complete_iotlb - we've finished with descriptor, publish it. * @vrh: the vring. @@ -1571,32 +1554,6 @@ int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len) } EXPORT_SYMBOL(vringh_complete_iotlb); -/** - * vringh_notify_enable_iotlb - we want to know if something changes. - * @vrh: the vring. 
- * - * This always enables notifications, but returns false if there are - * now more buffers available in the vring. - */ -bool vringh_notify_enable_iotlb(struct vringh *vrh) -{ - return __vringh_notify_enable(vrh, getu16_iotlb, putu16_iotlb); -} -EXPORT_SYMBOL(vringh_notify_enable_iotlb); - -/** - * vringh_notify_disable_iotlb - don't tell us if something changes. - * @vrh: the vring. - * - * This is our normal running state: we disable and then only enable when - * we're going to sleep. - */ -void vringh_notify_disable_iotlb(struct vringh *vrh) -{ - __vringh_notify_disable(vrh, putu16_iotlb); -} -EXPORT_SYMBOL(vringh_notify_disable_iotlb); - /** * vringh_need_notify_iotlb - must we tell the other side about used buffers? * @vrh: the vring we've called vringh_complete_iotlb() on. diff --git a/include/linux/vringh.h b/include/linux/vringh.h index c3a8117dabe8..af8bd2695a7b 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -319,13 +319,8 @@ ssize_t vringh_iov_push_iotlb(struct vringh *vrh, struct vringh_kiov *wiov, const void *src, size_t len); -void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num); - int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len); -bool vringh_notify_enable_iotlb(struct vringh *vrh); -void vringh_notify_disable_iotlb(struct vringh *vrh); - int vringh_need_notify_iotlb(struct vringh *vrh); #endif /* CONFIG_VHOST_IOTLB */ From 6e9ef6937c726b97d4a6d49332d06e999acc15f5 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Tue, 17 Jun 2025 01:18:37 +0100 Subject: [PATCH 1351/2411] vhost: vringh: Remove unused functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The functions: vringh_abandon_kern() vringh_abandon_user() vringh_iov_pull_kern() and vringh_iov_push_kern() were all added in 2013 by commit f87d0fbb5798 ("vringh: host-side implementation of virtio rings.") but have remained unused. Remove them and the two helper functions they used. 
Signed-off-by: Dr. David Alan Gilbert Message-Id: <20250617001838.114457-3-linux@treblig.org> Signed-off-by: Michael S. Tsirkin Acked-by: Eugenio Pérez Tested-by: Lei Yang Reviewed-by: Simon Horman --- drivers/vhost/vringh.c | 75 ------------------------------------------ include/linux/vringh.h | 7 ---- 2 files changed, 82 deletions(-) diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index 67a028d6fb5f..9f27c3f6091b 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -779,22 +779,6 @@ ssize_t vringh_iov_push_user(struct vringh_iov *wiov, } EXPORT_SYMBOL(vringh_iov_push_user); -/** - * vringh_abandon_user - we've decided not to handle the descriptor(s). - * @vrh: the vring. - * @num: the number of descriptors to put back (ie. num - * vringh_get_user() to undo). - * - * The next vringh_get_user() will return the old descriptor(s) again. - */ -void vringh_abandon_user(struct vringh *vrh, unsigned int num) -{ - /* We only update vring_avail_event(vr) when we want to be notified, - * so we haven't changed that yet. */ - vrh->last_avail_idx -= num; -} -EXPORT_SYMBOL(vringh_abandon_user); - /** * vringh_complete_user - we've finished with descriptor, publish it. * @vrh: the vring. @@ -900,20 +884,6 @@ static inline int putused_kern(const struct vringh *vrh, return 0; } -static inline int xfer_kern(const struct vringh *vrh, void *src, - void *dst, size_t len) -{ - memcpy(dst, src, len); - return 0; -} - -static inline int kern_xfer(const struct vringh *vrh, void *dst, - void *src, size_t len) -{ - memcpy(dst, src, len); - return 0; -} - /** * vringh_init_kern - initialize a vringh for a kernelspace vring. * @vrh: the vringh to initialize. @@ -998,51 +968,6 @@ int vringh_getdesc_kern(struct vringh *vrh, } EXPORT_SYMBOL(vringh_getdesc_kern); -/** - * vringh_iov_pull_kern - copy bytes from vring_iov. - * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume) - * @dst: the place to copy. - * @len: the maximum length to copy. 
- * - * Returns the bytes copied <= len or a negative errno. - */ -ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len) -{ - return vringh_iov_xfer(NULL, riov, dst, len, xfer_kern); -} -EXPORT_SYMBOL(vringh_iov_pull_kern); - -/** - * vringh_iov_push_kern - copy bytes into vring_iov. - * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume) - * @src: the place to copy from. - * @len: the maximum length to copy. - * - * Returns the bytes copied <= len or a negative errno. - */ -ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov, - const void *src, size_t len) -{ - return vringh_iov_xfer(NULL, wiov, (void *)src, len, kern_xfer); -} -EXPORT_SYMBOL(vringh_iov_push_kern); - -/** - * vringh_abandon_kern - we've decided not to handle the descriptor(s). - * @vrh: the vring. - * @num: the number of descriptors to put back (ie. num - * vringh_get_kern() to undo). - * - * The next vringh_get_kern() will return the old descriptor(s) again. - */ -void vringh_abandon_kern(struct vringh *vrh, unsigned int num) -{ - /* We only update vring_avail_event(vr) when we want to be notified, - * so we haven't changed that yet. */ - vrh->last_avail_idx -= num; -} -EXPORT_SYMBOL(vringh_abandon_kern); - /** * vringh_complete_kern - we've finished with descriptor, publish it. * @vrh: the vring. diff --git a/include/linux/vringh.h b/include/linux/vringh.h index af8bd2695a7b..49e7cbc9697a 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -175,9 +175,6 @@ int vringh_complete_multi_user(struct vringh *vrh, const struct vring_used_elem used[], unsigned num_used); -/* Pretend we've never seen descriptor (for easy error handling). */ -void vringh_abandon_user(struct vringh *vrh, unsigned int num); - /* Do we need to fire the eventfd to notify the other side? 
*/ int vringh_need_notify_user(struct vringh *vrh); @@ -235,10 +232,6 @@ int vringh_getdesc_kern(struct vringh *vrh, u16 *head, gfp_t gfp); -ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len); -ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov, - const void *src, size_t len); -void vringh_abandon_kern(struct vringh *vrh, unsigned int num); int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len); bool vringh_notify_enable_kern(struct vringh *vrh); From 8a0d18a9348f61c63b33a23b9eece1f66af1d70c Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Sun, 15 Jun 2025 10:39:11 -0700 Subject: [PATCH 1352/2411] vhost: Fix typos Fix multiple typos and improve comment clarity across vhost.c. Spelling errors: "thead" -> "thread", "RUNNUNG" -> "RUNNING" and "avaiable" -> "available". Signed-off-by: Alok Tiwari Message-Id: <20250615173933.1610324-1-alok.a.tiwari@oracle.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Simon Horman --- drivers/vhost/vhost.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 3a5ebb973dba..4390e3a14218 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -594,10 +594,10 @@ static void vhost_attach_mm(struct vhost_dev *dev) if (dev->use_worker) { dev->mm = get_task_mm(current); } else { - /* vDPA device does not use worker thead, so there's - * no need to hold the address space for mm. This help + /* vDPA device does not use worker thread, so there's + * no need to hold the address space for mm. This helps * to avoid deadlock in the case of mmap() which may - * held the refcnt of the file and depends on release + * hold the refcnt of the file and depends on release * method to remove vma. */ dev->mm = current->mm; @@ -731,7 +731,7 @@ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq, * We don't want to call synchronize_rcu for every vq during setup * because it will slow down VM startup.
If we haven't done * VHOST_SET_VRING_KICK and not done the driver specific - * SET_ENDPOINT/RUNNUNG then we can skip the sync since there will + * SET_ENDPOINT/RUNNING then we can skip the sync since there will * not be any works queued for scsi and net. */ mutex_lock(&vq->mutex); @@ -2860,7 +2860,7 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev, } EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n); -/* return true if we're sure that avaiable ring is empty */ +/* return true if we're sure that available ring is empty */ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) { int r; From 95109b46764665e3de4a118185e4f732e8e849fd Mon Sep 17 00:00:00 2001 From: WangYuli Date: Mon, 23 Jun 2025 14:52:10 +0800 Subject: [PATCH 1353/2411] virtio: virtio_dma_buf: fix missing parameter documentation Add missing parameter documentation for virtio_dma_buf_attach() function to fix kernel-doc warnings: Warning: drivers/virtio/virtio_dma_buf.c:41 function parameter 'dma_buf' not described in 'virtio_dma_buf_attach' Warning: drivers/virtio/virtio_dma_buf.c:41 function parameter 'attach' not described in 'virtio_dma_buf_attach' The function documentation was missing descriptions for both the 'dma_buf' and 'attach' parameters. Add proper parameter documentation following kernel-doc format. Signed-off-by: WangYuli Message-Id: <241C7118259DA110+20250623065210.270237-1-wangyuli@uniontech.com> Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang Reviewed-by: Xuan Zhuo --- drivers/virtio/virtio_dma_buf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_dma_buf.c b/drivers/virtio/virtio_dma_buf.c index 3fe1d03b0645..95c10632f84a 100644 --- a/drivers/virtio/virtio_dma_buf.c +++ b/drivers/virtio/virtio_dma_buf.c @@ -36,6 +36,8 @@ EXPORT_SYMBOL(virtio_dma_buf_export); /** * virtio_dma_buf_attach - mandatory attach callback for virtio dma-bufs + * @dma_buf: [in] buffer to attach + * @attach: [in] attachment structure */ int virtio_dma_buf_attach(struct dma_buf *dma_buf, struct dma_buf_attachment *attach) From 400cad513c78f9af72c5a20f3611c1f1dc71d465 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Sat, 28 Jun 2025 11:33:53 -0700 Subject: [PATCH 1354/2411] vhost-scsi: Fix check for inline_sg_cnt exceeding preallocated limit The condition comparing ret to VHOST_SCSI_PREALLOC_SGLS was incorrect, as ret holds the result of kstrtouint() (typically 0 on success), not the parsed value. Update the check to use cnt, which contains the actual user-provided value. This prevents silently accepting values exceeding the maximum inline_sg_cnt. Fixes: bca939d5bcd0 ("vhost-scsi: Dynamically allocate scatterlists") Signed-off-by: Alok Tiwari Reviewed-by: Mike Christie Reviewed-by: Stefan Hajnoczi Message-Id: <20250628183405.3979538-1-alok.a.tiwari@oracle.com> Signed-off-by: Michael S.
Tsirkin Acked-by: Jason Wang --- drivers/vhost/scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index fd9a517dfe13..abf51332a5c5 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -71,7 +71,7 @@ static int vhost_scsi_set_inline_sg_cnt(const char *buf, if (ret) return ret; - if (ret > VHOST_SCSI_PREALLOC_SGLS) { + if (cnt > VHOST_SCSI_PREALLOC_SGLS) { pr_err("Max inline_sg_cnt is %u\n", VHOST_SCSI_PREALLOC_SGLS); return -EINVAL; } From cc51a66815999afb7e9cd845968de4fdf07567b7 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 8 Jul 2025 12:04:24 +0000 Subject: [PATCH 1355/2411] vdpa/mlx5: Fix release of uninitialized resources on error path The commit in the fixes tag made sure that mlx5_vdpa_free() is the single entrypoint for removing the vdpa device resources added in mlx5_vdpa_dev_add(), even in the cleanup path of mlx5_vdpa_dev_add(). This means that all functions from mlx5_vdpa_free() should be able to handle uninitialized resources. This was not the case though: mlx5_vdpa_destroy_mr_resources() and mlx5_cmd_cleanup_async_ctx() were not able to do so. This caused the splat below when adding a vdpa device without a MAC address. This patch fixes these remaining issues: - Makes mlx5_vdpa_destroy_mr_resources() return early if called on uninitialized resources. - Moves mlx5_cmd_init_async_ctx() early on during device addition because it can't fail. This means that mlx5_cmd_cleanup_async_ctx() also can't fail. To mirror this, move the call site of mlx5_cmd_cleanup_async_ctx() in mlx5_vdpa_free(). An additional comment was added in mlx5_vdpa_free() to document the expectations of functions called from this context. Splat: mlx5_core 0000:b5:03.2: mlx5_vdpa_dev_add:3950:(pid 2306) warning: No mac address provisioned? ------------[ cut here ]------------ WARNING: CPU: 13 PID: 2306 at kernel/workqueue.c:4207 __flush_work+0x9a/0xb0 [...] Call Trace: ? 
__try_to_del_timer_sync+0x61/0x90 ? __timer_delete_sync+0x2b/0x40 mlx5_vdpa_destroy_mr_resources+0x1c/0x40 [mlx5_vdpa] mlx5_vdpa_free+0x45/0x160 [mlx5_vdpa] vdpa_release_dev+0x1e/0x50 [vdpa] device_release+0x31/0x90 kobject_cleanup+0x37/0x130 mlx5_vdpa_dev_add+0x327/0x890 [mlx5_vdpa] vdpa_nl_cmd_dev_add_set_doit+0x2c1/0x4d0 [vdpa] genl_family_rcv_msg_doit+0xd8/0x130 genl_family_rcv_msg+0x14b/0x220 ? __pfx_vdpa_nl_cmd_dev_add_set_doit+0x10/0x10 [vdpa] genl_rcv_msg+0x47/0xa0 ? __pfx_genl_rcv_msg+0x10/0x10 netlink_rcv_skb+0x53/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x27b/0x3b0 netlink_sendmsg+0x1f7/0x430 __sys_sendto+0x1fa/0x210 ? ___pte_offset_map+0x17/0x160 ? next_uptodate_folio+0x85/0x2b0 ? percpu_counter_add_batch+0x51/0x90 ? filemap_map_pages+0x515/0x660 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x7b/0x2c0 ? do_read_fault+0x108/0x220 ? do_pte_missing+0x14a/0x3e0 ? __handle_mm_fault+0x321/0x730 ? count_memcg_events+0x13f/0x180 ? handle_mm_fault+0x1fb/0x2d0 ? do_user_addr_fault+0x20c/0x700 ? syscall_exit_work+0x104/0x140 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f0c25b0feca [...] ---[ end trace 0000000000000000 ]--- Signed-off-by: Dragos Tatulea Fixes: 83e445e64f48 ("vdpa/mlx5: Fix error path during device add") Reported-by: Wenli Quan Closes: https://lore.kernel.org/virtualization/CADZSLS0r78HhZAStBaN1evCSoPqRJU95Lt8AqZNJ6+wwYQ6vPQ@mail.gmail.com/ Reviewed-by: Tariq Toukan Reviewed-by: Cosmin Ratiu Message-Id: <20250708120424.2363354-2-dtatulea@nvidia.com> Tested-by: Wenli Quan Acked-by: Jason Wang Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/mlx5/core/mr.c | 3 +++ drivers/vdpa/mlx5/net/mlx5_vnet.c | 10 ++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 61424342c096..c7a20278bc3c 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -908,6 +908,9 @@ void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev) { struct mlx5_vdpa_mr_resources *mres = &mvdev->mres; + if (!mres->wq_gc) + return; + atomic_set(&mres->shutdown, 1); flush_delayed_work(&mres->gc_dwork_ent); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index efb5fa694f1e..0ed2fc28e1ce 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -3432,15 +3432,17 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev) ndev = to_mlx5_vdpa_ndev(mvdev); + /* Functions called here should be able to work with + * uninitialized resources. + */ free_fixed_resources(ndev); mlx5_vdpa_clean_mrs(mvdev); mlx5_vdpa_destroy_mr_resources(&ndev->mvdev); - mlx5_cmd_cleanup_async_ctx(&mvdev->async_ctx); - if (!is_zero_ether_addr(ndev->config.mac)) { pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); } + mlx5_cmd_cleanup_async_ctx(&mvdev->async_ctx); mlx5_vdpa_free_resources(&ndev->mvdev); free_irqs(ndev); kfree(ndev->event_cbs); @@ -3888,6 +3890,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, mvdev->actual_features = (device_features & BIT_ULL(VIRTIO_F_VERSION_1)); + mlx5_cmd_init_async_ctx(mdev, &mvdev->async_ctx); + ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL); ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL); if (!ndev->vqs || !ndev->event_cbs) { @@ -3960,8 +3964,6 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, ndev->rqt_size = 1; } - mlx5_cmd_init_async_ctx(mdev, &mvdev->async_ctx); - 
ndev->mvdev.mlx_features = device_features; mvdev->vdev.dma_dev = &mdev->pdev->dev; err = mlx5_vdpa_alloc_resources(&ndev->mvdev); From d9ea58b5dc6b4b50fbb6a10c73f840e8b10442b7 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 4 Jul 2025 14:53:35 +0200 Subject: [PATCH 1356/2411] vdpa: Fix IDR memory leak in VDUSE module exit Add missing idr_destroy() call in vduse_exit() to properly free the vduse_idr radix tree nodes. Without this, module load/unload cycles leak 576-byte radix tree node allocations, detectable by kmemleak as: unreferenced object (size 576): backtrace: [] radix_tree_node_alloc+0xa0/0xf0 [] idr_get_free+0x128/0x280 The vduse_idr is initialized via DEFINE_IDR() at line 136 and used throughout the VDUSE (vDPA Device in Userspace) driver for device ID management. The fix follows the documented pattern in lib/idr.c and matches the cleanup approach used by other drivers. This leak was discovered through comprehensive module testing with cumulative kmemleak detection across 10 load/unload iterations per module. Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace") Signed-off-by: Anders Roxell Message-Id: <20250704125335.1084649-1-anders.roxell@linaro.org> Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/vdpa_user/vduse_dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 6a9a37351310..04620bb77203 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -2216,6 +2216,7 @@ static void vduse_exit(void) cdev_del(&vduse_ctrl_cdev); unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX); class_unregister(&vduse_class); + idr_destroy(&vduse_idr); } module_exit(vduse_exit); From 7d9896e9f6d02d8aa85e63f736871f96c59a5263 Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Mon, 14 Jul 2025 15:12:32 +0800 Subject: [PATCH 1357/2411] vhost: Reintroduce kthread API and add mode selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 6e890c5d5021 ("vhost: use vhost_tasks for worker threads"), the vhost uses vhost_task and operates as a child of the owner thread. This is required for correct CPU usage accounting, especially when using containers. However, this change has caused confusion for some legacy userspace applications, and we didn't notice until it's too late. Unfortunately, it's too late to revert - we now have userspace depending both on old and new behaviour :( To address the issue, reintroduce kthread mode for vhost workers and provide a configuration to select between kthread and task worker. - Add 'fork_owner' parameter to vhost_dev to let users select kthread or task mode. Default mode is task mode (VHOST_FORK_OWNER_TASK). - Reintroduce kthread mode support: * Bring back the original vhost_worker() implementation, and rename it to vhost_run_work_kthread_list(). * Add cgroup support for the kthread * Introduce struct vhost_worker_ops: - Encapsulates create / stop / wake‑up callbacks. - vhost_worker_create() selects the proper ops according to fork_owner.
- Userspace configuration interface: * New IOCTLs: - VHOST_SET_FORK_FROM_OWNER lets userspace select task mode (VHOST_FORK_OWNER_TASK) or kthread mode (VHOST_FORK_OWNER_KTHREAD) - VHOST_GET_FORK_FROM_OWNER reads the current worker mode * Expose module parameter 'fork_from_owner_default' to allow system administrators to configure the default mode for vhost workers * Kconfig option CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL controls whether these IOCTLs and the parameter are available - The VHOST_NEW_WORKER functionality requires fork_owner to be set to true, with validation added to ensure proper configuration This partially reverts or improves upon: commit 6e890c5d5021 ("vhost: use vhost_tasks for worker threads") commit 1cdaafa1b8b4 ("vhost: replace single worker pointer with xarray") Fixes: 6e890c5d5021 ("vhost: use vhost_tasks for worker threads") Signed-off-by: Cindy Lu Message-Id: <20250714071333.59794-2-lulu@redhat.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Tested-by: Lei Yang --- drivers/vhost/Kconfig | 18 +++ drivers/vhost/vhost.c | 244 ++++++++++++++++++++++++++++++++++--- drivers/vhost/vhost.h | 22 ++++ include/uapi/linux/vhost.h | 29 +++++ 4 files changed, 295 insertions(+), 18 deletions(-) diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 020d4fbb947c..bc0f38574497 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -95,4 +95,22 @@ config VHOST_CROSS_ENDIAN_LEGACY If unsure, say "N". +config VHOST_ENABLE_FORK_OWNER_CONTROL + bool "Enable VHOST_ENABLE_FORK_OWNER_CONTROL" + default y + help + This option enables two IOCTLs: VHOST_SET_FORK_FROM_OWNER and + VHOST_GET_FORK_FROM_OWNER. These allow userspace applications + to modify the vhost worker mode for vhost devices. + + Also expose module parameter 'fork_from_owner_default' to allow users + to configure the default mode for vhost workers. + + By default, `VHOST_ENABLE_FORK_OWNER_CONTROL` is set to `y`, + users can change the worker thread mode as needed.
+ If this config is disabled (n),the related IOCTLs and parameters will + be unavailable. + + If unsure, say "Y". + endif diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 4390e3a14218..f4c1bc6adeda 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,13 @@ static int max_iotlb_entries = 2048; module_param(max_iotlb_entries, int, 0444); MODULE_PARM_DESC(max_iotlb_entries, "Maximum number of iotlb entries. (default: 2048)"); +static bool fork_from_owner_default = VHOST_FORK_OWNER_TASK; + +#ifdef CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL +module_param(fork_from_owner_default, bool, 0444); +MODULE_PARM_DESC(fork_from_owner_default, + "Set task mode as the default(default: Y)"); +#endif enum { VHOST_MEMORY_F_LOG = 0x1, @@ -242,7 +250,7 @@ static void vhost_worker_queue(struct vhost_worker *worker, * test_and_set_bit() implies a memory barrier. */ llist_add(&work->node, &worker->work_list); - vhost_task_wake(worker->vtsk); + worker->ops->wakeup(worker); } } @@ -388,6 +396,44 @@ static void vhost_vq_reset(struct vhost_dev *dev, __vhost_vq_meta_reset(vq); } +static int vhost_run_work_kthread_list(void *data) +{ + struct vhost_worker *worker = data; + struct vhost_work *work, *work_next; + struct vhost_dev *dev = worker->dev; + struct llist_node *node; + + kthread_use_mm(dev->mm); + + for (;;) { + /* mb paired w/ kthread_stop */ + set_current_state(TASK_INTERRUPTIBLE); + + if (kthread_should_stop()) { + __set_current_state(TASK_RUNNING); + break; + } + node = llist_del_all(&worker->work_list); + if (!node) + schedule(); + + node = llist_reverse_order(node); + /* make sure flag is seen after deletion */ + smp_wmb(); + llist_for_each_entry_safe(work, work_next, node, node) { + clear_bit(VHOST_WORK_QUEUED, &work->flags); + __set_current_state(TASK_RUNNING); + kcov_remote_start_common(worker->kcov_handle); + work->fn(work); + kcov_remote_stop(); + 
cond_resched(); + } + } + kthread_unuse_mm(dev->mm); + + return 0; +} + static bool vhost_run_work_list(void *data) { struct vhost_worker *worker = data; @@ -552,6 +598,7 @@ void vhost_dev_init(struct vhost_dev *dev, dev->byte_weight = byte_weight; dev->use_worker = use_worker; dev->msg_handler = msg_handler; + dev->fork_owner = fork_from_owner_default; init_waitqueue_head(&dev->wait); INIT_LIST_HEAD(&dev->read_list); INIT_LIST_HEAD(&dev->pending_list); @@ -581,6 +628,46 @@ long vhost_dev_check_owner(struct vhost_dev *dev) } EXPORT_SYMBOL_GPL(vhost_dev_check_owner); +struct vhost_attach_cgroups_struct { + struct vhost_work work; + struct task_struct *owner; + int ret; +}; + +static void vhost_attach_cgroups_work(struct vhost_work *work) +{ + struct vhost_attach_cgroups_struct *s; + + s = container_of(work, struct vhost_attach_cgroups_struct, work); + s->ret = cgroup_attach_task_all(s->owner, current); +} + +static int vhost_attach_task_to_cgroups(struct vhost_worker *worker) +{ + struct vhost_attach_cgroups_struct attach; + int saved_cnt; + + attach.owner = current; + + vhost_work_init(&attach.work, vhost_attach_cgroups_work); + vhost_worker_queue(worker, &attach.work); + + mutex_lock(&worker->mutex); + + /* + * Bypass attachment_cnt check in __vhost_worker_flush: + * Temporarily change it to INT_MAX to bypass the check + */ + saved_cnt = worker->attachment_cnt; + worker->attachment_cnt = INT_MAX; + __vhost_worker_flush(worker); + worker->attachment_cnt = saved_cnt; + + mutex_unlock(&worker->mutex); + + return attach.ret; +} + /* Caller should have device mutex */ bool vhost_dev_has_owner(struct vhost_dev *dev) { @@ -626,7 +713,7 @@ static void vhost_worker_destroy(struct vhost_dev *dev, WARN_ON(!llist_empty(&worker->work_list)); xa_erase(&dev->worker_xa, worker->id); - vhost_task_stop(worker->vtsk); + worker->ops->stop(worker); kfree(worker); } @@ -649,42 +736,115 @@ static void vhost_workers_free(struct vhost_dev *dev) xa_destroy(&dev->worker_xa); } +static void 
vhost_task_wakeup(struct vhost_worker *worker) +{ + return vhost_task_wake(worker->vtsk); +} + +static void vhost_kthread_wakeup(struct vhost_worker *worker) +{ + wake_up_process(worker->kthread_task); +} + +static void vhost_task_do_stop(struct vhost_worker *worker) +{ + return vhost_task_stop(worker->vtsk); +} + +static void vhost_kthread_do_stop(struct vhost_worker *worker) +{ + kthread_stop(worker->kthread_task); +} + +static int vhost_task_worker_create(struct vhost_worker *worker, + struct vhost_dev *dev, const char *name) +{ + struct vhost_task *vtsk; + u32 id; + int ret; + + vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed, + worker, name); + if (IS_ERR(vtsk)) + return PTR_ERR(vtsk); + + worker->vtsk = vtsk; + vhost_task_start(vtsk); + ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL); + if (ret < 0) { + vhost_task_do_stop(worker); + return ret; + } + worker->id = id; + return 0; +} + +static int vhost_kthread_worker_create(struct vhost_worker *worker, + struct vhost_dev *dev, const char *name) +{ + struct task_struct *task; + u32 id; + int ret; + + task = kthread_create(vhost_run_work_kthread_list, worker, "%s", name); + if (IS_ERR(task)) + return PTR_ERR(task); + + worker->kthread_task = task; + wake_up_process(task); + ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL); + if (ret < 0) + goto stop_worker; + + ret = vhost_attach_task_to_cgroups(worker); + if (ret) + goto stop_worker; + + worker->id = id; + return 0; + +stop_worker: + vhost_kthread_do_stop(worker); + return ret; +} + +static const struct vhost_worker_ops kthread_ops = { + .create = vhost_kthread_worker_create, + .stop = vhost_kthread_do_stop, + .wakeup = vhost_kthread_wakeup, +}; + +static const struct vhost_worker_ops vhost_task_ops = { + .create = vhost_task_worker_create, + .stop = vhost_task_do_stop, + .wakeup = vhost_task_wakeup, +}; + static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev) { struct vhost_worker 
*worker; - struct vhost_task *vtsk; char name[TASK_COMM_LEN]; int ret; - u32 id; + const struct vhost_worker_ops *ops = dev->fork_owner ? &vhost_task_ops : + &kthread_ops; worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT); if (!worker) return NULL; worker->dev = dev; + worker->ops = ops; snprintf(name, sizeof(name), "vhost-%d", current->pid); - vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed, - worker, name); - if (IS_ERR(vtsk)) - goto free_worker; - mutex_init(&worker->mutex); init_llist_head(&worker->work_list); worker->kcov_handle = kcov_common_handle(); - worker->vtsk = vtsk; - - vhost_task_start(vtsk); - - ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL); + ret = ops->create(worker, dev, name); if (ret < 0) - goto stop_worker; - worker->id = id; + goto free_worker; return worker; -stop_worker: - vhost_task_stop(vtsk); free_worker: kfree(worker); return NULL; @@ -865,6 +1025,14 @@ long vhost_worker_ioctl(struct vhost_dev *dev, unsigned int ioctl, switch (ioctl) { /* dev worker ioctls */ case VHOST_NEW_WORKER: + /* + * vhost_tasks will account for worker threads under the parent's + * NPROC value but kthreads do not. To avoid userspace overflowing + * the system with worker threads fork_owner must be true. + */ + if (!dev->fork_owner) + return -EFAULT; + ret = vhost_new_worker(dev, &state); if (!ret && copy_to_user(argp, &state, sizeof(state))) ret = -EFAULT; @@ -982,6 +1150,7 @@ void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_iotlb *umem) vhost_dev_cleanup(dev); + dev->fork_owner = fork_from_owner_default; dev->umem = umem; /* We don't need VQ locks below since vhost_dev_cleanup makes sure * VQs aren't running. 
@@ -2135,6 +2304,45 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) goto done; } +#ifdef CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL + if (ioctl == VHOST_SET_FORK_FROM_OWNER) { + /* Only allow modification before owner is set */ + if (vhost_dev_has_owner(d)) { + r = -EBUSY; + goto done; + } + u8 fork_owner_val; + + if (get_user(fork_owner_val, (u8 __user *)argp)) { + r = -EFAULT; + goto done; + } + if (fork_owner_val != VHOST_FORK_OWNER_TASK && + fork_owner_val != VHOST_FORK_OWNER_KTHREAD) { + r = -EINVAL; + goto done; + } + d->fork_owner = !!fork_owner_val; + r = 0; + goto done; + } + if (ioctl == VHOST_GET_FORK_FROM_OWNER) { + u8 fork_owner_val = d->fork_owner; + + if (fork_owner_val != VHOST_FORK_OWNER_TASK && + fork_owner_val != VHOST_FORK_OWNER_KTHREAD) { + r = -EINVAL; + goto done; + } + if (put_user(fork_owner_val, (u8 __user *)argp)) { + r = -EFAULT; + goto done; + } + r = 0; + goto done; + } +#endif + /* You must be the owner to do anything else */ r = vhost_dev_check_owner(d); if (r) diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index bb75a292d50c..ab704d84fb34 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -26,7 +26,18 @@ struct vhost_work { unsigned long flags; }; +struct vhost_worker; +struct vhost_dev; + +struct vhost_worker_ops { + int (*create)(struct vhost_worker *worker, struct vhost_dev *dev, + const char *name); + void (*stop)(struct vhost_worker *worker); + void (*wakeup)(struct vhost_worker *worker); +}; + struct vhost_worker { + struct task_struct *kthread_task; struct vhost_task *vtsk; struct vhost_dev *dev; /* Used to serialize device wide flushing with worker swapping. 
*/ @@ -36,6 +47,7 @@ struct vhost_worker { u32 id; int attachment_cnt; bool killed; + const struct vhost_worker_ops *ops; }; /* Poll a file (eventfd or socket) */ @@ -176,6 +188,16 @@ struct vhost_dev { int byte_weight; struct xarray worker_xa; bool use_worker; + /* + * If fork_owner is true we use vhost_tasks to create + * the worker so all settings/limits like cgroups, NPROC, + * scheduler, etc are inherited from the owner. If false, + * we use kthreads and only attach to the same cgroups + * as the owner for compat with older kernels. + * here we use true as default value. + * The default value is set by fork_from_owner_default + */ + bool fork_owner; int (*msg_handler)(struct vhost_dev *dev, u32 asid, struct vhost_iotlb_msg *msg); }; diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index d4b3e2ae1314..e72f2655459e 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -235,4 +235,33 @@ */ #define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \ struct vhost_vring_state) + +/* fork_owner values for vhost */ +#define VHOST_FORK_OWNER_KTHREAD 0 +#define VHOST_FORK_OWNER_TASK 1 + +/** + * VHOST_SET_FORK_FROM_OWNER - Set the fork_owner flag for the vhost device, + * This ioctl must be called before VHOST_SET_OWNER. + * Only available when CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y + * + * @param fork_owner: An 8-bit value that determines the vhost thread mode + * + * When fork_owner is set to VHOST_FORK_OWNER_TASK(default value): + * - Vhost will create vhost worker as tasks forked from the owner, + * inheriting all of the owner's attributes. + * + * When fork_owner is set to VHOST_FORK_OWNER_KTHREAD: + * - Vhost will create vhost workers as kernel threads. + */ +#define VHOST_SET_FORK_FROM_OWNER _IOW(VHOST_VIRTIO, 0x83, __u8) + +/** + * VHOST_GET_FORK_FROM_OWNER - Get the current fork_owner flag for the vhost device.
+ * Only available when CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y + * + * @return: An 8-bit value indicating the current thread mode. + */ +#define VHOST_GET_FORK_FROM_OWNER _IOR(VHOST_VIRTIO, 0x84, __u8) + #endif From b4ba1207d45adaafa2982c035898b36af2d3e518 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 14 Jul 2025 16:47:53 +0800 Subject: [PATCH 1358/2411] vhost: fail early when __vhost_add_used() fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fails vhost_add_used_n() early when __vhost_add_used() fails to make sure used idx is not updated with stale used ring information. Reported-by: Eugenio Pérez Signed-off-by: Jason Wang Message-Id: <20250714084755.11921-2-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang --- drivers/vhost/vhost.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index f4c1bc6adeda..b38e39242fb9 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2983,6 +2983,9 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, } r = __vhost_add_used_n(vq, heads, count); + if (r < 0) + return r; + /* Make sure buffer is written before we update index. */ smp_wmb(); if (vhost_put_used_idx(vq)) { From 67a873df0c410915275f735fedb401b9637d6faf Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 14 Jul 2025 16:47:54 +0800 Subject: [PATCH 1359/2411] vhost: basic in order support This patch adds basic in order support for vhost. Two optimizations are implemented in this patch: 1) Since the driver uses descriptors in order, vhost can deduce the next avail ring head by counting the number of descriptors that have been used in next_avail_head. This eliminates the need to access the available ring in vhost. 2) vhost_add_used_and_signal_n() is extended to accept the number of batched buffers per used elem.
While this increases the number of userspace memory accesses, it helps to reduce the number of used ring accesses by both the driver and vhost. Vhost-net will be the first user for this. Acked-by: Jonah Palmer Signed-off-by: Jason Wang Message-Id: <20250714084755.11921-3-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang --- drivers/vhost/net.c | 6 ++- drivers/vhost/vhost.c | 120 ++++++++++++++++++++++++++++++++++-------- drivers/vhost/vhost.h | 8 ++- 3 files changed, 109 insertions(+), 25 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 7cbfc7d718b3..4f9c67f17b49 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -374,7 +374,8 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net, while (j) { add = min(UIO_MAXIOV - nvq->done_idx, j); vhost_add_used_and_signal_n(vq->dev, vq, - &vq->heads[nvq->done_idx], add); + &vq->heads[nvq->done_idx], + NULL, add); nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV; j -= add; } @@ -457,7 +458,8 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq) if (!nvq->done_idx) return; - vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx); + vhost_add_used_and_signal_n(dev, vq, vq->heads, NULL, + nvq->done_idx); nvq->done_idx = 0; } diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index b38e39242fb9..a4873d116df1 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -372,6 +372,7 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->avail = NULL; vq->used = NULL; vq->last_avail_idx = 0; + vq->next_avail_head = 0; vq->avail_idx = 0; vq->last_used_idx = 0; vq->signalled_used = 0; @@ -501,6 +502,8 @@ static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) vq->log = NULL; kfree(vq->heads); vq->heads = NULL; + kfree(vq->nheads); + vq->nheads = NULL; } /* Helper to allocate iovec buffers for all vqs.
*/ @@ -518,7 +521,9 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) GFP_KERNEL); vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads), GFP_KERNEL); - if (!vq->indirect || !vq->log || !vq->heads) + vq->nheads = kmalloc_array(dev->iov_limit, sizeof(*vq->nheads), + GFP_KERNEL); + if (!vq->indirect || !vq->log || !vq->heads || !vq->nheads) goto err_nomem; } return 0; @@ -2159,14 +2164,15 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg break; } if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { - vq->last_avail_idx = s.num & 0xffff; + vq->next_avail_head = vq->last_avail_idx = + s.num & 0xffff; vq->last_used_idx = (s.num >> 16) & 0xffff; } else { if (s.num > 0xffff) { r = -EINVAL; break; } - vq->last_avail_idx = s.num; + vq->next_avail_head = vq->last_avail_idx = s.num; } /* Forget the cached index value. */ vq->avail_idx = vq->last_avail_idx; @@ -2798,11 +2804,12 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, unsigned int *out_num, unsigned int *in_num, struct vhost_log *log, unsigned int *log_num) { + bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER); struct vring_desc desc; unsigned int i, head, found = 0; u16 last_avail_idx = vq->last_avail_idx; __virtio16 ring_head; - int ret, access; + int ret, access, c = 0; if (vq->avail_idx == vq->last_avail_idx) { ret = vhost_get_avail_idx(vq); @@ -2813,17 +2820,21 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, return vq->num; } - /* Grab the next descriptor number they're advertising, and increment - * the index we've seen. */ - if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) { - vq_err(vq, "Failed to read head: idx %d address %p\n", - last_avail_idx, - &vq->avail->ring[last_avail_idx % vq->num]); - return -EFAULT; + if (in_order) + head = vq->next_avail_head & (vq->num - 1); + else { + /* Grab the next descriptor number they're + * advertising, and increment the index we've seen. 
*/ + if (unlikely(vhost_get_avail_head(vq, &ring_head, + last_avail_idx))) { + vq_err(vq, "Failed to read head: idx %d address %p\n", + last_avail_idx, + &vq->avail->ring[last_avail_idx % vq->num]); + return -EFAULT; + } + head = vhost16_to_cpu(vq, ring_head); } - head = vhost16_to_cpu(vq, ring_head); - /* If their number is silly, that's an error. */ if (unlikely(head >= vq->num)) { vq_err(vq, "Guest says index %u > %u is available", @@ -2866,6 +2877,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, "in indirect descriptor at idx %d\n", i); return ret; } + ++c; continue; } @@ -2901,10 +2913,12 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, } *out_num += ret; } + ++c; } while ((i = next_desc(vq, &desc)) != -1); /* On success, increment avail index. */ vq->last_avail_idx++; + vq->next_avail_head += c; /* Assume notifications from guest are disabled at this point, * if they aren't we would need to update avail_event index. */ @@ -2928,8 +2942,9 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) cpu_to_vhost32(vq, head), cpu_to_vhost32(vq, len) }; + u16 nheads = 1; - return vhost_add_used_n(vq, &heads, 1); + return vhost_add_used_n(vq, &heads, &nheads, 1); } EXPORT_SYMBOL_GPL(vhost_add_used); @@ -2965,10 +2980,9 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, return 0; } -/* After we've used one of their buffers, we tell them about it. We'll then - * want to notify the guest, using eventfd. 
*/ -int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, - unsigned count) +static int vhost_add_used_n_ooo(struct vhost_virtqueue *vq, + struct vring_used_elem *heads, + unsigned count) { int start, n, r; @@ -2981,7 +2995,69 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, heads += n; count -= n; } - r = __vhost_add_used_n(vq, heads, count); + return __vhost_add_used_n(vq, heads, count); +} + +static int vhost_add_used_n_in_order(struct vhost_virtqueue *vq, + struct vring_used_elem *heads, + const u16 *nheads, + unsigned count) +{ + vring_used_elem_t __user *used; + u16 old, new = vq->last_used_idx; + int start, i; + + if (!nheads) + return -EINVAL; + + start = vq->last_used_idx & (vq->num - 1); + used = vq->used->ring + start; + + for (i = 0; i < count; i++) { + if (vhost_put_used(vq, &heads[i], start, 1)) { + vq_err(vq, "Failed to write used"); + return -EFAULT; + } + start += nheads[i]; + new += nheads[i]; + if (start >= vq->num) + start -= vq->num; + } + + if (unlikely(vq->log_used)) { + /* Make sure data is seen before log. */ + smp_wmb(); + /* Log used ring entry write. */ + log_used(vq, ((void __user *)used - (void __user *)vq->used), + (vq->num - start) * sizeof *used); + if (start + count > vq->num) + log_used(vq, 0, + (start + count - vq->num) * sizeof *used); + } + + old = vq->last_used_idx; + vq->last_used_idx = new; + /* If the driver never bothers to signal in a very long while, + * used index might wrap around. If that happens, invalidate + * signalled_used index we stored. TODO: make sure driver + * signals at least once in 2^16 and remove this. */ + if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old))) + vq->signalled_used_valid = false; + return 0; +} + +/* After we've used one of their buffers, we tell them about it. We'll then + * want to notify the guest, using eventfd. 
*/ +int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, + u16 *nheads, unsigned count) +{ + bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER); + int r; + + if (!in_order || !nheads) + r = vhost_add_used_n_ooo(vq, heads, count); + else + r = vhost_add_used_n_in_order(vq, heads, nheads, count); if (r < 0) return r; @@ -3064,9 +3140,11 @@ EXPORT_SYMBOL_GPL(vhost_add_used_and_signal); /* multi-buffer version of vhost_add_used_and_signal */ void vhost_add_used_and_signal_n(struct vhost_dev *dev, struct vhost_virtqueue *vq, - struct vring_used_elem *heads, unsigned count) + struct vring_used_elem *heads, + u16 *nheads, + unsigned count) { - vhost_add_used_n(vq, heads, count); + vhost_add_used_n(vq, heads, nheads, count); vhost_signal(dev, vq); } EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index ab704d84fb34..24f3540b08a2 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -115,6 +115,8 @@ struct vhost_virtqueue { * Values are limited to 0x7fff, and the high bit is used as * a wrap counter when using VIRTIO_F_RING_PACKED. */ u16 last_avail_idx; + /* Next avail ring head when VIRTIO_F_IN_ORDER is negotiated */ + u16 next_avail_head; /* Caches available index value from user. */ u16 avail_idx; @@ -141,6 +143,7 @@ struct vhost_virtqueue { struct iovec iotlb_iov[64]; struct iovec *indirect; struct vring_used_elem *heads; + u16 *nheads; /* Protected by virtqueue mutex.
*/ struct vhost_iotlb *umem; struct vhost_iotlb *iotlb; @@ -235,11 +238,12 @@ bool vhost_vq_is_setup(struct vhost_virtqueue *vq); int vhost_vq_init_access(struct vhost_virtqueue *); int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len); int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads, - unsigned count); + u16 *nheads, unsigned count); void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *, unsigned int id, int len); void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *, - struct vring_used_elem *heads, unsigned count); + struct vring_used_elem *heads, u16 *nheads, + unsigned count); void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *); void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *); bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *); From 45347e79b544928d8ace9eb07c4d8f4fcc525752 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 14 Jul 2025 16:47:55 +0800 Subject: [PATCH 1360/2411] vhost_net: basic in_order support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces basic in-order support for vhost-net. By recording the number of batched buffers in an array when calling `vhost_add_used_and_signal_n()`, we can reduce the number of userspace accesses. Note that the vhost-net batching logic is kept as we still count the number of buffers there. Testing Results: With testpmd: - TX: txonly mode + vhost_net with XDP_DROP on TAP shows a 17.5% improvement, from 4.75 Mpps to 5.35 Mpps. - RX: No obvious improvements were observed. With virtio-ring in-order experimental code in the guest: - TX: pktgen in the guest + XDP_DROP on TAP shows a 19% improvement, from 5.2 Mpps to 6.2 Mpps. - RX: pktgen on TAP with vhost_net + XDP_DROP in the guest achieves a 6.1% improvement, from 3.47 Mpps to 3.61 Mpps. 
Acked-by: Jonah Palmer Acked-by: Eugenio Pérez Signed-off-by: Jason Wang Message-Id: <20250714084755.11921-4-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang --- drivers/vhost/net.c | 86 ++++++++++++++++++++++++++++++++------------- 1 file changed, 61 insertions(+), 25 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 4f9c67f17b49..8ac994b3228a 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -74,7 +74,8 @@ enum { (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | (1ULL << VIRTIO_NET_F_MRG_RXBUF) | (1ULL << VIRTIO_F_ACCESS_PLATFORM) | - (1ULL << VIRTIO_F_RING_RESET) + (1ULL << VIRTIO_F_RING_RESET) | + (1ULL << VIRTIO_F_IN_ORDER) }; enum { @@ -450,7 +451,8 @@ static int vhost_net_enable_vq(struct vhost_net *n, return vhost_poll_start(poll, sock->file); } -static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq) +static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq, + unsigned int count) { struct vhost_virtqueue *vq = &nvq->vq; struct vhost_dev *dev = vq->dev; @@ -458,8 +460,8 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq) if (!nvq->done_idx) return; - vhost_add_used_and_signal_n(dev, vq, vq->heads, NULL, - nvq->done_idx); + vhost_add_used_and_signal_n(dev, vq, vq->heads, + vq->nheads, count); nvq->done_idx = 0; } @@ -468,6 +470,8 @@ static void vhost_tx_batch(struct vhost_net *net, struct socket *sock, struct msghdr *msghdr) { + struct vhost_virtqueue *vq = &nvq->vq; + bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER); struct tun_msg_ctl ctl = { .type = TUN_MSG_PTR, .num = nvq->batched_xdp, @@ -475,6 +479,11 @@ static void vhost_tx_batch(struct vhost_net *net, }; int i, err; + if (in_order) { + vq->heads[0].len = 0; + vq->nheads[0] = nvq->done_idx; + } + if (nvq->batched_xdp == 0) goto signal_used; @@ -496,7 +505,7 @@ static void vhost_tx_batch(struct vhost_net *net, } signal_used: - vhost_net_signal_used(nvq); + vhost_net_signal_used(nvq, in_order ? 
1 : nvq->done_idx); nvq->batched_xdp = 0; } @@ -758,6 +767,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock) int sent_pkts = 0; bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX); bool busyloop_intr; + bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER); do { busyloop_intr = false; @@ -794,11 +804,13 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock) break; } - /* We can't build XDP buff, go for single - * packet path but let's flush batched - * packets. - */ - vhost_tx_batch(net, nvq, sock, &msg); + if (nvq->batched_xdp) { + /* We can't build XDP buff, go for single + * packet path but let's flush batched + * packets. + */ + vhost_tx_batch(net, nvq, sock, &msg); + } msg.msg_control = NULL; } else { if (tx_can_batch(vq, total_len)) @@ -819,8 +831,12 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock) pr_debug("Truncated TX packet: len %d != %zd\n", err, len); done: - vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head); - vq->heads[nvq->done_idx].len = 0; + if (in_order) { + vq->heads[0].id = cpu_to_vhost32(vq, head); + } else { + vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head); + vq->heads[nvq->done_idx].len = 0; + } ++nvq->done_idx; } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len))); @@ -999,7 +1015,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk) } static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk, - bool *busyloop_intr) + bool *busyloop_intr, unsigned int count) { struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX]; struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX]; @@ -1009,7 +1025,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk, if (!len && rvq->busyloop_timeout) { /* Flush batched heads first */ - vhost_net_signal_used(rnvq); + vhost_net_signal_used(rnvq, count); /* Both tx vq and rx socket were polled here */ vhost_net_busy_poll(net, rvq, 
tvq, busyloop_intr, true); @@ -1021,7 +1037,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk, /* This is a multi-buffer version of vhost_get_desc, that works if * vq has read descriptors only. - * @vq - the relevant virtqueue + * @nvq - the relevant vhost_net virtqueue * @datalen - data length we'll be reading * @iovcount - returned count of io vectors we fill * @log - vhost log @@ -1029,14 +1045,17 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk, * @quota - headcount quota, 1 for big buffer * returns number of buffer heads allocated, negative on error */ -static int get_rx_bufs(struct vhost_virtqueue *vq, +static int get_rx_bufs(struct vhost_net_virtqueue *nvq, struct vring_used_elem *heads, + u16 *nheads, int datalen, unsigned *iovcount, struct vhost_log *log, unsigned *log_num, unsigned int quota) { + struct vhost_virtqueue *vq = &nvq->vq; + bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER); unsigned int out, in; int seg = 0; int headcount = 0; @@ -1073,14 +1092,16 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, nlogs += *log_num; log += *log_num; } - heads[headcount].id = cpu_to_vhost32(vq, d); len = iov_length(vq->iov + seg, in); - heads[headcount].len = cpu_to_vhost32(vq, len); - datalen -= len; + if (!in_order) { + heads[headcount].id = cpu_to_vhost32(vq, d); + heads[headcount].len = cpu_to_vhost32(vq, len); + } ++headcount; + datalen -= len; seg += in; } - heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen); + *iovcount = seg; if (unlikely(log)) *log_num = nlogs; @@ -1090,6 +1111,15 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, r = UIO_MAXIOV + 1; goto err; } + + if (!in_order) + heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen); + else { + heads[0].len = cpu_to_vhost32(vq, len + datalen); + heads[0].id = cpu_to_vhost32(vq, d); + nheads[0] = headcount; + } + return headcount; err: vhost_discard_vq_desc(vq, headcount); @@ -1102,6 +1132,8 @@ static void 
handle_rx(struct vhost_net *net) { struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX]; struct vhost_virtqueue *vq = &nvq->vq; + bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER); + unsigned int count = 0; unsigned in, log; struct vhost_log *vq_log; struct msghdr msg = { @@ -1149,12 +1181,13 @@ static void handle_rx(struct vhost_net *net) do { sock_len = vhost_net_rx_peek_head_len(net, sock->sk, - &busyloop_intr); + &busyloop_intr, count); if (!sock_len) break; sock_len += sock_hlen; vhost_len = sock_len + vhost_hlen; - headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx, + headcount = get_rx_bufs(nvq, vq->heads + count, + vq->nheads + count, vhost_len, &in, vq_log, &log, likely(mergeable) ? UIO_MAXIOV : 1); /* On error, stop handling until the next kick. */ @@ -1230,8 +1263,11 @@ static void handle_rx(struct vhost_net *net) goto out; } nvq->done_idx += headcount; - if (nvq->done_idx > VHOST_NET_BATCH) - vhost_net_signal_used(nvq); + count += in_order ? 1 : headcount; + if (nvq->done_idx > VHOST_NET_BATCH) { + vhost_net_signal_used(nvq, count); + count = 0; + } if (unlikely(vq_log)) vhost_log_write(vq, vq_log, log, vhost_len, vq->iov, in); @@ -1243,7 +1279,7 @@ static void handle_rx(struct vhost_net *net) else if (!sock_len) vhost_net_enable_vq(net, vq); out: - vhost_net_signal_used(nvq); + vhost_net_signal_used(nvq, count); mutex_unlock(&vq->mutex); } From 10a886aaed293c4db3417951f396827216299e3d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 17 Jul 2025 10:01:08 +0100 Subject: [PATCH 1361/2411] vhost/vsock: Avoid allocating arbitrarily-sized SKBs vhost_vsock_alloc_skb() returns NULL for packets advertising a length larger than VIRTIO_VSOCK_MAX_PKT_BUF_SIZE in the packet header. However, this is only checked once the SKB has been allocated and, if the length in the packet header is zero, the SKB may not be freed immediately. 
Hoist the size check before the SKB allocation so that an iovec larger than VIRTIO_VSOCK_MAX_PKT_BUF_SIZE + the header size is rejected outright. The subsequent check on the length field in the header can then simply check that the allocated SKB is indeed large enough to hold the packet. Cc: Fixes: 71dc9ec9ac7d ("virtio/vsock: replace virtio_vsock_pkt with sk_buff") Reviewed-by: Stefano Garzarella Signed-off-by: Will Deacon Message-Id: <20250717090116.11987-2-will@kernel.org> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vsock.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 802153e23073..66a0f060770e 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -344,6 +344,9 @@ vhost_vsock_alloc_skb(struct vhost_virtqueue *vq, len = iov_length(vq->iov, out); + if (len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM) + return NULL; + /* len contains both payload and hdr */ skb = virtio_vsock_alloc_skb(len, GFP_KERNEL); if (!skb) @@ -367,8 +370,7 @@ vhost_vsock_alloc_skb(struct vhost_virtqueue *vq, return skb; /* The pkt is too big or the length in the header is invalid */ - if (payload_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE || - payload_len + sizeof(*hdr) > len) { + if (payload_len + sizeof(*hdr) > len) { kfree_skb(skb); return NULL; } From 0dab92484474587b82e8e0455839eaf5ac7bf894 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 17 Jul 2025 10:01:09 +0100 Subject: [PATCH 1362/2411] vsock/virtio: Validate length in packet header before skb_put() When receiving a vsock packet in the guest, only the virtqueue buffer size is validated prior to virtio_vsock_skb_rx_put(). Unfortunately, virtio_vsock_skb_rx_put() uses the length from the packet header as the length argument to skb_put(), potentially resulting in SKB overflow if the host has gone wonky. Validate the length as advertised by the packet header before calling virtio_vsock_skb_rx_put(). 
Cc: Fixes: 71dc9ec9ac7d ("virtio/vsock: replace virtio_vsock_pkt with sk_buff") Signed-off-by: Will Deacon Message-Id: <20250717090116.11987-3-will@kernel.org> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella --- net/vmw_vsock/virtio_transport.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index f0e48e6911fc..eb08a393413d 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -624,8 +624,9 @@ static void virtio_transport_rx_work(struct work_struct *work) do { virtqueue_disable_cb(vq); for (;;) { + unsigned int len, payload_len; + struct virtio_vsock_hdr *hdr; struct sk_buff *skb; - unsigned int len; if (!virtio_transport_more_replies(vsock)) { /* Stop rx until the device processes already @@ -642,12 +643,19 @@ static void virtio_transport_rx_work(struct work_struct *work) vsock->rx_buf_nr--; /* Drop short/long packets */ - if (unlikely(len < sizeof(struct virtio_vsock_hdr) || + if (unlikely(len < sizeof(*hdr) || len > virtio_vsock_skb_len(skb))) { kfree_skb(skb); continue; } + hdr = virtio_vsock_hdr(skb); + payload_len = le32_to_cpu(hdr->len); + if (unlikely(payload_len > len - sizeof(*hdr))) { + kfree_skb(skb); + continue; + } + virtio_vsock_skb_rx_put(skb); virtio_transport_deliver_tap_pkt(skb); virtio_transport_recv_pkt(&virtio_transport, skb); From 87dbae5e36613a6020f3d64a2eaeac0a1e0e6dc6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 17 Jul 2025 10:01:10 +0100 Subject: [PATCH 1363/2411] vsock/virtio: Move length check to callers of virtio_vsock_skb_rx_put() virtio_vsock_skb_rx_put() only calls skb_put() if the length in the packet header is not zero even though skb_put() handles this case gracefully. 
Remove the functionally redundant check from virtio_vsock_skb_rx_put() and, on the assumption that this is a worthwhile optimisation for handling credit messages, augment the existing length checks in virtio_transport_rx_work() to elide the call for zero-length payloads. Since the callers all have the length, extend virtio_vsock_skb_rx_put() to take it as an additional parameter rather than fish it back out of the packet header. Note that the vhost code already has similar logic in vhost_vsock_alloc_skb(). Reviewed-by: Stefano Garzarella Signed-off-by: Will Deacon Message-Id: <20250717090116.11987-4-will@kernel.org> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vsock.c | 2 +- include/linux/virtio_vsock.h | 9 ++------- net/vmw_vsock/virtio_transport.c | 4 +++- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 66a0f060770e..4c4a642945eb 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -375,7 +375,7 @@ vhost_vsock_alloc_skb(struct vhost_virtqueue *vq, return NULL; } - virtio_vsock_skb_rx_put(skb); + virtio_vsock_skb_rx_put(skb, payload_len); nbytes = copy_from_iter(skb->data, payload_len, &iov_iter); if (nbytes != payload_len) { diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 36fb3edfa403..97465f378ade 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -47,14 +47,9 @@ static inline void virtio_vsock_skb_clear_tap_delivered(struct sk_buff *skb) VIRTIO_VSOCK_SKB_CB(skb)->tap_delivered = false; } -static inline void virtio_vsock_skb_rx_put(struct sk_buff *skb) +static inline void virtio_vsock_skb_rx_put(struct sk_buff *skb, u32 len) { - u32 len; - - len = le32_to_cpu(virtio_vsock_hdr(skb)->len); - - if (len > 0) - skb_put(skb, len); + skb_put(skb, len); } static inline struct sk_buff *virtio_vsock_alloc_skb(unsigned int size, gfp_t mask) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 
eb08a393413d..0166919f8705 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -656,7 +656,9 @@ static void virtio_transport_rx_work(struct work_struct *work) continue; } - virtio_vsock_skb_rx_put(skb); + if (payload_len) + virtio_vsock_skb_rx_put(skb, payload_len); + virtio_transport_deliver_tap_pkt(skb); virtio_transport_recv_pkt(&virtio_transport, skb); } From 03a92f036a04fed2b00d69f5f46f1a486e70dc5c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 17 Jul 2025 10:01:11 +0100 Subject: [PATCH 1364/2411] vsock/virtio: Resize receive buffers so that each SKB fits in a 4K page When allocating receive buffers for the vsock virtio RX virtqueue, an SKB is allocated with a 4140 data payload (the 44-byte packet header + VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE). Even when factoring in the SKB overhead, the resulting 8KiB allocation thanks to the rounding in kmalloc_reserve() is wasteful (~3700 unusable bytes) and results in a higher-order page allocation on systems with 4KiB pages just for the sake of a few hundred bytes of packet data. Limit the vsock virtio RX buffers to 4KiB per SKB, resulting in much better memory utilisation and removing the need to allocate higher-order pages entirely. Reviewed-by: Stefano Garzarella Signed-off-by: Will Deacon Message-Id: <20250717090116.11987-5-will@kernel.org> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_vsock.h | 7 ++++++- net/vmw_vsock/virtio_transport.c | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 97465f378ade..879f1dfa7d3a 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -106,7 +106,12 @@ static inline size_t virtio_vsock_skb_len(struct sk_buff *skb) return (size_t)(skb_end_pointer(skb) - skb->head); } -#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) +/* Dimension the RX SKB so that the entire thing fits exactly into + * a single 4KiB page. 
This avoids wasting memory due to alloc_skb() + * rounding up to the next page order and also means that we + * don't leave higher-order pages sitting around in the RX queue. + */ +#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE SKB_WITH_OVERHEAD(1024 * 4) #define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL #define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 0166919f8705..39f346890f7f 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -307,7 +307,7 @@ virtio_transport_cancel_pkt(struct vsock_sock *vsk) static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) { - int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM; + int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; struct scatterlist pkt, *p; struct virtqueue *vq; struct sk_buff *skb; From 2304c64a2866c58534560c63dc6e79d09b8f8d8d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 17 Jul 2025 10:01:12 +0100 Subject: [PATCH 1365/2411] vsock/virtio: Rename virtio_vsock_alloc_skb() In preparation for nonlinear allocations for large SKBs, rename virtio_vsock_alloc_skb() to virtio_vsock_alloc_linear_skb() to indicate that it returns linear SKBs unconditionally and switch all callers over to this new interface for now. No functional change. Reviewed-by: Stefano Garzarella Signed-off-by: Will Deacon Message-Id: <20250717090116.11987-6-will@kernel.org> Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/vsock.c | 2 +- include/linux/virtio_vsock.h | 3 ++- net/vmw_vsock/virtio_transport.c | 2 +- net/vmw_vsock/virtio_transport_common.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 4c4a642945eb..1ad96613680e 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -348,7 +348,7 @@ vhost_vsock_alloc_skb(struct vhost_virtqueue *vq, return NULL; /* len contains both payload and hdr */ - skb = virtio_vsock_alloc_skb(len, GFP_KERNEL); + skb = virtio_vsock_alloc_linear_skb(len, GFP_KERNEL); if (!skb) return NULL; diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 879f1dfa7d3a..4504ea29ff82 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -52,7 +52,8 @@ static inline void virtio_vsock_skb_rx_put(struct sk_buff *skb, u32 len) skb_put(skb, len); } -static inline struct sk_buff *virtio_vsock_alloc_skb(unsigned int size, gfp_t mask) +static inline struct sk_buff * +virtio_vsock_alloc_linear_skb(unsigned int size, gfp_t mask) { struct sk_buff *skb; diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 39f346890f7f..80dcf6ac1e72 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -316,7 +316,7 @@ static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) vq = vsock->vqs[VSOCK_VQ_RX]; do { - skb = virtio_vsock_alloc_skb(total_len, GFP_KERNEL); + skb = virtio_vsock_alloc_linear_skb(total_len, GFP_KERNEL); if (!skb) break; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 1b5d9896edae..c9eb7f7ac00d 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -261,7 +261,7 @@ static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info * if (!zcopy) skb_len += payload_len; - skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL); + skb = 
virtio_vsock_alloc_linear_skb(skb_len, GFP_KERNEL); if (!skb) return NULL; From fac6b82e0f3eaca33c8c67ec401681b21143ae17 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 17 Jul 2025 10:01:13 +0100 Subject: [PATCH 1366/2411] vsock/virtio: Move SKB allocation lower-bound check to callers virtio_vsock_alloc_linear_skb() checks that the requested size is at least big enough for the packet header (VIRTIO_VSOCK_SKB_HEADROOM). Of the three callers of virtio_vsock_alloc_linear_skb(), only vhost_vsock_alloc_skb() can potentially pass a packet smaller than the header size and, as it already has a check against the maximum packet size, extend its bounds checking to consider the minimum packet size and remove the check from virtio_vsock_alloc_linear_skb(). Reviewed-by: Stefano Garzarella Signed-off-by: Will Deacon Message-Id: <20250717090116.11987-7-will@kernel.org> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vsock.c | 3 ++- include/linux/virtio_vsock.h | 3 --- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 1ad96613680e..24b7547b05a6 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -344,7 +344,8 @@ vhost_vsock_alloc_skb(struct vhost_virtqueue *vq, len = iov_length(vq->iov, out); - if (len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM) + if (len < VIRTIO_VSOCK_SKB_HEADROOM || + len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM) return NULL; /* len contains both payload and hdr */ diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 4504ea29ff82..36dd0cd55368 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -57,9 +57,6 @@ virtio_vsock_alloc_linear_skb(unsigned int size, gfp_t mask) { struct sk_buff *skb; - if (size < VIRTIO_VSOCK_SKB_HEADROOM) - return NULL; - skb = alloc_skb(size, mask); if (!skb) return NULL; From ab9aa2f3afc2713c14f6c4c6b90c9a0933b837f1 Mon Sep 17 00:00:00 2001 From: Will Deacon 
Date: Thu, 17 Jul 2025 10:01:14 +0100 Subject: [PATCH 1367/2411] vhost/vsock: Allocate nonlinear SKBs for handling large receive buffers When receiving a packet from a guest, vhost_vsock_handle_tx_kick() calls vhost_vsock_alloc_linear_skb() to allocate and fill an SKB with the receive data. Unfortunately, these are always linear allocations and can therefore result in significant pressure on kmalloc() considering that the maximum packet size (VIRTIO_VSOCK_MAX_PKT_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM) is a little over 64KiB, resulting in a 128KiB allocation for each packet. Rework the vsock SKB allocation so that, for sizes with page order greater than PAGE_ALLOC_COSTLY_ORDER, a nonlinear SKB is allocated instead with the packet header in the SKB and the receive data in the fragments. Finally, add a debug warning if virtio_vsock_skb_rx_put() is ever called on an SKB with a non-zero length, as this would be destructive for the nonlinear case. Reviewed-by: Stefano Garzarella Signed-off-by: Will Deacon Message-Id: <20250717090116.11987-8-will@kernel.org> Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/vsock.c | 8 +++----- include/linux/virtio_vsock.h | 40 +++++++++++++++++++++++++++++------- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 24b7547b05a6..0679a706ebc0 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -349,7 +349,7 @@ vhost_vsock_alloc_skb(struct vhost_virtqueue *vq, return NULL; /* len contains both payload and hdr */ - skb = virtio_vsock_alloc_linear_skb(len, GFP_KERNEL); + skb = virtio_vsock_alloc_skb(len, GFP_KERNEL); if (!skb) return NULL; @@ -378,10 +378,8 @@ vhost_vsock_alloc_skb(struct vhost_virtqueue *vq, virtio_vsock_skb_rx_put(skb, payload_len); - nbytes = copy_from_iter(skb->data, payload_len, &iov_iter); - if (nbytes != payload_len) { - vq_err(vq, "Expected %zu byte payload, got %zu bytes\n", - payload_len, nbytes); + if (skb_copy_datagram_from_iter(skb, 0, &iov_iter, payload_len)) { + vq_err(vq, "Failed to copy %zu byte payload\n", payload_len); kfree_skb(skb); return NULL; } diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 36dd0cd55368..fa5934ea9c81 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -49,20 +49,46 @@ static inline void virtio_vsock_skb_clear_tap_delivered(struct sk_buff *skb) static inline void virtio_vsock_skb_rx_put(struct sk_buff *skb, u32 len) { - skb_put(skb, len); + DEBUG_NET_WARN_ON_ONCE(skb->len); + + if (skb_is_nonlinear(skb)) + skb->len = len; + else + skb_put(skb, len); +} + +static inline struct sk_buff * +__virtio_vsock_alloc_skb_with_frags(unsigned int header_len, + unsigned int data_len, + gfp_t mask) +{ + struct sk_buff *skb; + int err; + + skb = alloc_skb_with_frags(header_len, data_len, + PAGE_ALLOC_COSTLY_ORDER, &err, mask); + if (!skb) + return NULL; + + skb_reserve(skb, VIRTIO_VSOCK_SKB_HEADROOM); + skb->data_len = data_len; + return skb; } static inline struct sk_buff * virtio_vsock_alloc_linear_skb(unsigned int size, gfp_t mask) { 
- struct sk_buff *skb; + return __virtio_vsock_alloc_skb_with_frags(size, 0, mask); +} - skb = alloc_skb(size, mask); - if (!skb) - return NULL; +static inline struct sk_buff *virtio_vsock_alloc_skb(unsigned int size, gfp_t mask) +{ + if (size <= SKB_WITH_OVERHEAD(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) + return virtio_vsock_alloc_linear_skb(size, mask); - skb_reserve(skb, VIRTIO_VSOCK_SKB_HEADROOM); - return skb; + size -= VIRTIO_VSOCK_SKB_HEADROOM; + return __virtio_vsock_alloc_skb_with_frags(VIRTIO_VSOCK_SKB_HEADROOM, + size, mask); } static inline void From 8ca76151d2c8219edea82f1925a2a25907ff6a9d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 17 Jul 2025 10:01:15 +0100 Subject: [PATCH 1368/2411] vsock/virtio: Rename virtio_vsock_skb_rx_put() In preparation for using virtio_vsock_skb_rx_put() when populating SKBs on the vsock TX path, rename virtio_vsock_skb_rx_put() to virtio_vsock_skb_put(). No functional change. Reviewed-by: Stefano Garzarella Signed-off-by: Will Deacon Message-Id: <20250717090116.11987-9-will@kernel.org> Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/vsock.c | 2 +- include/linux/virtio_vsock.h | 2 +- net/vmw_vsock/virtio_transport.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 0679a706ebc0..ae01457ea2cd 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -376,7 +376,7 @@ vhost_vsock_alloc_skb(struct vhost_virtqueue *vq, return NULL; } - virtio_vsock_skb_rx_put(skb, payload_len); + virtio_vsock_skb_put(skb, payload_len); if (skb_copy_datagram_from_iter(skb, 0, &iov_iter, payload_len)) { vq_err(vq, "Failed to copy %zu byte payload\n", payload_len); diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index fa5934ea9c81..0c67543a45c8 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -47,7 +47,7 @@ static inline void virtio_vsock_skb_clear_tap_delivered(struct sk_buff *skb) VIRTIO_VSOCK_SKB_CB(skb)->tap_delivered = false; } -static inline void virtio_vsock_skb_rx_put(struct sk_buff *skb, u32 len) +static inline void virtio_vsock_skb_put(struct sk_buff *skb, u32 len) { DEBUG_NET_WARN_ON_ONCE(skb->len); diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 80dcf6ac1e72..b6569b0ca2bb 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -657,7 +657,7 @@ static void virtio_transport_rx_work(struct work_struct *work) } if (payload_len) - virtio_vsock_skb_rx_put(skb, payload_len); + virtio_vsock_skb_put(skb, payload_len); virtio_transport_deliver_tap_pkt(skb); virtio_transport_recv_pkt(&virtio_transport, skb); From 6693731487a8145a9b039bc983d77edc47693855 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 17 Jul 2025 10:01:16 +0100 Subject: [PATCH 1369/2411] vsock/virtio: Allocate nonlinear SKBs for handling large transmit buffers When transmitting a vsock packet, virtio_transport_send_pkt_info() calls virtio_transport_alloc_linear_skb() to allocate and fill SKBs with the transmit data. 
Unfortunately, these are always linear allocations and can therefore result in significant pressure on kmalloc() considering that the maximum packet size (VIRTIO_VSOCK_MAX_PKT_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM) is a little over 64KiB, resulting in a 128KiB allocation for each packet. Rework the vsock SKB allocation so that, for sizes with page order greater than PAGE_ALLOC_COSTLY_ORDER, a nonlinear SKB is allocated instead with the packet header in the SKB and the transmit data in the fragments. Note that this affects both the vhost and virtio transports. Reviewed-by: Stefano Garzarella Signed-off-by: Will Deacon Message-Id: <20250717090116.11987-10-will@kernel.org> Signed-off-by: Michael S. Tsirkin --- net/vmw_vsock/virtio_transport_common.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index c9eb7f7ac00d..fe92e5fa95b4 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -109,7 +109,8 @@ static int virtio_transport_fill_skb(struct sk_buff *skb, return __zerocopy_sg_from_iter(info->msg, NULL, skb, &info->msg->msg_iter, len, NULL); - return memcpy_from_msg(skb_put(skb, len), info->msg, len); + virtio_vsock_skb_put(skb, len); + return skb_copy_datagram_from_iter(skb, 0, &info->msg->msg_iter, len); } static void virtio_transport_init_hdr(struct sk_buff *skb, @@ -261,7 +262,7 @@ static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info * if (!zcopy) skb_len += payload_len; - skb = virtio_vsock_alloc_linear_skb(skb_len, GFP_KERNEL); + skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL); if (!skb) return NULL; From 918b744af3d4d11a087814ebb6c390016e5242f2 Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Fri, 1 Aug 2025 11:51:35 +0530 Subject: [PATCH 1370/2411] ASoC: SOF: amd: Add sof audio support for acp7.2 platform Add pci revision id to support sof audio for acp7.2 platform.
Signed-off-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20250801062207.579388-2-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/sof/amd/acp.c | 8 ++++++++ sound/soc/sof/amd/acp.h | 1 + sound/soc/sof/amd/pci-acp70.c | 1 + 3 files changed, 10 insertions(+) diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c index 7132916aa253..71a18f156de2 100644 --- a/sound/soc/sof/amd/acp.c +++ b/sound/soc/sof/amd/acp.c @@ -59,6 +59,7 @@ static void init_dma_descriptor(struct acp_dev_data *adata) switch (acp_data->pci_rev) { case ACP70_PCI_ID: case ACP71_PCI_ID: + case ACP72_PCI_ID: acp_dma_desc_base_addr = ACP70_DMA_DESC_BASE_ADDR; acp_dma_desc_max_num_dscr = ACP70_DMA_DESC_MAX_NUM_DSCR; break; @@ -99,6 +100,7 @@ static int config_dma_channel(struct acp_dev_data *adata, unsigned int ch, switch (acp_data->pci_rev) { case ACP70_PCI_ID: case ACP71_PCI_ID: + case ACP72_PCI_ID: acp_dma_cntl_0 = ACP70_DMA_CNTL_0; acp_dma_ch_rst_sts = ACP70_DMA_CH_RST_STS; acp_dma_dscr_err_sts_0 = ACP70_DMA_ERR_STS_0; @@ -339,6 +341,7 @@ int acp_dma_status(struct acp_dev_data *adata, unsigned char ch) switch (adata->pci_rev) { case ACP70_PCI_ID: case ACP71_PCI_ID: + case ACP72_PCI_ID: acp_dma_ch_sts = ACP70_DMA_CH_STS; break; default: @@ -522,6 +525,7 @@ static irqreturn_t acp_irq_handler(int irq, void *dev_id) switch (adata->pci_rev) { case ACP70_PCI_ID: case ACP71_PCI_ID: + case ACP72_PCI_ID: wake_irq_flag = amd_sof_check_and_handle_acp70_sdw_wake_irq(sdev); break; } @@ -559,6 +563,7 @@ static int acp_power_on(struct snd_sof_dev *sdev) break; case ACP70_PCI_ID: case ACP71_PCI_ID: + case ACP72_PCI_ID: acp_pgfsm_status_mask = ACP70_PGFSM_STATUS_MASK; acp_pgfsm_cntl_mask = ACP70_PGFSM_CNTL_POWER_ON_MASK; break; @@ -661,6 +666,7 @@ static int acp_init(struct snd_sof_dev *sdev) switch (acp_data->pci_rev) { case ACP70_PCI_ID: case ACP71_PCI_ID: + case ACP72_PCI_ID: sdw0_wake_en = snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP70_SW0_WAKE_EN); sdw1_wake_en = 
snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP70_SW1_WAKE_EN); if (sdw0_wake_en || sdw1_wake_en) @@ -712,6 +718,7 @@ int amd_sof_acp_suspend(struct snd_sof_dev *sdev, u32 target_state) switch (acp_data->pci_rev) { case ACP70_PCI_ID: case ACP71_PCI_ID: + case ACP72_PCI_ID: enable = true; break; } @@ -738,6 +745,7 @@ int amd_sof_acp_resume(struct snd_sof_dev *sdev) switch (acp_data->pci_rev) { case ACP70_PCI_ID: case ACP71_PCI_ID: + case ACP72_PCI_ID: snd_sof_dsp_write(sdev, ACP_DSP_BAR, ACP70_PME_EN, 1); break; } diff --git a/sound/soc/sof/amd/acp.h b/sound/soc/sof/amd/acp.h index d3c5b2386cdf..2b7ea8c64106 100644 --- a/sound/soc/sof/amd/acp.h +++ b/sound/soc/sof/amd/acp.h @@ -75,6 +75,7 @@ #define ACP63_PCI_ID 0x63 #define ACP70_PCI_ID 0x70 #define ACP71_PCI_ID 0x71 +#define ACP72_PCI_ID 0x72 #define HOST_BRIDGE_CZN 0x1630 #define HOST_BRIDGE_VGH 0x1645 diff --git a/sound/soc/sof/amd/pci-acp70.c b/sound/soc/sof/amd/pci-acp70.c index 51d36d43c42b..3523c9a92a94 100644 --- a/sound/soc/sof/amd/pci-acp70.c +++ b/sound/soc/sof/amd/pci-acp70.c @@ -77,6 +77,7 @@ static int acp70_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_ switch (pci->revision) { case ACP70_PCI_ID: case ACP71_PCI_ID: + case ACP72_PCI_ID: break; default: return -ENODEV; From 60e5b2441d7c035e732e4a1166779c6cc316c46b Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Fri, 1 Aug 2025 11:51:36 +0530 Subject: [PATCH 1371/2411] ASoC: amd: ps: Add SoundWire pci and dma driver support for acp7.2 platform Add SoundWire pci and dma driver support for acp7.2 platform. 
Signed-off-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20250801062207.579388-3-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/ps/acp63.h | 1 + sound/soc/amd/ps/pci-ps.c | 4 ++++ sound/soc/amd/ps/ps-sdw-dma.c | 5 +++++ 3 files changed, 10 insertions(+) diff --git a/sound/soc/amd/ps/acp63.h b/sound/soc/amd/ps/acp63.h index d7c994e26e4d..90fc016dac0b 100644 --- a/sound/soc/amd/ps/acp63.h +++ b/sound/soc/amd/ps/acp63.h @@ -14,6 +14,7 @@ #define ACP63_PCI_REV 0x63 #define ACP70_PCI_REV 0x70 #define ACP71_PCI_REV 0x71 +#define ACP72_PCI_REV 0x72 #define ACP_SOFT_RESET_SOFTRESET_AUDDONE_MASK 0x00010001 #define ACP63_PGFSM_CNTL_POWER_ON_MASK 1 diff --git a/sound/soc/amd/ps/pci-ps.c b/sound/soc/amd/ps/pci-ps.c index 7936b3173632..c62299b29204 100644 --- a/sound/soc/amd/ps/pci-ps.c +++ b/sound/soc/amd/ps/pci-ps.c @@ -117,6 +117,7 @@ static short int check_and_handle_sdw_dma_irq(struct acp63_dev_data *adata, u32 break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: adata->acp70_sdw0_dma_intr_stat[stream_id] = 1; break; } @@ -141,6 +142,7 @@ static short int check_and_handle_sdw_dma_irq(struct acp63_dev_data *adata, u32 break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: if (ext_intr_stat1 & ACP70_P1_SDW_DMA_IRQ_MASK) { for (index = ACP70_P1_AUDIO2_RX_THRESHOLD; index <= ACP70_P1_AUDIO0_TX_THRESHOLD; index++) { @@ -552,6 +554,7 @@ static int acp_hw_init_ops(struct acp63_dev_data *adata, struct pci_dev *pci) break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: acp70_hw_init_ops(adata->hw_ops); break; default: @@ -581,6 +584,7 @@ static int snd_acp63_probe(struct pci_dev *pci, case ACP63_PCI_REV: case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: break; default: dev_dbg(&pci->dev, "acp63/acp70/acp71 pci device not found\n"); diff --git a/sound/soc/amd/ps/ps-sdw-dma.c b/sound/soc/amd/ps/ps-sdw-dma.c index 1b933a017c06..5449323e2728 100644 --- a/sound/soc/amd/ps/ps-sdw-dma.c +++ 
b/sound/soc/amd/ps/ps-sdw-dma.c @@ -269,6 +269,7 @@ static int acp63_configure_sdw_ringbuffer(void __iomem *acp_base, u32 stream_id, break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: switch (manager_instance) { case ACP_SDW0: reg_dma_size = acp70_sdw0_dma_reg[stream_id].reg_dma_size; @@ -382,6 +383,7 @@ static int acp63_sdw_dma_hw_params(struct snd_soc_component *component, break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: switch (stream->instance) { case ACP_SDW0: sdw_data->acp70_sdw0_dma_stream[stream_id] = substream; @@ -451,6 +453,7 @@ static u64 acp63_sdw_get_byte_count(struct acp_sdw_dma_stream *stream, void __io break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: switch (stream->instance) { case ACP_SDW0: pos_low_reg = acp70_sdw0_dma_reg[stream->stream_id].pos_low_reg; @@ -529,6 +532,7 @@ static int acp63_sdw_dma_close(struct snd_soc_component *component, break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: switch (stream->instance) { case ACP_SDW0: sdw_data->acp70_sdw0_dma_stream[stream->stream_id] = NULL; @@ -574,6 +578,7 @@ static int acp63_sdw_dma_enable(struct snd_pcm_substream *substream, break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: switch (stream->instance) { case ACP_SDW0: sdw_dma_en_reg = acp70_sdw0_dma_enable_reg[stream_id]; From 0df24f34794d2eea4bdc819fba0ba28f226286e6 Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Fri, 1 Aug 2025 11:51:37 +0530 Subject: [PATCH 1372/2411] ASoC: amd: acp: Add SoundWire legacy machine driver support for acp7.2 platform Add SoundWire legacy machine driver support for acp7.2 platform. 
Signed-off-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20250801062207.579388-4-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-sdw-legacy-mach.c | 3 +++ sound/soc/amd/acp/soc_amd_sdw_common.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/sound/soc/amd/acp/acp-sdw-legacy-mach.c b/sound/soc/amd/acp/acp-sdw-legacy-mach.c index 6c24f9d8694e..c2197b75a7dd 100644 --- a/sound/soc/amd/acp/acp-sdw-legacy-mach.c +++ b/sound/soc/amd/acp/acp-sdw-legacy-mach.c @@ -158,6 +158,7 @@ static int create_sdw_dailink(struct snd_soc_card *card, break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: ret = get_acp70_cpu_pin_id(ffs(soc_end->link_mask - 1), *be_id, &cpu_pin_id, dev); if (ret) @@ -264,6 +265,7 @@ static int create_sdw_dailinks(struct snd_soc_card *card, case ACP63_PCI_REV: case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: sdw_platform_component->name = "amd_ps_sdw_dma.0"; break; default: @@ -311,6 +313,7 @@ static int create_dmic_dailinks(struct snd_soc_card *card, case ACP63_PCI_REV: case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: pdm_cpu->name = "acp_ps_pdm_dma.0"; pdm_platform->name = "acp_ps_pdm_dma.0"; break; diff --git a/sound/soc/amd/acp/soc_amd_sdw_common.h b/sound/soc/amd/acp/soc_amd_sdw_common.h index 1f24e0e06487..3930cc46fa58 100644 --- a/sound/soc/amd/acp/soc_amd_sdw_common.h +++ b/sound/soc/amd/acp/soc_amd_sdw_common.h @@ -21,6 +21,8 @@ #define ACP63_PCI_REV 0x63 #define ACP70_PCI_REV 0x70 #define ACP71_PCI_REV 0x71 +#define ACP72_PCI_REV 0x72 + #define SOC_JACK_JDSRC(quirk) ((quirk) & GENMASK(3, 0)) #define ASOC_SDW_FOUR_SPK BIT(4) #define ASOC_SDW_ACP_DMIC BIT(5) From 1c4c768d068616fa8948826ab714a4ac1f3b9aa9 Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Fri, 1 Aug 2025 11:51:38 +0530 Subject: [PATCH 1373/2411] ASoC: amd: acp: Add SoundWire SOF machine driver support for acp7.2 platform Add SoundWire SOF machine driver support for acp7.2 platform. 
Signed-off-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20250801062207.579388-5-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-sdw-sof-mach.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/amd/acp/acp-sdw-sof-mach.c b/sound/soc/amd/acp/acp-sdw-sof-mach.c index 654fe78b2e2e..91d72d4bb9a2 100644 --- a/sound/soc/amd/acp/acp-sdw-sof-mach.c +++ b/sound/soc/amd/acp/acp-sdw-sof-mach.c @@ -130,6 +130,7 @@ static int create_sdw_dailink(struct snd_soc_card *card, break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: ret = get_acp70_cpu_pin_id(ffs(sof_end->link_mask - 1), *be_id, &cpu_pin_id, dev); if (ret) From 9843cf7b6fd6f938c16fde51e86dd0e3ddbefb12 Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Fri, 1 Aug 2025 10:16:18 +0800 Subject: [PATCH 1374/2411] ASoC: tas2781: Fix the wrong step for TLV on tas2781 The step for TLV on tas2781, should be 50 (-0.5dB). Fixes: 678f38eba1f2 ("ASoC: tas2781: Add Header file for tas2781 driver") Signed-off-by: Baojun Xu Link: https://patch.msgid.link/20250801021618.64627-1-baojun.xu@ti.com Signed-off-by: Mark Brown --- include/sound/tas2781-tlv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sound/tas2781-tlv.h b/include/sound/tas2781-tlv.h index d87263e43fdb..ef9b9f19d212 100644 --- a/include/sound/tas2781-tlv.h +++ b/include/sound/tas2781-tlv.h @@ -15,7 +15,7 @@ #ifndef __TAS2781_TLV_H__ #define __TAS2781_TLV_H__ -static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0); +static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 50, 0); static const __maybe_unused DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0); #endif From 1b03391d073dad748636a1ad9668b837cce58265 Mon Sep 17 00:00:00 2001 From: Peter Jakubek Date: Thu, 31 Jul 2025 18:21:04 +0100 Subject: [PATCH 1375/2411] ASoC: Intel: sof_sdw: Add quirk for Alienware Area 51 (2025) 0CCC SKU Add DMI quirk entry for Alienware systems with SKU "0CCC" to enable 
proper speaker codec configuration (SOC_SDW_CODEC_SPKR). This system requires the same audio configuration as some existing Dell systems. Without this patch, the laptop's speakers and microphone will not work. Signed-off-by: Peter Jakubek Link: https://patch.msgid.link/20250731172104.2009007-1-peterjakubek@gmail.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index c639df2cacdd..f997b2dc221b 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -741,6 +741,14 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { }, .driver_data = (void *)(SOC_SDW_CODEC_SPKR), }, + { + .callback = sof_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0CCC") + }, + .driver_data = (void *)(SOC_SDW_CODEC_SPKR), + }, /* Pantherlake devices*/ { .callback = sof_sdw_quirk_cb, From ffcfd071eec7973e58c4ffff7da4cb0e9ca7b667 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Thu, 31 Jul 2025 16:01:09 +0000 Subject: [PATCH 1376/2411] spi: cs42l43: Property entry should be a null-terminated array The software node does not specify a count of property entries, so the array must be null-terminated. When unterminated, this can lead to a fault in the downstream cs35l56 amplifier driver, because the node parse walks off the end of the array into unknown memory. 
Fixes: 0ca645ab5b15 ("spi: cs42l43: Add speaker id support to the bridge configuration") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220371 Signed-off-by: Simon Trimmer Link: https://patch.msgid.link/20250731160109.1547131-1-simont@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/spi/spi-cs42l43.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-cs42l43.c b/drivers/spi/spi-cs42l43.c index b28a840b3b04..14307dd800b7 100644 --- a/drivers/spi/spi-cs42l43.c +++ b/drivers/spi/spi-cs42l43.c @@ -295,7 +295,7 @@ static struct spi_board_info *cs42l43_create_bridge_amp(struct cs42l43_spi *priv struct spi_board_info *info; if (spkid >= 0) { - props = devm_kmalloc(priv->dev, sizeof(*props), GFP_KERNEL); + props = devm_kcalloc(priv->dev, 2, sizeof(*props), GFP_KERNEL); if (!props) return NULL; From 49f848788a4d157bb6648a57963cb060fed3d56e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 30 Jul 2025 08:04:37 -0700 Subject: [PATCH 1377/2411] x86/cpu: Add new Intel CPU model numbers for Wildcatlake and Novalake Wildcatlake is a mobile CPU. Novalake has both desktop and mobile versions. [ bp: Merge into a single patch. 
] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20250730150437.4701-1-tony.luck@intel.com --- arch/x86/include/asm/intel-family.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index be10c188614f..e345dbdf933e 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -150,6 +150,11 @@ #define INTEL_PANTHERLAKE_L IFM(6, 0xCC) /* Cougar Cove / Crestmont */ +#define INTEL_WILDCATLAKE_L IFM(6, 0xD5) + +#define INTEL_NOVALAKE IFM(18, 0x01) +#define INTEL_NOVALAKE_L IFM(18, 0x03) + /* "Small Core" Processors (Atom/E-Core) */ #define INTEL_ATOM_BONNELL IFM(6, 0x1C) /* Diamondville, Pineview */ From ead3d7b2b6afa5ee7958620c4329982a7d9c2b78 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Fri, 1 Aug 2025 11:47:23 +0200 Subject: [PATCH 1378/2411] bpf: Check flow_dissector ctx accesses are aligned flow_dissector_is_valid_access doesn't check that the context access is aligned. As a consequence, an unaligned access within one of the exposed fields is considered valid and later rejected by flow_dissector_convert_ctx_access when we try to convert it. The later rejection is problematic because it's reported as a verifier bug with a kernel warning and doesn't point to the right instruction in verifier logs.
Fixes: d58e468b1112 ("flow_dissector: implements flow dissector BPF hook") Reported-by: syzbot+ccac90e482b2a81d74aa@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=ccac90e482b2a81d74aa Signed-off-by: Paul Chaignon Acked-by: Yonghong Song Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/cc1b036be484c99be45eddf48bd78cc6f72839b1.1754039605.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- net/core/filter.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index c09a85c17496..da391e2b0788 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9458,6 +9458,9 @@ static bool flow_dissector_is_valid_access(int off, int size, if (off < 0 || off >= sizeof(struct __sk_buff)) return false; + if (off % size != 0) + return false; + if (type == BPF_WRITE) return false; From 9e6448f7b1efb27f8d508b067ecd33ed664a4246 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Fri, 1 Aug 2025 11:48:15 +0200 Subject: [PATCH 1379/2411] bpf: Check netfilter ctx accesses are aligned Similarly to the previous patch fixing the flow_dissector ctx accesses, nf_is_valid_access also doesn't check that ctx accesses are aligned. Contrary to flow_dissector programs, netfilter programs don't have context conversion. The unaligned ctx accesses are therefore allowed by the verifier. 
Fixes: fd9c663b9ad6 ("bpf: minimal support for programs hooked into netfilter framework") Signed-off-by: Paul Chaignon Acked-by: Yonghong Song Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/853ae9ed5edaa5196e8472ff0f1bb1cc24059214.1754039605.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- net/netfilter/nf_bpf_link.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index 3e4fb9ddcd36..46e667a50d98 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c @@ -296,6 +296,9 @@ static bool nf_is_valid_access(int off, int size, enum bpf_access_type type, if (off < 0 || off >= sizeof(struct bpf_nf_ctx)) return false; + if (off % size != 0) + return false; + if (type == BPF_WRITE) return false; From f914876eec9e72ae94b5cee81a9dc7935c255b2f Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Fri, 1 Aug 2025 11:49:15 +0200 Subject: [PATCH 1380/2411] bpf: Improve ctx access verifier error message We've already had two "error during ctx access conversion" warnings triggered by syzkaller. Let's improve the error message by dumping the cnt variable so that we can more easily differentiate between the different error cases. 
Signed-off-by: Paul Chaignon Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/cc94316c30dd76fae4a75a664b61a2dbfe68e205.1754039605.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 399f03e62508..0806295945e4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -21445,7 +21445,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) &target_size); if (cnt == 0 || cnt >= INSN_BUF_SIZE || (ctx_field_size && !target_size)) { - verifier_bug(env, "error during ctx access conversion"); + verifier_bug(env, "error during ctx access conversion (%d)", cnt); return -EFAULT; } From bb324f85f722848f5e5e53325bc00f13302e01d0 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Wed, 9 Jul 2025 09:45:01 +0700 Subject: [PATCH 1381/2411] drm/gpuvm: Wrap drm_gpuvm_sm_map_exec_lock() expected usage in literal code block Stephen Rothwell reports multiple indentation warnings when merging drm-msm tree: Documentation/gpu/drm-mm:506: ./drivers/gpu/drm/drm_gpuvm.c:2445: ERROR: Unexpected indentation. [docutils] Documentation/gpu/drm-mm:506: ./drivers/gpu/drm/drm_gpuvm.c:2447: WARNING: Block quote ends without a blank line; unexpected unindent. [docutils] Documentation/gpu/drm-mm:506: ./drivers/gpu/drm/drm_gpuvm.c:2451: WARNING: Definition list ends without a blank line; unexpected unindent. [docutils] Documentation/gpu/drm-mm:506: ./drivers/gpu/drm/drm_gpuvm.c:2452: WARNING: Definition list ends without a blank line; unexpected unindent. [docutils] Documentation/gpu/drm-mm:506: ./drivers/gpu/drm/drm_gpuvm.c:2456: ERROR: Unexpected indentation. [docutils] Documentation/gpu/drm-mm:506: ./drivers/gpu/drm/drm_gpuvm.c:2457: WARNING: Definition list ends without a blank line; unexpected unindent. 
[docutils] Documentation/gpu/drm-mm:506: ./drivers/gpu/drm/drm_gpuvm.c:2458: WARNING: Definition list ends without a blank line; unexpected unindent. [docutils] Documentation/gpu/drm-mm:506: ./drivers/gpu/drm/drm_gpuvm.c:2459: WARNING: Definition list ends without a blank line; unexpected unindent. [docutils] Fix these by wrapping drm_gpuvm_sm_map_exec_lock() expected usage example in literal code block. Fixes: 471920ce25d5 ("drm/gpuvm: Add locking helpers") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/linux-next/20250708192038.6b0fd31d@canb.auug.org.au/ Signed-off-by: Bagas Sanjaya Acked-by: Randy Dunlap Tested-by: Randy Dunlap Acked-by: Danilo Krummrich Patchwork: https://patchwork.freedesktop.org/patch/663121/ Signed-off-by: Rob Clark --- drivers/gpu/drm/drm_gpuvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index bbc7fecb6f4a..f62005ff9b2e 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -2430,7 +2430,7 @@ static const struct drm_gpuvm_ops lock_ops = { * remapped, and locks+prepares (drm_exec_prepare_object()) objects that * will be newly mapped. * - * The expected usage is: + * The expected usage is:: * * vm_bind { * struct drm_exec exec; From 7abb543ff03e7874eba50a27ab025f09c96f6f7a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 9 Jul 2025 07:08:38 -0700 Subject: [PATCH 1382/2411] drm/msm: Fix build with KMS disabled When commit 98290b0a7d60 ("drm/msm: make it possible to disable KMS-related code.") was rebased on top of commit 3bebfd53af0f ("drm/msm: Defer VMA unmap for fb unpins"), the additional use of msm_kms was overlooked, resulting in a build break when KMS is disabled. Add some additional ifdef to fix that. 
Reported-by: Arnd Bergmann Fixes: 98290b0a7d60 ("drm/msm: make it possible to disable KMS-related code.") Signed-off-by: Rob Clark Tested-by: Arnd Bergmann Reviewed-by: Jessica Zhang Patchwork: https://patchwork.freedesktop.org/patch/663240/ --- drivers/gpu/drm/msm/msm_gem.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 33d3354c6102..c853ab3a2cda 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -96,7 +96,6 @@ void msm_gem_vma_get(struct drm_gem_object *obj) void msm_gem_vma_put(struct drm_gem_object *obj) { struct msm_drm_private *priv = obj->dev->dev_private; - struct drm_exec exec; if (atomic_dec_return(&to_msm_bo(obj)->vma_ref)) return; @@ -104,9 +103,13 @@ void msm_gem_vma_put(struct drm_gem_object *obj) if (!priv->kms) return; +#ifdef CONFIG_DRM_MSM_KMS + struct drm_exec exec; + msm_gem_lock_vm_and_obj(&exec, obj, priv->kms->vm); put_iova_spaces(obj, priv->kms->vm, true, "vma_put"); drm_exec_fini(&exec); /* drop locks */ +#endif } /* @@ -664,9 +667,13 @@ int msm_gem_set_iova(struct drm_gem_object *obj, static bool is_kms_vm(struct drm_gpuvm *vm) { +#ifdef CONFIG_DRM_MSM_KMS struct msm_drm_private *priv = vm->drm->dev_private; return priv->kms && (priv->kms->vm == vm); +#else + return false; +#endif } /* From f4ca529de235791aeeddc32ee6741a6b6872f564 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 17 Jul 2025 08:19:30 -0700 Subject: [PATCH 1383/2411] drm/msm: Fix pagetables setup/teardown serialization An atomic counter is not sufficient, as one task could still be in the process of tearing things down while another task increments the counter back up to one and begins setup again. The race condition existed since commit b145c6e65eb0 ("drm/msm: Add support to create a local pagetable") but got bigger in commit dbbde63c9e9d ("drm/msm: Add PRR support"). 
Fixes: dbbde63c9e9d ("drm/msm: Add PRR support") Fixes: b145c6e65eb0 ("drm/msm: Add support to create a local pagetable") Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/664433/ --- drivers/gpu/drm/msm/msm_iommu.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 55c29f49b788..76cdd5ea06a0 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -14,7 +14,9 @@ struct msm_iommu { struct msm_mmu base; struct iommu_domain *domain; - atomic_t pagetables; + + struct mutex init_lock; /* protects pagetables counter and prr_page */ + int pagetables; struct page *prr_page; struct kmem_cache *pt_cache; @@ -227,7 +229,8 @@ static void msm_iommu_pagetable_destroy(struct msm_mmu *mmu) * If this is the last attached pagetable for the parent, * disable TTBR0 in the arm-smmu driver */ - if (atomic_dec_return(&iommu->pagetables) == 0) { + mutex_lock(&iommu->init_lock); + if (--iommu->pagetables == 0) { adreno_smmu->set_ttbr0_cfg(adreno_smmu->cookie, NULL); if (adreno_smmu->set_prr_bit) { @@ -236,6 +239,7 @@ static void msm_iommu_pagetable_destroy(struct msm_mmu *mmu) iommu->prr_page = NULL; } } + mutex_unlock(&iommu->init_lock); free_io_pgtable_ops(pagetable->pgtbl_ops); kfree(pagetable); @@ -568,9 +572,12 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent, bool kernel_m * If this is the first pagetable that we've allocated, send it back to * the arm-smmu driver as a trigger to set up TTBR0 */ - if (atomic_inc_return(&iommu->pagetables) == 1) { + mutex_lock(&iommu->init_lock); + if (iommu->pagetables++ == 0) { ret = adreno_smmu->set_ttbr0_cfg(adreno_smmu->cookie, &ttbr0_cfg); if (ret) { + iommu->pagetables--; + mutex_unlock(&iommu->init_lock); free_io_pgtable_ops(pagetable->pgtbl_ops); kfree(pagetable); return ERR_PTR(ret); @@ -595,6 +602,7 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu 
*parent, bool kernel_m adreno_smmu->set_prr_bit(adreno_smmu->cookie, true); } } + mutex_unlock(&iommu->init_lock); /* Needed later for TLB flush */ pagetable->parent = parent; @@ -730,7 +738,7 @@ struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks) iommu->domain = domain; msm_mmu_init(&iommu->base, dev, &funcs, MSM_MMU_IOMMU); - atomic_set(&iommu->pagetables, 0); + mutex_init(&iommu->init_lock); ret = iommu_attach_device(iommu->domain, dev); if (ret) { From 25654a1756a4ace072404e89882d7ba8391900bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Sun, 20 Jul 2025 18:42:31 -0300 Subject: [PATCH 1384/2411] drm/msm: Update global fault counter when faulty process has already ended MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The global fault counter is no longer used since commit 12578c075f89 ("drm/msm/gpu: Skip retired submits in recover worker"). However, it's still needed, as we need to handle cases where a GPU fault occurs after the faulting process has already ended. Hence, increment the global fault counter when the submitting process had already ended. This way, the number of faults returned by MSM_PARAM_FAULTS will stay consistent. While here, s/unusuable/unusable. 
Fixes: 12578c075f89 ("drm/msm/gpu: Skip retired submits in recover worker") Signed-off-by: Maíra Canal Patchwork: https://patchwork.freedesktop.org/patch/664853/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gpu.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index c317b25a8162..416d47185ef0 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -465,6 +465,7 @@ static void recover_worker(struct kthread_work *work) struct msm_gem_submit *submit; struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu); char *comm = NULL, *cmd = NULL; + struct task_struct *task; int i; mutex_lock(&gpu->lock); @@ -482,16 +483,20 @@ static void recover_worker(struct kthread_work *work) /* Increment the fault counts */ submit->queue->faults++; - if (submit->vm) { + + task = get_pid_task(submit->pid, PIDTYPE_PID); + if (!task) + gpu->global_faults++; + else { struct msm_gem_vm *vm = to_msm_vm(submit->vm); vm->faults++; /* * If userspace has opted-in to VM_BIND (and therefore userspace - * management of the VM), faults mark the VM as unusuable. This + * management of the VM), faults mark the VM as unusable. This * matches vulkan expectations (vulkan is the main target for - * VM_BIND) + * VM_BIND). */ if (!vm->managed) msm_gem_vm_unusable(submit->vm); From de651b6e040ba419418a37401e45d24f133e8a59 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 23 Jul 2025 12:08:49 -0700 Subject: [PATCH 1385/2411] drm/msm: Fix refcnt underflow in error path If we hit an error path in GEM obj creation before msm_gem_new_handle() updates obj->resv to point to the gpuvm resv object, then obj->resv still points to &obj->_resv. In this case we don't want to decrement the refcount of the object being freed (since the refcnt is already zero). This fixes the following splat: ------------[ cut here ]------------ refcount_t: underflow; use-after-free. 
WARNING: CPU: 9 PID: 7013 at lib/refcount.c:28 refcount_warn_saturate+0xf4/0x148 Modules linked in: uinput snd_seq_dummy snd_hrtimer aes_ce_ccm snd_soc_wsa884x regmap_sdw q6prm_clocks q6apm_lpass_da> qcom_pil_info i2c_hid drm_kms_helper qcom_common qcom_q6v5 phy_snps_eusb2 qcom_geni_serial drm qcom_sysmon pinctrl_s> CPU: 9 UID: 1000 PID: 7013 Comm: deqp-vk Not tainted 6.16.0-rc4-debug+ #25 PREEMPT(voluntary) Hardware name: LENOVO 83ED/LNVNB161216, BIOS NHCN53WW 08/02/2024 pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) pc : refcount_warn_saturate+0xf4/0x148 lr : refcount_warn_saturate+0xf4/0x148 sp : ffff8000a2073920 x29: ffff8000a2073920 x28: 0000000000000010 x27: 0000000000000010 x26: 0000000000000042 x25: ffff000810e09800 x24: 0000000000000010 x23: ffff8000a2073b94 x22: ffff000ddb22de00 x21: ffff000ddb22dc00 x20: ffff000ddb22ddf8 x19: ffff0008024934e0 x18: 000000000000000a x17: 0000000000000000 x16: ffff9f8c67d77340 x15: 0000000000000000 x14: 00000000ffffffff x13: 2e656572662d7265 x12: 7466612d65737520 x11: 3b776f6c66726564 x10: 00000000ffff7fff x9 : ffff9f8c67506c70 x8 : ffff9f8c69fa26f0 x7 : 00000000000bffe8 x6 : c0000000ffff7fff x5 : ffff000f53e14548 x4 : ffff6082ea2b2000 x3 : ffff0008b86ab080 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0008b86ab080 Call trace: refcount_warn_saturate+0xf4/0x148 (P) msm_gem_free_object+0x248/0x260 [msm] drm_gem_object_free+0x24/0x40 [drm] msm_gem_new+0x1c4/0x1e0 [msm] msm_gem_new_handle+0x3c/0x1a0 [msm] msm_ioctl_gem_new+0x38/0x70 [msm] drm_ioctl_kernel+0xc8/0x138 [drm] drm_ioctl+0x2c8/0x618 [drm] __arm64_sys_ioctl+0xac/0x108 invoke_syscall.constprop.0+0x64/0xe8 el0_svc_common.constprop.0+0x40/0xe8 do_el0_svc+0x24/0x38 el0_svc+0x54/0x1d8 el0t_64_sync_handler+0x10c/0x138 el0t_64_sync+0x19c/0x1a0 irq event stamp: 3698694 hardirqs last enabled at (3698693): [] __up_console_sem+0x74/0x90 hardirqs last disabled at (3698694): [] el1_dbg+0x24/0x90 softirqs last enabled at (3697578): [] 
handle_softirqs+0x454/0x4b0 softirqs last disabled at (3697567): [] __do_softirq+0x1c/0x28 ---[ end trace 0000000000000000 ]--- Fixes: b58e12a66e47 ("drm/msm: Add _NO_SHARE flag") Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/665355/ --- drivers/gpu/drm/msm/msm_gem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index c853ab3a2cda..9f0f5b77f1bd 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -1121,10 +1121,12 @@ static void msm_gem_free_object(struct drm_gem_object *obj) put_pages(obj); } - if (msm_obj->flags & MSM_BO_NO_SHARE) { + if (obj->resv != &obj->_resv) { struct drm_gem_object *r_obj = container_of(obj->resv, struct drm_gem_object, _resv); + WARN_ON(!(msm_obj->flags & MSM_BO_NO_SHARE)); + /* Drop reference we hold to shared resv obj: */ drm_gem_object_put(r_obj); } From ad70e46e130a7f4024961a5dd5ae0ee8e7d9a3c4 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 23 Jul 2025 12:08:50 -0700 Subject: [PATCH 1386/2411] drm/msm: Fix submit error path cleanup submit_unpin_objects() should come before we unlock the objects. 
This fixes the splat: WARNING: CPU: 2 PID: 2171 at drivers/gpu/drm/msm/msm_gem.h:395 msm_gem_unpin_locked+0x8c/0xd8 [msm] Modules linked in: uinput snd_seq_dummy snd_hrtimer aes_ce_ccm snd_soc_wsa884x regmap_sdw q6prm_clocks q6apm_lpass_dais q6apm_dai snd_q6dsp_common q6prm snd_q6apm qcom_pd_mapper cdc_mbim cdc_wdm cdc_ncm r8153_ecm cdc_ether usbnet sunrpc nls_ascii nls_cp437 vfat fat snd_soc_x1e80100 snd_soc_lpass_rx_macro snd_soc_lpass_tx_macro snd_soc_lpass_va_macro snd_soc_lpass_wsa_macro snd_soc_qcom_common soundwire_qcom snd_soc_lpass_macro_common snd_soc_hdmi_codec snd_soc_qcom_sdw ext4 snd_soc_core snd_compress soundwire_bus snd_pcm_dmaengine snd_seq mbcache jbd2 snd_seq_device snd_pcm pm8941_pwrkey snd_timer r8152 qcom_spmi_temp_alarm industrialio snd lenovo_yoga_slim7x ath12k mii arm_smccc_trng soundcore rng_core evdev loop panel_samsung_atna33xc20 msm ubwc_config drm_client_lib drm_gpuvm drm_exec gpu_sched drm_display_helper pmic_glink_altmode aux_hpd_bridge ucsi_glink qcom_battmgr phy_qcom_qmp_combo ps883x cec aux_bridge drm_dp_aux_bus i2c_hid_of aes_ce_blk drm_kms_helper aes_ce_cipher i2c_hid qcom_q6v5_pas ghash_ce qcom_pil_info drm sha1_ce qcom_common phy_snps_eusb2 qcom_geni_serial qcom_q6v5 qcom_sysmon pinctrl_sm8550_lpass_lpi lpasscc_sc8280xp sbsa_gwdt mdt_loader gpio_keys pmic_glink i2c_dev efivarfs autofs4 CPU: 2 UID: 1000 PID: 2171 Comm: gnome-shell Not tainted 6.16.0-rc4-debug+ #25 PREEMPT(voluntary) Hardware name: LENOVO 83ED/LNVNB161216, BIOS NHCN53WW 08/02/2024 pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) pc : msm_gem_unpin_locked+0x8c/0xd8 [msm] lr : msm_gem_unpin_locked+0x88/0xd8 [msm] sp : ffff80009c963820 x29: ffff80009c963820 x28: ffff80009c9639f8 x27: ffff00080552a830 x26: 0000000000000000 x25: ffff0009d5655800 x24: 0000000000000000 x23: 0000000000000000 x22: 0000000000000000 x21: 0000000000000000 x20: ffff000831db5480 x19: ffff000816e74400 x18: 0000000000000000 x17: 0000000000000000 x16: ffffc1396afdd720 x15: 
0000000000000000 x14: 0000000000000000 x13: 0000000000000000 x12: ffff0008c065bc00 x11: ffff0008c065c000 x10: 0000000000000000 x9 : ffffc13945b19074 x8 : 0000000000000000 x7 : 0000000000000209 x6 : 0000000000000002 x5 : 0000000000019d01 x4 : ffff0008ba8db080 x3 : 000000000004093f x2 : ffff3ed5e727f000 x1 : 0000000000000000 x0 : 0000000000000000 Call trace: msm_gem_unpin_locked+0x8c/0xd8 [msm] (P) msm_ioctl_gem_submit+0x32c/0x1760 [msm] drm_ioctl_kernel+0xc8/0x138 [drm] drm_ioctl+0x2c8/0x618 [drm] __arm64_sys_ioctl+0xac/0x108 invoke_syscall.constprop.0+0x64/0xe8 el0_svc_common.constprop.0+0x40/0xe8 do_el0_svc+0x24/0x38 el0_svc+0x54/0x1d8 el0t_64_sync_handler+0x10c/0x138 el0t_64_sync+0x19c/0x1a0 irq event stamp: 2185036 hardirqs last enabled at (2185035): [] _raw_spin_unlock_irqrestore+0x74/0x80 hardirqs last disabled at (2185036): [] el1_dbg+0x24/0x90 softirqs last enabled at (2184778): [] fpsimd_restore_current_state+0x3c/0x328 softirqs last disabled at (2184776): [] fpsimd_restore_current_state+0xc/0x328 ---[ end trace 0000000000000000 ]--- Fixes: 111fdd2198e6 ("drm/msm: drm_gpuvm conversion") Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/665357/ --- drivers/gpu/drm/msm/msm_gem_submit.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 5f8e939a5906..0ac4c199ec93 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -514,14 +514,15 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob */ static void submit_cleanup(struct msm_gem_submit *submit, bool error) { + if (error) + submit_unpin_objects(submit); + if (submit->exec.objects) drm_exec_fini(&submit->exec); - if (error) { - submit_unpin_objects(submit); - /* job wasn't enqueued to scheduler, so early retirement: */ + /* if job wasn't enqueued to scheduler, early retirement: */ + if (error) 
msm_submit_retire(submit); - } } void msm_submit_retire(struct msm_gem_submit *submit) From f22853435bbd1e9836d0dce7fd99c040b94c2bf1 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 23 Jul 2025 13:28:22 -0700 Subject: [PATCH 1387/2411] drm/msm: Defer fd_install in SUBMIT ioctl Avoid fd_install() until there are no more potential error paths, to avoid put_unused_fd() after the fd is made visible to userspace. Fixes: 68dc6c2d5eec ("drm/msm: Fix submit error-path leaks") Reported-by: Dan Carpenter Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/665363/ --- drivers/gpu/drm/msm/msm_gem_submit.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 0ac4c199ec93..bfea19baf6d9 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -770,12 +770,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (ret == 0 && args->flags & MSM_SUBMIT_FENCE_FD_OUT) { sync_file = sync_file_create(submit->user_fence); - if (!sync_file) { + if (!sync_file) ret = -ENOMEM; - } else { - fd_install(out_fence_fd, sync_file->file); - args->fence_fd = out_fence_fd; - } } if (ret) @@ -813,10 +809,14 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, out_unlock: mutex_unlock(&queue->lock); out_post_unlock: - if (ret && (out_fence_fd >= 0)) { - put_unused_fd(out_fence_fd); + if (ret) { + if (out_fence_fd >= 0) + put_unused_fd(out_fence_fd); if (sync_file) fput(sync_file->file); + } else if (sync_file) { + fd_install(out_fence_fd, sync_file->file); + args->fence_fd = out_fence_fd; } if (!IS_ERR_OR_NULL(submit)) { From c89504a703fb779052213add0e8ed642f4a4f1c8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Aug 2025 16:37:23 -0400 Subject: [PATCH 1388/2411] tracing: Remove unneeded goto out logic Several places in the trace.c file there's a goto out where the out is simply a return. 
There's no reason to jump to the out label if it's not doing any more logic but simply returning from the function. Replace the goto outs with a return and remove the out labels. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://lore.kernel.org/20250801203857.538726745@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 38 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 945a8ecf2c62..0ec9cab9a812 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1841,7 +1841,7 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, ret = get_user(ch, ubuf++); if (ret) - goto out; + return ret; read++; cnt--; @@ -1855,7 +1855,7 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, while (cnt && isspace(ch)) { ret = get_user(ch, ubuf++); if (ret) - goto out; + return ret; read++; cnt--; } @@ -1865,8 +1865,7 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, /* only spaces were written */ if (isspace(ch) || !ch) { *ppos += read; - ret = read; - goto out; + return read; } } @@ -1874,13 +1873,12 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, while (cnt && !isspace(ch) && ch) { if (parser->idx < parser->size - 1) parser->buffer[parser->idx++] = ch; - else { - ret = -EINVAL; - goto out; - } + else + return -EINVAL; + ret = get_user(ch, ubuf++); if (ret) - goto out; + return ret; read++; cnt--; } @@ -1895,15 +1893,11 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, /* Make sure the parsed string always terminates with '\0'. 
*/ parser->buffer[parser->idx] = 0; } else { - ret = -EINVAL; - goto out; + return -EINVAL; } *ppos += read; - ret = read; - -out: - return ret; + return read; } /* TODO add a seq_buf_to_buffer() */ @@ -2405,10 +2399,10 @@ int __init register_tracer(struct tracer *type) mutex_unlock(&trace_types_lock); if (ret || !default_bootup_tracer) - goto out_unlock; + return ret; if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE)) - goto out_unlock; + return 0; printk(KERN_INFO "Starting tracer '%s'\n", type->name); /* Do we want this tracer to start on bootup? */ @@ -2420,8 +2414,7 @@ int __init register_tracer(struct tracer *type) /* disable other selftests, since this will break it. */ disable_tracing_selftest("running a tracer"); - out_unlock: - return ret; + return 0; } static void tracing_reset_cpu(struct array_buffer *buf, int cpu) @@ -8963,12 +8956,12 @@ ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash, out_reg: ret = tracing_arm_snapshot(tr); if (ret < 0) - goto out; + return ret; ret = register_ftrace_function_probe(glob, tr, ops, count); if (ret < 0) tracing_disarm_snapshot(tr); - out: + return ret < 0 ? 
ret : 0; } @@ -11070,7 +11063,7 @@ __init static int tracer_alloc_buffers(void) BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE); if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) - goto out; + return -ENOMEM; if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL)) goto out_free_buffer_mask; @@ -11188,7 +11181,6 @@ __init static int tracer_alloc_buffers(void) free_cpumask_var(global_trace.tracing_cpumask); out_free_buffer_mask: free_cpumask_var(tracing_buffer_mask); -out: return ret; } From 788fa4b47cdcd9b3d8c2d02ac0b3cd2540305f18 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Aug 2025 16:37:24 -0400 Subject: [PATCH 1389/2411] tracing: Add guard(ring_buffer_nest) Some calls to the tracing ring buffer can happen when the ring buffer is already being written to by the same context (for example, a trace_printk() in between a ring_buffer_lock_reserve() and a ring_buffer_unlock_commit()). In order to not trigger the recursion detection, these functions use ring_buffer_nest_start() and ring_buffer_nest_end(). Create a guard() for these functions so that their use cases can be simplified and not need to use goto for the release. 
Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://lore.kernel.org/20250801203857.710501021@kernel.org Signed-off-by: Steven Rostedt (Google) --- include/linux/ring_buffer.h | 3 ++ kernel/trace/trace.c | 69 +++++++++++++------------------ kernel/trace/trace_events_synth.c | 6 +-- 3 files changed, 34 insertions(+), 44 deletions(-) diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index cd7f0ae26615..8253cb69540c 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -144,6 +144,9 @@ int ring_buffer_write(struct trace_buffer *buffer, void ring_buffer_nest_start(struct trace_buffer *buffer); void ring_buffer_nest_end(struct trace_buffer *buffer); +DEFINE_GUARD(ring_buffer_nest, struct trace_buffer *, + ring_buffer_nest_start(_T), ring_buffer_nest_end(_T)) + struct ring_buffer_event * ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0ec9cab9a812..332487179e1d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1160,13 +1160,11 @@ int __trace_array_puts(struct trace_array *tr, unsigned long ip, trace_ctx = tracing_gen_ctx(); buffer = tr->array_buffer.buffer; - ring_buffer_nest_start(buffer); + guard(ring_buffer_nest)(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, trace_ctx); - if (!event) { - size = 0; - goto out; - } + if (!event) + return 0; entry = ring_buffer_event_data(event); entry->ip = ip; @@ -1182,8 +1180,6 @@ int __trace_array_puts(struct trace_array *tr, unsigned long ip, __buffer_unlock_commit(buffer, event); ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); - out: - ring_buffer_nest_end(buffer); return size; } EXPORT_SYMBOL_GPL(__trace_array_puts); @@ -1213,7 +1209,6 @@ int __trace_bputs(unsigned long ip, const char *str) struct bputs_entry *entry; unsigned int trace_ctx; int size = sizeof(struct bputs_entry); - int ret = 0; if 
(!printk_binsafe(tr)) return __trace_puts(ip, str, strlen(str)); @@ -1227,11 +1222,11 @@ int __trace_bputs(unsigned long ip, const char *str) trace_ctx = tracing_gen_ctx(); buffer = tr->array_buffer.buffer; - ring_buffer_nest_start(buffer); + guard(ring_buffer_nest)(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, trace_ctx); if (!event) - goto out; + return 0; entry = ring_buffer_event_data(event); entry->ip = ip; @@ -1240,10 +1235,7 @@ int __trace_bputs(unsigned long ip, const char *str) __buffer_unlock_commit(buffer, event); ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); - ret = 1; - out: - ring_buffer_nest_end(buffer); - return ret; + return 1; } EXPORT_SYMBOL_GPL(__trace_bputs); @@ -3397,21 +3389,19 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) size = sizeof(*entry) + sizeof(u32) * len; buffer = tr->array_buffer.buffer; - ring_buffer_nest_start(buffer); - event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, - trace_ctx); - if (!event) - goto out; - entry = ring_buffer_event_data(event); - entry->ip = ip; - entry->fmt = fmt; + scoped_guard(ring_buffer_nest, buffer) { + event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, + trace_ctx); + if (!event) + goto out_put; + entry = ring_buffer_event_data(event); + entry->ip = ip; + entry->fmt = fmt; - memcpy(entry->buf, tbuffer, sizeof(u32) * len); - __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); - -out: - ring_buffer_nest_end(buffer); + memcpy(entry->buf, tbuffer, sizeof(u32) * len); + __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); + } out_put: put_trace_buf(); @@ -3452,20 +3442,19 @@ int __trace_array_vprintk(struct trace_buffer *buffer, len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); size = sizeof(*entry) + len + 1; - ring_buffer_nest_start(buffer); - event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, - trace_ctx); - if (!event) - 
goto out; - entry = ring_buffer_event_data(event); - entry->ip = ip; - - memcpy(&entry->buf, tbuffer, len + 1); - __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL); + scoped_guard(ring_buffer_nest, buffer) { + event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, + trace_ctx); + if (!event) + goto out; + entry = ring_buffer_event_data(event); + entry->ip = ip; + memcpy(&entry->buf, tbuffer, len + 1); + __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL); + } out: - ring_buffer_nest_end(buffer); put_trace_buf(); out_nobuffer: diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 33cfbd4ed76d..f24ee61f8884 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -536,12 +536,12 @@ static notrace void trace_event_raw_event_synth(void *__data, * is being performed within another event. */ buffer = trace_file->tr->array_buffer.buffer; - ring_buffer_nest_start(buffer); + guard(ring_buffer_nest)(buffer); entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry) + fields_size); if (!entry) - goto out; + return; for (i = 0, n_u64 = 0; i < event->n_fields; i++) { val_idx = var_ref_idx[i]; @@ -584,8 +584,6 @@ static notrace void trace_event_raw_event_synth(void *__data, } trace_event_buffer_commit(&fbuffer); -out: - ring_buffer_nest_end(buffer); } static void free_synth_event_print_fmt(struct trace_event_call *call) From debe57fbe12cb16881b2db1f1787eb9673a8b8b0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Aug 2025 16:37:25 -0400 Subject: [PATCH 1390/2411] tracing: Add guard() around locks and mutexes in trace.c There's several locations in trace.c that can be simplified by using guards around raw_spin_lock_irqsave, mutexes and preempt disabling. 
Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://lore.kernel.org/20250801203857.879085376@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 144 ++++++++++++++----------------------------- 1 file changed, 46 insertions(+), 98 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 332487179e1d..4299e89ed04e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -432,15 +432,13 @@ static void ftrace_exports(struct ring_buffer_event *event, int flag) { struct trace_export *export; - preempt_disable_notrace(); + guard(preempt_notrace)(); export = rcu_dereference_raw_check(ftrace_exports_list); while (export) { trace_process_export(export, event, flag); export = rcu_dereference_raw_check(export->next); } - - preempt_enable_notrace(); } static inline void @@ -497,27 +495,18 @@ int register_ftrace_export(struct trace_export *export) if (WARN_ON_ONCE(!export->write)) return -1; - mutex_lock(&ftrace_export_lock); + guard(mutex)(&ftrace_export_lock); add_ftrace_export(&ftrace_exports_list, export); - mutex_unlock(&ftrace_export_lock); - return 0; } EXPORT_SYMBOL_GPL(register_ftrace_export); int unregister_ftrace_export(struct trace_export *export) { - int ret; - - mutex_lock(&ftrace_export_lock); - - ret = rm_ftrace_export(&ftrace_exports_list, export); - - mutex_unlock(&ftrace_export_lock); - - return ret; + guard(mutex)(&ftrace_export_lock); + return rm_ftrace_export(&ftrace_exports_list, export); } EXPORT_SYMBOL_GPL(unregister_ftrace_export); @@ -640,9 +629,8 @@ void trace_array_put(struct trace_array *this_tr) if (!this_tr) return; - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); __trace_array_put(this_tr); - mutex_unlock(&trace_types_lock); } EXPORT_SYMBOL_GPL(trace_array_put); @@ -1424,13 +1412,8 @@ static int tracing_arm_snapshot_locked(struct trace_array *tr) int tracing_arm_snapshot(struct trace_array *tr) { - int ret; - - 
mutex_lock(&trace_types_lock); - ret = tracing_arm_snapshot_locked(tr); - mutex_unlock(&trace_types_lock); - - return ret; + guard(mutex)(&trace_types_lock); + return tracing_arm_snapshot_locked(tr); } void tracing_disarm_snapshot(struct trace_array *tr) @@ -2483,9 +2466,8 @@ void tracing_reset_all_online_cpus_unlocked(void) void tracing_reset_all_online_cpus(void) { - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); tracing_reset_all_online_cpus_unlocked(); - mutex_unlock(&trace_types_lock); } int is_tracing_stopped(void) @@ -2496,18 +2478,17 @@ int is_tracing_stopped(void) static void tracing_start_tr(struct trace_array *tr) { struct trace_buffer *buffer; - unsigned long flags; if (tracing_disabled) return; - raw_spin_lock_irqsave(&tr->start_lock, flags); + guard(raw_spinlock_irqsave)(&tr->start_lock); if (--tr->stop_count) { if (WARN_ON_ONCE(tr->stop_count < 0)) { /* Someone screwed up their debugging */ tr->stop_count = 0; } - goto out; + return; } /* Prevent the buffers from switching */ @@ -2524,9 +2505,6 @@ static void tracing_start_tr(struct trace_array *tr) #endif arch_spin_unlock(&tr->max_lock); - - out: - raw_spin_unlock_irqrestore(&tr->start_lock, flags); } /** @@ -2544,11 +2522,10 @@ void tracing_start(void) static void tracing_stop_tr(struct trace_array *tr) { struct trace_buffer *buffer; - unsigned long flags; - raw_spin_lock_irqsave(&tr->start_lock, flags); + guard(raw_spinlock_irqsave)(&tr->start_lock); if (tr->stop_count++) - goto out; + return; /* Prevent the buffers from switching */ arch_spin_lock(&tr->max_lock); @@ -2564,9 +2541,6 @@ static void tracing_stop_tr(struct trace_array *tr) #endif arch_spin_unlock(&tr->max_lock); - - out: - raw_spin_unlock_irqrestore(&tr->start_lock, flags); } /** @@ -2679,12 +2653,12 @@ void trace_buffered_event_enable(void) per_cpu(trace_buffered_event, cpu) = event; - preempt_disable(); - if (cpu == smp_processor_id() && - __this_cpu_read(trace_buffered_event) != - per_cpu(trace_buffered_event, 
cpu)) - WARN_ON_ONCE(1); - preempt_enable(); + scoped_guard(preempt,) { + if (cpu == smp_processor_id() && + __this_cpu_read(trace_buffered_event) != + per_cpu(trace_buffered_event, cpu)) + WARN_ON_ONCE(1); + } } } @@ -3029,7 +3003,7 @@ static void __ftrace_trace_stack(struct trace_array *tr, skip++; #endif - preempt_disable_notrace(); + guard(preempt_notrace)(); stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1; @@ -3087,8 +3061,6 @@ static void __ftrace_trace_stack(struct trace_array *tr, /* Again, don't let gcc optimize things here */ barrier(); __this_cpu_dec(ftrace_stack_reserve); - preempt_enable_notrace(); - } static inline void ftrace_trace_stack(struct trace_array *tr, @@ -3171,9 +3143,9 @@ ftrace_trace_userstack(struct trace_array *tr, * prevent recursion, since the user stack tracing may * trigger other kernel events. */ - preempt_disable(); + guard(preempt)(); if (__this_cpu_read(user_stack_count)) - goto out; + return; __this_cpu_inc(user_stack_count); @@ -3191,8 +3163,6 @@ ftrace_trace_userstack(struct trace_array *tr, out_drop_count: __this_cpu_dec(user_stack_count); - out: - preempt_enable(); } #else /* CONFIG_USER_STACKTRACE_SUPPORT */ static void ftrace_trace_userstack(struct trace_array *tr, @@ -3374,7 +3344,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) pause_graph_tracing(); trace_ctx = tracing_gen_ctx(); - preempt_disable_notrace(); + guard(preempt_notrace)(); tbuffer = get_trace_buf(); if (!tbuffer) { @@ -3406,7 +3376,6 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) put_trace_buf(); out_nobuffer: - preempt_enable_notrace(); unpause_graph_tracing(); return len; @@ -3430,7 +3399,7 @@ int __trace_array_vprintk(struct trace_buffer *buffer, pause_graph_tracing(); trace_ctx = tracing_gen_ctx(); - preempt_disable_notrace(); + guard(preempt_notrace)(); tbuffer = get_trace_buf(); @@ -3458,7 +3427,6 @@ int __trace_array_vprintk(struct trace_buffer *buffer, put_trace_buf(); out_nobuffer: - 
preempt_enable_notrace(); unpause_graph_tracing(); return len; @@ -4788,20 +4756,16 @@ int tracing_open_file_tr(struct inode *inode, struct file *filp) if (ret) return ret; - mutex_lock(&event_mutex); + guard(mutex)(&event_mutex); /* Fail if the file is marked for removal */ if (file->flags & EVENT_FILE_FL_FREED) { trace_array_put(file->tr); - ret = -ENODEV; + return -ENODEV; } else { event_file_get(file); } - mutex_unlock(&event_mutex); - if (ret) - return ret; - filp->private_data = inode->i_private; return 0; @@ -5945,9 +5909,9 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, char buf[MAX_TRACER_SIZE+2]; int r; - mutex_lock(&trace_types_lock); - r = sprintf(buf, "%s\n", tr->current_trace->name); - mutex_unlock(&trace_types_lock); + scoped_guard(mutex, &trace_types_lock) { + r = sprintf(buf, "%s\n", tr->current_trace->name); + } return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } @@ -6249,15 +6213,13 @@ int tracing_update_buffers(struct trace_array *tr) { int ret = 0; - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); update_last_data(tr); if (!tr->ring_buffer_expanded) ret = __tracing_resize_ring_buffer(tr, trace_buf_size, RING_BUFFER_ALL_CPUS); - mutex_unlock(&trace_types_lock); - return ret; } @@ -6554,7 +6516,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) if (ret) return ret; - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); cpu = tracing_get_cpu(inode); ret = open_pipe_on_cpu(tr, cpu); if (ret) @@ -6598,7 +6560,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) tr->trace_ref++; - mutex_unlock(&trace_types_lock); return ret; fail: @@ -6607,7 +6568,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) close_pipe_on_cpu(tr, cpu); fail_pipe_on_cpu: __trace_array_put(tr); - mutex_unlock(&trace_types_lock); return ret; } @@ -6616,14 +6576,13 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) struct trace_iterator 
*iter = file->private_data; struct trace_array *tr = inode->i_private; - mutex_lock(&trace_types_lock); + scoped_guard(mutex, &trace_types_lock) { + tr->trace_ref--; - tr->trace_ref--; - - if (iter->trace->pipe_close) - iter->trace->pipe_close(iter); - close_pipe_on_cpu(tr, iter->cpu_file); - mutex_unlock(&trace_types_lock); + if (iter->trace->pipe_close) + iter->trace->pipe_close(iter); + close_pipe_on_cpu(tr, iter->cpu_file); + } free_trace_iter_content(iter); kfree(iter); @@ -7426,7 +7385,7 @@ int tracing_set_clock(struct trace_array *tr, const char *clockstr) if (i == ARRAY_SIZE(trace_clocks)) return -EINVAL; - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); tr->clock_id = i; @@ -7450,8 +7409,6 @@ int tracing_set_clock(struct trace_array *tr, const char *clockstr) tscratch->clock_id = i; } - mutex_unlock(&trace_types_lock); - return 0; } @@ -7503,15 +7460,13 @@ static int tracing_time_stamp_mode_show(struct seq_file *m, void *v) { struct trace_array *tr = m->private; - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer)) seq_puts(m, "delta [absolute]\n"); else seq_puts(m, "[delta] absolute\n"); - mutex_unlock(&trace_types_lock); - return 0; } @@ -8099,14 +8054,14 @@ static void clear_tracing_err_log(struct trace_array *tr) { struct tracing_log_err *err, *next; - mutex_lock(&tracing_err_log_lock); + guard(mutex)(&tracing_err_log_lock); + list_for_each_entry_safe(err, next, &tr->err_log, list) { list_del(&err->list); free_tracing_log_err(err); } tr->n_err_log_entries = 0; - mutex_unlock(&tracing_err_log_lock); } static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos) @@ -8377,7 +8332,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) struct ftrace_buffer_info *info = file->private_data; struct trace_iterator *iter = &info->iter; - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); iter->tr->trace_ref--; @@ -8388,8 
+8343,6 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) info->spare_cpu, info->spare); kvfree(info); - mutex_unlock(&trace_types_lock); - return 0; } @@ -8597,14 +8550,13 @@ static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned * An ioctl call with cmd 0 to the ring buffer file will wake up all * waiters */ - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); /* Make sure the waiters see the new wait_index */ (void)atomic_fetch_inc_release(&iter->wait_index); ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); - mutex_unlock(&trace_types_lock); return 0; } @@ -9094,10 +9046,9 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, return -EINVAL; if (!!(topt->flags->val & topt->opt->bit) != val) { - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); ret = __set_tracer_option(topt->tr, topt->flags, topt->opt, !val); - mutex_unlock(&trace_types_lock); if (ret) return ret; } @@ -9406,7 +9357,7 @@ rb_simple_write(struct file *filp, const char __user *ubuf, return ret; if (buffer) { - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); if (!!val == tracer_tracing_is_on(tr)) { val = 0; /* do nothing */ } else if (val) { @@ -9420,7 +9371,6 @@ rb_simple_write(struct file *filp, const char __user *ubuf, /* Wake up any waiters */ ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS); } - mutex_unlock(&trace_types_lock); } (*ppos)++; @@ -9804,10 +9754,9 @@ static void __update_tracer_options(struct trace_array *tr) static void update_tracer_options(struct trace_array *tr) { - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); tracer_options_updated = true; __update_tracer_options(tr); - mutex_unlock(&trace_types_lock); } /* Must have trace_types_lock held */ @@ -9829,11 +9778,10 @@ struct trace_array *trace_array_find_get(const char *instance) { struct trace_array *tr; - mutex_lock(&trace_types_lock); + 
guard(mutex)(&trace_types_lock); tr = trace_array_find(instance); if (tr) tr->ref++; - mutex_unlock(&trace_types_lock); return tr; } From 12d5189615862a9eb06d4aa7c8a990bcde2ebb01 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Aug 2025 16:37:26 -0400 Subject: [PATCH 1391/2411] tracing: Use __free(kfree) in trace.c to remove gotos There's a couple of locations that have goto out in trace.c for the only purpose of freeing a variable that was allocated. These can be replaced with __free(kfree). Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://lore.kernel.org/20250801203858.040892777@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4299e89ed04e..d0b1964648c1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5042,7 +5042,7 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { struct trace_array *tr = file_inode(filp)->i_private; - char *mask_str; + char *mask_str __free(kfree) = NULL; int len; len = snprintf(NULL, 0, "%*pb\n", @@ -5053,16 +5053,10 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf, len = snprintf(mask_str, len, "%*pb\n", cpumask_pr_args(tr->tracing_cpumask)); - if (len >= count) { - count = -EINVAL; - goto out_err; - } - count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); + if (len >= count) + return -EINVAL; -out_err: - kfree(mask_str); - - return count; + return simple_read_from_buffer(ubuf, count, ppos, mask_str, len); } int tracing_set_cpumask(struct trace_array *tr, @@ -10739,7 +10733,8 @@ ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, int (*createfn)(const char *)) { - char *kbuf, *buf, *tmp; + char *kbuf __free(kfree) = NULL; + char *buf, *tmp; int ret = 0; size_t done = 0; size_t 
size; @@ -10754,10 +10749,9 @@ ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, if (size >= WRITE_BUFSIZE) size = WRITE_BUFSIZE - 1; - if (copy_from_user(kbuf, buffer + done, size)) { - ret = -EFAULT; - goto out; - } + if (copy_from_user(kbuf, buffer + done, size)) + return -EFAULT; + kbuf[size] = '\0'; buf = kbuf; do { @@ -10773,8 +10767,7 @@ ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */ pr_warn("Line length is too long: Should be less than %d\n", WRITE_BUFSIZE - 2); - ret = -EINVAL; - goto out; + return -EINVAL; } } done += size; @@ -10787,17 +10780,12 @@ ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, ret = createfn(buf); if (ret) - goto out; + return ret; buf += size; } while (done < count); } - ret = done; - -out: - kfree(kbuf); - - return ret; + return done; } #ifdef CONFIG_TRACER_MAX_TRACE From db5f0c3e3e60939bb2ecc2dbdea4e6f32252620b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Aug 2025 16:37:27 -0400 Subject: [PATCH 1392/2411] ring-buffer: Convert ring_buffer_write() to use guard(preempt_notrace) The function ring_buffer_write() has a goto out to only do a preempt_enable_notrace(). This can be replaced by a guard. 
Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://lore.kernel.org/20250801203858.205479143@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 00fc38d70e86..9d7bf17fbfba 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4714,26 +4714,26 @@ int ring_buffer_write(struct trace_buffer *buffer, int ret = -EBUSY; int cpu; - preempt_disable_notrace(); + guard(preempt_notrace)(); if (atomic_read(&buffer->record_disabled)) - goto out; + return -EBUSY; cpu = raw_smp_processor_id(); if (!cpumask_test_cpu(cpu, buffer->cpumask)) - goto out; + return -EBUSY; cpu_buffer = buffer->buffers[cpu]; if (atomic_read(&cpu_buffer->record_disabled)) - goto out; + return -EBUSY; if (length > buffer->max_data_size) - goto out; + return -EBUSY; if (unlikely(trace_recursive_lock(cpu_buffer))) - goto out; + return -EBUSY; event = rb_reserve_next_event(buffer, cpu_buffer, length); if (!event) @@ -4751,10 +4751,6 @@ int ring_buffer_write(struct trace_buffer *buffer, out_unlock: trace_recursive_unlock(cpu_buffer); - - out: - preempt_enable_notrace(); - return ret; } EXPORT_SYMBOL_GPL(ring_buffer_write); From f8fded7536a9350ce849f21eee124d66056aa54c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 31 Jul 2025 14:09:14 +0300 Subject: [PATCH 1393/2411] selftests: net: Fix flaky neighbor garbage collection test The purpose of the "Periodic garbage collection" test case is to make sure that "extern_valid" neighbors are not flushed during periodic garbage collection, unlike regular neighbor entries. The test case is currently doing the following: 1. Changing the base reachable time to 10 seconds so that periodic garbage collection will run every 5 seconds. 2. 
Changing the garbage collection stale time to 5 seconds so that neighbors that have not been used in the last 5 seconds will be considered for removal. 3. Waiting for the base reachable time change to take effect. 4. Adding an "extern_valid" neighbor, a non-"extern_valid" neighbor and a bunch of other neighbors so that the threshold ("thresh1") will be crossed and stale neighbors will be flushed during garbage collection. 5. Waiting for 10 seconds to give garbage collection a chance to run. 6. Checking that the "extern_valid" neighbor was not flushed and that the non-"extern_valid" neighbor was flushed. The test sometimes fails in the netdev CI because the non-"extern_valid" neighbor was not flushed. I am unable to reproduce this locally, but my theory is that since we do not know exactly when the periodic garbage collection runs, it is possible for it to run at a time when the non-"extern_valid" neighbor is still not considered stale. Fix by moving the addition of the two neighbors before step 3 and by reducing the garbage collection stale time to 1 second, to ensure that both neighbors are considered stale when garbage collection runs.
Fixes: 171f2ee31a42 ("selftests: net: Add a selftest for externally validated neighbor entries") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20250728093504.4ebbd73c@kernel.org/ Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20250731110914.506890-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/test_neigh.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/test_neigh.sh b/tools/testing/selftests/net/test_neigh.sh index 388056472b5b..7c594bf6ead0 100755 --- a/tools/testing/selftests/net/test_neigh.sh +++ b/tools/testing/selftests/net/test_neigh.sh @@ -289,11 +289,11 @@ extern_valid_common() orig_base_reachable=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["base_reachable"]') run_cmd "ip ntable change name $tbl_name thresh1 10 base_reachable 10000" orig_gc_stale=$(ip -n "$ns1" -j ntable show name "$tbl_name" dev veth0 | jq '.[]["gc_stale"]') - run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale 5000" - # Wait orig_base_reachable/2 for the new interval to take effect. - run_cmd "sleep $(((orig_base_reachable / 1000) / 2 + 2))" + run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale 1000" run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0" + # Wait orig_base_reachable/2 for the new interval to take effect. + run_cmd "sleep $(((orig_base_reachable / 1000) / 2 + 2))" for i in {1..20}; do run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0" done From 38358fa3cc8e16c6862a3e5c5c233f9f652e3a6d Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 31 Jul 2025 12:29:08 +0200 Subject: [PATCH 1394/2411] net: airoha: Fix PPE table access in airoha_ppe_debugfs_foe_show() In order to avoid any possible race we need to hold the ppe_lock spinlock accessing the hw PPE table. 
airoha_ppe_foe_get_entry routine is always executed holding ppe_lock except in airoha_ppe_debugfs_foe_show routine. Fix the problem introducing airoha_ppe_foe_get_entry_locked routine. Fixes: 3fe15c640f380 ("net: airoha: Introduce PPE debugfs support") Reviewed-by: Dawid Osuchowski Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250731-airoha_ppe_foe_get_entry_locked-v2-1-50efbd8c0fd6@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_ppe.c | 26 ++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index c354d536bc66..47411d2cbd28 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -508,9 +508,11 @@ static void airoha_ppe_foe_flow_stats_update(struct airoha_ppe *ppe, FIELD_PREP(AIROHA_FOE_IB2_NBQ, nbq); } -struct airoha_foe_entry *airoha_ppe_foe_get_entry(struct airoha_ppe *ppe, - u32 hash) +static struct airoha_foe_entry * +airoha_ppe_foe_get_entry_locked(struct airoha_ppe *ppe, u32 hash) { + lockdep_assert_held(&ppe_lock); + if (hash < PPE_SRAM_NUM_ENTRIES) { u32 *hwe = ppe->foe + hash * sizeof(struct airoha_foe_entry); struct airoha_eth *eth = ppe->eth; @@ -537,6 +539,18 @@ struct airoha_foe_entry *airoha_ppe_foe_get_entry(struct airoha_ppe *ppe, return ppe->foe + hash * sizeof(struct airoha_foe_entry); } +struct airoha_foe_entry *airoha_ppe_foe_get_entry(struct airoha_ppe *ppe, + u32 hash) +{ + struct airoha_foe_entry *hwe; + + spin_lock_bh(&ppe_lock); + hwe = airoha_ppe_foe_get_entry_locked(ppe, hash); + spin_unlock_bh(&ppe_lock); + + return hwe; +} + static bool airoha_ppe_foe_compare_entry(struct airoha_flow_table_entry *e, struct airoha_foe_entry *hwe) { @@ -651,7 +665,7 @@ airoha_ppe_foe_commit_subflow_entry(struct airoha_ppe *ppe, struct airoha_flow_table_entry *f; int type; - hwe_p = airoha_ppe_foe_get_entry(ppe, hash); + hwe_p = 
airoha_ppe_foe_get_entry_locked(ppe, hash); if (!hwe_p) return -EINVAL; @@ -703,7 +717,7 @@ static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe, spin_lock_bh(&ppe_lock); - hwe = airoha_ppe_foe_get_entry(ppe, hash); + hwe = airoha_ppe_foe_get_entry_locked(ppe, hash); if (!hwe) goto unlock; @@ -818,7 +832,7 @@ airoha_ppe_foe_flow_l2_entry_update(struct airoha_ppe *ppe, u32 ib1, state; int idle; - hwe = airoha_ppe_foe_get_entry(ppe, iter->hash); + hwe = airoha_ppe_foe_get_entry_locked(ppe, iter->hash); if (!hwe) continue; @@ -855,7 +869,7 @@ static void airoha_ppe_foe_flow_entry_update(struct airoha_ppe *ppe, if (e->hash == 0xffff) goto unlock; - hwe_p = airoha_ppe_foe_get_entry(ppe, e->hash); + hwe_p = airoha_ppe_foe_get_entry_locked(ppe, e->hash); if (!hwe_p) goto unlock; From 60bda1ba062a003efcb96c91c8541c3efb212d69 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 31 Jul 2025 15:41:38 +0100 Subject: [PATCH 1395/2411] sfc: unfix not-a-typo in comment Commit fe09560f8241 ("net: Fix typos") removed duplicated word 'fallback', but this was not a typo and change altered the semantic meaning of the comment. Partially revert, using the phrase 'fallback of the fallback' to make the meaning more clear to future readers so that they won't try to change it again. 
Signed-off-by: Edward Cree Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250731144138.2637949-1-edward.cree@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/tc_encap_actions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.c b/drivers/net/ethernet/sfc/tc_encap_actions.c index 2258f854e5be..e872f926e438 100644 --- a/drivers/net/ethernet/sfc/tc_encap_actions.c +++ b/drivers/net/ethernet/sfc/tc_encap_actions.c @@ -442,7 +442,7 @@ static void efx_tc_update_encap(struct efx_nic *efx, rule = container_of(acts, struct efx_tc_flow_rule, acts); if (rule->fallback) fallback = rule->fallback; - else /* fallback: deliver to PF */ + else /* fallback of the fallback: deliver to PF */ fallback = &efx->tc->facts.pf; rc = efx_mae_update_rule(efx, fallback->fw_id, rule->fw_id); From 77bf1c55b2acc7fa3734b14f4561e3d75aea1a90 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Tue, 29 Jul 2025 11:34:00 -0700 Subject: [PATCH 1396/2411] net/mlx5: Correctly set gso_segs when LRO is used When gso_segs is left at 0, a number of assumptions will end up being incorrect throughout the stack. For example, in the GRO-path, we set NAPI_GRO_CB()->count to gso_segs. So, if a non-LRO'ed packet followed by an LRO'ed packet is being processed in GRO, the first one will have NAPI_GRO_CB()->count set to 1 and the next one to 0 (in dev_gro_receive()). Since commit 531d0d32de3e ("net/mlx5: Correctly set gso_size when LRO is used") these packets will get merged (as their gso_size now matches). So, we end up in gro_complete() with NAPI_GRO_CB()->count == 1 and thus don't call inet_gro_complete(). Meaning, checksum-validation in tcp_checksum_complete() will fail with a "hw csum failure". Even before the above mentioned commit, incorrect gso_segs means that other things like TCP's accounting of incoming packets (tp->segs_in, data_segs_in, rcv_ooopack) will be incorrect. 
Which means that if one does bytes_received/data_segs_in, the result will be bigger than the MTU. Fix this by initializing gso_segs correctly when LRO is used. Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files") Reported-by: Gal Pressman Closes: https://lore.kernel.org/netdev/6583783f-f0fb-4fb1-a415-feec8155bc69@nvidia.com/ Signed-off-by: Christoph Paasch Reviewed-by: Gal Pressman Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250729-mlx5_gso_segs-v1-1-b48c480c1c12@openai.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 218b1a09534c..b8c609d91d11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1574,6 +1574,7 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, unsigned int hdrlen = mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt - hdrlen, lro_num_seg); + skb_shinfo(skb)->gso_segs = lro_num_seg; /* Subtract one since we already counted this as one * "regular" packet in mlx5e_complete_rx_cqe() */ From a4f0866e3dbbf3fee4078bce0b78d65a0875c0bc Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 30 Jul 2025 13:23:32 +0200 Subject: [PATCH 1397/2411] dpll: Make ZL3073X invisible Currently, the user is always asked about the Microchip Azurite DPLL/PTP/SyncE core driver, even when I2C and SPI are disabled, and thus the driver cannot be used at all. Fix this by making the Kconfig symbol for the core driver invisible (unless compile-testing), and selecting it by the bus glue sub-drivers. Drop the modular defaults, as drivers should not default to enabled. 
Fixes: 2df8e64e01c10a4b ("dpll: Add basic Microchip ZL3073x support") Signed-off-by: Geert Uytterhoeven Link: https://patch.msgid.link/97804163aeb262f0e0706d00c29d9bb751844454.1753874405.git.geert+renesas@glider.be Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/dpll/zl3073x/Kconfig b/drivers/dpll/zl3073x/Kconfig index 7db262ab8458..9915f7423dea 100644 --- a/drivers/dpll/zl3073x/Kconfig +++ b/drivers/dpll/zl3073x/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config ZL3073X - tristate "Microchip Azurite DPLL/PTP/SyncE devices" + tristate "Microchip Azurite DPLL/PTP/SyncE devices" if COMPILE_TEST depends on NET select DPLL select NET_DEVLINK @@ -16,9 +16,9 @@ config ZL3073X config ZL3073X_I2C tristate "I2C bus implementation for Microchip Azurite devices" - depends on I2C && ZL3073X + depends on I2C select REGMAP_I2C - default m + select ZL3073X help This is I2C bus implementation for Microchip Azurite DPLL/PTP/SyncE devices. @@ -28,9 +28,9 @@ config ZL3073X_I2C config ZL3073X_SPI tristate "SPI bus implementation for Microchip Azurite devices" - depends on SPI && ZL3073X + depends on SPI select REGMAP_SPI - default m + select ZL3073X help This is SPI bus implementation for Microchip Azurite DPLL/PTP/SyncE devices. From 7cbd49795d4ca86fba5830084e94fece3b343b79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Jul 2025 11:53:13 +0000 Subject: [PATCH 1398/2411] selftests: avoid using ifconfig ifconfig is deprecated and not always present, use ip command instead. 
Fixes: e0f3b3e5c77a ("selftests: Add test cases for vlan_filter modification during runtime") Signed-off-by: Eric Dumazet Cc: Dong Chenchen Reviewed-by: Hangbin Liu Link: https://patch.msgid.link/20250730115313.3356036-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/vlan_hw_filter.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh index 0fb56baf28e4..e195d5cab6f7 100755 --- a/tools/testing/selftests/net/vlan_hw_filter.sh +++ b/tools/testing/selftests/net/vlan_hw_filter.sh @@ -55,10 +55,10 @@ test_vlan0_del_crash_01() { ip netns exec ${NETNS} ip link add bond0 type bond mode 0 ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off - ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ip link set dev bond0 up ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on - ip netns exec ${NETNS} ifconfig bond0 down - ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ip link set dev bond0 down + ip netns exec ${NETNS} ip link set dev bond0 up ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" cleanup } @@ -68,11 +68,11 @@ test_vlan0_del_crash_02() { setup ip netns exec ${NETNS} ip link add bond0 type bond mode 0 ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off - ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ip link set dev bond0 up ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q - ip netns exec ${NETNS} ifconfig bond0 down - ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ip link set dev bond0 down + ip netns exec ${NETNS} ip link set dev bond0 up ip netns exec ${NETNS} ip link del vlan0 || fail "Please check 
vlan HW filter function" cleanup } @@ -84,9 +84,9 @@ test_vlan0_del_crash_03() { ip netns exec ${NETNS} ip link add bond0 type bond mode 0 ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off - ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ip link set dev bond0 up ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on - ip netns exec ${NETNS} ifconfig bond0 down + ip netns exec ${NETNS} ip link set dev bond0 down ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" cleanup } From d45cf1e7d7180256e17c9ce88e32e8061a7887fe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Jul 2025 13:17:38 +0000 Subject: [PATCH 1399/2411] ipv6: reject malicious packets in ipv6_gso_segment() syzbot was able to craft a packet with very long IPv6 extension headers leading to an overflow of skb->transport_header. This 16bit field has a limited range. Add skb_reset_transport_header_careful() helper and use it from ipv6_gso_segment() WARNING: CPU: 0 PID: 5871 at ./include/linux/skbuff.h:3032 skb_reset_transport_header include/linux/skbuff.h:3032 [inline] WARNING: CPU: 0 PID: 5871 at ./include/linux/skbuff.h:3032 ipv6_gso_segment+0x15e2/0x21e0 net/ipv6/ip6_offload.c:151 Modules linked in: CPU: 0 UID: 0 PID: 5871 Comm: syz-executor211 Not tainted 6.16.0-rc6-syzkaller-g7abc678e3084 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 RIP: 0010:skb_reset_transport_header include/linux/skbuff.h:3032 [inline] RIP: 0010:ipv6_gso_segment+0x15e2/0x21e0 net/ipv6/ip6_offload.c:151 Call Trace: skb_mac_gso_segment+0x31c/0x640 net/core/gso.c:53 nsh_gso_segment+0x54a/0xe10 net/nsh/nsh.c:110 skb_mac_gso_segment+0x31c/0x640 net/core/gso.c:53 __skb_gso_segment+0x342/0x510 net/core/gso.c:124 skb_gso_segment include/net/gso.h:83 [inline] validate_xmit_skb+0x857/0x11b0 net/core/dev.c:3950 
validate_xmit_skb_list+0x84/0x120 net/core/dev.c:4000 sch_direct_xmit+0xd3/0x4b0 net/sched/sch_generic.c:329 __dev_xmit_skb net/core/dev.c:4102 [inline] __dev_queue_xmit+0x17b6/0x3a70 net/core/dev.c:4679 Fixes: d1da932ed4ec ("ipv6: Separate ipv6 offload support") Reported-by: syzbot+af43e647fd835acc02df@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/688a1a05.050a0220.5d226.0008.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Reviewed-by: Dawid Osuchowski Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250730131738.3385939-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 23 +++++++++++++++++++++++ net/ipv6/ip6_offload.c | 4 +++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b8b06e71b73e..14b923ddb6df 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3033,6 +3033,29 @@ static inline void skb_reset_transport_header(struct sk_buff *skb) skb->transport_header = offset; } +/** + * skb_reset_transport_header_careful - conditionally reset transport header + * @skb: buffer to alter + * + * Hardened version of skb_reset_transport_header(). + * + * Returns: true if the operation was a success. 
+ */ +static inline bool __must_check +skb_reset_transport_header_careful(struct sk_buff *skb) +{ + long offset = skb->data - skb->head; + + if (unlikely(offset != (typeof(skb->transport_header))offset)) + return false; + + if (unlikely(offset == (typeof(skb->transport_header))~0U)) + return false; + + skb->transport_header = offset; + return true; +} + static inline void skb_set_transport_header(struct sk_buff *skb, const int offset) { diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 9822163428b0..fce91183797a 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -148,7 +148,9 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, ops = rcu_dereference(inet6_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) { - skb_reset_transport_header(skb); + if (!skb_reset_transport_header_careful(skb)) + goto out; + segs = ops->callbacks.gso_segment(skb, features); if (!segs) skb->network_header = skb_mac_header(skb) + nhoff - skb->head; From a81649a4efd382497bf3d34a623360263adc6993 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 30 Jul 2025 13:25:33 -0700 Subject: [PATCH 1400/2411] net: mdio: mdio-bcm-unimac: Correct rate fallback logic When the parent clock is a gated clock which has multiple parents, the clock provider (clk-scmi typically) might return a rate of 0 since there is not one of those particular parent clocks that should be chosen for returning a rate. Prior to ee975351cf0c ("net: mdio: mdio-bcm-unimac: Manage clock around I/O accesses"), we would not always be passing a clock reference depending upon how mdio-bcm-unimac was instantiated. In that case, we would take the fallback path where the rate is hard coded to 250MHz. Make sure that we still fallback to using a fixed rate for the divider calculation, otherwise we simply ignore the desired MDIO bus clock frequency which can prevent us from interfacing with Ethernet PHYs properly. 
Fixes: ee975351cf0c ("net: mdio: mdio-bcm-unimac: Manage clock around I/O accesses") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250730202533.3463529-1-florian.fainelli@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/mdio/mdio-bcm-unimac.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/mdio/mdio-bcm-unimac.c b/drivers/net/mdio/mdio-bcm-unimac.c index b6e30bdf5325..7baab230008a 100644 --- a/drivers/net/mdio/mdio-bcm-unimac.c +++ b/drivers/net/mdio/mdio-bcm-unimac.c @@ -209,10 +209,9 @@ static int unimac_mdio_clk_set(struct unimac_mdio_priv *priv) if (ret) return ret; - if (!priv->clk) + rate = clk_get_rate(priv->clk); + if (!rate) rate = 250000000; - else - rate = clk_get_rate(priv->clk); div = (rate / (2 * priv->clk_freq)) - 1; if (div & ~MDIO_CLK_DIV_MASK) { From d8d2d9d12f141302aaec3ff9a3a8cbed4ac0546c Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Fri, 1 Aug 2025 11:49:44 +0200 Subject: [PATCH 1401/2411] selftests/bpf: Test for unaligned flow_dissector ctx access This patch adds tests for two context fields where unaligned accesses were not properly rejected. Note the new macro is similar to the existing narrow_load macro, but we need a different description and access offset. Combining the two macros into one is probably doable but I don't think it would help readability. vmlinux.h is included in place of bpf.h so we have the definition of struct bpf_nf_ctx. 
Signed-off-by: Paul Chaignon Tested-by: Eduard Zingerman Acked-by: Yonghong Song Link: https://lore.kernel.org/r/bf014046ddcf41677fb8b98d150c14027e9fddba.1754039605.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_ctx.c | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx.c b/tools/testing/selftests/bpf/progs/verifier_ctx.c index 0450840c92d9..424463094760 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ctx.c +++ b/tools/testing/selftests/bpf/progs/verifier_ctx.c @@ -1,10 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 /* Converted from tools/testing/selftests/bpf/verifier/ctx.c */ -#include +#include "vmlinux.h" #include #include "bpf_misc.h" +#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) + SEC("tc") __description("context stores via BPF_ATOMIC") __failure __msg("BPF_ATOMIC stores into R1 ctx is not allowed") @@ -243,4 +245,23 @@ narrow_load("sockops", bpf_sock_ops, skb_data); narrow_load("sockops", bpf_sock_ops, skb_data_end); narrow_load("sockops", bpf_sock_ops, skb_hwtstamp); +#define unaligned_access(type, ctx, field) \ + SEC(type) \ + __description("unaligned access on field " #field " of " #ctx) \ + __failure __msg("invalid bpf_context access") \ + __naked void unaligned_ctx_access_##ctx##field(void) \ + { \ + asm volatile (" \ + r1 = *(u%[size] *)(r1 + %[off]); \ + r0 = 0; \ + exit;" \ + : \ + : __imm_const(size, sizeof_field(struct ctx, field) * 8), \ + __imm_const(off, offsetof(struct ctx, field) + 1) \ + : __clobber_all); \ + } + +unaligned_access("flow_dissector", __sk_buff, data); +unaligned_access("netfilter", bpf_nf_ctx, skb); + char _license[] SEC("license") = "GPL"; From d46e51f1c78b9ab9323610feb14238d06d46d519 Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Wed, 30 Jul 2025 18:14:58 +0800 Subject: [PATCH 1402/2411] net: drop UFO packets in udp_rcv_segment() When sending a packet with 
virtio_net_hdr to tun device, if the gso_type in virtio_net_hdr is SKB_GSO_UDP and the gso_size is less than udphdr size, below crash may happen. ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:4572! Oops: invalid opcode: 0000 [#1] SMP NOPTI CPU: 0 UID: 0 PID: 62 Comm: mytest Not tainted 6.16.0-rc7 #203 PREEMPT(voluntary) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:skb_pull_rcsum+0x8e/0xa0 Code: 00 00 5b c3 cc cc cc cc 8b 93 88 00 00 00 f7 da e8 37 44 38 00 f7 d8 89 83 88 00 00 00 48 8b 83 c8 00 00 00 5b c3 cc cc cc cc <0f> 0b 0f 0b 66 66 2e 0f 1f 84 00 000 RSP: 0018:ffffc900001fba38 EFLAGS: 00000297 RAX: 0000000000000004 RBX: ffff8880040c1000 RCX: ffffc900001fb948 RDX: ffff888003e6d700 RSI: 0000000000000008 RDI: ffff88800411a062 RBP: ffff8880040c1000 R08: 0000000000000000 R09: 0000000000000001 R10: ffff888003606c00 R11: 0000000000000001 R12: 0000000000000000 R13: ffff888004060900 R14: ffff888004050000 R15: ffff888004060900 FS: 000000002406d3c0(0000) GS:ffff888084a19000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000040 CR3: 0000000004007000 CR4: 00000000000006f0 Call Trace: udp_queue_rcv_one_skb+0x176/0x4b0 net/ipv4/udp.c:2445 udp_queue_rcv_skb+0x155/0x1f0 net/ipv4/udp.c:2475 udp_unicast_rcv_skb+0x71/0x90 net/ipv4/udp.c:2626 __udp4_lib_rcv+0x433/0xb00 net/ipv4/udp.c:2690 ip_protocol_deliver_rcu+0xa6/0x160 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x72/0x90 net/ipv4/ip_input.c:233 ip_sublist_rcv_finish+0x5f/0x70 net/ipv4/ip_input.c:579 ip_sublist_rcv+0x122/0x1b0 net/ipv4/ip_input.c:636 ip_list_rcv+0xf7/0x130 net/ipv4/ip_input.c:670 __netif_receive_skb_list_core+0x21d/0x240 net/core/dev.c:6067 netif_receive_skb_list_internal+0x186/0x2b0 net/core/dev.c:6210 napi_complete_done+0x78/0x180 net/core/dev.c:6580 tun_get_user+0xa63/0x1120 drivers/net/tun.c:1909 tun_chr_write_iter+0x65/0xb0 drivers/net/tun.c:1984 vfs_write+0x300/0x420 fs/read_write.c:593 
ksys_write+0x60/0xd0 fs/read_write.c:686 do_syscall_64+0x50/0x1c0 arch/x86/entry/syscall_64.c:63 To trigger gso segment in udp_queue_rcv_skb(), we should also set option UDP_ENCAP_ESPINUDP to enable udp_sk(sk)->encap_rcv. When the encap_rcv hook return 1 in udp_queue_rcv_one_skb(), udp_csum_pull_header() will try to pull udphdr, but the skb size has been segmented to gso size, which leads to this crash. Previous commit cf329aa42b66 ("udp: cope with UDP GRO packet misdirection") introduces segmentation in UDP receive path only for GRO, which was never intended to be used for UFO, so drop UFO packets in udp_rcv_segment(). Link: https://lore.kernel.org/netdev/20250724083005.3918375-1-wangliang74@huawei.com/ Link: https://lore.kernel.org/netdev/20250729123907.3318425-1-wangliang74@huawei.com/ Fixes: cf329aa42b66 ("udp: cope with UDP GRO packet misdirection") Suggested-by: Willem de Bruijn Signed-off-by: Wang Liang Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250730101458.3470788-1-wangliang74@huawei.com Signed-off-by: Jakub Kicinski --- include/net/udp.h | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index f8ae2c4ade14..e2af3bda90c9 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -586,6 +586,16 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, { netdev_features_t features = NETIF_F_SG; struct sk_buff *segs; + int drop_count; + + /* + * Segmentation in UDP receive path is only for UDP GRO, drop udp + * fragmentation offload (UFO) packets. 
+ */ + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) { + drop_count = 1; + goto drop; + } /* Avoid csum recalculation by skb_segment unless userspace explicitly * asks for the final checksum values @@ -609,16 +619,18 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, */ segs = __skb_gso_segment(skb, features, false); if (IS_ERR_OR_NULL(segs)) { - int segs_nr = skb_shinfo(skb)->gso_segs; - - atomic_add(segs_nr, &sk->sk_drops); - SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, segs_nr); - kfree_skb(skb); - return NULL; + drop_count = skb_shinfo(skb)->gso_segs; + goto drop; } consume_skb(skb); return segs; + +drop: + atomic_add(drop_count, &sk->sk_drops); + SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, drop_count); + kfree_skb(skb); + return NULL; } static inline void udp_post_segment_fix_csum(struct sk_buff *skb) From ae8508b25def57982493c48694ef135973bfabe0 Mon Sep 17 00:00:00 2001 From: Takamitsu Iwai Date: Tue, 29 Jul 2025 02:31:49 +0900 Subject: [PATCH 1403/2411] net/sched: taprio: enforce minimum value for picos_per_byte Syzbot reported a WARNING in taprio_get_start_time(). When link speed is 470,589 or greater, q->picos_per_byte becomes too small, causing length_to_duration(q, ETH_ZLEN) to return zero. This zero value leads to validation failures in fill_sched_entry() and parse_taprio_schedule(), allowing arbitrary values to be assigned to entry->interval and cycle_time. As a result, sched->cycle can become zero. Since SPEED_800000 is the largest defined speed in include/uapi/linux/ethtool.h, this issue can occur in realistic scenarios. To ensure length_to_duration() returns a non-zero value for minimum-sized Ethernet frames (ETH_ZLEN = 60), picos_per_byte must be at least 17 (60 * 17 > PSEC_PER_NSEC which is 1000). This patch enforces a minimum value of 17 for picos_per_byte when the calculated value would be lower, and adds a warning message to inform users that scheduling accuracy may be affected at very high link speeds. 
Fixes: fb66df20a720 ("net/sched: taprio: extend minimum interval restriction to entire cycle too") Reported-by: syzbot+398e1ee4ca2cac05fddb@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=398e1ee4ca2cac05fddb Signed-off-by: Takamitsu Iwai Link: https://patch.msgid.link/20250728173149.45585-1-takamitz@amazon.co.jp Signed-off-by: Jakub Kicinski --- net/sched/sch_taprio.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index e759e43ad27e..39b735386996 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -43,6 +43,11 @@ static struct static_key_false taprio_have_working_mqprio; #define TAPRIO_SUPPORTED_FLAGS \ (TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST | TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD) #define TAPRIO_FLAGS_INVALID U32_MAX +/* Minimum value for picos_per_byte to ensure non-zero duration + * for minimum-sized Ethernet frames (ETH_ZLEN = 60). + * 60 * 17 > PSEC_PER_NSEC (1000) + */ +#define TAPRIO_PICOS_PER_BYTE_MIN 17 struct sched_entry { /* Durations between this GCL entry and the GCL entry where the @@ -1284,7 +1289,8 @@ static void taprio_start_sched(struct Qdisc *sch, } static void taprio_set_picos_per_byte(struct net_device *dev, - struct taprio_sched *q) + struct taprio_sched *q, + struct netlink_ext_ack *extack) { struct ethtool_link_ksettings ecmd; int speed = SPEED_10; @@ -1300,6 +1306,15 @@ static void taprio_set_picos_per_byte(struct net_device *dev, skip: picos_per_byte = (USEC_PER_SEC * 8) / speed; + if (picos_per_byte < TAPRIO_PICOS_PER_BYTE_MIN) { + if (!extack) + pr_warn("Link speed %d is too high. Schedule may be inaccurate.\n", + speed); + NL_SET_ERR_MSG_FMT_MOD(extack, + "Link speed %d is too high. 
Schedule may be inaccurate.", + speed); + picos_per_byte = TAPRIO_PICOS_PER_BYTE_MIN; + } atomic64_set(&q->picos_per_byte, picos_per_byte); netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n", @@ -1324,7 +1339,7 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event, if (dev != qdisc_dev(q->root)) continue; - taprio_set_picos_per_byte(dev, q); + taprio_set_picos_per_byte(dev, q, NULL); stab = rtnl_dereference(q->root->stab); @@ -1844,7 +1859,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, q->flags = taprio_flags; /* Needed for length_to_duration() during netlink attribute parsing */ - taprio_set_picos_per_byte(dev, q); + taprio_set_picos_per_byte(dev, q, extack); err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags); if (err < 0) From 1dbf1d590d10a6d1978e8184f8dfe20af22d680a Mon Sep 17 00:00:00 2001 From: Sharath Chandra Vurukala Date: Wed, 30 Jul 2025 16:21:18 +0530 Subject: [PATCH 1404/2411] net: Add locking to protect skb->dev access in ip_output In ip_output() skb->dev is updated from skb_dst(skb)->dev, which can become invalid when the interface is unregistered and freed. Introduce a new skb_dst_dev_rcu() function to be used instead of skb_dst_dev() under rcu_read_lock() in ip_output(). This will ensure that all the skbs associated with the dev being deregistered will be transmitted out first, before freeing the dev. Given that ip_output() is called within an rcu_read_lock() critical section or from a bottom-half context, it is safe to introduce an RCU read-side critical section within it. Multiple panic call stacks were observed when UL traffic was run concurrently with device deregistration from different functions; one sample is pasted below for reference.
[496733.627565][T13385] Call trace: [496733.627570][T13385] bpf_prog_ce7c9180c3b128ea_cgroupskb_egres+0x24c/0x7f0 [496733.627581][T13385] __cgroup_bpf_run_filter_skb+0x128/0x498 [496733.627595][T13385] ip_finish_output+0xa4/0xf4 [496733.627605][T13385] ip_output+0x100/0x1a0 [496733.627613][T13385] ip_send_skb+0x68/0x100 [496733.627618][T13385] udp_send_skb+0x1c4/0x384 [496733.627625][T13385] udp_sendmsg+0x7b0/0x898 [496733.627631][T13385] inet_sendmsg+0x5c/0x7c [496733.627639][T13385] __sys_sendto+0x174/0x1e4 [496733.627647][T13385] __arm64_sys_sendto+0x28/0x3c [496733.627653][T13385] invoke_syscall+0x58/0x11c [496733.627662][T13385] el0_svc_common+0x88/0xf4 [496733.627669][T13385] do_el0_svc+0x2c/0xb0 [496733.627676][T13385] el0_svc+0x2c/0xa4 [496733.627683][T13385] el0t_64_sync_handler+0x68/0xb4 [496733.627689][T13385] el0t_64_sync+0x1a4/0x1a8 Changes in v3: - Replaced WARN_ON() with WARN_ON_ONCE(), as suggested by Willem de Bruijn. - Dropped legacy lines mistakenly pulled in from an outdated branch. 
Changes in v2: - Addressed review comments from Eric Dumazet - Used READ_ONCE() to prevent potential load/store tearing - Added skb_dst_dev_rcu() and used along with rcu_read_lock() in ip_output Signed-off-by: Sharath Chandra Vurukala Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250730105118.GA26100@hu-sharathv-hyd.qualcomm.com Signed-off-by: Jakub Kicinski --- include/net/dst.h | 12 ++++++++++++ net/ipv4/ip_output.c | 15 ++++++++++----- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 00467c1b5093..bab01363bb97 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -568,11 +568,23 @@ static inline struct net_device *dst_dev(const struct dst_entry *dst) return READ_ONCE(dst->dev); } +static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst) +{ + /* In the future, use rcu_dereference(dst->dev) */ + WARN_ON_ONCE(!rcu_read_lock_held()); + return READ_ONCE(dst->dev); +} + static inline struct net_device *skb_dst_dev(const struct sk_buff *skb) { return dst_dev(skb_dst(skb)); } +static inline struct net_device *skb_dst_dev_rcu(const struct sk_buff *skb) +{ + return dst_dev_rcu(skb_dst(skb)); +} + static inline struct net *skb_dst_dev_net(const struct sk_buff *skb) { return dev_net(skb_dst_dev(skb)); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 10a1d182fd84..84e7f8a2f50f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -425,15 +425,20 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb) int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_device *dev = skb_dst_dev(skb), *indev = skb->dev; + struct net_device *dev, *indev = skb->dev; + int ret_val; + rcu_read_lock(); + dev = skb_dst_dev_rcu(skb); skb->dev = dev; skb->protocol = htons(ETH_P_IP); - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, - net, sk, skb, indev, dev, - ip_finish_output, - !(IPCB(skb)->flags & IPSKB_REROUTED)); + ret_val 
= NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, skb, indev, dev, + ip_finish_output, + !(IPCB(skb)->flags & IPSKB_REROUTED)); + rcu_read_unlock(); + return ret_val; } EXPORT_SYMBOL(ip_output); From 3ca824369b71d4b441e1fdcdee8e66bcb05510a9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Aug 2025 16:56:01 -0400 Subject: [PATCH 1405/2411] tracing: Have unsigned int function args displayed as hexadecimal Most function arguments that are passed in as unsigned int or unsigned long are better displayed as hexadecimal than normal integer. For example, the functions: static void __create_object(unsigned long ptr, size_t size, int min_count, gfp_t gfp, unsigned int objflags); static bool stack_access_ok(struct unwind_state *state, unsigned long _addr, size_t len); void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); Show up in the trace as: __create_object(ptr=-131387050520576, size=4096, min_count=1, gfp=3264, objflags=0) <-kmem_cache_alloc_noprof stack_access_ok(state=0xffffc9000233fc98, _addr=-60473102566256, len=8) <-unwind_next_frame __local_bh_disable_ip(ip=-2127311112, cnt=256) <-handle_softirqs Instead, by displaying unsigned as hexadecimal, they look more like this: __create_object(ptr=0xffff8881028d2080, size=0x280, min_count=1, gfp=0x82820, objflags=0x0) <-kmem_cache_alloc_node_noprof stack_access_ok(state=0xffffc90000003938, _addr=0xffffc90000003930, len=0x8) <-unwind_next_frame __local_bh_disable_ip(ip=0xffffffff8133cef8, cnt=0x100) <-handle_softirqs Which is much easier to understand as most unsigned longs are usually just pointers. Even the "unsigned int cnt" in __local_bh_disable_ip() looks better as hexadecimal as a lot of flags are passed as unsigned. 
Changes since v2: https://lore.kernel.org/20250801111453.01502861@gandalf.local.home - Use btf_int_encoding() instead of open coding it (Martin KaFai Lau) Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Douglas Raillard Cc: Martin KaFai Lau Link: https://lore.kernel.org/20250801165601.7770d65c@gandalf.local.home Acked-by: Yonghong Song Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_output.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 0b3db02030a7..97db0b0ccf3e 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -701,6 +701,7 @@ void print_function_args(struct trace_seq *s, unsigned long *args, struct btf *btf; s32 tid, nr = 0; int a, p, x; + u16 encode; trace_seq_printf(s, "("); @@ -744,7 +745,12 @@ void print_function_args(struct trace_seq *s, unsigned long *args, trace_seq_printf(s, "0x%lx", arg); break; case BTF_KIND_INT: - trace_seq_printf(s, "%ld", arg); + encode = btf_int_encoding(t); + /* Print unsigned ints as hex */ + if (encode & BTF_INT_SIGNED) + trace_seq_printf(s, "%ld", arg); + else + trace_seq_printf(s, "0x%lx", arg); break; case BTF_KIND_ENUM: trace_seq_printf(s, "%ld", arg); From 83e6384374bac8a9da3411fae7f24376a7dbd2a3 Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Tue, 22 Jul 2025 09:18:18 -0700 Subject: [PATCH 1406/2411] smp: Fix spelling in on_each_cpu_cond_mask()'s doc-comment "boolean" is spelt as "blooean". Fix that. Signed-off-by: Roman Kisel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250722161818.6139-1-romank@linux.microsoft.com --- kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/smp.c b/kernel/smp.c index 4649fa4872ff..56f83aa58ec8 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -1018,7 +1018,7 @@ void __init smp_init(void) * @cond_func: A callback function that is passed a cpu id and * the info parameter. 
The function is called * with preemption disabled. The function should - * return a blooean value indicating whether to IPI + * return a boolean value indicating whether to IPI * the specified CPU. * @func: The function to run on all applicable CPUs. * This must be fast and non-blocking. From e703b7e247503b8bf87b62c02a4392749b09eca8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jul 2025 21:44:55 +0200 Subject: [PATCH 1407/2411] futex: Move futex cleanup to __mmdrop() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Futex hash allocations are done in mm_init() and the cleanup happens in __mmput(). That works most of the time, but there are mm instances which are instantiated via mm_alloc() and freed via mmdrop(), which causes the futex hash to be leaked. Move the cleanup to __mmdrop(). Fixes: 56180dd20c19 ("futex: Use RCU-based per-CPU reference counting instead of rcuref_t") Reported-by: André Draszik Signed-off-by: Thomas Gleixner Tested-by: André Draszik Link: https://lore.kernel.org/all/87ldo5ihu0.ffs@tglx Closes: https://lore.kernel.org/all/0c8cc83bb73abf080faf584f319008b67d0931db.camel@linaro.org --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index f82b77eef7fe..1b0535ee5ffa 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -686,6 +686,7 @@ void __mmdrop(struct mm_struct *mm) mm_pasid_drop(mm); mm_destroy_cid(mm); percpu_counter_destroy_many(mm->rss_stat, NR_MM_COUNTERS); + futex_hash_free(mm); free_mm(mm); } @@ -1133,7 +1134,6 @@ static inline void __mmput(struct mm_struct *mm) if (mm->binfmt) module_put(mm->binfmt->module); lru_gen_del_mm(mm); - futex_hash_free(mm); mmdrop(mm); } From 0808da36b982442afc4a34555e492a16f2e5973e Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 1 Aug 2025 11:47:10 -0400 Subject: [PATCH 1408/2411] ALSA: usb-audio: Don't use printk_ratelimit for debug prints printk_ratelimit is deprecated, since it 
shares state with all other printk sites. Additionally, the suppression message is printed at warning level even though the actual messages are printed at debug and are (usually) invisible! This can result in thousands of messages like retire_capture_urb: 4992 callbacks suppressed in the console, and can inhibit debugging since it is unclear what the source of the suppressed callbacks is. Switch to dev_dbg_ratelimited which doesn't print anything unless debug is enabled. Signed-off-by: Sean Anderson Link: https://patch.msgid.link/20250801154710.739464-1-sean.anderson@linux.dev Signed-off-by: Takashi Iwai --- sound/usb/pcm.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index b24ee38fad72..bff92505e408 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -1336,11 +1336,10 @@ static void retire_capture_urb(struct snd_usb_substream *subs, for (i = 0; i < urb->number_of_packets; i++) { cp = (unsigned char *)urb->transfer_buffer + urb->iso_frame_desc[i].offset + subs->pkt_offset_adj; - if (urb->iso_frame_desc[i].status && printk_ratelimit()) { - dev_dbg(&subs->dev->dev, "frame %d active: %d\n", - i, urb->iso_frame_desc[i].status); - // continue; - } + if (urb->iso_frame_desc[i].status) + dev_dbg_ratelimited(&subs->dev->dev, + "frame %d active: %d\n", i, + urb->iso_frame_desc[i].status); bytes = urb->iso_frame_desc[i].actual_length; if (subs->stream_offset_adj > 0) { unsigned int adj = min(subs->stream_offset_adj, bytes); From 1b30d44417278196a90c79244bb43e8428586345 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 1 Aug 2025 16:23:30 -0700 Subject: [PATCH 1409/2411] bpf: Fix memory leak of bpf_scc_info objects env->scc_info array contains references to bpf_scc_info objects allocated lazily in verifier.c:scc_visit_alloc(). env->scc_cnt was supposed to track env->scc_info array size in order to free referenced objects in verifier.c:free_states(). 
Fix initialization of env->scc_cnt that was omitted in verifier.c:compute_scc(). To reproduce the bug: - build with CONFIG_DEBUG_KMEMLEAK - boot and load bpf program with loops, e.g.: ./veristat -q pyperf180.bpf.o - initiate memleak scan and check results: echo scan > /sys/kernel/debug/kmemleak cat /sys/kernel/debug/kmemleak Fixes: c9e31900b54c ("bpf: propagate read/precision marks over state graph backedges") Reported-by: Jens Axboe Closes: https://lore.kernel.org/bpf/CAADnVQKXUWg9uRCPD5ebRXwN4dmBCRUFFM7kN=GxymYz3zU25A@mail.gmail.com/T/ Suggested-by: Alexei Starovoitov Tested-by: Jens Axboe Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250801232330.1800436-1-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0806295945e4..c4f69a9e9af6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -23114,6 +23114,8 @@ static void free_states(struct bpf_verifier_env *env) for (i = 0; i < env->scc_cnt; ++i) { info = env->scc_info[i]; + if (!info) + continue; for (j = 0; j < info->num_visits; j++) free_backedges(&info->visits[j]); kvfree(info); @@ -24554,6 +24556,7 @@ static int compute_scc(struct bpf_verifier_env *env) err = -ENOMEM; goto exit; } + env->scc_cnt = next_scc_id; exit: kvfree(stack); kvfree(pre); From be71ce9796c36517c677ab1d3c6691423dd0bdec Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 8 Jul 2025 10:51:23 +0200 Subject: [PATCH 1410/2411] drm/bridge: fix OF node leak Make sure to drop the OF node reference taken when creating the aux bridge device when the device is later released. 
Fixes: 6914968a0b52 ("drm/bridge: properly refcount DT nodes in aux bridge drivers") Cc: Dmitry Baryshkov Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250708085124.15445-2-johan@kernel.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/bridge/aux-bridge.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/bridge/aux-bridge.c b/drivers/gpu/drm/bridge/aux-bridge.c index b63304d3a80f..b3e4cdff61d6 100644 --- a/drivers/gpu/drm/bridge/aux-bridge.c +++ b/drivers/gpu/drm/bridge/aux-bridge.c @@ -18,6 +18,7 @@ static void drm_aux_bridge_release(struct device *dev) { struct auxiliary_device *adev = to_auxiliary_dev(dev); + of_node_put(dev->of_node); ida_free(&drm_aux_bridge_ida, adev->id); kfree(adev); @@ -65,6 +66,7 @@ int drm_aux_bridge_register(struct device *parent) ret = auxiliary_device_init(adev); if (ret) { + of_node_put(adev->dev.of_node); ida_free(&drm_aux_bridge_ida, adev->id); kfree(adev); return ret; From 5c241ed8d031693dadf33dd98ed2e7cc363e9b66 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 28 Jul 2025 15:52:59 +0800 Subject: [PATCH 1411/2411] mm/shmem, swap: improve cached mTHP handling and fix potential hang The current swap-in code assumes that, when a swap entry in shmem mapping is order 0, its cached folios (if present) must be order 0 too, which turns out not always correct. The problem is shmem_split_large_entry is called before verifying the folio will eventually be swapped in, one possible race is: CPU1 CPU2 shmem_swapin_folio /* swap in of order > 0 swap entry S1 */ folio = swap_cache_get_folio /* folio = NULL */ order = xa_get_order /* order > 0 */ folio = shmem_swap_alloc_folio /* mTHP alloc failure, folio = NULL */ <... 
Interrupted ...> shmem_swapin_folio /* S1 is swapped in */ shmem_writeout /* S1 is swapped out, folio cached */ shmem_split_large_entry(..., S1) /* S1 is split, but the folio covering it has order > 0 now */ Now any following swapin of S1 will hang: `xa_get_order` returns 0, and folio lookup will return a folio with order > 0. The `xa_get_order(&mapping->i_pages, index) != folio_order(folio)` check will always be true, causing swap-in to return -EEXIST. And this looks fragile. So fix this up by allowing a larger folio to be seen in the swap cache, and by checking that the whole shmem mapping range covered by the swapin has the right swap value upon inserting the folio. And drop the redundant tree walks before the insertion. This will actually improve performance, as it avoids two redundant Xarray tree walks in the hot path, and the only side effect is that in the failure path, shmem may redundantly reallocate a few folios causing temporary slight memory pressure. It is also worth noting that it may seem the order and value check before inserting might help reduce lock contention, which is not true. The swap cache layer ensures a raced swapin will either see a swap cache folio or fail to do a swapin (we have the SWAP_HAS_CACHE bit even if the swap cache is bypassed), so holding the folio lock and checking the folio flag is already good enough for avoiding the lock contention. The chance that a folio passes the swap entry value check but the shmem mapping slot has changed should be very low.
Link: https://lkml.kernel.org/r/20250728075306.12704-1-ryncsn@gmail.com Link: https://lkml.kernel.org/r/20250728075306.12704-2-ryncsn@gmail.com Fixes: 809bc86517cc ("mm: shmem: support large folio swap out") Signed-off-by: Kairui Song Reviewed-by: Kemeng Shi Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: Hugh Dickins Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Cc: Dev Jain Cc: Signed-off-by: Andrew Morton --- mm/shmem.c | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 7570a24e0ae4..1d0fd266c29b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -891,7 +891,9 @@ static int shmem_add_to_page_cache(struct folio *folio, pgoff_t index, void *expected, gfp_t gfp) { XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio)); - long nr = folio_nr_pages(folio); + unsigned long nr = folio_nr_pages(folio); + swp_entry_t iter, swap; + void *entry; VM_BUG_ON_FOLIO(index != round_down(index, nr), folio); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); @@ -903,14 +905,25 @@ static int shmem_add_to_page_cache(struct folio *folio, gfp &= GFP_RECLAIM_MASK; folio_throttle_swaprate(folio, gfp); + swap = radix_to_swp_entry(expected); do { + iter = swap; xas_lock_irq(&xas); - if (expected != xas_find_conflict(&xas)) { - xas_set_err(&xas, -EEXIST); - goto unlock; + xas_for_each_conflict(&xas, entry) { + /* + * The range must either be empty, or filled with + * expected swap entries. Shmem swap entries are never + * partially freed without split of both entry and + * folio, so there shouldn't be any holes. 
+ */ + if (!expected || entry != swp_to_radix_entry(iter)) { + xas_set_err(&xas, -EEXIST); + goto unlock; + } + iter.val += 1 << xas_get_order(&xas); } - if (expected && xas_find_conflict(&xas)) { + if (expected && iter.val - nr != swap.val) { xas_set_err(&xas, -EEXIST); goto unlock; } @@ -2359,7 +2372,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, error = -ENOMEM; goto failed; } - } else if (order != folio_order(folio)) { + } else if (order > folio_order(folio)) { /* * Swap readahead may swap in order 0 folios into swapcache * asynchronously, while the shmem mapping can still stores @@ -2384,15 +2397,23 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, swap = swp_entry(swp_type(swap), swp_offset(swap) + offset); } + } else if (order < folio_order(folio)) { + swap.val = round_down(swap.val, 1 << folio_order(folio)); + index = round_down(index, 1 << folio_order(folio)); } alloced: - /* We have to do this with folio locked to prevent races */ + /* + * We have to do this with the folio locked to prevent races. + * The shmem_confirm_swap below only checks if the first swap + * entry matches the folio, that's enough to ensure the folio + * is not used outside of shmem, as shmem swap entries + * and swap cache folios are never partially freed. + */ folio_lock(folio); if ((!skip_swapcache && !folio_test_swapcache(folio)) || - folio->swap.val != swap.val || !shmem_confirm_swap(mapping, index, swap) || - xa_get_order(&mapping->i_pages, index) != folio_order(folio)) { + folio->swap.val != swap.val) { error = -EEXIST; goto unlock; } From fefbeed8c6f62dc10f80a6b1787e75de2c64ad0d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 14 Jul 2025 17:20:02 -0700 Subject: [PATCH 1412/2411] init/Kconfig: restore CONFIG_BROKEN help text Linus added it in 2003, it later was removed. Put it back. Cc: Anshuman Khandual Cc: Borislav Betkov Cc: David S. 
Miller Cc: Ingo Molnar Cc: Thomas Gleinxer Cc: Christophe Leroy Signed-off-by: Andrew Morton --- init/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/init/Kconfig b/init/Kconfig index 666783eb50ab..c66a33865f1f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -169,6 +169,10 @@ menu "General setup" config BROKEN bool + help + This option allows you to choose whether you want to try to + compile (and fix) old drivers that haven't been updated to + new infrastructure. config BROKEN_ON_SMP bool From 6c6d8f8ba7789c221a2e4c43a0ed982c7a41f428 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 16 Jul 2025 14:32:45 +0100 Subject: [PATCH 1413/2411] lib/xxhash: remove unused functions xxh32_digest() and xxh32_update() were added in 2017 in the original xxhash commit, but have remained unused. Remove them. Link: https://lkml.kernel.org/r/20250716133245.243363-1-linux@treblig.org Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Christoph Hellwig Cc: Dave Gilbert Cc: Nick Terrell Signed-off-by: Andrew Morton --- include/linux/xxhash.h | 26 ---------- lib/xxhash.c | 107 ----------------------------------------- 2 files changed, 133 deletions(-) diff --git a/include/linux/xxhash.h b/include/linux/xxhash.h index df42511438d0..27f57eca8cb1 100644 --- a/include/linux/xxhash.h +++ b/include/linux/xxhash.h @@ -177,32 +177,6 @@ struct xxh64_state { */ void xxh32_reset(struct xxh32_state *state, uint32_t seed); -/** - * xxh32_update() - hash the data given and update the xxh32 state - * - * @state: The xxh32 state to update. - * @input: The data to hash. - * @length: The length of the data to hash. - * - * After calling xxh32_reset() call xxh32_update() as many times as necessary. - * - * Return: Zero on success, otherwise an error code. - */ -int xxh32_update(struct xxh32_state *state, const void *input, size_t length); - -/** - * xxh32_digest() - produce the current xxh32 hash - * - * @state: Produce the current xxh32 hash of this state. 
- * - * A hash value can be produced at any time. It is still possible to continue - * inserting input into the hash state after a call to xxh32_digest(), and - * generate new hashes later on, by calling xxh32_digest() again. - * - * Return: The xxh32 hash stored in the state. - */ -uint32_t xxh32_digest(const struct xxh32_state *state); - /** * xxh64_reset() - reset the xxh64 state to start a new hashing operation * diff --git a/lib/xxhash.c b/lib/xxhash.c index b5bd567aa6b3..cf629766f376 100644 --- a/lib/xxhash.c +++ b/lib/xxhash.c @@ -267,113 +267,6 @@ void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed) } EXPORT_SYMBOL(xxh64_reset); -int xxh32_update(struct xxh32_state *state, const void *input, const size_t len) -{ - const uint8_t *p = (const uint8_t *)input; - const uint8_t *const b_end = p + len; - - if (input == NULL) - return -EINVAL; - - state->total_len_32 += (uint32_t)len; - state->large_len |= (len >= 16) | (state->total_len_32 >= 16); - - if (state->memsize + len < 16) { /* fill in tmp buffer */ - memcpy((uint8_t *)(state->mem32) + state->memsize, input, len); - state->memsize += (uint32_t)len; - return 0; - } - - if (state->memsize) { /* some data left from previous update */ - const uint32_t *p32 = state->mem32; - - memcpy((uint8_t *)(state->mem32) + state->memsize, input, - 16 - state->memsize); - - state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32)); - p32++; - state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32)); - p32++; - state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32)); - p32++; - state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32)); - p32++; - - p += 16-state->memsize; - state->memsize = 0; - } - - if (p <= b_end - 16) { - const uint8_t *const limit = b_end - 16; - uint32_t v1 = state->v1; - uint32_t v2 = state->v2; - uint32_t v3 = state->v3; - uint32_t v4 = state->v4; - - do { - v1 = xxh32_round(v1, get_unaligned_le32(p)); - p += 4; - v2 = xxh32_round(v2, get_unaligned_le32(p)); - p += 4; - v3 
= xxh32_round(v3, get_unaligned_le32(p)); - p += 4; - v4 = xxh32_round(v4, get_unaligned_le32(p)); - p += 4; - } while (p <= limit); - - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } - - if (p < b_end) { - memcpy(state->mem32, p, (size_t)(b_end-p)); - state->memsize = (uint32_t)(b_end-p); - } - - return 0; -} -EXPORT_SYMBOL(xxh32_update); - -uint32_t xxh32_digest(const struct xxh32_state *state) -{ - const uint8_t *p = (const uint8_t *)state->mem32; - const uint8_t *const b_end = (const uint8_t *)(state->mem32) + - state->memsize; - uint32_t h32; - - if (state->large_len) { - h32 = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) + - xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18); - } else { - h32 = state->v3 /* == seed */ + PRIME32_5; - } - - h32 += state->total_len_32; - - while (p + 4 <= b_end) { - h32 += get_unaligned_le32(p) * PRIME32_3; - h32 = xxh_rotl32(h32, 17) * PRIME32_4; - p += 4; - } - - while (p < b_end) { - h32 += (*p) * PRIME32_5; - h32 = xxh_rotl32(h32, 11) * PRIME32_1; - p++; - } - - h32 ^= h32 >> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; -} -EXPORT_SYMBOL(xxh32_digest); - int xxh64_update(struct xxh64_state *state, const void *input, const size_t len) { const uint8_t *p = (const uint8_t *)input; From ed4f142f72a9191b8236778093074c277435bf8a Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 18 Jul 2025 16:39:28 +0100 Subject: [PATCH 1414/2411] stackdepot: make max number of pools boot-time configurable We're hitting the WARN in depot_init_pool() about reaching the stack depot limit because we have long stacks that don't dedup very well. Introduce a new start-up parameter to allow users to set the maximum number of stack depot pools.
Link: https://lkml.kernel.org/r/20250718153928.94229-1-matt@readmodwrite.com Signed-off-by: Matt Fleming Acked-by: Vlastimil Babka Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitriy Vyukov Cc: Oscar Salvador Signed-off-by: Andrew Morton --- .../admin-guide/kernel-parameters.txt | 5 ++ lib/stackdepot.c | 67 ++++++++++++++++--- 2 files changed, 63 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3d1e55ed4382..1673e803c47f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -7029,6 +7029,11 @@ consumed by the stack hash table. By default this is set to false. + stack_depot_max_pools= [KNL,EARLY] + Specify the maximum number of pools to use for storing + stack traces. Pools are allocated on-demand up to this + limit. Default value is 8191 pools. + stacktrace [FTRACE] Enabled the stack tracer on boot up. diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 73d7b50924ef..de0b0025af2b 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -36,11 +36,11 @@ #include #include -#define DEPOT_POOLS_CAP 8192 -/* The pool_index is offset by 1 so the first record does not have a 0 handle. */ -#define DEPOT_MAX_POOLS \ - (((1LL << (DEPOT_POOL_INDEX_BITS)) - 1 < DEPOT_POOLS_CAP) ? \ - (1LL << (DEPOT_POOL_INDEX_BITS)) - 1 : DEPOT_POOLS_CAP) +/* + * The pool_index is offset by 1 so the first record does not have a 0 handle. + */ +static unsigned int stack_max_pools __read_mostly = + MIN((1LL << DEPOT_POOL_INDEX_BITS) - 1, 8192); static bool stack_depot_disabled; static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); @@ -62,7 +62,7 @@ static unsigned int stack_bucket_number_order; static unsigned int stack_hash_mask; /* Array of memory regions that store stack records. 
*/ -static void *stack_pools[DEPOT_MAX_POOLS]; +static void **stack_pools; /* Newly allocated pool that is not yet added to stack_pools. */ static void *new_pool; /* Number of pools in stack_pools. */ @@ -101,6 +101,34 @@ static int __init disable_stack_depot(char *str) } early_param("stack_depot_disable", disable_stack_depot); +static int __init parse_max_pools(char *str) +{ + const long long limit = (1LL << (DEPOT_POOL_INDEX_BITS)) - 1; + unsigned int max_pools; + int rv; + + rv = kstrtouint(str, 0, &max_pools); + if (rv) + return rv; + + if (max_pools < 1024) { + pr_err("stack_depot_max_pools below 1024, using default of %u\n", + stack_max_pools); + goto out; + } + + if (max_pools > limit) { + pr_err("stack_depot_max_pools exceeds %lld, using default of %u\n", + limit, stack_max_pools); + goto out; + } + + stack_max_pools = max_pools; +out: + return 0; +} +early_param("stack_depot_max_pools", parse_max_pools); + void __init stack_depot_request_early_init(void) { /* Too late to request early init now. 
*/ @@ -182,6 +210,17 @@ int __init stack_depot_early_init(void) } init_stack_table(entries); + pr_info("allocating space for %u stack pools via memblock\n", + stack_max_pools); + stack_pools = + memblock_alloc(stack_max_pools * sizeof(void *), PAGE_SIZE); + if (!stack_pools) { + pr_err("stack pools allocation failed, disabling\n"); + memblock_free(stack_table, entries * sizeof(struct list_head)); + stack_depot_disabled = true; + return -ENOMEM; + } + return 0; } @@ -231,6 +270,16 @@ int stack_depot_init(void) stack_hash_mask = entries - 1; init_stack_table(entries); + pr_info("allocating space for %u stack pools via kvcalloc\n", + stack_max_pools); + stack_pools = kvcalloc(stack_max_pools, sizeof(void *), GFP_KERNEL); + if (!stack_pools) { + pr_err("stack pools allocation failed, disabling\n"); + kvfree(stack_table); + stack_depot_disabled = true; + ret = -ENOMEM; + } + out_unlock: mutex_unlock(&stack_depot_init_mutex); @@ -245,9 +294,9 @@ static bool depot_init_pool(void **prealloc) { lockdep_assert_held(&pool_lock); - if (unlikely(pools_num >= DEPOT_MAX_POOLS)) { + if (unlikely(pools_num >= stack_max_pools)) { /* Bail out if we reached the pool limit. */ - WARN_ON_ONCE(pools_num > DEPOT_MAX_POOLS); /* should never happen */ + WARN_ON_ONCE(pools_num > stack_max_pools); /* should never happen */ WARN_ON_ONCE(!new_pool); /* to avoid unnecessary pre-allocation */ WARN_ONCE(1, "Stack depot reached limit capacity"); return false; @@ -273,7 +322,7 @@ static bool depot_init_pool(void **prealloc) * NULL; do not reset to NULL if we have reached the maximum number of * pools. 
*/ - if (pools_num < DEPOT_MAX_POOLS) + if (pools_num < stack_max_pools) WRITE_ONCE(new_pool, NULL); else WRITE_ONCE(new_pool, STACK_DEPOT_POISON); From 07d24902977e4704fab8472981e73a0ad6dfa1fd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 10 Jun 2025 08:53:27 +0000 Subject: [PATCH 1415/2411] kexec: enable CMA based contiguous allocation When booting a new kernel with kexec_file, the kernel picks a target location that the kernel should live at, then allocates random pages, checks whether any of those pages magically happens to coincide with a target address range and if so, uses them for that range. For every page allocated this way, it then creates a page list that the relocation code - code that executes while all CPUs are off and we are just about to jump into the new kernel - copies to their final memory location. We can not put them there before, because chances are pretty good that at least some page in the target range is already in use by the currently running Linux environment. Copying is happening from a single CPU at RAM rate, which takes around 4-50 ms per 100 MiB. All of this is inefficient and error prone. To successfully kexec, we need to quiesce all devices of the outgoing kernel so they don't scribble over the new kernel's memory. We have seen cases where that does not happen properly (*cough* GIC *cough*) and hence the new kernel was corrupted. This started a month long journey to root cause failing kexecs to eventually see memory corruption, because the new kernel was corrupted severely enough that it could not emit output to tell us about the fact that it was corrupted. By allocating memory for the next kernel from a memory range that is guaranteed scribbling free, we can boot the next kernel up to a point where it is at least able to detect corruption and maybe even stop it before it becomes severe. This increases the chance for successful kexecs.
Since kexec got introduced, Linux has gained the CMA framework which can perform physically contiguous memory mappings, while keeping that memory available for movable memory when it is not needed for contiguous allocations. The default CMA allocator is for DMA allocations. This patch adds logic to the kexec file loader to attempt to place the target payload at a location allocated from CMA. If successful, it uses that memory range directly instead of creating copy instructions during the hot phase. To ensure that there is a safety net in case anything goes wrong with the CMA allocation, it also adds a flag for user space to force disable CMA allocations. Using CMA allocations has two advantages: 1) Faster by 4-50 ms per 100 MiB. There is no more need to copy in the hot phase. 2) More robust. Even if by accident some page is still in use for DMA, the new kernel image will be safe from that access because it resides in a memory region that is considered allocated in the old kernel and has a chance to reinitialize that component. 
Link: https://lkml.kernel.org/r/20250610085327.51817-1-graf@amazon.com Signed-off-by: Alexander Graf Acked-by: Baoquan He Reviewed-by: Pasha Tatashin Cc: Zhongkun He Signed-off-by: Andrew Morton --- arch/riscv/kernel/kexec_elf.c | 1 + include/linux/kexec.h | 10 ++++ include/uapi/linux/kexec.h | 1 + kernel/kexec.c | 2 +- kernel/kexec_core.c | 100 +++++++++++++++++++++++++++++++--- kernel/kexec_file.c | 51 ++++++++++++++++- kernel/kexec_internal.h | 2 +- 7 files changed, 156 insertions(+), 11 deletions(-) diff --git a/arch/riscv/kernel/kexec_elf.c b/arch/riscv/kernel/kexec_elf.c index f4755d49b89e..56444c7bd34e 100644 --- a/arch/riscv/kernel/kexec_elf.c +++ b/arch/riscv/kernel/kexec_elf.c @@ -95,6 +95,7 @@ static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, kbuf.buf_align = PMD_SIZE; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE); + kbuf.cma = NULL; kbuf.top_down = false; ret = arch_kexec_locate_mem_hole(&kbuf); if (!ret) { diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 03f85ad03025..1b10a5d84b68 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -79,6 +79,12 @@ extern note_buf_t __percpu *crash_notes; typedef unsigned long kimage_entry_t; +/* + * This is a copy of the UAPI struct kexec_segment and must be identical + * to it because it gets copied straight from user space into kernel + * memory. Do not modify this structure unless you change the way segments + * get ingested from user space. + */ struct kexec_segment { /* * This pointer can point to user memory if kexec_load() system @@ -172,6 +178,7 @@ int kexec_image_post_load_cleanup_default(struct kimage *image); * @buf_align: Minimum alignment needed. * @buf_min: The buffer can't be placed below this address. * @buf_max: The buffer can't be placed above this address. + * @cma: CMA page if the buffer is backed by CMA. * @top_down: Allocate from top of memory. * @random: Place the buffer at a random position. 
*/ @@ -184,6 +191,7 @@ struct kexec_buf { unsigned long buf_align; unsigned long buf_min; unsigned long buf_max; + struct page *cma; bool top_down; #ifdef CONFIG_CRASH_DUMP bool random; @@ -340,6 +348,7 @@ struct kimage { unsigned long nr_segments; struct kexec_segment segment[KEXEC_SEGMENT_MAX]; + struct page *segment_cma[KEXEC_SEGMENT_MAX]; struct list_head control_pages; struct list_head dest_pages; @@ -361,6 +370,7 @@ struct kimage { */ unsigned int hotplug_support:1; #endif + unsigned int no_cma:1; #ifdef ARCH_HAS_KIMAGE_ARCH struct kimage_arch arch; diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h index 5ae1741ea8ea..8958ebfcff94 100644 --- a/include/uapi/linux/kexec.h +++ b/include/uapi/linux/kexec.h @@ -27,6 +27,7 @@ #define KEXEC_FILE_ON_CRASH 0x00000002 #define KEXEC_FILE_NO_INITRAMFS 0x00000004 #define KEXEC_FILE_DEBUG 0x00000008 +#define KEXEC_FILE_NO_CMA 0x00000010 /* These values match the ELF architecture values. * Unless there is a good reason that should continue to be the case. 
diff --git a/kernel/kexec.c b/kernel/kexec.c index a6b3f96bb50c..28008e3d462e 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -152,7 +152,7 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments, goto out; for (i = 0; i < nr_segments; i++) { - ret = kimage_load_segment(image, &image->segment[i]); + ret = kimage_load_segment(image, i); if (ret) goto out; } diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 3a9a9f240dbc..e390c0df6d55 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -553,6 +554,24 @@ static void kimage_free_entry(kimage_entry_t entry) kimage_free_pages(page); } +static void kimage_free_cma(struct kimage *image) +{ + unsigned long i; + + for (i = 0; i < image->nr_segments; i++) { + struct page *cma = image->segment_cma[i]; + u32 nr_pages = image->segment[i].memsz >> PAGE_SHIFT; + + if (!cma) + continue; + + arch_kexec_pre_free_pages(page_address(cma), nr_pages); + dma_release_from_contiguous(NULL, cma, nr_pages); + image->segment_cma[i] = NULL; + } + +} + void kimage_free(struct kimage *image) { kimage_entry_t *ptr, entry; @@ -591,6 +610,9 @@ void kimage_free(struct kimage *image) /* Free the kexec control pages... */ kimage_free_page_list(&image->control_pages); + /* Free CMA allocations */ + kimage_free_cma(image); + /* * Free up any temporary buffers allocated. This might hit if * error occurred much later after buffer allocation. 
@@ -716,9 +738,69 @@ static struct page *kimage_alloc_page(struct kimage *image, return page; } -static int kimage_load_normal_segment(struct kimage *image, - struct kexec_segment *segment) +static int kimage_load_cma_segment(struct kimage *image, int idx) { + struct kexec_segment *segment = &image->segment[idx]; + struct page *cma = image->segment_cma[idx]; + char *ptr = page_address(cma); + unsigned long maddr; + size_t ubytes, mbytes; + int result = 0; + unsigned char __user *buf = NULL; + unsigned char *kbuf = NULL; + + if (image->file_mode) + kbuf = segment->kbuf; + else + buf = segment->buf; + ubytes = segment->bufsz; + mbytes = segment->memsz; + maddr = segment->mem; + + /* Then copy from source buffer to the CMA one */ + while (mbytes) { + size_t uchunk, mchunk; + + ptr += maddr & ~PAGE_MASK; + mchunk = min_t(size_t, mbytes, + PAGE_SIZE - (maddr & ~PAGE_MASK)); + uchunk = min(ubytes, mchunk); + + if (uchunk) { + /* For file based kexec, source pages are in kernel memory */ + if (image->file_mode) + memcpy(ptr, kbuf, uchunk); + else + result = copy_from_user(ptr, buf, uchunk); + ubytes -= uchunk; + if (image->file_mode) + kbuf += uchunk; + else + buf += uchunk; + } + + if (result) { + result = -EFAULT; + goto out; + } + + ptr += mchunk; + maddr += mchunk; + mbytes -= mchunk; + + cond_resched(); + } + + /* Clear any remainder */ + memset(ptr, 0, mbytes); + +out: + return result; +} + +static int kimage_load_normal_segment(struct kimage *image, int idx) +{ + struct kexec_segment *segment = &image->segment[idx]; unsigned long maddr; size_t ubytes, mbytes; int result; @@ -733,6 +815,9 @@ static int kimage_load_normal_segment(struct kimage *image, mbytes = segment->memsz; maddr = segment->mem; + if (image->segment_cma[idx]) + return kimage_load_cma_segment(image, idx); + result = kimage_set_destination(image, maddr); if (result < 0) goto out; @@ -787,13 +872,13 @@ static int kimage_load_normal_segment(struct kimage *image, } #ifdef CONFIG_CRASH_DUMP -static int 
kimage_load_crash_segment(struct kimage *image, - struct kexec_segment *segment) +static int kimage_load_crash_segment(struct kimage *image, int idx) { /* For crash dumps kernels we simply copy the data from * user space to it's destination. * We do things a page at a time for the sake of kmap. */ + struct kexec_segment *segment = &image->segment[idx]; unsigned long maddr; size_t ubytes, mbytes; int result; @@ -858,18 +943,17 @@ static int kimage_load_crash_segment(struct kimage *image, } #endif -int kimage_load_segment(struct kimage *image, - struct kexec_segment *segment) +int kimage_load_segment(struct kimage *image, int idx) { int result = -ENOMEM; switch (image->type) { case KEXEC_TYPE_DEFAULT: - result = kimage_load_normal_segment(image, segment); + result = kimage_load_normal_segment(image, idx); break; #ifdef CONFIG_CRASH_DUMP case KEXEC_TYPE_CRASH: - result = kimage_load_crash_segment(image, segment); + result = kimage_load_crash_segment(image, idx); break; #endif } diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 69fe76fd9233..41271eee0f99 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "kexec_internal.h" #ifdef CONFIG_KEXEC_SIG @@ -253,6 +254,8 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, ret = 0; } + image->no_cma = !!(flags & KEXEC_FILE_NO_CMA); + if (cmdline_len) { image->cmdline_buf = memdup_user(cmdline_ptr, cmdline_len); if (IS_ERR(image->cmdline_buf)) { @@ -434,7 +437,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, i, ksegment->buf, ksegment->bufsz, ksegment->mem, ksegment->memsz); - ret = kimage_load_segment(image, &image->segment[i]); + ret = kimage_load_segment(image, i); if (ret) goto out; } @@ -663,6 +666,43 @@ static int kexec_walk_resources(struct kexec_buf *kbuf, return walk_system_ram_res(0, ULONG_MAX, kbuf, func); } +static int kexec_alloc_contig(struct kexec_buf *kbuf) +{ + size_t nr_pages = 
kbuf->memsz >> PAGE_SHIFT; + unsigned long mem; + struct page *p; + + /* User space disabled CMA allocations, bail out. */ + if (kbuf->image->no_cma) + return -EPERM; + + /* Skip CMA logic for crash kernel */ + if (kbuf->image->type == KEXEC_TYPE_CRASH) + return -EPERM; + + p = dma_alloc_from_contiguous(NULL, nr_pages, get_order(kbuf->buf_align), true); + if (!p) + return -ENOMEM; + + pr_debug("allocated %zu DMA pages at 0x%lx", nr_pages, page_to_boot_pfn(p)); + + mem = page_to_boot_pfn(p) << PAGE_SHIFT; + + if (kimage_is_destination_range(kbuf->image, mem, mem + kbuf->memsz)) { + /* Our region is already in use by a statically defined one. Bail out. */ + pr_debug("CMA overlaps existing mem: 0x%lx+0x%lx\n", mem, kbuf->memsz); + dma_release_from_contiguous(NULL, p, nr_pages); + return -EBUSY; + } + + kbuf->mem = page_to_boot_pfn(p) << PAGE_SHIFT; + kbuf->cma = p; + + arch_kexec_post_alloc_pages(page_address(p), (int)nr_pages, 0); + + return 0; +} + /** * kexec_locate_mem_hole - find free memory for the purgatory or the next kernel * @kbuf: Parameters for the memory search. @@ -687,6 +727,13 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf) if (ret <= 0) return ret; + /* + * Try to find a free physically contiguous block of memory first. With that, we + * can avoid any copying at kexec time. + */ + if (!kexec_alloc_contig(kbuf)) + return 0; + if (!IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) ret = kexec_walk_resources(kbuf, locate_mem_hole_callback); else @@ -732,6 +779,7 @@ int kexec_add_buffer(struct kexec_buf *kbuf) /* Ensure minimum alignment needed for segments. 
*/ kbuf->memsz = ALIGN(kbuf->memsz, PAGE_SIZE); kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE); + kbuf->cma = NULL; /* Walk the RAM ranges and allocate a suitable range for the buffer */ ret = arch_kexec_locate_mem_hole(kbuf); @@ -744,6 +792,7 @@ int kexec_add_buffer(struct kexec_buf *kbuf) ksegment->bufsz = kbuf->bufsz; ksegment->mem = kbuf->mem; ksegment->memsz = kbuf->memsz; + kbuf->image->segment_cma[kbuf->image->nr_segments] = kbuf->cma; kbuf->image->nr_segments++; return 0; } diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h index 30a733a55a67..228bb88c018b 100644 --- a/kernel/kexec_internal.h +++ b/kernel/kexec_internal.h @@ -10,7 +10,7 @@ struct kimage *do_kimage_alloc_init(void); int sanity_check_segment_list(struct kimage *image); void kimage_free_page_list(struct list_head *list); void kimage_free(struct kimage *image); -int kimage_load_segment(struct kimage *image, struct kexec_segment *segment); +int kimage_load_segment(struct kimage *image, int idx); void kimage_terminate(struct kimage *image); int kimage_is_destination_range(struct kimage *image, unsigned long start, unsigned long end); From f8cd9193b62e92ad25def5370ca8ea2bc7585381 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 21 Jul 2025 19:45:57 +0200 Subject: [PATCH 1416/2411] ucount: fix atomic_long_inc_below() argument type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The type of u argument of atomic_long_inc_below() should be long to avoid unwanted truncation to int. The patch fixes the wrong argument type of an internal function to prevent unwanted argument truncation. It fixes an internal locking primitive; it should not have any direct effect on userspace. Mark said : AFAICT there's no problem in practice because atomic_long_inc_below() : is only used by inc_ucount(), and it looks like the value is : constrained between 0 and INT_MAX. 
: : In inc_ucount() the limit value is taken from : user_namespace::ucount_max[], and AFAICT that's only written by : sysctls, to the table setup by setup_userns_sysctls(), where : UCOUNT_ENTRY() limits the value between 0 and INT_MAX. : : This is certainly a cleanup, but there might be no functional issue in : practice as above. Link: https://lkml.kernel.org/r/20250721174610.28361-1-ubizjak@gmail.com Fixes: f9c82a4ea89c ("Increase size of ucounts to atomic_long_t") Signed-off-by: Uros Bizjak Reviewed-by: "Eric W. Biederman" Cc: Sebastian Andrzej Siewior Cc: "Paul E. McKenney" Cc: Alexey Gladkov Cc: Roman Gushchin Cc: MengEn Sun Cc: "Thomas Weißschuh" Cc: Mark Rutland Signed-off-by: Andrew Morton --- kernel/ucount.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/ucount.c b/kernel/ucount.c index 8686e329b8f2..f629db485a07 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -199,7 +199,7 @@ void put_ucounts(struct ucounts *ucounts) } } -static inline bool atomic_long_inc_below(atomic_long_t *v, int u) +static inline bool atomic_long_inc_below(atomic_long_t *v, long u) { long c, old; c = atomic_long_read(v); From 58b4fba81a2e400a47ddbe7c1dc0a2bc038313b7 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 21 Jul 2025 19:45:58 +0200 Subject: [PATCH 1417/2411] ucount: use atomic_long_try_cmpxchg() in atomic_long_inc_below() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use atomic_long_try_cmpxchg() instead of atomic_long_cmpxchg (*ptr, old, new) == old in atomic_long_inc_below(). x86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). Also, atomic_long_try_cmpxchg implicitly assigns old *ptr value to "old" when cmpxchg fails, enabling further code simplifications. No functional change intended. 
Link: https://lkml.kernel.org/r/20250721174610.28361-2-ubizjak@gmail.com Signed-off-by: Uros Bizjak Reviewed-by: Alexey Gladkov Cc: Sebastian Andrzej Siewior Cc: "Paul E. McKenney" Cc: Alexey Gladkov Cc: Roman Gushchin Cc: MengEn Sun Cc: "Thomas Weißschuh" Signed-off-by: Andrew Morton --- kernel/ucount.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/kernel/ucount.c b/kernel/ucount.c index f629db485a07..586af49fc03e 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -201,16 +201,14 @@ void put_ucounts(struct ucounts *ucounts) static inline bool atomic_long_inc_below(atomic_long_t *v, long u) { - long c, old; - c = atomic_long_read(v); - for (;;) { + long c = atomic_long_read(v); + + do { if (unlikely(c >= u)) return false; - old = atomic_long_cmpxchg(v, c, c+1); - if (likely(old == c)) - return true; - c = old; - } + } while (!atomic_long_try_cmpxchg(v, &c, c+1)); + + return true; } struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, From 1f03d55e5ef0b041bd66fbf7803952c901a93fcb Mon Sep 17 00:00:00 2001 From: Wang Yaxin Date: Mon, 21 Jul 2025 09:40:49 +0800 Subject: [PATCH 1418/2411] MAINTAINERS: add maintainers for delaytop The delaytop tool supports showing system delays and task-level delays, effectively identifying the top-n tasks with high latency in the system, which is highly beneficial for improving system performance. Wang Yaxin and her colleague Fan Yu focus on locating system delay issues. To promote the thriving development of delaytop, we hope to serve as maintainers to continuously improve it, aiming to provide a more effective solution for system latency issues in the future. 
Link: https://lkml.kernel.org/r/20250721094049958ImB8XG_imntcPqpQn1KfG@zte.com.cn Signed-off-by: Wang Yaxin Signed-off-by: Fan Yu Reviewed-by: Yang Yang Cc: Balbir Singh Cc: xu xin Cc: Krzysztof Kozlowski Signed-off-by: Andrew Morton --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 947ec6bf5b95..87897a285bc1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19462,6 +19462,16 @@ S: Maintained F: include/linux/delayacct.h F: kernel/delayacct.c +TASK DELAY MONITORING TOOLS +M: Andrew Morton +M: Wang Yaxin +M: Fan Yu +L: linux-kernel@vger.kernel.org +S: Maintained +F: Documentation/accounting/delay-accounting.rst +F: tools/accounting/delaytop.c +F: tools/accounting/getdelays.c + PERFORMANCE EVENTS SUBSYSTEM M: Peter Zijlstra M: Ingo Molnar From a30469cac8ce6555284948dab30066ce1ea43548 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Tue, 22 Jul 2025 15:34:24 +0800 Subject: [PATCH 1419/2411] KVM: x86: fix typo "notifer" Patch series "treewide: Fix typo "notifer"", v3. There are some spelling mistakes of 'notifer' in comments which should be 'notifier'. Fix them and add it to scripts/spelling.txt. This patch (of 8): There are some spelling mistakes of 'notifer' which should be 'notifier'. 
Link: https://lkml.kernel.org/r/576F0D85F6853074+20250722072734.19367-1-wangyuli@uniontech.com Link: https://lkml.kernel.org/r/7F05778C3A1A9F8B+20250722073431.21983-1-wangyuli@uniontech.com Signed-off-by: WangYuli Signed-off-by: Andrew Morton --- arch/x86/kvm/i8254.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 739aa6c0d0c3..9ff55112900a 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -641,7 +641,7 @@ static void kvm_pit_reset(struct kvm_pit *pit) kvm_pit_reset_reinject(pit); } -static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask) +static void pit_mask_notifier(struct kvm_irq_mask_notifier *kimn, bool mask) { struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier); @@ -694,7 +694,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) pit_state->irq_ack_notifier.gsi = 0; pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq; - pit->mask_notifier.func = pit_mask_notifer; + pit->mask_notifier.func = pit_mask_notifier; kvm_pit_reset(pit); From fbedfb051a4c74854c23f9c898fc6b29fab7be60 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Tue, 22 Jul 2025 15:34:25 +0800 Subject: [PATCH 1420/2411] cxl: mce: fix typo "notifer" According to the context, "mce_notifer" should be "mce_notifier". 
Link: https://lkml.kernel.org/r/E1EB1BA9FDF07D53+20250722073431.21983-2-wangyuli@uniontech.com Fixes: 516e5bd0b6bf ("cxl: Add mce notifier to emit aliased address for extended linear cache") Signed-off-by: WangYuli Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Signed-off-by: Andrew Morton --- drivers/cxl/core/mce.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/mce.h b/drivers/cxl/core/mce.h index ace73424eeb6..ca272e8db6c7 100644 --- a/drivers/cxl/core/mce.h +++ b/drivers/cxl/core/mce.h @@ -7,7 +7,7 @@ #ifdef CONFIG_CXL_MCE int devm_cxl_register_mce_notifier(struct device *dev, - struct notifier_block *mce_notifer); + struct notifier_block *mce_notifier); #else static inline int devm_cxl_register_mce_notifier(struct device *dev, From 26197b0fd220ceb2b26f2ea2948c00fdd9855fae Mon Sep 17 00:00:00 2001 From: WangYuli Date: Tue, 22 Jul 2025 15:34:26 +0800 Subject: [PATCH 1421/2411] drm/xe: fix typo "notifer" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a spelling mistake of 'notifer' in the comment which should be 'notifier'. Link: https://lkml.kernel.org/r/94190C5F54A19F3E+20250722073431.21983-3-wangyuli@uniontech.com Signed-off-by: WangYuli Reviewed-by: Thomas Hellström Signed-off-by: Andrew Morton --- drivers/gpu/drm/xe/xe_vm_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 1979e9bdbdf3..0ca27579fd1f 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -259,7 +259,7 @@ struct xe_vm { * up for revalidation. Protected from access with the * @invalidated_lock. Removing items from the list * additionally requires @lock in write mode, and adding - * items to the list requires either the @userptr.notifer_lock in + * items to the list requires either the @userptr.notifier_lock in * write mode, OR @lock in write mode. 
*/ struct list_head invalidated; From 545040384e78d6eaabb20e1f4baa85ace864dcfc Mon Sep 17 00:00:00 2001 From: WangYuli Date: Tue, 22 Jul 2025 15:34:27 +0800 Subject: [PATCH 1422/2411] net: mvneta: fix typo "notifer" There is a spelling mistake of 'notifer' in the comment which should be 'notifier'. Link: https://lkml.kernel.org/r/0CB4300CB6F49007+20250722073431.21983-4-wangyuli@uniontech.com Signed-off-by: WangYuli Reviewed-by: Simon Horman Signed-off-by: Andrew Morton --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 147571fdada3..ee4696600146 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4610,7 +4610,7 @@ static int mvneta_stop(struct net_device *dev) /* Inform that we are stopping so we don't want to setup the * driver for new CPUs in the notifiers. The code of the * notifier for CPU online is protected by the same spinlock, - * so when we get the lock, the notifer work is done. + * so when we get the lock, the notifier work is done. */ spin_lock(&pp->lock); pp->is_stopped = true; From 004f42dd90b7ef542a51983bdaa5b2ef621ed41d Mon Sep 17 00:00:00 2001 From: WangYuli Date: Tue, 22 Jul 2025 15:34:30 +0800 Subject: [PATCH 1423/2411] xen/xenbus: fix typo "notifer" There is a spelling mistake of 'notifer' in the comment which should be 'notifier'. Link: https://lkml.kernel.org/r/C6633C66376C709A+20250722073431.21983-7-wangyuli@uniontech.com Signed-off-by: WangYuli Reviewed-by: Juergen Gross Signed-off-by: Andrew Morton --- include/xen/xenbus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 3f90bdd387b6..00b84f2e402b 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -180,7 +180,7 @@ int xenbus_printf(struct xenbus_transaction t, * sprintf-style type string, and pointer. 
Returns 0 or errno.*/ int xenbus_gather(struct xenbus_transaction t, const char *dir, ...); -/* notifer routines for when the xenstore comes up */ +/* notifier routines for when the xenstore comes up */ extern int xenstored_ready; int register_xenstore_notifier(struct notifier_block *nb); void unregister_xenstore_notifier(struct notifier_block *nb); From 53f433891e698e76aaf01b84b30a17a79a53535c Mon Sep 17 00:00:00 2001 From: WangYuli Date: Tue, 22 Jul 2025 15:34:31 +0800 Subject: [PATCH 1424/2411] scripts/spelling.txt: add notifer||notifier to spelling.txt This typo was not listed in scripts/spelling.txt, thus it was more difficult to detect. Add it for convenience. Link: https://lkml.kernel.org/r/02153C05ED7B49B7+20250722073431.21983-8-wangyuli@uniontech.com Signed-off-by: WangYuli Reviewed-by: Jonathan Cameron Signed-off-by: Andrew Morton --- scripts/spelling.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/spelling.txt b/scripts/spelling.txt index ac94fa1c2415..1e89b92c2f9a 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -1099,6 +1099,7 @@ notication||notification notications||notifications notifcations||notifications notifed||notified +notifer||notifier notity||notify notfify||notify nubmer||number From fb0e9db99eefc17cb8693ce93afe5c5dbc5148a5 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 24 Jul 2025 16:42:10 +0900 Subject: [PATCH 1425/2411] fat: fix too many log in fat_chain_add() This log was excessive for a serial console. So use the ratelimited version instead. 
Link: https://lkml.kernel.org/r/87qzy611d9.fsf@mail.parknet.co.jp Signed-off-by: OGAWA Hirofumi Reported-by: syzbot+fa7ef54f66c189c04b73@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=fa7ef54f66c189c04b73 Cc: Namjae Jeon Cc: Sungjong Seo Signed-off-by: Andrew Morton --- fs/fat/misc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/fat/misc.c b/fs/fat/misc.c index c7a2d27120ba..950da09f0961 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -158,9 +158,9 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster) mark_inode_dirty(inode); } if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) { - fat_fs_error(sb, "clusters badly computed (%d != %llu)", - new_fclus, - (llu)(inode->i_blocks >> (sbi->cluster_bits - 9))); + fat_fs_error_ratelimit( + sb, "clusters badly computed (%d != %llu)", new_fclus, + (llu)(inode->i_blocks >> (sbi->cluster_bits - 9))); fat_cache_inval_inode(inode); } inode->i_blocks += nr_cluster << (sbi->cluster_bits - 9); From 8c54f7e3e0eab0174683a562051417317c4ea297 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 24 Jul 2025 12:17:15 +0100 Subject: [PATCH 1426/2411] samples: Kconfig: fix spelling mistake "instancess" -> "instances" There is a spelling mistake in the SAMPLE_TRACE_ARRAY config. Fix it. Link: https://lkml.kernel.org/r/20250724111715.141826-1-colin.i.king@gmail.com Signed-off-by: Colin Ian King Signed-off-by: Andrew Morton --- samples/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/Kconfig b/samples/Kconfig index a8880c62d4c8..6e072a5f1ed8 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -54,7 +54,7 @@ config SAMPLE_FTRACE_OPS measures the time taken to invoke one function a number of times. 
config SAMPLE_TRACE_ARRAY - tristate "Build sample module for kernel access to Ftrace instancess" + tristate "Build sample module for kernel access to Ftrace instances" depends on EVENT_TRACING && m help This builds a module that demonstrates the use of various APIs to From d92dccd05a20b7a9c2836d4e46e22128f5b73367 Mon Sep 17 00:00:00 2001 From: "fan.yu9@zte.com.cn" Date: Mon, 28 Jul 2025 16:28:34 +0800 Subject: [PATCH 1427/2411] delaytop: enhance error logging and add PSI feature description This patch improves error diagnostics and documentation for delaytop: 1) Enhanced error logging: - Added explicit error messages in critical failure paths - Implemented BOOL_FPRINT macro for robust output handling 2) PSI feature documentation: - Updated header comment to reflect PSI monitoring capability - Improved output formatting for PSI information System Pressure Information: (avg10/avg60/avg300/total) CPU some: 0.0%/ 0.0%/ 0.0%/ 345(ms) CPU full: 0.0%/ 0.0%/ 0.0%/ 0(ms) Memory full: 0.0%/ 0.0%/ 0.0%/ 0(ms) Memory some: 0.0%/ 0.0%/ 0.0%/ 0(ms) IO full: 0.0%/ 0.0%/ 0.0%/ 65(ms) IO some: 0.0%/ 0.0%/ 0.0%/ 79(ms) IRQ full: 0.0%/ 0.0%/ 0.0%/ 0(ms) Link: https://lkml.kernel.org/r/202507281628341752gMXCMN7S-Vz_LHYHum9r@zte.com.cn Signed-off-by: Fan Yu Signed-off-by: Wang Yaxin Acked-by: Yang Yang Cc: Fan Yu Cc: Jonathan Corbet Cc: xu xin Signed-off-by: Andrew Morton --- Documentation/accounting/delay-accounting.rst | 61 ++++--- tools/accounting/delaytop.c | 162 ++++++++++++------ 2 files changed, 143 insertions(+), 80 deletions(-) diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst index 664950328fb7..8ccc5af5ea1e 100644 --- a/Documentation/accounting/delay-accounting.rst +++ b/Documentation/accounting/delay-accounting.rst @@ -132,38 +132,47 @@ Get IO accounting for pid 1, it works only with -p:: The above command can be used with -v to get more debug information. 
-After the system starts, use `delaytop` to get the Top-N high-latency tasks. -this tool supports sorting by CPU latency in descending order by default, +After the system starts, use `delaytop` to get the system-wide delay information, +which includes system-wide PSI information and Top-N high-latency tasks. + +`delaytop` supports sorting by CPU latency in descending order by default, displays the top 20 high-latency tasks by default, and refreshes the latency data every 2 seconds by default. -Get Top-N tasks delay, since system boot:: +Get PSI information and Top-N tasks delay, since system boot:: bash# ./delaytop + System Pressure Information: (avg10/avg60/avg300/total) + CPU some: 0.0%/ 0.0%/ 0.0%/ 345(ms) + CPU full: 0.0%/ 0.0%/ 0.0%/ 0(ms) + Memory full: 0.0%/ 0.0%/ 0.0%/ 0(ms) + Memory some: 0.0%/ 0.0%/ 0.0%/ 0(ms) + IO full: 0.0%/ 0.0%/ 0.0%/ 65(ms) + IO some: 0.0%/ 0.0%/ 0.0%/ 79(ms) + IRQ full: 0.0%/ 0.0%/ 0.0%/ 0(ms) Top 20 processes (sorted by CPU delay): - - PID TGID COMMAND CPU(ms) IO(ms) SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms) - --------------------------------------------------------------------------------------------- - 32 32 kworker/2:0H-sy 23.65 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 497 497 kworker/R-scsi_ 1.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 495 495 kworker/R-scsi_ 1.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 494 494 scsi_eh_0 1.12 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 485 485 kworker/R-ata_s 0.90 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 574 574 kworker/R-kdmfl 0.36 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 34 34 idle_inject/3 0.33 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 1123 1123 nde-netfilter 0.28 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 60 60 ksoftirqd/7 0.25 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 114 114 kworker/0:2-cgr 0.25 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 496 496 scsi_eh_1 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 51 51 cpuhp/6 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 1667 1667 atd 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 45 45 cpuhp/5 0.23 
0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 1102 1102 nde-backupservi 0.22 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 1098 1098 systemsettings 0.21 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 1100 1100 audit-monitor 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 53 53 migration/6 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 1482 1482 sshd 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 39 39 cpuhp/4 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + PID TGID COMMAND CPU(ms) IO(ms) SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms) + ---------------------------------------------------------------------------------------------- + 161 161 zombie_memcg_re 1.40 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 130 130 blkcg_punt_bio 1.37 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 444 444 scsi_tmf_0 0.73 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1280 1280 rsyslogd 0.53 0.04 0.00 0.00 0.00 0.00 0.00 0.00 + 12 12 ksoftirqd/0 0.47 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1277 1277 nbd-server 0.44 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 308 308 kworker/2:2-sys 0.41 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 55 55 netns 0.36 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1187 1187 acpid 0.31 0.03 0.00 0.00 0.00 0.00 0.00 0.00 + 6184 6184 kworker/1:2-sys 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 186 186 kaluad 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 18 18 ksoftirqd/1 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 185 185 kmpath_rdacd 0.23 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 190 190 kstrp 0.23 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 2759 2759 agetty 0.20 0.03 0.00 0.00 0.00 0.00 0.00 0.00 + 1190 1190 kworker/0:3-sys 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1272 1272 sshd 0.15 0.04 0.00 0.00 0.00 0.00 0.00 0.00 + 1156 1156 license 0.15 0.11 0.00 0.00 0.00 0.00 0.00 0.00 + 134 134 md 0.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 6142 6142 kworker/3:2-xfs 0.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00 Dynamic interactive interface of delaytop:: diff --git a/tools/accounting/delaytop.c b/tools/accounting/delaytop.c index cd848af9a856..9afb1ffc00ba 100644 --- a/tools/accounting/delaytop.c 
+++ b/tools/accounting/delaytop.c @@ -1,16 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 /* - * delaytop.c - task delay monitoring tool. + * delaytop.c - system-wide delay monitoring tool. * * This tool provides real-time monitoring and statistics of * system, container, and task-level delays, including CPU, - * memory, IO, and IRQ and delay accounting. It supports both - * interactive (top-like), and can output delay information - * for the whole system, specific containers (cgroups), or - * individual tasks (PIDs). + * memory, IO, and IRQ. It supports both interactive (top-like), + * and can output delay information for the whole system, specific + * containers (cgroups), or individual tasks (PIDs). * * Key features: * - Collects per-task delay accounting statistics via taskstats. + * - Collects system-wide PSI information. * - Supports sorting, filtering. * - Supports both interactive (screen refresh). * @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -41,7 +42,6 @@ #include #include #include -#include #define PSI_CPU_SOME "/proc/pressure/cpu" #define PSI_CPU_FULL "/proc/pressure/cpu" @@ -62,6 +62,12 @@ #define MAX_MSG_SIZE 1024 #define MAX_TASKS 1000 #define SET_TASK_STAT(task_count, field) tasks[task_count].field = stats.field +#define BOOL_FPRINT(stream, fmt, ...) 
\ +({ \ + int ret = fprintf(stream, fmt, ##__VA_ARGS__); \ + ret >= 0; \ +}) +#define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n" /* Program settings structure */ struct config { @@ -262,6 +268,7 @@ static int create_nl_socket(void) local.nl_family = AF_NETLINK; if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) { + fprintf(stderr, "Failed to bind socket when create nl_socket\n"); close(fd); return -1; } @@ -332,13 +339,17 @@ static int get_family_id(int sd) rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, CTRL_ATTR_FAMILY_NAME, (void *)name, strlen(TASKSTATS_GENL_NAME)+1); - if (rc < 0) + if (rc < 0) { + fprintf(stderr, "Failed to send cmd for family id\n"); return 0; + } rep_len = recv(sd, &ans, sizeof(ans), 0); if (ans.n.nlmsg_type == NLMSG_ERROR || - (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len)) + (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len)) { + fprintf(stderr, "Failed to receive response for family id\n"); return 0; + } na = (struct nlattr *) GENLMSG_DATA(&ans); na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); @@ -433,26 +444,30 @@ static void read_psi_stats(void) static int read_comm(int pid, char *comm_buf, size_t buf_size) { char path[64]; + int ret = -1; size_t len; FILE *fp; snprintf(path, sizeof(path), "/proc/%d/comm", pid); fp = fopen(path, "r"); - if (!fp) - return -1; + if (!fp) { + fprintf(stderr, "Failed to open comm file /proc/%d/comm\n", pid); + return ret; + } + if (fgets(comm_buf, buf_size, fp)) { len = strlen(comm_buf); if (len > 0 && comm_buf[len - 1] == '\n') comm_buf[len - 1] = '\0'; - } else { - fclose(fp); - return -1; + ret = 0; } + fclose(fp); - return 0; + + return ret; } -static int fetch_and_fill_task_info(int pid, const char *comm) +static void fetch_and_fill_task_info(int pid, const char *comm) { struct { struct nlmsghdr n; @@ -466,13 +481,21 @@ static int fetch_and_fill_task_info(int pid, const char *comm) int nl_len; int rc; + /* Send request for task stats */ if 
(send_cmd(nl_sd, family_id, getpid(), TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &pid, sizeof(pid)) < 0) { - return -1; + fprintf(stderr, "Failed to send request for task stats\n"); + return; } + + /* Receive response */ rc = recv(nl_sd, &resp, sizeof(resp), 0); - if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) - return -1; + if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) { + fprintf(stderr, "Failed to receive response for task stats\n"); + return; + } + + /* Parse response */ nl_len = GENLMSG_PAYLOAD(&resp.n); na = (struct nlattr *) GENLMSG_DATA(&resp); while (nl_len > 0) { @@ -515,7 +538,7 @@ static int fetch_and_fill_task_info(int pid, const char *comm) nl_len -= NLA_ALIGN(na->nla_len); na = NLA_NEXT(na); } - return 0; + return; } static void get_task_delays(void) @@ -654,54 +677,82 @@ static void display_results(void) { time_t now = time(NULL); struct tm *tm_now = localtime(&now); - char timestamp[32]; - int i, count; FILE *out = stdout; + char timestamp[32]; + bool suc = true; + int i, count; + + /* Clear terminal screen */ + suc &= BOOL_FPRINT(out, "\033[H\033[J"); - fprintf(out, "\033[H\033[J"); /* PSI output (one-line, no cat style) */ - fprintf(out, "System Pressure Information: "); - fprintf(out, "(avg10/avg60/avg300/total)\n"); - fprintf(out, "CPU:"); - fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.cpu_full_avg10, - psi.cpu_full_avg60, psi.cpu_full_avg300, psi.cpu_full_total); - fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.cpu_some_avg10, - psi.cpu_some_avg60, psi.cpu_some_avg300, psi.cpu_some_total); + suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60/avg300/total)\n"); + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "CPU some:", + psi.cpu_some_avg10, + psi.cpu_some_avg60, + psi.cpu_some_avg300, + psi.cpu_some_total / 1000); + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "CPU full:", + psi.cpu_full_avg10, + psi.cpu_full_avg60, + psi.cpu_full_avg300, + psi.cpu_full_total / 1000); + suc &= BOOL_FPRINT(out, 
PSI_LINE_FORMAT, + "Memory full:", + psi.memory_full_avg10, + psi.memory_full_avg60, + psi.memory_full_avg300, + psi.memory_full_total / 1000); + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "Memory some:", + psi.memory_some_avg10, + psi.memory_some_avg60, + psi.memory_some_avg300, + psi.memory_some_total / 1000); + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "IO full:", + psi.io_full_avg10, + psi.io_full_avg60, + psi.io_full_avg300, + psi.io_full_total / 1000); + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "IO some:", + psi.io_some_avg10, + psi.io_some_avg60, + psi.io_some_avg300, + psi.io_some_total / 1000); + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "IRQ full:", + psi.irq_full_avg10, + psi.irq_full_avg60, + psi.irq_full_avg300, + psi.irq_full_total / 1000); - fprintf(out, "Memory:"); - fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.memory_full_avg10, - psi.memory_full_avg60, psi.memory_full_avg300, psi.memory_full_total); - fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.memory_some_avg10, - psi.memory_some_avg60, psi.memory_some_avg300, psi.memory_some_total); - - fprintf(out, "IO:"); - fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.io_full_avg10, - psi.io_full_avg60, psi.io_full_avg300, psi.io_full_total); - fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.io_some_avg10, - psi.io_some_avg60, psi.io_some_avg300, psi.io_some_total); - fprintf(out, "IRQ:"); - fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n\n", psi.irq_full_avg10, - psi.irq_full_avg60, psi.irq_full_avg300, psi.irq_full_total); if (cfg.container_path) { - fprintf(out, "Container Information (%s):\n", cfg.container_path); - fprintf(out, "Processes: running=%d, sleeping=%d, ", + suc &= BOOL_FPRINT(out, "Container Information (%s):\n", cfg.container_path); + suc &= BOOL_FPRINT(out, "Processes: running=%d, sleeping=%d, ", container_stats.nr_running, container_stats.nr_sleeping); - fprintf(out, "stopped=%d, uninterruptible=%d, io_wait=%d\n\n", + suc 
&= BOOL_FPRINT(out, "stopped=%d, uninterruptible=%d, io_wait=%d\n\n", container_stats.nr_stopped, container_stats.nr_uninterruptible, container_stats.nr_io_wait); } - fprintf(out, "Top %d processes (sorted by CPU delay):\n\n", + suc &= BOOL_FPRINT(out, "Top %d processes (sorted by CPU delay):\n", cfg.max_processes); - fprintf(out, " PID TGID COMMAND CPU(ms) IO(ms) "); - fprintf(out, "SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms)\n"); - fprintf(out, "-----------------------------------------------"); - fprintf(out, "----------------------------------------------\n"); + suc &= BOOL_FPRINT(out, "%5s %5s %-17s", "PID", "TGID", "COMMAND"); + suc &= BOOL_FPRINT(out, "%7s %7s %7s %7s %7s %7s %7s %7s\n", + "CPU(ms)", "IO(ms)", "SWAP(ms)", "RCL(ms)", + "THR(ms)", "CMP(ms)", "WP(ms)", "IRQ(ms)"); + + suc &= BOOL_FPRINT(out, "-----------------------------------------------"); + suc &= BOOL_FPRINT(out, "----------------------------------------------\n"); count = task_count < cfg.max_processes ? task_count : cfg.max_processes; for (i = 0; i < count; i++) { - fprintf(out, "%5d %5d %-15s ", + suc &= BOOL_FPRINT(out, "%5d %5d %-15s", tasks[i].pid, tasks[i].tgid, tasks[i].command); - fprintf(out, "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n", + suc &= BOOL_FPRINT(out, "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n", average_ms(tasks[i].cpu_delay_total, tasks[i].cpu_count), average_ms(tasks[i].blkio_delay_total, tasks[i].blkio_count), average_ms(tasks[i].swapin_delay_total, tasks[i].swapin_count), @@ -712,7 +763,10 @@ static void display_results(void) average_ms(tasks[i].irq_delay_total, tasks[i].irq_count)); } - fprintf(out, "\n"); + suc &= BOOL_FPRINT(out, "\n"); + + if (!suc) + perror("Error writing to output"); } /* Main function */ From b753522bed0b7e388a643f58d91bd81d8849ba43 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 27 Jul 2025 11:37:33 +0300 Subject: [PATCH 1428/2411] kho: add test for kexec handover Testing kexec handover requires a 
kernel driver that will generate some data and preserve it with KHO on the first boot and then restore that data and verify it was preserved properly after kexec. To facilitate such test, along with the kernel driver responsible for data generation, preservation and restoration add a script that runs a kernel in a VM with a minimal /init. The /init enables KHO, loads a kernel image for kexec and runs kexec reboot. After the boot of the kexeced kernel, the driver verifies that the data was properly preserved. [rppt@kernel.org: fix section mismatch] Link: https://lkml.kernel.org/r/aIiRC8fXiOXKbPM_@kernel.org Link: https://lkml.kernel.org/r/20250727083733.2590139-1-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Cc: Alexander Graf Cc: Changyuan Lyu Cc: Pasha Tatashin Cc: Pratyush Yadav Cc: Shuah Khan Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + lib/Kconfig.debug | 21 ++ lib/Makefile | 1 + lib/test_kho.c | 305 +++++++++++++++++++++++++ tools/testing/selftests/kho/arm64.conf | 9 + tools/testing/selftests/kho/init.c | 100 ++++++++ tools/testing/selftests/kho/vmtest.sh | 183 +++++++++++++++ tools/testing/selftests/kho/x86.conf | 7 + 8 files changed, 627 insertions(+) create mode 100644 lib/test_kho.c create mode 100644 tools/testing/selftests/kho/arm64.conf create mode 100644 tools/testing/selftests/kho/init.c create mode 100755 tools/testing/selftests/kho/vmtest.sh create mode 100644 tools/testing/selftests/kho/x86.conf diff --git a/MAINTAINERS b/MAINTAINERS index 87897a285bc1..d16d76f24c6e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13353,6 +13353,7 @@ F: Documentation/admin-guide/mm/kho.rst F: Documentation/core-api/kho/* F: include/linux/kexec_handover.h F: kernel/kexec_handover.c +F: tools/testing/selftests/kho/ KEYS-ENCRYPTED M: Mimi Zohar diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ebe33181b6e6..4f82d38e3c45 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -3225,6 +3225,27 @@ config TEST_OBJPOOL If unsure, say N. 
+config TEST_KEXEC_HANDOVER + bool "Test for Kexec HandOver" + default n + depends on KEXEC_HANDOVER + help + This option enables test for Kexec HandOver (KHO). + The test consists of two parts: saving kernel data before kexec and + restoring the data after kexec and verifying that it was properly + handed over. This test module creates and saves data on the boot of + the first kernel and restores and verifies the data on the boot of + kexec'ed kernel. + + For detailed documentation about KHO, see Documentation/core-api/kho. + + To run the test run: + + tools/testing/selftests/kho/vmtest.sh -h + + If unsure, say N. + + config INT_POW_KUNIT_TEST tristate "Integer exponentiation (int_pow) test" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/Makefile b/lib/Makefile index 88d6228089a8..dadf0028b319 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -102,6 +102,7 @@ obj-$(CONFIG_TEST_HMM) += test_hmm.o obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o obj-$(CONFIG_TEST_REF_TRACKER) += test_ref_tracker.o obj-$(CONFIG_TEST_OBJPOOL) += test_objpool.o +obj-$(CONFIG_TEST_KEXEC_HANDOVER) += test_kho.o obj-$(CONFIG_TEST_FPU) += test_fpu.o test_fpu-y := test_fpu_glue.o test_fpu_impl.o diff --git a/lib/test_kho.c b/lib/test_kho.c new file mode 100644 index 000000000000..c2eb899c3b45 --- /dev/null +++ b/lib/test_kho.c @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test module for KHO + * Copyright (c) 2025 Microsoft Corporation. + * + * Authors: + * Saurabh Sengar + * Mike Rapoport + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define KHO_TEST_MAGIC 0x4b484f21 /* KHO! 
*/ +#define KHO_TEST_FDT "kho_test" +#define KHO_TEST_COMPAT "kho-test-v1" + +static long max_mem = (PAGE_SIZE << MAX_PAGE_ORDER) * 2; +module_param(max_mem, long, 0644); + +struct kho_test_state { + unsigned int nr_folios; + struct folio **folios; + struct folio *fdt; + __wsum csum; +}; + +static struct kho_test_state kho_test_state; + +static int kho_test_notifier(struct notifier_block *self, unsigned long cmd, + void *v) +{ + struct kho_test_state *state = &kho_test_state; + struct kho_serialization *ser = v; + int err = 0; + + switch (cmd) { + case KEXEC_KHO_ABORT: + return NOTIFY_DONE; + case KEXEC_KHO_FINALIZE: + /* Handled below */ + break; + default: + return NOTIFY_BAD; + } + + err |= kho_preserve_folio(state->fdt); + err |= kho_add_subtree(ser, KHO_TEST_FDT, folio_address(state->fdt)); + + return err ? NOTIFY_BAD : NOTIFY_DONE; +} + +static struct notifier_block kho_test_nb = { + .notifier_call = kho_test_notifier, +}; + +static int kho_test_save_data(struct kho_test_state *state, void *fdt) +{ + phys_addr_t *folios_info __free(kvfree) = NULL; + int err = 0; + + folios_info = kvmalloc_array(state->nr_folios, sizeof(*folios_info), + GFP_KERNEL); + if (!folios_info) + return -ENOMEM; + + for (int i = 0; i < state->nr_folios; i++) { + struct folio *folio = state->folios[i]; + unsigned int order = folio_order(folio); + + folios_info[i] = virt_to_phys(folio_address(folio)) | order; + + err = kho_preserve_folio(folio); + if (err) + return err; + } + + err |= fdt_begin_node(fdt, "data"); + err |= fdt_property(fdt, "nr_folios", &state->nr_folios, + sizeof(state->nr_folios)); + err |= fdt_property(fdt, "folios_info", folios_info, + state->nr_folios * sizeof(*folios_info)); + err |= fdt_property(fdt, "csum", &state->csum, sizeof(state->csum)); + err |= fdt_end_node(fdt); + + return err; +} + +static int kho_test_prepare_fdt(struct kho_test_state *state) +{ + const char compatible[] = KHO_TEST_COMPAT; + unsigned int magic = KHO_TEST_MAGIC; + ssize_t fdt_size; + int 
err = 0; + void *fdt; + + fdt_size = state->nr_folios * sizeof(phys_addr_t) + PAGE_SIZE; + state->fdt = folio_alloc(GFP_KERNEL, get_order(fdt_size)); + if (!state->fdt) + return -ENOMEM; + + fdt = folio_address(state->fdt); + + err |= fdt_create(fdt, fdt_size); + err |= fdt_finish_reservemap(fdt); + + err |= fdt_begin_node(fdt, ""); + err |= fdt_property(fdt, "compatible", compatible, sizeof(compatible)); + err |= fdt_property(fdt, "magic", &magic, sizeof(magic)); + err |= kho_test_save_data(state, fdt); + err |= fdt_end_node(fdt); + + err |= fdt_finish(fdt); + + if (err) + folio_put(state->fdt); + + return err; +} + +static int kho_test_generate_data(struct kho_test_state *state) +{ + size_t alloc_size = 0; + __wsum csum = 0; + + while (alloc_size < max_mem) { + int order = get_random_u32() % NR_PAGE_ORDERS; + struct folio *folio; + unsigned int size; + void *addr; + + /* cap allocation so that we won't exceed max_mem */ + if (alloc_size + (PAGE_SIZE << order) > max_mem) { + order = get_order(max_mem - alloc_size); + if (order) + order--; + } + size = PAGE_SIZE << order; + + folio = folio_alloc(GFP_KERNEL | __GFP_NORETRY, order); + if (!folio) + goto err_free_folios; + + state->folios[state->nr_folios++] = folio; + addr = folio_address(folio); + get_random_bytes(addr, size); + csum = csum_partial(addr, size, csum); + alloc_size += size; + } + + state->csum = csum; + return 0; + +err_free_folios: + for (int i = 0; i < state->nr_folios; i++) + folio_put(state->folios[i]); + return -ENOMEM; +} + +static int kho_test_save(void) +{ + struct kho_test_state *state = &kho_test_state; + struct folio **folios __free(kvfree) = NULL; + unsigned long max_nr; + int err; + + max_mem = PAGE_ALIGN(max_mem); + max_nr = max_mem >> PAGE_SHIFT; + + folios = kvmalloc_array(max_nr, sizeof(*state->folios), GFP_KERNEL); + if (!folios) + return -ENOMEM; + state->folios = folios; + + err = kho_test_generate_data(state); + if (err) + return err; + + err = kho_test_prepare_fdt(state); + if 
(err) + return err; + + return register_kho_notifier(&kho_test_nb); +} + +static int kho_test_restore_data(const void *fdt, int node) +{ + const unsigned int *nr_folios; + const phys_addr_t *folios_info; + const __wsum *old_csum; + __wsum csum = 0; + int len; + + node = fdt_path_offset(fdt, "/data"); + + nr_folios = fdt_getprop(fdt, node, "nr_folios", &len); + if (!nr_folios || len != sizeof(*nr_folios)) + return -EINVAL; + + old_csum = fdt_getprop(fdt, node, "csum", &len); + if (!old_csum || len != sizeof(*old_csum)) + return -EINVAL; + + folios_info = fdt_getprop(fdt, node, "folios_info", &len); + if (!folios_info || len != sizeof(*folios_info) * *nr_folios) + return -EINVAL; + + for (int i = 0; i < *nr_folios; i++) { + unsigned int order = folios_info[i] & ~PAGE_MASK; + phys_addr_t phys = folios_info[i] & PAGE_MASK; + unsigned int size = PAGE_SIZE << order; + struct folio *folio; + + folio = kho_restore_folio(phys); + if (!folio) + break; + + if (folio_order(folio) != order) + break; + + csum = csum_partial(folio_address(folio), size, csum); + folio_put(folio); + } + + if (csum != *old_csum) + return -EINVAL; + + return 0; +} + +static int kho_test_restore(phys_addr_t fdt_phys) +{ + void *fdt = phys_to_virt(fdt_phys); + const unsigned int *magic; + int node, len, err; + + node = fdt_path_offset(fdt, "/"); + if (node < 0) + return -EINVAL; + + if (fdt_node_check_compatible(fdt, node, KHO_TEST_COMPAT)) + return -EINVAL; + + magic = fdt_getprop(fdt, node, "magic", &len); + if (!magic || len != sizeof(*magic)) + return -EINVAL; + + if (*magic != KHO_TEST_MAGIC) + return -EINVAL; + + err = kho_test_restore_data(fdt, node); + if (err) + return err; + + pr_info("KHO restore succeeded\n"); + return 0; +} + +static int __init kho_test_init(void) +{ + phys_addr_t fdt_phys; + int err; + + err = kho_retrieve_subtree(KHO_TEST_FDT, &fdt_phys); + if (!err) + return kho_test_restore(fdt_phys); + + if (err != -ENOENT) { + pr_warn("failed to retrieve %s FDT: %d\n", KHO_TEST_FDT, 
err); + return err; + } + + return kho_test_save(); +} +module_init(kho_test_init); + +static void kho_test_cleanup(void) +{ + for (int i = 0; i < kho_test_state.nr_folios; i++) + folio_put(kho_test_state.folios[i]); + + kvfree(kho_test_state.folios); +} + +static void __exit kho_test_exit(void) +{ + unregister_kho_notifier(&kho_test_nb); + kho_test_cleanup(); +} +module_exit(kho_test_exit); + +MODULE_AUTHOR("Mike Rapoport "); +MODULE_DESCRIPTION("KHO test module"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/kho/arm64.conf b/tools/testing/selftests/kho/arm64.conf new file mode 100644 index 000000000000..ee696807cd35 --- /dev/null +++ b/tools/testing/selftests/kho/arm64.conf @@ -0,0 +1,9 @@ +QEMU_CMD="qemu-system-aarch64 -M virt -cpu max" +QEMU_KCONFIG=" +CONFIG_SERIAL_AMBA_PL010=y +CONFIG_SERIAL_AMBA_PL010_CONSOLE=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +" +KERNEL_IMAGE="Image" +KERNEL_CMDLINE="console=ttyAMA0" diff --git a/tools/testing/selftests/kho/init.c b/tools/testing/selftests/kho/init.c new file mode 100644 index 000000000000..8034e24c6bf6 --- /dev/null +++ b/tools/testing/selftests/kho/init.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef NOLIBC +#include +#include +#include +#include +#include +#include +#include +#endif + +/* from arch/x86/include/asm/setup.h */ +#define COMMAND_LINE_SIZE 2048 + +/* from include/linux/kexec.h */ +#define KEXEC_FILE_NO_INITRAMFS 0x00000004 + +#define KHO_FINILIZE "/debugfs/kho/out/finalize" +#define KERNEL_IMAGE "/kernel" + +static int mount_filesystems(void) +{ + if (mount("debugfs", "/debugfs", "debugfs", 0, NULL) < 0) + return -1; + + return mount("proc", "/proc", "proc", 0, NULL); +} + +static int kho_enable(void) +{ + const char enable[] = "1"; + int fd, err = 0; + + fd = open(KHO_FINILIZE, O_RDWR); + if (fd < 0) + return -1; + + if (write(fd, enable, sizeof(enable)) != sizeof(enable)) + err = 1; /* short or failed write; fd is still closed below */ + + close(fd); + return err; +} + +static long kexec_file_load(int
kernel_fd, int initrd_fd, + unsigned long cmdline_len, const char *cmdline, + unsigned long flags) +{ + return syscall(__NR_kexec_file_load, kernel_fd, initrd_fd, cmdline_len, + cmdline, flags); +} + +static int kexec_load(void) +{ + char cmdline[COMMAND_LINE_SIZE]; + ssize_t len; + int fd, err; + + fd = open("/proc/cmdline", O_RDONLY); + if (fd < 0) + return -1; + + len = read(fd, cmdline, sizeof(cmdline)); + close(fd); + if (len <= 0) + return -1; + + /* len >= 1 here; replace trailing \n with \0 */ + cmdline[len - 1] = 0; + fd = open(KERNEL_IMAGE, O_RDONLY); + if (fd < 0) + return -1; + + err = kexec_file_load(fd, -1, len, cmdline, KEXEC_FILE_NO_INITRAMFS); + close(fd); + + return err ? : 0; +} + +int main(int argc, char *argv[]) +{ + if (mount_filesystems()) + goto err_reboot; + + if (kho_enable()) + goto err_reboot; + + if (kexec_load()) + goto err_reboot; + + if (reboot(RB_KEXEC)) + goto err_reboot; + + return 0; + +err_reboot: + reboot(RB_AUTOBOOT); + return -1; +} diff --git a/tools/testing/selftests/kho/vmtest.sh b/tools/testing/selftests/kho/vmtest.sh new file mode 100755 index 000000000000..ec70a17bd476 --- /dev/null +++ b/tools/testing/selftests/kho/vmtest.sh @@ -0,0 +1,183 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -ue + +CROSS_COMPILE="${CROSS_COMPILE:-""}" + +test_dir=$(realpath "$(dirname "$0")") +kernel_dir=$(realpath "$test_dir/../../../..") + +tmp_dir=$(mktemp -d /tmp/kho-test.XXXXXXXX) +headers_dir="$tmp_dir/usr" +initrd_dir="$tmp_dir/initrd" +initrd="$tmp_dir/initrd.cpio" + +source "$test_dir/../kselftest/ktap_helpers.sh" + +function usage() { + cat < "$kho_config" </dev/null || skip "$opt is missing" + done < "$kho_config" + + $make_cmd "$kimage" + $make_cmd headers_install INSTALL_HDR_PATH="$headers_dir" +} + +function mkinitrd() { + local kernel=$1 + + mkdir -p "$initrd_dir"/{dev,debugfs,proc} + sudo mknod "$initrd_dir/dev/console" c 5 1 + + "$CROSS_COMPILE"gcc -s -static -Os -nostdinc -I"$headers_dir/include" \ + -fno-asynchronous-unwind-tables
-fno-ident -nostdlib \ + -include "$test_dir/../../../include/nolibc/nolibc.h" \ + -o "$initrd_dir/init" "$test_dir/init.c" \ + + cp "$kernel" "$initrd_dir/kernel" + + pushd "$initrd_dir" &>/dev/null + find . | cpio -H newc --create > "$initrd" 2>/dev/null + popd &>/dev/null +} + +function run_qemu() { + local qemu_cmd=$1 + local cmdline=$2 + local kernel=$3 + local serial="$tmp_dir/qemu.serial" + + cmdline="$cmdline kho=on panic=-1" + + $qemu_cmd -m 1G -smp 2 -no-reboot -nographic -nodefaults \ + -accel kvm -accel hvf -accel tcg \ + -serial file:"$serial" \ + -append "$cmdline" \ + -kernel "$kernel" \ + -initrd "$initrd" + + grep "KHO restore succeeded" "$serial" &> /dev/null || fail "KHO failed" +} + +function target_to_arch() { + local target=$1 + + case $target in + aarch64) echo "arm64" ;; + x86_64) echo "x86" ;; + *) skip "architecture $target is not supported" + esac +} + +function main() { + local build_dir="$kernel_dir/.kho" + local jobs=$(($(nproc) * 2)) + local target="$(uname -m)" + + # skip the test if any of the preparation steps fails + set -o errtrace + trap skip ERR + + while getopts 'hd:j:t:' opt; do + case $opt in + d) + build_dir="$OPTARG" + ;; + j) + jobs="$OPTARG" + ;; + t) + target="$OPTARG" + ;; + h) + usage + exit 0 + ;; + *) + echo Unknown argument "$opt" + usage + exit 1 + ;; + esac + done + + ktap_print_header + ktap_set_plan 1 + + if [[ "$target" != "$(uname -m)" ]] && [[ -z "$CROSS_COMPILE" ]]; then + skip "Cross-platform testing needs to specify CROSS_COMPILE" + fi + + mkdir -p "$build_dir" + local arch=$(target_to_arch "$target") + source "$test_dir/$arch.conf" + + # build the kernel and create initrd + # initrd includes the kernel image that will be kexec'ed + local make_cmd="make ARCH=$arch CROSS_COMPILE=$CROSS_COMPILE -j$jobs" + build_kernel "$build_dir" "$make_cmd" "$QEMU_KCONFIG" "$KERNEL_IMAGE" + + local kernel="$build_dir/arch/$arch/boot/$KERNEL_IMAGE" + mkinitrd "$kernel" + + run_qemu "$QEMU_CMD" "$KERNEL_CMDLINE" "$kernel" + 
+ ktap_test_pass "KHO succeeded" +} + +main "$@" diff --git a/tools/testing/selftests/kho/x86.conf b/tools/testing/selftests/kho/x86.conf new file mode 100644 index 000000000000..b419e610ca22 --- /dev/null +++ b/tools/testing/selftests/kho/x86.conf @@ -0,0 +1,7 @@ +QEMU_CMD=qemu-system-x86_64 +QEMU_KCONFIG=" +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +" +KERNEL_IMAGE="bzImage" +KERNEL_CMDLINE="console=ttyS0" From 085dece6cc88b5c6fc6f2eca0403bfd2c5fbc7cb Mon Sep 17 00:00:00 2001 From: Fan Yu Date: Thu, 31 Jul 2025 22:53:26 +0800 Subject: [PATCH 1429/2411] tools/getdelays: add backward compatibility for taskstats version Add version checks to print_delayacct() to handle differences in struct taskstats across kernel versions. Field availability depends on taskstats version (t->version), corresponding to TASKSTATS_VERSION in kernel headers (see include/uapi/linux/taskstats.h). Version feature mapping: - version >= 11 - supports COMPACT statistics - version >= 13 - supports WPCOPY statistics - version >= 14 - supports IRQ statistics - version >= 16 - supports *_max and *_min delay statistics This ensures the tool works correctly with both older and newer kernel versions by conditionally printing fields based on the reported version. 
eg.1 bash# grep -r "#define TASKSTATS_VERSION" /usr/include/linux/taskstats.h "#define TASKSTATS_VERSION 10" bash# ./getdelays -d -p 1 CPU count real total virtual total delay total delay average 7481 3786181709 3807098291 36393725 0.005ms IO count delay total delay average 369 1116046035 3.025ms SWAP count delay total delay average 0 0 0.000ms RECLAIM count delay total delay average 0 0 0.000ms THRASHING count delay total delay average 0 0 0.000ms eg.2 bash# grep -r "#define TASKSTATS_VERSION" /usr/include/linux/taskstats.h "#define TASKSTATS_VERSION 14" bash# ./getdelays -d -p 1 CPU count real total virtual total delay total delay average 68862 163474790046 174584722267 19962496806 0.290ms IO count delay total delay average 0 0 0.000ms SWAP count delay total delay average 0 0 0.000ms RECLAIM count delay total delay average 0 0 0.000ms THRASHING count delay total delay average 0 0 0.000ms COMPACT count delay total delay average 0 0 0.000ms WPCOPY count delay total delay average 0 0 0.000ms IRQ count delay total delay average 0 0 0.000ms Link: https://lkml.kernel.org/r/20250731225326549CttJ7g9NfjTlaqBwl015T@zte.com.cn Signed-off-by: Fan Yu Cc: Fan Yu Cc: Jonathan Corbet Cc: Wang Yaxin Cc: xu xin Cc: Yang Yang Signed-off-by: Andrew Morton --- tools/accounting/getdelays.c | 167 +++++++++++++++++++++-------------- 1 file changed, 100 insertions(+), 67 deletions(-) diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 3feac0482fe9..21cb3c3d1331 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -194,75 +194,108 @@ static int get_family_id(int sd) #define average_ms(t, c) (t / 1000000ULL / (c ? 
c : 1)) #define delay_ms(t) (t / 1000000ULL) +/* + * Version compatibility note: + * Field availability depends on taskstats version (t->version), + * corresponding to TASKSTATS_VERSION in kernel headers + * see include/uapi/linux/taskstats.h + * + * Version feature mapping: + * version >= 11 - supports COMPACT statistics + * version >= 13 - supports WPCOPY statistics + * version >= 14 - supports IRQ statistics + * version >= 16 - supports *_max and *_min delay statistics + * + * Always verify version before accessing version-dependent fields + * to maintain backward compatibility. + */ +#define PRINT_CPU_DELAY(version, t) \ + do { \ + if (version >= 16) { \ + printf("%-10s%15s%15s%15s%15s%15s%15s%15s\n", \ + "CPU", "count", "real total", "virtual total", \ + "delay total", "delay average", "delay max", "delay min"); \ + printf(" %15llu%15llu%15llu%15llu%15.3fms%13.6fms%13.6fms\n", \ + (unsigned long long)(t)->cpu_count, \ + (unsigned long long)(t)->cpu_run_real_total, \ + (unsigned long long)(t)->cpu_run_virtual_total, \ + (unsigned long long)(t)->cpu_delay_total, \ + average_ms((double)(t)->cpu_delay_total, (t)->cpu_count), \ + delay_ms((double)(t)->cpu_delay_max), \ + delay_ms((double)(t)->cpu_delay_min)); \ + } else { \ + printf("%-10s%15s%15s%15s%15s%15s\n", \ + "CPU", "count", "real total", "virtual total", \ + "delay total", "delay average"); \ + printf(" %15llu%15llu%15llu%15llu%15.3fms\n", \ + (unsigned long long)(t)->cpu_count, \ + (unsigned long long)(t)->cpu_run_real_total, \ + (unsigned long long)(t)->cpu_run_virtual_total, \ + (unsigned long long)(t)->cpu_delay_total, \ + average_ms((double)(t)->cpu_delay_total, (t)->cpu_count)); \ + } \ + } while (0) +#define PRINT_FILED_DELAY(name, version, t, count, total, max, min) \ + do { \ + if (version >= 16) { \ + printf("%-10s%15s%15s%15s%15s%15s\n", \ + name, "count", "delay total", "delay average", \ + "delay max", "delay min"); \ + printf(" %15llu%15llu%15.3fms%13.6fms%13.6fms\n", \ + (unsigned long 
long)(t)->count, \ + (unsigned long long)(t)->total, \ + average_ms((double)(t)->total, (t)->count), \ + delay_ms((double)(t)->max), \ + delay_ms((double)(t)->min)); \ + } else { \ + printf("%-10s%15s%15s%15s\n", \ + name, "count", "delay total", "delay average"); \ + printf(" %15llu%15llu%15.3fms\n", \ + (unsigned long long)(t)->count, \ + (unsigned long long)(t)->total, \ + average_ms((double)(t)->total, (t)->count)); \ + } \ + } while (0) + static void print_delayacct(struct taskstats *t) { - printf("\n\nCPU %15s%15s%15s%15s%15s%15s%15s\n" - " %15llu%15llu%15llu%15llu%15.3fms%13.6fms%13.6fms\n" - "IO %15s%15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" - "SWAP %15s%15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" - "RECLAIM %12s%15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" - "THRASHING%12s%15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" - "COMPACT %12s%15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" - "WPCOPY %12s%15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" - "IRQ %15s%15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms%13.6fms\n", - "count", "real total", "virtual total", - "delay total", "delay average", "delay max", "delay min", - (unsigned long long)t->cpu_count, - (unsigned long long)t->cpu_run_real_total, - (unsigned long long)t->cpu_run_virtual_total, - (unsigned long long)t->cpu_delay_total, - average_ms((double)t->cpu_delay_total, t->cpu_count), - delay_ms((double)t->cpu_delay_max), - delay_ms((double)t->cpu_delay_min), - "count", "delay total", "delay average", "delay max", "delay min", - (unsigned long long)t->blkio_count, - (unsigned long long)t->blkio_delay_total, - average_ms((double)t->blkio_delay_total, t->blkio_count), - delay_ms((double)t->blkio_delay_max), - delay_ms((double)t->blkio_delay_min), - "count", "delay total", "delay average", "delay max", "delay min", - (unsigned long long)t->swapin_count, - (unsigned long long)t->swapin_delay_total, - 
average_ms((double)t->swapin_delay_total, t->swapin_count), - delay_ms((double)t->swapin_delay_max), - delay_ms((double)t->swapin_delay_min), - "count", "delay total", "delay average", "delay max", "delay min", - (unsigned long long)t->freepages_count, - (unsigned long long)t->freepages_delay_total, - average_ms((double)t->freepages_delay_total, t->freepages_count), - delay_ms((double)t->freepages_delay_max), - delay_ms((double)t->freepages_delay_min), - "count", "delay total", "delay average", "delay max", "delay min", - (unsigned long long)t->thrashing_count, - (unsigned long long)t->thrashing_delay_total, - average_ms((double)t->thrashing_delay_total, t->thrashing_count), - delay_ms((double)t->thrashing_delay_max), - delay_ms((double)t->thrashing_delay_min), - "count", "delay total", "delay average", "delay max", "delay min", - (unsigned long long)t->compact_count, - (unsigned long long)t->compact_delay_total, - average_ms((double)t->compact_delay_total, t->compact_count), - delay_ms((double)t->compact_delay_max), - delay_ms((double)t->compact_delay_min), - "count", "delay total", "delay average", "delay max", "delay min", - (unsigned long long)t->wpcopy_count, - (unsigned long long)t->wpcopy_delay_total, - average_ms((double)t->wpcopy_delay_total, t->wpcopy_count), - delay_ms((double)t->wpcopy_delay_max), - delay_ms((double)t->wpcopy_delay_min), - "count", "delay total", "delay average", "delay max", "delay min", - (unsigned long long)t->irq_count, - (unsigned long long)t->irq_delay_total, - average_ms((double)t->irq_delay_total, t->irq_count), - delay_ms((double)t->irq_delay_max), - delay_ms((double)t->irq_delay_min)); + printf("\n\n"); + + PRINT_CPU_DELAY(t->version, t); + + PRINT_FILED_DELAY("IO", t->version, t, + blkio_count, blkio_delay_total, + blkio_delay_max, blkio_delay_min); + + PRINT_FILED_DELAY("SWAP", t->version, t, + swapin_count, swapin_delay_total, + swapin_delay_max, swapin_delay_min); + + PRINT_FILED_DELAY("RECLAIM", t->version, t, + 
freepages_count, freepages_delay_total, + freepages_delay_max, freepages_delay_min); + + PRINT_FILED_DELAY("THRASHING", t->version, t, + thrashing_count, thrashing_delay_total, + thrashing_delay_max, thrashing_delay_min); + + if (t->version >= 11) { + PRINT_FILED_DELAY("COMPACT", t->version, t, + compact_count, compact_delay_total, + compact_delay_max, compact_delay_min); + } + + if (t->version >= 13) { + PRINT_FILED_DELAY("WPCOPY", t->version, t, + wpcopy_count, wpcopy_delay_total, + wpcopy_delay_max, wpcopy_delay_min); + } + + if (t->version >= 14) { + PRINT_FILED_DELAY("IRQ", t->version, t, + irq_count, irq_delay_total, + irq_delay_max, irq_delay_min); + } } static void task_context_switch_counts(struct taskstats *t) From 8d58d65621118fdca3ed6a0b3d658ba7e0e5153c Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 31 Jul 2025 09:53:43 +0800 Subject: [PATCH 1430/2411] mm: shmem: fix the shmem large folio allocation for the i915 driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit acd7ccb284b8 ("mm: shmem: add large folio support for tmpfs"), we extend the 'huge=' option to allow any sized large folios for tmpfs, which means tmpfs will allow getting a highest order hint based on the size of write() and fallocate() paths, and then will try each allowable large order. However, when the i915 driver allocates shmem memory, it doesn't provide hint information about the size of the large folio to be allocated, resulting in the inability to allocate PMD-sized shmem, which in turn affects GPU performance. Patryk added: : In my tests, the performance drop ranges from a few percent up to 13% : in Unigine Superposition under heavy memory usage on the CPU Core Ultra : 155H with the Xe 128 EU GPU. Other users have reported performance : impact up to 30% on certain workloads. 
Please find more in the : regressions reports: : https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14645 : https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13845 : : I believe the change should be backported to all active kernel branches : after version 6.12. To fix this issue, we can use the inode's size as a write size hint in shmem_read_folio_gfp() to help allocate PMD-sized large folios. Link: https://lkml.kernel.org/r/f7e64e99a3a87a8144cc6b2f1dddf7a89c12ce44.1753926601.git.baolin.wang@linux.alibaba.com Fixes: acd7ccb284b8 ("mm: shmem: add large folio support for tmpfs") Signed-off-by: Baolin Wang Reported-by: Patryk Kowalczyk Reported-by: Ville Syrjälä Tested-by: Patryk Kowalczyk Suggested-by: Hugh Dickins Cc: Signed-off-by: Andrew Morton --- mm/shmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 1d0fd266c29b..5e9ec28fab85 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -5981,8 +5981,8 @@ struct folio *shmem_read_folio_gfp(struct address_space *mapping, struct folio *folio; int error; - error = shmem_get_folio_gfp(inode, index, 0, &folio, SGP_CACHE, - gfp, NULL, NULL); + error = shmem_get_folio_gfp(inode, index, i_size_read(inode), + &folio, SGP_CACHE, gfp, NULL, NULL); if (error) return ERR_PTR(error); From b50e37889f9f343b772d9162d00105bb7a26c2f5 Mon Sep 17 00:00:00 2001 From: wang lian Date: Mon, 21 Jul 2025 19:46:14 +0800 Subject: [PATCH 1431/2411] selftests/mm: add process_madvise() tests Add tests for process_madvise(), focusing on verifying behavior under various conditions including valid usage and error cases. 
[lianux.mm@gmail.com: v7] Link: https://lkml.kernel.org/r/20250729113109.12272-1-lianux.mm@gmail.com Link: https://lkml.kernel.org/r/20250729113109.12272-1-lianux.mm@gmail.com Link: https://lkml.kernel.org/r/20250721114614.40996-1-lianux.mm@gmail.com Signed-off-by: wang lian Suggested-by: Lorenzo Stoakes Suggested-by: David Hildenbrand Suggested-by: Zi Yan Suggested-by: Mark Brown Acked-by: SeongJae Park Reviewed-by: Zi Yan Tested-by: Zi Yan Cc: Christian Brauner Cc: Jann Horn Cc: Kairui Song Cc: Liam Howlett Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/.gitignore | 1 + tools/testing/selftests/mm/Makefile | 1 + tools/testing/selftests/mm/process_madv.c | 344 ++++++++++++++++++++++ tools/testing/selftests/mm/run_vmtests.sh | 5 + 4 files changed, 351 insertions(+) create mode 100644 tools/testing/selftests/mm/process_madv.c diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index f2dafa0b700b..e7b23a8a05fe 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -21,6 +21,7 @@ on-fault-limit transhuge-stress pagemap_ioctl pfnmap +process_madv *.tmp* protection_keys protection_keys_32 diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index ae6f994d3add..d13b3cef2a2b 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -85,6 +85,7 @@ TEST_GEN_FILES += mseal_test TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += pagemap_ioctl TEST_GEN_FILES += pfnmap +TEST_GEN_FILES += process_madv TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress diff --git a/tools/testing/selftests/mm/process_madv.c b/tools/testing/selftests/mm/process_madv.c new file mode 100644 index 000000000000..471cae8427f1 --- /dev/null +++ b/tools/testing/selftests/mm/process_madv.c @@ -0,0 +1,344 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#define 
_GNU_SOURCE +#include "../kselftest_harness.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "vm_util.h" + +#include "../pidfd/pidfd.h" + +FIXTURE(process_madvise) +{ + unsigned long page_size; + pid_t child_pid; + int remote_pidfd; + int pidfd; +}; + +FIXTURE_SETUP(process_madvise) +{ + self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); + self->pidfd = PIDFD_SELF; + self->remote_pidfd = -1; + self->child_pid = -1; +}; + +FIXTURE_TEARDOWN_PARENT(process_madvise) +{ + /* This teardown is guaranteed to run, even if tests SKIP or ASSERT */ + if (self->child_pid > 0) { + kill(self->child_pid, SIGKILL); + waitpid(self->child_pid, NULL, 0); + } + + if (self->remote_pidfd >= 0) + close(self->remote_pidfd); +} + +static ssize_t sys_process_madvise(int pidfd, const struct iovec *iovec, + size_t vlen, int advice, unsigned int flags) +{ + return syscall(__NR_process_madvise, pidfd, iovec, vlen, advice, flags); +} + +/* + * This test uses PIDFD_SELF to target the current process. The main + * goal is to verify the basic behavior of process_madvise() with + * a vector of non-contiguous memory ranges, not its cross-process + * capabilities. + */ +TEST_F(process_madvise, basic) +{ + const unsigned long pagesize = self->page_size; + const int madvise_pages = 4; + struct iovec vec[madvise_pages]; + int pidfd = self->pidfd; + ssize_t ret; + char *map; + + /* + * Create a single large mapping. We will pick pages from this + * mapping to advise on. This ensures we test non-contiguous iovecs. + */ + map = mmap(NULL, pagesize * 10, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (map == MAP_FAILED) + SKIP(return, "mmap failed, not enough memory.\n"); + + /* Fill the entire region with a known pattern. */ + memset(map, 'A', pagesize * 10); + + /* + * Setup the iovec to point to 4 non-contiguous pages + * within the mapping. 
+ */ + vec[0].iov_base = &map[0 * pagesize]; + vec[0].iov_len = pagesize; + vec[1].iov_base = &map[3 * pagesize]; + vec[1].iov_len = pagesize; + vec[2].iov_base = &map[5 * pagesize]; + vec[2].iov_len = pagesize; + vec[3].iov_base = &map[8 * pagesize]; + vec[3].iov_len = pagesize; + + ret = sys_process_madvise(pidfd, vec, madvise_pages, MADV_DONTNEED, 0); + if (ret == -1 && errno == EPERM) + SKIP(return, + "process_madvise() unsupported or permission denied, try running as root.\n"); + else if (ret == -1 && errno == EINVAL) + SKIP(return, + "process_madvise() unsupported or parameter invalid, please check arguments.\n"); + + /* The call should succeed and report the total bytes processed. */ + ASSERT_EQ(ret, madvise_pages * pagesize); + + /* Check that advised pages are now zero. */ + for (int i = 0; i < madvise_pages; i++) { + char *advised_page = (char *)vec[i].iov_base; + + /* Content must be 0, not 'A'. */ + ASSERT_EQ(*advised_page, '\0'); + } + + /* Check that an un-advised page in between is still 'A'. */ + char *unadvised_page = &map[1 * pagesize]; + + for (int i = 0; i < pagesize; i++) + ASSERT_EQ(unadvised_page[i], 'A'); + + /* Cleanup. */ + ASSERT_EQ(munmap(map, pagesize * 10), 0); +} + +/* + * This test deterministically validates process_madvise() with MADV_COLLAPSE + * on a remote process; other advice values are difficult to verify reliably. + * + * The test targets a memory region in a child process and focuses on the + * remote process_madvise() result: only the returned length is checked. + * The correctness of MADV_COLLAPSE itself is covered by the khugepaged tests.
+ */ +TEST_F(process_madvise, remote_collapse) +{ + const unsigned long pagesize = self->page_size; + long huge_page_size; + int pipe_info[2]; + ssize_t ret; + struct iovec vec; + + struct child_info { + pid_t pid; + void *map_addr; + } info; + + huge_page_size = read_pmd_pagesize(); + if (huge_page_size <= 0) + SKIP(return, "Could not determine a valid huge page size.\n"); + + ASSERT_EQ(pipe(pipe_info), 0); + + self->child_pid = fork(); + ASSERT_NE(self->child_pid, -1); + + if (self->child_pid == 0) { + char *map; + size_t map_size = 2 * huge_page_size; + + close(pipe_info[0]); + + map = mmap(NULL, map_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(map, MAP_FAILED); + + /* Fault in as small pages */ + for (size_t i = 0; i < map_size; i += pagesize) + map[i] = 'A'; + + /* Send info and pause */ + info.pid = getpid(); + info.map_addr = map; + ret = write(pipe_info[1], &info, sizeof(info)); + ASSERT_EQ(ret, sizeof(info)); + close(pipe_info[1]); + + pause(); + exit(0); + } + + close(pipe_info[1]); + + /* Receive child info */ + ret = read(pipe_info[0], &info, sizeof(info)); + if (ret <= 0) { + waitpid(self->child_pid, NULL, 0); + SKIP(return, "Failed to read child info from pipe.\n"); + } + ASSERT_EQ(ret, sizeof(info)); + close(pipe_info[0]); + self->child_pid = info.pid; + + self->remote_pidfd = syscall(__NR_pidfd_open, self->child_pid, 0); + ASSERT_GE(self->remote_pidfd, 0); + + vec.iov_base = info.map_addr; + vec.iov_len = huge_page_size; + + ret = sys_process_madvise(self->remote_pidfd, &vec, 1, MADV_COLLAPSE, + 0); + if (ret == -1) { + if (errno == EINVAL) + SKIP(return, "PROCESS_MADV_ADVISE is not supported.\n"); + else if (errno == EPERM) + SKIP(return, + "No process_madvise() permissions, try running as root.\n"); + return; + } + + ASSERT_EQ(ret, huge_page_size); +} + +/* + * Test process_madvise() with a pidfd for a process that has already + * exited to ensure correct error handling. 
+ */ +TEST_F(process_madvise, exited_process_pidfd) +{ + const unsigned long pagesize = self->page_size; + struct iovec vec; + char *map; + ssize_t ret; + + map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, + 0); + if (map == MAP_FAILED) + SKIP(return, "mmap failed, not enough memory.\n"); + + vec.iov_base = map; + vec.iov_len = pagesize; + + /* + * Using a pidfd for a process that has already exited should fail + * with ESRCH. + */ + self->child_pid = fork(); + ASSERT_NE(self->child_pid, -1); + + if (self->child_pid == 0) + exit(0); + + self->remote_pidfd = syscall(__NR_pidfd_open, self->child_pid, 0); + ASSERT_GE(self->remote_pidfd, 0); + + /* Wait for the child to ensure it has terminated. */ + waitpid(self->child_pid, NULL, 0); + + ret = sys_process_madvise(self->remote_pidfd, &vec, 1, MADV_DONTNEED, + 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ESRCH); +} + +/* + * Test process_madvise() with bad pidfds to ensure correct error + * handling. + */ +TEST_F(process_madvise, bad_pidfd) +{ + const unsigned long pagesize = self->page_size; + struct iovec vec; + char *map; + ssize_t ret; + + map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, + 0); + if (map == MAP_FAILED) + SKIP(return, "mmap failed, not enough memory.\n"); + + vec.iov_base = map; + vec.iov_len = pagesize; + + /* Using an invalid fd number (-1) should fail with EBADF. */ + ret = sys_process_madvise(-1, &vec, 1, MADV_DONTNEED, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EBADF); + + /* + * Using a valid fd that is not a pidfd (e.g. stdin) should fail + * with EBADF. + */ + ret = sys_process_madvise(STDIN_FILENO, &vec, 1, MADV_DONTNEED, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EBADF); +} + +/* + * Test that process_madvise() rejects vlen > UIO_MAXIOV. + * The kernel should return -EINVAL when the number of iovecs exceeds 1024. 
+ */ +TEST_F(process_madvise, invalid_vlen) +{ + const unsigned long pagesize = self->page_size; + int pidfd = self->pidfd; + struct iovec vec; + char *map; + ssize_t ret; + + map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, + 0); + if (map == MAP_FAILED) + SKIP(return, "mmap failed, not enough memory.\n"); + + vec.iov_base = map; + vec.iov_len = pagesize; + + ret = sys_process_madvise(pidfd, &vec, 1025, MADV_DONTNEED, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EINVAL); + + /* Cleanup. */ + ASSERT_EQ(munmap(map, pagesize), 0); +} + +/* + * Test process_madvise() with an invalid flag value. Currently, only a flag + * value of 0 is supported. This test is reserved for the future, e.g., if + * synchronous flags are added. + */ +TEST_F(process_madvise, flag) +{ + const unsigned long pagesize = self->page_size; + unsigned int invalid_flag; + int pidfd = self->pidfd; + struct iovec vec; + char *map; + ssize_t ret; + + map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, + 0); + if (map == MAP_FAILED) + SKIP(return, "mmap failed, not enough memory.\n"); + + vec.iov_base = map; + vec.iov_len = pagesize; + + invalid_flag = 0x80000000; + + ret = sys_process_madvise(pidfd, &vec, 1, MADV_DONTNEED, invalid_flag); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EINVAL); + + /* Cleanup. 
*/ + ASSERT_EQ(munmap(map, pagesize), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index a38c984103ce..471e539d82b8 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -65,6 +65,8 @@ separated by spaces: test pagemap_scan IOCTL - pfnmap tests for VM_PFNMAP handling +- process_madv + test for process_madv - cow test copy-on-write semantics - thp @@ -425,6 +427,9 @@ CATEGORY="madv_guard" run_test ./guard-regions # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate +# PROCESS_MADV test +CATEGORY="process_madv" run_test ./process_madv + CATEGORY="vma_merge" run_test ./merge if [ -x ./memfd_secret ] From d171b10b2d7b067c16d79e1d069a23a34f088d23 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 22 Jul 2025 11:22:30 -0700 Subject: [PATCH 1432/2411] mm/page-flags: remove folio_start_writeback_keepwrite() Commit cd57b77197a4 ("ext4: Convert ext4_bio_write_page() to use a folio") removed set_page_writeback_keepwrite() which was the last/only caller of folio_start_writeback_keepwrite().
Link: https://lkml.kernel.org/r/20250722182230.2114587-1-joannelkoong@gmail.com Signed-off-by: Joanne Koong Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 8e4d6eda8a8d..8d3fa3a91ce4 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -837,8 +837,6 @@ void set_page_writeback(struct page *page); #define folio_start_writeback(folio) \ __folio_start_writeback(folio, false) -#define folio_start_writeback_keepwrite(folio) \ - __folio_start_writeback(folio, true) static __always_inline bool folio_test_head(const struct folio *folio) { From 56bdf83de7f1151d141e1d020e19cc1c56ff0db4 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 23 Jul 2025 16:59:19 +0200 Subject: [PATCH 1433/2411] kasan: skip quarantine if object is still accessible under RCU Currently, enabling KASAN masks bugs where a lockless lookup path gets a pointer to a SLAB_TYPESAFE_BY_RCU object that might concurrently be recycled and is insufficiently careful about handling recycled objects: KASAN puts freed objects in SLAB_TYPESAFE_BY_RCU slabs onto its quarantine queues, even when it can't actually detect UAF in these objects, and the quarantine prevents fast recycling. When I introduced CONFIG_SLUB_RCU_DEBUG, my intention was that enabling CONFIG_SLUB_RCU_DEBUG should cause KASAN to mark such objects as freed after an RCU grace period and put them on the quarantine, while disabling CONFIG_SLUB_RCU_DEBUG should allow such objects to be reused immediately; but that hasn't actually been working. I discovered such a UAF bug involving SLAB_TYPESAFE_BY_RCU yesterday; I could only trigger this bug in a KASAN build by disabling CONFIG_SLUB_RCU_DEBUG and applying this patch. 
Link: https://lkml.kernel.org/r/20250723-kasan-tsbrcu-noquarantine-v1-1-846c8645976c@google.com Signed-off-by: Jann Horn Acked-by: Vlastimil Babka Reviewed-by: Alexander Potapenko Acked-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitriy Vyukov Cc: Vincenzo Frascino Signed-off-by: Andrew Morton --- mm/kasan/common.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index ed4873e18c75..9142964ab9c9 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -230,16 +230,12 @@ static bool check_slab_allocation(struct kmem_cache *cache, void *object, } static inline void poison_slab_object(struct kmem_cache *cache, void *object, - bool init, bool still_accessible) + bool init) { void *tagged_object = object; object = kasan_reset_tag(object); - /* RCU slabs could be legally used after free within the RCU period. */ - if (unlikely(still_accessible)) - return; - kasan_poison(object, round_up(cache->object_size, KASAN_GRANULE_SIZE), KASAN_SLAB_FREE, init); @@ -261,7 +257,22 @@ bool __kasan_slab_free(struct kmem_cache *cache, void *object, bool init, if (!kasan_arch_is_ready() || is_kfence_address(object)) return false; - poison_slab_object(cache, object, init, still_accessible); + /* + * If this point is reached with an object that must still be + * accessible under RCU, we can't poison it; in that case, also skip the + * quarantine. This should mostly only happen when CONFIG_SLUB_RCU_DEBUG + * has been disabled manually. + * + * Putting the object on the quarantine wouldn't help catch UAFs (since + * we can't poison it here), and it would mask bugs caused by + * SLAB_TYPESAFE_BY_RCU users not being careful enough about object + * reuse; so overall, putting the object into the quarantine here would + * be counterproductive. 
+ */ + if (still_accessible) + return false; + + poison_slab_object(cache, object, init); /* * If the object is put into quarantine, do not let slab put the object @@ -519,7 +530,7 @@ bool __kasan_mempool_poison_object(void *ptr, unsigned long ip) if (check_slab_allocation(slab->slab_cache, ptr, ip)) return false; - poison_slab_object(slab->slab_cache, ptr, false, false); + poison_slab_object(slab->slab_cache, ptr, false); return true; } From 881388f34338197f4ea3adf4d08dc6374c3420c8 Mon Sep 17 00:00:00 2001 From: Xuanye Liu Date: Wed, 23 Jul 2025 18:09:00 +0800 Subject: [PATCH 1434/2411] mm: add process info to bad rss-counter warning Enhance the debugging information in check_mm() by including the process name and PID when reporting bad rss-counter states. This helps identify which process is associated with the memory accounting issue. Link: https://lkml.kernel.org/r/20250723100901.1909683-1-liuqiye2025@163.com Signed-off-by: Xuanye Liu Acked-by: SeongJae Park Cc: Ben Segall Cc: David Hildenbrand Cc: Dietmar Eggemann Cc: Ingo Molnar Cc: Juri Lelli Cc: Kees Cook Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Rapoport Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Valentin Schneider Cc: Vincent Guittot Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- kernel/fork.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 1ee8eb11f38b..f799d128b968 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -585,9 +585,12 @@ static void check_mm(struct mm_struct *mm) for (i = 0; i < NR_MM_COUNTERS; i++) { long x = percpu_counter_sum(&mm->rss_stat[i]); - if (unlikely(x)) - pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n", - mm, resident_page_types[i], x); + if (unlikely(x)) { + pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld Comm:%s Pid:%d\n", + mm, resident_page_types[i], x, + current->comm, + task_pid_nr(current)); + } } if (mm_pgtables_bytes(mm)) 
From d6a511dea45ce3e851326b6bdc63f827ebb3e765 Mon Sep 17 00:00:00 2001 From: Suresh K C Date: Wed, 9 Jul 2025 23:16:57 +0530 Subject: [PATCH 1435/2411] selftests: cachestat: add tests for mmap, refactor and enhance mmap test for cachestat validation Add a cohesive test case that verifies cachestat behavior with memory-mapped files using mmap(). Also refactor the test logic to reduce redundancy, improve error reporting, and clarify failure messages for both shmem and mmap file types. [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20250709174657.6916-1-suresh.k.chandrappa@gmail.com Signed-off-by: Suresh K C Reviewed-by: Joshua Hahn Tested-by: Nhat Pham Acked-by: Nhat Pham Cc: Johannes Weiner Cc: Shuah Khan Signed-off-by: Andrew Morton --- .../selftests/cachestat/test_cachestat.c | 62 ++++++++++++++++--- 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c index 632ab44737ec..c952640f163b 100644 --- a/tools/testing/selftests/cachestat/test_cachestat.c +++ b/tools/testing/selftests/cachestat/test_cachestat.c @@ -33,6 +33,11 @@ void print_cachestat(struct cachestat *cs) cs->nr_evicted, cs->nr_recently_evicted); } +enum file_type { + FILE_MMAP, + FILE_SHMEM +}; + bool write_exactly(int fd, size_t filesize) { int random_fd = open("/dev/urandom", O_RDONLY); @@ -201,8 +206,20 @@ static int test_cachestat(const char *filename, bool write_random, bool create, out: return ret; } +const char *file_type_str(enum file_type type) +{ + switch (type) { + case FILE_SHMEM: + return "shmem"; + case FILE_MMAP: + return "mmap"; + default: + return "unknown"; + } +} -bool test_cachestat_shmem(void) + +bool run_cachestat_test(enum file_type type) { size_t PS = sysconf(_SC_PAGESIZE); size_t filesize = PS * 512 * 2; /* 2 2MB huge pages */ @@ -212,27 +229,50 @@ bool test_cachestat_shmem(void) char *filename = "tmpshmcstat"; struct cachestat cs; 
bool ret = true; + int fd; unsigned long num_pages = compute_len / PS; - int fd = shm_open(filename, O_CREAT | O_RDWR, 0600); + if (type == FILE_SHMEM) + fd = shm_open(filename, O_CREAT | O_RDWR, 0600); + else + fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0666); if (fd < 0) { - ksft_print_msg("Unable to create shmem file.\n"); + ksft_print_msg("Unable to create %s file.\n", + file_type_str(type)); ret = false; goto out; } if (ftruncate(fd, filesize)) { - ksft_print_msg("Unable to truncate shmem file.\n"); + ksft_print_msg("Unable to truncate %s file.\n",file_type_str(type)); ret = false; goto close_fd; } + switch (type) { + case FILE_SHMEM: + if (!write_exactly(fd, filesize)) { + ksft_print_msg("Unable to write to file.\n"); + ret = false; + goto close_fd; + } + break; + case FILE_MMAP: + char *map = mmap(NULL, filesize, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); - if (!write_exactly(fd, filesize)) { - ksft_print_msg("Unable to write to shmem file.\n"); + if (map == MAP_FAILED) { + ksft_print_msg("mmap failed.\n"); + ret = false; + goto close_fd; + } + for (int i = 0; i < filesize; i++) + map[i] = 'A'; + break; + default: + ksft_print_msg("Unsupported file type.\n"); ret = false; goto close_fd; } - syscall_ret = syscall(__NR_cachestat, fd, &cs_range, &cs, 0); if (syscall_ret) { @@ -308,12 +348,18 @@ int main(void) break; } - if (test_cachestat_shmem()) + if (run_cachestat_test(FILE_SHMEM)) ksft_test_result_pass("cachestat works with a shmem file\n"); else { ksft_test_result_fail("cachestat fails with a shmem file\n"); ret = 1; } + if (run_cachestat_test(FILE_MMAP)) + ksft_test_result_pass("cachestat works with a mmap file\n"); + else { + ksft_test_result_fail("cachestat fails with a mmap file\n"); + ret = 1; + } return ret; } From dee3ab621f2bab8e58e343bee0302d66c9b035ef Mon Sep 17 00:00:00 2001 From: Bijan Tabatabai Date: Fri, 25 Jul 2025 11:33:00 -0500 Subject: [PATCH 1436/2411] mm/damon/vaddr: skip isolating folios already in destination nid 
damos_va_migrate_dests_add() determines the node a folio should be in based on the struct damos_migrate_dests associated with the migration scheme and adds the folio to the linked list corresponding to that node so it can be migrated later. Currently, folios are isolated and added to the list even if they are already in the node they should be in. In using damon weighted interleave more, I've found that the overhead of needlessly adding these folios to the migration lists can be quite high. The overhead comes from isolating folios and placing them in the migration lists inside of damos_va_migrate_dests_add(), as well as the cost of handling those folios in damon_migrate_pages(). This patch eliminates that overhead by simply avoiding the addition of folios that are already in their intended location to the migration list. To show the benefit of this patch, we start the test workload and start a DAMON instance attached to that workload with a migrate_hot scheme that has one dest field sending data to the local node. This way, we are only measuring the overheads of the scheme, and not the cost of migrating pages, since data will be allocated to the local node by default. I tested with two workloads: the embedding reduction workload used in [1] and a microbenchmark that allocates 20GB of data then sleeps, which is similar to the memory usage of the embedding reduction workload. The time taken in damos_va_migrate_dests_add() and damon_migrate_pages() each aggregation interval is shown below. Before this patch: damos_va_migrate_dests_add damon_migrate_pages microbenchmark ~2ms ~3ms embedding reduction ~1s ~3s After this patch: damos_va_migrate_dests_add damon_migrate_pages microbenchmark 0us ~40us embedding reduction 0us ~100us I did not do an in depth analysis for why things are much slower in the embedding reduction workload than the microbenchmark. 
However, I assume it's because the embedding reduction workload oversaturates the bandwidth of the local memory node, increasing the memory access latency, and in turn making the pointer chasing involved in iterating through a linked list much slower. Regardless of that, this patch results in a significant speedup. [1] https://lore.kernel.org/damon/20250709005952.17776-1-bijan311@gmail.com/ Link: https://lkml.kernel.org/r/20250725163300.4602-1-bijan311@gmail.com Fixes: 19c1dc15c859 ("mm/damon/vaddr: use damos->migrate_dests in migrate_{hot,cold}") Signed-off-by: Bijan Tabatabai Reviewed-by: SeongJae Park Reviewed-by: Raghavendra K T Signed-off-by: Andrew Morton --- mm/damon/vaddr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 94af19c4dfed..87e825349bdf 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -711,6 +711,10 @@ static void damos_va_migrate_dests_add(struct folio *folio, target -= dests->weight_arr[i]; } + /* If the folio is already in the right node, don't do anything */ + if (folio_nid(folio) == dests->node_id_arr[i]) + return; + isolate: if (!folio_isolate_lru(folio)) return; From f225b34f1e6c81c50e48f6207ddb6d290be1b932 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 25 Jul 2025 09:29:41 +0100 Subject: [PATCH 1437/2411] mm/mseal: always define VM_SEALED Patch series "mseal cleanups", v4. Perform a number of cleanups to the mseal logic. Firstly, VM_SEALED is treated differently from every other VMA flag, it really doesn't make sense to do this, so we start by making this consistent with everything else. Next we place the madvise logic where it belongs - in mm/madvise.c. It really makes no sense to abstract this elsewhere. In doing so, we go to great lengths to explain very clearly the previously very confusing logic as to what sealed mappings are impacted here. 
In doing so, we retain existing logic regarding treatment of madvise() discard operations for a sealed, read-only MAP_PRIVATE file-backed mapping. This is something we likely need to revisit. We then abstract out and explain the 'are there any gaps in this range in the mm?' check being performed as a prerequisite to mseal being performed. Finally, we simplify the actual mseal logic which is really quite straightforward. No functional change is intended. This patch (of 4): There is no reason to treat VM_SEALED in a special way; in every other case in which a VMA flag is unavailable due to configuration, we simply assign that flag to VM_NONE, so make VM_SEALED consistent with all other VMA flags in this respect. Additionally, use the next available bit for VM_SEALED, 42, rather than arbitrarily putting it at 63 and update the declaration to match all other VMA flags. No functional change intended. Link: https://lkml.kernel.org/r/cover.1753431105.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/aeb398a77029b6e7377cd944328bc9bbc3c90537.1753431105.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. 
Howlett Reviewed-by: Pedro Falcato Acked-by: David Hildenbrand Cc: Jann Horn Cc: Jeff Xu Cc: Kees Cook Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 ++++-- tools/testing/vma/vma_internal.h | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 8e3a4c5b78ff..ceaa780a703a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -414,8 +414,10 @@ extern unsigned int kobjsize(const void *objp); #endif #ifdef CONFIG_64BIT -/* VM is sealed, in vm_flags */ -#define VM_SEALED _BITUL(63) +#define VM_SEALED_BIT 42 +#define VM_SEALED BIT(VM_SEALED_BIT) +#else +#define VM_SEALED VM_NONE #endif /* Bits set in the VMA until the stack is in its final location */ diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 991022e9e0d3..0fe52fd6782b 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -108,8 +108,10 @@ extern unsigned long dac_mmap_min_addr; #define CAP_IPC_LOCK 14 #ifdef CONFIG_64BIT -/* VM is sealed, in vm_flags */ -#define VM_SEALED _BITUL(63) +#define VM_SEALED_BIT 42 +#define VM_SEALED BIT(VM_SEALED_BIT) +#else +#define VM_SEALED VM_NONE #endif #define FIRST_USER_ADDRESS 0UL From d0b47a6866f1047247061f3a38f12a981825b265 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 25 Jul 2025 09:29:42 +0100 Subject: [PATCH 1438/2411] mm/mseal: update madvise() logic The madvise() logic is inexplicably performed in mm/mseal.c - this ought to be located in mm/madvise.c. Additionally can_modify_vma_madv() is inconsistently named and, in combination with is_ro_anon(), is very confusing logic. Put a static function in mm/madvise.c instead - can_madvise_modify() - that spells out exactly what's happening. Also explicitly check for an anon VMA. Also add commentary to explain what's going on. 
Essentially - we disallow discarding of data in mseal()'d mappings in instances where the user couldn't otherwise write to that data. We retain the existing behaviour here regarding MAP_PRIVATE mappings of file-backed mappings, which entails some complexity - while this, strictly speaking - appears to violate mseal() semantics, it may interact badly with users which expect to be able to madvise(MADV_DONTNEED) .text mappings for instance. We may revisit this at a later date. No functional change intended. Link: https://lkml.kernel.org/r/492a98d9189646e92c8f23f4cce41ed323fe01df.1753431105.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Pedro Falcato Acked-by: David Hildenbrand Cc: Jann Horn Cc: Jeff Xu Cc: Kees Cook Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/madvise.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++- mm/mseal.c | 49 ------------------------------------ mm/vma.h | 7 ------ 3 files changed, 70 insertions(+), 57 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index bb80fc5ea08f..7f9af2dbd044 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1256,6 +1257,74 @@ static long madvise_guard_remove(struct madvise_behavior *madv_behavior) &guard_remove_walk_ops, NULL); } +#ifdef CONFIG_64BIT +/* Does the madvise operation result in discarding of mapped data? */ +static bool is_discard(int behavior) +{ + switch (behavior) { + case MADV_FREE: + case MADV_DONTNEED: + case MADV_DONTNEED_LOCKED: + case MADV_REMOVE: + case MADV_DONTFORK: + case MADV_WIPEONFORK: + case MADV_GUARD_INSTALL: + return true; + } + + return false; +} + +/* + * We are restricted from madvise()'ing mseal()'d VMAs only in very particular + * circumstances - discarding of data from read-only anonymous SEALED mappings. 
+ * + * This is because users cannot trivally discard data from these VMAs, and may + * only do so via an appropriate madvise() call. + */ +static bool can_madvise_modify(struct madvise_behavior *madv_behavior) +{ + struct vm_area_struct *vma = madv_behavior->vma; + + /* If the VMA isn't sealed we're good. */ + if (can_modify_vma(vma)) + return true; + + /* For a sealed VMA, we only care about discard operations. */ + if (!is_discard(madv_behavior->behavior)) + return true; + + /* + * We explicitly permit all file-backed mappings, whether MAP_SHARED or + * MAP_PRIVATE. + * + * The latter causes some complications. Because now, one can mmap() + * read/write a MAP_PRIVATE mapping, write to it, then mprotect() + * read-only, mseal() and a discard will be permitted. + * + * However, in order to avoid issues with potential use of madvise(..., + * MADV_DONTNEED) of mseal()'d .text mappings we, for the time being, + * permit this. + */ + if (!vma_is_anonymous(vma)) + return true; + + /* If the user could write to the mapping anyway, then this is fine. */ + if ((vma->vm_flags & VM_WRITE) && + arch_vma_access_permitted(vma, /* write= */ true, + /* execute= */ false, /* foreign= */ false)) + return true; + + /* Otherwise, we are not permitted to perform this operation. */ + return false; +} +#else +static bool can_madvise_modify(struct madvise_behavior *madv_behavior) +{ + return true; +} +#endif + /* * Apply an madvise behavior to a region of a vma. 
madvise_update_vma * will handle splitting a vm area into separate areas, each area with its own @@ -1269,7 +1338,7 @@ static int madvise_vma_behavior(struct madvise_behavior *madv_behavior) struct madvise_behavior_range *range = &madv_behavior->range; int error; - if (unlikely(!can_modify_vma_madv(madv_behavior->vma, behavior))) + if (unlikely(!can_madvise_modify(madv_behavior))) return -EPERM; switch (behavior) { diff --git a/mm/mseal.c b/mm/mseal.c index c27197ac04e8..1308e88ab184 100644 --- a/mm/mseal.c +++ b/mm/mseal.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include "internal.h" @@ -21,54 +20,6 @@ static inline void set_vma_sealed(struct vm_area_struct *vma) vm_flags_set(vma, VM_SEALED); } -static bool is_madv_discard(int behavior) -{ - switch (behavior) { - case MADV_FREE: - case MADV_DONTNEED: - case MADV_DONTNEED_LOCKED: - case MADV_REMOVE: - case MADV_DONTFORK: - case MADV_WIPEONFORK: - case MADV_GUARD_INSTALL: - return true; - } - - return false; -} - -static bool is_ro_anon(struct vm_area_struct *vma) -{ - /* check anonymous mapping. */ - if (vma->vm_file || vma->vm_flags & VM_SHARED) - return false; - - /* - * check for non-writable: - * PROT=RO or PKRU is not writeable. - */ - if (!(vma->vm_flags & VM_WRITE) || - !arch_vma_access_permitted(vma, true, false, false)) - return true; - - return false; -} - -/* - * Check if a vma is allowed to be modified by madvise. - */ -bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior) -{ - if (!is_madv_discard(behavior)) - return true; - - if (unlikely(!can_modify_vma(vma) && is_ro_anon(vma))) - return false; - - /* Allow by default. 
*/ - return true; -} - static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, vm_flags_t newflags) diff --git a/mm/vma.h b/mm/vma.h index acdcc515c459..85db5e880fcc 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -577,8 +577,6 @@ static inline bool can_modify_vma(struct vm_area_struct *vma) return true; } -bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior); - #else static inline bool can_modify_vma(struct vm_area_struct *vma) @@ -586,11 +584,6 @@ static inline bool can_modify_vma(struct vm_area_struct *vma) return true; } -static inline bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior) -{ - return true; -} - #endif #if defined(CONFIG_STACK_GROWSUP) From 8b2914162aa3a56062d4b7c716149946672d48a6 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 25 Jul 2025 09:29:43 +0100 Subject: [PATCH 1439/2411] mm/mseal: small cleanups Drop the wholly unnecessary set_vma_sealed() helper, which is used only once, and place VMA_ITERATOR() declarations in the correct place. Retain vma_is_sealed(), and use it instead of the confusingly named can_modify_vma(), so it's abundantly clear what's being tested, rather than a nebulous sense of 'can the VMA be modified'. No functional change intended. Link: https://lkml.kernel.org/r/98cf28d04583d632a6eb698e9ad23733bb6af26b.1753431105.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. 
Howlett Reviewed-by: Pedro Falcato Acked-by: David Hildenbrand Acked-by: Jeff Xu Cc: Jann Horn Cc: Kees Cook Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/madvise.c | 2 +- mm/mprotect.c | 2 +- mm/mremap.c | 2 +- mm/mseal.c | 9 +-------- mm/vma.c | 4 ++-- mm/vma.h | 20 ++------------------ 6 files changed, 8 insertions(+), 31 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 7f9af2dbd044..35ed4ab0d7c5 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1287,7 +1287,7 @@ static bool can_madvise_modify(struct madvise_behavior *madv_behavior) struct vm_area_struct *vma = madv_behavior->vma; /* If the VMA isn't sealed we're good. */ - if (can_modify_vma(vma)) + if (!vma_is_sealed(vma)) return true; /* For a sealed VMA, we only care about discard operations. */ diff --git a/mm/mprotect.c b/mm/mprotect.c index 2ddd37b2f462..78bded7acf79 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -766,7 +766,7 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb, unsigned long charged = 0; int error; - if (!can_modify_vma(vma)) + if (vma_is_sealed(vma)) return -EPERM; if (newflags == oldflags) { diff --git a/mm/mremap.c b/mm/mremap.c index e15cf2e444c7..ac39845e9718 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -1651,7 +1651,7 @@ static int check_prep_vma(struct vma_remap_struct *vrm) return -EFAULT; /* If mseal()'d, mremap() is prohibited. */ - if (!can_modify_vma(vma)) + if (vma_is_sealed(vma)) return -EPERM; /* Align to hugetlb page size, if required. 
*/ diff --git a/mm/mseal.c b/mm/mseal.c index 1308e88ab184..adbcc65e9660 100644 --- a/mm/mseal.c +++ b/mm/mseal.c @@ -15,11 +15,6 @@ #include #include "internal.h" -static inline void set_vma_sealed(struct vm_area_struct *vma) -{ - vm_flags_set(vma, VM_SEALED); -} - static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, vm_flags_t newflags) @@ -36,7 +31,7 @@ static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, goto out; } - set_vma_sealed(vma); + vm_flags_set(vma, VM_SEALED); out: *prev = vma; return ret; @@ -53,7 +48,6 @@ static int check_mm_seal(unsigned long start, unsigned long end) { struct vm_area_struct *vma; unsigned long nstart = start; - VMA_ITERATOR(vmi, current->mm, start); /* going through each vma to check. */ @@ -78,7 +72,6 @@ static int apply_mm_seal(unsigned long start, unsigned long end) { unsigned long nstart; struct vm_area_struct *vma, *prev; - VMA_ITERATOR(vmi, current->mm, start); vma = vma_iter_load(&vmi); diff --git a/mm/vma.c b/mm/vma.c index fc502b741dcf..75fd2759964b 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -1351,7 +1351,7 @@ static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms, } /* Don't bother splitting the VMA if we can't unmap it anyway */ - if (!can_modify_vma(vms->vma)) { + if (vma_is_sealed(vms->vma)) { error = -EPERM; goto start_split_failed; } @@ -1371,7 +1371,7 @@ static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms, for_each_vma_range(*(vms->vmi), next, vms->end) { long nrpages; - if (!can_modify_vma(next)) { + if (vma_is_sealed(next)) { error = -EPERM; goto modify_vma_failed; } diff --git a/mm/vma.h b/mm/vma.h index 85db5e880fcc..b123a9cdedb0 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -559,31 +559,15 @@ struct vm_area_struct *vma_iter_next_rewind(struct vma_iterator *vmi, } #ifdef CONFIG_64BIT - static inline bool vma_is_sealed(struct vm_area_struct *vma) { return (vma->vm_flags & VM_SEALED); 
} - -/* - * check if a vma is sealed for modification. - * return true, if modification is allowed. - */ -static inline bool can_modify_vma(struct vm_area_struct *vma) -{ - if (unlikely(vma_is_sealed(vma))) - return false; - - return true; -} - #else - -static inline bool can_modify_vma(struct vm_area_struct *vma) +static inline bool vma_is_sealed(struct vm_area_struct *vma) { - return true; + return false; } - #endif #if defined(CONFIG_STACK_GROWSUP) From 530e090964130d538dfa74874012ca461ef692fa Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 25 Jul 2025 09:29:44 +0100 Subject: [PATCH 1440/2411] mm/mseal: simplify and rename VMA gap check The check_mm_seal() function is doing something general - checking whether a range contains only VMAs (or rather that it does NOT contain any unmapped regions). So rename this function to range_contains_unmapped(). Additionally simplify the logic, we are simply checking whether the last vma->vm_end has either a VMA starting after it or ends before the end parameter. This check is rather dubious, so it is sensible to keep it local to mm/mseal.c as at a later stage it may be removed, and we don't want any other mm code to perform such a check. No functional change intended. [lorenzo.stoakes@oracle.com: add comment explaining why we disallow gaps on mseal()] Link: https://lkml.kernel.org/r/d85b3d55-09dc-43ba-8204-b48267a96751@lucifer.local Link: https://lkml.kernel.org/r/dd50984eff1e242b5f7f0f070a3360ef760e06b8.1753431105.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. 
Howlett Acked-by: David Hildenbrand Acked-by: Jeff Xu Reviewed-by: Pedro Falcato Cc: Jann Horn Cc: Kees Cook Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/mseal.c | 51 ++++++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/mm/mseal.c b/mm/mseal.c index adbcc65e9660..d140f569c4c3 100644 --- a/mm/mseal.c +++ b/mm/mseal.c @@ -38,31 +38,40 @@ static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, } /* - * Check for do_mseal: - * 1> start is part of a valid vma. - * 2> end is part of a valid vma. - * 3> No gap (unallocated address) between start and end. - * 4> map is sealable. + * mseal() disallows an input range which contain unmapped ranges (VMA holes). + * + * It disallows unmapped regions from start to end whether they exist at the + * start, in the middle, or at the end of the range, or any combination thereof. + * + * This is because after sealng a range, there's nothing to stop memory mapping + * of ranges in the remaining gaps later, meaning that the user might then + * wrongly consider the entirety of the mseal()'d range to be sealed when it + * in fact isn't. */ -static int check_mm_seal(unsigned long start, unsigned long end) + +/* + * Does the [start, end) range contain any unmapped memory? + * + * We ensure that: + * - start is part of a valid VMA. + * - end is part of a valid VMA. + * - no gap (unallocated memory) exists between start and end. + */ +static bool range_contains_unmapped(struct mm_struct *mm, + unsigned long start, unsigned long end) { struct vm_area_struct *vma; - unsigned long nstart = start; + unsigned long prev_end = start; VMA_ITERATOR(vmi, current->mm, start); - /* going through each vma to check. */ for_each_vma_range(vmi, vma, end) { - if (vma->vm_start > nstart) - /* unallocated memory found. 
*/ - return -ENOMEM; + if (vma->vm_start > prev_end) + return true; - if (vma->vm_end >= end) - return 0; - - nstart = vma->vm_end; + prev_end = vma->vm_end; } - return -ENOMEM; + return prev_end < end; } /* @@ -184,14 +193,10 @@ int do_mseal(unsigned long start, size_t len_in, unsigned long flags) if (mmap_write_lock_killable(mm)) return -EINTR; - /* - * First pass, this helps to avoid - * partial sealing in case of error in input address range, - * e.g. ENOMEM error. - */ - ret = check_mm_seal(start, end); - if (ret) + if (range_contains_unmapped(mm, start, end)) { + ret = -ENOMEM; goto out; + } /* * Second pass, this should success, unless there are errors From 6c2da14ae1e0a0146587381594559027bd46c059 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 25 Jul 2025 09:29:45 +0100 Subject: [PATCH 1441/2411] mm/mseal: rework mseal apply logic The logic can be simplified - firstly by renaming the inconsistently named apply_mm_seal() to mseal_apply(). We then wrap mseal_fixup() into the main loop as the logic is simple enough to not require it, equally it isn't a hugely pleasant pattern in mprotect() etc. so it's not something we want to perpetuate. We eliminate the need for invoking vma_iter_end() on each loop by directly determining if the VMA was merged - the only thing we need concern ourselves with is whether the start/end of the (gapless) range are offset into VMAs. This refactoring also avoids the rather horrid 'pass pointer to prev around' pattern used in mprotect() et al. No functional change intended. Link: https://lkml.kernel.org/r/ddfa4376ce29f19a589d7dc8c92cb7d4f7605a4c.1753431105.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Pedro Falcato Reviewed-by: Liam R. 
Howlett Acked-by: David Hildenbrand Acked-by: Jeff Xu Cc: Jann Horn Cc: Kees Cook Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/mseal.c | 65 ++++++++++++++++-------------------------------------- 1 file changed, 19 insertions(+), 46 deletions(-) diff --git a/mm/mseal.c b/mm/mseal.c index d140f569c4c3..e5b205562d2e 100644 --- a/mm/mseal.c +++ b/mm/mseal.c @@ -15,28 +15,6 @@ #include #include "internal.h" -static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, - struct vm_area_struct **prev, unsigned long start, - unsigned long end, vm_flags_t newflags) -{ - int ret = 0; - vm_flags_t oldflags = vma->vm_flags; - - if (newflags == oldflags) - goto out; - - vma = vma_modify_flags(vmi, *prev, vma, start, end, newflags); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto out; - } - - vm_flags_set(vma, VM_SEALED); -out: - *prev = vma; - return ret; -} - /* * mseal() disallows an input range which contain unmapped ranges (VMA holes). * @@ -74,38 +52,33 @@ static bool range_contains_unmapped(struct mm_struct *mm, return prev_end < end; } -/* - * Apply sealing. - */ -static int apply_mm_seal(unsigned long start, unsigned long end) +static int mseal_apply(struct mm_struct *mm, + unsigned long start, unsigned long end) { - unsigned long nstart; struct vm_area_struct *vma, *prev; - VMA_ITERATOR(vmi, current->mm, start); + unsigned long curr_start = start; + VMA_ITERATOR(vmi, mm, start); + /* We know there are no gaps so this will be non-NULL. */ vma = vma_iter_load(&vmi); - /* - * Note: check_mm_seal should already checked ENOMEM case. - * so vma should not be null, same for the other ENOMEM cases. 
- */ prev = vma_prev(&vmi); if (start > vma->vm_start) prev = vma; - nstart = start; for_each_vma_range(vmi, vma, end) { - int error; - unsigned long tmp; - vm_flags_t newflags; + unsigned long curr_end = MIN(vma->vm_end, end); - newflags = vma->vm_flags | VM_SEALED; - tmp = vma->vm_end; - if (tmp > end) - tmp = end; - error = mseal_fixup(&vmi, vma, &prev, nstart, tmp, newflags); - if (error) - return error; - nstart = vma_iter_end(&vmi); + if (!(vma->vm_flags & VM_SEALED)) { + vma = vma_modify_flags(&vmi, prev, vma, + curr_start, curr_end, + vma->vm_flags | VM_SEALED); + if (IS_ERR(vma)) + return PTR_ERR(vma); + vm_flags_set(vma, VM_SEALED); + } + + prev = vma; + curr_start = curr_end; } return 0; @@ -204,10 +177,10 @@ int do_mseal(unsigned long start, size_t len_in, unsigned long flags) * reaching the max supported VMAs, however, those cases shall * be rare. */ - ret = apply_mm_seal(start, end); + ret = mseal_apply(mm, start, end); out: - mmap_write_unlock(current->mm); + mmap_write_unlock(mm); return ret; } From 9109bd52559b44a66e4dbde69d0dd36f3e4dcae8 Mon Sep 17 00:00:00 2001 From: Jinjiang Tu Date: Fri, 25 Jul 2025 11:31:12 +0800 Subject: [PATCH 1442/2411] mm/memory-failure: hold PTL in hwpoison_hugetlb_range Hold PTL in hwpoison_hugetlb_range() to avoid operating on stale page, as hwpoison_pte_range() have done. This change is not known to address any issues which users have experienced. 
Link: https://lkml.kernel.org/r/20250725033112.2690158-1-tujinjiang@huawei.com Signed-off-by: Jinjiang Tu Acked-by: David Hildenbrand Cc: Andrei Vagin Cc: Andrii Nakryiko Cc: Baolin Wang Cc: Brahmajit Das Cc: Catalin Marinas Cc: Christophe Leroy Cc: David Rientjes Cc: Dev Jain Cc: Hugh Dickins Cc: Joern Engel Cc: Kefeng Wang Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Ryan Roberts Cc: Thiago Jung Bauermann Signed-off-by: Andrew Morton --- mm/memory-failure.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 9e2cff199934..f0f0b23dcf2d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -837,11 +837,17 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask, struct mm_walk *walk) { struct hwpoison_walk *hwp = walk->private; - pte_t pte = huge_ptep_get(walk->mm, addr, ptep); struct hstate *h = hstate_vma(walk->vma); + spinlock_t *ptl; + pte_t pte; + int ret; - return check_hwpoisoned_entry(pte, addr, huge_page_shift(h), - hwp->pfn, &hwp->tk); + ptl = huge_pte_lock(h, walk->mm, ptep); + pte = huge_ptep_get(walk->mm, addr, ptep); + ret = check_hwpoisoned_entry(pte, addr, huge_page_shift(h), + hwp->pfn, &hwp->tk); + spin_unlock(ptl); + return ret; } #else #define hwpoison_hugetlb_range NULL From 1623717b057f904d558eb0489fbd592a18750c1e Mon Sep 17 00:00:00 2001 From: Jinjiang Tu Date: Thu, 24 Jul 2025 17:09:58 +0800 Subject: [PATCH 1443/2411] mm/mincore: hold PTL in mincore_hugetlb Hold PTL in mincore_hugetlb() to avoid operating on stale page, as mincore_pte_range() have done. 
Link: https://lkml.kernel.org/r/20250724090958.455887-4-tujinjiang@huawei.com Signed-off-by: Jinjiang Tu Acked-by: David Hildenbrand Cc: Andrei Vagin Cc: Andrii Nakryiko Cc: Baolin Wang Cc: Brahmajit Das Cc: Catalin Marinas Cc: Christophe Leroy Cc: David Rientjes Cc: Dev Jain Cc: Hugh Dickins Cc: Joern Engel Cc: Kefeng Wang Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Ryan Roberts Cc: Thiago Jung Bauermann Signed-off-by: Andrew Morton --- mm/mincore.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/mincore.c b/mm/mincore.c index 42d6c9c8da86..10dabefc3acc 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -29,7 +29,9 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr, #ifdef CONFIG_HUGETLB_PAGE unsigned char present; unsigned char *vec = walk->private; + spinlock_t *ptl; + ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte); /* * Hugepages under user process are always in RAM and never * swapped out, but theoretically it needs to be checked. @@ -38,6 +40,7 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr, for (; addr != end; vec++, addr += PAGE_SIZE) *vec = present; walk->private = vec; + spin_unlock(ptl); #else BUG(); #endif From 3dfde97800e06882960cc926d2c428f2128b7c70 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 24 Jul 2025 10:52:59 +0530 Subject: [PATCH 1444/2411] mm: add get_and_clear_ptes() and clear_ptes() Patch series "Optimizations for khugepaged", v4. If the underlying folio mapped by the ptes is large, we can process those ptes in a batch using folio_pte_batch(). For arm64 specifically, this results in a 16x reduction in the number of ptep_get() calls, since on a contig block, ptep_get() on arm64 will iterate through all 16 entries to collect a/d bits. Next, ptep_clear() will cause a TLBI for every contig block in the range via contpte_try_unfold(). Instead, use clear_ptes() to only do the TLBI at the first and last contig block of the range. 
For split folios, there will be no pte batching; the batch size returned by folio_pte_batch() will be 1. For pagetable split folios, the ptes will still point to the same large folio; for arm64, this results in the optimization described above, and for other arches, a minor improvement is expected due to a reduction in the number of function calls and batching atomic operations. This patch (of 3): Let's add variants to be used where "full" does not apply -- which will be the majority of cases in the future. "full" really only applies if we are about to tear down a full MM. Use get_and_clear_ptes() in existing code, clear_ptes() users will be added next. Link: https://lkml.kernel.org/r/20250724052301.23844-2-dev.jain@arm.com Signed-off-by: David Hildenbrand Signed-off-by: Dev Jain Reviewed-by: Baolin Wang Reviewed-by: Barry Song Reviewed-by: Lorenzo Stoakes Reviewed-by: Zi Yan Cc: Liam Howlett Cc: Mariano Pache Cc: Ryan Roberts Signed-off-by: Andrew Morton --- arch/arm64/mm/mmu.c | 2 +- include/linux/pgtable.h | 45 +++++++++++++++++++++++++++++++++++++++++ mm/mremap.c | 2 +- mm/rmap.c | 2 +- 4 files changed, 48 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index abd9725796e9..20a89ab97dc5 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1528,7 +1528,7 @@ early_initcall(prevent_bootmem_remove_init); pte_t modify_prot_start_ptes(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, unsigned int nr) { - pte_t pte = get_and_clear_full_ptes(vma->vm_mm, addr, ptep, nr, /* full = */ 0); + pte_t pte = get_and_clear_ptes(vma->vm_mm, addr, ptep, nr); if (alternative_has_cap_unlikely(ARM64_WORKAROUND_2645198)) { /* diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e3b99920be05..4c035637eeb7 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -736,6 +736,29 @@ static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm, } #endif +/** + * get_and_clear_ptes - Clear present PTEs 
that map consecutive pages of + * the same folio, collecting dirty/accessed bits. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * + * Use this instead of get_and_clear_full_ptes() if it is known that we don't + * need to clear the full mm, which is mostly the case. + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline pte_t get_and_clear_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ + return get_and_clear_full_ptes(mm, addr, ptep, nr, 0); +} + #ifndef clear_full_ptes /** * clear_full_ptes - Clear present PTEs that map consecutive pages of the same @@ -768,6 +791,28 @@ static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr, } #endif +/** + * clear_ptes - Clear present PTEs that map consecutive pages of the same folio. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * + * Use this instead of clear_full_ptes() if it is known that we don't need to + * clear the full mm, which is mostly the case. + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. 
+ */ +static inline void clear_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ + clear_full_ptes(mm, addr, ptep, nr, 0); +} + /* * If two threads concurrently fault at the same page, the thread that * won the race updates the PTE and its local TLB/Cache. The other thread diff --git a/mm/mremap.c b/mm/mremap.c index ac39845e9718..677a4d744df9 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -280,7 +280,7 @@ static int move_ptes(struct pagetable_move_control *pmc, old_pte, max_nr_ptes); force_flush = true; } - pte = get_and_clear_full_ptes(mm, old_addr, old_ptep, nr_ptes, 0); + pte = get_and_clear_ptes(mm, old_addr, old_ptep, nr_ptes); pte = move_pte(pte, old_addr, new_addr); pte = move_soft_dirty_pte(pte); diff --git a/mm/rmap.c b/mm/rmap.c index f93ce27132ab..568198e9efc2 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -2036,7 +2036,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, flush_cache_range(vma, address, end_addr); /* Nuke the page table entry. */ - pteval = get_and_clear_full_ptes(mm, address, pvmw.pte, nr_pages, 0); + pteval = get_and_clear_ptes(mm, address, pvmw.pte, nr_pages); /* * We clear the PTE but do not flush so potentially * a remote CPU could still be writing to the folio. From 4ea3594a47412f9dd20fbda0dc70b0cbec9cba43 Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Thu, 24 Jul 2025 10:53:00 +0530 Subject: [PATCH 1445/2411] khugepaged: optimize __collapse_huge_page_copy_succeeded() by PTE batching Use PTE batching to batch process PTEs mapping the same large folio. An improvement is expected due to batching refcount-mapcount manipulation on the folios, and for arm64 which supports contig mappings, the number of TLB flushes is also reduced. 
Link: https://lkml.kernel.org/r/20250724052301.23844-3-dev.jain@arm.com Signed-off-by: Dev Jain Acked-by: David Hildenbrand Reviewed-by: Baolin Wang Reviewed-by: Lorenzo Stoakes Cc: Barry Song Cc: Liam Howlett Cc: Mariano Pache Cc: Ryan Roberts Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/khugepaged.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index a55fb1dcd224..f23e943506bc 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -700,12 +700,15 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte, spinlock_t *ptl, struct list_head *compound_pagelist) { + unsigned long end = address + HPAGE_PMD_SIZE; struct folio *src, *tmp; - pte_t *_pte; pte_t pteval; + pte_t *_pte; + unsigned int nr_ptes; - for (_pte = pte; _pte < pte + HPAGE_PMD_NR; - _pte++, address += PAGE_SIZE) { + for (_pte = pte; _pte < pte + HPAGE_PMD_NR; _pte += nr_ptes, + address += nr_ptes * PAGE_SIZE) { + nr_ptes = 1; pteval = ptep_get(_pte); if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) { add_mm_counter(vma->vm_mm, MM_ANONPAGES, 1); @@ -722,18 +725,26 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte, struct page *src_page = pte_page(pteval); src = page_folio(src_page); - if (!folio_test_large(src)) + + if (folio_test_large(src)) { + unsigned int max_nr_ptes = (end - address) >> PAGE_SHIFT; + + nr_ptes = folio_pte_batch(src, _pte, pteval, max_nr_ptes); + } else { release_pte_folio(src); + } + /* * ptl mostly unnecessary, but preempt has to * be disabled to update the per-cpu stats * inside folio_remove_rmap_pte(). 
*/ spin_lock(ptl); - ptep_clear(vma->vm_mm, address, _pte); - folio_remove_rmap_pte(src, src_page, vma); + clear_ptes(vma->vm_mm, address, _pte, nr_ptes); + folio_remove_rmap_ptes(src, src_page, nr_ptes, vma); spin_unlock(ptl); - free_folio_and_swap_cache(src); + free_swap_cache(src); + folio_put_refs(src, nr_ptes); } } From 22d0229093b92db2fe6ca6ba946bad1f246024e8 Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Thu, 24 Jul 2025 10:53:01 +0530 Subject: [PATCH 1446/2411] khugepaged: optimize collapse_pte_mapped_thp() by PTE batching Use PTE batching to batch process PTEs mapping the same large folio. An improvement is expected due to batching mapcount manipulation on the folios, and for arm64 which supports contig mappings, the number of TLB flushes is also reduced. Note that we do not need to make a change to the check "if (folio_page(folio, i) != page)"; if i'th page of the folio is equal to the first page of our batch, then i + 1, .... i + nr_batch_ptes - 1 pages of the folio will be equal to the corresponding pages of our batch mapping consecutive pages. 
Link: https://lkml.kernel.org/r/20250724052301.23844-4-dev.jain@arm.com Signed-off-by: Dev Jain Acked-by: David Hildenbrand Reviewed-by: Baolin Wang Reviewed-by: Lorenzo Stoakes Reviewed-by: Zi Yan Cc: Barry Song Cc: Liam Howlett Cc: Mariano Pache Cc: Ryan Roberts Signed-off-by: Andrew Morton --- mm/khugepaged.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index f23e943506bc..374a6a5193a7 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1503,15 +1503,17 @@ static int set_huge_pmd(struct vm_area_struct *vma, unsigned long addr, int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, bool install_pmd) { + int nr_mapped_ptes = 0, result = SCAN_FAIL; + unsigned int nr_batch_ptes; struct mmu_notifier_range range; bool notified = false; unsigned long haddr = addr & HPAGE_PMD_MASK; + unsigned long end = haddr + HPAGE_PMD_SIZE; struct vm_area_struct *vma = vma_lookup(mm, haddr); struct folio *folio; pte_t *start_pte, *pte; pmd_t *pmd, pgt_pmd; spinlock_t *pml = NULL, *ptl; - int nr_ptes = 0, result = SCAN_FAIL; int i; mmap_assert_locked(mm); @@ -1625,11 +1627,15 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, goto abort; /* step 2: clear page table and adjust rmap */ - for (i = 0, addr = haddr, pte = start_pte; - i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) { + for (i = 0, addr = haddr, pte = start_pte; i < HPAGE_PMD_NR; + i += nr_batch_ptes, addr += nr_batch_ptes * PAGE_SIZE, + pte += nr_batch_ptes) { + unsigned int max_nr_batch_ptes = (end - addr) >> PAGE_SHIFT; struct page *page; pte_t ptent = ptep_get(pte); + nr_batch_ptes = 1; + if (pte_none(ptent)) continue; /* @@ -1643,26 +1649,29 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, goto abort; } page = vm_normal_page(vma, addr, ptent); + if (folio_page(folio, i) != page) goto abort; + nr_batch_ptes = folio_pte_batch(folio, pte, ptent, max_nr_batch_ptes); + 
/* * Must clear entry, or a racing truncate may re-remove it. * TLB flush can be left until pmdp_collapse_flush() does it. * PTE dirty? Shmem page is already dirty; file is read-only. */ - ptep_clear(mm, addr, pte); - folio_remove_rmap_pte(folio, page, vma); - nr_ptes++; + clear_ptes(mm, addr, pte, nr_batch_ptes); + folio_remove_rmap_ptes(folio, page, nr_batch_ptes, vma); + nr_mapped_ptes += nr_batch_ptes; } if (!pml) spin_unlock(ptl); /* step 3: set proper refcount and mm_counters. */ - if (nr_ptes) { - folio_ref_sub(folio, nr_ptes); - add_mm_counter(mm, mm_counter_file(folio), -nr_ptes); + if (nr_mapped_ptes) { + folio_ref_sub(folio, nr_mapped_ptes); + add_mm_counter(mm, mm_counter_file(folio), -nr_mapped_ptes); } /* step 4: remove empty page table */ @@ -1695,10 +1704,10 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, : SCAN_SUCCEED; goto drop_folio; abort: - if (nr_ptes) { + if (nr_mapped_ptes) { flush_tlb_mm(mm); - folio_ref_sub(folio, nr_ptes); - add_mm_counter(mm, mm_counter_file(folio), -nr_ptes); + folio_ref_sub(folio, nr_mapped_ptes); + add_mm_counter(mm, mm_counter_file(folio), -nr_mapped_ptes); } unlock: if (start_pte) From 9a4f90e246615d1f42a9b907deb9b4c0a418d996 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 25 Jul 2025 15:29:01 +0100 Subject: [PATCH 1447/2411] mm: remove mm/io-mapping.c This is dead code, which was used from commit b739f125e4eb ("i915: use io_mapping_map_user") but reverted a month later by commit 0e4fe0c9f2f9 ("Revert "i915: use io_mapping_map_user"") back in 2021. Since then nobody has used it, so remove it. 
[akpm@linux-foundation.org: update Documentation/core-api/mm-api.rst, per Vlastimil] Link: https://lkml.kernel.org/r/20250725142901.81502-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- Documentation/core-api/mm-api.rst | 1 - include/linux/io-mapping.h | 3 --- mm/Kconfig | 4 ---- mm/Makefile | 1 - mm/io-mapping.c | 30 ------------------------------ 5 files changed, 39 deletions(-) delete mode 100644 mm/io-mapping.c diff --git a/Documentation/core-api/mm-api.rst b/Documentation/core-api/mm-api.rst index af8151db88b2..24970b91ac15 100644 --- a/Documentation/core-api/mm-api.rst +++ b/Documentation/core-api/mm-api.rst @@ -139,4 +139,3 @@ More Memory Management Functions .. kernel-doc:: mm/mmu_notifier.c .. kernel-doc:: mm/balloon_compaction.c .. kernel-doc:: mm/huge_memory.c -.. kernel-doc:: mm/io-mapping.c diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 7376c1df9c90..c16353cc6e3c 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -225,7 +225,4 @@ io_mapping_free(struct io_mapping *iomap) kfree(iomap); } -int io_mapping_map_user(struct io_mapping *iomap, struct vm_area_struct *vma, - unsigned long addr, unsigned long pfn, unsigned long size); - #endif /* _LINUX_IO_MAPPING_H */ diff --git a/mm/Kconfig b/mm/Kconfig index d5d4eca947a6..e443fe8cd6cf 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1242,10 +1242,6 @@ config KMAP_LOCAL config KMAP_LOCAL_NON_LINEAR_PTE_ARRAY bool -# struct io_mapping based helper. 
Selected by drivers that need them -config IO_MAPPING - bool - config MEMFD_CREATE bool "Enable memfd_create() system call" if EXPERT diff --git a/mm/Makefile b/mm/Makefile index 1a7a11d4933d..ef54aa615d9d 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -141,7 +141,6 @@ obj-$(CONFIG_MEMFD_CREATE) += memfd.o obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o obj-$(CONFIG_PTDUMP) += ptdump.o obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o -obj-$(CONFIG_IO_MAPPING) += io-mapping.o obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o diff --git a/mm/io-mapping.c b/mm/io-mapping.c deleted file mode 100644 index d3586e95c12c..000000000000 --- a/mm/io-mapping.c +++ /dev/null @@ -1,30 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - -#include -#include - -/** - * io_mapping_map_user - remap an I/O mapping to userspace - * @iomap: the source io_mapping - * @vma: user vma to map to - * @addr: target user address to start at - * @pfn: physical address of kernel memory - * @size: size of map area - * - * Note: this is only safe if the mm semaphore is held when called. - */ -int io_mapping_map_user(struct io_mapping *iomap, struct vm_area_struct *vma, - unsigned long addr, unsigned long pfn, unsigned long size) -{ - vm_flags_t expected_flags = VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; - - if (WARN_ON_ONCE((vma->vm_flags & expected_flags) != expected_flags)) - return -EINVAL; - - pgprot_t remap_prot = __pgprot((pgprot_val(iomap->prot) & _PAGE_CACHE_MASK) | - (pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK)); - - /* We rely on prevalidation of the io-mapping to skip pfnmap tracking. 
*/ - return remap_pfn_range_notrack(vma, addr, pfn, size, remap_prot); -} -EXPORT_SYMBOL_GPL(io_mapping_map_user); From a222439e1e273fa0f4e37ce17aeb109f3e91824f Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 25 Jul 2025 14:16:24 +0200 Subject: [PATCH 1448/2411] mm/rmap: add anon_vma lifetime debug check If an anon folio is mapped into userspace, its anon_vma must be alive, otherwise rmap walks can hit UAF. There have been syzkaller reports a few months ago[1][2] of UAF in rmap walks that seems to indicate that there can be pages with elevated mapcount whose anon_vma has already been freed, but I think we never figured out what the cause is; and syzkaller only hit these UAFs when memory pressure randomly caused reclaim to rmap-walk the affected pages, so it of course didn't manage to create a reproducer. Add a VM_WARN_ON_FOLIO() when we add/remove mappings of anonymous folios to hopefully catch such issues more reliably. [1] https://lore.kernel.org/r/67abaeaf.050a0220.110943.0041.GAE@google.com [2] https://lore.kernel.org/r/67a76f33.050a0220.3d72c.0028.GAE@google.com Link: https://lkml.kernel.org/r/20250725-anonvma-uaf-debug-v2-1-bc3c7e5ba5b1@google.com Signed-off-by: Jann Horn Acked-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Acked-by: Vlastimil Babka Acked-by: Harry Yoo Cc: David Hildenbrand Cc: Jann Horn Cc: Liam Howlett Cc: Rik van Riel Signed-off-by: Andrew Morton --- include/linux/rmap.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 20803fcb49a7..6cd020eea37a 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -449,6 +449,28 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio, default: VM_WARN_ON_ONCE(true); } + + /* + * Anon folios must have an associated live anon_vma as long as they're + * mapped into userspace. + * Note that the atomic_read() mainly does two things: + * + * 1. 
In KASAN builds with CONFIG_SLUB_RCU_DEBUG, it causes KASAN to + * check that the associated anon_vma has not yet been freed (subject + * to KASAN's usual limitations). This check will pass if the + * anon_vma's refcount has already dropped to 0 but an RCU grace + * period hasn't passed since then. + * 2. If the anon_vma has not yet been freed, it checks that the + * anon_vma still has a nonzero refcount (as opposed to being in the + * middle of an RCU delay for getting freed). + */ + if (folio_test_anon(folio) && !folio_test_ksm(folio)) { + unsigned long mapping = (unsigned long)folio->mapping; + struct anon_vma *anon_vma; + + anon_vma = (void *)(mapping - FOLIO_MAPPING_ANON); + VM_WARN_ON_FOLIO(atomic_read(&anon_vma->refcount) == 0, folio); + } } /* From 9bbffee67ffd16360179327b57f3b1245579ef08 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 28 Jul 2025 10:53:55 -0700 Subject: [PATCH 1449/2411] mm: fix a UAF when vma->mm is freed after vma->vm_refcnt got dropped By inducing delays in the right places, Jann Horn created a reproducer for a hard to hit UAF issue that became possible after VMAs were allowed to be recycled by adding SLAB_TYPESAFE_BY_RCU to their cache. Race description is borrowed from Jann's discovery report: lock_vma_under_rcu() looks up a VMA locklessly with mas_walk() under rcu_read_lock(). At that point, the VMA may be concurrently freed, and it can be recycled by another process. vma_start_read() then increments the vma->vm_refcnt (if it is in an acceptable range), and if this succeeds, vma_start_read() can return a recycled VMA. In this scenario where the VMA has been recycled, lock_vma_under_rcu() will then detect the mismatching ->vm_mm pointer and drop the VMA through vma_end_read(), which calls vma_refcount_put(). vma_refcount_put() drops the refcount and then calls rcuwait_wake_up() using a copy of vma->vm_mm. 
This is wrong: It implicitly assumes that the caller is keeping the VMA's mm alive, but in this scenario the caller has no relation to the VMA's mm, so the rcuwait_wake_up() can cause UAF. The diagram depicting the race:

    T1                                          T2    T3
    ==                                          ==    ==
    lock_vma_under_rcu
      mas_walk
                                                      mmap
      vma_start_read
        __refcount_inc_not_zero_limited_acquire
                                                      munmap
                                                        __vma_enter_locked
                                                          refcount_add_not_zero
      vma_end_read
        vma_refcount_put
          __refcount_dec_and_test
                                                          rcuwait_wait_event
          rcuwait_wake_up [UAF]

Note that rcuwait_wait_event() in T3 does not block because refcount was already dropped by T1. At this point T3 can exit and free the mm causing UAF in T1. To avoid this we move vma->vm_mm verification into vma_start_read() and grab vma->vm_mm to stabilize it before vma_refcount_put() operation. [surenb@google.com: v3] Link: https://lkml.kernel.org/r/20250729145709.2731370-1-surenb@google.com Link: https://lkml.kernel.org/r/20250728175355.2282375-1-surenb@google.com Fixes: 3104138517fc ("mm: make vma cache SLAB_TYPESAFE_BY_RCU") Signed-off-by: Suren Baghdasaryan Reported-by: Jann Horn Closes: https://lore.kernel.org/all/CAG48ez0-deFbVH=E3jbkWx=X3uVbd8nWeo6kbJPQ0KoUD+m2tA@mail.gmail.com/ Reviewed-by: Vlastimil Babka Acked-by: Lorenzo Stoakes Cc: Jann Horn Cc: Liam Howlett Cc: Signed-off-by: Andrew Morton --- include/linux/mmap_lock.h | 30 ++++++++++++++++++++++++++++++ mm/mmap_lock.c | 10 +++------- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 1f4f44951abe..11a078de9150 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -12,6 +12,7 @@ extern int rcuwait_wake_up(struct rcuwait *w); #include #include #include +#include #define MMAP_LOCK_INITIALIZER(name) \ .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock), @@ -154,6 +155,10 @@ static inline void vma_refcount_put(struct vm_area_struct *vma) * reused and attached to a different mm before we lock it.
* Returns the vma on success, NULL on failure to lock and EAGAIN if vma got * detached. + * + * WARNING! The vma passed to this function cannot be used if the function + * fails to lock it because in certain cases RCU lock is dropped and then + * reacquired. Once RCU lock is dropped the vma can be concurently freed. */ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, struct vm_area_struct *vma) @@ -183,6 +188,31 @@ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, } rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); + + /* + * If vma got attached to another mm from under us, that mm is not + * stable and can be freed in the narrow window after vma->vm_refcnt + * is dropped and before rcuwait_wake_up(mm) is called. Grab it before + * releasing vma->vm_refcnt. + */ + if (unlikely(vma->vm_mm != mm)) { + /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */ + struct mm_struct *other_mm = vma->vm_mm; + + /* + * __mmdrop() is a heavy operation and we don't need RCU + * protection here. Release RCU lock during these operations. + * We reinstate the RCU read lock as the caller expects it to + * be held when this function returns even on error. + */ + rcu_read_unlock(); + mmgrab(other_mm); + vma_refcount_put(vma); + mmdrop(other_mm); + rcu_read_lock(); + return NULL; + } + /* * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result. * False unlocked result is impossible because we modify and check diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c index 729fb7d0dd59..b006cec8e6fe 100644 --- a/mm/mmap_lock.c +++ b/mm/mmap_lock.c @@ -164,8 +164,7 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, */ /* Check if the vma we locked is the right one. 
*/ - if (unlikely(vma->vm_mm != mm || - address < vma->vm_start || address >= vma->vm_end)) + if (unlikely(address < vma->vm_start || address >= vma->vm_end)) goto inval_end_read; rcu_read_unlock(); @@ -236,11 +235,8 @@ struct vm_area_struct *lock_next_vma(struct mm_struct *mm, goto fallback; } - /* - * Verify the vma we locked belongs to the same address space and it's - * not behind of the last search position. - */ - if (unlikely(vma->vm_mm != mm || from_addr >= vma->vm_end)) + /* Verify the vma is not behind the last search position. */ + if (unlikely(from_addr >= vma->vm_end)) goto fallback_unlock; /* From fcd90ad31e29d0b403f3a074a64cd7f0876175dd Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:23 +0300 Subject: [PATCH 1450/2411] execmem: drop unused execmem_update_copy() Patch series "x86: enable EXECMEM_ROX_CACHE for ftrace and kprobes", v3. These patches enable use of EXECMEM_ROX_CACHE for ftrace and kprobes allocations on x86. They also include some ground work in execmem. Since the execmem model for caching large ROX pages changed from the initial assumption that the memory that is allocated from ROX cache is always ROX to the current state where memory can be temporarily made RW and then restored to ROX, we can stop using text poking to update it. This also saves the hassle of trying lock text_mutex in execmem_cache_free() when kprobes already hold that mutex. This patch (of 8): The execmem_update_copy() that used text poking was required when memory allocated from ROX cache was always read-only. Since now its permissions can be switched to read-write there is no need in a function that updates memory with text poking. Remove it. 
Link: https://lkml.kernel.org/r/20250713071730.4117334-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20250713071730.4117334-2-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Peter Zijlstra (Intel) Cc: Daniel Gomez Cc: Masami Hiramatsu (Google) Cc: Petr Pavlu Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/execmem.h | 13 ------------- mm/execmem.c | 5 ----- 2 files changed, 18 deletions(-) diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 3be35680a54f..734fbe83d98e 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -185,19 +185,6 @@ DEFINE_FREE(execmem, void *, if (_T) execmem_free(_T)); struct vm_struct *execmem_vmap(size_t size); #endif -/** - * execmem_update_copy - copy an update to executable memory - * @dst: destination address to update - * @src: source address containing the data - * @size: how many bytes of memory shold be copied - * - * Copy @size bytes from @src to @dst using text poking if the memory at - * @dst is read-only. - * - * Return: a pointer to @dst or NULL on error - */ -void *execmem_update_copy(void *dst, const void *src, size_t size); - /** * execmem_is_rox - check if execmem is read-only * @type - the execmem type to check diff --git a/mm/execmem.c b/mm/execmem.c index 627e6cf64f4f..aac211bc88c5 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -399,11 +399,6 @@ void execmem_free(void *ptr) vfree(ptr); } -void *execmem_update_copy(void *dst, const void *src, size_t size) -{ - return text_poke_copy(dst, src, size); -} - bool execmem_is_rox(enum execmem_type type) { return !!(execmem_info->ranges[type].flags & EXECMEM_ROX_CACHE); From 838955f64ae7582f009a3538889bb9244f37ab26 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:24 +0300 Subject: [PATCH 1451/2411] execmem: introduce execmem_alloc_rw() Some callers of execmem_alloc() require the memory to be temporarily writable even when it is allocated from ROX cache. 
These callers use execmem_make_temp_rw() right after the call to execmem_alloc(). Wrap this sequence in execmem_alloc_rw() API. Link: https://lkml.kernel.org/r/20250713071730.4117334-3-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Daniel Gomez Reviewed-by: Petr Pavlu Acked-by: Peter Zijlstra (Intel) Cc: Masami Hiramatsu (Google) Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- arch/x86/kernel/alternative.c | 3 +-- include/linux/execmem.h | 38 ++++++++++++++++++++--------------- kernel/module/main.c | 13 ++---------- mm/execmem.c | 27 ++++++++++++++++++++++++- 4 files changed, 51 insertions(+), 30 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ea1d984166cd..526a5fef93ab 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -120,7 +120,7 @@ struct its_array its_pages; static void *__its_alloc(struct its_array *pages) { - void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE); + void *page __free(execmem) = execmem_alloc_rw(EXECMEM_MODULE_TEXT, PAGE_SIZE); if (!page) return NULL; @@ -237,7 +237,6 @@ static void *its_alloc(void) if (!page) return NULL; - execmem_make_temp_rw(page, PAGE_SIZE); if (pages == &its_pages) set_memory_x((unsigned long)page, 1); diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 734fbe83d98e..8b61b05da7d5 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -67,21 +67,6 @@ enum execmem_range_flags { */ void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable); -/** - * execmem_make_temp_rw - temporarily remap region with read-write - * permissions - * @ptr: address of the region to remap - * @size: size of the region to remap - * - * Remaps a part of the cached large page in the ROX cache in the range - * [@ptr, @ptr + @size) as writable and not executable.
The caller must - * have exclusive ownership of this range and ensure nothing will try to - * execute code in this range. - * - * Return: 0 on success or negative error code on failure. - */ -int execmem_make_temp_rw(void *ptr, size_t size); - /** * execmem_restore_rox - restore read-only-execute permissions * @ptr: address of the region to remap @@ -95,7 +80,6 @@ int execmem_make_temp_rw(void *ptr, size_t size); */ int execmem_restore_rox(void *ptr, size_t size); #else -static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } #endif @@ -165,6 +149,28 @@ struct execmem_info *execmem_arch_setup(void); */ void *execmem_alloc(enum execmem_type type, size_t size); +/** + * execmem_alloc_rw - allocate writable executable memory + * @type: type of the allocation + * @size: how many bytes of memory are required + * + * Allocates memory that will contain executable code, either generated or + * loaded from kernel modules. + * + * Allocates memory that will contain data coupled with executable code, + * like data sections in kernel modules. + * + * Forces writable permissions on the allocated memory and the caller is + * responsible to manage the permissions afterwards. + * + * For architectures that use ROX cache the permissions will be set to R+W. + * For architectures that don't use ROX cache the default permissions for @type + * will be used as they must be writable. 
+ * + * Return: a pointer to the allocated memory or %NULL + */ +void *execmem_alloc_rw(enum execmem_type type, size_t size); + /** * execmem_free - free executable memory * @ptr: pointer to the memory that should be freed diff --git a/kernel/module/main.c b/kernel/module/main.c index 413ac6ea3702..d009326ef7bb 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1292,20 +1292,11 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type) else execmem_type = EXECMEM_MODULE_TEXT; - ptr = execmem_alloc(execmem_type, size); + ptr = execmem_alloc_rw(execmem_type, size); if (!ptr) return -ENOMEM; - if (execmem_is_rox(execmem_type)) { - int err = execmem_make_temp_rw(ptr, size); - - if (err) { - execmem_free(ptr); - return -ENOMEM; - } - - mod->mem[type].is_rox = true; - } + mod->mem[type].is_rox = execmem_is_rox(execmem_type); /* * The pointer to these blocks of memory are stored on the module diff --git a/mm/execmem.c b/mm/execmem.c index aac211bc88c5..d0bf0123bce4 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -336,7 +336,7 @@ static bool execmem_cache_free(void *ptr) return true; } -int execmem_make_temp_rw(void *ptr, size_t size) +static int execmem_force_rw(void *ptr, size_t size) { unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned long addr = (unsigned long)ptr; @@ -358,6 +358,16 @@ int execmem_restore_rox(void *ptr, size_t size) } #else /* CONFIG_ARCH_HAS_EXECMEM_ROX */ +/* + * when ROX cache is not used the permissions defined by architectures for + * execmem ranges that are updated before use (e.g. 
EXECMEM_MODULE_TEXT) must + * be writable anyway + */ +static inline int execmem_force_rw(void *ptr, size_t size) +{ + return 0; +} + static void *execmem_cache_alloc(struct execmem_range *range, size_t size) { return NULL; @@ -387,6 +397,21 @@ void *execmem_alloc(enum execmem_type type, size_t size) return kasan_reset_tag(p); } +void *execmem_alloc_rw(enum execmem_type type, size_t size) +{ + void *p __free(execmem) = execmem_alloc(type, size); + int err; + + if (!p) + return NULL; + + err = execmem_force_rw(p, size); + if (err) + return NULL; + + return no_free_ptr(p); +} + void execmem_free(void *ptr) { /* From 187fd8521dd8b202cbacd7af57f4301da4d5b52d Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:25 +0300 Subject: [PATCH 1452/2411] execmem: rework execmem_cache_free() Currently execmem_cache_free() ignores potential allocation failures that may happen in execmem_cache_add(). Besides, it uses text poking to fill the memory with trapping instructions before returning it to cache although it would be more efficient to make that memory writable, update it using memcpy and then restore ROX protection. Rework execmem_cache_free() so that in case of an error it will defer freeing of the memory to a delayed work. With this the happy fast path will now change permissions to RW, fill the memory with trapping instructions using memcpy, restore ROX permissions, add the memory back to the free cache and clear the relevant entry in busy_areas. If any step in the fast path fails, the entry in busy_areas will be marked as pending_free. These entries will be handled by a delayed work and freed asynchronously. To make the fast path faster, use __GFP_NORETRY for memory allocations and let asynchronous handler try harder with GFP_KERNEL. 
Link: https://lkml.kernel.org/r/20250713071730.4117334-4-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Peter Zijlstra (Intel) Cc: Daniel Gomez Cc: Masami Hiramatsu (Google) Cc: Petr Pavlu Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- mm/execmem.c | 125 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 102 insertions(+), 23 deletions(-) diff --git a/mm/execmem.c b/mm/execmem.c index d0bf0123bce4..52b06ccf614a 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -93,8 +93,15 @@ struct execmem_cache { struct mutex mutex; struct maple_tree busy_areas; struct maple_tree free_areas; + unsigned int pending_free_cnt; /* protected by mutex */ }; +/* delay to schedule asynchronous free if fast path free fails */ +#define FREE_DELAY (msecs_to_jiffies(10)) + +/* mark entries in busy_areas that should be freed asynchronously */ +#define PENDING_FREE_MASK (1 << (PAGE_SHIFT - 1)) + static struct execmem_cache execmem_cache = { .mutex = __MUTEX_INITIALIZER(execmem_cache.mutex), .busy_areas = MTREE_INIT_EXT(busy_areas, MT_FLAGS_LOCK_EXTERN, @@ -155,20 +162,17 @@ static void execmem_cache_clean(struct work_struct *work) static DECLARE_WORK(execmem_cache_clean_work, execmem_cache_clean); -static int execmem_cache_add(void *ptr, size_t size) +static int execmem_cache_add_locked(void *ptr, size_t size, gfp_t gfp_mask) { struct maple_tree *free_areas = &execmem_cache.free_areas; - struct mutex *mutex = &execmem_cache.mutex; unsigned long addr = (unsigned long)ptr; MA_STATE(mas, free_areas, addr - 1, addr + 1); unsigned long lower, upper; void *area = NULL; - int err; lower = addr; upper = addr + size - 1; - mutex_lock(mutex); area = mas_walk(&mas); if (area && mas.last == addr - 1) lower = mas.index; @@ -178,12 +182,14 @@ static int execmem_cache_add(void *ptr, size_t size) upper = mas.last; mas_set_range(&mas, lower, upper); - err = mas_store_gfp(&mas, (void *)lower, GFP_KERNEL); - mutex_unlock(mutex); - if (err) - return err; + return 
mas_store_gfp(&mas, (void *)lower, gfp_mask); +} - return 0; +static int execmem_cache_add(void *ptr, size_t size, gfp_t gfp_mask) +{ + guard(mutex)(&execmem_cache.mutex); + + return execmem_cache_add_locked(ptr, size, gfp_mask); } static bool within_range(struct execmem_range *range, struct ma_state *mas, @@ -278,7 +284,7 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size) if (err) goto err_free_mem; - err = execmem_cache_add(p, alloc_size); + err = execmem_cache_add(p, alloc_size, GFP_KERNEL); if (err) goto err_reset_direct_map; @@ -307,29 +313,102 @@ static void *execmem_cache_alloc(struct execmem_range *range, size_t size) return __execmem_cache_alloc(range, size); } +static inline bool is_pending_free(void *ptr) +{ + return ((unsigned long)ptr & PENDING_FREE_MASK); +} + +static inline void *pending_free_set(void *ptr) +{ + return (void *)((unsigned long)ptr | PENDING_FREE_MASK); +} + +static inline void *pending_free_clear(void *ptr) +{ + return (void *)((unsigned long)ptr & ~PENDING_FREE_MASK); +} + +static int execmem_force_rw(void *ptr, size_t size); + +static int __execmem_cache_free(struct ma_state *mas, void *ptr, gfp_t gfp_mask) +{ + size_t size = mas_range_len(mas); + int err; + + err = execmem_force_rw(ptr, size); + if (err) + return err; + + execmem_fill_trapping_insns(ptr, size, /* writable = */ true); + execmem_restore_rox(ptr, size); + + err = execmem_cache_add_locked(ptr, size, gfp_mask); + if (err) + return err; + + mas_store_gfp(mas, NULL, gfp_mask); + return 0; +} + +static void execmem_cache_free_slow(struct work_struct *work); +static DECLARE_DELAYED_WORK(execmem_cache_free_work, execmem_cache_free_slow); + +static void execmem_cache_free_slow(struct work_struct *work) +{ + struct maple_tree *busy_areas = &execmem_cache.busy_areas; + MA_STATE(mas, busy_areas, 0, ULONG_MAX); + void *area; + + guard(mutex)(&execmem_cache.mutex); + + if (!execmem_cache.pending_free_cnt) + return; + + mas_for_each(&mas, area, 
ULONG_MAX) { + if (!is_pending_free(area)) + continue; + + area = pending_free_clear(area); + if (__execmem_cache_free(&mas, area, GFP_KERNEL)) + continue; + + execmem_cache.pending_free_cnt--; + } + + if (execmem_cache.pending_free_cnt) + schedule_delayed_work(&execmem_cache_free_work, FREE_DELAY); + else + schedule_work(&execmem_cache_clean_work); +} + static bool execmem_cache_free(void *ptr) { struct maple_tree *busy_areas = &execmem_cache.busy_areas; - struct mutex *mutex = &execmem_cache.mutex; unsigned long addr = (unsigned long)ptr; MA_STATE(mas, busy_areas, addr, addr); - size_t size; void *area; + int err; + + guard(mutex)(&execmem_cache.mutex); - mutex_lock(mutex); area = mas_walk(&mas); - if (!area) { - mutex_unlock(mutex); + if (!area) return false; + + err = __execmem_cache_free(&mas, area, GFP_KERNEL | __GFP_NORETRY); + if (err) { + /* + * mas points to exact slot we've got the area from, nothing + * else can modify the tree because of the mutex, so there + * won't be any allocations in mas_store_gfp() and it will just + * change the pointer. + */ + area = pending_free_set(area); + mas_store_gfp(&mas, area, GFP_KERNEL); + execmem_cache.pending_free_cnt++; + schedule_delayed_work(&execmem_cache_free_work, FREE_DELAY); + return true; } - size = mas_range_len(&mas); - - mas_store_gfp(&mas, NULL, GFP_KERNEL); - mutex_unlock(mutex); - - execmem_fill_trapping_insns(ptr, size, /* writable = */ false); - - execmem_cache_add(ptr, size); schedule_work(&execmem_cache_clean_work); From 888b5a847ba9650f454cd0842ccf8497268da959 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:26 +0300 Subject: [PATCH 1453/2411] execmem: move execmem_force_rw() and execmem_restore_rox() before use to avoid static declarations. 
Link: https://lkml.kernel.org/r/20250713071730.4117334-5-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Peter Zijlstra (Intel) Cc: Daniel Gomez Cc: Masami Hiramatsu (Google) Cc: Petr Pavlu Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- mm/execmem.c | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/mm/execmem.c b/mm/execmem.c index 52b06ccf614a..c99b299b113c 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -137,6 +137,27 @@ static int execmem_set_direct_map_valid(struct vm_struct *vm, bool valid) return err; } +static int execmem_force_rw(void *ptr, size_t size) +{ + unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long addr = (unsigned long)ptr; + int ret; + + ret = set_memory_nx(addr, nr); + if (ret) + return ret; + + return set_memory_rw(addr, nr); +} + +int execmem_restore_rox(void *ptr, size_t size) +{ + unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long addr = (unsigned long)ptr; + + return set_memory_rox(addr, nr); +} + static void execmem_cache_clean(struct work_struct *work) { struct maple_tree *free_areas = &execmem_cache.free_areas; @@ -328,8 +349,6 @@ static inline void *pending_free_clear(void *ptr) return (void *)((unsigned long)ptr & ~PENDING_FREE_MASK); } -static int execmem_force_rw(void *ptr, size_t size); - static int __execmem_cache_free(struct ma_state *mas, void *ptr, gfp_t gfp_mask) { size_t size = mas_range_len(mas); @@ -415,27 +434,6 @@ static bool execmem_cache_free(void *ptr) return true; } -static int execmem_force_rw(void *ptr, size_t size) -{ - unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long addr = (unsigned long)ptr; - int ret; - - ret = set_memory_nx(addr, nr); - if (ret) - return ret; - - return set_memory_rw(addr, nr); -} - -int execmem_restore_rox(void *ptr, size_t size) -{ - unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long addr = (unsigned long)ptr; - - return 
set_memory_rox(addr, nr); -} - #else /* CONFIG_ARCH_HAS_EXECMEM_ROX */ /* * when ROX cache is not used the permissions defined by architectures for From 3bd4e0ac61b2fd87d64572e866f58940d1d5fbdf Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:27 +0300 Subject: [PATCH 1454/2411] execmem: add fallback for failures in vmalloc(VM_ALLOW_HUGE_VMAP) When execmem populates ROX cache it uses vmalloc(VM_ALLOW_HUGE_VMAP). Although vmalloc falls back to allocating base pages if high order allocation fails, it may happen that it still cannot allocate enough memory. Right now ROX cache is only used by modules and in majority of cases the allocations happen at boot time when there's plenty of free memory, but upcoming enabling ROX cache for ftrace and kprobes would mean that execmem allocations can happen when the system is under memory pressure and a failure to allocate large page worth of memory becomes more likely. Fallback to regular vmalloc() if vmalloc(VM_ALLOW_HUGE_VMAP) fails. 
Link: https://lkml.kernel.org/r/20250713071730.4117334-6-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Peter Zijlstra (Intel) Cc: Daniel Gomez Cc: Masami Hiramatsu (Google) Cc: Petr Pavlu Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- mm/execmem.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/execmem.c b/mm/execmem.c index c99b299b113c..9abf76a63a79 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -291,6 +291,11 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size) alloc_size = round_up(size, PMD_SIZE); p = execmem_vmalloc(range, alloc_size, PAGE_KERNEL, vm_flags); + if (!p) { + alloc_size = size; + p = execmem_vmalloc(range, alloc_size, PAGE_KERNEL, vm_flags); + } + if (!p) return err; @@ -462,7 +467,7 @@ void *execmem_alloc(enum execmem_type type, size_t size) bool use_cache = range->flags & EXECMEM_ROX_CACHE; vm_flags_t vm_flags = VM_FLUSH_RESET_PERMS; pgprot_t pgprot = range->pgprot; - void *p; + void *p = NULL; size = PAGE_ALIGN(size); From ab674b6871b049aab2e86d1d7375526368ed175a Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:28 +0300 Subject: [PATCH 1455/2411] execmem: drop writable parameter from execmem_fill_trapping_insns() After update of execmem_cache_free() that made memory writable before updating it, there is no need to update read only memory, so the writable parameter to execmem_fill_trapping_insns() is not needed. Drop it. 
Link: https://lkml.kernel.org/r/20250713071730.4117334-7-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Peter Zijlstra (Intel) Cc: Daniel Gomez Cc: Masami Hiramatsu (Google) Cc: Petr Pavlu Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- arch/x86/mm/init.c | 8 ++------ include/linux/execmem.h | 3 +-- mm/execmem.c | 4 ++-- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 7456df985d96..dbc63f0d538f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -1063,13 +1063,9 @@ unsigned long arch_max_swapfile_size(void) static struct execmem_info execmem_info __ro_after_init; #ifdef CONFIG_ARCH_HAS_EXECMEM_ROX -void execmem_fill_trapping_insns(void *ptr, size_t size, bool writeable) +void execmem_fill_trapping_insns(void *ptr, size_t size) { - /* fill memory with INT3 instructions */ - if (writeable) - memset(ptr, INT3_INSN_OPCODE, size); - else - text_poke_set(ptr, INT3_INSN_OPCODE, size); + memset(ptr, INT3_INSN_OPCODE, size); } #endif diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 8b61b05da7d5..7de229134e30 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -60,12 +60,11 @@ enum execmem_range_flags { * will trap * @ptr: pointer to memory to fill * @size: size of the range to fill - * @writable: is the memory poited by @ptr is writable or ROX * * A hook for architecures to fill execmem ranges with invalid instructions. * Architectures that use EXECMEM_ROX_CACHE must implement this. 
*/ -void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable); +void execmem_fill_trapping_insns(void *ptr, size_t size); /** * execmem_restore_rox - restore read-only-execute permissions diff --git a/mm/execmem.c b/mm/execmem.c index 9abf76a63a79..1785d7f435e4 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -304,7 +304,7 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size) goto err_free_mem; /* fill memory with instructions that will trap */ - execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true); + execmem_fill_trapping_insns(p, alloc_size); err = set_memory_rox((unsigned long)p, vm->nr_pages); if (err) @@ -363,7 +363,7 @@ static int __execmem_cache_free(struct ma_state *mas, void *ptr, gfp_t gfp_mask) if (err) return err; - execmem_fill_trapping_insns(ptr, size, /* writable = */ true); + execmem_fill_trapping_insns(ptr, size); execmem_restore_rox(ptr, size); err = execmem_cache_add_locked(ptr, size, gfp_mask); From 36de1e4238c1243866eaec515ef59972c490367f Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:29 +0300 Subject: [PATCH 1456/2411] x86/kprobes: enable EXECMEM_ROX_CACHE for kprobes allocations x86::alloc_insn_page() always allocates ROX memory. Instead of overriding this method, add EXECMEM_KPROBES entry in execmem_info with pgprot set to PAGE_KERNEL_ROX and use ROX cache when configuration and CPU features allow it. 
Link: https://lkml.kernel.org/r/20250713071730.4117334-8-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu (Google) Cc: Daniel Gomez Cc: Petr Pavlu Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- arch/x86/kernel/kprobes/core.c | 18 ------------------ arch/x86/mm/init.c | 9 ++++++++- 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 47cb8eb138ba..6079d15dab8c 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -481,24 +481,6 @@ static int prepare_singlestep(kprobe_opcode_t *buf, struct kprobe *p, return len; } -/* Make page to RO mode when allocate it */ -void *alloc_insn_page(void) -{ - void *page; - - page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); - if (!page) - return NULL; - - /* - * TODO: Once additional kernel code protection mechanisms are set, ensure - * that the page was not maliciously altered and it is still zeroed. - */ - set_memory_rox((unsigned long)page, 1); - - return page; -} - /* Kprobe x86 instruction emulation - only regs->ip or IF flag modifiers */ static void kprobe_emulate_ifmodifiers(struct kprobe *p, struct pt_regs *regs) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index dbc63f0d538f..442fafd8ff52 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -1098,7 +1098,14 @@ struct execmem_info __init *execmem_arch_setup(void) .pgprot = pgprot, .alignment = MODULE_ALIGN, }, - [EXECMEM_KPROBES ... EXECMEM_BPF] = { + [EXECMEM_KPROBES] = { + .flags = flags, + .start = start, + .end = MODULES_END, + .pgprot = PAGE_KERNEL_ROX, + .alignment = MODULE_ALIGN, + }, + [EXECMEM_FTRACE ... 
EXECMEM_BPF] = { .flags = EXECMEM_KASAN_SHADOW, .start = start, .end = MODULES_END, From 5d79c2be508143559c65ace445e7a951ef92881b Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:30 +0300 Subject: [PATCH 1457/2411] x86/ftrace: enable EXECMEM_ROX_CACHE for ftrace allocations For the most part ftrace uses text poking and can handle ROX memory. The only place that requires writable memory is create_trampoline() that updates the allocated memory and in the end makes it ROX. Use execmem_alloc_rw() in x86::ftrace::alloc_tramp() and enable ROX cache for EXECMEM_FTRACE when configuration and CPU features allow that. Link: https://lkml.kernel.org/r/20250713071730.4117334-9-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Peter Zijlstra (Intel) Acked-by: Steven Rostedt (Google) Cc: Daniel Gomez Cc: Masami Hiramatsu (Google) Cc: Petr Pavlu Signed-off-by: Andrew Morton --- arch/x86/kernel/ftrace.c | 2 +- arch/x86/mm/init.c | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 252e82bcfd2f..4450acec9390 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -263,7 +263,7 @@ void arch_ftrace_update_code(int command) static inline void *alloc_tramp(unsigned long size) { - return execmem_alloc(EXECMEM_FTRACE, size); + return execmem_alloc_rw(EXECMEM_FTRACE, size); } static inline void tramp_free(void *tramp) { diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 442fafd8ff52..bb57e93b4caf 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -1105,7 +1105,14 @@ struct execmem_info __init *execmem_arch_setup(void) .pgprot = PAGE_KERNEL_ROX, .alignment = MODULE_ALIGN, }, - [EXECMEM_FTRACE ... 
EXECMEM_BPF] = { + [EXECMEM_FTRACE] = { + .flags = flags, + .start = start, + .end = MODULES_END, + .pgprot = pgprot, + .alignment = MODULE_ALIGN, + }, + [EXECMEM_BPF] = { .flags = EXECMEM_KASAN_SHADOW, .start = start, .end = MODULES_END, From 0cfc0e7e3d062b93e9eec6828de000981cdfb152 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 28 Jul 2025 15:53:00 +0800 Subject: [PATCH 1458/2411] mm/shmem, swap: avoid redundant Xarray lookup during swapin Patch series "mm/shmem, swap: bugfix and improvement of mTHP swap in", v6. The current THP swapin path has several problems. It may potentially hang, may cause redundant faults due to false positive swap cache lookup, and it issues redundant Xarray walks. !CONFIG_TRANSPARENT_HUGEPAGE builds may also contain unnecessary THP checks. This series fixes all of the mentioned issues, the code should be more robust and prepared for the swap table series. Now 4 walks are reduced to 3 (get order & confirm, confirm, insert folio), !CONFIG_TRANSPARENT_HUGEPAGE build overhead is also minimized, and comes with a sanity check now. The performance is slightly better after this series, sequential swap in of 24G data from ZRAM, using transparent_hugepage_tmpfs=always (24 samples each): Before: avg: 10.66s, stddev: 0.04 After patch 1: avg: 10.58s, stddev: 0.04 After patch 2: avg: 10.65s, stddev: 0.05 After patch 3: avg: 10.65s, stddev: 0.04 After patch 4: avg: 10.67s, stddev: 0.04 After patch 5: avg: 9.79s, stddev: 0.04 After patch 6: avg: 9.79s, stddev: 0.05 After patch 7: avg: 9.78s, stddev: 0.05 After patch 8: avg: 9.79s, stddev: 0.04 Several patches improve the performance by a little, which is about 8% faster in total.
Build kernel test showed a very slight improvement, testing with make -j48 with defconfig in a 768M memcg also using ZRAM as swap, and transparent_hugepage_tmpfs=always (6 test runs): Before: avg: 3334.66s, stddev: 43.76 After patch 1: avg: 3349.77s, stddev: 18.55 After patch 2: avg: 3325.01s, stddev: 42.96 After patch 3: avg: 3354.58s, stddev: 14.62 After patch 4: avg: 3336.24s, stddev: 32.15 After patch 5: avg: 3325.13s, stddev: 22.14 After patch 6: avg: 3285.03s, stddev: 38.95 After patch 7: avg: 3287.32s, stddev: 26.37 After patch 8: avg: 3295.87s, stddev: 46.24 This patch (of 7): Currently shmem calls xa_get_order to get the swap radix entry order, requiring a full tree walk. This can be easily combined with the swap entry value checking (shmem_confirm_swap) to avoid the duplicated lookup and abort early if the entry is gone already, which should improve the performance. Link: https://lkml.kernel.org/r/20250728075306.12704-1-ryncsn@gmail.com Link: https://lkml.kernel.org/r/20250728075306.12704-3-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Kemeng Shi Reviewed-by: Dev Jain Reviewed-by: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: Hugh Dickins Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Signed-off-by: Andrew Morton --- mm/shmem.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 5e9ec28fab85..6e00d2ec40a9 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -512,15 +512,27 @@ static int shmem_replace_entry(struct address_space *mapping, /* * Sometimes, before we decide whether to proceed or to fail, we must check - * that an entry was not already brought back from swap by a racing thread. + * that an entry was not already brought back or split by a racing thread. * * Checking folio is not enough: by the time a swapcache folio is locked, it * might be reused, and again be swapcache, using the same swap as before.
+ * Returns the swap entry's order if it still presents, else returns -1. */ -static bool shmem_confirm_swap(struct address_space *mapping, - pgoff_t index, swp_entry_t swap) +static int shmem_confirm_swap(struct address_space *mapping, pgoff_t index, + swp_entry_t swap) { - return xa_load(&mapping->i_pages, index) == swp_to_radix_entry(swap); + XA_STATE(xas, &mapping->i_pages, index); + int ret = -1; + void *entry; + + rcu_read_lock(); + do { + entry = xas_load(&xas); + if (entry == swp_to_radix_entry(swap)) + ret = xas_get_order(&xas); + } while (xas_retry(&xas, entry)); + rcu_read_unlock(); + return ret; } /* @@ -2293,16 +2305,20 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, return -EIO; si = get_swap_device(swap); - if (!si) { - if (!shmem_confirm_swap(mapping, index, swap)) + order = shmem_confirm_swap(mapping, index, swap); + if (unlikely(!si)) { + if (order < 0) return -EEXIST; else return -EINVAL; } + if (unlikely(order < 0)) { + put_swap_device(si); + return -EEXIST; + } /* Look it up and read it in.. 
*/ folio = swap_cache_get_folio(swap, NULL, 0); - order = xa_get_order(&mapping->i_pages, index); if (!folio) { int nr_pages = 1 << order; bool fallback_order0 = false; @@ -2412,7 +2428,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, */ folio_lock(folio); if ((!skip_swapcache && !folio_test_swapcache(folio)) || - !shmem_confirm_swap(mapping, index, swap) || + shmem_confirm_swap(mapping, index, swap) < 0 || folio->swap.val != swap.val) { error = -EEXIST; goto unlock; @@ -2460,7 +2476,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, *foliop = folio; return 0; failed: - if (!shmem_confirm_swap(mapping, index, swap)) + if (shmem_confirm_swap(mapping, index, swap) < 0) error = -EEXIST; if (error == -EIO) shmem_set_folio_swapin_error(inode, index, folio, swap, From c262ffd72c8539d16ada8641a6348c5a88f0c542 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 28 Jul 2025 15:53:01 +0800 Subject: [PATCH 1459/2411] mm/shmem, swap: tidy up THP swapin checks Move all THP swapin related checks under CONFIG_TRANSPARENT_HUGEPAGE, so they will be trimmed off by the compiler if not needed. And add a WARN if shmem sees an order > 0 entry when CONFIG_TRANSPARENT_HUGEPAGE is disabled; that should never happen unless things went very wrong. There should be no observable feature change except the newly added WARN.
Link: https://lkml.kernel.org/r/20250728075306.12704-4-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: Dev Jain Cc: Hugh Dickins Cc: Kemeng Shi Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Signed-off-by: Andrew Morton --- mm/shmem.c | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 6e00d2ec40a9..88058d53ae55 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2017,26 +2017,38 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode, swp_entry_t entry, int order, gfp_t gfp) { struct shmem_inode_info *info = SHMEM_I(inode); + int nr_pages = 1 << order; struct folio *new; void *shadow; - int nr_pages; /* * We have arrived here because our zones are constrained, so don't * limit chance of success with further cpuset and node constraints. */ gfp &= ~GFP_CONSTRAINT_MASK; - if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && order > 0) { - gfp_t huge_gfp = vma_thp_gfp_mask(vma); + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { + if (WARN_ON_ONCE(order)) + return ERR_PTR(-EINVAL); + } else if (order) { + /* + * If uffd is active for the vma, we need per-page fault + * fidelity to maintain the uffd semantics, then fallback + * to swapin order-0 folio, as well as for zswap case. + * Any existing sub folio in the swap cache also blocks + * mTHP swapin. + */ + if ((vma && unlikely(userfaultfd_armed(vma))) || + !zswap_never_enabled() || + non_swapcache_batch(entry, nr_pages) != nr_pages) + return ERR_PTR(-EINVAL); - gfp = limit_gfp_mask(huge_gfp, gfp); + gfp = limit_gfp_mask(vma_thp_gfp_mask(vma), gfp); } new = shmem_alloc_folio(gfp, order, info, index); if (!new) return ERR_PTR(-ENOMEM); - nr_pages = folio_nr_pages(new); if (mem_cgroup_swapin_charge_folio(new, vma ? 
vma->vm_mm : NULL, gfp, entry)) { folio_put(new); @@ -2320,9 +2332,6 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, /* Look it up and read it in.. */ folio = swap_cache_get_folio(swap, NULL, 0); if (!folio) { - int nr_pages = 1 << order; - bool fallback_order0 = false; - /* Or update major stats only when swapin succeeds?? */ if (fault_type) { *fault_type |= VM_FAULT_MAJOR; @@ -2330,20 +2339,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, count_memcg_event_mm(fault_mm, PGMAJFAULT); } - /* - * If uffd is active for the vma, we need per-page fault - * fidelity to maintain the uffd semantics, then fallback - * to swapin order-0 folio, as well as for zswap case. - * Any existing sub folio in the swap cache also blocks - * mTHP swapin. - */ - if (order > 0 && ((vma && unlikely(userfaultfd_armed(vma))) || - !zswap_never_enabled() || - non_swapcache_batch(swap, nr_pages) != nr_pages)) - fallback_order0 = true; - /* Skip swapcache for synchronous device. */ - if (!fallback_order0 && data_race(si->flags & SWP_SYNCHRONOUS_IO)) { + if (data_race(si->flags & SWP_SYNCHRONOUS_IO)) { folio = shmem_swap_alloc_folio(inode, vma, index, swap, order, gfp); if (!IS_ERR(folio)) { skip_swapcache = true; From 91ab656ece137c368a3189dfd42f8c9203a6285c Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 28 Jul 2025 15:53:02 +0800 Subject: [PATCH 1460/2411] mm/shmem, swap: tidy up swap entry splitting Instead of keeping different paths of splitting the entry before the swap in start, move the entry splitting after the swapin has put the folio in swap cache (or set the SWAP_HAS_CACHE bit). This way we only need one place and one unified way to split the large entry. Whenever swapin brought in a folio smaller than the shmem swap entry, split the entry and recalculate the entry and index for verification. This removes duplicated codes and function calls, reduces LOC, and the split is less racy as it's guarded by swap cache now. 
So it will have a lower chance of repeated faults due to raced split. The compiler is also able to optimize the code further: bloat-o-meter results with GCC 14: With DEBUG_SECTION_MISMATCH (-fno-inline-functions-called-once): ./scripts/bloat-o-meter mm/shmem.o.old mm/shmem.o add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-143 (-143) Function old new delta shmem_swapin_folio 2358 2215 -143 Total: Before=32933, After=32790, chg -0.43% With !DEBUG_SECTION_MISMATCH: add/remove: 0/1 grow/shrink: 1/0 up/down: 1069/-749 (320) Function old new delta shmem_swapin_folio 2871 3940 +1069 shmem_split_large_entry.isra 749 - -749 Total: Before=32806, After=33126, chg +0.98% Since shmem_split_large_entry is only called in one place now, the compiler will either generate more compact code or inline it for better performance. Link: https://lkml.kernel.org/r/20250728075306.12704-5-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: Dev Jain Cc: Hugh Dickins Cc: Kemeng Shi Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Signed-off-by: Andrew Morton --- mm/shmem.c | 56 ++++++++++++++++++++++-------------------------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 88058d53ae55..5e10e27e8b73 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2303,14 +2303,16 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct mm_struct *fault_mm = vma ?
vma->vm_mm : NULL; struct shmem_inode_info *info = SHMEM_I(inode); + swp_entry_t swap, index_entry; struct swap_info_struct *si; struct folio *folio = NULL; bool skip_swapcache = false; - swp_entry_t swap; int error, nr_pages, order, split_order; + pgoff_t offset; VM_BUG_ON(!*foliop || !xa_is_value(*foliop)); - swap = radix_to_swp_entry(*foliop); + index_entry = radix_to_swp_entry(*foliop); + swap = index_entry; *foliop = NULL; if (is_poisoned_swp_entry(swap)) @@ -2358,46 +2360,35 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, } /* - * Now swap device can only swap in order 0 folio, then we - * should split the large swap entry stored in the pagecache - * if necessary. - */ - split_order = shmem_split_large_entry(inode, index, swap, gfp); - if (split_order < 0) { - error = split_order; - goto failed; - } - - /* - * If the large swap entry has already been split, it is + * Now swap device can only swap in order 0 folio, it is * necessary to recalculate the new swap entry based on - * the old order alignment. + * the offset, as the swapin index might be unalgined. */ - if (split_order > 0) { - pgoff_t offset = index - round_down(index, 1 << split_order); - + if (order) { + offset = index - round_down(index, 1 << order); swap = swp_entry(swp_type(swap), swp_offset(swap) + offset); } - /* Here we actually start the io */ folio = shmem_swapin_cluster(swap, gfp, info, index); if (!folio) { error = -ENOMEM; goto failed; } - } else if (order > folio_order(folio)) { + } +alloced: + if (order > folio_order(folio)) { /* - * Swap readahead may swap in order 0 folios into swapcache + * Swapin may get smaller folios due to various reasons: + * It may fallback to order 0 due to memory pressure or race, + * swap readahead may swap in order 0 folios into swapcache * asynchronously, while the shmem mapping can still stores * large swap entries. In such cases, we should split the * large swap entry to prevent possible data corruption. 
*/ - split_order = shmem_split_large_entry(inode, index, swap, gfp); + split_order = shmem_split_large_entry(inode, index, index_entry, gfp); if (split_order < 0) { - folio_put(folio); - folio = NULL; error = split_order; - goto failed; + goto failed_nolock; } /* @@ -2406,16 +2397,14 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, * the old order alignment. */ if (split_order > 0) { - pgoff_t offset = index - round_down(index, 1 << split_order); - - swap = swp_entry(swp_type(swap), swp_offset(swap) + offset); + offset = index - round_down(index, 1 << split_order); + swap = swp_entry(swp_type(swap), swp_offset(index_entry) + offset); } } else if (order < folio_order(folio)) { swap.val = round_down(swap.val, 1 << folio_order(folio)); index = round_down(index, 1 << folio_order(folio)); } -alloced: /* * We have to do this with the folio locked to prevent races. * The shmem_confirm_swap below only checks if the first swap @@ -2479,12 +2468,13 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, shmem_set_folio_swapin_error(inode, index, folio, swap, skip_swapcache); unlock: - if (skip_swapcache) - swapcache_clear(si, swap, folio_nr_pages(folio)); - if (folio) { + if (folio) folio_unlock(folio); +failed_nolock: + if (skip_swapcache) + swapcache_clear(si, folio->swap, folio_nr_pages(folio)); + if (folio) folio_put(folio); - } put_swap_device(si); return error; From 69805ea79db6634d4e7d596f3f36667924dc6cbf Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 28 Jul 2025 15:53:03 +0800 Subject: [PATCH 1461/2411] mm/shmem, swap: never use swap cache and readahead for SWP_SYNCHRONOUS_IO For SWP_SYNCHRONOUS_IO devices, if a cache bypassing THP swapin failed due to reasons like memory pressure, partially conflicting swap cache or ZSWAP enabled, shmem will fallback to cached order 0 swapin. 
Right now the swap cache still has a non-trivial overhead, and readahead is not helpful for SWP_SYNCHRONOUS_IO devices, so we should always skip the readahead and swap cache even if the swapin falls back to order 0. So handle the fallback logic without falling back to the cached read. Link: https://lkml.kernel.org/r/20250728075306.12704-6-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: Dev Jain Cc: Hugh Dickins Cc: Kemeng Shi Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Signed-off-by: Andrew Morton --- mm/shmem.c | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 5e10e27e8b73..e6207ad6f2f1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2019,6 +2019,7 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode, struct shmem_inode_info *info = SHMEM_I(inode); int nr_pages = 1 << order; struct folio *new; + gfp_t alloc_gfp; void *shadow; /* @@ -2026,6 +2027,7 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode, * limit chance of success with further cpuset and node constraints. */ gfp &= ~GFP_CONSTRAINT_MASK; + alloc_gfp = gfp; if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { if (WARN_ON_ONCE(order)) return ERR_PTR(-EINVAL); @@ -2040,19 +2042,22 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode, if ((vma && unlikely(userfaultfd_armed(vma))) || !zswap_never_enabled() || non_swapcache_batch(entry, nr_pages) != nr_pages) - return ERR_PTR(-EINVAL); + goto fallback; - gfp = limit_gfp_mask(vma_thp_gfp_mask(vma), gfp); + alloc_gfp = limit_gfp_mask(vma_thp_gfp_mask(vma), gfp); + } +retry: + new = shmem_alloc_folio(alloc_gfp, order, info, index); + if (!new) { + new = ERR_PTR(-ENOMEM); + goto fallback; } - new = shmem_alloc_folio(gfp, order, info, index); - if (!new) - return ERR_PTR(-ENOMEM); - if (mem_cgroup_swapin_charge_folio(new, vma ? 
vma->vm_mm : NULL, - gfp, entry)) { + alloc_gfp, entry)) { folio_put(new); - return ERR_PTR(-ENOMEM); + new = ERR_PTR(-ENOMEM); + goto fallback; } /* @@ -2067,7 +2072,9 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode, */ if (swapcache_prepare(entry, nr_pages)) { folio_put(new); - return ERR_PTR(-EEXIST); + new = ERR_PTR(-EEXIST); + /* Try smaller folio to avoid cache conflict */ + goto fallback; } __folio_set_locked(new); @@ -2081,6 +2088,15 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode, folio_add_lru(new); swap_read_folio(new, NULL); return new; +fallback: + /* Order 0 swapin failed, nothing to fallback to, abort */ + if (!order) + return new; + entry.val += index - round_down(index, nr_pages); + alloc_gfp = gfp; + nr_pages = 1; + order = 0; + goto retry; } /* @@ -2350,13 +2366,12 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, } /* - * Fallback to swapin order-0 folio unless the swap entry - * already exists. + * Direct swapin handled order 0 fallback already, + * if it failed, abort. */ error = PTR_ERR(folio); folio = NULL; - if (error == -EEXIST) - goto failed; + goto failed; } /* From 1326359f22805b2b0e9567ec0099980b8956fc29 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 28 Jul 2025 15:53:04 +0800 Subject: [PATCH 1462/2411] mm/shmem, swap: simplify swapin path and result handling Slightly tidy up the different handling of swap in and error handling for SWP_SYNCHRONOUS_IO and non-SWP_SYNCHRONOUS_IO devices. Now swapin will always use either shmem_swap_alloc_folio or shmem_swapin_cluster, then check the result. Simplify the control flow and avoid a redundant goto label. 
Link: https://lkml.kernel.org/r/20250728075306.12704-7-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: Dev Jain Cc: Hugh Dickins Cc: Kemeng Shi Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Signed-off-by: Andrew Morton --- mm/shmem.c | 49 +++++++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index e6207ad6f2f1..0de37d014524 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2357,40 +2357,33 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, count_memcg_event_mm(fault_mm, PGMAJFAULT); } - /* Skip swapcache for synchronous device. */ if (data_race(si->flags & SWP_SYNCHRONOUS_IO)) { + /* Direct swapin skipping swap cache & readahead */ folio = shmem_swap_alloc_folio(inode, vma, index, swap, order, gfp); - if (!IS_ERR(folio)) { - skip_swapcache = true; - goto alloced; + if (IS_ERR(folio)) { + error = PTR_ERR(folio); + folio = NULL; + goto failed; + } + skip_swapcache = true; + } else { + /* + * Cached swapin only supports order 0 folio, it is + * necessary to recalculate the new swap entry based on + * the offset, as the swapin index might be unalgined. + */ + if (order) { + offset = index - round_down(index, 1 << order); + swap = swp_entry(swp_type(swap), swp_offset(swap) + offset); } - /* - * Direct swapin handled order 0 fallback already, - * if it failed, abort. - */ - error = PTR_ERR(folio); - folio = NULL; - goto failed; - } - - /* - * Now swap device can only swap in order 0 folio, it is - * necessary to recalculate the new swap entry based on - * the offset, as the swapin index might be unalgined. 
- */ - if (order) { - offset = index - round_down(index, 1 << order); - swap = swp_entry(swp_type(swap), swp_offset(swap) + offset); - } - - folio = shmem_swapin_cluster(swap, gfp, info, index); - if (!folio) { - error = -ENOMEM; - goto failed; + folio = shmem_swapin_cluster(swap, gfp, info, index); + if (!folio) { + error = -ENOMEM; + goto failed; + } } } -alloced: if (order > folio_order(folio)) { /* * Swapin may get smaller folios due to various reasons: From 93c0476e705768c7ca902cffea4efb500b9678b4 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 28 Jul 2025 15:53:05 +0800 Subject: [PATCH 1463/2411] mm/shmem, swap: rework swap entry and index calculation for large swapin Instead of calculating the swap entry differently in different swapin paths, calculate it early before the swap cache lookup and use that for the lookup and later swapin. And after swapin has brought in a folio, simply round the swap entry down against the size of the folio. This is simple and effective enough to verify the swap value. A folio's swap entry is always aligned by its size. Any kind of parallel split or race is acceptable because the final shmem_add_to_page_cache ensures that all entries covered by the folio are correct, and thus there will be no data corruption. This also prevents false positive cache lookup. Previously, if a shmem read request's index pointed to the middle of a large swap entry, shmem would try the swap cache lookup using the large swap entry's starting value (which is the first sub swap entry of this large entry). This led to false positive lookup results if only the first few swap entries were cached but the actual requested swap entry pointed to by the index was uncached. This is not a rare event, as swap readahead always tries to cache order 0 folios when possible. And this shouldn't cause any increased repeated faults. Instead, no matter how the shmem mapping is split in parallel, as long as the mapping still contains the right entries, the swapin will succeed.
The final object size and stack usage are also reduced due to simplified code: ./scripts/bloat-o-meter mm/shmem.o.old mm/shmem.o add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-145 (-145) Function old new delta shmem_swapin_folio 4056 3911 -145 Total: Before=33242, After=33097, chg -0.44% Stack usage (Before vs After): mm/shmem.c:2314:12:shmem_swapin_folio 264 static mm/shmem.c:2314:12:shmem_swapin_folio 256 static And while at it, round down the index too if swap entry is round down. The index is used either for folio reallocation or confirming the mapping content. In either case, it should be aligned with the swap folio. Link: https://lkml.kernel.org/r/20250728075306.12704-8-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: Dev Jain Cc: Hugh Dickins Cc: Kemeng Shi Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Signed-off-by: Andrew Morton --- mm/shmem.c | 67 +++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 0de37d014524..33d30ee5bc84 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2302,7 +2302,7 @@ static int shmem_split_large_entry(struct inode *inode, pgoff_t index, if (xas_error(&xas)) return xas_error(&xas); - return entry_order; + return 0; } /* @@ -2323,7 +2323,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, struct swap_info_struct *si; struct folio *folio = NULL; bool skip_swapcache = false; - int error, nr_pages, order, split_order; + int error, nr_pages, order; pgoff_t offset; VM_BUG_ON(!*foliop || !xa_is_value(*foliop)); @@ -2331,11 +2331,11 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, swap = index_entry; *foliop = NULL; - if (is_poisoned_swp_entry(swap)) + if (is_poisoned_swp_entry(index_entry)) return -EIO; - si = get_swap_device(swap); - order = shmem_confirm_swap(mapping, index, swap); + si = get_swap_device(index_entry); + order 
= shmem_confirm_swap(mapping, index, index_entry); if (unlikely(!si)) { if (order < 0) return -EEXIST; @@ -2347,6 +2347,12 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, return -EEXIST; } + /* index may point to the middle of a large entry, get the sub entry */ + if (order) { + offset = index - round_down(index, 1 << order); + swap = swp_entry(swp_type(swap), swp_offset(swap) + offset); + } + /* Look it up and read it in.. */ folio = swap_cache_get_folio(swap, NULL, 0); if (!folio) { @@ -2359,7 +2365,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, if (data_race(si->flags & SWP_SYNCHRONOUS_IO)) { /* Direct swapin skipping swap cache & readahead */ - folio = shmem_swap_alloc_folio(inode, vma, index, swap, order, gfp); + folio = shmem_swap_alloc_folio(inode, vma, index, + index_entry, order, gfp); if (IS_ERR(folio)) { error = PTR_ERR(folio); folio = NULL; @@ -2367,16 +2374,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, } skip_swapcache = true; } else { - /* - * Cached swapin only supports order 0 folio, it is - * necessary to recalculate the new swap entry based on - * the offset, as the swapin index might be unalgined. - */ - if (order) { - offset = index - round_down(index, 1 << order); - swap = swp_entry(swp_type(swap), swp_offset(swap) + offset); - } - + /* Cached swapin only supports order 0 folio */ folio = shmem_swapin_cluster(swap, gfp, info, index); if (!folio) { error = -ENOMEM; @@ -2384,6 +2382,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, } } } + if (order > folio_order(folio)) { /* * Swapin may get smaller folios due to various reasons: @@ -2393,24 +2392,25 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, * large swap entries. In such cases, we should split the * large swap entry to prevent possible data corruption. 
*/ - split_order = shmem_split_large_entry(inode, index, index_entry, gfp); - if (split_order < 0) { - error = split_order; + error = shmem_split_large_entry(inode, index, index_entry, gfp); + if (error) goto failed_nolock; - } + } - /* - * If the large swap entry has already been split, it is - * necessary to recalculate the new swap entry based on - * the old order alignment. - */ - if (split_order > 0) { - offset = index - round_down(index, 1 << split_order); - swap = swp_entry(swp_type(swap), swp_offset(index_entry) + offset); - } - } else if (order < folio_order(folio)) { - swap.val = round_down(swap.val, 1 << folio_order(folio)); - index = round_down(index, 1 << folio_order(folio)); + /* + * If the folio is large, round down swap and index by folio size. + * No matter what race occurs, the swap layer ensures we either get + * a valid folio that has its swap entry aligned by size, or a + * temporarily invalid one which we'll abort very soon and retry. + * + * shmem_add_to_page_cache ensures the whole range contains expected + * entries and prevents any corruption, so any race split is fine + * too, it will succeed as long as the entries are still there. + */ + nr_pages = folio_nr_pages(folio); + if (nr_pages > 1) { + swap.val = round_down(swap.val, nr_pages); + index = round_down(index, nr_pages); } /* @@ -2446,8 +2446,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, goto failed; } - error = shmem_add_to_page_cache(folio, mapping, - round_down(index, nr_pages), + error = shmem_add_to_page_cache(folio, mapping, index, swp_to_radix_entry(swap), gfp); if (error) goto failed; From de55be42379cc0561aadfd9e1459239dea70be32 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 28 Jul 2025 15:53:06 +0800 Subject: [PATCH 1464/2411] mm/shmem, swap: fix major fault counting If the swapin failed, don't update the major fault count. There is a long existing comment for doing it this way, now with previous cleanups, we can finally fix it. 
Link: https://lkml.kernel.org/r/20250728075306.12704-9-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: Dev Jain Cc: Hugh Dickins Cc: Kemeng Shi Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Signed-off-by: Andrew Morton --- mm/shmem.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 33d30ee5bc84..e1e5d5f7f58d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2356,13 +2356,6 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, /* Look it up and read it in.. */ folio = swap_cache_get_folio(swap, NULL, 0); if (!folio) { - /* Or update major stats only when swapin succeeds?? */ - if (fault_type) { - *fault_type |= VM_FAULT_MAJOR; - count_vm_event(PGMAJFAULT); - count_memcg_event_mm(fault_mm, PGMAJFAULT); - } - if (data_race(si->flags & SWP_SYNCHRONOUS_IO)) { /* Direct swapin skipping swap cache & readahead */ folio = shmem_swap_alloc_folio(inode, vma, index, @@ -2381,6 +2374,11 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, goto failed; } } + if (fault_type) { + *fault_type |= VM_FAULT_MAJOR; + count_vm_event(PGMAJFAULT); + count_memcg_event_mm(fault_mm, PGMAJFAULT); + } } if (order > folio_order(folio)) { From f04fd85f15945f3ff189701050e3ce303c1a4d98 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 29 Jul 2025 12:49:06 +0100 Subject: [PATCH 1465/2411] mm: correct type for vmalloc vm_flags fields Several functions refer to the unfortunately named 'vm_flags' field when referencing vmalloc flags, which happens to be the precise same name used for VMA flags. As a result these were erroneously changed to use the vm_flags_t type (which currently is a typedef equivalent to unsigned long). Currently this has no impact, but in future when vm_flags_t changes this will result in issues, so change the type to unsigned long to account for this. 
[lorenzo.stoakes@oracle.com: fixup very disguised vmalloc flags parameter] Link: https://lkml.kernel.org/r/e74dd8de-7e60-47ab-8a45-2c851f3c5d26@lucifer.local Link: https://lkml.kernel.org/r/20250729114906.55347-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reported-by: Harry Yoo Closes: https://lore.kernel.org/all/aIgSpAnU8EaIcqd9@hyeyoo/ Reviewed-by: Pedro Falcato Acked-by: David Hildenbrand Reviewed-by: Harry Yoo Acked-by: Vlastimil Babka Cc: Jann Horn Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: "Uladzislau Rezki (Sony)" Signed-off-by: Andrew Morton --- arch/arm64/mm/mmu.c | 2 +- mm/execmem.c | 8 ++++---- mm/internal.h | 2 +- mm/nommu.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 20a89ab97dc5..34e5d78af076 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -721,7 +721,7 @@ void mark_rodata_ro(void) static void __init declare_vma(struct vm_struct *vma, void *va_start, void *va_end, - vm_flags_t vm_flags) + unsigned long vm_flags) { phys_addr_t pa_start = __pa_symbol(va_start); unsigned long size = va_end - va_start; diff --git a/mm/execmem.c b/mm/execmem.c index 1785d7f435e4..0822305413ec 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -26,7 +26,7 @@ static struct execmem_info default_execmem_info __ro_after_init; #ifdef CONFIG_MMU static void *execmem_vmalloc(struct execmem_range *range, size_t size, - pgprot_t pgprot, vm_flags_t vm_flags) + pgprot_t pgprot, unsigned long vm_flags) { bool kasan = range->flags & EXECMEM_KASAN_SHADOW; gfp_t gfp_flags = GFP_KERNEL | __GFP_NOWARN; @@ -82,7 +82,7 @@ struct vm_struct *execmem_vmap(size_t size) } #else static void *execmem_vmalloc(struct execmem_range *range, size_t size, - pgprot_t pgprot, vm_flags_t vm_flags) + pgprot_t pgprot, unsigned long vm_flags) { return vmalloc(size); } @@ -283,7 +283,7 @@ static void *__execmem_cache_alloc(struct execmem_range *range, size_t size) static int 
execmem_cache_populate(struct execmem_range *range, size_t size) { - vm_flags_t vm_flags = VM_ALLOW_HUGE_VMAP; + unsigned long vm_flags = VM_ALLOW_HUGE_VMAP; struct vm_struct *vm; size_t alloc_size; int err = -ENOMEM; @@ -465,7 +465,7 @@ void *execmem_alloc(enum execmem_type type, size_t size) { struct execmem_range *range = &execmem_info->ranges[type]; bool use_cache = range->flags & EXECMEM_ROX_CACHE; - vm_flags_t vm_flags = VM_FLUSH_RESET_PERMS; + unsigned long vm_flags = VM_FLUSH_RESET_PERMS; pgprot_t pgprot = range->pgprot; void *p = NULL; diff --git a/mm/internal.h b/mm/internal.h index 28d2d5b051df..142d9302c2ae 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1391,7 +1391,7 @@ int migrate_device_coherent_folio(struct folio *folio); struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long align, unsigned long shift, - vm_flags_t vm_flags, unsigned long start, + unsigned long vm_flags, unsigned long start, unsigned long end, int node, gfp_t gfp_mask, const void *caller); diff --git a/mm/nommu.c b/mm/nommu.c index 87e1acab0d64..07504d666d6a 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -126,7 +126,7 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags) void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, - pgprot_t prot, vm_flags_t vm_flags, int node, + pgprot_t prot, unsigned long vm_flags, int node, const void *caller) { return __vmalloc_noprof(size, gfp_mask); From a2152fef29020e740ba0276930f3a24440012505 Mon Sep 17 00:00:00 2001 From: Yadan Fan Date: Fri, 1 Aug 2025 02:14:45 +0800 Subject: [PATCH 1466/2411] mm: mempool: fix crash in mempool_free() for zero-minimum pools The mempool wake-up fix introduced in commit a5867a218d7c ("mm: mempool: fix wake-up edge case bug for zero-minimum pools") inlined the add_element() logic in mempool_free() to return the element to the zero-minimum pool: pool->elements[pool->curr_nr++] = element; This causes crash, 
because mempool_init_node() does not perform a real allocation for a zero-minimum pool; it only assigns ZERO_SIZE_PTR to the elements array, which cannot be dereferenced. The pre-allocation of this array never happens either, since the while test: while (pool->curr_nr < pool->min_nr) can never be satisfied when min_nr is zero, so the pool does not actually reserve any buffer. The only way so far is to call alloc_fn() to get a buffer from SLUB, but if memory is under high pressure alloc_fn() may never get any buffer, and the waiting thread would be stuck in an indefinite periodic wake-sleep loop until there is free memory to get. This patch changes mempool_init_node() to allocate 1 element for the elements array of a zero-minimum pool, so that the pool has a reserved buffer to use. This fixes the crash and lets the waiting thread get the reserved element when alloc_fn() fails to get a buffer under high memory pressure. Also modify add_element() to support zero-minimum pools, simplifying the zero-minimum handling code in mempool_free().
Link: https://lkml.kernel.org/r/e01f00f3-58d9-4ca7-af54-bfa42fec9527@suse.com Fixes: a5867a218d7c ("mm: mempool: fix wake-up edge case bug for zero-minimum pools") Signed-off-by: Yadan Fan Signed-off-by: Andrew Morton --- mm/mempool.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/mm/mempool.c b/mm/mempool.c index 204a216b6418..1c38e873e546 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -136,7 +136,7 @@ static void kasan_unpoison_element(mempool_t *pool, void *element) static __always_inline void add_element(mempool_t *pool, void *element) { - BUG_ON(pool->curr_nr >= pool->min_nr); + BUG_ON(pool->min_nr != 0 && pool->curr_nr >= pool->min_nr); poison_element(pool, element); if (kasan_poison_element(pool, element)) pool->elements[pool->curr_nr++] = element; @@ -202,16 +202,20 @@ int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, pool->alloc = alloc_fn; pool->free = free_fn; init_waitqueue_head(&pool->wait); - - pool->elements = kmalloc_array_node(min_nr, sizeof(void *), + /* + * max() used here to ensure storage for at least 1 element to support + * zero minimum pool + */ + pool->elements = kmalloc_array_node(max(1, min_nr), sizeof(void *), gfp_mask, node_id); if (!pool->elements) return -ENOMEM; /* - * First pre-allocate the guaranteed number of buffers. + * First pre-allocate the guaranteed number of buffers, + * also pre-allocate 1 element for zero minimum pool. */ - while (pool->curr_nr < pool->min_nr) { + while (pool->curr_nr < max(1, pool->min_nr)) { void *element; element = pool->alloc(gfp_mask, pool->pool_data); @@ -555,20 +559,12 @@ void mempool_free(void *element, mempool_t *pool) * wake-up path of previous test. This explicit check ensures the * allocation of element when both min_nr and curr_nr are 0, and * any active waiters are properly awakened. 
- * - * Inline the same logic as previous test, add_element() cannot be - * directly used here since it has BUG_ON to deny if min_nr equals - * curr_nr, so here picked rest of add_element() to use without - * BUG_ON check. */ if (unlikely(pool->min_nr == 0 && READ_ONCE(pool->curr_nr) == 0)) { spin_lock_irqsave(&pool->lock, flags); if (likely(pool->curr_nr == 0)) { - /* Inline the logic of add_element() */ - poison_element(pool, element); - if (kasan_poison_element(pool, element)) - pool->elements[pool->curr_nr++] = element; + add_element(pool, element); spin_unlock_irqrestore(&pool->lock, flags); if (wq_has_sleeper(&pool->wait)) wake_up(&pool->wait); From af0db3c1f898144846d4c172531a199bb3ca375d Mon Sep 17 00:00:00 2001 From: Sravan Kumar Gundu Date: Thu, 31 Jul 2025 15:36:18 -0500 Subject: [PATCH 1467/2411] fbdev: Fix vmalloc out-of-bounds write in fast_imageblit This issue triggers when a userspace program issues the FBIOPUT_CON2FBMAP ioctl, passing a console number and a frame buffer number. Ideally this maps the console to the frame buffer and updates the screen if the console is visible. As part of the mapping, the console has to be resized according to the frame buffer info. If this resize fails, vc_do_resize() returns an error but the caller continues regardless. At this point the console and the new frame buffer are mapped and the display vars are set. Despite the failure, the code still proceeds to update the screen at later stages, where vc_data still relates to the previous frame buffer while the frame buffer info and display vars are mapped to the new frame buffer, eventually leading to an out-of-bounds write in fast_imageblit(). This behaviour is expected only when fg_console is equal to the requested console, which is a visible console, and the screen is updated with invalid struct references in fbcon_putcs().
Reported-and-tested-by: syzbot+c4b7aa0513823e2ea880@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c4b7aa0513823e2ea880 Signed-off-by: Sravan Kumar Gundu Cc: stable@vger.kernel.org Signed-off-by: Helge Deller --- drivers/video/fbdev/core/fbcon.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 2df48037688d..e71629618e6a 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -825,7 +825,8 @@ static void con2fb_init_display(struct vc_data *vc, struct fb_info *info, fg_vc->vc_rows); } - update_screen(vc_cons[fg_console].d); + if (fg_console != unit) + update_screen(vc_cons[fg_console].d); } /** @@ -1362,6 +1363,7 @@ static void fbcon_set_disp(struct fb_info *info, struct fb_var_screeninfo *var, struct vc_data *svc; struct fbcon_ops *ops = info->fbcon_par; int rows, cols; + unsigned long ret = 0; p = &fb_display[unit]; @@ -1412,11 +1414,10 @@ static void fbcon_set_disp(struct fb_info *info, struct fb_var_screeninfo *var, rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres); cols /= vc->vc_font.width; rows /= vc->vc_font.height; - vc_resize(vc, cols, rows); + ret = vc_resize(vc, cols, rows); - if (con_is_visible(vc)) { + if (con_is_visible(vc) && !ret) update_screen(vc); - } } static __inline__ void ywrap_up(struct vc_data *vc, int count) From e4fc307d8e24f122402907ebf585248cad52841d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 2 Aug 2025 21:34:37 +0200 Subject: [PATCH 1468/2411] Revert "vgacon: Add check for vc_origin address range in vgacon_scroll()" This reverts commit 864f9963ec6b4b76d104d595ba28110b87158003. The patch is wrong as it checks vc_origin against vc_screenbuf, while in text mode it should compare against vga_vram_base. 
As such it broke VGA text scrolling, which can be reproduced like this: (1) boot a kernel that is configured to use text mode VGA-console (2) type commands: ls -l /usr/bin | less -S (3) scroll up/down with cursor-down/up keys Reported-by: Jari Ruusu Cc: stable@vger.kernel.org Cc: Yi Yang Cc: GONG Ruiqi Signed-off-by: Helge Deller --- drivers/video/console/vgacon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index f9cdbf8c53e3..37bd18730fe0 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -1168,7 +1168,7 @@ static bool vgacon_scroll(struct vc_data *c, unsigned int t, unsigned int b, c->vc_screenbuf_size - delta); c->vc_origin = vga_vram_end - c->vc_screenbuf_size; vga_rolled_over = 0; - } else if (oldo - delta >= (unsigned long)c->vc_screenbuf) + } else c->vc_origin -= delta; c->vc_scr_end = c->vc_origin + c->vc_screenbuf_size; scr_memsetw((u16 *) (c->vc_origin), c->vc_video_erase_char, From cf6eb547a24af7ad7bbd2abe9c5327f956bbeae8 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:21 -0400 Subject: [PATCH 1469/2411] rtc: ds1307: fix incorrect maximum clock rate handling When ds3231_clk_sqw_round_rate() is called with a requested rate higher than the highest supported rate, it currently returns 0, which disables the clock. According to the clk API, round_rate() should instead return the highest supported rate. Update the function to return the maximum supported rate in this case. 
Fixes: 6c6ff145b3346 ("rtc: ds1307: add clock provider support for DS3231") Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-1-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds1307.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index ce0994d9219a..1960d1bd851c 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -1477,7 +1477,7 @@ static long ds3231_clk_sqw_round_rate(struct clk_hw *hw, unsigned long rate, return ds3231_clk_sqw_rates[i]; } - return 0; + return ds3231_clk_sqw_rates[ARRAY_SIZE(ds3231_clk_sqw_rates) - 1]; } static int ds3231_clk_sqw_set_rate(struct clk_hw *hw, unsigned long rate, From d0a518eb0a692a2ab8357e844970660c5ea37720 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:22 -0400 Subject: [PATCH 1470/2411] rtc: hym8563: fix incorrect maximum clock rate handling When hym8563_clkout_round_rate() is called with a requested rate higher than the highest supported rate, it currently returns 0, which disables the clock. According to the clk API, round_rate() should instead return the highest supported rate. Update the function to return the maximum supported rate in this case. 
Fixes: dcaf038493525 ("rtc: add hym8563 rtc-driver") Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-2-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-hym8563.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-hym8563.c b/drivers/rtc/rtc-hym8563.c index 63f11ea3589d..759dc2ad6e3b 100644 --- a/drivers/rtc/rtc-hym8563.c +++ b/drivers/rtc/rtc-hym8563.c @@ -294,7 +294,7 @@ static long hym8563_clkout_round_rate(struct clk_hw *hw, unsigned long rate, if (clkout_rates[i] <= rate) return clkout_rates[i]; - return 0; + return clkout_rates[0]; } static int hym8563_clkout_set_rate(struct clk_hw *hw, unsigned long rate, From 437c59e4b222cd697b4cf95995d933e7d583c5f1 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:23 -0400 Subject: [PATCH 1471/2411] rtc: nct3018y: fix incorrect maximum clock rate handling When nct3018y_clkout_round_rate() is called with a requested rate higher than the highest supported rate, it currently returns 0, which disables the clock. According to the clk API, round_rate() should instead return the highest supported rate. Update the function to return the maximum supported rate in this case. 
Fixes: 5adbaed16cc63 ("rtc: Add NCT3018Y real time clock driver") Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-3-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-nct3018y.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-nct3018y.c b/drivers/rtc/rtc-nct3018y.c index 76c5f464b2da..cea05fca0bcc 100644 --- a/drivers/rtc/rtc-nct3018y.c +++ b/drivers/rtc/rtc-nct3018y.c @@ -376,7 +376,7 @@ static long nct3018y_clkout_round_rate(struct clk_hw *hw, unsigned long rate, if (clkout_rates[i] <= rate) return clkout_rates[i]; - return 0; + return clkout_rates[0]; } static int nct3018y_clkout_set_rate(struct clk_hw *hw, unsigned long rate, From 186ae1869880e58bb3f142d222abdb35ecb4df0f Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:24 -0400 Subject: [PATCH 1472/2411] rtc: pcf85063: fix incorrect maximum clock rate handling When pcf85063_clkout_round_rate() is called with a requested rate higher than the highest supported rate, it currently returns 0, which disables the clock. According to the clk API, round_rate() should instead return the highest supported rate. Update the function to return the maximum supported rate in this case. 
Fixes: 8c229ab6048b7 ("rtc: pcf85063: Add pcf85063 clkout control to common clock framework") Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-4-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf85063.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index e3b58cdb1eda..e312bbbf5a42 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -414,7 +414,7 @@ static long pcf85063_clkout_round_rate(struct clk_hw *hw, unsigned long rate, if (clkout_rates[i] <= rate) return clkout_rates[i]; - return 0; + return clkout_rates[0]; } static int pcf85063_clkout_set_rate(struct clk_hw *hw, unsigned long rate, From 906726a5efeefe0ef0103ccff5312a09080c04ae Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:25 -0400 Subject: [PATCH 1473/2411] rtc: pcf8563: fix incorrect maximum clock rate handling When pcf8563_clkout_round_rate() is called with a requested rate higher than the highest supported rate, it currently returns 0, which disables the clock. According to the clk API, round_rate() should instead return the highest supported rate. Update the function to return the maximum supported rate in this case. 
Fixes: a39a6405d5f94 ("rtc: pcf8563: add CLKOUT to common clock framework") Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-5-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf8563.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index b2611697fa5e..a2a2067b28a1 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -339,7 +339,7 @@ static long pcf8563_clkout_round_rate(struct clk_hw *hw, unsigned long rate, if (clkout_rates[i] <= rate) return clkout_rates[i]; - return 0; + return clkout_rates[0]; } static int pcf8563_clkout_set_rate(struct clk_hw *hw, unsigned long rate, From b574acb3cf7591d2513a9f29f8c2021ad55fb881 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:26 -0400 Subject: [PATCH 1474/2411] rtc: rv3028: fix incorrect maximum clock rate handling When rv3028_clkout_round_rate() is called with a requested rate higher than the highest supported rate, it currently returns 0, which disables the clock. According to the clk API, round_rate() should instead return the highest supported rate. Update the function to return the maximum supported rate in this case. 
Fixes: f583c341a515f ("rtc: rv3028: add clkout support") Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-6-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rv3028.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-rv3028.c b/drivers/rtc/rtc-rv3028.c index 868d1b1eb0f4..278841c2e47e 100644 --- a/drivers/rtc/rtc-rv3028.c +++ b/drivers/rtc/rtc-rv3028.c @@ -740,7 +740,7 @@ static long rv3028_clkout_round_rate(struct clk_hw *hw, unsigned long rate, if (clkout_rates[i] <= rate) return clkout_rates[i]; - return 0; + return clkout_rates[0]; } static int rv3028_clkout_set_rate(struct clk_hw *hw, unsigned long rate, From 31b5fea399d57cea6657bc4515d1e93cd528a510 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:27 -0400 Subject: [PATCH 1475/2411] rtc: ds1307: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate() using the Coccinelle semantic patch on the cover letter of this series. 
Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-7-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds1307.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 1960d1bd851c..7205c59ff729 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -1467,17 +1467,22 @@ static unsigned long ds3231_clk_sqw_recalc_rate(struct clk_hw *hw, return ds3231_clk_sqw_rates[rate_sel]; } -static long ds3231_clk_sqw_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int ds3231_clk_sqw_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { int i; for (i = ARRAY_SIZE(ds3231_clk_sqw_rates) - 1; i >= 0; i--) { - if (ds3231_clk_sqw_rates[i] <= rate) - return ds3231_clk_sqw_rates[i]; + if (ds3231_clk_sqw_rates[i] <= req->rate) { + req->rate = ds3231_clk_sqw_rates[i]; + + return 0; + } } - return ds3231_clk_sqw_rates[ARRAY_SIZE(ds3231_clk_sqw_rates) - 1]; + req->rate = ds3231_clk_sqw_rates[ARRAY_SIZE(ds3231_clk_sqw_rates) - 1]; + + return 0; } static int ds3231_clk_sqw_set_rate(struct clk_hw *hw, unsigned long rate, @@ -1536,7 +1541,7 @@ static const struct clk_ops ds3231_clk_sqw_ops = { .unprepare = ds3231_clk_sqw_unprepare, .is_prepared = ds3231_clk_sqw_is_prepared, .recalc_rate = ds3231_clk_sqw_recalc_rate, - .round_rate = ds3231_clk_sqw_round_rate, + .determine_rate = ds3231_clk_sqw_determine_rate, .set_rate = ds3231_clk_sqw_set_rate, }; From 394a4b920a72b032f531bc9d115ff7f4571547cb Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:28 -0400 Subject: [PATCH 1476/2411] rtc: hym8563: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate() using the Coccinelle semantic patch on the cover letter of this series. 
Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-8-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-hym8563.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-hym8563.c b/drivers/rtc/rtc-hym8563.c index 759dc2ad6e3b..7a170c0f9710 100644 --- a/drivers/rtc/rtc-hym8563.c +++ b/drivers/rtc/rtc-hym8563.c @@ -285,16 +285,21 @@ static unsigned long hym8563_clkout_recalc_rate(struct clk_hw *hw, return clkout_rates[ret]; } -static long hym8563_clkout_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int hym8563_clkout_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { int i; for (i = 0; i < ARRAY_SIZE(clkout_rates); i++) - if (clkout_rates[i] <= rate) - return clkout_rates[i]; + if (clkout_rates[i] <= req->rate) { + req->rate = clkout_rates[i]; - return clkout_rates[0]; + return 0; + } + + req->rate = clkout_rates[0]; + + return 0; } static int hym8563_clkout_set_rate(struct clk_hw *hw, unsigned long rate, @@ -363,7 +368,7 @@ static const struct clk_ops hym8563_clkout_ops = { .unprepare = hym8563_clkout_unprepare, .is_prepared = hym8563_clkout_is_prepared, .recalc_rate = hym8563_clkout_recalc_rate, - .round_rate = hym8563_clkout_round_rate, + .determine_rate = hym8563_clkout_determine_rate, .set_rate = hym8563_clkout_set_rate, }; From e05d81b75efd500fda90251d745bfd83903d806b Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:29 -0400 Subject: [PATCH 1477/2411] rtc: m41t80: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate() using the Coccinelle semantic patch on the cover letter of this series. 
Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-9-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-m41t80.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index 869358e9305b..740cab013f59 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -484,16 +484,17 @@ static unsigned long m41t80_sqw_recalc_rate(struct clk_hw *hw, return sqw_to_m41t80_data(hw)->freq; } -static long m41t80_sqw_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int m41t80_sqw_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { - if (rate >= M41T80_SQW_MAX_FREQ) - return M41T80_SQW_MAX_FREQ; - if (rate >= M41T80_SQW_MAX_FREQ / 4) - return M41T80_SQW_MAX_FREQ / 4; - if (!rate) - return 0; - return 1 << ilog2(rate); + if (req->rate >= M41T80_SQW_MAX_FREQ) + req->rate = M41T80_SQW_MAX_FREQ; + else if (req->rate >= M41T80_SQW_MAX_FREQ / 4) + req->rate = M41T80_SQW_MAX_FREQ / 4; + else if (req->rate) + req->rate = 1 << ilog2(req->rate); + + return 0; } static int m41t80_sqw_set_rate(struct clk_hw *hw, unsigned long rate, @@ -564,7 +565,7 @@ static const struct clk_ops m41t80_sqw_ops = { .unprepare = m41t80_sqw_unprepare, .is_prepared = m41t80_sqw_is_prepared, .recalc_rate = m41t80_sqw_recalc_rate, - .round_rate = m41t80_sqw_round_rate, + .determine_rate = m41t80_sqw_determine_rate, .set_rate = m41t80_sqw_set_rate, }; From 9e0dfc7962b3d0e08af98ffa6859a085dea6fca4 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:30 -0400 Subject: [PATCH 1478/2411] rtc: max31335: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate() using the Coccinelle semantic patch on the cover letter of this series. 
Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-10-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-max31335.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-max31335.c b/drivers/rtc/rtc-max31335.c index a7bb37aaab9e..dfb5bad3a369 100644 --- a/drivers/rtc/rtc-max31335.c +++ b/drivers/rtc/rtc-max31335.c @@ -497,15 +497,17 @@ static unsigned long max31335_clkout_recalc_rate(struct clk_hw *hw, return max31335_clkout_freq[reg & freq_mask]; } -static long max31335_clkout_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int max31335_clkout_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { int index; - index = find_closest(rate, max31335_clkout_freq, + index = find_closest(req->rate, max31335_clkout_freq, ARRAY_SIZE(max31335_clkout_freq)); - return max31335_clkout_freq[index]; + req->rate = max31335_clkout_freq[index]; + + return 0; } static int max31335_clkout_set_rate(struct clk_hw *hw, unsigned long rate, @@ -554,7 +556,7 @@ static int max31335_clkout_is_enabled(struct clk_hw *hw) static const struct clk_ops max31335_clkout_ops = { .recalc_rate = max31335_clkout_recalc_rate, - .round_rate = max31335_clkout_round_rate, + .determine_rate = max31335_clkout_determine_rate, .set_rate = max31335_clkout_set_rate, .enable = max31335_clkout_enable, .disable = max31335_clkout_disable, From 1251d043f7648fd3210b383fd589d522142b9914 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:31 -0400 Subject: [PATCH 1479/2411] rtc: nct3018y: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate() using the Coccinelle semantic patch on the cover letter of this series. 
Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-11-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-nct3018y.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-nct3018y.c b/drivers/rtc/rtc-nct3018y.c index cea05fca0bcc..cd4b1db902e9 100644 --- a/drivers/rtc/rtc-nct3018y.c +++ b/drivers/rtc/rtc-nct3018y.c @@ -367,16 +367,21 @@ static unsigned long nct3018y_clkout_recalc_rate(struct clk_hw *hw, return clkout_rates[flags]; } -static long nct3018y_clkout_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int nct3018y_clkout_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { int i; for (i = 0; i < ARRAY_SIZE(clkout_rates); i++) - if (clkout_rates[i] <= rate) - return clkout_rates[i]; + if (clkout_rates[i] <= req->rate) { + req->rate = clkout_rates[i]; - return clkout_rates[0]; + return 0; + } + + req->rate = clkout_rates[0]; + + return 0; } static int nct3018y_clkout_set_rate(struct clk_hw *hw, unsigned long rate, @@ -446,7 +451,7 @@ static const struct clk_ops nct3018y_clkout_ops = { .unprepare = nct3018y_clkout_unprepare, .is_prepared = nct3018y_clkout_is_prepared, .recalc_rate = nct3018y_clkout_recalc_rate, - .round_rate = nct3018y_clkout_round_rate, + .determine_rate = nct3018y_clkout_determine_rate, .set_rate = nct3018y_clkout_set_rate, }; From ad853657d7913458219df56d060a50993b122acc Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:32 -0400 Subject: [PATCH 1480/2411] rtc: pcf85063: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate() using the Coccinelle semantic patch on the cover letter of this series. 
Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-12-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf85063.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index e312bbbf5a42..f643e0bd7351 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -405,16 +405,21 @@ static unsigned long pcf85063_clkout_recalc_rate(struct clk_hw *hw, return clkout_rates[buf]; } -static long pcf85063_clkout_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int pcf85063_clkout_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { int i; for (i = 0; i < ARRAY_SIZE(clkout_rates); i++) - if (clkout_rates[i] <= rate) - return clkout_rates[i]; + if (clkout_rates[i] <= req->rate) { + req->rate = clkout_rates[i]; - return clkout_rates[0]; + return 0; + } + + req->rate = clkout_rates[0]; + + return 0; } static int pcf85063_clkout_set_rate(struct clk_hw *hw, unsigned long rate, @@ -486,7 +491,7 @@ static const struct clk_ops pcf85063_clkout_ops = { .unprepare = pcf85063_clkout_unprepare, .is_prepared = pcf85063_clkout_is_prepared, .recalc_rate = pcf85063_clkout_recalc_rate, - .round_rate = pcf85063_clkout_round_rate, + .determine_rate = pcf85063_clkout_determine_rate, .set_rate = pcf85063_clkout_set_rate, }; From e6f1af719ea1ec918827d369a80e2176410b0b90 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:33 -0400 Subject: [PATCH 1481/2411] rtc: pcf8563: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate() using the Coccinelle semantic patch on the cover letter of this series. 
Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-13-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf8563.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index a2a2067b28a1..4e61011fb7a9 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -330,16 +330,21 @@ static unsigned long pcf8563_clkout_recalc_rate(struct clk_hw *hw, return clkout_rates[buf]; } -static long pcf8563_clkout_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int pcf8563_clkout_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { int i; for (i = 0; i < ARRAY_SIZE(clkout_rates); i++) - if (clkout_rates[i] <= rate) - return clkout_rates[i]; + if (clkout_rates[i] <= req->rate) { + req->rate = clkout_rates[i]; - return clkout_rates[0]; + return 0; + } + + req->rate = clkout_rates[0]; + + return 0; } static int pcf8563_clkout_set_rate(struct clk_hw *hw, unsigned long rate, @@ -413,7 +418,7 @@ static const struct clk_ops pcf8563_clkout_ops = { .unprepare = pcf8563_clkout_unprepare, .is_prepared = pcf8563_clkout_is_prepared, .recalc_rate = pcf8563_clkout_recalc_rate, - .round_rate = pcf8563_clkout_round_rate, + .determine_rate = pcf8563_clkout_determine_rate, .set_rate = pcf8563_clkout_set_rate, }; From c4253b0914410fd18eb2fc8558e77c150e329f55 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:34 -0400 Subject: [PATCH 1482/2411] rtc: rv3028: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate() using the Coccinelle semantic patch on the cover letter of this series. 
Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-14-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rv3028.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-rv3028.c b/drivers/rtc/rtc-rv3028.c index 278841c2e47e..c2a531f0e125 100644 --- a/drivers/rtc/rtc-rv3028.c +++ b/drivers/rtc/rtc-rv3028.c @@ -731,16 +731,21 @@ static unsigned long rv3028_clkout_recalc_rate(struct clk_hw *hw, return clkout_rates[clkout]; } -static long rv3028_clkout_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int rv3028_clkout_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { int i; for (i = 0; i < ARRAY_SIZE(clkout_rates); i++) - if (clkout_rates[i] <= rate) - return clkout_rates[i]; + if (clkout_rates[i] <= req->rate) { + req->rate = clkout_rates[i]; - return clkout_rates[0]; + return 0; + } + + req->rate = clkout_rates[0]; + + return 0; } static int rv3028_clkout_set_rate(struct clk_hw *hw, unsigned long rate, @@ -802,7 +807,7 @@ static const struct clk_ops rv3028_clkout_ops = { .unprepare = rv3028_clkout_unprepare, .is_prepared = rv3028_clkout_is_prepared, .recalc_rate = rv3028_clkout_recalc_rate, - .round_rate = rv3028_clkout_round_rate, + .determine_rate = rv3028_clkout_determine_rate, .set_rate = rv3028_clkout_set_rate, }; From 35d6aae85b3653630b43913aee15d8b35b7190c6 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:35 -0400 Subject: [PATCH 1483/2411] rtc: rv3032: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate() using the Coccinelle semantic patch on the cover letter of this series. 
Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-15-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rv3032.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/rtc/rtc-rv3032.c b/drivers/rtc/rtc-rv3032.c index 2c6a8918acba..b8376bd1d905 100644 --- a/drivers/rtc/rtc-rv3032.c +++ b/drivers/rtc/rtc-rv3032.c @@ -646,19 +646,24 @@ static unsigned long rv3032_clkout_recalc_rate(struct clk_hw *hw, return clkout_xtal_rates[FIELD_GET(RV3032_CLKOUT2_FD_MSK, clkout)]; } -static long rv3032_clkout_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int rv3032_clkout_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { int i, hfd; - if (rate < RV3032_HFD_STEP) + if (req->rate < RV3032_HFD_STEP) for (i = 0; i < ARRAY_SIZE(clkout_xtal_rates); i++) - if (clkout_xtal_rates[i] <= rate) - return clkout_xtal_rates[i]; + if (clkout_xtal_rates[i] <= req->rate) { + req->rate = clkout_xtal_rates[i]; - hfd = DIV_ROUND_CLOSEST(rate, RV3032_HFD_STEP); + return 0; + } - return RV3032_HFD_STEP * clamp(hfd, 0, 8192); + hfd = DIV_ROUND_CLOSEST(req->rate, RV3032_HFD_STEP); + + req->rate = RV3032_HFD_STEP * clamp(hfd, 0, 8192); + + return 0; } static int rv3032_clkout_set_rate(struct clk_hw *hw, unsigned long rate, @@ -738,7 +743,7 @@ static const struct clk_ops rv3032_clkout_ops = { .unprepare = rv3032_clkout_unprepare, .is_prepared = rv3032_clkout_is_prepared, .recalc_rate = rv3032_clkout_recalc_rate, - .round_rate = rv3032_clkout_round_rate, + .determine_rate = rv3032_clkout_determine_rate, .set_rate = rv3032_clkout_set_rate, }; From bb5b0b4317c9516bdc5e9a4235e3b5f1a73b7e48 Mon Sep 17 00:00:00 2001 From: Joshua Kinard Date: Mon, 21 Jul 2025 13:00:51 -0400 Subject: [PATCH 1484/2411] rtc: ds1685: Update Joshua Kinard's email address. 
I am switching my address to a personal domain, so need to update the driver's files and the entry in MAINTAINERS. Signed-off-by: Joshua Kinard Link: https://lore.kernel.org/r/20250721170051.32407-1-kumba@gentoo.org Signed-off-by: Alexandre Belloni --- MAINTAINERS | 2 +- drivers/rtc/rtc-ds1685.c | 4 ++-- include/linux/rtc/ds1685.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..536befd32be8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6608,7 +6608,7 @@ S: Supported F: drivers/input/keyboard/dlink-dir685-touchkeys.c DALLAS/MAXIM DS1685-FAMILY REAL TIME CLOCK -M: Joshua Kinard +M: Joshua Kinard S: Maintained F: drivers/rtc/rtc-ds1685.c F: include/linux/rtc/ds1685.h diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c index 38e25f63597a..97423f1d0361 100644 --- a/drivers/rtc/rtc-ds1685.c +++ b/drivers/rtc/rtc-ds1685.c @@ -3,7 +3,7 @@ * An rtc driver for the Dallas/Maxim DS1685/DS1687 and related real-time * chips. * - * Copyright (C) 2011-2014 Joshua Kinard . + * Copyright (C) 2011-2014 Joshua Kinard . * Copyright (C) 2009 Matthias Fuchs . * * References: @@ -1436,7 +1436,7 @@ EXPORT_SYMBOL_GPL(ds1685_rtc_poweroff); /* ----------------------------------------------------------------------- */ -MODULE_AUTHOR("Joshua Kinard "); +MODULE_AUTHOR("Joshua Kinard "); MODULE_AUTHOR("Matthias Fuchs "); MODULE_DESCRIPTION("Dallas/Maxim DS1685/DS1687-series RTC driver"); MODULE_LICENSE("GPL"); diff --git a/include/linux/rtc/ds1685.h b/include/linux/rtc/ds1685.h index 5a41c3bbcbe3..01da4582db6d 100644 --- a/include/linux/rtc/ds1685.h +++ b/include/linux/rtc/ds1685.h @@ -8,7 +8,7 @@ * include larger, battery-backed NV-SRAM, burst-mode access, and an RTC * write counter. * - * Copyright (C) 2011-2014 Joshua Kinard . + * Copyright (C) 2011-2014 Joshua Kinard . * Copyright (C) 2009 Matthias Fuchs . 
* * References: From 987ca60637a46882a026ca9cc9f3e5f26e8aec28 Mon Sep 17 00:00:00 2001 From: Wang Jinchao Date: Mon, 7 Jul 2025 09:26:57 +0800 Subject: [PATCH 1485/2411] md/raid1: change r1conf->r1bio_pool to a pointer type In raid1_reshape(), newpool is a stack variable. mempool_init() initializes newpool->wait with the stack address. After assigning newpool to conf->r1bio_pool, the wait queue needs to be reinitialized, which is not ideal. Change r1conf->r1bio_pool to a pointer type and replace mempool_init() with mempool_create_kmalloc_pool() to avoid referencing a stack-based wait queue. Signed-off-by: Wang Jinchao Link: https://lore.kernel.org/linux-raid/20250707012711.376844-2-yukuai1@huaweicloud.com Signed-off-by: Yu Kuai --- drivers/md/raid1.c | 39 ++++++++++++++++++--------------------- drivers/md/raid1.h | 2 +- 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 6cee738a645f..589223fef20a 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -255,7 +255,7 @@ static void free_r1bio(struct r1bio *r1_bio) struct r1conf *conf = r1_bio->mddev->private; put_all_bios(conf, r1_bio); - mempool_free(r1_bio, &conf->r1bio_pool); + mempool_free(r1_bio, conf->r1bio_pool); } static void put_buf(struct r1bio *r1_bio) @@ -1305,9 +1305,8 @@ alloc_r1bio(struct mddev *mddev, struct bio *bio) struct r1conf *conf = mddev->private; struct r1bio *r1_bio; - r1_bio = mempool_alloc(&conf->r1bio_pool, GFP_NOIO); - /* Ensure no bio records IO_BLOCKED */ - memset(r1_bio->bios, 0, conf->raid_disks * sizeof(r1_bio->bios[0])); + r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); + memset(r1_bio, 0, offsetof(struct r1bio, bios[conf->raid_disks * 2])); init_r1bio(r1_bio, mddev, bio); return r1_bio; } @@ -3085,6 +3084,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) int i; struct raid1_info *disk; struct md_rdev *rdev; + size_t r1bio_size; int err = -ENOMEM; conf = kzalloc(sizeof(struct r1conf), GFP_KERNEL); @@
-3125,9 +3125,10 @@ static struct r1conf *setup_conf(struct mddev *mddev) if (!conf->poolinfo) goto abort; conf->poolinfo->raid_disks = mddev->raid_disks * 2; - err = mempool_init(&conf->r1bio_pool, NR_RAID_BIOS, r1bio_pool_alloc, - rbio_pool_free, conf->poolinfo); - if (err) + + r1bio_size = offsetof(struct r1bio, bios[mddev->raid_disks * 2]); + conf->r1bio_pool = mempool_create_kmalloc_pool(NR_RAID_BIOS, r1bio_size); + if (!conf->r1bio_pool) goto abort; err = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0); @@ -3198,7 +3199,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) abort: if (conf) { - mempool_exit(&conf->r1bio_pool); + mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); safe_put_page(conf->tmppage); kfree(conf->poolinfo); @@ -3311,7 +3312,7 @@ static void raid1_free(struct mddev *mddev, void *priv) { struct r1conf *conf = priv; - mempool_exit(&conf->r1bio_pool); + mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); safe_put_page(conf->tmppage); kfree(conf->poolinfo); @@ -3367,17 +3368,14 @@ static int raid1_reshape(struct mddev *mddev) * At the same time, we "pack" the devices so that all the missing * devices have the higher raid_disk numbers. 
*/ - mempool_t newpool, oldpool; + mempool_t *newpool, *oldpool; struct pool_info *newpoolinfo; + size_t new_r1bio_size; struct raid1_info *newmirrors; struct r1conf *conf = mddev->private; int cnt, raid_disks; unsigned long flags; int d, d2; - int ret; - - memset(&newpool, 0, sizeof(newpool)); - memset(&oldpool, 0, sizeof(oldpool)); /* Cannot change chunk_size, layout, or level */ if (mddev->chunk_sectors != mddev->new_chunk_sectors || @@ -3409,18 +3407,18 @@ static int raid1_reshape(struct mddev *mddev) newpoolinfo->mddev = mddev; newpoolinfo->raid_disks = raid_disks * 2; - ret = mempool_init(&newpool, NR_RAID_BIOS, r1bio_pool_alloc, - rbio_pool_free, newpoolinfo); - if (ret) { + new_r1bio_size = offsetof(struct r1bio, bios[raid_disks * 2]); + newpool = mempool_create_kmalloc_pool(NR_RAID_BIOS, new_r1bio_size); + if (!newpool) { kfree(newpoolinfo); - return ret; + return -ENOMEM; } newmirrors = kzalloc(array3_size(sizeof(struct raid1_info), raid_disks, 2), GFP_KERNEL); if (!newmirrors) { kfree(newpoolinfo); - mempool_exit(&newpool); + mempool_destroy(newpool); return -ENOMEM; } @@ -3429,7 +3427,6 @@ static int raid1_reshape(struct mddev *mddev) /* ok, everything is stopped */ oldpool = conf->r1bio_pool; conf->r1bio_pool = newpool; - init_waitqueue_head(&conf->r1bio_pool.wait); for (d = d2 = 0; d < conf->raid_disks; d++) { struct md_rdev *rdev = conf->mirrors[d].rdev; @@ -3461,7 +3458,7 @@ static int raid1_reshape(struct mddev *mddev) set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); - mempool_exit(&oldpool); + mempool_destroy(oldpool); return 0; } diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index 33f318fcc268..652c347b1a70 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -118,7 +118,7 @@ struct r1conf { * mempools - it changes when the array grows or shrinks */ struct pool_info *poolinfo; - mempool_t r1bio_pool; + mempool_t *r1bio_pool; mempool_t r1buf_pool; struct bio_set bio_split; From 
178d1391c5ce0fc829f3e748058acab9bd9ca4f4 Mon Sep 17 00:00:00 2001 From: Wang Jinchao Date: Mon, 7 Jul 2025 09:26:58 +0800 Subject: [PATCH 1486/2411] md/raid1: remove struct pool_info and related code The struct pool_info was originally introduced mainly to support reshape operations, serving as a parameter for mempool_init() when raid_disks changes. Now that mempool_create_kmalloc_pool() is sufficient for this purpose, struct pool_info and its related code are no longer needed. Remove struct pool_info and all associated code. Signed-off-by: Wang Jinchao Link: https://lore.kernel.org/linux-raid/20250707012711.376844-3-yukuai1@huaweicloud.com Signed-off-by: Yu Kuai --- drivers/md/raid1.c | 49 +++++++++++++--------------------------------- drivers/md/raid1.h | 20 ------------------- 2 files changed, 14 insertions(+), 55 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 589223fef20a..408c26398321 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -127,10 +127,9 @@ static inline struct r1bio *get_resync_r1bio(struct bio *bio) return get_resync_pages(bio)->raid_bio; } -static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) +static void *r1bio_pool_alloc(gfp_t gfp_flags, struct r1conf *conf) { - struct pool_info *pi = data; - int size = offsetof(struct r1bio, bios[pi->raid_disks]); + int size = offsetof(struct r1bio, bios[conf->raid_disks * 2]); /* allocate a r1bio with room for raid_disks entries in the bios array */ return kzalloc(size, gfp_flags); @@ -145,18 +144,18 @@ static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) { - struct pool_info *pi = data; + struct r1conf *conf = data; struct r1bio *r1_bio; struct bio *bio; int need_pages; int j; struct resync_pages *rps; - r1_bio = r1bio_pool_alloc(gfp_flags, pi); + r1_bio = r1bio_pool_alloc(gfp_flags, conf); if (!r1_bio) return NULL; - rps = kmalloc_array(pi->raid_disks, sizeof(struct resync_pages), + rps = 
kmalloc_array(conf->raid_disks * 2, sizeof(struct resync_pages), gfp_flags); if (!rps) goto out_free_r1bio; @@ -164,7 +163,7 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) /* * Allocate bios : 1 for reading, n-1 for writing */ - for (j = pi->raid_disks ; j-- ; ) { + for (j = conf->raid_disks * 2; j-- ; ) { bio = bio_kmalloc(RESYNC_PAGES, gfp_flags); if (!bio) goto out_free_bio; @@ -177,11 +176,11 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) * If this is a user-requested check/repair, allocate * RESYNC_PAGES for each bio. */ - if (test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) - need_pages = pi->raid_disks; + if (test_bit(MD_RECOVERY_REQUESTED, &conf->mddev->recovery)) + need_pages = conf->raid_disks * 2; else need_pages = 1; - for (j = 0; j < pi->raid_disks; j++) { + for (j = 0; j < conf->raid_disks * 2; j++) { struct resync_pages *rp = &rps[j]; bio = r1_bio->bios[j]; @@ -207,7 +206,7 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) resync_free_pages(&rps[j]); out_free_bio: - while (++j < pi->raid_disks) { + while (++j < conf->raid_disks * 2) { bio_uninit(r1_bio->bios[j]); kfree(r1_bio->bios[j]); } @@ -220,12 +219,12 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) static void r1buf_pool_free(void *__r1_bio, void *data) { - struct pool_info *pi = data; + struct r1conf *conf = data; int i; struct r1bio *r1bio = __r1_bio; struct resync_pages *rp = NULL; - for (i = pi->raid_disks; i--; ) { + for (i = conf->raid_disks * 2; i--; ) { rp = get_resync_pages(r1bio->bios[i]); resync_free_pages(rp); bio_uninit(r1bio->bios[i]); @@ -2746,7 +2745,7 @@ static int init_resync(struct r1conf *conf) BUG_ON(mempool_initialized(&conf->r1buf_pool)); return mempool_init(&conf->r1buf_pool, buffs, r1buf_pool_alloc, - r1buf_pool_free, conf->poolinfo); + r1buf_pool_free, conf); } static struct r1bio *raid1_alloc_init_r1buf(struct r1conf *conf) @@ -2756,7 +2755,7 @@ static struct r1bio *raid1_alloc_init_r1buf(struct r1conf 
*conf) struct bio *bio; int i; - for (i = conf->poolinfo->raid_disks; i--; ) { + for (i = conf->raid_disks * 2; i--; ) { bio = r1bio->bios[i]; rps = bio->bi_private; bio_reset(bio, NULL, 0); @@ -3121,11 +3120,6 @@ static struct r1conf *setup_conf(struct mddev *mddev) if (!conf->tmppage) goto abort; - conf->poolinfo = kzalloc(sizeof(*conf->poolinfo), GFP_KERNEL); - if (!conf->poolinfo) - goto abort; - conf->poolinfo->raid_disks = mddev->raid_disks * 2; - r1bio_size = offsetof(struct r1bio, bios[mddev->raid_disks * 2]); conf->r1bio_pool = mempool_create_kmalloc_pool(NR_RAID_BIOS, r1bio_size); if (!conf->r1bio_pool) @@ -3135,8 +3129,6 @@ static struct r1conf *setup_conf(struct mddev *mddev) if (err) goto abort; - conf->poolinfo->mddev = mddev; - err = -EINVAL; spin_lock_init(&conf->device_lock); conf->raid_disks = mddev->raid_disks; @@ -3202,7 +3194,6 @@ static struct r1conf *setup_conf(struct mddev *mddev) mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); safe_put_page(conf->tmppage); - kfree(conf->poolinfo); kfree(conf->nr_pending); kfree(conf->nr_waiting); kfree(conf->nr_queued); @@ -3315,7 +3306,6 @@ static void raid1_free(struct mddev *mddev, void *priv) mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); safe_put_page(conf->tmppage); - kfree(conf->poolinfo); kfree(conf->nr_pending); kfree(conf->nr_waiting); kfree(conf->nr_queued); @@ -3369,7 +3359,6 @@ static int raid1_reshape(struct mddev *mddev) * devices have the higher raid_disk numbers. 
*/ mempool_t *newpool, *oldpool; - struct pool_info *newpoolinfo; size_t new_r1bio_size; struct raid1_info *newmirrors; struct r1conf *conf = mddev->private; @@ -3401,23 +3390,15 @@ static int raid1_reshape(struct mddev *mddev) return -EBUSY; } - newpoolinfo = kmalloc(sizeof(*newpoolinfo), GFP_KERNEL); - if (!newpoolinfo) - return -ENOMEM; - newpoolinfo->mddev = mddev; - newpoolinfo->raid_disks = raid_disks * 2; - new_r1bio_size = offsetof(struct r1bio, bios[raid_disks * 2]); newpool = mempool_create_kmalloc_pool(NR_RAID_BIOS, new_r1bio_size); if (!newpool) { - kfree(newpoolinfo); return -ENOMEM; } newmirrors = kzalloc(array3_size(sizeof(struct raid1_info), raid_disks, 2), GFP_KERNEL); if (!newmirrors) { - kfree(newpoolinfo); mempool_destroy(newpool); return -ENOMEM; } @@ -3443,8 +3424,6 @@ static int raid1_reshape(struct mddev *mddev) } kfree(conf->mirrors); conf->mirrors = newmirrors; - kfree(conf->poolinfo); - conf->poolinfo = newpoolinfo; spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded += (raid_disks - conf->raid_disks); diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index 652c347b1a70..d236ef179cfb 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -49,22 +49,6 @@ struct raid1_info { sector_t seq_start; }; -/* - * memory pools need a pointer to the mddev, so they can force an unplug - * when memory is tight, and a count of the number of drives that the - * pool was allocated for, so they know how much to allocate and free. - * mddev->raid_disks cannot be used, as it can change while a pool is active - * These two datums are stored in a kmalloced struct. - * The 'raid_disks' here is twice the raid_disks in r1conf. - * This allows space for each 'real' device can have a replacement in the - * second half of the array. 
- */ - -struct pool_info { - struct mddev *mddev; - int raid_disks; -}; - struct r1conf { struct mddev *mddev; struct raid1_info *mirrors; /* twice 'raid_disks' to @@ -114,10 +98,6 @@ struct r1conf { */ int recovery_disabled; - /* poolinfo contains information about the content of the - * mempools - it changes when the array grows or shrinks - */ - struct pool_info *poolinfo; mempool_t *r1bio_pool; mempool_t r1buf_pool; From 13017b427118f4311471ee47df74872372ca8482 Mon Sep 17 00:00:00 2001 From: Yang Erkun Date: Thu, 31 Jul 2025 19:45:30 +0800 Subject: [PATCH 1487/2411] md: make rdev_addable usable for rcu mode Our test case triggers a panic: BUG: kernel NULL pointer dereference, address: 00000000000000e0 ... Oops: Oops: 0000 [#1] SMP NOPTI CPU: 2 UID: 0 PID: 85 Comm: kworker/2:1 Not tainted 6.16.0+ #94 PREEMPT(none) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.1-2.fc37 04/01/2014 Workqueue: md_misc md_start_sync RIP: 0010:rdev_addable+0x4d/0xf0 ... Call Trace: md_start_sync+0x329/0x480 process_one_work+0x226/0x6d0 worker_thread+0x19e/0x340 kthread+0x10f/0x250 ret_from_fork+0x14d/0x180 ret_from_fork_asm+0x1a/0x30 Modules linked in: raid10 CR2: 00000000000000e0 ---[ end trace 0000000000000000 ]--- RIP: 0010:rdev_addable+0x4d/0xf0 md_spares_need_change() in md_start_sync() calls rdev_addable(), which is protected by rcu_read_lock()/rcu_read_unlock(). This RCU read-side critical section guarantees that rdev will not be released, but rdev->mddev is set to NULL before synchronize_rcu() is called in md_kick_rdev_from_array(). Fix this by reading rdev->mddev with READ_ONCE() and checking whether it is still non-NULL.
Fixes: bc08041b32ab ("md: suspend array in md_start_sync() if array need reconfiguration") Fixes: 570b9147deb6 ("md: use RCU lock to protect traversal in md_spares_need_change()") Signed-off-by: Yang Erkun Link: https://lore.kernel.org/linux-raid/20250731114530.776670-1-yangerkun@huawei.com Signed-off-by: Yu Kuai --- drivers/md/md.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 9c7ed23c45ad..ac85ec73a409 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9427,6 +9427,12 @@ static bool rdev_is_spare(struct md_rdev *rdev) static bool rdev_addable(struct md_rdev *rdev) { + struct mddev *mddev; + + mddev = READ_ONCE(rdev->mddev); + if (!mddev) + return false; + /* rdev is already used, don't add it again. */ if (test_bit(Candidate, &rdev->flags) || rdev->raid_disk >= 0 || test_bit(Faulty, &rdev->flags)) @@ -9437,7 +9443,7 @@ static bool rdev_addable(struct md_rdev *rdev) return true; /* Allow to add if array is read-write. */ - if (md_is_rdwr(rdev->mddev)) + if (md_is_rdwr(mddev)) return true; /* From 7c527c15cdda2e0a26a05ac15a44d3e14738fc55 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 3 Aug 2025 21:20:12 +0900 Subject: [PATCH 1488/2411] firewire: core: use reference counting to invoke address handlers safely The lifetime of an address handler has been managed by a linked list and RCU. This approach was introduced in commit 35202f7d8420 ("firewire: remove global lock around address handlers, convert to RCU"). The invocations of address handlers are performed within RCU read-side critical sections. Since commit 57e6d9f85fff ("firewire: ohci: use workqueue to handle events of AR request/response contexts"), the invocations happen in a workqueue context. However, because they are still performed within RCU read-side critical sections, where sleeping is not permitted, the approach still imposes the limitation that the handlers cannot sleep.
This commit adds reference counting to decouple handler invocation from handler discovery. The linked list and RCU is used to discover the handlers, while the reference counting is used to invoke them safely. Link: https://lore.kernel.org/r/20250803122015.236493-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 32 +++++++++++++++++++++++++++-- include/linux/firewire.h | 4 ++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index d28477d84697..29ca9f3f14ce 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -550,6 +550,23 @@ const struct fw_address_region fw_unit_space_region = { .start = 0xfffff0000900ULL, .end = 0x1000000000000ULL, }; #endif /* 0 */ +static void complete_address_handler(struct kref *kref) +{ + struct fw_address_handler *handler = container_of(kref, struct fw_address_handler, kref); + + complete(&handler->done); +} + +static void get_address_handler(struct fw_address_handler *handler) +{ + kref_get(&handler->kref); +} + +static int put_address_handler(struct fw_address_handler *handler) +{ + return kref_put(&handler->kref, complete_address_handler); +} + /** * fw_core_add_address_handler() - register for incoming requests * @handler: callback @@ -596,6 +613,8 @@ int fw_core_add_address_handler(struct fw_address_handler *handler, if (other != NULL) { handler->offset += other->length; } else { + init_completion(&handler->done); + kref_init(&handler->kref); list_add_tail_rcu(&handler->link, &address_handler_list); ret = 0; break; @@ -621,6 +640,9 @@ void fw_core_remove_address_handler(struct fw_address_handler *handler) list_del_rcu(&handler->link); synchronize_rcu(); + + if (!put_address_handler(handler)) + wait_for_completion(&handler->done); } EXPORT_SYMBOL(fw_core_remove_address_handler); @@ -913,10 +935,13 @@ static void handle_exclusive_region_request(struct fw_card 
*card, scoped_guard(rcu) { handler = lookup_enclosing_address_handler(&address_handler_list, offset, request->length); - if (handler) + if (handler) { + get_address_handler(handler); handler->address_callback(card, request, tcode, destination, source, p->generation, offset, request->data, request->length, handler->callback_data); + put_address_handler(handler); + } } if (!handler) @@ -952,10 +977,13 @@ static void handle_fcp_region_request(struct fw_card *card, scoped_guard(rcu) { list_for_each_entry_rcu(handler, &address_handler_list, link) { - if (is_enclosing_handler(handler, offset, request->length)) + if (is_enclosing_handler(handler, offset, request->length)) { + get_address_handler(handler); handler->address_callback(card, request, tcode, destination, source, p->generation, offset, request->data, request->length, handler->callback_data); + put_address_handler(handler); + } } } diff --git a/include/linux/firewire.h b/include/linux/firewire.h index cceb70415ed2..d38c6e538e5c 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -341,7 +341,11 @@ struct fw_address_handler { u64 length; fw_address_callback_t address_callback; void *callback_data; + + // Only for core functions. struct list_head link; + struct kref kref; + struct completion done; }; struct fw_address_region { From e8cf6875005b017c293bf1b9be707c43f3eff9f4 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 3 Aug 2025 21:20:13 +0900 Subject: [PATCH 1489/2411] firewire: core: call handler for exclusive regions outside RCU read-side critical section The previous commit added reference counting to ensure safe invocations of address handlers. This commit moves the invocation of handlers for exclusive regions outside of the RCU read-side critical section. The address handler for the requested region is selected within the critical section, then invoked outside of it. 
Link: https://lore.kernel.org/r/20250803122015.236493-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 29ca9f3f14ce..a742971c65fa 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -935,17 +935,19 @@ static void handle_exclusive_region_request(struct fw_card *card, scoped_guard(rcu) { handler = lookup_enclosing_address_handler(&address_handler_list, offset, request->length); - if (handler) { + if (handler) get_address_handler(handler); - handler->address_callback(card, request, tcode, destination, source, - p->generation, offset, request->data, - request->length, handler->callback_data); - put_address_handler(handler); - } } - if (!handler) + if (!handler) { fw_send_response(card, request, RCODE_ADDRESS_ERROR); + return; + } + + // Outside the RCU read-side critical section. Without spinlock. With reference count. + handler->address_callback(card, request, tcode, destination, source, p->generation, offset, + request->data, request->length, handler->callback_data); + put_address_handler(handler); } static void handle_fcp_region_request(struct fw_card *card, From e884a8a0c573ca5c191b269f31993733ecb6250e Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 3 Aug 2025 21:20:14 +0900 Subject: [PATCH 1490/2411] firewire: core: call FCP address handlers outside RCU read-side critical section The former commit added reference counting to ensure safe invocations of address handlers. Unlike the exclusive-region address handlers, all FCP address handlers should be called on receiving an FCP request. This commit uses the part of kernel stack to collect address handlers up to 4 within the section, then invoke them outside of the section. Reference counting ensures that each handler remains valid and safe to call. 
Lifting the limitation of supporting only 4 handlers is left for next work. Link: https://lore.kernel.org/r/20250803122015.236493-4-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index a742971c65fa..7a62c660e912 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -950,13 +950,17 @@ static void handle_exclusive_region_request(struct fw_card *card, put_address_handler(handler); } +// To use kmalloc allocator efficiently, this should be power of two. +#define BUFFER_ON_KERNEL_STACK_SIZE 4 + static void handle_fcp_region_request(struct fw_card *card, struct fw_packet *p, struct fw_request *request, unsigned long long offset) { - struct fw_address_handler *handler; - int tcode, destination, source; + struct fw_address_handler *buffer_on_kernel_stack[BUFFER_ON_KERNEL_STACK_SIZE]; + struct fw_address_handler *handler, **handlers; + int tcode, destination, source, i, count; if ((offset != (CSR_REGISTER_BASE | CSR_FCP_COMMAND) && offset != (CSR_REGISTER_BASE | CSR_FCP_RESPONSE)) || @@ -977,18 +981,27 @@ static void handle_fcp_region_request(struct fw_card *card, return; } + count = 0; + handlers = buffer_on_kernel_stack; scoped_guard(rcu) { list_for_each_entry_rcu(handler, &address_handler_list, link) { if (is_enclosing_handler(handler, offset, request->length)) { get_address_handler(handler); - handler->address_callback(card, request, tcode, destination, source, - p->generation, offset, request->data, - request->length, handler->callback_data); - put_address_handler(handler); + handlers[count] = handler; + if (++count >= ARRAY_SIZE(buffer_on_kernel_stack)) + break; } } } + for (i = 0; i < count; ++i) { + handler = handlers[i]; + handler->address_callback(card, request, tcode, destination, source, + p->generation, 
offset, request->data, + request->length, handler->callback_data); + put_address_handler(handler); + } + fw_send_response(card, request, RCODE_COMPLETE); } From 0342273e14c25971f2916de2b598db2e9cfeec15 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 3 Aug 2025 21:20:15 +0900 Subject: [PATCH 1491/2411] firewire: core: reallocate buffer for FCP address handlers when more than 4 are registered The former commit has a limitation that only up to 4 FCP address handlers could be processed per request. Although it suffices for most use cases, it is technically a regression. This commit lifts the restriction by reallocating the buffer from kernel heap when more than 4 handlers are registered. The allocation is performed within RCU read-side critical section, thus it uses GFP_ATOMIC flag. The buffer size is rounded up to the next power of two to align with kmalloc allocation units. Link: https://lore.kernel.org/r/20250803122015.236493-5-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 36 +++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 7a62c660e912..1d1c2d8f85ae 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -960,7 +960,7 @@ static void handle_fcp_region_request(struct fw_card *card, { struct fw_address_handler *buffer_on_kernel_stack[BUFFER_ON_KERNEL_STACK_SIZE]; struct fw_address_handler *handler, **handlers; - int tcode, destination, source, i, count; + int tcode, destination, source, i, count, buffer_size; if ((offset != (CSR_REGISTER_BASE | CSR_FCP_COMMAND) && offset != (CSR_REGISTER_BASE | CSR_FCP_RESPONSE)) || @@ -983,13 +983,38 @@ static void handle_fcp_region_request(struct fw_card *card, count = 0; handlers = buffer_on_kernel_stack; + buffer_size = ARRAY_SIZE(buffer_on_kernel_stack); scoped_guard(rcu) { list_for_each_entry_rcu(handler,
&address_handler_list, link) { if (is_enclosing_handler(handler, offset, request->length)) { + if (count >= buffer_size) { + int next_size = buffer_size * 2; + struct fw_address_handler **buffer_on_kernel_heap; + + if (handlers == buffer_on_kernel_stack) + buffer_on_kernel_heap = NULL; + else + buffer_on_kernel_heap = handlers; + + buffer_on_kernel_heap = + krealloc_array(buffer_on_kernel_heap, next_size, + sizeof(*buffer_on_kernel_heap), GFP_ATOMIC); + // FCP is used for purposes unrelated to significant system + // resources (e.g. storage or networking), so allocation + // failures are not considered so critical. + if (!buffer_on_kernel_heap) + break; + + if (handlers == buffer_on_kernel_stack) { + memcpy(buffer_on_kernel_heap, buffer_on_kernel_stack, + sizeof(buffer_on_kernel_stack)); + } + + handlers = buffer_on_kernel_heap; + buffer_size = next_size; + } get_address_handler(handler); - handlers[count] = handler; - if (++count >= ARRAY_SIZE(buffer_on_kernel_stack)) - break; + handlers[count++] = handler; } } } @@ -1002,6 +1027,9 @@ static void handle_fcp_region_request(struct fw_card *card, put_address_handler(handler); } + if (handlers != buffer_on_kernel_stack) + kfree(handlers); + fw_send_response(card, request, RCODE_COMPLETE); } From 41fee4f0036734bec427659f749e44cfe1821565 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Sun, 3 Aug 2025 22:49:47 +0800 Subject: [PATCH 1492/2411] LoongArch: Complete KSave registers definition According to the "LoongArch Reference Manual Volume 1: Basic Architecture", the KSave registers (SAVE0-SAVE15) are defined in Section 7.4.16 "Data Save (SAVE)" and listed in Table 7-1 "Control and Status Registers Overview". These registers occupy the CSR addresses from 0x30 to 0x3F, with 16 registers in total. This patch completes the definitions of KS9 to KS15, so as to match the architecture specification. 
Reviewed-by: Wentao Guan Signed-off-by: Yanteng Si Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/loongarch.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index a0994d226eff..09dfd7eb406e 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -451,6 +451,13 @@ #define LOONGARCH_CSR_KS6 0x36 #define LOONGARCH_CSR_KS7 0x37 #define LOONGARCH_CSR_KS8 0x38 +#define LOONGARCH_CSR_KS9 0x39 +#define LOONGARCH_CSR_KS10 0x3a +#define LOONGARCH_CSR_KS11 0x3b +#define LOONGARCH_CSR_KS12 0x3c +#define LOONGARCH_CSR_KS13 0x3d +#define LOONGARCH_CSR_KS14 0x3e +#define LOONGARCH_CSR_KS15 0x3f /* Exception allocated KS0, KS1 and KS2 statically */ #define EXCEPTION_KS0 LOONGARCH_CSR_KS0 From a1a81b5477196ca1290b367404a461e046e647d5 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 3 Aug 2025 22:49:47 +0800 Subject: [PATCH 1493/2411] LoongArch: Make relocate_new_kernel_size be a .quad value Now relocate_new_kernel_size is a .long value, which means 32bit, so its high 32bit is undefined. This causes memcpy((void *)reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size) in machine_kexec_prepare() access out of range memories in some cases, and then end up with an ADE exception. So make relocate_new_kernel_size be a .quad value, which means 64bit, to avoid such errors. 
Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen --- arch/loongarch/kernel/relocate_kernel.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S index 84e6de2fd973..8b5140ac9ea1 100644 --- a/arch/loongarch/kernel/relocate_kernel.S +++ b/arch/loongarch/kernel/relocate_kernel.S @@ -109,4 +109,4 @@ SYM_CODE_END(kexec_smp_wait) relocate_new_kernel_end: .section ".data" -SYM_DATA(relocate_new_kernel_size, .long relocate_new_kernel_end - relocate_new_kernel) +SYM_DATA(relocate_new_kernel_size, .quad relocate_new_kernel_end - relocate_new_kernel) From 243f8de49f076d56ee88d6f03b6221984cc63668 Mon Sep 17 00:00:00 2001 From: Ming Wang Date: Sun, 3 Aug 2025 22:49:47 +0800 Subject: [PATCH 1494/2411] LoongArch: Support mem= kernel parameter The LoongArch mem= parameter parser was previously limited to the mem=@ format. This was inconvenient for the common use case of simply capping the total system memory, as it forced users to manually specify a start address. It was also inconsistent with the behavior on other architectures. This patch enhances the parser in early_parse_mem() to also support the more user-friendly mem= format. The implementation now checks for the presence of the '@' symbol to determine the user's intent: - If mem= is provided (no '@'), the kernel now calls memblock_enforce_memory_limit(). This trims memory from the top down to the specified size. - If mem=@ is provided, the original behavior is retained for backward compatibility. This allows for defining specific memory banks. This change introduces an important usage rule reflected in the code's comments: the mem= format should only be specified once on the kernel command line. It acts as a single, global cap on total memory. In contrast, the mem=@ format can be specified multiple times to define several distinct memory regions. 
Signed-off-by: Ming Wang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/setup.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index b99fbb388fe0..b0be56482368 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -191,6 +191,16 @@ static int __init early_parse_mem(char *p) return -EINVAL; } + start = 0; + size = memparse(p, &p); + if (*p == '@') /* Every mem=... should contain '@' */ + start = memparse(p + 1, &p); + else { /* Only one mem=... is allowed if no '@' */ + usermem = 1; + memblock_enforce_memory_limit(size); + return 0; + } + /* * If a user specifies memory size, we * blow away any automatically generated @@ -201,14 +211,6 @@ static int __init early_parse_mem(char *p) memblock_remove(memblock_start_of_DRAM(), memblock_end_of_DRAM() - memblock_start_of_DRAM()); } - start = 0; - size = memparse(p, &p); - if (*p == '@') - start = memparse(p + 1, &p); - else { - pr_err("Invalid format!\n"); - return -EINVAL; - } if (!IS_ENABLED(CONFIG_NUMA)) memblock_add(start, size); From 70a2365e18affc5ebdaab1ca6a0b3c4f3aac2ee8 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Sun, 3 Aug 2025 22:49:47 +0800 Subject: [PATCH 1495/2411] LoongArch: Avoid in-place string operation on FDT content In init_cpu_fullname(), a constant pointer to "model" property is retrieved. It's later modified by the strsep() function, which is illegal and corrupts kernel's FDT copy. This is shown by dmesg, OF: fdt: not creating '/sys/firmware/fdt': CRC check failed Create a mutable copy of the model property and do in-place operations on the mutable copy instead. loongson_sysconf.cpuname lives across the kernel lifetime, thus manually releasing isn't necessary. Also move the of_node_put() call for the root node after the usage of its property, since of_node_put() decreases the reference counter thus usage after the call is unsafe. 
Cc: stable@vger.kernel.org Fixes: 44a01f1f726a ("LoongArch: Parsing CPU-related information from DTS") Reviewed-by: Jiaxun Yang Signed-off-by: Yao Zi Signed-off-by: Huacai Chen --- arch/loongarch/kernel/env.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/loongarch/kernel/env.c b/arch/loongarch/kernel/env.c index 27144de5c5fe..c0a5dc9aeae2 100644 --- a/arch/loongarch/kernel/env.c +++ b/arch/loongarch/kernel/env.c @@ -39,16 +39,19 @@ void __init init_environ(void) static int __init init_cpu_fullname(void) { - struct device_node *root; int cpu, ret; - char *model; + char *cpuname; + const char *model; + struct device_node *root; /* Parsing cpuname from DTS model property */ root = of_find_node_by_path("/"); - ret = of_property_read_string(root, "model", (const char **)&model); + ret = of_property_read_string(root, "model", &model); + if (ret == 0) { + cpuname = kstrdup(model, GFP_KERNEL); + loongson_sysconf.cpuname = strsep(&cpuname, " "); + } of_node_put(root); - if (ret == 0) - loongson_sysconf.cpuname = strsep(&model, " "); if (loongson_sysconf.cpuname && !strncmp(loongson_sysconf.cpuname, "Loongson", 8)) { for (cpu = 0; cpu < NR_CPUS; cpu++) From 2362e8124ed21445c6886806e5deaee717629ddd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 3 Aug 2025 22:49:47 +0800 Subject: [PATCH 1496/2411] LoongArch: Don't use %pK through printk() in unwinder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the past %pK was preferable to %p as it would not leak raw pointer values into the kernel log. Since commit ad67b74d2469 ("printk: hash addresses printed with %p") the regular %p has been improved to avoid this issue. Furthermore, restricted pointers ("%pK") were never meant to be used through printk(). They can still unintentionally leak raw pointers or acquire sleeping locks in atomic contexts. 
Switch to the regular pointer formatting which is safer and easier to reason about. Cc: stable@vger.kernel.org Signed-off-by: Thomas Weißschuh Signed-off-by: Huacai Chen --- arch/loongarch/kernel/unwind_orc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c index 0005be49b056..0d5fa64a2225 100644 --- a/arch/loongarch/kernel/unwind_orc.c +++ b/arch/loongarch/kernel/unwind_orc.c @@ -508,7 +508,7 @@ bool unwind_next_frame(struct unwind_state *state) state->pc = bt_address(pc); if (!state->pc) { - pr_err("cannot find unwind pc at %pK\n", (void *)pc); + pr_err("cannot find unwind pc at %p\n", (void *)pc); goto err; } From 6ab55e0a9eac638ca390bfaef6408c10c127e623 Mon Sep 17 00:00:00 2001 From: Chenghao Duan Date: Sun, 3 Aug 2025 22:49:50 +0800 Subject: [PATCH 1497/2411] LoongArch: Add larch_insn_gen_{beq,bne} helpers Add larch_insn_gen_beq() and larch_insn_gen_bne() helpers which will be used in BPF trampoline implementation. 
Reviewed-by: Hengqi Chen Co-developed-by: George Guo Signed-off-by: George Guo Co-developed-by: Youling Tang Signed-off-by: Youling Tang Signed-off-by: Chenghao Duan Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/inst.h | 2 ++ arch/loongarch/kernel/inst.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 3089785ca97e..47d190595587 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -510,6 +510,8 @@ u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj); u32 larch_insn_gen_lu12iw(enum loongarch_gpr rd, int imm); u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm); u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm); +u32 larch_insn_gen_beq(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm); +u32 larch_insn_gen_bne(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm); u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm); static inline bool signed_imm_check(long val, unsigned int bit) diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index 14d7d700bcb9..e957b0f69688 100644 --- a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c @@ -323,6 +323,34 @@ u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) return insn.word; } +u32 larch_insn_gen_beq(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + union loongarch_instruction insn; + + if ((imm & 3) || imm < -SZ_128K || imm >= SZ_128K) { + pr_warn("The generated beq instruction is out of range.\n"); + return INSN_BREAK; + } + + emit_beq(&insn, rj, rd, imm >> 2); + + return insn.word; +} + +u32 larch_insn_gen_bne(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + union loongarch_instruction insn; + + if ((imm & 3) || imm < -SZ_128K || imm >= SZ_128K) { + pr_warn("The generated bne instruction is out of 
range.\n"); + return INSN_BREAK; + } + + emit_bne(&insn, rj, rd, imm >> 2); + + return insn.word; +} + u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) { union loongarch_instruction insn; From b644c640923b625340c603cdb8d8f456406eb4de Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 1 Aug 2025 09:18:58 +0200 Subject: [PATCH 1498/2411] Revert "gpio: pxa: Make irq_chip immutable" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 20117cf426b6 ("gpio: pxa: Make irq_chip immutable") as it caused a regression on samsung coreprimevelte and we've not been able to fix it so far. Cc: stable@vger.kernel.org # v6.16 Fixes: 20117cf426b6 ("gpio: pxa: Make irq_chip immutable") Reported-by: Duje Mihanović Closes: https://lore.kernel.org/all/3367665.aeNJFYEL58@radijator/ Tested-by: Duje Mihanović Link: https://lore.kernel.org/r/20250801071858.7554-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pxa.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index 13f7da2a9486..cbcdd416f8b9 100644 --- a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -499,8 +499,6 @@ static void pxa_mask_muxed_gpio(struct irq_data *d) gfer = readl_relaxed(base + GFER_OFFSET) & ~GPIO_bit(gpio); writel_relaxed(grer, base + GRER_OFFSET); writel_relaxed(gfer, base + GFER_OFFSET); - - gpiochip_disable_irq(&pchip->chip, gpio); } static int pxa_gpio_set_wake(struct irq_data *d, unsigned int on) @@ -520,21 +518,17 @@ static void pxa_unmask_muxed_gpio(struct irq_data *d) unsigned int gpio = irqd_to_hwirq(d); struct pxa_gpio_bank *c = gpio_to_pxabank(&pchip->chip, gpio); - gpiochip_enable_irq(&pchip->chip, gpio); - c->irq_mask |= GPIO_bit(gpio); update_edge_detect(c); } -static const struct irq_chip pxa_muxed_gpio_chip = { +static struct irq_chip pxa_muxed_gpio_chip = { .name = "GPIO", .irq_ack =
pxa_ack_muxed_gpio, .irq_mask = pxa_mask_muxed_gpio, .irq_unmask = pxa_unmask_muxed_gpio, .irq_set_type = pxa_gpio_irq_type, .irq_set_wake = pxa_gpio_set_wake, - .flags = IRQCHIP_IMMUTABLE, - GPIOCHIP_IRQ_RESOURCE_HELPERS, }; static int pxa_gpio_nums(struct platform_device *pdev) From 63c7bc53a35e785accdc2ceab8f72d94501931ab Mon Sep 17 00:00:00 2001 From: David Thompson Date: Mon, 28 Jul 2025 10:46:19 -0400 Subject: [PATCH 1499/2411] gpio: mlxbf2: use platform_get_irq_optional() The gpio-mlxbf2 driver interfaces with four GPIO controllers, device instances 0-3. There are two IRQ resources shared between the four controllers, and they are found in the ACPI table for instances 0 and 3. The driver should not use platform_get_irq(), otherwise this error is logged when probing instances 1 and 2: mlxbf2_gpio MLNXBF22:01: error -ENXIO: IRQ index 0 not found Fixes: 2b725265cb08 ("gpio: mlxbf2: Introduce IRQ support") Cc: stable@vger.kernel.org Signed-off-by: David Thompson Reviewed-by: Shravan Kumar Ramani Reviewed-by: Mika Westerberg Link: https://lore.kernel.org/r/20250728144619.29894-1-davthompson@nvidia.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mlxbf2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-mlxbf2.c b/drivers/gpio/gpio-mlxbf2.c index 6f3dda6b635f..390f2e74a9d8 100644 --- a/drivers/gpio/gpio-mlxbf2.c +++ b/drivers/gpio/gpio-mlxbf2.c @@ -397,7 +397,7 @@ mlxbf2_gpio_probe(struct platform_device *pdev) gc->ngpio = npins; gc->owner = THIS_MODULE; - irq = platform_get_irq(pdev, 0); + irq = platform_get_irq_optional(pdev, 0); if (irq >= 0) { girq = &gs->gc.irq; gpio_irq_chip_set_chip(girq, &mlxbf2_gpio_irq_chip); From 533210f23936a482010016ac3f57995046c58565 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 31 Jul 2025 16:10:38 +0800 Subject: [PATCH 1500/2411] nfs/localio: use read_seqbegin() rather than read_seqbegin_or_lock() The usage of read_seqbegin_or_lock() in nfs_copy_boot_verifier() is wrong. 
"seq" is always even and thus "or_lock" has no effect. nfs_copy_boot_verifier() just copies 8 bytes and is supposed to be very rare operation, so we do not need the adaptive locking in this case. Signed-off-by: Li RongQing Link: https://lore.kernel.org/r/20250731081038.3478-1-lirongqing@baidu.com Signed-off-by: Trond Myklebust --- fs/nfs/localio.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 510d0a16cfe9..bd5fca285899 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -500,14 +500,13 @@ nfs_copy_boot_verifier(struct nfs_write_verifier *verifier, struct inode *inode) { struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; u32 *verf = (u32 *)verifier->data; - int seq = 0; + unsigned int seq; do { - read_seqbegin_or_lock(&clp->cl_boot_lock, &seq); + seq = read_seqbegin(&clp->cl_boot_lock); verf[0] = (u32)clp->cl_nfssvc_boot.tv_sec; verf[1] = (u32)clp->cl_nfssvc_boot.tv_nsec; - } while (need_seqretry(&clp->cl_boot_lock, seq)); - done_seqretry(&clp->cl_boot_lock, seq); + } while (read_seqretry(&clp->cl_boot_lock, seq)); } static void From cc5d59081fa26506d02de2127ab822f40d88bc5a Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 31 Jul 2025 14:00:56 -0400 Subject: [PATCH 1501/2411] sunrpc: fix client side handling of tls alerts A security exploit was discovered in NFS over TLS in tls_alert_recv due to its assumption that there is valid data in the msghdr's iterator's kvec. Instead, this patch proposes the rework how control messages are setup and used by sock_recvmsg(). If no control message structure is setup, kTLS layer will read and process TLS data record types. As soon as it encounters a TLS control message, it would return an error. At that point, NFS can setup a kvec backed control buffer and read in the control message such as a TLS alert. 
Scott found that a msg iterator can advance the kvec pointer as a part of the copy process thus we need to revert the iterator before calling into the tls_alert_recv. Fixes: dea034b963c8 ("SUNRPC: Capture CMSG metadata on client-side receive") Suggested-by: Trond Myklebust Suggested-by: Scott Mayhew Signed-off-by: Olga Kornievskaia Link: https://lore.kernel.org/r/20250731180058.4669-3-okorniev@redhat.com Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 04ff66758fc3..c5f7bbf5775f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -358,7 +358,7 @@ xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp) static int xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg, - struct cmsghdr *cmsg, int ret) + unsigned int *msg_flags, struct cmsghdr *cmsg, int ret) { u8 content_type = tls_get_record_type(sock->sk, cmsg); u8 level, description; @@ -371,7 +371,7 @@ xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg, * record, even though there might be more frames * waiting to be decrypted. 
*/ - msg->msg_flags &= ~MSG_EOR; + *msg_flags &= ~MSG_EOR; break; case TLS_RECORD_TYPE_ALERT: tls_alert_recv(sock->sk, msg, &level, &description); @@ -386,19 +386,33 @@ xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg, } static int -xs_sock_recv_cmsg(struct socket *sock, struct msghdr *msg, int flags) +xs_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags, int flags) { union { struct cmsghdr cmsg; u8 buf[CMSG_SPACE(sizeof(u8))]; } u; + u8 alert[2]; + struct kvec alert_kvec = { + .iov_base = alert, + .iov_len = sizeof(alert), + }; + struct msghdr msg = { + .msg_flags = *msg_flags, + .msg_control = &u, + .msg_controllen = sizeof(u), + }; int ret; - msg->msg_control = &u; - msg->msg_controllen = sizeof(u); - ret = sock_recvmsg(sock, msg, flags); - if (msg->msg_controllen != sizeof(u)) - ret = xs_sock_process_cmsg(sock, msg, &u.cmsg, ret); + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1, + alert_kvec.iov_len); + ret = sock_recvmsg(sock, &msg, flags); + if (ret > 0 && + tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) { + iov_iter_revert(&msg.msg_iter, ret); + ret = xs_sock_process_cmsg(sock, &msg, msg_flags, &u.cmsg, + -EAGAIN); + } return ret; } @@ -408,7 +422,13 @@ xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek) ssize_t ret; if (seek != 0) iov_iter_advance(&msg->msg_iter, seek); - ret = xs_sock_recv_cmsg(sock, msg, flags); + ret = sock_recvmsg(sock, msg, flags); + /* Handle TLS inband control message lazily */ + if (msg->msg_flags & MSG_CTRUNC) { + msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR); + if (ret == 0 || ret == -EIO) + ret = xs_sock_recv_cmsg(sock, &msg->msg_flags, flags); + } return ret > 0 ? 
ret + seek : ret; } @@ -434,7 +454,7 @@ xs_read_discard(struct socket *sock, struct msghdr *msg, int flags, size_t count) { iov_iter_discard(&msg->msg_iter, ITER_DEST, count); - return xs_sock_recv_cmsg(sock, msg, flags); + return xs_sock_recvmsg(sock, msg, flags, 0); } #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE From 65cf62cd62af27386606ed25054f78480c2f7fc7 Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Thu, 12 Jun 2025 21:11:29 +0000 Subject: [PATCH 1502/2411] i2c: apple: Drop default ARCH_APPLE in Kconfig When the first driver for Apple Silicon was upstreamed we accidentally included `default ARCH_APPLE` in its Kconfig which then spread to almost every subsequent driver. As soon as ARCH_APPLE is set to y this will pull in many drivers as built-ins which is not what we want. Thus, drop `default ARCH_APPLE` from Kconfig. Signed-off-by: Sven Peter Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index c8d115b58e44..070d014fdc5d 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -992,7 +992,6 @@ config I2C_APPLE tristate "Apple SMBus platform driver" depends on !I2C_PASEMI depends on ARCH_APPLE || COMPILE_TEST - default ARCH_APPLE help Say Y here if you want to use the I2C controller present on Apple Silicon chips such as the M1. From 0b7c9528facdb5a73ad78fea86d2e95a6c48dbc4 Mon Sep 17 00:00:00 2001 From: "fangzhong.zhou" Date: Sun, 3 Aug 2025 07:15:54 +0800 Subject: [PATCH 1503/2411] i2c: Force DLL0945 touchpad i2c freq to 100khz This patch fixes an issue where the touchpad cursor movement becomes slow on the Dell Precision 5560. Force the touchpad freq to 100khz as a workaround. Tested on Dell Precision 5560 with 6.14 to 6.14.6. Cursor movement is now smooth and responsive. 
Signed-off-by: fangzhong.zhou [wsa: kept sorting and removed unnecessary parts from commit msg] Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-acpi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index 3445cc3b476b..ed90858a27b7 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -370,6 +370,7 @@ static const struct acpi_device_id i2c_acpi_force_100khz_device_ids[] = { * the device works without issues on Windows at what is expected to be * a 400KHz frequency. The root cause of the issue is not known. */ + { "DLL0945", 0 }, { "ELAN06FA", 0 }, {} }; From 33ac5155891cab165c93b51b0e22e153eacc2ee7 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 30 Jul 2025 21:38:02 +0200 Subject: [PATCH 1504/2411] i2c: muxes: mule: Fix an error handling path in mule_i2c_mux_probe() If an error occurs in the loop that creates the device adapters, then a reference to 'dev' still needs to be released. Use for_each_child_of_node_scoped() to both fix the issue and save one line of code. 
Fixes: d0f8e97866bf ("i2c: muxes: add support for tsd,mule-i2c multiplexer") Signed-off-by: Christophe JAILLET Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-mux-mule.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-mule.c b/drivers/i2c/muxes/i2c-mux-mule.c index 284ff4afeeac..d3b32b794172 100644 --- a/drivers/i2c/muxes/i2c-mux-mule.c +++ b/drivers/i2c/muxes/i2c-mux-mule.c @@ -47,7 +47,6 @@ static int mule_i2c_mux_probe(struct platform_device *pdev) struct mule_i2c_reg_mux *priv; struct i2c_client *client; struct i2c_mux_core *muxc; - struct device_node *dev; unsigned int readback; int ndev, ret; bool old_fw; @@ -95,7 +94,7 @@ static int mule_i2c_mux_probe(struct platform_device *pdev) "Failed to register mux remove\n"); /* Create device adapters */ - for_each_child_of_node(mux_dev->of_node, dev) { + for_each_child_of_node_scoped(mux_dev->of_node, dev) { u32 reg; ret = of_property_read_u32(dev, "reg", &reg); From 584460393efbcccb6388b1cd5d37284b5326709c Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Wed, 16 Jul 2025 20:55:55 +0800 Subject: [PATCH 1505/2411] drm/bridge: Describe the newly introduced drm_connector parameter for drm_bridge_detect This fix the make htmldocs warnings: drivers/gpu/drm/drm_bridge.c:1242: warning: Function parameter or struct member 'connector' not described in 'drm_bridge_detect' Fixes: 5d156a9c3d5e ("drm/bridge: Pass down connector to drm bridge detect hook") Signed-off-by: Andy Yan Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250716125602.3166573-1-andyshrk@163.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/drm_bridge.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c index dd45d9b504d8..4bde00083047 100644 --- a/drivers/gpu/drm/drm_bridge.c +++ b/drivers/gpu/drm/drm_bridge.c @@ -1227,6 +1227,7 @@ EXPORT_SYMBOL(drm_atomic_bridge_chain_check); /** * drm_bridge_detect - check if anything is attached to
the bridge output * @bridge: bridge control structure + * @connector: attached connector * * If the bridge supports output detection, as reported by the * DRM_BRIDGE_OP_DETECT bridge ops flag, call &drm_bridge_funcs.detect for the From 5f49c2d1f422c660c726ac5e0499c66c901633c2 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 1 Aug 2025 20:36:06 -0700 Subject: [PATCH 1506/2411] apparmor: fix: oops when trying to free null ruleset profile allocation is wrongly setting the number of entries on the rules vector before any ruleset is assigned. If profile allocation fails between ruleset allocation and assigning the first ruleset, free_ruleset() will be called with a null pointer resulting in an oops. [ 107.350226] kernel BUG at mm/slub.c:545! [ 107.350912] Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 107.351447] CPU: 1 UID: 0 PID: 27 Comm: ksoftirqd/1 Not tainted 6.14.6-hwe-rlee287-dev+ #5 [ 107.353279] Hardware name:[ 107.350218] -QE-----------[ cutMU here ]--------- Ub--- [ 107.3502untu26] kernel BUG a 24t mm/slub.c:545.!04 P [ 107.350912]C ( Oops: invalid oi4pcode: 0000 [#1]40 PREEMPT SMP NOPFXTI + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 107.356054] RIP: 0010:__slab_free+0x152/0x340 [ 107.356444] Code: 00 4c 89 ff e8 0f ac df 00 48 8b 14 24 48 8b 4c 24 20 48 89 44 24 08 48 8b 03 48 c1 e8 09 83 e0 01 88 44 24 13 e9 71 ff ff ff <0f> 0b 41 f7 44 24 08 87 04 00 00 75 b2 eb a8 41 f7 44 24 08 87 04 [ 107.357856] RSP: 0018:ffffad4a800fbbb0 EFLAGS: 00010246 [ 107.358937] RAX: ffff97ebc2a88e70 RBX: ffffd759400aa200 RCX: 0000000000800074 [ 107.359976] RDX: ffff97ebc2a88e60 RSI: ffffd759400aa200 RDI: ffffad4a800fbc20 [ 107.360600] RBP: ffffad4a800fbc50 R08: 0000000000000001 R09: ffffffff86f02cf2 [ 107.361254] R10: 0000000000000000 R11: 0000000000000000 R12: ffff97ecc0049400 [ 107.361934] R13: ffff97ebc2a88e60 R14: ffff97ecc0049400 R15: 0000000000000000 [ 107.362597] FS: 0000000000000000(0000) GS:ffff97ecfb200000(0000) knlGS:0000000000000000 [ 
107.363332] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 107.363784] CR2: 000061c9545ac000 CR3: 0000000047aa6000 CR4: 0000000000750ef0 [ 107.364331] PKRU: 55555554 [ 107.364545] Call Trace: [ 107.364761] [ 107.364931] ? local_clock+0x15/0x30 [ 107.365219] ? srso_alias_return_thunk+0x5/0xfbef5 [ 107.365593] ? kfree_sensitive+0x32/0x70 [ 107.365900] kfree+0x29d/0x3a0 [ 107.366144] ? srso_alias_return_thunk+0x5/0xfbef5 [ 107.366510] ? local_clock_noinstr+0xe/0xd0 [ 107.366841] ? srso_alias_return_thunk+0x5/0xfbef5 [ 107.367209] kfree_sensitive+0x32/0x70 [ 107.367502] aa_free_profile.part.0+0xa2/0x400 [ 107.367850] ? rcu_do_batch+0x1e6/0x5e0 [ 107.368148] aa_free_profile+0x23/0x60 [ 107.368438] label_free_switch+0x4c/0x80 [ 107.368751] label_free_rcu+0x1c/0x50 [ 107.369038] rcu_do_batch+0x1e8/0x5e0 [ 107.369324] ? rcu_do_batch+0x157/0x5e0 [ 107.369626] rcu_core+0x1b0/0x2f0 [ 107.369888] rcu_core_si+0xe/0x20 [ 107.370156] handle_softirqs+0x9b/0x3d0 [ 107.370460] ? smpboot_thread_fn+0x26/0x210 [ 107.370790] run_ksoftirqd+0x3a/0x70 [ 107.371070] smpboot_thread_fn+0xf9/0x210 [ 107.371383] ? __pfx_smpboot_thread_fn+0x10/0x10 [ 107.371746] kthread+0x10d/0x280 [ 107.372010] ? __pfx_kthread+0x10/0x10 [ 107.372310] ret_from_fork+0x44/0x70 [ 107.372655] ? 
__pfx_kthread+0x10/0x10 [ 107.372974] ret_from_fork_asm+0x1a/0x30 [ 107.373316] [ 107.373505] Modules linked in: af_packet_diag mptcp_diag tcp_diag udp_diag raw_diag inet_diag snd_seq_dummy snd_hrtimer snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq snd_seq_device snd_timer snd soundcore qrtr binfmt_misc intel_rapl_msr intel_rapl_common kvm_amd ccp kvm irqbypass polyval_clmulni polyval_generic ghash_clmulni_intel sha256_ssse3 sha1_ssse3 aesni_intel crypto_simd cryptd i2c_piix4 i2c_smbus input_leds joydev sch_fq_codel msr parport_pc ppdev lp parport efi_pstore nfnetlink vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport vsock vmw_vmci dmi_sysfs qemu_fw_cfg ip_tables x_tables autofs4 hid_generic usbhid hid psmouse serio_raw floppy bochs pata_acpi [ 107.379086] ---[ end trace 0000000000000000 ]--- Don't set the count until a ruleset is actually allocated and guard against free_ruleset() being called with a null pointer. Reported-by: Ryan Lee Fixes: 217af7e2f4de ("apparmor: refactor profile rules and attachments") Signed-off-by: John Johansen --- security/apparmor/policy.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 261a9d3a0afe..50d5345ff5cb 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -243,6 +243,9 @@ static void free_ruleset(struct aa_ruleset *rules) { int i; + if (!rules) + return; + aa_put_pdb(rules->file); aa_put_pdb(rules->policy); aa_free_cap_rules(&rules->caps); @@ -335,7 +338,6 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy, profile = kzalloc(struct_size(profile, label.rules, 1), gfp); if (!profile) return NULL; - profile->n_rules = 1; if (!aa_policy_init(&profile->base, NULL, hname, gfp)) goto fail; @@ -346,6 +348,7 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy, profile->label.rules[0] = aa_alloc_ruleset(gfp); if (!profile->label.rules[0]) goto 
fail; + profile->n_rules = 1; /* update being set needed by fs interface */ if (!proxy) { From 407728da41cd6450cec6a4277027015a75744d56 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 2 Aug 2025 09:25:59 +0200 Subject: [PATCH 1507/2411] block, bfq: Reorder struct bfq_iocq_bfqq_data The size of struct bfq_iocq_bfqq_data can be reduced by moving a few fields around. On a x86_64, with allmodconfig, this shrinks the size from 144 to 128 bytes. The main benefit is to reduce the size of struct bfq_io_cq from 1360 to 1232. This structure is stored in a dedicated slab cache. So reducing its size improves cache usage. Signed-off-by: Christophe JAILLET Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/79394db1befaa658e8066b8e3348073ce27d9d26.1754119538.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jens Axboe --- block/bfq-iosched.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 687a3a7ba784..0b4704932d72 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -427,9 +427,6 @@ struct bfq_iocq_bfqq_data { */ bool saved_IO_bound; - u64 saved_io_start_time; - u64 saved_tot_idle_time; - /* * Same purpose as the previous fields for the values of the * field keeping the queue's belonging to a large burst @@ -450,6 +447,9 @@ struct bfq_iocq_bfqq_data { */ unsigned int saved_weight; + u64 saved_io_start_time; + u64 saved_tot_idle_time; + /* * Similar to previous fields: save wr information. 
*/ @@ -457,13 +457,13 @@ struct bfq_iocq_bfqq_data { unsigned long saved_last_wr_start_finish; unsigned long saved_service_from_wr; unsigned long saved_wr_start_at_switch_to_srt; - unsigned int saved_wr_cur_max_time; struct bfq_ttime saved_ttime; + unsigned int saved_wr_cur_max_time; /* Save also injection state */ - u64 saved_last_serv_time_ns; unsigned int saved_inject_limit; unsigned long saved_decrease_time_jif; + u64 saved_last_serv_time_ns; /* candidate queue for a stable merge (due to close creation time) */ struct bfq_queue *stable_merge_bfqq; From df9bdd4381be100a778c63a5dd810dc82b8c2705 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 22 Jul 2025 20:26:16 +0200 Subject: [PATCH 1508/2411] drm/xe/pf: Enable SR-IOV PF mode by default We already claim official support for SR-IOV PF/VF modes on PTL and BMG platforms, but by default we start the Xe driver on those platforms in non-virtualized mode (native) since we still have max_vfs modparam set to disable creation of the VFs. It's time to let the Xe driver support SR-IOV PF mode by default. We were already testing this on our CI, which was relying on the patch that was enabling it for CONFIG_DRM_XE_DEBUG used by our CI. 
Signed-off-by: Michal Wajdeczko Cc: Thomas Hellstrom Cc: Lucas De Marchi Cc: Rodrigo Vivi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250722182618.30811-3-michal.wajdeczko@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit a2b461bd6f3b36bded0a74178dec0e58e4714d3d) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_module.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 107ffe87808c..d9391bd08194 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -27,6 +27,8 @@ #define DEFAULT_PROBE_DISPLAY true #define DEFAULT_VRAM_BAR_SIZE 0 #define DEFAULT_FORCE_PROBE CONFIG_DRM_XE_FORCE_PROBE +#define DEFAULT_MAX_VFS ~0 +#define DEFAULT_MAX_VFS_STR "unlimited" #define DEFAULT_WEDGED_MODE 1 #define DEFAULT_SVM_NOTIFIER_SIZE 512 @@ -34,6 +36,9 @@ struct xe_modparam xe_modparam = { .probe_display = DEFAULT_PROBE_DISPLAY, .guc_log_level = DEFAULT_GUC_LOG_LEVEL, .force_probe = DEFAULT_FORCE_PROBE, +#ifdef CONFIG_PCI_IOV + .max_vfs = DEFAULT_MAX_VFS, +#endif .wedged_mode = DEFAULT_WEDGED_MODE, .svm_notifier_size = DEFAULT_SVM_NOTIFIER_SIZE, /* the rest are 0 by default */ @@ -79,7 +84,8 @@ MODULE_PARM_DESC(force_probe, module_param_named(max_vfs, xe_modparam.max_vfs, uint, 0400); MODULE_PARM_DESC(max_vfs, "Limit number of Virtual Functions (VFs) that could be managed. " - "(0 = no VFs [default]; N = allow up to N VFs)"); + "(0=no VFs; N=allow up to N VFs " + "[default=" DEFAULT_MAX_VFS_STR "])"); #endif module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600); From 465f1dba74c010995190bff267ae5a75afcdcfea Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Fri, 1 Aug 2025 10:53:56 +0530 Subject: [PATCH 1509/2411] drm/xe/devcoredump: Defer devcoredump initialization during probe Doing devcoredump initializing before GT though look harmless, it leads to problem during driver unbind. 
Because of this order, GT/Engine release functions will be called before xe devcoredump release function (xe_driver_devcoredump_fini) leading to the following kernel crash[1] because the devcoredump functions might still use GT/Engine datastructures after those are freed. The following crash is observed while running the IGT xe_wedged@wedged-at-any-timeout. The test forces a wedged state by submitting a workload which hangs. Then does a unbind/rebind of the driver to recover from the wedged state. The hanged workload leads to a devcoredump. The following crash is noticed when the devcoredump capture races with the driver unbind. During driver unbind, the release function hw_engine_fini() will be called which assigns NULL to hwe->gt. But the same data structure is accessed during the coredump capture in the function xe_engine_snapshot_print by reading snapshot->hwe->gt. With this patch, we make sure the devcoredump is stopped before deinitializing the core driver functions. [1]: BUG: kernel NULL pointer dereference, address: 0000000000000000 Workqueue: events_unbound xe_devcoredump_deferred_snap_work [xe] RIP: 0010:xe_engine_snapshot_print+0x47/0x420 [xe] Call Trace: ? drm_printf+0x64/0x90 __xe_devcoredump_read+0x23f/0x2d0 [xe] ? __pfx___drm_printfn_coredump+0x10/0x10 ? __pfx___drm_puts_coredump+0x10/0x10 xe_devcoredump_deferred_snap_work+0x17a/0x190 [xe] process_one_work+0x22e/0x6f0 worker_thread+0x1e8/0x3d0 ? __pfx_worker_thread+0x10/0x10 kthread+0x11f/0x250 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x47/0x70 ? 
__pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 v2: Detailed commit description (Rodrigo) v3: FIXME added (Rodrigo, Stuart) Fixes: 4209d635a823 ("drm/xe: Remove devcoredump during driver release") Reviewed-by: Stuart Summers Link: https://lore.kernel.org/r/20250731061300.14320-1-balasubramani.vivekanandan@intel.com Signed-off-by: Balasubramani Vivekanandan Link: https://lore.kernel.org/r/20250801052356.21885-1-balasubramani.vivekanandan@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 1fdc4c381ff765479d76ccf3134717c430c871b8) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 8 ++++---- drivers/gpu/drm/xe/xe_guc_capture.c | 6 ++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 5bd2f7d7b4ea..6ece4defa9df 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -802,10 +802,6 @@ int xe_device_probe(struct xe_device *xe) return err; } - err = xe_devcoredump_init(xe); - if (err) - return err; - /* * From here on, if a step fails, make sure a Driver-FLR is triggereed */ @@ -870,6 +866,10 @@ int xe_device_probe(struct xe_device *xe) XE_WA(xe->tiles->media_gt, 15015404425_disable)) XE_DEVICE_WA_DISABLE(xe, 15015404425); + err = xe_devcoredump_init(xe); + if (err) + return err; + xe_nvm_init(xe); err = xe_heci_gsc_init(xe); diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index 859a3ba91be5..243dad3e2418 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -1817,6 +1817,12 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm str_yes_no(snapshot->kernel_reserved)); for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) { + /* + * FIXME: During devcoredump print we should avoid accessing the + * driver pointers for gt or engine. Printing should be done only + * using the snapshot captured. 
Here we are accessing the gt + * pointer. It should be fixed. + */ list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type, capture_class, false); snapshot_print_by_list_order(snapshot, p, type, list); From c286ce6b01f633806b4db3e4ec8e0162928299cd Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 1 Aug 2025 16:28:20 +0200 Subject: [PATCH 1510/2411] drm/xe/pf: Disable PF restart worker on device removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can't let restart worker run once device is removed, since other data that it might want to access could be already released. Explicitly disable worker as part of device cleanup action. Fixes: a4d1c5d0b99b ("drm/xe/pf: Move VFs reprovisioning to worker") Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Cc: Jonathan Cavitt Link: https://lore.kernel.org/r/20250801142822.180530-2-michal.wajdeczko@intel.com (cherry picked from commit a424353937c24554bb242a6582ed8f018b4a411c) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 32 ++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index 35489fa81825..2ea81d81c0ae 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -47,9 +47,16 @@ static int pf_alloc_metadata(struct xe_gt *gt) static void pf_init_workers(struct xe_gt *gt) { + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); INIT_WORK(&gt->sriov.pf.workers.restart, pf_worker_restart_func); } +static void pf_fini_workers(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + disable_work_sync(&gt->sriov.pf.workers.restart); +} + /** * xe_gt_sriov_pf_init_early - Prepare SR-IOV PF data structures on PF.
* @gt: the &xe_gt to initialize @@ -79,6 +86,21 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) return 0; } +static void pf_fini_action(void *arg) +{ + struct xe_gt *gt = arg; + + pf_fini_workers(gt); +} + +static int pf_init_late(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + return devm_add_action_or_reset(xe->drm.dev, pf_fini_action, gt); +} + /** * xe_gt_sriov_pf_init - Prepare SR-IOV PF data structures on PF. * @gt: the &xe_gt to initialize @@ -95,7 +117,15 @@ int xe_gt_sriov_pf_init(struct xe_gt *gt) if (err) return err; - return xe_gt_sriov_pf_migration_init(gt); + err = xe_gt_sriov_pf_migration_init(gt); + if (err) + return err; + + err = pf_init_late(gt); + if (err) + return err; + + return 0; } static bool pf_needs_enable_ggtt_guest_update(struct xe_device *xe) From cb7a3f949aa4804ed8e0553d0196a4ce048ff7b8 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 1 Aug 2025 16:28:21 +0200 Subject: [PATCH 1511/2411] drm/xe/pf: Make sure PF is ready to configure VFs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PF driver might be resumed just to configure VFs, but since it is doing some asynchronous GuC reconfigurations after fresh reset, we should wait until all pending works are completed. This is especially important in case of LMEM provisioning, since we also need to update the LMTT and send invalidation requests to all GuCs, which are expected to be already in the VGT mode. 
Fixes: 68ae022278a1 ("drm/xe/pf: Force GuC virtualization mode") Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Cc: Jonathan Cavitt Link: https://lore.kernel.org/r/20250801142822.180530-3-michal.wajdeczko@intel.com (cherry picked from commit c6c86441c465ea440dfb5039f1c26e629a6fd64c) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 25 +++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf.h | 1 + drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 4 ++- drivers/gpu/drm/xe/xe_pci_sriov.c | 7 +++++- drivers/gpu/drm/xe/xe_sriov_pf.c | 27 +++++++++++++++++++++ drivers/gpu/drm/xe/xe_sriov_pf.h | 1 + 6 files changed, 63 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index 2ea81d81c0ae..bdbd15f3afe3 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -16,6 +16,7 @@ #include "xe_gt_sriov_pf_migration.h" #include "xe_gt_sriov_pf_service.h" #include "xe_gt_sriov_printk.h" +#include "xe_guc_submit.h" #include "xe_mmio.h" #include "xe_pm.h" @@ -260,3 +261,27 @@ void xe_gt_sriov_pf_restart(struct xe_gt *gt) { pf_queue_restart(gt); } + +static void pf_flush_restart(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + flush_work(&gt->sriov.pf.workers.restart); +} + +/** + * xe_gt_sriov_pf_wait_ready() - Wait until per-GT PF SR-IOV support is ready. + * @gt: the &xe_gt + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure.
+ */ +int xe_gt_sriov_pf_wait_ready(struct xe_gt *gt) +{ + /* don't wait if there is another ongoing reset */ + if (xe_guc_read_stopped(&gt->uc.guc)) + return -EBUSY; + + pf_flush_restart(gt); + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index e2b2ff8132dc..e7fde3f9937a 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -11,6 +11,7 @@ struct xe_gt; #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_init_early(struct xe_gt *gt); int xe_gt_sriov_pf_init(struct xe_gt *gt); +int xe_gt_sriov_pf_wait_ready(struct xe_gt *gt); void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid); void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index bf679b21f485..3ed245e04d0c 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -22,6 +22,7 @@ #include "xe_gt_sriov_pf_policy.h" #include "xe_gt_sriov_pf_service.h" #include "xe_pm.h" +#include "xe_sriov_pf.h" /* * /sys/kernel/debug/dri/0/ @@ -205,7 +206,8 @@ static int CONFIG##_set(void *data, u64 val) \ return -EOVERFLOW; \ \ xe_pm_runtime_get(xe); \ - err = xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \ + err = xe_sriov_pf_wait_ready(xe) ?: \ + xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \ xe_pm_runtime_put(xe); \ \ return err; \ diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index 8813efdcafbb..447a7867eecb 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -12,6 +12,7 @@ #include "xe_pci_sriov.h" #include "xe_pm.h" #include "xe_sriov.h" +#include "xe_sriov_pf.h" #include "xe_sriov_pf_helpers.h" #include "xe_sriov_printk.h" @@ -138,6 +139,10 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) xe_assert(xe, num_vfs <= total_vfs);
xe_sriov_dbg(xe, "enabling %u VF%s\n", num_vfs, str_plural(num_vfs)); + err = xe_sriov_pf_wait_ready(xe); + if (err) + goto out; + /* * We must hold additional reference to the runtime PM to keep PF in D0 * during VFs lifetime, as our VFs do not implement the PM capability. @@ -169,7 +174,7 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) failed: pf_unprovision_vfs(xe, num_vfs); xe_pm_runtime_put(xe); - +out: xe_sriov_notice(xe, "Failed to enable %u VF%s (%pe)\n", num_vfs, str_plural(num_vfs), ERR_PTR(err)); return err; diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index afbdd894bd6e..27ddf3cc80e9 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -9,6 +9,7 @@ #include "xe_assert.h" #include "xe_device.h" +#include "xe_gt_sriov_pf.h" #include "xe_module.h" #include "xe_sriov.h" #include "xe_sriov_pf.h" @@ -102,6 +103,32 @@ int xe_sriov_pf_init_early(struct xe_device *xe) return 0; } +/** + * xe_sriov_pf_wait_ready() - Wait until PF is ready to operate. + * @xe: the &xe_device to test + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_wait_ready(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + int err; + + if (xe_device_wedged(xe)) + return -ECANCELED; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_wait_ready(gt); + if (err) + return err; + } + + return 0; +} + /** * xe_sriov_pf_print_vfs_summary - Print SR-IOV PF information. 
* @xe: the &xe_device to print info from diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.h b/drivers/gpu/drm/xe/xe_sriov_pf.h index c392c3fcf085..e3b34f8f5e04 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf.h @@ -15,6 +15,7 @@ struct xe_device; #ifdef CONFIG_PCI_IOV bool xe_sriov_pf_readiness(struct xe_device *xe); int xe_sriov_pf_init_early(struct xe_device *xe); +int xe_sriov_pf_wait_ready(struct xe_device *xe); void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root); void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p); #else From 022906afdf90327bce33d52fb4fb41b6c7d618fb Mon Sep 17 00:00:00 2001 From: Simon Richter Date: Sat, 2 Aug 2025 11:40:36 +0900 Subject: [PATCH 1512/2411] Mark xe driver as BROKEN if kernel page size is not 4kB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver, for the time being, assumes that the kernel page size is 4kB, so it fails on loong64 and aarch64 with 16kB pages, and ppc64el with 64kB pages. 
Signed-off-by: Simon Richter Reviewed-by: Thomas Hellström Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: stable@vger.kernel.org # v6.8+ Signed-off-by: Thomas Hellström Link: https://lore.kernel.org/r/20250802024152.3021-1-Simon.Richter@hogyros.de (cherry picked from commit 0521a868222ffe636bf202b6e9d29292c1e19c62) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 2bb2bc052120..714d5702dfd7 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -5,6 +5,7 @@ config DRM_XE depends on KUNIT || !KUNIT depends on INTEL_VSEC || !INTEL_VSEC depends on X86_PLATFORM_DEVICES || !(X86 && ACPI) + depends on PAGE_SIZE_4KB || COMPILE_TEST || BROKEN select INTERVAL_TREE # we need shmfs for the swappable backing store, and in particular # the shmem_readpage() which depends upon tmpfs From b01f21cacde9f2878492cf318fee61bf4ccad323 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 3 Aug 2025 14:31:59 -0700 Subject: [PATCH 1513/2411] NFS: Fix the setting of capabilities when automounting a new filesystem Capabilities cannot be inherited when we cross into a new filesystem. They need to be reset to the minimal defaults, and then probed for again. 
Fixes: 54ceac451598 ("NFS: Share NFS superblocks per-protocol per-server per-FSID") Cc: stable@vger.kernel.org Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 44 ++++++++++++++++++++++++++++++++++++++++++-- fs/nfs/internal.h | 2 +- fs/nfs/nfs4client.c | 20 +------------------- fs/nfs/nfs4proc.c | 2 +- 4 files changed, 45 insertions(+), 23 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index e13eb429b8b5..8fb4a950dd55 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -682,6 +682,44 @@ struct nfs_client *nfs_init_client(struct nfs_client *clp, } EXPORT_SYMBOL_GPL(nfs_init_client); +static void nfs4_server_set_init_caps(struct nfs_server *server) +{ +#if IS_ENABLED(CONFIG_NFS_V4) + /* Set the basic capabilities */ + server->caps = server->nfs_client->cl_mvops->init_caps; + if (server->flags & NFS_MOUNT_NORDIRPLUS) + server->caps &= ~NFS_CAP_READDIRPLUS; + if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA) + server->caps &= ~NFS_CAP_READ_PLUS; + + /* + * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower + * authentication. 
+ */ + if (nfs4_disable_idmapping && + server->client->cl_auth->au_flavor == RPC_AUTH_UNIX) + server->caps |= NFS_CAP_UIDGID_NOMAP; +#endif +} + +void nfs_server_set_init_caps(struct nfs_server *server) +{ + switch (server->nfs_client->rpc_ops->version) { + case 2: + server->caps = NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS; + break; + case 3: + server->caps = NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS; + if (!(server->flags & NFS_MOUNT_NORDIRPLUS)) + server->caps |= NFS_CAP_READDIRPLUS; + break; + default: + nfs4_server_set_init_caps(server); + break; + } +} +EXPORT_SYMBOL_GPL(nfs_server_set_init_caps); + /* * Create a version 2 or 3 client */ @@ -726,7 +764,6 @@ static int nfs_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = ctx->flags; server->options = ctx->options; - server->caps |= NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS; switch (clp->rpc_ops->version) { case 2: @@ -762,6 +799,8 @@ static int nfs_init_server(struct nfs_server *server, if (error < 0) goto error; + nfs_server_set_init_caps(server); + /* Preserve the values of mount_server-related mount options */ if (ctx->mount_server.addrlen) { memcpy(&server->mountd_address, &ctx->mount_server.address, @@ -934,7 +973,6 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour target->acregmax = source->acregmax; target->acdirmin = source->acdirmin; target->acdirmax = source->acdirmax; - target->caps = source->caps; target->options = source->options; target->auth_info = source->auth_info; target->port = source->port; @@ -1169,6 +1207,8 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, if (error < 0) goto out_free_server; + nfs_server_set_init_caps(server); + /* probe the filesystem info for this server filesystem */ error = nfs_probe_server(server, fh); if (error < 0) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 0143e0794d32..1a18d8d9be25 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -231,7 
+231,7 @@ extern struct nfs_client * nfs4_find_client_sessionid(struct net *, const struct sockaddr *, struct nfs4_sessionid *, u32); extern struct nfs_server *nfs_create_server(struct fs_context *); -extern void nfs4_server_set_init_caps(struct nfs_server *); +extern void nfs_server_set_init_caps(struct nfs_server *); extern struct nfs_server *nfs4_create_server(struct fs_context *); extern struct nfs_server *nfs4_create_referral_server(struct fs_context *); extern int nfs4_update_server(struct nfs_server *server, const char *hostname, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 2ea98f1f116f..6fddf43d729c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1074,24 +1074,6 @@ static void nfs4_session_limit_xasize(struct nfs_server *server) #endif } -void nfs4_server_set_init_caps(struct nfs_server *server) -{ - /* Set the basic capabilities */ - server->caps |= server->nfs_client->cl_mvops->init_caps; - if (server->flags & NFS_MOUNT_NORDIRPLUS) - server->caps &= ~NFS_CAP_READDIRPLUS; - if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA) - server->caps &= ~NFS_CAP_READ_PLUS; - - /* - * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower - * authentication. 
- */ - if (nfs4_disable_idmapping && - server->client->cl_auth->au_flavor == RPC_AUTH_UNIX) - server->caps |= NFS_CAP_UIDGID_NOMAP; -} - static int nfs4_server_common_setup(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_probe) { @@ -1110,7 +1092,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, if (error < 0) return error; - nfs4_server_set_init_caps(server); + nfs_server_set_init_caps(server); /* Probe the root fh to retrieve its FSID and filehandle */ error = nfs4_get_rootfh(server, mntfh, auth_probe); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d7dc669d84c5..c7c7ec22f21d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4092,7 +4092,7 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) }; int err; - nfs4_server_set_init_caps(server); + nfs_server_set_init_caps(server); do { err = nfs4_handle_exception(server, _nfs4_server_capabilities(server, fhandle), From b9defd611abf3d24354e14c8d85da14c7abaa07e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 3 Aug 2025 16:32:00 -0700 Subject: [PATCH 1514/2411] NFSv4: Remove duplicate lookups, capability probes and fsinfo calls When crossing into a new filesystem, the NFSv4 client will look up the new directory, and then call nfs4_server_capabilities() as well as nfs4_do_fsinfo() at least twice. This patch removes the duplicate calls, and reduces the initial lookup to retrieve just a minimal set of attributes. 
Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 5 ++- fs/nfs/nfs4getroot.c | 14 +++---- fs/nfs/nfs4proc.c | 87 ++++++++++++++++++++------------------------ 3 files changed, 48 insertions(+), 58 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index d3ca91f60fc1..c34c89af9c7d 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -63,7 +63,7 @@ struct nfs4_minor_version_ops { bool (*match_stateid)(const nfs4_stateid *, const nfs4_stateid *); int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, - struct nfs_fsinfo *); + struct nfs_fattr *); void (*free_lock_state)(struct nfs_server *, struct nfs4_lock_state *); int (*test_and_free_expired)(struct nfs_server *, @@ -296,7 +296,8 @@ extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int, int); extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, const struct cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, const struct cred *); -extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool); +extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, + struct nfs_fattr *, bool); extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, const struct cred *cred); extern int nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cred); extern int nfs4_destroy_clientid(struct nfs_client *clp); diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c index 1a69479a3a59..e67ea345de69 100644 --- a/fs/nfs/nfs4getroot.c +++ b/fs/nfs/nfs4getroot.c @@ -12,30 +12,28 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_probe) { - struct nfs_fsinfo fsinfo; + struct nfs_fattr *fattr = nfs_alloc_fattr(); int ret = -ENOMEM; - fsinfo.fattr = nfs_alloc_fattr(); - if (fsinfo.fattr == 
NULL) + if (fattr == NULL) goto out; /* Start by getting the root filehandle from the server */ - ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo, auth_probe); + ret = nfs4_proc_get_rootfh(server, mntfh, fattr, auth_probe); if (ret < 0) { dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret); goto out; } - if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE) - || !S_ISDIR(fsinfo.fattr->mode)) { + if (!(fattr->valid & NFS_ATTR_FATTR_TYPE) || !S_ISDIR(fattr->mode)) { printk(KERN_ERR "nfs4_get_rootfh:" " getroot encountered non-directory\n"); ret = -ENOTDIR; goto out; } - memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid)); + memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid)); out: - nfs_free_fattr(fsinfo.fattr); + nfs_free_fattr(fattr); return ret; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c7c7ec22f21d..7d2b67e06cc3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4240,15 +4240,18 @@ static int nfs4_discover_trunking(struct nfs_server *server, } static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fattr *fattr) { - u32 bitmask[3]; + u32 bitmask[3] = { + [0] = FATTR4_WORD0_TYPE | FATTR4_WORD0_CHANGE | + FATTR4_WORD0_SIZE | FATTR4_WORD0_FSID, + }; struct nfs4_lookup_root_arg args = { .bitmask = bitmask, }; struct nfs4_lookup_res res = { .server = server, - .fattr = info->fattr, + .fattr = fattr, .fh = fhandle, }; struct rpc_message msg = { @@ -4257,27 +4260,20 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_resp = &res, }; - bitmask[0] = nfs4_fattr_bitmap[0]; - bitmask[1] = nfs4_fattr_bitmap[1]; - /* - * Process the label in the upcoming getfattr - */ - bitmask[2] = nfs4_fattr_bitmap[2] & ~FATTR4_WORD2_SECURITY_LABEL; - - nfs_fattr_init(info->fattr); + nfs_fattr_init(fattr); return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); } static int nfs4_lookup_root(struct nfs_server *server, 
struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fattr *fattr) { struct nfs4_exception exception = { .interruptible = true, }; int err; do { - err = _nfs4_lookup_root(server, fhandle, info); - trace_nfs4_lookup_root(server, fhandle, info->fattr, err); + err = _nfs4_lookup_root(server, fhandle, fattr); + trace_nfs4_lookup_root(server, fhandle, fattr, err); switch (err) { case 0: case -NFS4ERR_WRONGSEC: @@ -4290,8 +4286,9 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, return err; } -static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info, rpc_authflavor_t flavor) +static int nfs4_lookup_root_sec(struct nfs_server *server, + struct nfs_fh *fhandle, struct nfs_fattr *fattr, + rpc_authflavor_t flavor) { struct rpc_auth_create_args auth_args = { .pseudoflavor = flavor, @@ -4301,7 +4298,7 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl auth = rpcauth_create(&auth_args, server->client); if (IS_ERR(auth)) return -EACCES; - return nfs4_lookup_root(server, fhandle, info); + return nfs4_lookup_root(server, fhandle, fattr); } /* @@ -4314,7 +4311,7 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl * negative errno value. 
*/ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fattr *fattr) { /* Per 3530bis 15.33.5 */ static const rpc_authflavor_t flav_array[] = { @@ -4330,8 +4327,9 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, if (server->auth_info.flavor_len > 0) { /* try each flavor specified by user */ for (i = 0; i < server->auth_info.flavor_len; i++) { - status = nfs4_lookup_root_sec(server, fhandle, info, - server->auth_info.flavors[i]); + status = nfs4_lookup_root_sec( + server, fhandle, fattr, + server->auth_info.flavors[i]); if (status == -NFS4ERR_WRONGSEC || status == -EACCES) continue; break; @@ -4339,7 +4337,7 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, } else { /* no flavors specified by user, try default list */ for (i = 0; i < ARRAY_SIZE(flav_array); i++) { - status = nfs4_lookup_root_sec(server, fhandle, info, + status = nfs4_lookup_root_sec(server, fhandle, fattr, flav_array[i]); if (status == -NFS4ERR_WRONGSEC || status == -EACCES) continue; @@ -4363,28 +4361,22 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, * nfs4_proc_get_rootfh - get file handle for server's pseudoroot * @server: initialized nfs_server handle * @fhandle: we fill in the pseudo-fs root file handle - * @info: we fill in an FSINFO struct + * @fattr: we fill in a bare bones struct fattr * @auth_probe: probe the auth flavours * * Returns zero on success, or a negative errno. 
*/ int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info, - bool auth_probe) + struct nfs_fattr *fattr, bool auth_probe) { int status = 0; if (!auth_probe) - status = nfs4_lookup_root(server, fhandle, info); + status = nfs4_lookup_root(server, fhandle, fattr); if (auth_probe || status == NFS4ERR_WRONGSEC) - status = server->nfs_client->cl_mvops->find_root_sec(server, - fhandle, info); - - if (status == 0) - status = nfs4_server_capabilities(server, fhandle); - if (status == 0) - status = nfs4_do_fsinfo(server, fhandle, info); + status = server->nfs_client->cl_mvops->find_root_sec( + server, fhandle, fattr); return nfs4_map_errors(status); } @@ -10351,10 +10343,10 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) * Use the state managment nfs_client cl_rpcclient, which uses krb5i (if * possible) as per RFC3530bis and RFC5661 Security Considerations sections */ -static int -_nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info, - struct nfs4_secinfo_flavors *flavors, bool use_integrity) +static int _nfs41_proc_secinfo_no_name(struct nfs_server *server, + struct nfs_fh *fhandle, + struct nfs4_secinfo_flavors *flavors, + bool use_integrity) { struct nfs41_secinfo_no_name_args args = { .style = SECINFO_STYLE_CURRENT_FH, @@ -10398,9 +10390,9 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, return status; } -static int -nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) +static int nfs41_proc_secinfo_no_name(struct nfs_server *server, + struct nfs_fh *fhandle, + struct nfs4_secinfo_flavors *flavors) { struct nfs4_exception exception = { .interruptible = true, @@ -10412,7 +10404,7 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, /* try to use integrity protection with machine cred */ if 
(_nfs4_is_integrity_protected(server->nfs_client)) - err = _nfs41_proc_secinfo_no_name(server, fhandle, info, + err = _nfs41_proc_secinfo_no_name(server, fhandle, flavors, true); /* @@ -10422,7 +10414,7 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, * the current filesystem's rpc_client and the user cred. */ if (err == -NFS4ERR_WRONGSEC) - err = _nfs41_proc_secinfo_no_name(server, fhandle, info, + err = _nfs41_proc_secinfo_no_name(server, fhandle, flavors, false); switch (err) { @@ -10438,9 +10430,8 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, return err; } -static int -nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) +static int nfs41_find_root_sec(struct nfs_server *server, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { int err; struct page *page; @@ -10456,14 +10447,14 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, } flavors = page_address(page); - err = nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); + err = nfs41_proc_secinfo_no_name(server, fhandle, flavors); /* * Fall back on "guess and check" method if * the server doesn't support SECINFO_NO_NAME */ if (err == -NFS4ERR_WRONGSEC || err == -ENOTSUPP) { - err = nfs4_find_root_sec(server, fhandle, info); + err = nfs4_find_root_sec(server, fhandle, fattr); goto out_freepage; } if (err) @@ -10488,8 +10479,8 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, flavor = RPC_AUTH_MAXFLAVOR; if (flavor != RPC_AUTH_MAXFLAVOR) { - err = nfs4_lookup_root_sec(server, fhandle, - info, flavor); + err = nfs4_lookup_root_sec(server, fhandle, fattr, + flavor); if (!err) break; } From 99b773d720aeea1ef2170dce5fcfa80649e26b78 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Jul 2025 15:11:14 -0400 Subject: [PATCH 1515/2411] sched/psi: Fix psi_seq initialization With the seqcount moved out of the group into a global psi_seq, 
re-initializing the seqcount on group creation is causing seqcount corruption. Fixes: 570c8efd5eb7 ("sched/psi: Optimize psi_group_change() cpu_clock() usage") Reported-by: Chris Mason Suggested-by: Beata Michalska Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds --- kernel/sched/psi.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 2024c1d36402..59fdb7ebbf22 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -176,7 +176,7 @@ struct psi_group psi_system = { .pcpu = &system_group_pcpu, }; -static DEFINE_PER_CPU(seqcount_t, psi_seq); +static DEFINE_PER_CPU(seqcount_t, psi_seq) = SEQCNT_ZERO(psi_seq); static inline void psi_write_begin(int cpu) { @@ -204,11 +204,7 @@ static void poll_timer_fn(struct timer_list *t); static void group_init(struct psi_group *group) { - int cpu; - group->enabled = true; - for_each_possible_cpu(cpu) - seqcount_init(per_cpu_ptr(&psi_seq, cpu)); group->avg_last_update = sched_clock(); group->avg_next_update = group->avg_last_update + psi_period; mutex_init(&group->avgs_lock); From dbe05428c4e54068a86e7e02405f3b30b1d2b3dd Mon Sep 17 00:00:00 2001 From: Thomas Croft Date: Mon, 4 Aug 2025 09:12:07 -0600 Subject: [PATCH 1516/2411] ALSA: hda/realtek: add LG gram 16Z90R-A to alc269 fixup table Several months ago, Joshua Grisham submitted a patch [1] for several ALC298 based sound cards. The entry for the LG gram 16 in the alc269_fixup_tbl only matches the Subsystem ID for the 16Z90R-Q and 16Z90R-K models [2]. My 16Z90R-A has a different Subsystem ID [3]. I'm not sure why these IDs differ, but I speculate it's due to the NVIDIA GPU included in the 16Z90R-A model that isn't present in the other models. I applied the patch to the latest Arch Linux kernel and the card was initialized as expected. 
[1]: https://lore.kernel.org/linux-sound/20240909193000.838815-1-josh@joshuagrisham.com/ [2]: https://linux-hardware.org/?id=pci:8086-51ca-1854-0488 [3]: https://linux-hardware.org/?id=pci:8086-51ca-1854-0489 Signed-off-by: Thomas Croft Link: https://patch.msgid.link/20250804151457.134761-2-thomasmcft@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 2554b42eeb0f..e27a36e4e92a 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7110,6 +7110,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1854, 0x0440, "LG CQ6", ALC256_FIXUP_HEADPHONE_AMP_VOL), SND_PCI_QUIRK(0x1854, 0x0441, "LG CQ6 AIO", ALC256_FIXUP_HEADPHONE_AMP_VOL), SND_PCI_QUIRK(0x1854, 0x0488, "LG gram 16 (16Z90R)", ALC298_FIXUP_SAMSUNG_AMP_V2_4_AMPS), + SND_PCI_QUIRK(0x1854, 0x0489, "LG gram 16 (16Z90R-A)", ALC298_FIXUP_SAMSUNG_AMP_V2_4_AMPS), SND_PCI_QUIRK(0x1854, 0x048a, "LG gram 17 (17ZD90R)", ALC298_FIXUP_SAMSUNG_AMP_V2_4_AMPS), SND_PCI_QUIRK(0x19e5, 0x3204, "Huawei MACH-WX9", ALC256_FIXUP_HUAWEI_MACH_WX9_PINS), SND_PCI_QUIRK(0x19e5, 0x320f, "Huawei WRT-WX9 ", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), From 6bc829220b33da8522572cc50fdf5067c51d3bf3 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Fri, 1 Aug 2025 09:58:35 +0200 Subject: [PATCH 1517/2411] spi: spi-qpic-snand: use correct CW_PER_PAGE value for OOB write The qcom_spi_program_oob() function uses only the last codeword to write the OOB data into the flash, but it sets the CW_PER_PAGE field in the CFG0 register as it would use all codewords. It seems that this confuses the hardware somehow, and any access to the flash fails with a timeout error after the function is called. 
The problem can be easily reproduced with the following commands: # dd if=/dev/zero bs=2176 count=1 > /tmp/test.bin 1+0 records in 1+0 records out # flash_erase /dev/mtd4 0 0 Erasing 128 Kibyte @ 0 -- 100 % complete # nandwrite -O /dev/mtd4 /tmp/test.bin Writing data to block 0 at offset 0x0 # nanddump -o /dev/mtd4 >/dev/null ECC failed: 0 ECC corrected: 0 Number of bad blocks: 0 Number of bbt blocks: 0 Block size 131072, page size 2048, OOB size 128 Dumping data starting at 0x00000000 and ending at 0x00020000... [ 33.197605] qcom_snand 79b0000.spi: failure to read oob libmtd: error!: MEMREADOOB64 ioctl failed for mtd4, offset 0 (eraseblock 0) error 110 (Operation timed out) [ 35.277582] qcom_snand 79b0000.spi: failure in submitting cmd descriptor libmtd: error!: cannot read 2048 bytes from mtd4 (eraseblock 0, offset 2048) error 110 (Operation timed out) nanddump: error!: mtd_read Change the code to use the correct CW_PER_PAGE value to avoid this. Fixes: 7304d1909080 ("spi: spi-qpic: add driver for QCOM SPI NAND flash Interface") Signed-off-by: Gabor Juhos Link: https://patch.msgid.link/20250801-qpic-snand-oob-cwpp-fix-v1-1-f5a41b86af2e@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-qpic-snand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index 0cfa0d960fd3..5216d60e01aa 100644 --- a/drivers/spi/spi-qpic-snand.c +++ b/drivers/spi/spi-qpic-snand.c @@ -1196,7 +1196,7 @@ static int qcom_spi_program_oob(struct qcom_nand_controller *snandc, u32 cfg0, cfg1, ecc_bch_cfg, ecc_buf_cfg; cfg0 = (ecc_cfg->cfg0 & ~CW_PER_PAGE_MASK) | - FIELD_PREP(CW_PER_PAGE_MASK, num_cw - 1); + FIELD_PREP(CW_PER_PAGE_MASK, 0); cfg1 = ecc_cfg->cfg1; ecc_bch_cfg = ecc_cfg->ecc_bch_cfg; ecc_buf_cfg = ecc_cfg->ecc_buf_cfg; From 72332439e6b0a39e763d4604e71774ab83423275 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 4 Aug 2025 11:50:13 +0200 Subject: [PATCH 1518/2411] spi: spi-mem: Add missing kdoc 
argument The "*mem" pointer has been added without description, describe it in the kdoc comment in order to fix the following W=1 warning: Warning: drivers/spi/spi-mem.c:594 function parameter 'mem' not described in 'spi_mem_calc_op_duration' Fixes: a11a51896572 ("spi: spi-mem: Take into account the actual maximum frequency") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507310409.fnuQ21qb-lkp@intel.com/ Signed-off-by: Miquel Raynal Link: https://patch.msgid.link/20250804095013.409700-1-miquel.raynal@bootlin.com Signed-off-by: Mark Brown --- drivers/spi/spi-mem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c index 5db0639d3b01..dfa8ab1ec80f 100644 --- a/drivers/spi/spi-mem.c +++ b/drivers/spi/spi-mem.c @@ -577,6 +577,7 @@ EXPORT_SYMBOL_GPL(spi_mem_adjust_op_freq); * spi_mem_calc_op_duration() - Derives the theoretical length (in ns) of an * operation. This helps finding the best variant * among a list of possible choices. + * @mem: the SPI memory * @op: the operation to benchmark * * Some chips have per-op frequency limitations, PCBs usually have their own From 389d79a195a9f71a103b39097ee8341a7ca60927 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 8 Jul 2025 13:17:18 +0530 Subject: [PATCH 1519/2411] drm/amdgpu: Update supported modes for GC v9.5.0 For GC v9.5.0 SOCs, both CPX and QPX compute modes are also supported in NPS2 mode. 
Signed-off-by: Lijo Lazar Acked-by: Mangesh Gadre Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher (cherry picked from commit 9d1ac25c7f830e0132aa816393b1e9f140e71148) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 914cf4bfb033..811124ff88a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -227,6 +227,7 @@ static int __aqua_vanjaram_get_px_mode_info(struct amdgpu_xcp_mgr *xcp_mgr, uint16_t *nps_modes) { struct amdgpu_device *adev = xcp_mgr->adev; + uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0); if (!num_xcp || !nps_modes || !(xcp_mgr->supp_xcp_modes & BIT(px_mode))) return -EINVAL; @@ -250,12 +251,14 @@ static int __aqua_vanjaram_get_px_mode_info(struct amdgpu_xcp_mgr *xcp_mgr, *num_xcp = 4; *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | BIT(AMDGPU_NPS4_PARTITION_MODE); + if (gc_ver == IP_VERSION(9, 5, 0)) + *nps_modes |= BIT(AMDGPU_NPS2_PARTITION_MODE); break; case AMDGPU_CPX_PARTITION_MODE: *num_xcp = NUM_XCC(adev->gfx.xcc_mask); *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | BIT(AMDGPU_NPS4_PARTITION_MODE); - if (amdgpu_sriov_vf(adev)) + if (gc_ver == IP_VERSION(9, 5, 0)) *nps_modes |= BIT(AMDGPU_NPS2_PARTITION_MODE); break; default: From 05c8b690511854ba31d8d1bff7139a13ec66b9e7 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 25 Jul 2025 10:21:10 +0530 Subject: [PATCH 1520/2411] drm/amdgpu: Update external revid for GC v9.5.0 Use different external revid for GC v9.5.0 SOCs. 
Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher (cherry picked from commit 21c6764ed4bfaecad034bc4fd15dd64c5a436325) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/soc15.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index c457be3a3c56..9e74c9822e62 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1218,6 +1218,8 @@ static int soc15_common_early_init(struct amdgpu_ip_block *ip_block) AMD_PG_SUPPORT_JPEG; /*TODO: need a new external_rev_id for GC 9.4.4? */ adev->external_rev_id = adev->rev_id + 0x46; + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) + adev->external_rev_id = adev->rev_id + 0x50; break; default: /* FIXME: not supported yet */ From ed4efe426a49729952b3dc05d20e33b94409bdd1 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 24 Jul 2025 22:12:21 -0500 Subject: [PATCH 1521/2411] drm/amd: Restore cached power limit during resume The power limit will be cached in smu->current_power_limit but if the ASIC goes into S3 this value won't be restored. Restore the value during SMU resume. 
Acked-by: Alex Deucher Link: https://lore.kernel.org/r/20250725031222.3015095-2-superm1@kernel.org Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 26a609e053a6fc494403e95403bc6a2470383bec) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 756afe78a6e5..310f51ff05b9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2226,6 +2226,12 @@ static int smu_resume(struct amdgpu_ip_block *ip_block) adev->pm.dpm_enabled = true; + if (smu->current_power_limit) { + ret = smu_set_power_limit(smu, smu->current_power_limit); + if (ret && ret != -EOPNOTSUPP) + return ret; + } + dev_info(adev->dev, "SMU is resumed successfully!\n"); return 0; From 796ff8a7e01bd18738d3bb4111f9d6f963145d29 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 24 Jul 2025 22:12:22 -0500 Subject: [PATCH 1522/2411] drm/amd: Restore cached manual clock settings during resume If the SCLK limits have been set before S3 they will not be restored. The limits are however cached in the driver and so they can be restored by running a commit sequence during resume. 
Acked-by: Alex Deucher Link: https://lore.kernel.org/r/20250725031222.3015095-3-superm1@kernel.org Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 4e9526924d09057a9ba854305e17eded900ced82) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 310f51ff05b9..b47cb4a5f488 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -77,6 +77,9 @@ static void smu_power_profile_mode_get(struct smu_context *smu, static void smu_power_profile_mode_put(struct smu_context *smu, enum PP_SMC_POWER_PROFILE profile_mode); static enum smu_clk_type smu_convert_to_smuclk(enum pp_clock_type type); +static int smu_od_edit_dpm_table(void *handle, + enum PP_OD_DPM_TABLE_COMMAND type, + long *input, uint32_t size); static int smu_sys_get_pp_feature_mask(void *handle, char *buf) @@ -2195,6 +2198,7 @@ static int smu_resume(struct amdgpu_ip_block *ip_block) int ret; struct amdgpu_device *adev = ip_block->adev; struct smu_context *smu = adev->powerplay.pp_handle; + struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); if (amdgpu_sriov_multi_vf_mode(adev)) return 0; @@ -2232,6 +2236,12 @@ static int smu_resume(struct amdgpu_ip_block *ip_block) return ret; } + if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { + ret = smu_od_edit_dpm_table(smu, PP_OD_COMMIT_DPM_TABLE, NULL, 0); + if (ret) + return ret; + } + dev_info(adev->dev, "SMU is resumed successfully!\n"); return 0; From f6c0f3d24478a0792e50a64c2eba9f34d65519f2 Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Wed, 16 Jul 2025 22:04:28 +0000 Subject: [PATCH 1523/2411] drm/amdkfd: Fix checkpoint-restore on multi-xcc GPUs with multi-xcc have multiple MQDs per queue. This patch saves and restores all the MQDs within the partition. 
Signed-off-by: David Yat Sin Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit a578f2a58c3ab38f0643b1b6e7534af860233cb1) Cc: stable@vger.kernel.org --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 61 ++++++++++++++++--- .../amd/amdkfd/kfd_process_queue_manager.c | 20 ++++-- 3 files changed, 67 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 2d91027e2a74..6c5c7c1bf5ed 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -2725,7 +2725,7 @@ static void get_queue_checkpoint_info(struct device_queue_manager *dqm, dqm_lock(dqm); mqd_mgr = dqm->mqd_mgrs[mqd_type]; - *mqd_size = mqd_mgr->mqd_size; + *mqd_size = mqd_mgr->mqd_size * NUM_XCC(mqd_mgr->dev->xcc_mask); *ctl_stack_size = 0; if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 97933d2a3803..f2dee320fada 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -373,7 +373,7 @@ static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stac { struct v9_mqd *m = get_mqd(mqd); - *ctl_stack_size = m->cp_hqd_cntl_stack_size; + *ctl_stack_size = m->cp_hqd_cntl_stack_size * NUM_XCC(mm->dev->xcc_mask); } static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst) @@ -388,6 +388,24 @@ static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, voi memcpy(ctl_stack_dst, ctl_stack, m->cp_hqd_cntl_stack_size); } +static void checkpoint_mqd_v9_4_3(struct mqd_manager *mm, + void *mqd, + void *mqd_dst, + void *ctl_stack_dst) +{ + struct v9_mqd *m; + int xcc; + uint64_t size = 
get_mqd(mqd)->cp_mqd_stride_size; + + for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) { + m = get_mqd(mqd + size * xcc); + + checkpoint_mqd(mm, m, + (uint8_t *)mqd_dst + sizeof(*m) * xcc, + (uint8_t *)ctl_stack_dst + m->cp_hqd_cntl_stack_size * xcc); + } +} + static void restore_mqd(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *qp, @@ -764,13 +782,35 @@ static void restore_mqd_v9_4_3(struct mqd_manager *mm, void **mqd, const void *mqd_src, const void *ctl_stack_src, u32 ctl_stack_size) { - restore_mqd(mm, mqd, mqd_mem_obj, gart_addr, qp, mqd_src, ctl_stack_src, ctl_stack_size); - if (amdgpu_sriov_multi_vf_mode(mm->dev->adev)) { - struct v9_mqd *m; + struct kfd_mem_obj xcc_mqd_mem_obj; + u32 mqd_ctl_stack_size; + struct v9_mqd *m; + u32 num_xcc; + int xcc; - m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr; - m->cp_hqd_pq_doorbell_control |= 1 << - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT; + uint64_t offset = mm->mqd_stride(mm, qp); + + mm->dev->dqm->current_logical_xcc_start++; + + num_xcc = NUM_XCC(mm->dev->xcc_mask); + mqd_ctl_stack_size = ctl_stack_size / num_xcc; + + memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj)); + + /* Set the MQD pointer and gart address to XCC0 MQD */ + *mqd = mqd_mem_obj->cpu_ptr; + if (gart_addr) + *gart_addr = mqd_mem_obj->gpu_addr; + + for (xcc = 0; xcc < num_xcc; xcc++) { + get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset * xcc); + restore_mqd(mm, (void **)&m, + &xcc_mqd_mem_obj, + NULL, + qp, + (uint8_t *)mqd_src + xcc * sizeof(*m), + (uint8_t *)ctl_stack_src + xcc * mqd_ctl_stack_size, + mqd_ctl_stack_size); } } static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd, @@ -906,7 +946,6 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->free_mqd = kfd_free_mqd_cp; mqd->is_occupied = kfd_is_occupied_cp; mqd->get_checkpoint_info = get_checkpoint_info; - mqd->checkpoint_mqd = checkpoint_mqd; mqd->mqd_size = sizeof(struct 
v9_mqd); mqd->mqd_stride = mqd_stride_v9; #if defined(CONFIG_DEBUG_FS) @@ -918,16 +957,18 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->init_mqd = init_mqd_v9_4_3; mqd->load_mqd = load_mqd_v9_4_3; mqd->update_mqd = update_mqd_v9_4_3; - mqd->restore_mqd = restore_mqd_v9_4_3; mqd->destroy_mqd = destroy_mqd_v9_4_3; mqd->get_wave_state = get_wave_state_v9_4_3; + mqd->checkpoint_mqd = checkpoint_mqd_v9_4_3; + mqd->restore_mqd = restore_mqd_v9_4_3; } else { mqd->init_mqd = init_mqd; mqd->load_mqd = load_mqd; mqd->update_mqd = update_mqd; - mqd->restore_mqd = restore_mqd; mqd->destroy_mqd = kfd_destroy_mqd_cp; mqd->get_wave_state = get_wave_state; + mqd->checkpoint_mqd = checkpoint_mqd; + mqd->restore_mqd = restore_mqd; } break; case KFD_MQD_TYPE_HIQ: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index c643e0ccec52..7fbb5c274ccc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -914,7 +914,10 @@ static int criu_checkpoint_queues_device(struct kfd_process_device *pdd, q_data = (struct kfd_criu_queue_priv_data *)q_private_data; - /* data stored in this order: priv_data, mqd, ctl_stack */ + /* + * data stored in this order: + * priv_data, mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0], ctl_stack[xcc1]... 
+ */ q_data->mqd_size = mqd_size; q_data->ctl_stack_size = ctl_stack_size; @@ -963,7 +966,7 @@ int kfd_criu_checkpoint_queues(struct kfd_process *p, } static void set_queue_properties_from_criu(struct queue_properties *qp, - struct kfd_criu_queue_priv_data *q_data) + struct kfd_criu_queue_priv_data *q_data, uint32_t num_xcc) { qp->is_interop = false; qp->queue_percent = q_data->q_percent; @@ -976,7 +979,11 @@ static void set_queue_properties_from_criu(struct queue_properties *qp, qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size; qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address; qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size; - qp->ctl_stack_size = q_data->ctl_stack_size; + if (q_data->type == KFD_QUEUE_TYPE_COMPUTE) + qp->ctl_stack_size = q_data->ctl_stack_size / num_xcc; + else + qp->ctl_stack_size = q_data->ctl_stack_size; + qp->type = q_data->type; qp->format = q_data->format; } @@ -1036,12 +1043,15 @@ int kfd_criu_restore_queue(struct kfd_process *p, goto exit; } - /* data stored in this order: mqd, ctl_stack */ + /* + * data stored in this order: + * mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0], ctl_stack[xcc1]... + */ mqd = q_extra_data; ctl_stack = mqd + q_data->mqd_size; memset(&qp, 0, sizeof(qp)); - set_queue_properties_from_criu(&qp, q_data); + set_queue_properties_from_criu(&qp, q_data, NUM_XCC(pdd->dev->adev->gfx.xcc_mask)); print_queue_properties(&qp); From 4db9cd554883e051df1840d4d58d636043101034 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Tue, 22 Jul 2025 17:58:29 +0200 Subject: [PATCH 1524/2411] drm/amd/display: Don't overwrite dce60_clk_mgr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dc_clk_mgr_create accidentally overwrites the dce60_clk_mgr with the dce_clk_mgr, causing incorrect behaviour on DCE6. Fix it by removing the extra dce_clk_mgr_construct. 
Fixes: 62eab49faae7 ("drm/amd/display: hide VGH asic specific structs") Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Deucher Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher (cherry picked from commit bbddcbe36a686af03e91341b9bbfcca94bd45fb6) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 33b9d36619ff..4071851f9e86 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -158,7 +158,6 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p return NULL; } dce60_clk_mgr_construct(ctx, clk_mgr); - dce_clk_mgr_construct(ctx, clk_mgr); return &clk_mgr->base; } #endif From 1c8dc3e088e09531bcdfc9fe348204abc3decb6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Tue, 22 Jul 2025 17:58:30 +0200 Subject: [PATCH 1525/2411] drm/amd/display: Fix DCE 6.0 and 6.4 PLL programming. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently, both DCE 6.0 and 6.4 have 3 PLLs, but PLL0 can only be used for DP. Make sure to initialize the correct amount of PLLs in DC for these DCE versions and use PLL0 only for DP. Also, on DCE 6.0 and 6.4, the PLL0 needs to be powered on at initialization as opposed to DCE 6.1 and 7.x which use a different clock source for DFS. 
The following functions were used as reference from the old radeon driver implementation of DCE 6.x: - radeon_atom_pick_pll - atombios_crtc_set_disp_eng_pll Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Deucher Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher (cherry picked from commit 35222b5934ec8d762473592ece98659baf6bc48e) Cc: stable@vger.kernel.org --- .../display/dc/clk_mgr/dce100/dce_clk_mgr.c | 5 +++ .../dc/resource/dce60/dce60_resource.c | 34 +++++++++++-------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c index 26feefbb8990..f5ad0a177038 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c @@ -245,6 +245,11 @@ int dce_set_clock( pxl_clk_params.target_pixel_clock_100hz = requested_clk_khz * 10; pxl_clk_params.pll_id = CLOCK_SOURCE_ID_DFS; + /* DCE 6.0, DCE 6.4: engine clock is the same as PLL0 */ + if (clk_mgr_base->ctx->dce_version == DCE_VERSION_6_0 || + clk_mgr_base->ctx->dce_version == DCE_VERSION_6_4) + pxl_clk_params.pll_id = CLOCK_SOURCE_ID_PLL0; + if (clk_mgr_dce->dfs_bypass_active) pxl_clk_params.flags.SET_DISPCLK_DFS_BYPASS = true; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c index 58b59d52dc9d..53b60044653f 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c @@ -373,7 +373,7 @@ static const struct resource_caps res_cap = { .num_timing_generator = 6, .num_audio = 6, .num_stream_encoder = 6, - .num_pll = 2, + .num_pll = 3, .num_ddc = 6, }; @@ -389,7 +389,7 @@ static const struct resource_caps res_cap_64 = { .num_timing_generator = 2, .num_audio = 2, .num_stream_encoder = 2, - .num_pll = 2, + .num_pll = 3, .num_ddc = 2, }; @@ 
-973,21 +973,24 @@ static bool dce60_construct( if (bp->fw_info_valid && bp->fw_info.external_clock_source_frequency_for_dp != 0) { pool->base.dp_clock_source = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true); + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true); + /* DCE 6.0 and 6.4: PLL0 can only be used with DP. Don't initialize it here. */ pool->base.clock_sources[0] = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], false); + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); pool->base.clock_sources[1] = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); pool->base.clk_src_count = 2; } else { pool->base.dp_clock_source = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], true); + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], true); pool->base.clock_sources[0] = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); - pool->base.clk_src_count = 1; + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); + pool->base.clock_sources[1] = + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); + pool->base.clk_src_count = 2; } if (pool->base.dp_clock_source == NULL) { @@ -1365,21 +1368,24 @@ static bool dce64_construct( if (bp->fw_info_valid && bp->fw_info.external_clock_source_frequency_for_dp != 0) { pool->base.dp_clock_source = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true); + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true); + /* DCE 6.0 and 6.4: PLL0 can only be used with DP. Don't initialize it here. 
*/ pool->base.clock_sources[0] = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[0], false); + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); pool->base.clock_sources[1] = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[1], false); + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); pool->base.clk_src_count = 2; } else { pool->base.dp_clock_source = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[0], true); + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], true); pool->base.clock_sources[0] = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[1], false); - pool->base.clk_src_count = 1; + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); + pool->base.clock_sources[1] = + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); + pool->base.clk_src_count = 2; } if (pool->base.dp_clock_source == NULL) { From c00d8b79fd2167c6ac65e096619535acdf8678d5 Mon Sep 17 00:00:00 2001 From: YuanShang Date: Wed, 23 Jul 2025 16:44:49 +0800 Subject: [PATCH 1526/2411] drm/amdgpu: Retain job->vm in amdgpu_job_prepare_job The field job->vm is used in function amdgpu_job_run to get the page table re-generation counter and decide whether the job should be skipped. Specifically, function amdgpu_vm_generation checks if the VM is valid for this job to use. For instance, if a gfx job depends on a cancelled sdma job from entity vm->delayed, then the gfx job should be skipped. 
Fixes: 26c95e838e63 ("drm/amdgpu: set the VM pointer to NULL in amdgpu_job_prepare") Signed-off-by: YuanShang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit ed76936c6b10b547c6df4ca75412331e9ef6d339) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index e6061d45f142..9b1c55115921 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -365,13 +365,6 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job, dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r); goto error; } - /* - * The VM structure might be released after the VMID is - * assigned, we had multiple problems with people trying to use - * the VM pointer so better set it to NULL. - */ - if (!fence) - job->vm = NULL; return fence; } From 0bae62cc989fa99ac9cb564eb573aad916d1eb61 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 18 Jul 2025 15:52:04 -0400 Subject: [PATCH 1527/2411] drm/amdgpu: update mmhub 3.0.1 client id mappings Update the client id mapping so the correct clients get printed when there is a mmhub page fault. 
Reviewed-by: David (Ming Qiang) Wu Signed-off-by: Alex Deucher (cherry picked from commit 2a2681eda73b99a2c1ee8cdb006099ea5d0c2505) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c | 57 +++++++++++++---------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c index 134c4ec10887..910337dc28d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c @@ -36,40 +36,47 @@ static const char *mmhub_client_ids_v3_0_1[][2] = { [0][0] = "VMC", + [1][0] = "ISPXT", + [2][0] = "ISPIXT", [4][0] = "DCEDMC", [5][0] = "DCEVGA", [6][0] = "MP0", [7][0] = "MP1", - [8][0] = "MPIO", - [16][0] = "HDP", - [17][0] = "LSDMA", - [18][0] = "JPEG", - [19][0] = "VCNU0", - [21][0] = "VSCH", - [22][0] = "VCNU1", - [23][0] = "VCN1", - [32+20][0] = "VCN0", - [2][1] = "DBGUNBIO", + [8][0] = "MPM", + [12][0] = "ISPTNR", + [14][0] = "ISPCRD0", + [15][0] = "ISPCRD1", + [16][0] = "ISPCRD2", + [22][0] = "HDP", + [23][0] = "LSDMA", + [24][0] = "JPEG", + [27][0] = "VSCH", + [28][0] = "VCNU", + [29][0] = "VCN", + [1][1] = "ISPXT", + [2][1] = "ISPIXT", [3][1] = "DCEDWB", [4][1] = "DCEDMC", [5][1] = "DCEVGA", [6][1] = "MP0", [7][1] = "MP1", - [8][1] = "MPIO", - [10][1] = "DBGU0", - [11][1] = "DBGU1", - [12][1] = "DBGU2", - [13][1] = "DBGU3", - [14][1] = "XDP", - [15][1] = "OSSSYS", - [16][1] = "HDP", - [17][1] = "LSDMA", - [18][1] = "JPEG", - [19][1] = "VCNU0", - [20][1] = "VCN0", - [21][1] = "VSCH", - [22][1] = "VCNU1", - [23][1] = "VCN1", + [8][1] = "MPM", + [10][1] = "ISPMWR0", + [11][1] = "ISPMWR1", + [12][1] = "ISPTNR", + [13][1] = "ISPSWR", + [14][1] = "ISPCWR0", + [15][1] = "ISPCWR1", + [16][1] = "ISPCWR2", + [17][1] = "ISPCWR3", + [18][1] = "XDP", + [21][1] = "OSSSYS", + [22][1] = "HDP", + [23][1] = "LSDMA", + [24][1] = "JPEG", + [27][1] = "VSCH", + [28][1] = "VCNU", + [29][1] = "VCN", }; static uint32_t 
mmhub_v3_0_1_get_invalidate_req(unsigned int vmid, From 9f9bddfa31d87b084700a6e9eca1a8b4f8ddcdf6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 18 Jul 2025 15:53:21 -0400 Subject: [PATCH 1528/2411] drm/amdgpu: update mmhub 3.3 client id mappings Update the client id mapping so the correct clients get printed when there is a mmhub page fault. v2: fix typos spotted by David Wu. v3: fix additional typo spotted by David. Reviewed-by: David (Ming Qiang) Wu Signed-off-by: Alex Deucher (cherry picked from commit e932f4779a2d329841bb9ca70bb80a4bb2d707b6) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c | 121 ++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c index bc3d6c2fc87a..f6fc9778bc30 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c @@ -39,6 +39,64 @@ #define regDAGB1_L1TLB_REG_RW_3_3_BASE_IDX 1 static const char *mmhub_client_ids_v3_3[][2] = { + [0][0] = "VMC", + [1][0] = "ISPXT", + [2][0] = "ISPIXT", + [4][0] = "DCEDMC", + [6][0] = "MP0", + [7][0] = "MP1", + [8][0] = "MPM", + [9][0] = "ISPPDPRD", + [10][0] = "ISPCSTATRD", + [11][0] = "ISPBYRPRD", + [12][0] = "ISPRGBPRD", + [13][0] = "ISPMCFPRD", + [14][0] = "ISPMCFPRD1", + [15][0] = "ISPYUVPRD", + [16][0] = "ISPMCSCRD", + [17][0] = "ISPGDCRD", + [18][0] = "ISPLMERD", + [22][0] = "ISPXT1", + [23][0] = "ISPIXT1", + [24][0] = "HDP", + [25][0] = "LSDMA", + [26][0] = "JPEG", + [27][0] = "VPE", + [28][0] = "VSCH", + [29][0] = "VCNU", + [30][0] = "VCN", + [1][1] = "ISPXT", + [2][1] = "ISPIXT", + [3][1] = "DCEDWB", + [4][1] = "DCEDMC", + [5][1] = "ISPCSISWR", + [6][1] = "MP0", + [7][1] = "MP1", + [8][1] = "MPM", + [9][1] = "ISPPDPWR", + [10][1] = "ISPCSTATWR", + [11][1] = "ISPBYRPWR", + [12][1] = "ISPRGBPWR", + [13][1] = "ISPMCFPWR", + [14][1] = "ISPMWR0", + [15][1] = "ISPYUVPWR", + [16][1] = "ISPMCSCWR", + [17][1] = 
"ISPGDCWR", + [18][1] = "ISPLMEWR", + [20][1] = "ISPMWR2", + [21][1] = "OSSSYS", + [22][1] = "ISPXT1", + [23][1] = "ISPIXT1", + [24][1] = "HDP", + [25][1] = "LSDMA", + [26][1] = "JPEG", + [27][1] = "VPE", + [28][1] = "VSCH", + [29][1] = "VCNU", + [30][1] = "VCN", +}; + +static const char *mmhub_client_ids_v3_3_1[][2] = { [0][0] = "VMC", [4][0] = "DCEDMC", [6][0] = "MP0", @@ -46,10 +104,29 @@ static const char *mmhub_client_ids_v3_3[][2] = { [8][0] = "MPM", [24][0] = "HDP", [25][0] = "LSDMA", - [26][0] = "JPEG", - [27][0] = "VPE", - [29][0] = "VCNU", - [30][0] = "VCN", + [26][0] = "JPEG0", + [27][0] = "VPE0", + [28][0] = "VSCH", + [29][0] = "VCNU0", + [30][0] = "VCN0", + [32+1][0] = "ISPXT", + [32+2][0] = "ISPIXT", + [32+9][0] = "ISPPDPRD", + [32+10][0] = "ISPCSTATRD", + [32+11][0] = "ISPBYRPRD", + [32+12][0] = "ISPRGBPRD", + [32+13][0] = "ISPMCFPRD", + [32+14][0] = "ISPMCFPRD1", + [32+15][0] = "ISPYUVPRD", + [32+16][0] = "ISPMCSCRD", + [32+17][0] = "ISPGDCRD", + [32+18][0] = "ISPLMERD", + [32+22][0] = "ISPXT1", + [32+23][0] = "ISPIXT1", + [32+26][0] = "JPEG1", + [32+27][0] = "VPE1", + [32+29][0] = "VCNU1", + [32+30][0] = "VCN1", [3][1] = "DCEDWB", [4][1] = "DCEDMC", [6][1] = "MP0", @@ -58,10 +135,32 @@ static const char *mmhub_client_ids_v3_3[][2] = { [21][1] = "OSSSYS", [24][1] = "HDP", [25][1] = "LSDMA", - [26][1] = "JPEG", - [27][1] = "VPE", - [29][1] = "VCNU", - [30][1] = "VCN", + [26][1] = "JPEG0", + [27][1] = "VPE0", + [28][1] = "VSCH", + [29][1] = "VCNU0", + [30][1] = "VCN0", + [32+1][1] = "ISPXT", + [32+2][1] = "ISPIXT", + [32+5][1] = "ISPCSISWR", + [32+9][1] = "ISPPDPWR", + [32+10][1] = "ISPCSTATWR", + [32+11][1] = "ISPBYRPWR", + [32+12][1] = "ISPRGBPWR", + [32+13][1] = "ISPMCFPWR", + [32+14][1] = "ISPMWR0", + [32+15][1] = "ISPYUVPWR", + [32+16][1] = "ISPMCSCWR", + [32+17][1] = "ISPGDCWR", + [32+18][1] = "ISPLMEWR", + [32+19][1] = "ISPMWR1", + [32+20][1] = "ISPMWR2", + [32+22][1] = "ISPXT1", + [32+23][1] = "ISPIXT1", + [32+26][1] = "JPEG1", + [32+27][1] = 
"VPE1", + [32+29][1] = "VCNU1", + [32+30][1] = "VCN1", }; static uint32_t mmhub_v3_3_get_invalidate_req(unsigned int vmid, @@ -102,12 +201,16 @@ mmhub_v3_3_print_l2_protection_fault_status(struct amdgpu_device *adev, switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(3, 3, 0): - case IP_VERSION(3, 3, 1): case IP_VERSION(3, 3, 2): mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_3) ? mmhub_client_ids_v3_3[cid][rw] : cid == 0x140 ? "UMSCH" : NULL; break; + case IP_VERSION(3, 3, 1): + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_3_1) ? + mmhub_client_ids_v3_3_1[cid][rw] : + cid == 0x140 ? "UMSCH" : NULL; + break; default: mmhub_cid = NULL; break; From 3477c1b0972dc1c8a46f78e8fb1fa6966095b5ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Wed, 30 Jul 2025 10:09:02 +0200 Subject: [PATCH 1529/2411] drm/amd/display: Add primary plane to commits for correct VRR handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amdgpu_dm_commit_planes calls update_freesync_state_on_stream only for the primary plane. If a commit affects a CRTC but not its primary plane, it would previously not trigger a refresh cycle or affect LFC, violating current UAPI semantics. Fixes e.g. atomic commits affecting only the cursor plane being limited to the minimum refresh rate. Don't do this for the legacy cursor ioctls though, it would break the UAPI semantics for those. 
Suggested-by: Xaver Hugl Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3034 Signed-off-by: Michel Dänzer Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher (cherry picked from commit cc7bfba95966251b254cb970c21627124da3b7f4) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 2551823382f8..010172f930ae 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -661,6 +661,15 @@ static int amdgpu_dm_crtc_helper_atomic_check(struct drm_crtc *crtc, return -EINVAL; } + if (!state->legacy_cursor_update && amdgpu_dm_crtc_vrr_active(dm_crtc_state)) { + struct drm_plane_state *primary_state; + + /* Pull in primary plane for correct VRR handling */ + primary_state = drm_atomic_get_plane_state(state, crtc->primary); + if (IS_ERR(primary_state)) + return PTR_ERR(primary_state); + } + /* In some use cases, like reset, no stream is attached */ if (!dm_crtc_state->stream) return 0; From 1bcf63a44381691d6192872801f830ce3250e367 Mon Sep 17 00:00:00 2001 From: Siyang Liu Date: Fri, 4 Jul 2025 11:16:22 +0800 Subject: [PATCH 1530/2411] drm/amd/display: fix a Null pointer dereference vulnerability [Why] A null pointer dereference vulnerability exists in the AMD display driver's (DC module) cleanup function dc_destruct(). When display control context (dc->ctx) construction fails (due to memory allocation failure), this pointer remains NULL. During subsequent error handling when dc_destruct() is called, there's no NULL check before dereferencing the perf_trace member (dc->ctx->perf_trace), causing a kernel null pointer dereference crash. [How] Check if dc->ctx is non-NULL before dereferencing. 
Link: https://lore.kernel.org/r/tencent_54FF4252EDFB6533090A491A25EEF3EDBF06@qq.com Co-developed-by: Mario Limonciello Signed-off-by: Mario Limonciello (Updated commit text and removed unnecessary error message) Signed-off-by: Siyang Liu Signed-off-by: Roman Li Reviewed-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 9dd8e2ba268c636c240a918e0a31e6feaee19404) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 28aca7017f0f..9ab0ee20ca6f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -938,17 +938,18 @@ static void dc_destruct(struct dc *dc) if (dc->link_srv) link_destroy_link_service(&dc->link_srv); - if (dc->ctx->gpio_service) - dal_gpio_service_destroy(&dc->ctx->gpio_service); + if (dc->ctx) { + if (dc->ctx->gpio_service) + dal_gpio_service_destroy(&dc->ctx->gpio_service); - if (dc->ctx->created_bios) - dal_bios_parser_destroy(&dc->ctx->dc_bios); + if (dc->ctx->created_bios) + dal_bios_parser_destroy(&dc->ctx->dc_bios); + kfree(dc->ctx->logger); + dc_perf_trace_destroy(&dc->ctx->perf_trace); - kfree(dc->ctx->logger); - dc_perf_trace_destroy(&dc->ctx->perf_trace); - - kfree(dc->ctx); - dc->ctx = NULL; + kfree(dc->ctx); + dc->ctx = NULL; + } kfree(dc->bw_vbios); dc->bw_vbios = NULL; From 8e6a18cbf3ee2c1e3d0afd8d3debd0ba8738ad0c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sun, 20 Jul 2025 23:39:41 -0500 Subject: [PATCH 1531/2411] drm/amd/display: Revert "drm/amd/display: Fix AMDGPU_MAX_BL_LEVEL value" This reverts commit 66abb996999de0d440a02583a6e70c2c24deab45. This broke custom brightness curves but it wasn't obvious because of other related changes. Custom brightness curves are always from a 0-255 input signal. 
The correct fix was to fix the default value which was done by [1]. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4412 Link: https://lore.kernel.org/amd-gfx/0f094c4b-d2a3-42cd-824c-dc2858a5618d@kernel.org/T/#m69f875a7e69aa22df3370b3e3a9e69f4a61fdaf2 Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 6ec8a5cbec751625133461600d0d4950ffd3a214) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 2a175fc0399c..cd0e2976e268 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4756,16 +4756,16 @@ static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps, return 1; } -/* Rescale from [min..max] to [0..MAX_BACKLIGHT_LEVEL] */ +/* Rescale from [min..max] to [0..AMDGPU_MAX_BL_LEVEL] */ static inline u32 scale_input_to_fw(int min, int max, u64 input) { - return DIV_ROUND_CLOSEST_ULL(input * MAX_BACKLIGHT_LEVEL, max - min); + return DIV_ROUND_CLOSEST_ULL(input * AMDGPU_MAX_BL_LEVEL, max - min); } -/* Rescale from [0..MAX_BACKLIGHT_LEVEL] to [min..max] */ +/* Rescale from [0..AMDGPU_MAX_BL_LEVEL] to [min..max] */ static inline u32 scale_fw_to_input(int min, int max, u64 input) { - return min + DIV_ROUND_CLOSEST_ULL(input * (max - min), MAX_BACKLIGHT_LEVEL); + return min + DIV_ROUND_CLOSEST_ULL(input * (max - min), AMDGPU_MAX_BL_LEVEL); } static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *caps, From c2fe914d50ab22defca14ac6fca33888bfb19843 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 18 Jul 2025 09:25:21 +0530 Subject: [PATCH 1532/2411] drm/amdgpu: Add NULL check for asic_funcs If driver load fails too early, asic_funcs pointer remains 
unassigned. Add NULL check to sanitize unwind path. Signed-off-by: Lijo Lazar Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 582bf7c5158dce16f7dc5b8345b7876bd8031224) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c index e56ba93a8df6..a974265837f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -55,7 +55,8 @@ u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev) bool amdgpu_nbio_is_replay_cnt_supported(struct amdgpu_device *adev) { - if (amdgpu_sriov_vf(adev) || !adev->asic_funcs->get_pcie_replay_count || + if (amdgpu_sriov_vf(adev) || !adev->asic_funcs || + !adev->asic_funcs->get_pcie_replay_count || (!adev->nbio.funcs || !adev->nbio.funcs->get_pcie_replay_count)) return false; From 124ffa2970087f3b9033a00a4855748514225b9d Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Mon, 4 Aug 2025 08:43:15 +0800 Subject: [PATCH 1533/2411] drm/amdgpu: Update SDMA firmware version check for user queue support This commit fixes a firmware version check for enabling user queue support in SDMA v7.0. The previous version check (7836028) was incorrect and could lead to issues with PROTECTED_FENCE_SIGNAL commands causing register conflicts between MCU_DBG0 and MCU_DBG1. 
Fixes: 8c011408ed84 ("drm/amdgpu/sdma7: add ucode version checks for userq support") Reviewed-by: Alex Deucher Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher (cherry picked from commit 92e2449241516c95aab95eea91faecd0fa2b7ed5) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index b8b06d4c5882..326ecc8d37d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1353,7 +1353,7 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(7, 0, 0): case IP_VERSION(7, 0, 1): - if ((adev->sdma.instance[0].fw_version >= 7836028) && !adev->sdma.disable_uq) + if ((adev->sdma.instance[0].fw_version >= 7966358) && !adev->sdma.disable_uq) adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; break; default: From ce0b5eedcb753697d43f61dd2e27d68eb5d3150f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 24 Jul 2025 12:49:30 +0200 Subject: [PATCH 1534/2411] x86/irq: Plug vector setup race Hogan reported a vector setup race, which overwrites the interrupt descriptor in the per CPU vector array resulting in a dysfunctional device. CPU0 CPU1 interrupt is raised in APIC IRR but not handled free_irq() per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN; request_irq() common_interrupt() d = this_cpu_read(vector_irq[vector]); per_cpu(vector_irq, CPU1)[vector] = desc; if (d == VECTOR_SHUTDOWN) this_cpu_write(vector_irq[vector], VECTOR_UNUSED); free_irq() cannot observe the pending vector in the CPU1 APIC as there is no way to query the remote CPU's APIC IRR. This requires that request_irq() uses the same vector/CPU as the one which was freed, but this also can be triggered by a spurious interrupt. Interestingly enough this problem managed to be hidden for more than a decade.
Prevent this by reevaluating vector_irq under the vector lock, which is held by the interrupt activation code when vector_irq is updated. To avoid ifdeffery or IS_ENABLED() nonsense, move the [un]lock_vector_lock() declarations out under the CONFIG_IRQ_DOMAIN_HIERARCHY guard as it's only provided when CONFIG_X86_LOCAL_APIC=y. The current CONFIG_IRQ_DOMAIN_HIERARCHY guard is selected by CONFIG_X86_LOCAL_APIC, but can also be selected by other parts of the Kconfig system, which makes 32-bit UP builds with CONFIG_X86_LOCAL_APIC=n fail. Can we just get rid of this !APIC nonsense once and forever? Fixes: 9345005f4eed ("x86/irq: Fix do_IRQ() interrupt warning for cpu hotplug retriggered irqs") Reported-by: Hogan Wang Signed-off-by: Thomas Gleixner Tested-by: Hogan Wang Link: https://lore.kernel.org/all/draft-87ikjhrhhh.ffs@tglx --- arch/x86/include/asm/hw_irq.h | 12 ++++--- arch/x86/kernel/irq.c | 63 ++++++++++++++++++++++++++--------- 2 files changed, 55 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 162ebd73a698..cbe19e669080 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -92,8 +92,6 @@ struct irq_cfg { extern struct irq_cfg *irq_cfg(unsigned int irq); extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); -extern void lock_vector_lock(void); -extern void unlock_vector_lock(void); #ifdef CONFIG_SMP extern void vector_schedule_cleanup(struct irq_cfg *); extern void irq_complete_move(struct irq_cfg *cfg); @@ -101,12 +99,16 @@ extern void irq_complete_move(struct irq_cfg *cfg); static inline void vector_schedule_cleanup(struct irq_cfg *c) { } static inline void irq_complete_move(struct irq_cfg *c) { } #endif - extern void apic_ack_edge(struct irq_data *data); -#else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ +#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ + +#ifdef CONFIG_X86_LOCAL_APIC +extern void lock_vector_lock(void); +extern void unlock_vector_lock(void); +#else static inline 
void lock_vector_lock(void) {} static inline void unlock_vector_lock(void) {} -#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ +#endif /* Statistics */ extern atomic_t irq_err_count; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 9ed29ff10e59..10721a125226 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -256,26 +256,59 @@ static __always_inline void handle_irq(struct irq_desc *desc, __handle_irq(desc, regs); } -static __always_inline int call_irq_handler(int vector, struct pt_regs *regs) +static struct irq_desc *reevaluate_vector(int vector) { - struct irq_desc *desc; - int ret = 0; + struct irq_desc *desc = __this_cpu_read(vector_irq[vector]); + + if (!IS_ERR_OR_NULL(desc)) + return desc; + + if (desc == VECTOR_UNUSED) + pr_emerg_ratelimited("No irq handler for %d.%u\n", smp_processor_id(), vector); + else + __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); + return NULL; +} + +static __always_inline bool call_irq_handler(int vector, struct pt_regs *regs) +{ + struct irq_desc *desc = __this_cpu_read(vector_irq[vector]); - desc = __this_cpu_read(vector_irq[vector]); if (likely(!IS_ERR_OR_NULL(desc))) { handle_irq(desc, regs); - } else { - ret = -EINVAL; - if (desc == VECTOR_UNUSED) { - pr_emerg_ratelimited("%s: %d.%u No irq handler for vector\n", - __func__, smp_processor_id(), - vector); - } else { - __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); - } + return true; } - return ret; + /* + * Reevaluate with vector_lock held to prevent a race against + * request_irq() setting up the vector: + * + * CPU0 CPU1 + * interrupt is raised in APIC IRR + * but not handled + * free_irq() + * per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN; + * + * request_irq() common_interrupt() + * d = this_cpu_read(vector_irq[vector]); + * + * per_cpu(vector_irq, CPU1)[vector] = desc; + * + * if (d == VECTOR_SHUTDOWN) + * this_cpu_write(vector_irq[vector], VECTOR_UNUSED); + * + * This requires that the same vector on the same target CPU is + * 
handed out or that a spurious interrupt hits that CPU/vector. + */ + lock_vector_lock(); + desc = reevaluate_vector(vector); + unlock_vector_lock(); + + if (!desc) + return false; + + handle_irq(desc, regs); + return true; } /* @@ -289,7 +322,7 @@ DEFINE_IDTENTRY_IRQ(common_interrupt) /* entry code tells RCU that we're not quiescent. Check it. */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); - if (unlikely(call_irq_handler(vector, regs))) + if (unlikely(!call_irq_handler(vector, regs))) apic_eoi(); set_irq_regs(old_regs); From 4b31bcb025cb497da2b01f87173108ff32d350d2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 28 Jul 2025 09:31:29 -0700 Subject: [PATCH 1535/2411] eth: fbnic: unlink NAPIs from queues on error to open CI hit a UaF in fbnic in the AF_XDP portion of the queues.py test. The UaF is in the __sk_mark_napi_id_once() call in xsk_bind(), NAPI has been freed. Looks like the device failed to open earlier, and we lack clearing the NAPI pointer from the queue. 
Fixes: 557d02238e05 ("eth: fbnic: centralize the queue count and NAPI<>queue setting") Reviewed-by: Alexander Duyck Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250728163129.117360-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_netdev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index 7bd7812d9c06..04bb6e7147a2 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -33,7 +33,7 @@ int __fbnic_open(struct fbnic_net *fbn) dev_warn(fbd->dev, "Error %d sending host ownership message to the firmware\n", err); - goto free_resources; + goto err_reset_queues; } err = fbnic_time_start(fbn); @@ -57,6 +57,8 @@ int __fbnic_open(struct fbnic_net *fbn) fbnic_time_stop(fbn); release_ownership: fbnic_fw_xmit_ownership_msg(fbn->fbd, false); +err_reset_queues: + fbnic_reset_netif_queues(fbn); free_resources: fbnic_free_resources(fbn); free_napi_vectors: From 2df158047d532d0e2a6b39953656c738872151a3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 31 Jul 2025 10:00:20 +0200 Subject: [PATCH 1536/2411] ipa: fix compile-testing with qcom-mdt=m There are multiple drivers that use the qualcomm mdt loader, but they have conflicting ideas of how to deal with that dependency when compile-testing for non-qualcomm targets: IPA only enables the MDT loader when the kernel config includes ARCH_QCOM, but the newly added ath12k support always enables it, which leads to a link failure with the combination of IPA=y and ATH12K=m: aarch64-linux-ld: drivers/net/ipa/ipa_main.o: in function `ipa_firmware_load': ipa_main.c:(.text.unlikely+0x134): undefined reference to `qcom_mdt_load The ATH12K method seems more reliable here, so change IPA over to do the same thing. 
Fixes: 38a4066f593c ("net: ipa: support COMPILE_TEST") Fixes: c0dd3f4f7091 ("wifi: ath12k: enable ath12k AHB support") Signed-off-by: Arnd Bergmann Link: https://patch.msgid.link/20250731080024.2054904-1-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ipa/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/Kconfig b/drivers/net/ipa/Kconfig index 6782c2cbf542..01d219d3760c 100644 --- a/drivers/net/ipa/Kconfig +++ b/drivers/net/ipa/Kconfig @@ -5,7 +5,7 @@ config QCOM_IPA depends on INTERCONNECT depends on QCOM_RPROC_COMMON || (QCOM_RPROC_COMMON=n && COMPILE_TEST) depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n - select QCOM_MDT_LOADER if ARCH_QCOM + select QCOM_MDT_LOADER select QCOM_SCM select QCOM_QMI_HELPERS help From fa516c0d8bf90da9d5b168757162205aafe5d0e1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 31 Jul 2025 18:13:35 -0700 Subject: [PATCH 1537/2411] net: devmem: fix DMA direction on unmapping Looks like we always unmap the DMA_BUF with DMA_FROM_DEVICE direction. While at it unexport __net_devmem_dmabuf_binding_free(), it's internal. Found by code inspection. 
Fixes: bd61848900bf ("net: devmem: Implement TX path") Acked-by: Stanislav Fomichev Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20250801011335.2267515-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/devmem.c | 6 +++--- net/core/devmem.h | 7 +++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/net/core/devmem.c b/net/core/devmem.c index b3a62ca0df65..24c591ab38ae 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -70,14 +70,13 @@ void __net_devmem_dmabuf_binding_free(struct work_struct *wq) gen_pool_destroy(binding->chunk_pool); dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt, - DMA_FROM_DEVICE); + binding->direction); dma_buf_detach(binding->dmabuf, binding->attachment); dma_buf_put(binding->dmabuf); xa_destroy(&binding->bound_rxqs); kvfree(binding->tx_vec); kfree(binding); } -EXPORT_SYMBOL(__net_devmem_dmabuf_binding_free); struct net_iov * net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding) @@ -208,6 +207,7 @@ net_devmem_bind_dmabuf(struct net_device *dev, mutex_init(&binding->lock); binding->dmabuf = dmabuf; + binding->direction = direction; binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent); if (IS_ERR(binding->attachment)) { @@ -312,7 +312,7 @@ net_devmem_bind_dmabuf(struct net_device *dev, kvfree(binding->tx_vec); err_unmap: dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt, - DMA_FROM_DEVICE); + direction); err_detach: dma_buf_detach(dmabuf, binding->attachment); err_free_binding: diff --git a/net/core/devmem.h b/net/core/devmem.h index 0a3b28ba5c13..41cd6e1c9141 100644 --- a/net/core/devmem.h +++ b/net/core/devmem.h @@ -56,6 +56,9 @@ struct net_devmem_dmabuf_binding { */ u32 id; + /* DMA direction, FROM_DEVICE for Rx binding, TO_DEVICE for Tx. */ + enum dma_data_direction direction; + /* Array of net_iov pointers for this binding, sorted by virtual * address. 
This array is convenient to map the virtual addresses to * net_iovs in the TX path. @@ -165,10 +168,6 @@ static inline void net_devmem_put_net_iov(struct net_iov *niov) { } -static inline void __net_devmem_dmabuf_binding_free(struct work_struct *wq) -{ -} - static inline struct net_devmem_dmabuf_binding * net_devmem_bind_dmabuf(struct net_device *dev, enum dma_data_direction direction, From 4e7e471e2e3f9085fe1dbe821c4dd904a917c66a Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 1 Aug 2025 09:12:25 +0200 Subject: [PATCH 1538/2411] net: airoha: npu: Add missing MODULE_FIRMWARE macros Introduce missing MODULE_FIRMWARE definitions for firmware autoload. Fixes: 23290c7bc190d ("net: airoha: Introduce Airoha NPU support") Signed-off-by: Lorenzo Bianconi Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250801-airoha-npu-missing-module-firmware-v2-1-e860c824d515@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_npu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/airoha/airoha_npu.c b/drivers/net/ethernet/airoha/airoha_npu.c index 9ab964c536e1..a802f95df99d 100644 --- a/drivers/net/ethernet/airoha/airoha_npu.c +++ b/drivers/net/ethernet/airoha/airoha_npu.c @@ -579,6 +579,8 @@ static struct platform_driver airoha_npu_driver = { }; module_platform_driver(airoha_npu_driver); +MODULE_FIRMWARE(NPU_EN7581_FIRMWARE_DATA); +MODULE_FIRMWARE(NPU_EN7581_FIRMWARE_RV32); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Lorenzo Bianconi "); MODULE_DESCRIPTION("Airoha Network Processor Unit driver"); From 5a40f8af2ba1b9bdf46e2db10e8c9710538fbc63 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 1 Aug 2025 12:13:37 +0200 Subject: [PATCH 1539/2411] benet: fix BUG when creating VFs benet crashes as soon as SRIOV VFs are created: kernel BUG at mm/vmalloc.c:3457! Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI CPU: 4 UID: 0 PID: 7408 Comm: test.sh Kdump: loaded Not tainted 6.16.0+ #1 PREEMPT(voluntary) [...] 
RIP: 0010:vunmap+0x5f/0x70 [...] Call Trace: __iommu_dma_free+0xe8/0x1c0 be_cmd_set_mac_list+0x3fe/0x640 [be2net] be_cmd_set_mac+0xaf/0x110 [be2net] be_vf_eth_addr_config+0x19f/0x330 [be2net] be_vf_setup+0x4f7/0x990 [be2net] be_pci_sriov_configure+0x3a1/0x470 [be2net] sriov_numvfs_store+0x20b/0x380 kernfs_fop_write_iter+0x354/0x530 vfs_write+0x9b9/0xf60 ksys_write+0xf3/0x1d0 do_syscall_64+0x8c/0x3d0 be_cmd_set_mac_list() calls dma_free_coherent() under a spin_lock_bh. Fix it by freeing only after the lock has been released. Fixes: 1a82d19ca2d6 ("be2net: fix sleeping while atomic bugs in be_ndo_bridge_getlink") Signed-off-by: Michal Schmidt Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250801101338.72502-1-mschmidt@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/emulex/benet/be_cmds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index d730af4a50c7..bb5d2fa15736 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -3856,8 +3856,8 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array, status = be_mcc_notify_wait(adapter); err: - dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); spin_unlock_bh(&adapter->mcc_lock); + dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); return status; } From 01d3c8417b9c1b884a8a981a3b886da556512f36 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Fri, 1 Aug 2025 13:54:16 -0400 Subject: [PATCH 1540/2411] net/packet: fix a race in packet_set_ring() and packet_notifier() When packet_set_ring() releases po->bind_lock, another thread can run packet_notifier() and process an NETDEV_UP event. This race and the fix are both similar to that of commit 15fe076edea7 ("net/packet: fix a race in packet_bind() and packet_notifier()"). 
There too the packet_notifier NETDEV_UP event managed to run while a po->bind_lock critical section had to be temporarily released. And the fix was similarly to temporarily set po->num to zero to keep the socket unhooked until the lock is retaken. The po->bind_lock in packet_set_ring and packet_notifier precede the introduction of git history. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Quang Le Signed-off-by: Willem de Bruijn Link: https://patch.msgid.link/20250801175423.2970334-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index bc438d0d96a7..a7017d7f0927 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4573,10 +4573,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, spin_lock(&po->bind_lock); was_running = packet_sock_flag(po, PACKET_SOCK_RUNNING); num = po->num; - if (was_running) { - WRITE_ONCE(po->num, 0); + WRITE_ONCE(po->num, 0); + if (was_running) __unregister_prot_hook(sk, false); - } + spin_unlock(&po->bind_lock); synchronize_net(); @@ -4608,10 +4608,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, mutex_unlock(&po->pg_vec_lock); spin_lock(&po->bind_lock); - if (was_running) { - WRITE_ONCE(po->num, num); + WRITE_ONCE(po->num, num); + if (was_running) register_prot_hook(sk); - } + spin_unlock(&po->bind_lock); if (pg_vec && (po->tp_version > TPACKET_V2)) { /* Because we don't support block-based V3 on tx-ring */ From 5ef7fdf52c0f2b792802aac3438e67e5ebe7e63d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 1 Aug 2025 11:16:38 -0700 Subject: [PATCH 1541/2411] selftests: net: packetdrill: xfail all problems on slow machines We keep seeing flakes on packetdrill on debug kernels, while non-debug kernels are stable, not a single flake in 200 runs. 
Time to give up, debug kernels appear to suffer from 10msec latency spikes and any timing-sensitive test is bound to flake. Reviewed-by: Willem de Bruijn Acked-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250801181638.2483531-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../selftests/net/packetdrill/ksft_runner.sh | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index c5b01e1bd4c7..a7e790af38ff 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -35,24 +35,7 @@ failfunc=ktap_test_fail if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then optargs+=('--tolerance_usecs=14000') - - # xfail tests that are known flaky with dbg config, not fixable. - # still run them for coverage (and expect 100% pass without dbg). - declare -ar xfail_list=( - "tcp_blocking_blocking-connect.pkt" - "tcp_blocking_blocking-read.pkt" - "tcp_eor_no-coalesce-retrans.pkt" - "tcp_fast_recovery_prr-ss.*.pkt" - "tcp_sack_sack-route-refresh-ip-tos.pkt" - "tcp_slow_start_slow-start-after-win-update.pkt" - "tcp_timestamping.*.pkt" - "tcp_user_timeout_user-timeout-probe.pkt" - "tcp_zerocopy_cl.*.pkt" - "tcp_zerocopy_epoll_.*.pkt" - "tcp_tcp_info_tcp-info-.*-limited.pkt" - ) - readonly xfail_regex="^($(printf '%s|' "${xfail_list[@]}"))$" - [[ "$script" =~ ${xfail_regex} ]] && failfunc=ktap_test_xfail + failfunc=ktap_test_xfail fi ktap_print_header From 175811b8f05f0da3e19b7d3124666649ddde3802 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 1 Aug 2025 14:27:42 -0700 Subject: [PATCH 1542/2411] Revert "net: mdio_bus: Use devm for getting reset GPIO" This reverts commit 3b98c9352511db627b606477fc7944b2fa53a165. Russell says: Using devm_*() [here] is completely wrong, because this is called from mdiobus_register_device(). 
This is not the probe function for the device, and thus there is no code to trigger the release of the resource on unregistration. Moreover, when the mdiodev is eventually probed, if the driver fails or the driver is unbound, the GPIO will be released, but a reference will be left behind. Using devm* with a struct device that is *not* currently being probed is fundamentally wrong - an abuse of devm. Reported-by: Mark Brown Link: https://lore.kernel.org/95449490-fa58-41d4-9493-c9213c1f2e7d@sirena.org.uk Suggested-by: Russell King (Oracle) Fixes: 3b98c9352511 ("net: mdio_bus: Use devm for getting reset GPIO") Link: https://patch.msgid.link/20250801212742.2607149-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/mdio_bus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 24bdab5bdd24..fda2e27c1810 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -36,8 +36,8 @@ static int mdiobus_register_gpiod(struct mdio_device *mdiodev) { /* Deassert the optional reset signal */ - mdiodev->reset_gpio = devm_gpiod_get_optional(&mdiodev->dev, - "reset", GPIOD_OUT_LOW); + mdiodev->reset_gpio = gpiod_get_optional(&mdiodev->dev, + "reset", GPIOD_OUT_LOW); if (IS_ERR(mdiodev->reset_gpio)) return PTR_ERR(mdiodev->reset_gpio); From ffd2dc4c6c49ff4f1e5d34e454a6a55608104c17 Mon Sep 17 00:00:00 2001 From: Maher Azzouzi Date: Fri, 1 Aug 2025 17:18:57 -0700 Subject: [PATCH 1543/2411] net/sched: mqprio: fix stack out-of-bounds write in tc entry parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TCA_MQPRIO_TC_ENTRY_INDEX is validated using NLA_POLICY_MAX(NLA_U32, TC_QOPT_MAX_QUEUE), which allows the value TC_QOPT_MAX_QUEUE (16). This leads to a 4-byte out-of-bounds stack write in the fp[] array, which only has room for 16 elements (0–15). Fix this by changing the policy to allow only up to TC_QOPT_MAX_QUEUE - 1. 
Fixes: f62af20bed2d ("net/sched: mqprio: allow per-TC user input of FP adminStatus") Reviewed-by: Eric Dumazet Signed-off-by: Maher Azzouzi Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250802001857.2702497-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/sched/sch_mqprio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 51d4013b6121..f3e5ef9a9592 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -152,7 +152,7 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt, static const struct nla_policy mqprio_tc_entry_policy[TCA_MQPRIO_TC_ENTRY_MAX + 1] = { [TCA_MQPRIO_TC_ENTRY_INDEX] = NLA_POLICY_MAX(NLA_U32, - TC_QOPT_MAX_QUEUE), + TC_QOPT_MAX_QUEUE - 1), [TCA_MQPRIO_TC_ENTRY_FP] = NLA_POLICY_RANGE(NLA_U32, TC_FP_EXPRESS, TC_FP_PREEMPTIBLE), From 4eabe4cc0958e28ceaf592bbb62c234339642e41 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sat, 2 Aug 2025 17:53:02 +0200 Subject: [PATCH 1544/2411] dpll: zl3073x: ZL3073X_I2C and ZL3073X_SPI should depend on NET When making ZL3073X invisible, it was overlooked that ZL3073X depends on NET, while ZL3073X_I2C and ZL3073X_SPI do not, causing: WARNING: unmet direct dependencies detected for ZL3073X when selected by ZL3073X_I2C WARNING: unmet direct dependencies detected for ZL3073X when selected by ZL3073X_SPI WARNING: unmet direct dependencies detected for ZL3073X Depends on [n]: NET [=n] Selected by [y]: - ZL3073X_I2C [=y] && I2C [=y] Selected by [y]: - ZL3073X_SPI [=y] && SPI [=y] Fix this by adding the missing dependencies to ZL3073X_I2C and ZL3073X_SPI. 
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202508022110.nTqZ5Ylu-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202508022351.NHIxPF8j-lkp@intel.com/ Fixes: a4f0866e3dbbf3fe ("dpll: Make ZL3073X invisible") Signed-off-by: Geert Uytterhoeven Acked-by: Ivan Vecera Link: https://patch.msgid.link/20250802155302.3673457-1-geert+renesas@glider.be Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dpll/zl3073x/Kconfig b/drivers/dpll/zl3073x/Kconfig index 9915f7423dea..5bbca1400581 100644 --- a/drivers/dpll/zl3073x/Kconfig +++ b/drivers/dpll/zl3073x/Kconfig @@ -16,7 +16,7 @@ config ZL3073X config ZL3073X_I2C tristate "I2C bus implementation for Microchip Azurite devices" - depends on I2C + depends on I2C && NET select REGMAP_I2C select ZL3073X help @@ -28,7 +28,7 @@ config ZL3073X_I2C config ZL3073X_SPI tristate "SPI bus implementation for Microchip Azurite devices" - depends on SPI + depends on NET && SPI select REGMAP_SPI select ZL3073X help From 6b445309eec2bc0594f3e24c7777aeef891d386e Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 31 Jul 2025 20:46:42 -0300 Subject: [PATCH 1545/2411] smb: client: default to nonativesocket under POSIX mounts SMB3.1.1 POSIX mounts require sockets to be created with NFS reparse points. 
Cc: linux-cifs@vger.kernel.org Cc: Ralph Boehme Cc: David Howells Cc: Reported-by: Matthew Richardson Closes: https://marc.info/?i=1124e7cd-6a46-40a6-9f44-b7664a66654b@ed.ac.uk Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index cc8bd79ebca9..072383899e81 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1652,6 +1652,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, pr_warn_once("conflicting posix mount options specified\n"); ctx->linux_ext = 1; ctx->no_linux_ext = 0; + ctx->nonativesocket = 1; /* POSIX mounts use NFS style reparse points */ } break; case Opt_nocase: From 5b432ae5dff5eb2e6acd55473309fdd5c16ff779 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 31 Jul 2025 20:46:43 -0300 Subject: [PATCH 1546/2411] smb: client: fix creating symlinks under POSIX mounts SMB3.1.1 POSIX mounts support native symlinks that are created with IO_REPARSE_TAG_SYMLINK reparse points, so skip the checking of FILE_SUPPORTS_REPARSE_POINTS as some servers might not have it set. 
Cc: linux-cifs@vger.kernel.org Cc: Ralph Boehme Cc: David Howells Cc: Reported-by: Matthew Richardson Closes: https://marc.info/?i=1124e7cd-6a46-40a6-9f44-b7664a66654b@ed.ac.uk Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 5 +++++ fs/smb/client/cifssmb.c | 4 ++-- fs/smb/client/link.c | 2 +- fs/smb/client/smb1ops.c | 2 +- fs/smb/client/smb2inode.c | 5 ++--- fs/smb/client/smb2ops.c | 5 ++--- 6 files changed, 13 insertions(+), 10 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 19dd901fe8ab..a97e2cca2f53 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -2377,4 +2377,9 @@ static inline bool cifs_netbios_name(const char *name, size_t namelen) return ret; } +#define CIFS_REPARSE_SUPPORT(tcon) \ + ((tcon)->posix_extensions || \ + (le32_to_cpu((tcon)->fsAttrInfo.Attributes) & \ + FILE_SUPPORTS_REPARSE_POINTS)) + #endif /* _CIFS_GLOB_H */ diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 6c890db06593..d20766f664c4 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -2751,7 +2751,7 @@ int cifs_query_reparse_point(const unsigned int xid, if (cap_unix(tcon->ses)) return -EOPNOTSUPP; - if (!(le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS)) + if (!CIFS_REPARSE_SUPPORT(tcon)) return -EOPNOTSUPP; oparms = (struct cifs_open_parms) { @@ -2879,7 +2879,7 @@ struct inode *cifs_create_reparse_inode(struct cifs_open_info_data *data, * attempt to create reparse point. This will prevent creating unusable * empty object on the server. 
*/ - if (!(le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS)) + if (!CIFS_REPARSE_SUPPORT(tcon)) return ERR_PTR(-EOPNOTSUPP); #ifndef CONFIG_CIFS_XATTR diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c index afe76367d2c8..fe80e711cd75 100644 --- a/fs/smb/client/link.c +++ b/fs/smb/client/link.c @@ -635,7 +635,7 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, case CIFS_SYMLINK_TYPE_NATIVE: case CIFS_SYMLINK_TYPE_NFS: case CIFS_SYMLINK_TYPE_WSL: - if (le32_to_cpu(pTcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS) { + if (CIFS_REPARSE_SUPPORT(pTcon)) { rc = create_reparse_symlink(xid, inode, direntry, pTcon, full_path, symname); goto symlink_exit; diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index e364b6515af3..f722c7f47b07 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -1272,7 +1272,7 @@ cifs_make_node(unsigned int xid, struct inode *inode, */ return cifs_sfu_make_node(xid, inode, dentry, tcon, full_path, mode, dev); - } else if (le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS) { + } else if (CIFS_REPARSE_SUPPORT(tcon)) { /* * mknod via reparse points requires server support for * storing reparse points, which is available since diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 69d251726c02..2a0316c514e4 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -1346,9 +1346,8 @@ struct inode *smb2_create_reparse_inode(struct cifs_open_info_data *data, * attempt to create reparse point. This will prevent creating unusable * empty object on the server. 
*/ - if (!(le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS)) - if (!tcon->posix_extensions) - return ERR_PTR(-EOPNOTSUPP); + if (!CIFS_REPARSE_SUPPORT(tcon)) + return ERR_PTR(-EOPNOTSUPP); oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, SYNCHRONIZE | DELETE | diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 1b4a31894f43..bd6c1fb2a992 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -5260,10 +5260,9 @@ static int smb2_make_node(unsigned int xid, struct inode *inode, if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { rc = cifs_sfu_make_node(xid, inode, dentry, tcon, full_path, mode, dev); - } else if ((le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS) - || (tcon->posix_extensions)) { + } else if (CIFS_REPARSE_SUPPORT(tcon)) { rc = mknod_reparse(xid, inode, dentry, tcon, - full_path, mode, dev); + full_path, mode, dev); } return rc; } From cb9f6a40382ca7b7a81d6f52285f897b09b5851b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 2 Aug 2025 12:59:13 +0200 Subject: [PATCH 1547/2411] irqchip/riscv-imsic: Don't dereference before NULL pointer check smatch warns about a dereference before check: drivers/irqchip/irq-riscv-imsic-platform.c:317 imsic_irqdomain_init() warn: variable dereferenced before check 'imsic' (see line 311) Cure it by moving the firmware node assignment after the checks.
Fixes: 59422904dd98 ("irqchip/riscv-imsic: Convert to msi_create_parent_irq_domain() helper") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Thomas Gleixner Closes: https://lore.kernel.org/r/202507311953.NFVZkr0a-lkp@intel.com/ --- drivers/irqchip/irq-riscv-imsic-platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- drivers/irqchip/irq-riscv-imsic-platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-riscv-imsic-platform.c b/drivers/irqchip/irq-riscv-imsic-platform.c index 74a2a28f9403..643c8e459611 100644 --- a/drivers/irqchip/irq-riscv-imsic-platform.c +++ b/drivers/irqchip/irq-riscv-imsic-platform.c @@ -308,7 +308,6 @@ static const struct msi_parent_ops imsic_msi_parent_ops = { int imsic_irqdomain_init(void) { struct irq_domain_info info = { - .fwnode = imsic->fwnode, .ops = &imsic_base_domain_ops, .host_data = imsic, }; @@ -325,6 +324,7 @@ int imsic_irqdomain_init(void) } /* Create Base IRQ domain */ + info.fwnode = imsic->fwnode, imsic->base_domain = msi_create_parent_irq_domain(&info, &imsic_msi_parent_ops); if (!imsic->base_domain) { pr_err("%pfwP: failed to create IMSIC base domain\n", imsic->fwnode); From 02cbf8e0692bd30717b35a3ff5e46460d1d5d471 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 4 Aug 2025 16:55:53 +0200 Subject: [PATCH 1548/2411] irqchip/msi-lib: Fix fwnode refcount in msi_lib_irq_domain_select() Commit 8b65db1e93a2 ("irqchip/msi-lib: Add IRQ_DOMAIN_FLAG_FWNODE_PARENT handling") added logic in msi_lib_irq_domain_select() to match the domain fwnode against the fwnode parent of the fwspec.fwnode. The fwnode_get_parent() caller must call fwnode_handle_put() on the returned pointer value, lest fwnode refcounting for the parent ends up being out of kilter. 
Fix this by relying on the fwnode_handle clean-up handlers and by incrementing the fwnode refcount regardless of whether parent matching is used or not (the domain selection code already holds a reference before calling msi_lib_irq_domain_select() but to make the exit path more uniform if IRQ_DOMAIN_FLAG_FWNODE_PARENT is not set fwnode_handle_get() is called again on fwspec.fwnode so that the clean-up code is the same for the two matching patterns). Fixes: 8b65db1e93a2 ("irqchip/msi-lib: Add IRQ_DOMAIN_FLAG_FWNODE_PARENT handling") Signed-off-by: Lorenzo Pieralisi Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250804145553.795065-1-lpieralisi@kernel.org --- drivers/irqchip/irq-msi-lib.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-msi-lib.c b/drivers/irqchip/irq-msi-lib.c index 454c7f16dd4d..908944009c21 100644 --- a/drivers/irqchip/irq-msi-lib.c +++ b/drivers/irqchip/irq-msi-lib.c @@ -133,13 +133,13 @@ int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec, { const struct msi_parent_ops *ops = d->msi_parent_ops; u32 busmask = BIT(bus_token); - struct fwnode_handle *fwh; if (!ops) return 0; - fwh = d->flags & IRQ_DOMAIN_FLAG_FWNODE_PARENT ? fwnode_get_parent(fwspec->fwnode) - : fwspec->fwnode; + struct fwnode_handle *fwh __free(fwnode_handle) = + d->flags & IRQ_DOMAIN_FLAG_FWNODE_PARENT ? fwnode_get_parent(fwspec->fwnode) + : fwnode_handle_get(fwspec->fwnode); if (fwh != d->fwnode || fwspec->param_count != 0) return 0; From 3c3d7dbab2c70a4bca47634d564bf659351c05ca Mon Sep 17 00:00:00 2001 From: Elad Nachman Date: Sun, 3 Aug 2025 13:25:48 +0300 Subject: [PATCH 1549/2411] irqchip/mvebu-gicp: Clear pending interrupts on init When a kexec'ed kernel boots up, there might be stale unhandled interrupts pending in the interrupt controller. These are delivered as spurious interrupts once the boot CPU enables interrupts. 
Clear all pending interrupts when the driver is initialized to prevent these spurious interrupts from locking the CPU in an endless loop. Signed-off-by: Elad Nachman Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250803102548.669682-2-enachman@marvell.com --- drivers/irqchip/irq-mvebu-gicp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/irqchip/irq-mvebu-gicp.c b/drivers/irqchip/irq-mvebu-gicp.c index d3232d6d8dce..fd85c845e015 100644 --- a/drivers/irqchip/irq-mvebu-gicp.c +++ b/drivers/irqchip/irq-mvebu-gicp.c @@ -177,6 +177,7 @@ static int mvebu_gicp_probe(struct platform_device *pdev) .ops = &gicp_domain_ops, }; struct mvebu_gicp *gicp; + void __iomem *base; int ret, i; gicp = devm_kzalloc(&pdev->dev, sizeof(*gicp), GFP_KERNEL); @@ -236,6 +237,15 @@ static int mvebu_gicp_probe(struct platform_device *pdev) return -ENODEV; } + base = ioremap(gicp->res->start, gicp->res->end - gicp->res->start); + if (IS_ERR(base)) { + dev_err(&pdev->dev, "ioremap() failed. Unable to clear pending interrupts.\n"); + } else { + for (i = 0; i < 64; i++) + writel(i, base + GICP_CLRSPI_NSR_OFFSET); + iounmap(base); + } + return msi_create_parent_irq_domain(&info, &gicp_msi_parent_ops) ? 0 : -ENOMEM; } From a8913d54ab1f9ed871b4e45a7c8a4f7a9949d071 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 1 Aug 2025 09:58:18 +0200 Subject: [PATCH 1550/2411] irqchip/gic-v5: iwb: Fix iounmap probe failure path The 0-day bot reported that on the failure path the driver iounmap()s IWB resources that are managed through devm_ioremap(), which is clearly wrong because the driver would end up unmapping the MMIO resource twice on probing failure. Fix this by removing the error path altogether and by letting devres manage the iounmapping on clean-up. 
Fixes: 695949d8b16f ("irqchip/gic-v5: Add GICv5 IWB support") Reported-by: kernel test robot Signed-off-by: Lorenzo Pieralisi Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/all/20250801-gic-v5-fixes-6-17-v1-1-4fcedaccf9e6@kernel.org Closes: https://lore.kernel.org/oe-kbuild-all/202508010038.N3r4ZmII-lkp@intel.com --- drivers/irqchip/irq-gic-v5-iwb.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/irqchip/irq-gic-v5-iwb.c b/drivers/irqchip/irq-gic-v5-iwb.c index ed72fbdd4900..ad9fdc14d1c6 100644 --- a/drivers/irqchip/irq-gic-v5-iwb.c +++ b/drivers/irqchip/irq-gic-v5-iwb.c @@ -241,7 +241,6 @@ static int gicv5_iwb_device_probe(struct platform_device *pdev) struct gicv5_iwb_chip_data *iwb_node; void __iomem *iwb_base; struct resource *res; - int ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) @@ -254,16 +253,10 @@ static int gicv5_iwb_device_probe(struct platform_device *pdev) } iwb_node = gicv5_iwb_init_bases(iwb_base, pdev); - if (IS_ERR(iwb_node)) { - ret = PTR_ERR(iwb_node); - goto out_unmap; - } + if (IS_ERR(iwb_node)) + return PTR_ERR(iwb_node); return 0; - -out_unmap: - iounmap(iwb_base); - return ret; } static const struct of_device_id gicv5_iwb_of_match[] = { From 9ba0a63badc8e74ac0d490f9113300dda0ce2c19 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 1 Aug 2025 09:58:20 +0200 Subject: [PATCH 1551/2411] irqchip/gic-v5: Remove IRQD_RESEND_WHEN_IN_PROGRESS for ITS IRQs GICv5 LPI interrupts have an active state hence they cannot retrigger while the interrupt is being handled. Therefore, setting the IRQD_RESEND_WHEN_IN_PROGRESS flag on LPIs is pointless, as the situation this flag caters for cannot happen. Remove it. 
Signed-off-by: Lorenzo Pieralisi Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/all/20250801-gic-v5-fixes-6-17-v1-3-4fcedaccf9e6@kernel.org --- drivers/irqchip/irq-gic-v5-its.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v5-its.c b/drivers/irqchip/irq-gic-v5-its.c index 340640fdbdf6..9290ac741949 100644 --- a/drivers/irqchip/irq-gic-v5-its.c +++ b/drivers/irqchip/irq-gic-v5-its.c @@ -973,7 +973,6 @@ static int gicv5_its_irq_domain_alloc(struct irq_domain *domain, unsigned int vi irqd = irq_get_irq_data(virq + i); irqd_set_single_target(irqd); irqd_set_affinity_on_activate(irqd); - irqd_set_resend_when_in_progress(irqd); } return 0; From 6a20f9fca30c4047488a616b5225acb82367ef6b Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 29 Jul 2025 15:39:16 +0800 Subject: [PATCH 1552/2411] vhost: initialize vq->nheads properly Commit 7918bb2d19c9 ("vhost: basic in order support") introduces vq->nheads to store the number of batched used buffers per used elem, but it forgets to initialize vq->nheads to NULL in vhost_dev_init(). This causes kfree() to try to free it without it having been allocated if SET_OWNER is not called. Reported-by: JAEHOON KIM Reported-by: Breno Leitao Fixes: 45347e79b544 ("vhost: basic in order support") Signed-off-by: Jason Wang Message-Id: <20250729073916.80647-1-jasowang@redhat.com> Reviewed-by: Dawid Osuchowski Tested-by: Breno Leitao Reviewed-by: Stefano Garzarella Tested-by: Jaehoon Kim Signed-off-by: Michael S.
Tsirkin --- drivers/vhost/vhost.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 23286e4d7b49..8570fdf2e14a 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -615,6 +615,7 @@ void vhost_dev_init(struct vhost_dev *dev, vq->log = NULL; vq->indirect = NULL; vq->heads = NULL; + vq->nheads = NULL; vq->dev = dev; mutex_init(&vq->mutex); vhost_vq_reset(dev, vq); From eec8e8c048caa826ecbde7bf40f0ac2d11eef99d Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 1 Aug 2025 13:46:41 +0300 Subject: [PATCH 1553/2411] drm/bridge: document HDMI CEC callbacks Provide documentation for the drm_bridge callbacks related to the DRM_BRIDGE_OP_HDMI_CEC_ADAPTER flag. Fixes: a74288c8ded7 ("drm/display: bridge-connector: handle CEC adapters") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/r/20250611140933.1429a1b8@canb.auug.org.au Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250801-drm-hdmi-cec-docs-v1-1-be63e6008d0e@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- include/drm/drm_bridge.h | 48 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 8ed80cad77ec..b0e6653ee42e 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -866,13 +866,61 @@ struct drm_bridge_funcs { struct drm_connector *connector, bool enable, int direction); + /** + * @hdmi_cec_init: + * + * Initialize CEC part of the bridge. + * + * This callback is optional, it can be implemented by bridges that + * set the @DRM_BRIDGE_OP_HDMI_CEC_ADAPTER flag in their + * &drm_bridge->ops. + * + * Returns: + * 0 on success, a negative error code otherwise + */ int (*hdmi_cec_init)(struct drm_bridge *bridge, struct drm_connector *connector); + /** + * @hdmi_cec_enable: + * + * Enable or disable the CEC adapter inside the bridge. 
+ * + * This callback is optional, it can be implemented by bridges that + * set the @DRM_BRIDGE_OP_HDMI_CEC_ADAPTER flag in their + * &drm_bridge->ops. + * + * Returns: + * 0 on success, a negative error code otherwise + */ int (*hdmi_cec_enable)(struct drm_bridge *bridge, bool enable); + /** + * @hdmi_cec_log_addr: + * + * Set the logical address of the CEC adapter inside the bridge. + * + * This callback is optional, it can be implemented by bridges that + * set the @DRM_BRIDGE_OP_HDMI_CEC_ADAPTER flag in their + * &drm_bridge->ops. + * + * Returns: + * 0 on success, a negative error code otherwise + */ int (*hdmi_cec_log_addr)(struct drm_bridge *bridge, u8 logical_addr); + /** + * @hdmi_cec_transmit: + * + * Transmit the message using the CEC adapter inside the bridge. + * + * This callback is optional, it can be implemented by bridges that + * set the @DRM_BRIDGE_OP_HDMI_CEC_ADAPTER flag in their + * &drm_bridge->ops. + * + * Returns: + * 0 on success, a negative error code otherwise + */ int (*hdmi_cec_transmit)(struct drm_bridge *bridge, u8 attempts, u32 signal_free_time, struct cec_msg *msg); From ed1a1fe6ec5e73b23b310b434ace07d1e5060657 Mon Sep 17 00:00:00 2001 From: Chenghao Duan Date: Tue, 5 Aug 2025 19:00:18 +0800 Subject: [PATCH 1554/2411] LoongArch: BPF: Rename and refactor validate_code() 1. Rename the existing validate_code() to validate_ctx() 2. Factor out the code validation handling into a new helper validate_code() Then: * validate_code() is used to check the validity of code. * validate_ctx() is used to check both code validity and table entry correctness. The new validate_code() will be used in subsequent changes. 
Reviewed-by: Hengqi Chen Co-developed-by: George Guo Signed-off-by: George Guo Signed-off-by: Chenghao Duan Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index fa1500d4aa3e..7032f11d3a21 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -1180,6 +1180,14 @@ static int validate_code(struct jit_ctx *ctx) return -1; } + return 0; +} + +static int validate_ctx(struct jit_ctx *ctx) +{ + if (validate_code(ctx)) + return -1; + if (WARN_ON_ONCE(ctx->num_exentries != ctx->prog->aux->num_exentries)) return -1; @@ -1288,7 +1296,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) build_epilogue(&ctx); /* 3. Extra pass to validate JITed code */ - if (validate_code(&ctx)) { + if (validate_ctx(&ctx)) { bpf_jit_binary_free(header); prog = orig_prog; goto out_offset; From 9fbd18cf4c69f512f7de3ab73235078f3e32ecec Mon Sep 17 00:00:00 2001 From: Chenghao Duan Date: Tue, 5 Aug 2025 19:00:18 +0800 Subject: [PATCH 1555/2411] LoongArch: BPF: Add dynamic code modification support This commit adds support for BPF dynamic code modification on the LoongArch architecture: 1. Add bpf_arch_text_copy() for instruction block copying. 2. Add bpf_arch_text_poke() for runtime instruction patching. 3. Add bpf_arch_text_invalidate() for code invalidation. On LoongArch, since symbol addresses in the direct mapping region can't be reached via relative jump instructions from the paged mapping region, we use the move_imm+jirl instruction pair as absolute jump instructions. These require 2-5 instructions, so we reserve 5 NOP instructions in the program as placeholders for function jumps. The larch_insn_text_copy() function is solely used for BPF. And the use of larch_insn_text_copy() requires PAGE_SIZE alignment. Currently, only the size of the BPF trampoline is page-aligned. 
Co-developed-by: George Guo Signed-off-by: George Guo Signed-off-by: Chenghao Duan Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/inst.h | 1 + arch/loongarch/kernel/inst.c | 46 +++++++++++++ arch/loongarch/net/bpf_jit.c | 105 +++++++++++++++++++++++++++++- 3 files changed, 151 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 47d190595587..277d2140676b 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -497,6 +497,7 @@ void arch_simulate_insn(union loongarch_instruction insn, struct pt_regs *regs); int larch_insn_read(void *addr, u32 *insnp); int larch_insn_write(void *addr, u32 insn); int larch_insn_patch_text(void *addr, u32 insn); +int larch_insn_text_copy(void *dst, void *src, size_t len); u32 larch_insn_gen_nop(void); u32 larch_insn_gen_b(unsigned long pc, unsigned long dest); diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index e957b0f69688..72ecfed29d55 100644 --- a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c @@ -4,6 +4,8 @@ */ #include #include +#include +#include #include #include @@ -218,6 +220,50 @@ int larch_insn_patch_text(void *addr, u32 insn) return ret; } +struct insn_copy { + void *dst; + void *src; + size_t len; + unsigned int cpu; +}; + +static int text_copy_cb(void *data) +{ + int ret = 0; + struct insn_copy *copy = data; + + if (smp_processor_id() == copy->cpu) { + ret = copy_to_kernel_nofault(copy->dst, copy->src, copy->len); + if (ret) + pr_err("%s: operation failed\n", __func__); + } + + flush_icache_range((unsigned long)copy->dst, (unsigned long)copy->dst + copy->len); + + return ret; +} + +int larch_insn_text_copy(void *dst, void *src, size_t len) +{ + int ret = 0; + size_t start, end; + struct insn_copy copy = { + .dst = dst, + .src = src, + .len = len, + .cpu = smp_processor_id(), + }; + + start = round_down((size_t)dst, PAGE_SIZE); + end = round_up((size_t)dst + len, 
PAGE_SIZE); + + set_memory_rw(start, (end - start) / PAGE_SIZE); + ret = stop_machine(text_copy_cb, &copy, cpu_online_mask); + set_memory_rox(start, (end - start) / PAGE_SIZE); + + return ret; +} + u32 larch_insn_gen_nop(void) { return INSN_NOP; diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 7032f11d3a21..7d56300e1296 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -4,8 +4,12 @@ * * Copyright (C) 2022 Loongson Technology Corporation Limited */ +#include #include "bpf_jit.h" +#define LOONGARCH_LONG_JUMP_NINSNS 5 +#define LOONGARCH_LONG_JUMP_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4) + #define REG_TCC LOONGARCH_GPR_A6 #define TCC_SAVED LOONGARCH_GPR_S5 @@ -88,7 +92,7 @@ static u8 tail_call_reg(struct jit_ctx *ctx) */ static void build_prologue(struct jit_ctx *ctx) { - int stack_adjust = 0, store_offset, bpf_stack_adjust; + int i, stack_adjust = 0, store_offset, bpf_stack_adjust; bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); @@ -98,6 +102,10 @@ static void build_prologue(struct jit_ctx *ctx) stack_adjust = round_up(stack_adjust, 16); stack_adjust += bpf_stack_adjust; + /* Reserve space for the move_imm + jirl instruction */ + for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++) + emit_insn(ctx, nop); + /* * First instruction initializes the tail call count (TCC). 
* On tail call we skip this instruction, and the TCC is @@ -1194,6 +1202,101 @@ static int validate_ctx(struct jit_ctx *ctx) return 0; } +static int emit_jump_and_link(struct jit_ctx *ctx, u8 rd, u64 target) +{ + if (!target) { + pr_err("bpf_jit: jump target address is error\n"); + return -EFAULT; + } + + move_imm(ctx, LOONGARCH_GPR_T1, target, false); + emit_insn(ctx, jirl, rd, LOONGARCH_GPR_T1, 0); + + return 0; +} + +static int emit_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call) +{ + int i; + struct jit_ctx ctx; + + ctx.idx = 0; + ctx.image = (union loongarch_instruction *)insns; + + if (!target) { + for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++) + emit_insn((&ctx), nop); + return 0; + } + + return emit_jump_and_link(&ctx, is_call ? LOONGARCH_GPR_T0 : LOONGARCH_GPR_ZERO, (u64)target); +} + +void *bpf_arch_text_copy(void *dst, void *src, size_t len) +{ + int ret; + + mutex_lock(&text_mutex); + ret = larch_insn_text_copy(dst, src, len); + mutex_unlock(&text_mutex); + + return ret ? ERR_PTR(-EINVAL) : dst; +} + +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, + void *old_addr, void *new_addr) +{ + int ret; + bool is_call = (poke_type == BPF_MOD_CALL); + u32 old_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP}; + u32 new_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 
4] = INSN_NOP}; + + if (!is_kernel_text((unsigned long)ip) && + !is_bpf_text_address((unsigned long)ip)) + return -ENOTSUPP; + + ret = emit_jump_or_nops(old_addr, ip, old_insns, is_call); + if (ret) + return ret; + + if (memcmp(ip, old_insns, LOONGARCH_LONG_JUMP_NBYTES)) + return -EFAULT; + + ret = emit_jump_or_nops(new_addr, ip, new_insns, is_call); + if (ret) + return ret; + + mutex_lock(&text_mutex); + if (memcmp(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES)) + ret = larch_insn_text_copy(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES); + mutex_unlock(&text_mutex); + + return ret; +} + +int bpf_arch_text_invalidate(void *dst, size_t len) +{ + int i; + int ret = 0; + u32 *inst; + + inst = kvmalloc(len, GFP_KERNEL); + if (!inst) + return -ENOMEM; + + for (i = 0; i < (len / sizeof(u32)); i++) + inst[i] = INSN_BREAK; + + mutex_lock(&text_mutex); + if (larch_insn_text_copy(dst, inst, len)) + ret = -EINVAL; + mutex_unlock(&text_mutex); + + kvfree(inst); + + return ret; +} + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { bool tmp_blinded = false, extra_pass = false; From f9b6b41f0cf31791541cea9644ddbedb46465801 Mon Sep 17 00:00:00 2001 From: Chenghao Duan Date: Tue, 5 Aug 2025 19:00:18 +0800 Subject: [PATCH 1556/2411] LoongArch: BPF: Add basic bpf trampoline support BPF trampoline is the critical infrastructure of the BPF subsystem, acting as a mediator between kernel functions and BPF programs. Numerous important features, such as using BPF program for zero overhead kernel introspection, rely on this key component. The related tests have passed, including the following technical points: 1. fentry 2. fmod_ret 3. 
fexit The following related testcases passed on LoongArch: sudo ./test_progs -a fentry_test/fentry sudo ./test_progs -a fexit_test/fexit sudo ./test_progs -a fentry_fexit sudo ./test_progs -a modify_return sudo ./test_progs -a fexit_sleep sudo ./test_progs -a test_overhead sudo ./test_progs -a trampoline_count This issue was first reported by Geliang Tang in June 2024 while debugging MPTCP BPF selftests on a LoongArch machine (see commit eef0532e900c "selftests/bpf: Null checks for links in bpf_tcp_ca"). Geliang, Huacai, and Tiezhu then worked together to drive the implementation of this feature, encouraging broader collaboration among Chinese kernel engineers. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507100034.wXofj6VX-lkp@intel.com/ Reported-by: Geliang Tang Tested-by: Tiezhu Yang Tested-by: Vincent Li Co-developed-by: George Guo Signed-off-by: George Guo Signed-off-by: Chenghao Duan Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 377 +++++++++++++++++++++++++++++++++++ arch/loongarch/net/bpf_jit.h | 6 + 2 files changed, 383 insertions(+) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 7d56300e1296..052136abb8ab 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -7,9 +7,15 @@ #include #include "bpf_jit.h" +#define LOONGARCH_MAX_REG_ARGS 8 + #define LOONGARCH_LONG_JUMP_NINSNS 5 #define LOONGARCH_LONG_JUMP_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4) +#define LOONGARCH_FENTRY_NINSNS 2 +#define LOONGARCH_FENTRY_NBYTES (LOONGARCH_FENTRY_NINSNS * 4) +#define LOONGARCH_BPF_FENTRY_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4) + #define REG_TCC LOONGARCH_GPR_A6 #define TCC_SAVED LOONGARCH_GPR_S5 @@ -1232,6 +1238,11 @@ static int emit_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call) return emit_jump_and_link(&ctx, is_call ? 
LOONGARCH_GPR_T0 : LOONGARCH_GPR_ZERO, (u64)target); } +static int emit_call(struct jit_ctx *ctx, u64 addr) +{ + return emit_jump_and_link(ctx, LOONGARCH_GPR_RA, addr); +} + void *bpf_arch_text_copy(void *dst, void *src, size_t len) { int ret; @@ -1297,6 +1308,372 @@ int bpf_arch_text_invalidate(void *dst, size_t len) return ret; } +static void store_args(struct jit_ctx *ctx, int nargs, int args_off) +{ + int i; + + for (i = 0; i < nargs; i++) { + emit_insn(ctx, std, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off); + args_off -= 8; + } +} + +static void restore_args(struct jit_ctx *ctx, int nargs, int args_off) +{ + int i; + + for (i = 0; i < nargs; i++) { + emit_insn(ctx, ldd, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off); + args_off -= 8; + } +} + +static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, + int args_off, int retval_off, int run_ctx_off, bool save_ret) +{ + int ret; + u32 *branch; + struct bpf_prog *p = l->link.prog; + int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); + + if (l->cookie) { + move_imm(ctx, LOONGARCH_GPR_T1, l->cookie, false); + emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off); + } else { + emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off); + } + + /* arg1: prog */ + move_imm(ctx, LOONGARCH_GPR_A0, (const s64)p, false); + /* arg2: &run_ctx */ + emit_insn(ctx, addid, LOONGARCH_GPR_A1, LOONGARCH_GPR_FP, -run_ctx_off); + ret = emit_call(ctx, (const u64)bpf_trampoline_enter(p)); + if (ret) + return ret; + + /* store prog start time */ + move_reg(ctx, LOONGARCH_GPR_S1, LOONGARCH_GPR_A0); + + /* + * if (__bpf_prog_enter(prog) == 0) + * goto skip_exec_of_prog; + */ + branch = (u32 *)ctx->image + ctx->idx; + /* nop reserved for conditional jump */ + emit_insn(ctx, nop); + + /* arg1: &args_off */ + emit_insn(ctx, addid, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -args_off); + if (!p->jited) + move_imm(ctx, LOONGARCH_GPR_A1, (const 
s64)p->insnsi, false); + ret = emit_call(ctx, (const u64)p->bpf_func); + if (ret) + return ret; + + if (save_ret) { + emit_insn(ctx, std, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off); + emit_insn(ctx, std, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8)); + } + + /* update branch with beqz */ + if (ctx->image) { + int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branch; + *branch = larch_insn_gen_beq(LOONGARCH_GPR_A0, LOONGARCH_GPR_ZERO, offset); + } + + /* arg1: prog */ + move_imm(ctx, LOONGARCH_GPR_A0, (const s64)p, false); + /* arg2: prog start time */ + move_reg(ctx, LOONGARCH_GPR_A1, LOONGARCH_GPR_S1); + /* arg3: &run_ctx */ + emit_insn(ctx, addid, LOONGARCH_GPR_A2, LOONGARCH_GPR_FP, -run_ctx_off); + ret = emit_call(ctx, (const u64)bpf_trampoline_exit(p)); + + return ret; +} + +static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl, + int args_off, int retval_off, int run_ctx_off, u32 **branches) +{ + int i; + + emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -retval_off); + for (i = 0; i < tl->nr_links; i++) { + invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off, run_ctx_off, true); + emit_insn(ctx, ldd, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -retval_off); + branches[i] = (u32 *)ctx->image + ctx->idx; + emit_insn(ctx, nop); + } +} + +void *arch_alloc_bpf_trampoline(unsigned int size) +{ + return bpf_prog_pack_alloc(size, jit_fill_hole); +} + +void arch_free_bpf_trampoline(void *image, unsigned int size) +{ + bpf_prog_pack_free(image, size); +} + +static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, + const struct btf_func_model *m, struct bpf_tramp_links *tlinks, + void *func_addr, u32 flags) +{ + int i, ret, save_ret; + int stack_size = 0, nargs = 0; + int retval_off, args_off, nargs_off, ip_off, run_ctx_off, sreg_off; + void *orig_call = func_addr; + struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; + struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; 
+ struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; + u32 **branches = NULL; + + if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY)) + return -ENOTSUPP; + + /* + * FP + 8 [ RA to parent func ] return address to parent + * function + * FP + 0 [ FP of parent func ] frame pointer of parent + * function + * FP - 8 [ T0 to traced func ] return address of traced + * function + * FP - 16 [ FP of traced func ] frame pointer of traced + * function + * + * FP - retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or + * BPF_TRAMP_F_RET_FENTRY_RET + * [ argN ] + * [ ... ] + * FP - args_off [ arg1 ] + * + * FP - nargs_off [ regs count ] + * + * FP - ip_off [ traced func ] BPF_TRAMP_F_IP_ARG + * + * FP - run_ctx_off [ bpf_tramp_run_ctx ] + * + * FP - sreg_off [ callee saved reg ] + * + */ + + if (m->nr_args > LOONGARCH_MAX_REG_ARGS) + return -ENOTSUPP; + + if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY)) + return -ENOTSUPP; + + stack_size = 0; + + /* Room of trampoline frame to store return address and frame pointer */ + stack_size += 16; + + save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); + if (save_ret) { + /* Save BPF R0 and A0 */ + stack_size += 16; + retval_off = stack_size; + } + + /* Room of trampoline frame to store args */ + nargs = m->nr_args; + stack_size += nargs * 8; + args_off = stack_size; + + /* Room of trampoline frame to store args number */ + stack_size += 8; + nargs_off = stack_size; + + /* Room of trampoline frame to store ip address */ + if (flags & BPF_TRAMP_F_IP_ARG) { + stack_size += 8; + ip_off = stack_size; + } + + /* Room of trampoline frame to store struct bpf_tramp_run_ctx */ + stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8); + run_ctx_off = stack_size; + + stack_size += 8; + sreg_off = stack_size; + + stack_size = round_up(stack_size, 16); + + /* For the trampoline called from function entry */ + /* RA and FP for parent function */ + emit_insn(ctx, addid, 
LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -16); + emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8); + emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0); + emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 16); + + /* RA and FP for traced function */ + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size); + emit_insn(ctx, std, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8); + emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16); + emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size); + + /* callee saved register S1 to pass start time */ + emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off); + + /* store ip address of the traced function */ + if (flags & BPF_TRAMP_F_IP_ARG) { + move_imm(ctx, LOONGARCH_GPR_T1, (const s64)func_addr, false); + emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -ip_off); + } + + /* store nargs number */ + move_imm(ctx, LOONGARCH_GPR_T1, nargs, false); + emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -nargs_off); + + store_args(ctx, nargs, args_off); + + /* To traced function */ + /* Ftrace jump skips 2 NOP instructions */ + if (is_kernel_text((unsigned long)orig_call)) + orig_call += LOONGARCH_FENTRY_NBYTES; + /* Direct jump skips 5 NOP instructions */ + else if (is_bpf_text_address((unsigned long)orig_call)) + orig_call += LOONGARCH_BPF_FENTRY_NBYTES; + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + move_imm(ctx, LOONGARCH_GPR_A0, (const s64)im, false); + ret = emit_call(ctx, (const u64)__bpf_tramp_enter); + if (ret) + return ret; + } + + for (i = 0; i < fentry->nr_links; i++) { + ret = invoke_bpf_prog(ctx, fentry->links[i], args_off, retval_off, + run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET); + if (ret) + return ret; + } + if (fmod_ret->nr_links) { + branches = kcalloc(fmod_ret->nr_links, sizeof(u32 *), GFP_KERNEL); + if (!branches) + return -ENOMEM; + + invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off, run_ctx_off, 
branches); + } + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + restore_args(ctx, m->nr_args, args_off); + ret = emit_call(ctx, (const u64)orig_call); + if (ret) + goto out; + emit_insn(ctx, std, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off); + emit_insn(ctx, std, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8)); + im->ip_after_call = ctx->ro_image + ctx->idx; + /* Reserve space for the move_imm + jirl instruction */ + for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++) + emit_insn(ctx, nop); + } + + for (i = 0; ctx->image && i < fmod_ret->nr_links; i++) { + int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branches[i]; + *branches[i] = larch_insn_gen_bne(LOONGARCH_GPR_T1, LOONGARCH_GPR_ZERO, offset); + } + + for (i = 0; i < fexit->nr_links; i++) { + ret = invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off, run_ctx_off, false); + if (ret) + goto out; + } + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + im->ip_epilogue = ctx->ro_image + ctx->idx; + move_imm(ctx, LOONGARCH_GPR_A0, (const s64)im, false); + ret = emit_call(ctx, (const u64)__bpf_tramp_exit); + if (ret) + goto out; + } + + if (flags & BPF_TRAMP_F_RESTORE_REGS) + restore_args(ctx, m->nr_args, args_off); + + if (save_ret) { + emit_insn(ctx, ldd, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off); + emit_insn(ctx, ldd, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8)); + } + + emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off); + + /* trampoline called from function entry */ + emit_insn(ctx, ldd, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8); + emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16); + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_size); + + emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8); + emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0); + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, 16); + + if (flags & BPF_TRAMP_F_SKIP_FRAME) + /* return to parent function */ + emit_insn(ctx, jirl, 
LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0); + else + /* return to traced function */ + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T0, 0); + + ret = ctx->idx; +out: + kfree(branches); + + return ret; +} + +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, + void *ro_image_end, const struct btf_func_model *m, + u32 flags, struct bpf_tramp_links *tlinks, void *func_addr) +{ + int ret, size; + void *image, *tmp; + struct jit_ctx ctx; + + size = ro_image_end - ro_image; + image = kvmalloc(size, GFP_KERNEL); + if (!image) + return -ENOMEM; + + ctx.image = (union loongarch_instruction *)image; + ctx.ro_image = (union loongarch_instruction *)ro_image; + ctx.idx = 0; + + jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image)); + ret = __arch_prepare_bpf_trampoline(&ctx, im, m, tlinks, func_addr, flags); + if (ret > 0 && validate_code(&ctx) < 0) { + ret = -EINVAL; + goto out; + } + + tmp = bpf_arch_text_copy(ro_image, image, size); + if (IS_ERR(tmp)) { + ret = PTR_ERR(tmp); + goto out; + } + + bpf_flush_icache(ro_image, ro_image_end); +out: + kvfree(image); + return ret < 0 ? ret : size; +} + +int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, void *func_addr) +{ + int ret; + struct jit_ctx ctx; + struct bpf_tramp_image im; + + ctx.image = NULL; + ctx.idx = 0; + + ret = __arch_prepare_bpf_trampoline(&ctx, &im, m, tlinks, func_addr, flags); + + /* Page align */ + return ret < 0 ? 
ret : round_up(ret * LOONGARCH_INSN_SIZE, PAGE_SIZE); +} + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { bool tmp_blinded = false, extra_pass = false; diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h index f9c569f53949..5697158fd164 100644 --- a/arch/loongarch/net/bpf_jit.h +++ b/arch/loongarch/net/bpf_jit.h @@ -18,6 +18,7 @@ struct jit_ctx { u32 *offset; int num_exentries; union loongarch_instruction *image; + union loongarch_instruction *ro_image; u32 stack_size; }; @@ -308,3 +309,8 @@ static inline int emit_tailcall_jmp(struct jit_ctx *ctx, u8 cond, enum loongarch return -EINVAL; } + +static inline void bpf_flush_icache(void *start, void *end) +{ + flush_icache_range((unsigned long)start, (unsigned long)end); +} From 6abf17d690d83d25f6d00a1a2cd3553c7d20c2d8 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 5 Aug 2025 19:00:22 +0800 Subject: [PATCH 1557/2411] LoongArch: BPF: Add struct ops support for trampoline Use the BPF_TRAMP_F_INDIRECT flag to detect struct ops and emit proper prologue and epilogue for this case. With this patch, all of the struct_ops related testcases (except struct_ops_multi_pages) pass on LoongArch. The testcase struct_ops_multi_pages fails because the actual image_pages_cnt is 40, which is bigger than MAX_TRAMP_IMAGE_PAGES. Before: $ sudo ./test_progs -t struct_ops -d struct_ops_multi_pages ... WATCHDOG: test case struct_ops_module/struct_ops_load executes for 10 seconds... After: $ sudo ./test_progs -t struct_ops -d struct_ops_multi_pages ... #15 bad_struct_ops:OK ... #399 struct_ops_autocreate:OK ... #400 struct_ops_kptr_return:OK ... #401 struct_ops_maybe_null:OK ... #402 struct_ops_module:OK ... #404 struct_ops_no_cfi:OK ... #405 struct_ops_private_stack:SKIP ... 
#406 struct_ops_refcounted:OK Summary: 8/25 PASSED, 3 SKIPPED, 0 FAILED Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 73 ++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 24 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 052136abb8ab..0d11d90dc4c9 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -1423,6 +1423,7 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i int i, ret, save_ret; int stack_size = 0, nargs = 0; int retval_off, args_off, nargs_off, ip_off, run_ctx_off, sreg_off; + bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT; void *orig_call = func_addr; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; @@ -1500,18 +1501,33 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i stack_size = round_up(stack_size, 16); - /* For the trampoline called from function entry */ - /* RA and FP for parent function */ - emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -16); - emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8); - emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0); - emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 16); + if (is_struct_ops) { + /* + * For the trampoline called directly, just handle + * the frame of trampoline. + */ + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size); + emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, stack_size - 8); + emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16); + emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size); + } else { + /* + * For the trampoline called from function entry, + * the frame of traced function and the frame of + * trampoline need to be considered. 
+ */ + /* RA and FP for parent function */ + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -16); + emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8); + emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0); + emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 16); - /* RA and FP for traced function */ - emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size); - emit_insn(ctx, std, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8); - emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16); - emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size); + /* RA and FP for traced function */ + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size); + emit_insn(ctx, std, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8); + emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16); + emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size); + } /* callee saved register S1 to pass start time */ emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off); @@ -1599,21 +1615,30 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off); - /* trampoline called from function entry */ - emit_insn(ctx, ldd, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8); - emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16); - emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_size); + if (is_struct_ops) { + /* trampoline called directly */ + emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, stack_size - 8); + emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16); + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_size); - emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8); - emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0); - emit_insn(ctx, addid, LOONGARCH_GPR_SP, 
LOONGARCH_GPR_SP, 16); - - if (flags & BPF_TRAMP_F_SKIP_FRAME) - /* return to parent function */ emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0); - else - /* return to traced function */ - emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T0, 0); + } else { + /* trampoline called from function entry */ + emit_insn(ctx, ldd, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8); + emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16); + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_size); + + emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8); + emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0); + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, 16); + + if (flags & BPF_TRAMP_F_SKIP_FRAME) + /* return to parent function */ + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0); + else + /* return to traced function */ + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T0, 0); + } ret = ctx->idx; out: From cd39d9e6b7e4c58fa77783e7aedf7ada51d02ea3 Mon Sep 17 00:00:00 2001 From: Haoran Jiang Date: Tue, 5 Aug 2025 19:00:22 +0800 Subject: [PATCH 1558/2411] LoongArch: BPF: Fix jump offset calculation in tailcall The extra pass of bpf_int_jit_compile() skips JIT context initialization which essentially skips offset calculation leaving out_offset = -1, so the jmp_offset in emit_bpf_tail_call, calculated by "#define jmp_offset (out_offset - (cur_offset))", is a negative number, which is wrong. The final generated assembly is as follows. 54: bgeu $a2, $t1, -8 # 0x0000004c 58: addi.d $a6, $s5, -1 5c: bltz $a6, -16 # 0x0000004c 60: alsl.d $t2, $a2, $a1, 0x3 64: ld.d $t2, $t2, 264 68: beq $t2, $zero, -28 # 0x0000004c Before applying this patch, the following test case reveals soft lockup issues. cd tools/testing/selftests/bpf/ ./test_progs --allow=tailcalls/tailcall_bpf2bpf_1 dmesg: watchdog: BUG: soft lockup - CPU#2 stuck for 26s! 
[test_progs:25056] Cc: stable@vger.kernel.org Fixes: 5dc615520c4d ("LoongArch: Add BPF JIT support") Reviewed-by: Hengqi Chen Signed-off-by: Haoran Jiang Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 0d11d90dc4c9..f4f12ed16d2f 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -222,11 +222,9 @@ bool bpf_jit_supports_far_kfunc_call(void) return true; } -/* initialized on the first pass of build_body() */ -static int out_offset = -1; -static int emit_bpf_tail_call(struct jit_ctx *ctx) +static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn) { - int off; + int off, tc_ninsn = 0; u8 tcc = tail_call_reg(ctx); u8 a1 = LOONGARCH_GPR_A1; u8 a2 = LOONGARCH_GPR_A2; @@ -236,7 +234,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) const int idx0 = ctx->idx; #define cur_offset (ctx->idx - idx0) -#define jmp_offset (out_offset - (cur_offset)) +#define jmp_offset (tc_ninsn - (cur_offset)) /* * a0: &ctx @@ -246,6 +244,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) * if (index >= array->map.max_entries) * goto out; */ + tc_ninsn = insn ? 
ctx->offset[insn+1] - ctx->offset[insn] : ctx->offset[0]; off = offsetof(struct bpf_array, map.max_entries); emit_insn(ctx, ldwu, t1, a1, off); /* bgeu $a2, $t1, jmp_offset */ @@ -277,15 +276,6 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) emit_insn(ctx, ldd, t3, t2, off); __build_epilogue(ctx, true); - /* out: */ - if (out_offset == -1) - out_offset = cur_offset; - if (cur_offset != out_offset) { - pr_err_once("tail_call out_offset = %d, expected %d!\n", - cur_offset, out_offset); - return -1; - } - return 0; toofar: @@ -930,7 +920,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext /* tail call */ case BPF_JMP | BPF_TAIL_CALL: mark_tail_call(ctx); - if (emit_bpf_tail_call(ctx) < 0) + if (emit_bpf_tail_call(ctx, i) < 0) return -EINVAL; break; @@ -1855,7 +1845,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) if (tmp_blinded) bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog); - out_offset = -1; return prog; From c0fcc955ff827431b541b1aa6bcb82bdce4531f7 Mon Sep 17 00:00:00 2001 From: Haoran Jiang Date: Tue, 5 Aug 2025 19:00:22 +0800 Subject: [PATCH 1559/2411] LoongArch: BPF: Fix the tailcall hierarchy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In specific use cases combining tailcalls and BPF-to-BPF calls, MAX_TAIL_CALL_CNT won't work because of missing tail_call_cnt back-propagation from callee to caller. This patch fixes this tailcall issue, caused by abusing tailcalls in the bpf2bpf feature on LoongArch, in the same way as "bpf, x64: Fix tailcall hierarchy". Push tail_call_cnt_ptr and tail_call_cnt onto the stack; tail_call_cnt_ptr is passed between tailcall and bpf2bpf, and is used to increment tail_call_cnt. 
Fixes: bb035ef0cc91 ("LoongArch: BPF: Support mixing bpf2bpf and tailcalls") Reviewed-by: Geliang Tang Reviewed-by: Hengqi Chen Signed-off-by: Haoran Jiang Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 155 ++++++++++++++++++++++++----------- 1 file changed, 107 insertions(+), 48 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index f4f12ed16d2f..4ea8ae4cf0ca 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -17,10 +17,7 @@ #define LOONGARCH_BPF_FENTRY_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4) #define REG_TCC LOONGARCH_GPR_A6 -#define TCC_SAVED LOONGARCH_GPR_S5 - -#define SAVE_RA BIT(0) -#define SAVE_TCC BIT(1) +#define BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack) (round_up(stack, 16) - 80) static const int regmap[] = { /* return value from in-kernel function, and exit value for eBPF program */ @@ -42,32 +39,57 @@ static const int regmap[] = { [BPF_REG_AX] = LOONGARCH_GPR_T0, }; -static void mark_call(struct jit_ctx *ctx) +static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx, int *store_offset) { - ctx->flags |= SAVE_RA; -} + const struct bpf_prog *prog = ctx->prog; + const bool is_main_prog = !bpf_is_subprog(prog); -static void mark_tail_call(struct jit_ctx *ctx) -{ - ctx->flags |= SAVE_TCC; -} + if (is_main_prog) { + /* + * LOONGARCH_GPR_T3 = MAX_TAIL_CALL_CNT + * if (REG_TCC > T3 ) + * std REG_TCC -> LOONGARCH_GPR_SP + store_offset + * else + * std REG_TCC -> LOONGARCH_GPR_SP + store_offset + * REG_TCC = LOONGARCH_GPR_SP + store_offset + * + * std REG_TCC -> LOONGARCH_GPR_SP + store_offset + * + * The purpose of this code is to first push the TCC into stack, + * and then push the address of TCC into stack. + * In cases where bpf2bpf and tailcall are used in combination, + * the value in REG_TCC may be a count or an address, + * these two cases need to be judged and handled separately. 
+ */ + emit_insn(ctx, addid, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT); + *store_offset -= sizeof(long); -static bool seen_call(struct jit_ctx *ctx) -{ - return (ctx->flags & SAVE_RA); -} + emit_cond_jmp(ctx, BPF_JGT, REG_TCC, LOONGARCH_GPR_T3, 4); -static bool seen_tail_call(struct jit_ctx *ctx) -{ - return (ctx->flags & SAVE_TCC); -} + /* + * If REG_TCC < MAX_TAIL_CALL_CNT, the value in REG_TCC is a count, + * push tcc into stack + */ + emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset); -static u8 tail_call_reg(struct jit_ctx *ctx) -{ - if (seen_call(ctx)) - return TCC_SAVED; + /* Push the address of TCC into the REG_TCC */ + emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_SP, *store_offset); - return REG_TCC; + emit_uncond_jmp(ctx, 2); + + /* + * If REG_TCC > MAX_TAIL_CALL_CNT, the value in REG_TCC is an address, + * push tcc_ptr into stack + */ + emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset); + } else { + *store_offset -= sizeof(long); + emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset); + } + + /* Push tcc_ptr into stack */ + *store_offset -= sizeof(long); + emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset); } /* @@ -90,6 +112,10 @@ static u8 tail_call_reg(struct jit_ctx *ctx) * | $s4 | * +-------------------------+ * | $s5 | + * +-------------------------+ + * | tcc | + * +-------------------------+ + * | tcc_ptr | * +-------------------------+ <--BPF_REG_FP * | prog->aux->stack_depth | * | (optional) | @@ -99,12 +125,17 @@ static u8 tail_call_reg(struct jit_ctx *ctx) static void build_prologue(struct jit_ctx *ctx) { int i, stack_adjust = 0, store_offset, bpf_stack_adjust; + const struct bpf_prog *prog = ctx->prog; + const bool is_main_prog = !bpf_is_subprog(prog); bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); - /* To store ra, fp, s0, s1, s2, s3, s4 and s5. 
*/ + /* To store ra, fp, s0, s1, s2, s3, s4, s5 */ stack_adjust += sizeof(long) * 8; + /* To store tcc and tcc_ptr */ + stack_adjust += sizeof(long) * 2; + stack_adjust = round_up(stack_adjust, 16); stack_adjust += bpf_stack_adjust; @@ -113,11 +144,12 @@ static void build_prologue(struct jit_ctx *ctx) emit_insn(ctx, nop); /* - * First instruction initializes the tail call count (TCC). - * On tail call we skip this instruction, and the TCC is - * passed in REG_TCC from the caller. + * First instruction initializes the tail call count (TCC) + * register to zero. On tail call we skip this instruction, + * and the TCC is passed in REG_TCC from the caller. */ - emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT); + if (is_main_prog) + emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, 0); emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust); @@ -145,20 +177,13 @@ static void build_prologue(struct jit_ctx *ctx) store_offset -= sizeof(long); emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset); + prepare_bpf_tail_call_cnt(ctx, &store_offset); + emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust); if (bpf_stack_adjust) emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust); - /* - * Program contains calls and tail calls, so REG_TCC need - * to be saved across calls. - */ - if (seen_tail_call(ctx) && seen_call(ctx)) - move_reg(ctx, TCC_SAVED, REG_TCC); - else - emit_insn(ctx, nop); - ctx->stack_size = stack_adjust; } @@ -191,6 +216,16 @@ static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call) load_offset -= sizeof(long); emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset); + /* + * When push into the stack, follow the order of tcc then tcc_ptr. + * When pop from the stack, first pop tcc_ptr then followed by tcc. 
+ */ + load_offset -= 2 * sizeof(long); + emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, load_offset); + + load_offset += sizeof(long); + emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, load_offset); + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust); if (!is_tail_call) { @@ -203,7 +238,7 @@ static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call) * Call the next bpf prog and skip the first instruction * of TCC initialization. */ - emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T3, 1); + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T3, 6); } } @@ -225,7 +260,7 @@ bool bpf_jit_supports_far_kfunc_call(void) static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn) { int off, tc_ninsn = 0; - u8 tcc = tail_call_reg(ctx); + int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(ctx->stack_size); u8 a1 = LOONGARCH_GPR_A1; u8 a2 = LOONGARCH_GPR_A2; u8 t1 = LOONGARCH_GPR_T1; @@ -252,11 +287,15 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn) goto toofar; /* - * if (--TCC < 0) - * goto out; + * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT) + * goto out; */ - emit_insn(ctx, addid, REG_TCC, tcc, -1); - if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0) + emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, tcc_ptr_off); + emit_insn(ctx, ldd, t3, REG_TCC, 0); + emit_insn(ctx, addid, t3, t3, 1); + emit_insn(ctx, std, t3, REG_TCC, 0); + emit_insn(ctx, addid, t2, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT); + if (emit_tailcall_jmp(ctx, BPF_JSGT, t3, t2, jmp_offset) < 0) goto toofar; /* @@ -467,7 +506,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext u64 func_addr; bool func_addr_fixed, sign_extend; int i = insn - ctx->prog->insnsi; - int ret, jmp_offset; + int ret, jmp_offset, tcc_ptr_off; const u8 code = insn->code; const u8 cond = BPF_OP(code); const u8 t1 = LOONGARCH_GPR_T1; @@ -903,12 +942,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx 
*ctx, bool ext /* function call */ case BPF_JMP | BPF_CALL: - mark_call(ctx); ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &func_addr, &func_addr_fixed); if (ret < 0) return ret; + if (insn->src_reg == BPF_PSEUDO_CALL) { + tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(ctx->stack_size); + emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, tcc_ptr_off); + } + move_addr(ctx, t1, func_addr); emit_insn(ctx, jirl, LOONGARCH_GPR_RA, t1, 0); @@ -919,7 +962,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext /* tail call */ case BPF_JMP | BPF_TAIL_CALL: - mark_tail_call(ctx); if (emit_bpf_tail_call(ctx, i) < 0) return -EINVAL; break; @@ -1412,7 +1454,7 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i { int i, ret, save_ret; int stack_size = 0, nargs = 0; - int retval_off, args_off, nargs_off, ip_off, run_ctx_off, sreg_off; + int retval_off, args_off, nargs_off, ip_off, run_ctx_off, sreg_off, tcc_ptr_off; bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT; void *orig_call = func_addr; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; @@ -1447,6 +1489,7 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i * * FP - sreg_off [ callee saved reg ] * + * FP - tcc_ptr_off [ tail_call_cnt_ptr ] */ if (m->nr_args > LOONGARCH_MAX_REG_ARGS) @@ -1489,6 +1532,12 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i stack_size += 8; sreg_off = stack_size; + /* Room of trampoline frame to store tail_call_cnt_ptr */ + if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) { + stack_size += 8; + tcc_ptr_off = stack_size; + } + stack_size = round_up(stack_size, 16); if (is_struct_ops) { @@ -1519,6 +1568,9 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size); } + if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) + emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_FP, -tcc_ptr_off); + 
/* callee saved register S1 to pass start time */ emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off); @@ -1565,6 +1617,10 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i if (flags & BPF_TRAMP_F_CALL_ORIG) { restore_args(ctx, m->nr_args, args_off); + + if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) + emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_FP, -tcc_ptr_off); + ret = emit_call(ctx, (const u64)orig_call); if (ret) goto out; @@ -1605,6 +1661,9 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off); + if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) + emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_FP, -tcc_ptr_off); + if (is_struct_ops) { /* trampoline called directly */ emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, stack_size - 8); From 8568df83ea7d9afed0b40ee3a5f8287df008b1c9 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 5 Aug 2025 19:00:22 +0800 Subject: [PATCH 1560/2411] LoongArch: BPF: Set bpf_jit_bypass_spec_v1/v4() JITs can set bpf_jit_bypass_spec_v1/v4() if they want the verifier to skip analysis/patching for the respective vulnerability, it is safe to set both bpf_jit_bypass_spec_v1/v4(), because there is no speculation barrier instruction for LoongArch. Suggested-by: Luis Gerhorst Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 4ea8ae4cf0ca..abfdb6bb5c38 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -1915,6 +1915,16 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto out_offset; } +bool bpf_jit_bypass_spec_v1(void) +{ + return true; +} + +bool bpf_jit_bypass_spec_v4(void) +{ + return true; +} + /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. 
*/ bool bpf_jit_supports_subprog_tailcalls(void) { From 06d380c470ef987ec703e96cef91681ac13810fe Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Tue, 5 Aug 2025 19:00:22 +0800 Subject: [PATCH 1561/2411] LoongArch: dts: Add SDIO controller support to Loongson-2K0500 The Loongson-2K0500 integrates two SDIO controllers for SD storage cards and SDIO cards, supporting SD storage card boot. The module is supported now, enable it. Signed-off-by: Binbin Zhou Signed-off-by: Huacai Chen --- .../boot/dts/loongson-2k0500-ref.dts | 9 ++++++ arch/loongarch/boot/dts/loongson-2k0500.dtsi | 28 +++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts index a34734a6c3ce..018ed904352a 100644 --- a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts +++ b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts @@ -41,6 +41,15 @@ linux,cma { }; }; +&apbdma3 { + status = "okay"; +}; + +&mmc0 { + status = "okay"; + bus-width = <4>; +}; + &gmac0 { status = "okay"; diff --git a/arch/loongarch/boot/dts/loongson-2k0500.dtsi b/arch/loongarch/boot/dts/loongson-2k0500.dtsi index 760c60eebb89..588ebc3bded4 100644 --- a/arch/loongarch/boot/dts/loongson-2k0500.dtsi +++ b/arch/loongarch/boot/dts/loongson-2k0500.dtsi @@ -104,7 +104,7 @@ dma-controller@1fe10c10 { status = "disabled"; }; - dma-controller@1fe10c20 { + apbdma2: dma-controller@1fe10c20 { compatible = "loongson,ls2k0500-apbdma", "loongson,ls2k1000-apbdma"; reg = <0 0x1fe10c20 0 0x8>; interrupt-parent = <&eiointc>; @@ -114,7 +114,7 @@ dma-controller@1fe10c20 { status = "disabled"; }; - dma-controller@1fe10c30 { + apbdma3: dma-controller@1fe10c30 { compatible = "loongson,ls2k0500-apbdma", "loongson,ls2k1000-apbdma"; reg = <0 0x1fe10c30 0 0x8>; interrupt-parent = <&eiointc>; @@ -437,6 +437,30 @@ i2c@1ff4a800 { status = "disabled"; }; + mmc0: mmc@1ff64000 { + compatible = "loongson,ls2k0500-mmc"; + reg = <0 0x1ff64000 0 0x2000>, + 
<0 0x1fe10100 0 0x4>; + interrupt-parent = <&eiointc>; + interrupts = <57>; + dmas = <&apbdma3 0>; + dma-names = "rx-tx"; + clocks = <&clk LOONGSON2_APB_CLK>; + status = "disabled"; + }; + + mmc@1ff66000 { + compatible = "loongson,ls2k0500-mmc"; + reg = <0 0x1ff66000 0 0x2000>, + <0 0x1fe10100 0 0x4>; + interrupt-parent = <&eiointc>; + interrupts = <58>; + dmas = <&apbdma2 0>; + dma-names = "rx-tx"; + clocks = <&clk LOONGSON2_APB_CLK>; + status = "disabled"; + }; + pmc: power-management@1ff6c000 { compatible = "loongson,ls2k0500-pmc", "syscon"; reg = <0x0 0x1ff6c000 0x0 0x58>; From df0fbf7fab3025ab80be7760806ecf14bd87a929 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Tue, 5 Aug 2025 19:00:22 +0800 Subject: [PATCH 1562/2411] LoongArch: dts: Add SDIO controller support to Loongson-2K1000 The Loongson-2K1000 integrates one SDIO controller for SD storage cards and SDIO cards. The module is supported now, enable it. Signed-off-by: Binbin Zhou Signed-off-by: Huacai Chen --- .../boot/dts/loongson-2k1000-ref.dts | 13 ++++++++++ arch/loongarch/boot/dts/loongson-2k1000.dtsi | 24 ++++++++++++++----- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts index 78ea995abf1c..d9a452ada5d7 100644 --- a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts +++ b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts @@ -48,6 +48,19 @@ fan0: pwm-fan { }; }; +&apbdma1 { + status = "okay"; +}; + +&mmc { + status = "okay"; + + pinctrl-0 = <&sdio_pins_default>; + pinctrl-names = "default"; + bus-width = <4>; + cd-gpios = <&gpio0 22 GPIO_ACTIVE_LOW>; +}; + &gmac0 { status = "okay"; diff --git a/arch/loongarch/boot/dts/loongson-2k1000.dtsi b/arch/loongarch/boot/dts/loongson-2k1000.dtsi index 1da3beb00f0e..d8e01e2534dd 100644 --- a/arch/loongarch/boot/dts/loongson-2k1000.dtsi +++ b/arch/loongarch/boot/dts/loongson-2k1000.dtsi @@ -187,14 +187,14 @@ gpio0: gpio@1fe00500 { <26 
IRQ_TYPE_LEVEL_HIGH>, <26 IRQ_TYPE_LEVEL_HIGH>, <26 IRQ_TYPE_LEVEL_HIGH>, - <>, - <26 IRQ_TYPE_LEVEL_HIGH>, + <0 IRQ_TYPE_NONE>, <26 IRQ_TYPE_LEVEL_HIGH>, <26 IRQ_TYPE_LEVEL_HIGH>, <26 IRQ_TYPE_LEVEL_HIGH>, <26 IRQ_TYPE_LEVEL_HIGH>, <26 IRQ_TYPE_LEVEL_HIGH>, <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_NONE>, <26 IRQ_TYPE_LEVEL_HIGH>, <26 IRQ_TYPE_LEVEL_HIGH>, <26 IRQ_TYPE_LEVEL_HIGH>, @@ -209,13 +209,13 @@ gpio0: gpio@1fe00500 { <27 IRQ_TYPE_LEVEL_HIGH>, <27 IRQ_TYPE_LEVEL_HIGH>, <27 IRQ_TYPE_LEVEL_HIGH>, - <>, + <0 IRQ_TYPE_NONE>, <27 IRQ_TYPE_LEVEL_HIGH>, <27 IRQ_TYPE_LEVEL_HIGH>, <27 IRQ_TYPE_LEVEL_HIGH>, <27 IRQ_TYPE_LEVEL_HIGH>, - <>, - <>, + <0 IRQ_TYPE_NONE>, + <0 IRQ_TYPE_NONE>, <27 IRQ_TYPE_LEVEL_HIGH>, <27 IRQ_TYPE_LEVEL_HIGH>, <27 IRQ_TYPE_LEVEL_HIGH>, @@ -256,7 +256,7 @@ dma-controller@1fe00c00 { status = "disabled"; }; - dma-controller@1fe00c10 { + apbdma1: dma-controller@1fe00c10 { compatible = "loongson,ls2k1000-apbdma"; reg = <0x0 0x1fe00c10 0x0 0x8>; interrupt-parent = <&liointc1>; @@ -405,6 +405,18 @@ i2s: i2s@1fe2d000 { status = "disabled"; }; + mmc: mmc@1fe2c000 { + compatible = "loongson,ls2k1000-mmc"; + reg = <0 0x1fe2c000 0 0x68>, + <0 0x1fe00438 0 0x8>; + interrupt-parent = <&liointc0>; + interrupts = <31 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + dmas = <&apbdma1 0>; + dma-names = "rx-tx"; + status = "disabled"; + }; + spi0: spi@1fff0220 { compatible = "loongson,ls2k1000-spi"; reg = <0x0 0x1fff0220 0x0 0x10>; From d3eece04f3021a782f02fb435c32142d130d5585 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Tue, 5 Aug 2025 19:00:22 +0800 Subject: [PATCH 1563/2411] LoongArch: dts: Add eMMC/SDIO controller support to Loongson-2K2000 The Loongson-2K2000 integrates one eMMC controller and one SDIO controller. The module is supported now, enable it. 
Signed-off-by: Binbin Zhou Signed-off-by: Huacai Chen --- .../loongarch/boot/dts/loongson-2k2000-ref.dts | 10 ++++++++++ arch/loongarch/boot/dts/loongson-2k2000.dtsi | 18 ++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts index ea9e6985d0e9..3c6b12220386 100644 --- a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts +++ b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts @@ -39,6 +39,16 @@ linux,cma { }; }; +&emmc { + status = "okay"; + + bus-width = <8>; + cap-mmc-highspeed; + mmc-hs200-1_8v; + no-sd; + no-sdio; +}; + &sata { status = "okay"; }; diff --git a/arch/loongarch/boot/dts/loongson-2k2000.dtsi b/arch/loongarch/boot/dts/loongson-2k2000.dtsi index 9e0411f2754c..00cc485b753b 100644 --- a/arch/loongarch/boot/dts/loongson-2k2000.dtsi +++ b/arch/loongarch/boot/dts/loongson-2k2000.dtsi @@ -259,6 +259,24 @@ uart0: serial@1fe001e0 { status = "disabled"; }; + emmc: mmc@79990000 { + compatible = "loongson,ls2k2000-mmc"; + reg = <0x0 0x79990000 0x0 0x1000>; + interrupt-parent = <&pic>; + interrupts = <51 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_EMMC_CLK>; + status = "disabled"; + }; + + mmc@79991000 { + compatible = "loongson,ls2k2000-mmc"; + reg = <0x0 0x79991000 0x0 0x1000>; + interrupt-parent = <&pic>; + interrupts = <50 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_EMMC_CLK>; + status = "disabled"; + }; + pcie@1a000000 { compatible = "loongson,ls2k-pci"; reg = <0x0 0x1a000000 0x0 0x02000000>, From d35ec48fa6c8fe0cfa4a03155109fec7677911d4 Mon Sep 17 00:00:00 2001 From: Wentao Guan Date: Tue, 5 Aug 2025 19:00:22 +0800 Subject: [PATCH 1564/2411] LoongArch: vDSO: Remove -nostdlib compiler flag Since $(LD) is directly used, hence -nostdlib is unneeded, MIPS has removed this, we should remove it too. bdbf2038fbf4 ("MIPS: VDSO: remove -nostdlib compiler flag"). In fact, other architectures also use $(LD) now.
fe00e50b2db8 ("ARM: 8858/1: vdso: use $(LD) instead of $(CC) to link VDSO") 691efbedc60d ("arm64: vdso: use $(LD) instead of $(CC) to link VDSO") 2ff906994b6c ("MIPS: VDSO: Use $(LD) instead of $(CC) to link VDSO") 2b2a25845d53 ("s390/vdso: Use $(LD) instead of $(CC) to link vDSO") Cc: stable@vger.kernel.org Reviewed-by: Yanteng Si Signed-off-by: Wentao Guan Signed-off-by: Huacai Chen --- arch/loongarch/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index ccd2c5e135c6..d8316f993482 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -36,7 +36,7 @@ endif # VDSO linker flags. ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ - $(filter -E%,$(KBUILD_CFLAGS)) -nostdlib -shared --build-id -T + $(filter -E%,$(KBUILD_CFLAGS)) -shared --build-id -T # # Shared build commands. From daa8af80d283ee9a7d42dd6f164a65036665b9d4 Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Mon, 4 Aug 2025 11:57:03 +0200 Subject: [PATCH 1565/2411] s390/mm: Allocate page table with PAGE_SIZE granularity Make vmem_pte_alloc() consistent by always allocating page table of PAGE_SIZE granularity, regardless of whether page_table_alloc() (with slab) or memblock_alloc() is used. This ensures page table can be fully freed when the corresponding page table entries are removed. 
Fixes: d08d4e7cd6bf ("s390/mm: use full 4KB page for 2KB PTE") Reviewed-by: Heiko Carstens Reviewed-by: Alexander Gordeev Signed-off-by: Sumanth Korikkar Signed-off-by: Alexander Gordeev --- arch/s390/mm/vmem.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 448dd6ed1069..f48ef361bc83 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -64,13 +64,12 @@ void *vmem_crst_alloc(unsigned long val) pte_t __ref *vmem_pte_alloc(void) { - unsigned long size = PTRS_PER_PTE * sizeof(pte_t); pte_t *pte; if (slab_is_available()) - pte = (pte_t *) page_table_alloc(&init_mm); + pte = (pte_t *)page_table_alloc(&init_mm); else - pte = (pte_t *) memblock_alloc(size, size); + pte = (pte_t *)memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!pte) return NULL; memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); From f9a348e0de19226fc3c7e81de7677d3fa2c4b2d8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Jul 2025 09:34:29 -0400 Subject: [PATCH 1566/2411] nfsd: don't set the ctime on delegated atime updates Clients will typically precede a DELEGRETURN for a delegation with delegated timestamp with a SETATTR to set the timestamps on the server to match what the client has. knfsd implements this by using the nfsd_setattr() infrastructure, which will set ATTR_CTIME on any update that goes to notify_change(). This is problematic as it means that the client will get a spurious ctime update when updating the atime. POSIX unfortunately doesn't phrase it succinctly, but updating the atime due to reads should not update the ctime. In this case, the client is sending a SETATTR to update the atime on the server to match its latest value. The ctime should not be advanced in this case as that would incorrectly indicate a change to the inode. Fix this by not implicitly setting ATTR_CTIME when ATTR_DELEG is set in __nfsd_setattr(). 
The decoder for FATTR4_WORD2_TIME_DELEG_MODIFY already sets ATTR_CTIME, so this is sufficient to make it skip setting the ctime on atime-only updates. Fixes: 7e13f4f8d27d ("nfsd: handle delegated timestamps in SETATTR") Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/vfs.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ee78b6fb1709..eaf04751d07f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -470,7 +470,15 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap) if (!iap->ia_valid) return 0; - iap->ia_valid |= ATTR_CTIME; + /* + * If ATTR_DELEG is set, then this is an update from a client that + * holds a delegation. If this is an update for only the atime, the + * ctime should not be changed. If the update contains the mtime + * too, then ATTR_CTIME should already be set. + */ + if (!(iap->ia_valid & ATTR_DELEG)) + iap->ia_valid |= ATTR_CTIME; + return notify_change(&nop_mnt_idmap, dentry, iap, NULL); } From e5a73150776f18547ee685c9f6bfafe549714899 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 18 Jul 2025 11:26:14 +1000 Subject: [PATCH 1567/2411] nfsd: avoid ref leak in nfsd_open_local_fh() If two calls to nfsd_open_local_fh() race and both successfully call nfsd_file_acquire_local(), they will both get an extra reference to the net to accompany the file reference stored in *pnf. One of them will fail to store (using xchg()) the file reference in *pnf and will drop that reference but WON'T drop the accompanying reference to the net. This leak means that when the nfs server is shut down it will hang in nfsd_shutdown_net() waiting for &nn->nfsd_net_free_done. This patch adds the missing nfsd_net_put(). 
Reported-by: Mike Snitzer Fixes: e6f7e1487ab5 ("nfs_localio: simplify interface to nfsd for getting nfsd_file") Cc: stable@vger.kernel.org Signed-off-by: NeilBrown Tested-by: Mike Snitzer Reviewed-by: Mike Snitzer Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/localio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c index 4f6468eb2adf..cb237f1b902a 100644 --- a/fs/nfsd/localio.c +++ b/fs/nfsd/localio.c @@ -103,10 +103,11 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom, if (nfsd_file_get(new) == NULL) goto again; /* - * Drop the ref we were going to install and the - * one we were going to return. + * Drop the ref we were going to install (both file and + * net) and the one we were going to return (only file). */ nfsd_file_put(localio); + nfsd_net_put(net); nfsd_file_put(localio); localio = new; } From c18646248fed07683d4cee8a8af933fc4fe83c0d Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Tue, 29 Jul 2025 13:03:48 +0100 Subject: [PATCH 1568/2411] RDMA/siw: Fix the sendmsg byte count in siw_tcp_sendpages Ever since commit c2ff29e99a76 ("siw: Inline do_tcp_sendpages()"), we have been doing this: static int siw_tcp_sendpages(struct socket *s, struct page **page, int offset, size_t size) [...] /* Calculate the number of bytes we need to push, for this page * specifically */ size_t bytes = min_t(size_t, PAGE_SIZE - offset, size); /* If we can't splice it, then copy it in, as normal */ if (!sendpage_ok(page[i])) msg.msg_flags &= ~MSG_SPLICE_PAGES; /* Set the bvec pointing to the page, with len $bytes */ bvec_set_page(&bvec, page[i], bytes, offset); /* Set the iter to $size, aka the size of the whole sendpages (!!!) */ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); try_page_again: lock_sock(sk); /* Sendmsg with $size size (!!!) */ rv = tcp_sendmsg_locked(sk, &msg, size); This means we've been sending oversized iov_iters and tcp_sendmsg calls for a while. 
This has been a benign bug because sendpage_ok() always returned true. With the recent slab allocator changes being slowly introduced into next (that disallow sendpage on large kmalloc allocations), we have recently hit out-of-bounds crashes, due to slight differences in iov_iter behavior between the MSG_SPLICE_PAGES and "regular" copy paths: (MSG_SPLICE_PAGES) skb_splice_from_iter iov_iter_extract_pages iov_iter_extract_bvec_pages uses i->nr_segs to correctly stop in its tracks before OoB'ing everywhere skb_splice_from_iter gets a "short" read (!MSG_SPLICE_PAGES) skb_copy_to_page_nocache copy=iov_iter_count [...] copy_from_iter /* this doesn't help */ if (unlikely(iter->count < len)) len = iter->count; iterate_bvec ... and we run off the bvecs Fix this by properly setting the iov_iter's byte count, plus sending the correct byte count to tcp_sendmsg_locked. Link: https://patch.msgid.link/r/20250729120348.495568-1-pfalcato@suse.de Cc: stable@vger.kernel.org Fixes: c2ff29e99a76 ("siw: Inline do_tcp_sendpages()") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202507220801.50a7210-lkp@intel.com Reviewed-by: David Howells Signed-off-by: Pedro Falcato Acked-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_qp_tx.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index 3a08f57d2211..f7dd32c6e5ba 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -340,18 +340,17 @@ static int siw_tcp_sendpages(struct socket *s, struct page **page, int offset, if (!sendpage_ok(page[i])) msg.msg_flags &= ~MSG_SPLICE_PAGES; bvec_set_page(&bvec, page[i], bytes, offset); - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, bytes); try_page_again: lock_sock(sk); - rv = tcp_sendmsg_locked(sk, &msg, size); + rv = 
tcp_sendmsg_locked(sk, &msg, bytes); release_sock(sk); if (rv > 0) { size -= rv; sent += rv; if (rv != bytes) { - offset += rv; bytes -= rv; goto try_page_again; } From f3ba7c9b0421e3935998334a860bd88f2ffdb18e Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Mon, 4 Aug 2025 21:40:03 +0800 Subject: [PATCH 1569/2411] smb: client: rename server mid_lock to mid_queue_lock This is step 1/4 of a patch series to fix mid_q_entry memory leaks caused by race conditions in callback execution. The current mid_lock name is somewhat ambiguous about what it protects. To prepare for splitting this lock into separate, more granular locks, this patch renames mid_lock to mid_queue_lock to clearly indicate its specific responsibility for protecting the pending_mid_q list and related queue operations. No functional changes are made in this patch - it only prepares the codebase for the lock splitting that follows. - mid_queue_lock for queue operations - mid_counter_lock for mid counter operations - per-mid locks for individual mid state management Signed-off-by: Wang Zhaolong Acked-by: Enzo Matsumiya Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 8 ++++---- fs/smb/client/cifsglob.h | 4 ++-- fs/smb/client/connect.c | 20 +++++++++---------- fs/smb/client/smb1ops.c | 10 +++++----- fs/smb/client/smb2ops.c | 26 ++++++++++++------------- fs/smb/client/smb2transport.c | 4 ++-- fs/smb/client/transport.c | 36 +++++++++++++++++------------------ 7 files changed, 54 insertions(+), 54 deletions(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index f1cea365b6f1..80d6a51b8c11 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -60,7 +60,7 @@ void cifs_dump_mids(struct TCP_Server_Info *server) return; cifs_dbg(VFS, "Dump pending requests:\n"); - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) { cifs_dbg(VFS, "State: %d Cmd: %d Pid: %d Cbdata: %p Mid %llu\n", 
mid_entry->mid_state, @@ -83,7 +83,7 @@ void cifs_dump_mids(struct TCP_Server_Info *server) mid_entry->resp_buf, 62); } } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); #endif /* CONFIG_CIFS_DEBUG2 */ } @@ -672,7 +672,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, "\n\tServer ConnectionId: 0x%llx", chan_server->conn_id); - spin_lock(&chan_server->mid_lock); + spin_lock(&chan_server->mid_queue_lock); list_for_each_entry(mid_entry, &chan_server->pending_mid_q, qhead) { seq_printf(m, "\n\t\tState: %d com: %d pid: %d cbdata: %p mid %llu", mid_entry->mid_state, @@ -681,7 +681,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) mid_entry->callback_data, mid_entry->mid); } - spin_unlock(&chan_server->mid_lock); + spin_unlock(&chan_server->mid_queue_lock); } spin_unlock(&ses->chan_lock); seq_puts(m, "\n--\n"); diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index a97e2cca2f53..2dd1ef274250 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -732,7 +732,7 @@ struct TCP_Server_Info { #endif wait_queue_head_t response_q; wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ - spinlock_t mid_lock; /* protect mid queue and it's entries */ + spinlock_t mid_queue_lock; /* protect mid queue */ struct list_head pending_mid_q; bool noblocksnd; /* use blocking sendmsg */ bool noautotune; /* do not autotune send buf sizes */ @@ -2007,7 +2007,7 @@ require use of the stronger protocol */ * GlobalCurrentXid * GlobalTotalActiveXid * TCP_Server_Info->srv_lock (anything in struct not protected by another lock and can change) - * TCP_Server_Info->mid_lock TCP_Server_Info->pending_mid_q cifs_get_tcp_session + * TCP_Server_Info->mid_queue_lock TCP_Server_Info->pending_mid_q cifs_get_tcp_session * ->CurrentMid * (any changes in mid_q_entry fields) * TCP_Server_Info->req_lock TCP_Server_Info->in_flight cifs_get_tcp_session diff --git 
a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 5eec8957f2a9..e4b577ca48d5 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -321,7 +321,7 @@ cifs_abort_connection(struct TCP_Server_Info *server) /* mark submitted MIDs for retry and issue callback */ INIT_LIST_HEAD(&retry_list); cifs_dbg(FYI, "%s: moving mids to private list\n", __func__); - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); list_for_each_entry_safe(mid, nmid, &server->pending_mid_q, qhead) { kref_get(&mid->refcount); if (mid->mid_state == MID_REQUEST_SUBMITTED) @@ -329,7 +329,7 @@ cifs_abort_connection(struct TCP_Server_Info *server) list_move(&mid->qhead, &retry_list); mid->mid_flags |= MID_DELETED; } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); cifs_server_unlock(server); cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__); @@ -884,13 +884,13 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type) * server there should be exactly one pending mid * corresponding to SMB1/SMB2 Negotiate packet. */ - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); list_for_each_entry_safe(mid, nmid, &server->pending_mid_q, qhead) { kref_get(&mid->refcount); list_move(&mid->qhead, &dispose_list); mid->mid_flags |= MID_DELETED; } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); /* Now try to reconnect once with NetBIOS session. */ server->with_rfc1001 = true; @@ -957,7 +957,7 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed) #ifdef CONFIG_CIFS_STATS2 mid->when_received = jiffies; #endif - spin_lock(&mid->server->mid_lock); + spin_lock(&mid->server->mid_queue_lock); if (!malformed) mid->mid_state = MID_RESPONSE_RECEIVED; else @@ -967,12 +967,12 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed) * function has finished processing it is a bug. 
*/ if (mid->mid_flags & MID_DELETED) { - spin_unlock(&mid->server->mid_lock); + spin_unlock(&mid->server->mid_queue_lock); pr_warn_once("trying to dequeue a deleted mid\n"); } else { list_del_init(&mid->qhead); mid->mid_flags |= MID_DELETED; - spin_unlock(&mid->server->mid_lock); + spin_unlock(&mid->server->mid_queue_lock); } } @@ -1101,7 +1101,7 @@ clean_demultiplex_info(struct TCP_Server_Info *server) struct list_head *tmp, *tmp2; LIST_HEAD(dispose_list); - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); cifs_dbg(FYI, "Clearing mid %llu\n", mid_entry->mid); @@ -1110,7 +1110,7 @@ clean_demultiplex_info(struct TCP_Server_Info *server) list_move(&mid_entry->qhead, &dispose_list); mid_entry->mid_flags |= MID_DELETED; } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); /* now walk dispose list and issue callbacks */ list_for_each_safe(tmp, tmp2, &dispose_list) { @@ -1822,7 +1822,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, tcp_ses->compression.requested = ctx->compress; spin_lock_init(&tcp_ses->req_lock); spin_lock_init(&tcp_ses->srv_lock); - spin_lock_init(&tcp_ses->mid_lock); + spin_lock_init(&tcp_ses->mid_queue_lock); INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); INIT_LIST_HEAD(&tcp_ses->smb_ses_list); INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request); diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index f722c7f47b07..e16566d3c319 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -95,17 +95,17 @@ cifs_find_mid(struct TCP_Server_Info *server, char *buffer) struct smb_hdr *buf = (struct smb_hdr *)buffer; struct mid_q_entry *mid; - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); list_for_each_entry(mid, &server->pending_mid_q, qhead) { if (compare_mid(mid->mid, buf) && mid->mid_state == MID_REQUEST_SUBMITTED && le16_to_cpu(mid->command) == buf->Command) 
{ kref_get(&mid->refcount); - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); return mid; } } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); return NULL; } @@ -169,7 +169,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server) __u16 last_mid, cur_mid; bool collision, reconnect = false; - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); /* mid is 16 bit only for CIFS/SMB */ cur_mid = (__u16)((server->CurrentMid) & 0xffff); @@ -228,7 +228,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server) } cur_mid++; } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); if (reconnect) { cifs_signal_cifsd_for_reconnect(server, false); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index bd6c1fb2a992..7935f9b433ac 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -374,19 +374,19 @@ smb2_get_next_mid(struct TCP_Server_Info *server) { __u64 mid; /* for SMB2 we need the current value */ - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); mid = server->CurrentMid++; - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); return mid; } static void smb2_revert_current_mid(struct TCP_Server_Info *server, const unsigned int val) { - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); if (server->CurrentMid >= val) server->CurrentMid -= val; - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); } static struct mid_q_entry * @@ -401,7 +401,7 @@ __smb2_find_mid(struct TCP_Server_Info *server, char *buf, bool dequeue) return NULL; } - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); list_for_each_entry(mid, &server->pending_mid_q, qhead) { if ((mid->mid == wire_mid) && (mid->mid_state == MID_REQUEST_SUBMITTED) && @@ -411,11 +411,11 @@ __smb2_find_mid(struct TCP_Server_Info *server, char *buf, bool dequeue) list_del_init(&mid->qhead); mid->mid_flags |= MID_DELETED; } - 
spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); return mid; } } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); return NULL; } @@ -460,9 +460,9 @@ smb2_negotiate(const unsigned int xid, { int rc; - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); server->CurrentMid = 0; - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); rc = SMB2_negotiate(xid, ses, server); return rc; } @@ -4809,18 +4809,18 @@ static void smb2_decrypt_offload(struct work_struct *work) } else { spin_lock(&dw->server->srv_lock); if (dw->server->tcpStatus == CifsNeedReconnect) { - spin_lock(&dw->server->mid_lock); + spin_lock(&dw->server->mid_queue_lock); mid->mid_state = MID_RETRY_NEEDED; - spin_unlock(&dw->server->mid_lock); + spin_unlock(&dw->server->mid_queue_lock); spin_unlock(&dw->server->srv_lock); mid->callback(mid); } else { - spin_lock(&dw->server->mid_lock); + spin_lock(&dw->server->mid_queue_lock); mid->mid_state = MID_REQUEST_SUBMITTED; mid->mid_flags &= ~(MID_DELETED); list_add_tail(&mid->qhead, &dw->server->pending_mid_q); - spin_unlock(&dw->server->mid_lock); + spin_unlock(&dw->server->mid_queue_lock); spin_unlock(&dw->server->srv_lock); } } diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 475b36c27f65..ff9ef7fcd010 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -840,9 +840,9 @@ smb2_get_mid_entry(struct cifs_ses *ses, struct TCP_Server_Info *server, *mid = smb2_mid_entry_alloc(shdr, server); if (*mid == NULL) return -ENOMEM; - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); list_add_tail(&(*mid)->qhead, &server->pending_mid_q); - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); return 0; } diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 191783f553ce..12dc927aa4a2 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -160,12 +160,12 @@ void 
__release_mid(struct kref *refcount) void delete_mid(struct mid_q_entry *mid) { - spin_lock(&mid->server->mid_lock); + spin_lock(&mid->server->mid_queue_lock); if (!(mid->mid_flags & MID_DELETED)) { list_del_init(&mid->qhead); mid->mid_flags |= MID_DELETED; } - spin_unlock(&mid->server->mid_lock); + spin_unlock(&mid->server->mid_queue_lock); release_mid(mid); } @@ -716,9 +716,9 @@ static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, *ppmidQ = alloc_mid(in_buf, ses->server); if (*ppmidQ == NULL) return -ENOMEM; - spin_lock(&ses->server->mid_lock); + spin_lock(&ses->server->mid_queue_lock); list_add_tail(&(*ppmidQ)->qhead, &ses->server->pending_mid_q); - spin_unlock(&ses->server->mid_lock); + spin_unlock(&ses->server->mid_queue_lock); return 0; } @@ -819,9 +819,9 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, mid->mid_state = MID_REQUEST_SUBMITTED; /* put it on the pending_mid_q */ - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); list_add_tail(&mid->qhead, &server->pending_mid_q); - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); /* * Need to store the time in mid before calling I/O. 
For call_async, @@ -880,10 +880,10 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) cifs_dbg(FYI, "%s: cmd=%d mid=%llu state=%d\n", __func__, le16_to_cpu(mid->command), mid->mid, mid->mid_state); - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); switch (mid->mid_state) { case MID_RESPONSE_READY: - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); return rc; case MID_RETRY_NEEDED: rc = -EAGAIN; @@ -902,13 +902,13 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) list_del_init(&mid->qhead); mid->mid_flags |= MID_DELETED; } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n", __func__, mid->mid, mid->mid_state); rc = -EIO; goto sync_mid_done; } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); sync_mid_done: release_mid(mid); @@ -1213,7 +1213,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, cifs_server_dbg(FYI, "Cancelling wait for mid %llu cmd: %d\n", midQ[i]->mid, le16_to_cpu(midQ[i]->command)); send_cancel(server, &rqst[i], midQ[i]); - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); midQ[i]->mid_flags |= MID_WAIT_CANCELLED; if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED || midQ[i]->mid_state == MID_RESPONSE_RECEIVED) { @@ -1221,7 +1221,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, cancelled_mid[i] = true; credits[i].value = 0; } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); } } @@ -1423,16 +1423,16 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, rc = wait_for_response(server, midQ); if (rc != 0) { send_cancel(server, &rqst, midQ); - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); if (midQ->mid_state == MID_REQUEST_SUBMITTED || midQ->mid_state == MID_RESPONSE_RECEIVED) { /* no longer considered to be "in-flight" */ midQ->callback 
= release_mid; - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); add_credits(server, &credits, 0); return rc; } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); } rc = cifs_sync_mid_result(midQ, server); @@ -1605,15 +1605,15 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, rc = wait_for_response(server, midQ); if (rc) { send_cancel(server, &rqst, midQ); - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); if (midQ->mid_state == MID_REQUEST_SUBMITTED || midQ->mid_state == MID_RESPONSE_RECEIVED) { /* no longer considered to be "in-flight" */ midQ->callback = release_mid; - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); return rc; } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); } /* We got the response - restart system call. */ From 9bd42798d5bf87f56d229a27e40140df95ef743d Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Mon, 4 Aug 2025 21:40:04 +0800 Subject: [PATCH 1570/2411] smb: client: add mid_counter_lock to protect the mid counter This is step 2/4 of a patch series to fix mid_q_entry memory leaks caused by race conditions in callback execution. Add a dedicated mid_counter_lock to protect current_mid counter, separating it from mid_queue_lock which protects pending_mid_q operations. This reduces lock contention and prepares for finer-grained locking in subsequent patches. Changes: - Add TCP_Server_Info->mid_counter_lock spinlock - Rename CurrentMid to current_mid for consistency - Use mid_counter_lock to protect current_mid access - Update locking documentation in cifsglob.h This separation allows mid allocation to proceed without blocking queue operations, improving performance under heavy load.
Signed-off-by: Wang Zhaolong Acked-by: Enzo Matsumiya Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 5 +++-- fs/smb/client/connect.c | 5 +++-- fs/smb/client/smb1ops.c | 11 ++++++----- fs/smb/client/smb2ops.c | 40 +++++++++++++++++++-------------------- fs/smb/client/transport.c | 12 ++++++------ 5 files changed, 38 insertions(+), 35 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 2dd1ef274250..cfba226f3396 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -733,6 +733,7 @@ struct TCP_Server_Info { wait_queue_head_t response_q; wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ spinlock_t mid_queue_lock; /* protect mid queue */ + spinlock_t mid_counter_lock; struct list_head pending_mid_q; bool noblocksnd; /* use blocking sendmsg */ bool noautotune; /* do not autotune send buf sizes */ @@ -770,7 +771,7 @@ struct TCP_Server_Info { /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */ unsigned int capabilities; /* selective disabling of caps by smb sess */ int timeAdj; /* Adjust for difference in server time zone in sec */ - __u64 CurrentMid; /* multiplex id - rotating counter, protected by GlobalMid_Lock */ + __u64 current_mid; /* multiplex id - rotating counter, protected by mid_counter_lock */ char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */ /* 16th byte of RFC1001 workstation name is always null */ char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; @@ -2008,8 +2009,8 @@ require use of the stronger protocol */ * GlobalTotalActiveXid * TCP_Server_Info->srv_lock (anything in struct not protected by another lock and can change) * TCP_Server_Info->mid_queue_lock TCP_Server_Info->pending_mid_q cifs_get_tcp_session - * ->CurrentMid * (any changes in mid_q_entry fields) + * TCP_Server_Info->mid_counter_lock TCP_Server_Info->current_mid cifs_get_tcp_session * TCP_Server_Info->req_lock TCP_Server_Info->in_flight cifs_get_tcp_session * ->credits * ->echo_credits diff 
--git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index e4b577ca48d5..74ad5881ee45 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -358,7 +358,7 @@ static bool cifs_tcp_ses_needs_reconnect(struct TCP_Server_Info *server, int num } cifs_dbg(FYI, "Mark tcp session as need reconnect\n"); - trace_smb3_reconnect(server->CurrentMid, server->conn_id, + trace_smb3_reconnect(server->current_mid, server->conn_id, server->hostname); server->tcpStatus = CifsNeedReconnect; @@ -1242,7 +1242,7 @@ smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server) spin_unlock(&server->req_lock); wake_up(&server->request_q); - trace_smb3_hdr_credits(server->CurrentMid, + trace_smb3_hdr_credits(server->current_mid, server->conn_id, server->hostname, scredits, le16_to_cpu(shdr->CreditRequest), in_flight); cifs_server_dbg(FYI, "%s: added %u credits total=%d\n", @@ -1823,6 +1823,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, spin_lock_init(&tcp_ses->req_lock); spin_lock_init(&tcp_ses->srv_lock); spin_lock_init(&tcp_ses->mid_queue_lock); + spin_lock_init(&tcp_ses->mid_counter_lock); INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); INIT_LIST_HEAD(&tcp_ses->smb_ses_list); INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request); diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index e16566d3c319..893a1ea8c000 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -169,10 +169,9 @@ cifs_get_next_mid(struct TCP_Server_Info *server) __u16 last_mid, cur_mid; bool collision, reconnect = false; - spin_lock(&server->mid_queue_lock); - + spin_lock(&server->mid_counter_lock); /* mid is 16 bit only for CIFS/SMB */ - cur_mid = (__u16)((server->CurrentMid) & 0xffff); + cur_mid = (__u16)((server->current_mid) & 0xffff); /* we do not want to loop forever */ last_mid = cur_mid; cur_mid++; @@ -198,6 +197,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server) cur_mid++; num_mids = 0; + spin_lock(&server->mid_queue_lock); 
list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) { ++num_mids; if (mid_entry->mid == cur_mid && @@ -207,6 +207,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server) break; } } + spin_unlock(&server->mid_queue_lock); /* * if we have more than 32k mids in the list, then something @@ -223,12 +224,12 @@ cifs_get_next_mid(struct TCP_Server_Info *server) if (!collision) { mid = (__u64)cur_mid; - server->CurrentMid = mid; + server->current_mid = mid; break; } cur_mid++; } - spin_unlock(&server->mid_queue_lock); + spin_unlock(&server->mid_counter_lock); if (reconnect) { cifs_signal_cifsd_for_reconnect(server, false); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 7935f9b433ac..ebaeb2993569 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -91,7 +91,7 @@ smb2_add_credits(struct TCP_Server_Info *server, if (*val > 65000) { *val = 65000; /* Don't get near 64K credits, avoid srv bugs */ pr_warn_once("server overflowed SMB3 credits\n"); - trace_smb3_overflow_credits(server->CurrentMid, + trace_smb3_overflow_credits(server->current_mid, server->conn_id, server->hostname, *val, add, server->in_flight); } @@ -136,7 +136,7 @@ smb2_add_credits(struct TCP_Server_Info *server, wake_up(&server->request_q); if (reconnect_detected) { - trace_smb3_reconnect_detected(server->CurrentMid, + trace_smb3_reconnect_detected(server->current_mid, server->conn_id, server->hostname, scredits, add, in_flight); cifs_dbg(FYI, "trying to put %d credits from the old server instance %d\n", @@ -144,7 +144,7 @@ smb2_add_credits(struct TCP_Server_Info *server, } if (reconnect_with_invalid_credits) { - trace_smb3_reconnect_with_invalid_credits(server->CurrentMid, + trace_smb3_reconnect_with_invalid_credits(server->current_mid, server->conn_id, server->hostname, scredits, add, in_flight); cifs_dbg(FYI, "Negotiate operation when server credits is non-zero. 
Optype: %d, server credits: %d, credits added: %d\n", optype, scredits, add); @@ -176,7 +176,7 @@ smb2_add_credits(struct TCP_Server_Info *server, break; } - trace_smb3_add_credits(server->CurrentMid, + trace_smb3_add_credits(server->current_mid, server->conn_id, server->hostname, scredits, add, in_flight); cifs_dbg(FYI, "%s: added %u credits total=%d\n", __func__, add, scredits); } @@ -203,7 +203,7 @@ smb2_set_credits(struct TCP_Server_Info *server, const int val) in_flight = server->in_flight; spin_unlock(&server->req_lock); - trace_smb3_set_credits(server->CurrentMid, + trace_smb3_set_credits(server->current_mid, server->conn_id, server->hostname, scredits, val, in_flight); cifs_dbg(FYI, "%s: set %u credits\n", __func__, val); @@ -288,7 +288,7 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, size_t size, in_flight = server->in_flight; spin_unlock(&server->req_lock); - trace_smb3_wait_credits(server->CurrentMid, + trace_smb3_wait_credits(server->current_mid, server->conn_id, server->hostname, scredits, -(credits->value), in_flight); cifs_dbg(FYI, "%s: removed %u credits total=%d\n", __func__, credits->value, scredits); @@ -316,7 +316,7 @@ smb2_adjust_credits(struct TCP_Server_Info *server, server->credits, server->in_flight, new_val - credits->value, cifs_trace_rw_credits_no_adjust_up); - trace_smb3_too_many_credits(server->CurrentMid, + trace_smb3_too_many_credits(server->current_mid, server->conn_id, server->hostname, 0, credits->value - new_val, 0); cifs_server_dbg(VFS, "R=%x[%x] request has less credits (%d) than required (%d)", subreq->rreq->debug_id, subreq->subreq.debug_index, @@ -338,7 +338,7 @@ smb2_adjust_credits(struct TCP_Server_Info *server, server->credits, server->in_flight, new_val - credits->value, cifs_trace_rw_credits_old_session); - trace_smb3_reconnect_detected(server->CurrentMid, + trace_smb3_reconnect_detected(server->current_mid, server->conn_id, server->hostname, scredits, credits->value - new_val, in_flight); cifs_server_dbg(VFS, 
"R=%x[%x] trying to return %d credits to old session\n", @@ -358,7 +358,7 @@ smb2_adjust_credits(struct TCP_Server_Info *server, spin_unlock(&server->req_lock); wake_up(&server->request_q); - trace_smb3_adj_credits(server->CurrentMid, + trace_smb3_adj_credits(server->current_mid, server->conn_id, server->hostname, scredits, credits->value - new_val, in_flight); cifs_dbg(FYI, "%s: adjust added %u credits total=%d\n", @@ -374,19 +374,19 @@ smb2_get_next_mid(struct TCP_Server_Info *server) { __u64 mid; /* for SMB2 we need the current value */ - spin_lock(&server->mid_queue_lock); - mid = server->CurrentMid++; - spin_unlock(&server->mid_queue_lock); + spin_lock(&server->mid_counter_lock); + mid = server->current_mid++; + spin_unlock(&server->mid_counter_lock); return mid; } static void smb2_revert_current_mid(struct TCP_Server_Info *server, const unsigned int val) { - spin_lock(&server->mid_queue_lock); - if (server->CurrentMid >= val) - server->CurrentMid -= val; - spin_unlock(&server->mid_queue_lock); + spin_lock(&server->mid_counter_lock); + if (server->current_mid >= val) + server->current_mid -= val; + spin_unlock(&server->mid_counter_lock); } static struct mid_q_entry * @@ -460,9 +460,9 @@ smb2_negotiate(const unsigned int xid, { int rc; - spin_lock(&server->mid_queue_lock); - server->CurrentMid = 0; - spin_unlock(&server->mid_queue_lock); + spin_lock(&server->mid_counter_lock); + server->current_mid = 0; + spin_unlock(&server->mid_counter_lock); rc = SMB2_negotiate(xid, ses, server); return rc; } @@ -2498,7 +2498,7 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server) spin_unlock(&server->req_lock); wake_up(&server->request_q); - trace_smb3_pend_credits(server->CurrentMid, + trace_smb3_pend_credits(server->current_mid, server->conn_id, server->hostname, scredits, le16_to_cpu(shdr->CreditRequest), in_flight); cifs_dbg(FYI, "%s: status pending add %u credits total=%d\n", diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 
12dc927aa4a2..8037accc3987 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -397,7 +397,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, * socket so the server throws away the partial SMB */ cifs_signal_cifsd_for_reconnect(server, false); - trace_smb3_partial_send_reconnect(server->CurrentMid, + trace_smb3_partial_send_reconnect(server->current_mid, server->conn_id, server->hostname); } smbd_done: @@ -509,7 +509,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, in_flight = server->in_flight; spin_unlock(&server->req_lock); - trace_smb3_nblk_credits(server->CurrentMid, + trace_smb3_nblk_credits(server->current_mid, server->conn_id, server->hostname, scredits, -1, in_flight); cifs_dbg(FYI, "%s: remove %u credits total=%d\n", __func__, 1, scredits); @@ -542,7 +542,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, in_flight = server->in_flight; spin_unlock(&server->req_lock); - trace_smb3_credit_timeout(server->CurrentMid, + trace_smb3_credit_timeout(server->current_mid, server->conn_id, server->hostname, scredits, num_credits, in_flight); cifs_server_dbg(VFS, "wait timed out after %d ms\n", @@ -585,7 +585,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, spin_unlock(&server->req_lock); trace_smb3_credit_timeout( - server->CurrentMid, + server->current_mid, server->conn_id, server->hostname, scredits, num_credits, in_flight); cifs_server_dbg(VFS, "wait timed out after %d ms\n", @@ -615,7 +615,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, in_flight = server->in_flight; spin_unlock(&server->req_lock); - trace_smb3_waitff_credits(server->CurrentMid, + trace_smb3_waitff_credits(server->current_mid, server->conn_id, server->hostname, scredits, -(num_credits), in_flight); cifs_dbg(FYI, "%s: remove %u credits total=%d\n", @@ -666,7 +666,7 @@ wait_for_compound_request(struct TCP_Server_Info *server, int 
num, */ if (server->in_flight == 0) { spin_unlock(&server->req_lock); - trace_smb3_insufficient_credits(server->CurrentMid, + trace_smb3_insufficient_credits(server->current_mid, server->conn_id, server->hostname, scredits, num, in_flight); cifs_dbg(FYI, "%s: %d requests in flight, needed %d total=%d\n", From 3fd8ec2fc93b009e5288b123d77292b8b1b9e1e7 Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Mon, 4 Aug 2025 21:40:05 +0800 Subject: [PATCH 1571/2411] smb: client: eliminate mid_flags field This is step 3/4 of a patch series to fix mid_q_entry memory leaks caused by race conditions in callback execution. Replace the mid_flags bitmask with dedicated boolean fields to simplify locking logic and improve code readability: - Replace MID_DELETED with bool deleted_from_q - Replace MID_WAIT_CANCELLED with bool wait_cancelled - Remove mid_flags field entirely The new boolean fields have clearer semantics: - deleted_from_q: whether mid has been removed from pending_mid_q - wait_cancelled: whether request was cancelled during wait This change reduces memory usage (from 4-byte bitmask to 2 boolean flags) and eliminates confusion about which lock protects which flag bits, preparing for per-mid locking in the next patch.
Signed-off-by: Wang Zhaolong Acked-by: Enzo Matsumiya Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 9 +++------ fs/smb/client/connect.c | 10 +++++----- fs/smb/client/smb2ops.c | 4 ++-- fs/smb/client/transport.c | 12 ++++++------ 4 files changed, 16 insertions(+), 19 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index cfba226f3396..e6830ab3a546 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1730,9 +1730,10 @@ struct mid_q_entry { unsigned int resp_buf_size; int mid_state; /* wish this were enum but can not pass to wait_event */ int mid_rc; /* rc for MID_RC */ - unsigned int mid_flags; __le16 command; /* smb command code */ unsigned int optype; /* operation type */ + bool wait_cancelled:1; /* Cancelled while waiting for response */ + bool deleted_from_q:1; /* Whether Mid has been dequeued frem pending_mid_q */ bool large_buf:1; /* if valid response, is pointer to large buf */ bool multiRsp:1; /* multiple trans2 responses for one request */ bool multiEnd:1; /* both received */ @@ -1894,10 +1895,6 @@ static inline bool is_replayable_error(int error) #define MID_RESPONSE_READY 0x40 /* ready for other process handle the rsp */ #define MID_RC 0x80 /* mid_rc contains custom rc */ -/* Flags */ -#define MID_WAIT_CANCELLED 1 /* Cancelled while waiting for response */ -#define MID_DELETED 2 /* Mid has been dequeued/deleted */ - /* Types of response buffer returned from SendReceive2 */ #define CIFS_NO_BUFFER 0 /* Response buffer not returned */ #define CIFS_SMALL_BUFFER 1 @@ -2009,7 +2006,7 @@ require use of the stronger protocol */ * GlobalTotalActiveXid * TCP_Server_Info->srv_lock (anything in struct not protected by another lock and can change) * TCP_Server_Info->mid_queue_lock TCP_Server_Info->pending_mid_q cifs_get_tcp_session - * (any changes in mid_q_entry fields) + * mid_q_entry->deleted_from_q * TCP_Server_Info->mid_counter_lock TCP_Server_Info->current_mid cifs_get_tcp_session * 
TCP_Server_Info->req_lock TCP_Server_Info->in_flight cifs_get_tcp_session * ->credits diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 74ad5881ee45..587845a2452d 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -327,7 +327,7 @@ cifs_abort_connection(struct TCP_Server_Info *server) if (mid->mid_state == MID_REQUEST_SUBMITTED) mid->mid_state = MID_RETRY_NEEDED; list_move(&mid->qhead, &retry_list); - mid->mid_flags |= MID_DELETED; + mid->deleted_from_q = true; } spin_unlock(&server->mid_queue_lock); cifs_server_unlock(server); @@ -888,7 +888,7 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type) list_for_each_entry_safe(mid, nmid, &server->pending_mid_q, qhead) { kref_get(&mid->refcount); list_move(&mid->qhead, &dispose_list); - mid->mid_flags |= MID_DELETED; + mid->deleted_from_q = true; } spin_unlock(&server->mid_queue_lock); @@ -966,12 +966,12 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed) * Trying to handle/dequeue a mid after the send_recv() * function has finished processing it is a bug. 
*/ - if (mid->mid_flags & MID_DELETED) { + if (mid->deleted_from_q == true) { spin_unlock(&mid->server->mid_queue_lock); pr_warn_once("trying to dequeue a deleted mid\n"); } else { list_del_init(&mid->qhead); - mid->mid_flags |= MID_DELETED; + mid->deleted_from_q = true; spin_unlock(&mid->server->mid_queue_lock); } } @@ -1108,7 +1108,7 @@ clean_demultiplex_info(struct TCP_Server_Info *server) kref_get(&mid_entry->refcount); mid_entry->mid_state = MID_SHUTDOWN; list_move(&mid_entry->qhead, &dispose_list); - mid_entry->mid_flags |= MID_DELETED; + mid_entry->deleted_from_q = true; } spin_unlock(&server->mid_queue_lock); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index ebaeb2993569..ad8947434b71 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -409,7 +409,7 @@ __smb2_find_mid(struct TCP_Server_Info *server, char *buf, bool dequeue) kref_get(&mid->refcount); if (dequeue) { list_del_init(&mid->qhead); - mid->mid_flags |= MID_DELETED; + mid->deleted_from_q = true; } spin_unlock(&server->mid_queue_lock); return mid; @@ -4817,7 +4817,7 @@ static void smb2_decrypt_offload(struct work_struct *work) } else { spin_lock(&dw->server->mid_queue_lock); mid->mid_state = MID_REQUEST_SUBMITTED; - mid->mid_flags &= ~(MID_DELETED); + mid->deleted_from_q = false; list_add_tail(&mid->qhead, &dw->server->pending_mid_q); spin_unlock(&dw->server->mid_queue_lock); diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 8037accc3987..ca9358c24ceb 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -89,7 +89,7 @@ void __release_mid(struct kref *refcount) #endif struct TCP_Server_Info *server = midEntry->server; - if (midEntry->resp_buf && (midEntry->mid_flags & MID_WAIT_CANCELLED) && + if (midEntry->resp_buf && (midEntry->wait_cancelled) && (midEntry->mid_state == MID_RESPONSE_RECEIVED || midEntry->mid_state == MID_RESPONSE_READY) && server->ops->handle_cancelled_mid) @@ -161,9 +161,9 @@ void delete_mid(struct 
mid_q_entry *mid) { spin_lock(&mid->server->mid_queue_lock); - if (!(mid->mid_flags & MID_DELETED)) { + if (mid->deleted_from_q == false) { list_del_init(&mid->qhead); - mid->mid_flags |= MID_DELETED; + mid->deleted_from_q = true; } spin_unlock(&mid->server->mid_queue_lock); @@ -898,9 +898,9 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) rc = mid->mid_rc; break; default: - if (!(mid->mid_flags & MID_DELETED)) { + if (mid->deleted_from_q == false) { list_del_init(&mid->qhead); - mid->mid_flags |= MID_DELETED; + mid->deleted_from_q = true; } spin_unlock(&server->mid_queue_lock); cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n", @@ -1214,7 +1214,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, midQ[i]->mid, le16_to_cpu(midQ[i]->command)); send_cancel(server, &rqst[i], midQ[i]); spin_lock(&server->mid_queue_lock); - midQ[i]->mid_flags |= MID_WAIT_CANCELLED; + midQ[i]->wait_cancelled = true; if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED || midQ[i]->mid_state == MID_RESPONSE_RECEIVED) { midQ[i]->callback = cifs_cancelled_callback; From 54473e0ef849f44e5ee43e6d6746c27030c3825b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Aug 2025 22:22:09 +0200 Subject: [PATCH 1572/2411] perf/core: Preserve AUX buffer allocation failure result A recent overhaul sets the return value to 0 unconditionally after the allocations, which causes reference count leaks and corrupts the user->vm accounting. Preserve the AUX buffer allocation failure return value, so that the subsequent code works correctly. 
Fixes: 0983593f32c4 ("perf/core: Lift event->mmap_mutex in perf_mmap()") Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Stoakes Cc: stable@vger.kernel.org --- kernel/events/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 22fdf0c187cd..c05262e15b7d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7115,6 +7115,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) perf_event_update_time(event); perf_event_init_userpage(event); perf_event_update_userpage(event); + ret = 0; } else { ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages, event->attr.aux_watermark, flags); @@ -7122,8 +7123,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) rb->aux_mmap_locked = extra; } - ret = 0; - unlock: if (!ret) { atomic_long_add(user_extra, &user->locked_vm); From 5468c0fbccbb9d156522c50832244a8b722374fb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 2 Aug 2025 12:39:39 +0200 Subject: [PATCH 1573/2411] perf/core: Don't leak AUX buffer refcount on allocation failure Failure of the AUX buffer allocation leaks the reference count. Set the reference count to 1 only when the allocation succeeds. 
Fixes: 45bfb2e50471 ("perf: Add AUX area to ring buffer for raw data streams") Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Stoakes Cc: stable@vger.kernel.org --- kernel/events/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index c05262e15b7d..e89e77228591 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7051,8 +7051,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) ret = 0; goto unlock; } - - atomic_set(&rb->aux_mmap_count, 1); } user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10); @@ -7119,8 +7117,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) } else { ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages, event->attr.aux_watermark, flags); - if (!ret) + if (!ret) { + atomic_set(&rb->aux_mmap_count, 1); rb->aux_mmap_locked = extra; + } } unlock: @@ -7130,6 +7130,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) atomic_inc(&event->mmap_count); } else if (rb) { + /* AUX allocation failed */ atomic_dec(&rb->mmap_count); } aux_unlock: From 07091aade394f690e7b655578140ef84d0e8d7b0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 2 Aug 2025 12:49:48 +0200 Subject: [PATCH 1574/2411] perf/core: Exit early on perf_mmap() fail When perf_mmap() fails to allocate a buffer, it still invokes the event_mapped() callback of the related event. On X86 this might increase the perf_rdpmc_allowed reference counter. But nothing undoes this as perf_mmap_close() is never called in this case, which causes another reference count leak. Return early on failure to prevent that. 
Fixes: 1e0fb9ec679c ("perf: Add pmu callbacks to track event mapping and unmapping") Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Stoakes Cc: stable@vger.kernel.org --- kernel/events/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index e89e77228591..a2e3591175c6 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7138,6 +7138,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) mutex_unlock(aux_mutex); mutex_unlock(&event->mmap_mutex); + if (ret) + return ret; + /* * Since pinned accounting is per vm we cannot allow fork() to copy our * vma. @@ -7145,8 +7148,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP); vma->vm_ops = &perf_mmap_vmops; - if (!ret) - ret = map_range(rb, vma); + ret = map_range(rb, vma); mapped = get_mapped(event, event_mapped); if (mapped) From f74b9f4ba63ffdf597aaaa6cad7e284cb8e04820 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 2 Aug 2025 12:48:55 +0200 Subject: [PATCH 1575/2411] perf/core: Handle buffer mapping fail correctly in perf_mmap() After successful allocation of a buffer or a successful attachment to an existing buffer perf_mmap() tries to map the buffer read only into the page table. If that fails, the already set up page table entries are zapped, but the other perf specific side effects of that failure are not handled. The calling code just cleans up the VMA and does not invoke perf_mmap_close(). This leaks reference counts, corrupts user->vm accounting and also results in an unbalanced invocation of event::event_mapped(). Cure this by moving the event::event_mapped() invocation before the map_range() call so that on map_range() failure perf_mmap_close() can be invoked without causing an unbalanced event::event_unmapped() call. perf_mmap_close() undoes the reference counts and eventually frees buffers. 
Fixes: b709eb872e19 ("perf: map pages in advance") Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Stoakes Cc: stable@vger.kernel.org --- kernel/events/core.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index a2e3591175c6..4563bd864bbc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7148,12 +7148,20 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP); vma->vm_ops = &perf_mmap_vmops; - ret = map_range(rb, vma); - mapped = get_mapped(event, event_mapped); if (mapped) mapped(event, vma->vm_mm); + /* + * Try to map it into the page table. On fail, invoke + * perf_mmap_close() to undo the above, as the callsite expects + * full cleanup in this case and therefore does not invoke + * vmops::close(). + */ + ret = map_range(rb, vma); + if (ret) + perf_mmap_close(vma); + return ret; } From b024d7b56c77191cde544f838debb7f8451cd0d6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jul 2025 23:01:21 +0200 Subject: [PATCH 1576/2411] perf/core: Prevent VMA split of buffer mappings The perf mmap code is careful about mmap()'ing the user page with the ringbuffer and additionally the auxiliary buffer, when the event supports it. Once the first mapping is established, subsequent mappings have to use the same offset and the same size in both cases. The reference counting for the ringbuffer and the auxiliary buffer depends on this being correct. Though perf does not prevent that a related mapping is split via mmap(2), munmap(2) or mremap(2). A split of a VMA results in perf_mmap_open() calls, which take reference counts, but then the subsequent perf_mmap_close() calls are no longer fulfilling the offset and size checks. This leads to reference count leaks.
As perf already has the requirement for subsequent mappings to match the initial mapping, the obvious consequence is that VMA splits, caused by resizing of a mapping or partial unmapping, have to be prevented. Implement the vm_operations_struct::may_split() callback and return unconditionally -EINVAL. That ensures that the mapping offsets and sizes cannot be changed after the fact. Remapping to a different fixed address with the same size is still possible as it takes the references for the new mapping and drops those of the old mapping. Fixes: 45bfb2e50471 ("perf: Add AUX area to ring buffer for raw data streams") Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-27504 Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Stoakes Acked-by: Arnaldo Carvalho de Melo Acked-by: Vlastimil Babka Cc: stable@vger.kernel.org --- kernel/events/core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4563bd864bbc..8060c2857bb2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6842,10 +6842,20 @@ static vm_fault_t perf_mmap_pfn_mkwrite(struct vm_fault *vmf) return vmf->pgoff == 0 ? 0 : VM_FAULT_SIGBUS; } +static int perf_mmap_may_split(struct vm_area_struct *vma, unsigned long addr) +{ + /* + * Forbid splitting perf mappings to prevent refcount leaks due to + * the resulting non-matching offsets and sizes. See open()/close(). 
+ */ + return -EINVAL; +} + static const struct vm_operations_struct perf_mmap_vmops = { .open = perf_mmap_open, .close = perf_mmap_close, /* non mergeable */ .pfn_mkwrite = perf_mmap_pfn_mkwrite, + .may_split = perf_mmap_may_split, }; static int map_range(struct perf_buffer *rb, struct vm_area_struct *vma) From 084d2ac4030c5919e85bba1f4af26e33491469cb Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sat, 2 Aug 2025 22:55:35 +0200 Subject: [PATCH 1577/2411] selftests/perf_events: Add a mmap() correctness test Exercise various mmap(), munmap() and mremap() invocations, which might cause a perf buffer mapping to be split or truncated. To avoid hard coding the perf event and having dependencies on architectures and configuration options, scan through event types in sysfs and try to open them. On success, try to mmap() and if that succeeds try to mmap() the AUX buffer. In case that no AUX buffer supporting event is found, only test the base buffer mapping. If no mappable event is found or permissions are not sufficient, skip the tests. Reserve a PROT_NONE region for both rb and aux tests to allow testing the case where mremap unmaps beyond the end of a mapped VMA to prevent it from unmapping unrelated mappings. 
Signed-off-by: Lorenzo Stoakes Co-developed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Stoakes --- .../testing/selftests/perf_events/.gitignore | 1 + tools/testing/selftests/perf_events/Makefile | 2 +- tools/testing/selftests/perf_events/mmap.c | 236 ++++++++++++++++++ 3 files changed, 238 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/perf_events/mmap.c diff --git a/tools/testing/selftests/perf_events/.gitignore b/tools/testing/selftests/perf_events/.gitignore index ee93dc4969b8..4931b3b6bbd3 100644 --- a/tools/testing/selftests/perf_events/.gitignore +++ b/tools/testing/selftests/perf_events/.gitignore @@ -2,3 +2,4 @@ sigtrap_threads remove_on_exec watermark_signal +mmap diff --git a/tools/testing/selftests/perf_events/Makefile b/tools/testing/selftests/perf_events/Makefile index 70e3ff211278..2e5d85770dfe 100644 --- a/tools/testing/selftests/perf_events/Makefile +++ b/tools/testing/selftests/perf_events/Makefile @@ -2,5 +2,5 @@ CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES) LDFLAGS += -lpthread -TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal +TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal mmap include ../lib.mk diff --git a/tools/testing/selftests/perf_events/mmap.c b/tools/testing/selftests/perf_events/mmap.c new file mode 100644 index 000000000000..ea0427aac1f9 --- /dev/null +++ b/tools/testing/selftests/perf_events/mmap.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define _GNU_SOURCE + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "../kselftest_harness.h" + +#define RB_SIZE 0x3000 +#define AUX_SIZE 0x10000 +#define AUX_OFFS 0x4000 + +#define HOLE_SIZE 0x1000 + +/* Reserve space for rb, aux with space for shrink-beyond-vma testing. 
*/ +#define REGION_SIZE (2 * RB_SIZE + 2 * AUX_SIZE) +#define REGION_AUX_OFFS (2 * RB_SIZE) + +#define MAP_BASE 1 +#define MAP_AUX 2 + +#define EVENT_SRC_DIR "/sys/bus/event_source/devices" + +FIXTURE(perf_mmap) +{ + int fd; + void *ptr; + void *region; +}; + +FIXTURE_VARIANT(perf_mmap) +{ + bool aux; + unsigned long ptr_size; +}; + +FIXTURE_VARIANT_ADD(perf_mmap, rb) +{ + .aux = false, + .ptr_size = RB_SIZE, +}; + +FIXTURE_VARIANT_ADD(perf_mmap, aux) +{ + .aux = true, + .ptr_size = AUX_SIZE, +}; + +static bool read_event_type(struct dirent *dent, __u32 *type) +{ + char typefn[512]; + FILE *fp; + int res; + + snprintf(typefn, sizeof(typefn), "%s/%s/type", EVENT_SRC_DIR, dent->d_name); + fp = fopen(typefn, "r"); + if (!fp) + return false; + + res = fscanf(fp, "%u", type); + fclose(fp); + return res > 0; +} + +FIXTURE_SETUP(perf_mmap) +{ + struct perf_event_attr attr = { + .size = sizeof(attr), + .disabled = 1, + .exclude_kernel = 1, + .exclude_hv = 1, + }; + struct perf_event_attr attr_ok = {}; + unsigned int eacces = 0, map = 0; + struct perf_event_mmap_page *rb; + struct dirent *dent; + void *aux, *region; + DIR *dir; + + self->ptr = NULL; + + dir = opendir(EVENT_SRC_DIR); + if (!dir) + SKIP(return, "perf not available."); + + region = mmap(NULL, REGION_SIZE, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(region, MAP_FAILED); + self->region = region; + + // Try to find a suitable event on this system + while ((dent = readdir(dir))) { + int fd; + + if (!read_event_type(dent, &attr.type)) + continue; + + fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0); + if (fd < 0) { + if (errno == EACCES) + eacces++; + continue; + } + + // Check whether the event supports mmap() + rb = mmap(region, RB_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0); + if (rb == MAP_FAILED) { + close(fd); + continue; + } + + if (!map) { + // Save the event in case that no AUX capable event is found + attr_ok = attr; + map = MAP_BASE; + } + + if (!variant->aux) + 
continue; + + rb->aux_offset = AUX_OFFS; + rb->aux_size = AUX_SIZE; + + // Check whether it supports a AUX buffer + aux = mmap(region + REGION_AUX_OFFS, AUX_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, fd, AUX_OFFS); + if (aux == MAP_FAILED) { + munmap(rb, RB_SIZE); + close(fd); + continue; + } + + attr_ok = attr; + map = MAP_AUX; + munmap(aux, AUX_SIZE); + munmap(rb, RB_SIZE); + close(fd); + break; + } + closedir(dir); + + if (!map) { + if (!eacces) + SKIP(return, "No mappable perf event found."); + else + SKIP(return, "No permissions for perf_event_open()"); + } + + self->fd = syscall(SYS_perf_event_open, &attr_ok, 0, -1, -1, 0); + ASSERT_NE(self->fd, -1); + + rb = mmap(region, RB_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, self->fd, 0); + ASSERT_NE(rb, MAP_FAILED); + + if (!variant->aux) { + self->ptr = rb; + return; + } + + if (map != MAP_AUX) + SKIP(return, "No AUX event found."); + + rb->aux_offset = AUX_OFFS; + rb->aux_size = AUX_SIZE; + aux = mmap(region + REGION_AUX_OFFS, AUX_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, self->fd, AUX_OFFS); + ASSERT_NE(aux, MAP_FAILED); + self->ptr = aux; +} + +FIXTURE_TEARDOWN(perf_mmap) +{ + ASSERT_EQ(munmap(self->region, REGION_SIZE), 0); + if (self->fd != -1) + ASSERT_EQ(close(self->fd), 0); +} + +TEST_F(perf_mmap, remap) +{ + void *tmp, *ptr = self->ptr; + unsigned long size = variant->ptr_size; + + // Test the invalid remaps + ASSERT_EQ(mremap(ptr, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED); + ASSERT_EQ(mremap(ptr + HOLE_SIZE, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED); + ASSERT_EQ(mremap(ptr + size - HOLE_SIZE, HOLE_SIZE, size, MREMAP_MAYMOVE), MAP_FAILED); + // Shrink the end of the mapping such that we only unmap past end of the VMA, + // which should succeed and poke a hole into the PROT_NONE region + ASSERT_NE(mremap(ptr + size - HOLE_SIZE, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED); + + // Remap the whole buffer to a new address + tmp = mmap(NULL, size, PROT_READ | 
PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(tmp, MAP_FAILED); + + // Try splitting offset 1 hole size into VMA, this should fail + ASSERT_EQ(mremap(ptr + HOLE_SIZE, size - HOLE_SIZE, size - HOLE_SIZE, + MREMAP_MAYMOVE | MREMAP_FIXED, tmp), MAP_FAILED); + // Remapping the whole thing should succeed fine + ptr = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tmp); + ASSERT_EQ(ptr, tmp); + ASSERT_EQ(munmap(tmp, size), 0); +} + +TEST_F(perf_mmap, unmap) +{ + unsigned long size = variant->ptr_size; + + // Try to poke holes into the mappings + ASSERT_NE(munmap(self->ptr, HOLE_SIZE), 0); + ASSERT_NE(munmap(self->ptr + HOLE_SIZE, HOLE_SIZE), 0); + ASSERT_NE(munmap(self->ptr + size - HOLE_SIZE, HOLE_SIZE), 0); +} + +TEST_F(perf_mmap, map) +{ + unsigned long size = variant->ptr_size; + + // Try to poke holes into the mappings by mapping anonymous memory over it + ASSERT_EQ(mmap(self->ptr, HOLE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED); + ASSERT_EQ(mmap(self->ptr + HOLE_SIZE, HOLE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED); + ASSERT_EQ(mmap(self->ptr + size - HOLE_SIZE, HOLE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED); +} + +TEST_HARNESS_MAIN From 475356fe2814f2f0b188da8bf0f1fcc579d81272 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 28 Jul 2025 22:11:54 +0200 Subject: [PATCH 1578/2411] kasan/test: fix protection against compiler elision The kunit test is using assignments to "static volatile void *kasan_ptr_result" to prevent elision of memory loads, but that's not working: In this variable definition, the "volatile" applies to the "void", not to the pointer. To make "volatile" apply to the pointer as intended, it must follow after the "*". This makes the kasan_memchr test pass again on my system. 
The kasan_strings test is still failing because all the definitions of load_unaligned_zeropad() are lacking explicit instrumentation hooks and ASAN does not instrument asm() memory operands. Link: https://lkml.kernel.org/r/20250728-kasan-kunit-fix-volatile-v1-1-e7157c9af82d@google.com Fixes: 5f1c8108e7ad ("mm:kasan: fix sparse warnings: Should it be static?") Signed-off-by: Jann Horn Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitriy Vyukov Cc: Jann Horn Cc: Nihar Chaithanya Cc: Vincenzo Frascino Cc: Signed-off-by: Andrew Morton --- mm/kasan/kasan_test_c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kasan/kasan_test_c.c b/mm/kasan/kasan_test_c.c index 2aa12dfa427a..e0968acc03aa 100644 --- a/mm/kasan/kasan_test_c.c +++ b/mm/kasan/kasan_test_c.c @@ -47,7 +47,7 @@ static struct { * Some tests use these global variables to store return values from function * calls that could otherwise be eliminated by the compiler as dead code. */ -static volatile void *kasan_ptr_result; +static void *volatile kasan_ptr_result; static volatile int kasan_int_result; /* Probe for console output: obtains test_status lines of interest. */ From 47b0f6d8f0d2be4d311a49e13d2fd5f152f492b2 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 31 Jul 2025 02:57:18 -0700 Subject: [PATCH 1579/2411] mm/kmemleak: avoid deadlock by moving pr_warn() outside kmemleak_lock When netpoll is enabled, calling pr_warn_once() while holding kmemleak_lock in mem_pool_alloc() can cause a deadlock due to lock inversion with the netconsole subsystem. This occurs because pr_warn_once() may trigger netpoll, which eventually leads to __alloc_skb() and back into kmemleak code, attempting to reacquire kmemleak_lock. This is the path for the deadlock. 
mem_pool_alloc() -> raw_spin_lock_irqsave(&kmemleak_lock, flags); -> pr_warn_once() -> netconsole subsystem -> netpoll -> __alloc_skb -> __create_object -> raw_spin_lock_irqsave(&kmemleak_lock, flags); Fix this by setting a flag and issuing the pr_warn_once() after kmemleak_lock is released. Link: https://lkml.kernel.org/r/20250731-kmemleak_lock-v1-1-728fd470198f@debian.org Fixes: c5665868183f ("mm: kmemleak: use the memory pool for early allocations") Signed-off-by: Breno Leitao Reported-by: Jakub Kicinski Acked-by: Catalin Marinas Cc: Signed-off-by: Andrew Morton --- mm/kmemleak.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 8d588e685311..e0333455c738 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -470,6 +470,7 @@ static struct kmemleak_object *mem_pool_alloc(gfp_t gfp) { unsigned long flags; struct kmemleak_object *object; + bool warn = false; /* try the slab allocator first */ if (object_cache) { @@ -488,8 +489,10 @@ static struct kmemleak_object *mem_pool_alloc(gfp_t gfp) else if (mem_pool_free_count) object = &mem_pool[--mem_pool_free_count]; else - pr_warn_once("Memory pool empty, consider increasing CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE\n"); + warn = true; raw_spin_unlock_irqrestore(&kmemleak_lock, flags); + if (warn) + pr_warn_once("Memory pool empty, consider increasing CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE\n"); return object; } From 5a309dbf1f829de7f8dc84a518d0b6e7e9be9994 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 30 Jul 2025 23:25:08 +0900 Subject: [PATCH 1580/2411] MAINTAINERS: add Masami as a reviewer of hung task detector Since I'm actively working on hung task blocker detector, add myself to a reviewer of the HUNG TASK DETECTOR feature. 
Link: https://lkml.kernel.org/r/175388550841.627474.3260499035226455392.stgit@devnote2 Signed-off-by: Masami Hiramatsu (Google) Acked-by: Lance Yang Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1b57dd4fcf01..3f957983c192 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11438,6 +11438,7 @@ F: drivers/tty/hvc/ HUNG TASK DETECTOR M: Andrew Morton R: Lance Yang +R: Masami Hiramatsu L: linux-kernel@vger.kernel.org S: Maintained F: include/linux/hung_task.h From d1534ae23c2b6be350c8ab060803fbf6e9682adc Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 28 Jul 2025 15:02:48 -0400 Subject: [PATCH 1581/2411] mm/kmemleak: avoid soft lockup in __kmemleak_do_cleanup() A soft lockup warning was observed on a relatively small x86-64 system with 16 GB of memory when running a debug kernel with kmemleak enabled. watchdog: BUG: soft lockup - CPU#8 stuck for 33s! [kworker/8:1:134] The test system was running a workload with hot unplug happening in parallel. Then kmemleak decided to disable itself due to its inability to allocate more kmemleak objects. The debug kernel has its CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE set to 40,000. The soft lockup happened in kmemleak_do_cleanup() when the existing kmemleak objects were being removed and deleted one-by-one in a loop via a workqueue. In this particular case, there are at least 40,000 objects that need to be processed and given the slowness of a debug kernel and the fact that a raw_spinlock has to be acquired and released in __delete_object(), it could take a while to properly handle all these objects. As kmemleak has been disabled in this case, the object removal and deletion process can be further optimized as locking isn't really needed. However, it is probably not worth the effort to optimize for such an edge case that should rarely happen.
So the simple solution is to call cond_resched() at periodic interval in the iteration loop to avoid soft lockup. Link: https://lkml.kernel.org/r/20250728190248.605750-1-longman@redhat.com Signed-off-by: Waiman Long Acked-by: Catalin Marinas Cc: Signed-off-by: Andrew Morton --- mm/kmemleak.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e0333455c738..84265983f239 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -2184,6 +2184,7 @@ static const struct file_operations kmemleak_fops = { static void __kmemleak_do_cleanup(void) { struct kmemleak_object *object, *tmp; + unsigned int cnt = 0; /* * Kmemleak has already been disabled, no need for RCU list traversal @@ -2192,6 +2193,10 @@ static void __kmemleak_do_cleanup(void) list_for_each_entry_safe(object, tmp, &object_list, object_list) { __remove_object(object); __delete_object(object); + + /* Call cond_resched() once per 64 iterations to avoid soft lockup */ + if (!(++cnt & 0x3f)) + cond_resched(); } } From 366a4532d96fc357998465133db34d34edb79e4c Mon Sep 17 00:00:00 2001 From: Barry Song Date: Tue, 5 Aug 2025 11:54:47 +0800 Subject: [PATCH 1582/2411] mm: fix the race between collapse and PT_RECLAIM under per-vma lock The check_pmd_still_valid() call during collapse is currently only protected by the mmap_lock in write mode, which was sufficient when pt_reclaim always ran under mmap_lock in read mode. However, since madvise_dontneed can now execute under a per-VMA lock, this assumption is no longer valid. As a result, a race condition can occur between collapse and PT_RECLAIM, potentially leading to a kernel panic. 
[ 38.151897] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] SMP KASI [ 38.153519] KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f] [ 38.154605] CPU: 0 UID: 0 PID: 721 Comm: repro Not tainted 6.16.0-next-20250801-next-2025080 #1 PREEMPT(voluntary) [ 38.155929] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org4 [ 38.157418] RIP: 0010:kasan_byte_accessible+0x15/0x30 [ 38.158125] Code: 03 0f 1f 40 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00 48 b8 00 00 00 00 00 fc0 [ 38.160461] RSP: 0018:ffff88800feef678 EFLAGS: 00010286 [ 38.161220] RAX: dffffc0000000000 RBX: 0000000000000001 RCX: 1ffffffff0dde60c [ 38.162232] RDX: 0000000000000000 RSI: ffffffff85da1e18 RDI: dffffc0000000003 [ 38.163176] RBP: ffff88800feef698 R08: 0000000000000001 R09: 0000000000000000 [ 38.164195] R10: 0000000000000000 R11: ffff888016a8ba58 R12: 0000000000000018 [ 38.165189] R13: 0000000000000018 R14: ffffffff85da1e18 R15: 0000000000000000 [ 38.166100] FS: 0000000000000000(0000) GS:ffff8880e3b40000(0000) knlGS:0000000000000000 [ 38.167137] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 38.167891] CR2: 00007f97fadfe504 CR3: 0000000007088005 CR4: 0000000000770ef0 [ 38.168812] PKRU: 55555554 [ 38.169275] Call Trace: [ 38.169647] [ 38.169975] ? __kasan_check_byte+0x19/0x50 [ 38.170581] lock_acquire+0xea/0x310 [ 38.171083] ? rcu_is_watching+0x19/0xc0 [ 38.171615] ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20 [ 38.172343] ? __sanitizer_cov_trace_const_cmp8+0x1c/0x30 [ 38.173130] _raw_spin_lock+0x38/0x50 [ 38.173707] ? __pte_offset_map_lock+0x1a2/0x3c0 [ 38.174390] __pte_offset_map_lock+0x1a2/0x3c0 [ 38.174987] ? __pfx___pte_offset_map_lock+0x10/0x10 [ 38.175724] ? __pfx_pud_val+0x10/0x10 [ 38.176308] ? __sanitizer_cov_trace_const_cmp1+0x1e/0x30 [ 38.177183] unmap_page_range+0xb60/0x43e0 [ 38.177824] ? __pfx_unmap_page_range+0x10/0x10 [ 38.178485] ? 
mas_next_slot+0x133a/0x1a50 [ 38.179079] unmap_single_vma.constprop.0+0x15b/0x250 [ 38.179830] unmap_vmas+0x1fa/0x460 [ 38.180373] ? __pfx_unmap_vmas+0x10/0x10 [ 38.180994] ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20 [ 38.181877] exit_mmap+0x1a2/0xb40 [ 38.182396] ? lock_release+0x14f/0x2c0 [ 38.182929] ? __pfx_exit_mmap+0x10/0x10 [ 38.183474] ? __pfx___mutex_unlock_slowpath+0x10/0x10 [ 38.184188] ? mutex_unlock+0x16/0x20 [ 38.184704] mmput+0x132/0x370 [ 38.185208] do_exit+0x7e7/0x28c0 [ 38.185682] ? __this_cpu_preempt_check+0x21/0x30 [ 38.186328] ? do_group_exit+0x1d8/0x2c0 [ 38.186873] ? __pfx_do_exit+0x10/0x10 [ 38.187401] ? __this_cpu_preempt_check+0x21/0x30 [ 38.188036] ? _raw_spin_unlock_irq+0x2c/0x60 [ 38.188634] ? lockdep_hardirqs_on+0x89/0x110 [ 38.189313] do_group_exit+0xe4/0x2c0 [ 38.189831] __x64_sys_exit_group+0x4d/0x60 [ 38.190413] x64_sys_call+0x2174/0x2180 [ 38.190935] do_syscall_64+0x6d/0x2e0 [ 38.191449] entry_SYSCALL_64_after_hwframe+0x76/0x7e This patch moves the vma_start_write() call to precede check_pmd_still_valid(), ensuring that the check is also properly protected by the per-VMA lock. Link: https://lkml.kernel.org/r/20250805035447.7958-1-21cnbao@gmail.com Fixes: a6fde7add78d ("mm: use per_vma lock for MADV_DONTNEED") Signed-off-by: Barry Song Tested-by: "Lai, Yi" Reported-by: "Lai, Yi" Closes: https://lore.kernel.org/all/aJAFrYfyzGpbm+0m@ly-workstation/ Reviewed-by: Lorenzo Stoakes Cc: David Hildenbrand Cc: Lorenzo Stoakes Cc: Qi Zheng Cc: Vlastimil Babka Cc: Jann Horn Cc: Suren Baghdasaryan Cc: Lokesh Gidra Cc: Tangquan Zheng Cc: Lance Yang Cc: Zi Yan Cc: Baolin Wang Cc: Liam R. 
Howlett Cc: Nico Pache Cc: Ryan Roberts Cc: Dev Jain Signed-off-by: Andrew Morton --- mm/khugepaged.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 374a6a5193a7..6b40bdfd224c 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1172,11 +1172,11 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, if (result != SCAN_SUCCEED) goto out_up_write; /* check if the pmd is still valid */ + vma_start_write(vma); result = check_pmd_still_valid(mm, address, pmd); if (result != SCAN_SUCCEED) goto out_up_write; - vma_start_write(vma); anon_vma_lock_write(vma->anon_vma); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, address, From 45d19b4b6c2d422771c29b83462d84afcbb33f01 Mon Sep 17 00:00:00 2001 From: Jinjiang Tu Date: Thu, 24 Jul 2025 17:09:56 +0800 Subject: [PATCH 1583/2411] mm/smaps: fix race between smaps_hugetlb_range and migration smaps_hugetlb_range() handles the pte without holding ptl, and may be concurrent with migration, leading to BUG_ON in pfn_swap_entry_to_page(). The race is as follows. smaps_hugetlb_range migrate_pages huge_ptep_get remove_migration_ptes folio_unlock pfn_swap_entry_folio BUG_ON To fix it, hold ptl lock in smaps_hugetlb_range().
Link: https://lkml.kernel.org/r/20250724090958.455887-1-tujinjiang@huawei.com Link: https://lkml.kernel.org/r/20250724090958.455887-2-tujinjiang@huawei.com Fixes: 25ee01a2fca0 ("mm: hugetlb: proc: add hugetlb-related fields to /proc/PID/smaps") Signed-off-by: Jinjiang Tu Acked-by: David Hildenbrand Cc: Andrei Vagin Cc: Andrii Nakryiko Cc: Baolin Wang Cc: Brahmajit Das Cc: Catalin Marinas Cc: Christophe Leroy Cc: David Rientjes Cc: Dev Jain Cc: Hugh Dickins Cc: Joern Engel Cc: Kefeng Wang Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Ryan Roberts Cc: Thiago Jung Bauermann Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 3d6d8a9f13fc..55bab10bc779 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1148,10 +1148,13 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; - pte_t ptent = huge_ptep_get(walk->mm, addr, pte); struct folio *folio = NULL; bool present = false; + spinlock_t *ptl; + pte_t ptent; + ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte); + ptent = huge_ptep_get(walk->mm, addr, pte); if (pte_present(ptent)) { folio = page_folio(pte_page(ptent)); present = true; @@ -1170,6 +1173,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, else mss->private_hugetlb += huge_page_size(hstate_vma(vma)); } + spin_unlock(ptl); return 0; } #else From aa5a10b070690225317ed4d85413d144abfff750 Mon Sep 17 00:00:00 2001 From: Jinjiang Tu Date: Thu, 24 Jul 2025 17:09:57 +0800 Subject: [PATCH 1584/2411] fs/proc/task_mmu: hold PTL in pagemap_hugetlb_range and gather_hugetlb_stats Hold PTL in pagemap_hugetlb_range() and gather_hugetlb_stats() to avoid operating on stale page, as pagemap_pmd_range() and gather_pte_stats() have done. 
Link: https://lkml.kernel.org/r/20250724090958.455887-3-tujinjiang@huawei.com Signed-off-by: Jinjiang Tu Acked-by: David Hildenbrand Cc: Andrei Vagin Cc: Andrii Nakryiko Cc: Baolin Wang Cc: Brahmajit Das Cc: Catalin Marinas Cc: Christophe Leroy Cc: David Rientjes Cc: Dev Jain Cc: Hugh Dickins Cc: Joern Engel Cc: Kefeng Wang Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Ryan Roberts Cc: Thiago Jung Bauermann Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 55bab10bc779..ee1e4ccd33bd 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -2021,12 +2021,14 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask, struct pagemapread *pm = walk->private; struct vm_area_struct *vma = walk->vma; u64 flags = 0, frame = 0; + spinlock_t *ptl; int err = 0; pte_t pte; if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; + ptl = huge_pte_lock(hstate_vma(vma), walk->mm, ptep); pte = huge_ptep_get(walk->mm, addr, ptep); if (pte_present(pte)) { struct folio *folio = page_folio(pte_page(pte)); @@ -2054,11 +2056,12 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask, err = add_to_pagemap(&pme, pm); if (err) - return err; + break; if (pm->show_pfn && (flags & PM_PRESENT)) frame++; } + spin_unlock(ptl); cond_resched(); return err; @@ -3132,17 +3135,22 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { - pte_t huge_pte = huge_ptep_get(walk->mm, addr, pte); + pte_t huge_pte; struct numa_maps *md; struct page *page; + spinlock_t *ptl; + ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte); + huge_pte = huge_ptep_get(walk->mm, addr, pte); if (!pte_present(huge_pte)) - return 0; + goto out; page = pte_page(huge_pte); md = walk->private; gather_stats(page, md, pte_dirty(huge_pte), 1); 
+out: + spin_unlock(ptl); return 0; } From 86624ba3b522b6512def25534341da93356c8da4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 14 Jul 2025 13:08:25 -0300 Subject: [PATCH 1585/2411] vfio/pci: Do vf_token checks for VFIO_DEVICE_BIND_IOMMUFD This was missed during the initial implementation. The VFIO PCI encodes the vf_token inside the device name when opening the device from the group FD, something like: "0000:04:10.0 vf_token=bd8d9d2b-5a5f-4f5a-a211-f591514ba1f3" This is used to control access to a VF unless there is co-ordination with the owner of the PF. Since we no longer have a device name in the cdev path, pass the token directly through VFIO_DEVICE_BIND_IOMMUFD using an optional field indicated by VFIO_DEVICE_BIND_FLAG_TOKEN. Fixes: 5fcc26969a16 ("vfio: Add VFIO_DEVICE_BIND_IOMMUFD") Tested-by: Shameer Kolothum Reviewed-by: Yi Liu Signed-off-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/0-v3-bdd8716e85fe+3978a-vfio_token_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/device_cdev.c | 38 +++++++++++++++++-- .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 1 + drivers/vfio/pci/mlx5/main.c | 1 + drivers/vfio/pci/nvgrace-gpu/main.c | 2 + drivers/vfio/pci/pds/vfio_dev.c | 1 + drivers/vfio/pci/qat/main.c | 1 + drivers/vfio/pci/vfio_pci.c | 1 + drivers/vfio/pci/vfio_pci_core.c | 22 +++++++---- drivers/vfio/pci/virtio/main.c | 3 ++ include/linux/vfio.h | 4 ++ include/linux/vfio_pci_core.h | 2 + include/uapi/linux/vfio.h | 12 +++++- 12 files changed, 76 insertions(+), 12 deletions(-) diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c index 281a8dc3ed49..480cac3a0c27 100644 --- a/drivers/vfio/device_cdev.c +++ b/drivers/vfio/device_cdev.c @@ -60,22 +60,50 @@ static void vfio_df_get_kvm_safe(struct vfio_device_file *df) spin_unlock(&df->kvm_ref_lock); } +static int vfio_df_check_token(struct vfio_device *device, + const struct vfio_device_bind_iommufd *bind) +{ + uuid_t uuid; + + if 
(!device->ops->match_token_uuid) { + if (bind->flags & VFIO_DEVICE_BIND_FLAG_TOKEN) + return -EINVAL; + return 0; + } + + if (!(bind->flags & VFIO_DEVICE_BIND_FLAG_TOKEN)) + return device->ops->match_token_uuid(device, NULL); + + if (copy_from_user(&uuid, u64_to_user_ptr(bind->token_uuid_ptr), + sizeof(uuid))) + return -EFAULT; + return device->ops->match_token_uuid(device, &uuid); +} + long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df, struct vfio_device_bind_iommufd __user *arg) { + const u32 VALID_FLAGS = VFIO_DEVICE_BIND_FLAG_TOKEN; struct vfio_device *device = df->device; struct vfio_device_bind_iommufd bind; unsigned long minsz; + u32 user_size; int ret; static_assert(__same_type(arg->out_devid, df->devid)); minsz = offsetofend(struct vfio_device_bind_iommufd, out_devid); - if (copy_from_user(&bind, arg, minsz)) - return -EFAULT; + ret = get_user(user_size, &arg->argsz); + if (ret) + return ret; + if (user_size < minsz) + return -EINVAL; + ret = copy_struct_from_user(&bind, minsz, arg, user_size); + if (ret) + return ret; - if (bind.argsz < minsz || bind.flags || bind.iommufd < 0) + if (bind.iommufd < 0 || bind.flags & ~VALID_FLAGS) return -EINVAL; /* BIND_IOMMUFD only allowed for cdev fds */ @@ -93,6 +121,10 @@ long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df, goto out_unlock; } + ret = vfio_df_check_token(device, &bind); + if (ret) + goto out_unlock; + df->iommufd = iommufd_ctx_from_fd(bind.iommufd); if (IS_ERR(df->iommufd)) { ret = PTR_ERR(df->iommufd); diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 2149f49aeec7..397f5e445136 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -1583,6 +1583,7 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .match_token_uuid = vfio_pci_core_match_token_uuid, 
.bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 93f894fe60d2..7ec47e736a8e 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -1372,6 +1372,7 @@ static const struct vfio_device_ops mlx5vf_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .match_token_uuid = vfio_pci_core_match_token_uuid, .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c index e5ac39c4cc6b..d95761dcdd58 100644 --- a/drivers/vfio/pci/nvgrace-gpu/main.c +++ b/drivers/vfio/pci/nvgrace-gpu/main.c @@ -696,6 +696,7 @@ static const struct vfio_device_ops nvgrace_gpu_pci_ops = { .mmap = nvgrace_gpu_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .match_token_uuid = vfio_pci_core_match_token_uuid, .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, @@ -715,6 +716,7 @@ static const struct vfio_device_ops nvgrace_gpu_pci_core_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .match_token_uuid = vfio_pci_core_match_token_uuid, .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, diff --git a/drivers/vfio/pci/pds/vfio_dev.c b/drivers/vfio/pci/pds/vfio_dev.c index f6e0253a8a14..f3ccb0008f67 100644 --- a/drivers/vfio/pci/pds/vfio_dev.c +++ b/drivers/vfio/pci/pds/vfio_dev.c @@ -201,6 +201,7 @@ static const struct vfio_device_ops pds_vfio_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, 
+ .match_token_uuid = vfio_pci_core_match_token_uuid, .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, diff --git a/drivers/vfio/pci/qat/main.c b/drivers/vfio/pci/qat/main.c index 845ed15b6771..5cce6b0b8d2f 100644 --- a/drivers/vfio/pci/qat/main.c +++ b/drivers/vfio/pci/qat/main.c @@ -614,6 +614,7 @@ static const struct vfio_device_ops qat_vf_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .match_token_uuid = vfio_pci_core_match_token_uuid, .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 5ba39f7623bb..ac10f14417f2 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -138,6 +138,7 @@ static const struct vfio_device_ops vfio_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .match_token_uuid = vfio_pci_core_match_token_uuid, .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 261a6dc5a5fc..fad410cf91bc 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1821,9 +1821,13 @@ void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count) } EXPORT_SYMBOL_GPL(vfio_pci_core_request); -static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, - bool vf_token, uuid_t *uuid) +int vfio_pci_core_match_token_uuid(struct vfio_device *core_vdev, + const uuid_t *uuid) + { + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); + /* * There's always some degree of trust or collaboration between SR-IOV * PF 
and VFs, even if just that the PF hosts the SR-IOV capability and @@ -1854,7 +1858,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, bool match; if (!pf_vdev) { - if (!vf_token) + if (!uuid) return 0; /* PF is not vfio-pci, no VF token */ pci_info_ratelimited(vdev->pdev, @@ -1862,7 +1866,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, return -EINVAL; } - if (!vf_token) { + if (!uuid) { pci_info_ratelimited(vdev->pdev, "VF token required to access device\n"); return -EACCES; @@ -1880,7 +1884,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, } else if (vdev->vf_token) { mutex_lock(&vdev->vf_token->lock); if (vdev->vf_token->users) { - if (!vf_token) { + if (!uuid) { mutex_unlock(&vdev->vf_token->lock); pci_info_ratelimited(vdev->pdev, "VF token required to access device\n"); @@ -1893,12 +1897,12 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, "Incorrect VF token provided for device\n"); return -EACCES; } - } else if (vf_token) { + } else if (uuid) { uuid_copy(&vdev->vf_token->uuid, uuid); } mutex_unlock(&vdev->vf_token->lock); - } else if (vf_token) { + } else if (uuid) { pci_info_ratelimited(vdev->pdev, "VF token incorrectly provided, not a PF or VF\n"); return -EINVAL; @@ -1906,6 +1910,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, return 0; } +EXPORT_SYMBOL_GPL(vfio_pci_core_match_token_uuid); #define VF_TOKEN_ARG "vf_token=" @@ -1952,7 +1957,8 @@ int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf) } } - ret = vfio_pci_validate_vf_token(vdev, vf_token, &uuid); + ret = core_vdev->ops->match_token_uuid(core_vdev, + vf_token ? 
&uuid : NULL); if (ret) return ret; diff --git a/drivers/vfio/pci/virtio/main.c b/drivers/vfio/pci/virtio/main.c index 515fe1b9f94d..8084f3e36a9f 100644 --- a/drivers/vfio/pci/virtio/main.c +++ b/drivers/vfio/pci/virtio/main.c @@ -94,6 +94,7 @@ static const struct vfio_device_ops virtiovf_vfio_pci_lm_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .match_token_uuid = vfio_pci_core_match_token_uuid, .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, @@ -114,6 +115,7 @@ static const struct vfio_device_ops virtiovf_vfio_pci_tran_lm_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .match_token_uuid = vfio_pci_core_match_token_uuid, .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, @@ -134,6 +136,7 @@ static const struct vfio_device_ops virtiovf_vfio_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .match_token_uuid = vfio_pci_core_match_token_uuid, .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 707b00772ce1..eb563f538dee 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -105,6 +105,9 @@ struct vfio_device { * @match: Optional device name match callback (return: 0 for no-match, >0 for * match, -errno for abort (ex. match with insufficient or incorrect * additional args) + * @match_token_uuid: Optional device token match/validation. Return 0 + * if the uuid is valid for the device, -errno otherwise. uuid is NULL + * if none was provided. * @dma_unmap: Called when userspace unmaps IOVA from the container * this device is attached to. 
* @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl @@ -132,6 +135,7 @@ struct vfio_device_ops { int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma); void (*request)(struct vfio_device *vdev, unsigned int count); int (*match)(struct vfio_device *vdev, char *buf); + int (*match_token_uuid)(struct vfio_device *vdev, const uuid_t *uuid); void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length); int (*device_feature)(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz); diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index fbb472dd99b3..f541044e42a2 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -122,6 +122,8 @@ ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *bu int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma); void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count); int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf); +int vfio_pci_core_match_token_uuid(struct vfio_device *core_vdev, + const uuid_t *uuid); int vfio_pci_core_enable(struct vfio_pci_core_device *vdev); void vfio_pci_core_disable(struct vfio_pci_core_device *vdev); void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 5764f315137f..75100bf009ba 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -905,10 +905,12 @@ struct vfio_device_feature { * VFIO_DEVICE_BIND_IOMMUFD - _IOR(VFIO_TYPE, VFIO_BASE + 18, * struct vfio_device_bind_iommufd) * @argsz: User filled size of this data. - * @flags: Must be 0. + * @flags: Must be 0 or a bit flags of VFIO_DEVICE_BIND_* * @iommufd: iommufd to bind. * @out_devid: The device id generated by this bind. devid is a handle for * this device/iommufd bond and can be used in IOMMUFD commands. + * @token_uuid_ptr: Valid if VFIO_DEVICE_BIND_FLAG_TOKEN. 
Points to a 16 byte + * UUID in the same format as VFIO_DEVICE_FEATURE_PCI_VF_TOKEN. * * Bind a vfio_device to the specified iommufd. * @@ -917,13 +919,21 @@ struct vfio_device_feature { * * Unbind is automatically conducted when device fd is closed. * + * A token is sometimes required to open the device, unless this is known to be + * needed VFIO_DEVICE_BIND_FLAG_TOKEN should not be set and token_uuid_ptr is + * ignored. The only case today is a PF/VF relationship where the VF bind must + * be provided the same token as VFIO_DEVICE_FEATURE_PCI_VF_TOKEN provided to + * the PF. + * * Return: 0 on success, -errno on failure. */ struct vfio_device_bind_iommufd { __u32 argsz; __u32 flags; +#define VFIO_DEVICE_BIND_FLAG_TOKEN (1 << 0) __s32 iommufd; __u32 out_devid; + __aligned_u64 token_uuid_ptr; }; #define VFIO_DEVICE_BIND_IOMMUFD _IO(VFIO_TYPE, VFIO_BASE + 18) From 27a23faecd5f62c8fea86c5aa67479b559306406 Mon Sep 17 00:00:00 2001 From: Xin Zeng Date: Mon, 14 Jul 2025 20:13:57 -0400 Subject: [PATCH 1586/2411] vfio/qat: Remove myself from VFIO QAT PCI driver maintainers Remove myself from VFIO QAT PCI driver maintainers as I'm leaving Intel. 
Signed-off-by: Xin Zeng Link: https://lore.kernel.org/r/20250715001357.33725-1-xin.zeng@intel.com Signed-off-by: Alex Williamson --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fad6cb025a19..886365433105 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -26090,7 +26090,6 @@ S: Maintained F: drivers/vfio/platform/ VFIO QAT PCI DRIVER -M: Xin Zeng M: Giovanni Cabiddu L: kvm@vger.kernel.org L: qat-linux@intel.com From 1e9c0f1da562651160456e45629f815673c2dd5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Mielnik?= Date: Tue, 15 Jul 2025 09:11:50 +0100 Subject: [PATCH 1587/2411] vfio/qat: add support for intel QAT 6xxx virtual functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend the qat_vfio_pci variant driver to support QAT 6xxx Virtual Functions (VFs). Add the relevant QAT 6xxx VF device IDs to the driver's probe table, enabling proper detection and initialization of these devices. Update the module description to reflect that the driver now supports all QAT generations. 
Signed-off-by: Małgorzata Mielnik Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Link: https://lore.kernel.org/r/20250715081150.1244466-1-suman.kumar.chakraborty@intel.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/qat/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/pci/qat/main.c b/drivers/vfio/pci/qat/main.c index 5cce6b0b8d2f..a19b68043eb2 100644 --- a/drivers/vfio/pci/qat/main.c +++ b/drivers/vfio/pci/qat/main.c @@ -676,6 +676,8 @@ static const struct pci_device_id qat_vf_vfio_pci_table[] = { { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_INTEL, 0x4941) }, { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_INTEL, 0x4943) }, { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_INTEL, 0x4945) }, + /* Intel QAT GEN6 6xxx VF device */ + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_INTEL, 0x4949) }, {} }; MODULE_DEVICE_TABLE(pci, qat_vf_vfio_pci_table); @@ -697,5 +699,5 @@ module_pci_driver(qat_vf_vfio_pci_driver); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Xin Zeng "); -MODULE_DESCRIPTION("QAT VFIO PCI - VFIO PCI driver with live migration support for Intel(R) QAT GEN4 device family"); +MODULE_DESCRIPTION("QAT VFIO PCI - VFIO PCI driver with live migration support for Intel(R) QAT device family"); MODULE_IMPORT_NS("CRYPTO_QAT"); From b1779e4f209c7ff7e32f3c79d69bca4e3a3a68b6 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 15 Jul 2025 11:46:22 -0700 Subject: [PATCH 1588/2411] vfio/type1: conditional rescheduling while pinning A large DMA mapping request can loop through dma address pinning for many pages. In cases where THP can not be used, the repeated vmf_insert_pfn can be costly, so let the task reschedule as needed to prevent CPU stalls. Failure to do so has potentially harmful side effects, like increased memory pressure as unrelated rcu tasks are unable to make their reclaim callbacks and result in OOM conditions.
rcu: INFO: rcu_sched self-detected stall on CPU rcu: 36-....: (20999 ticks this GP) idle=b01c/1/0x4000000000000000 softirq=35839/35839 fqs=3538 rcu: hardirqs softirqs csw/system rcu: number: 0 107 0 rcu: cputime: 50 0 10446 ==> 10556(ms) rcu: (t=21075 jiffies g=377761 q=204059 ncpus=384) ... ? asm_sysvec_apic_timer_interrupt+0x16/0x20 ? walk_system_ram_range+0x63/0x120 ? walk_system_ram_range+0x46/0x120 ? pgprot_writethrough+0x20/0x20 lookup_memtype+0x67/0xf0 track_pfn_insert+0x20/0x40 vmf_insert_pfn_prot+0x88/0x140 vfio_pci_mmap_huge_fault+0xf9/0x1b0 [vfio_pci_core] __do_fault+0x28/0x1b0 handle_mm_fault+0xef1/0x2560 fixup_user_fault+0xf5/0x270 vaddr_get_pfns+0x169/0x2f0 [vfio_iommu_type1] vfio_pin_pages_remote+0x162/0x8e0 [vfio_iommu_type1] vfio_iommu_type1_ioctl+0x1121/0x1810 [vfio_iommu_type1] ? futex_wake+0x1c1/0x260 x64_sys_call+0x234/0x17a0 do_syscall_64+0x63/0x130 ? exc_page_fault+0x63/0x130 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Signed-off-by: Keith Busch Reviewed-by: Paul E. McKenney Link: https://lore.kernel.org/r/20250715184622.3561598-1-kbusch@meta.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 1136d7ac6b59..f8d68fe77b41 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -647,6 +647,13 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, while (npage) { if (!batch->size) { + /* + * Large mappings may take a while to repeatedly refill + * the batch, so conditionally relinquish the CPU when + * needed to avoid stalls. + */ + cond_resched(); + /* Empty batch, so refill it. 
*/ ret = vaddr_get_pfns(mm, vaddr, npage, dma->prot, &pfn, batch); From 966c529aa177e154722386abc4c46027ce7cf7ce Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 24 Jul 2025 13:37:59 +0200 Subject: [PATCH 1589/2411] dt-bindings: net: Replace bouncing Alexandru Tachici emails Emails to alexandru.tachici@analog.com bounce permanently: Remote Server returned '550 5.1.10 RESOLVER.ADR.RecipientNotFound; Recipient not found by SMTP address lookup' so replace him with Marcelo Schmitt from Analog. Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring (Arm) Signed-off-by: Krzysztof Kozlowski Reviewed-by: Marcelo Schmitt Link: https://patch.msgid.link/20250724113758.61874-2-krzysztof.kozlowski@linaro.org Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/adi,adin.yaml | 2 +- Documentation/devicetree/bindings/net/adi,adin1110.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/adi,adin.yaml b/Documentation/devicetree/bindings/net/adi,adin.yaml index 929cf8c0b0fd..c425a9f1886d 100644 --- a/Documentation/devicetree/bindings/net/adi,adin.yaml +++ b/Documentation/devicetree/bindings/net/adi,adin.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Analog Devices ADIN1200/ADIN1300 PHY maintainers: - - Alexandru Tachici + - Marcelo Schmitt description: | Bindings for Analog Devices Industrial Ethernet PHYs diff --git a/Documentation/devicetree/bindings/net/adi,adin1110.yaml b/Documentation/devicetree/bindings/net/adi,adin1110.yaml index 9de865295d7a..0a73e01d7f97 100644 --- a/Documentation/devicetree/bindings/net/adi,adin1110.yaml +++ b/Documentation/devicetree/bindings/net/adi,adin1110.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: ADI ADIN1110 MAC-PHY maintainers: - - Alexandru Tachici + - Marcelo Schmitt description: | The ADIN1110 is a low power single port 10BASE-T1L MAC- From e88fbc30dda1cb7438515303704ceddb3ade4ecd 
Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 30 Jul 2025 22:23:23 +0200 Subject: [PATCH 1590/2411] net: ftgmac100: fix potential NULL pointer access in ftgmac100_phy_disconnect After the call to phy_disconnect() netdev->phydev is reset to NULL. So fixed_phy_unregister() would be called with a NULL pointer as argument. Therefore cache the phy_device before this call. Fixes: e24a6c874601 ("net: ftgmac100: Get link speed and duplex for NC-SI") Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Reviewed-by: Dawid Osuchowski Link: https://patch.msgid.link/2b80a77a-06db-4dd7-85dc-3a8e0de55a1d@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/faraday/ftgmac100.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 5d0c0906878d..a863f7841210 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1750,16 +1750,17 @@ static int ftgmac100_setup_mdio(struct net_device *netdev) static void ftgmac100_phy_disconnect(struct net_device *netdev) { struct ftgmac100 *priv = netdev_priv(netdev); + struct phy_device *phydev = netdev->phydev; - if (!netdev->phydev) + if (!phydev) return; - phy_disconnect(netdev->phydev); + phy_disconnect(phydev); if (of_phy_is_fixed_link(priv->dev->of_node)) of_phy_deregister_fixed_link(priv->dev->of_node); if (priv->use_ncsi) - fixed_phy_unregister(netdev->phydev); + fixed_phy_unregister(phydev); } static void ftgmac100_destroy_mdio(struct net_device *netdev) From e407fceeaf1b2959892b4fc9b584843d3f2bfc05 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 1 Aug 2025 10:07:54 -0700 Subject: [PATCH 1591/2411] eth: fbnic: remove the debugging trick of super high page bias Alex added page bias of LONG_MAX, which is admittedly quite a clever way of catching overflows of the pp ref count. 
The page pool code was "optimized" to leave the ref at 1 for freed pages so it can't catch basic bugs by itself any more. (Something we should probably address under DEBUG_NET...) Unfortunately for fbnic since commit f7dc3248dcfb ("skbuff: Optimization of SKB coalescing for page pool") core _may_ actually take two extra pp refcounts, if one of them is returned before driver gives up the bias the ret < 0 check in page_pool_unref_netmem() will trigger. While at it add a FBNIC_ to the name of the driver constant. Fixes: 0cb4c0a13723 ("eth: fbnic: Implement Rx queue alloc/start/stop/free") Link: https://patch.msgid.link/20250801170754.2439577-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 4 ++-- drivers/net/ethernet/meta/fbnic/fbnic_txrx.h | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c index ac11389a764c..f9543d03485f 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c @@ -661,8 +661,8 @@ static void fbnic_page_pool_init(struct fbnic_ring *ring, unsigned int idx, { struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx]; - page_pool_fragment_page(page, PAGECNT_BIAS_MAX); - rx_buf->pagecnt_bias = PAGECNT_BIAS_MAX; + page_pool_fragment_page(page, FBNIC_PAGECNT_BIAS_MAX); + rx_buf->pagecnt_bias = FBNIC_PAGECNT_BIAS_MAX; rx_buf->page = page; } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h index 2e361d6f03ff..34693596e5eb 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h @@ -91,10 +91,8 @@ struct fbnic_queue_stats { struct u64_stats_sync syncp; }; -/* Pagecnt bias is long max to reserve the last bit to catch overflow - * cases where if we overcharge the bias it will flip over to be negative. 
- */ -#define PAGECNT_BIAS_MAX LONG_MAX +#define FBNIC_PAGECNT_BIAS_MAX PAGE_SIZE + struct fbnic_rx_buf { struct page *page; long pagecnt_bias; From 2972395d8fad7f4efc8555348f2f988d4941d797 Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Fri, 1 Aug 2025 19:46:35 -0700 Subject: [PATCH 1592/2411] eth: fbnic: Fix tx_dropped reporting Correctly copy the tx_dropped stats from the fbd->hw_stats to the rtnl_link_stats64 struct. Fixes: 5f8bd2ce8269 ("eth: fbnic: add support for TMI stats") Signed-off-by: Mohsin Bashir Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250802024636.679317-2-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_netdev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index 04bb6e7147a2..c0c9808afdd0 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -422,16 +422,16 @@ static void fbnic_get_stats64(struct net_device *dev, tx_packets = stats->packets; tx_dropped = stats->dropped; - stats64->tx_bytes = tx_bytes; - stats64->tx_packets = tx_packets; - stats64->tx_dropped = tx_dropped; - /* Record drops from Tx HW Datapath */ tx_dropped += fbd->hw_stats.tmi.drop.frames.value + fbd->hw_stats.tti.cm_drop.frames.value + fbd->hw_stats.tti.frame_drop.frames.value + fbd->hw_stats.tti.tbi_drop.frames.value; + stats64->tx_bytes = tx_bytes; + stats64->tx_packets = tx_packets; + stats64->tx_dropped = tx_dropped; + for (i = 0; i < fbn->num_tx_queues; i++) { struct fbnic_ring *txr = fbn->tx[i]; From 53abd9c86fd086d8448ceec4e9ffbd65b6c17a37 Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Fri, 1 Aug 2025 19:46:36 -0700 Subject: [PATCH 1593/2411] eth: fbnic: Lock the tx_dropped update Wrap copying of drop stats on TX path from fbd->hw_stats by the hw_stats_lock. 
Currently, it is being performed outside the lock and another thread accessing fbd->hw_stats can lead to inconsistencies. Fixes: 5f8bd2ce8269 ("eth: fbnic: add support for TMI stats") Signed-off-by: Mohsin Bashir Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250802024636.679317-3-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_netdev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index c0c9808afdd0..e67e99487a27 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -423,10 +423,12 @@ static void fbnic_get_stats64(struct net_device *dev, tx_dropped = stats->dropped; /* Record drops from Tx HW Datapath */ + spin_lock(&fbd->hw_stats_lock); tx_dropped += fbd->hw_stats.tmi.drop.frames.value + fbd->hw_stats.tti.cm_drop.frames.value + fbd->hw_stats.tti.frame_drop.frames.value + fbd->hw_stats.tti.tbi_drop.frames.value; + spin_unlock(&fbd->hw_stats_lock); stats64->tx_bytes = tx_bytes; stats64->tx_packets = tx_packets; From 8d22aea8af0d57a1daff046d65b7c18552e35e29 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 4 Aug 2025 14:43:20 +0300 Subject: [PATCH 1594/2411] selftests: netdevsim: Xfail nexthop test on slow machines A lot of test cases in the file are related to the idle and unbalanced timers of resilient nexthop groups and these tests are reported to be flaky on slow machines running debug kernels. Rather than marking a lot of individual tests with xfail_on_slow(), simply mark all the tests. Note that the test is stable on non-debug machines and that with debug kernels we are mainly interested in the output of various sanitizers in order to determine pass / fail. Before: # make -C tools/testing/selftests KSFT_MACHINE_SLOW=yes \ TARGETS=drivers/net/netdevsim TEST_PROGS=nexthop.sh \ TEST_GEN_PROGS="" run_tests [...] 
# TEST: Bucket migration after idle timer (with delete) [FAIL] # Group expected to still be unbalanced [...] not ok 1 selftests: drivers/net/netdevsim: nexthop.sh # exit=1 After: # make -C tools/testing/selftests KSFT_MACHINE_SLOW=yes \ TARGETS=drivers/net/netdevsim TEST_PROGS=nexthop.sh \ TEST_GEN_PROGS="" run_tests [...] # TEST: Bucket migration after idle timer (with delete) [XFAIL] # Group expected to still be unbalanced [...] ok 1 selftests: drivers/net/netdevsim: nexthop.sh Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20250729160609.02e0f157@kernel.org/ Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20250804114320.193203-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/netdevsim/nexthop.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh index e8e0dc088d6a..01d0c044a5fc 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh @@ -1053,6 +1053,6 @@ trap cleanup EXIT setup_prepare -tests_run +xfail_on_slow tests_run exit $EXIT_STATUS From e144d53cf21fb9d02626c669533788c6bdc61ce3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Jul 2025 12:43:41 -0700 Subject: [PATCH 1595/2411] NFS/localio: nfs_close_local_fh() fix check for file closed If the struct nfs_file_localio is closed, its list entry will be empty, but the nfs_uuid->files list might still contain other entries. 
Acked-by: Mike Snitzer Tested-by: Mike Snitzer Reviewed-by: NeilBrown Fixes: 21fb44034695 ("nfs_localio: protect race between nfs_uuid_put() and nfs_close_local_fh()") Signed-off-by: Trond Myklebust --- fs/nfs_common/nfslocalio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c index 05c7c16e37ab..64949c46c174 100644 --- a/fs/nfs_common/nfslocalio.c +++ b/fs/nfs_common/nfslocalio.c @@ -314,7 +314,7 @@ void nfs_close_local_fh(struct nfs_file_localio *nfl) rcu_read_unlock(); return; } - if (list_empty(&nfs_uuid->files)) { + if (list_empty(&nfl->list)) { /* nfs_uuid_put() has started closing files, wait for it * to finished */ From fdd015de767977f21892329af5e12276eb80375f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Jul 2025 12:49:00 -0700 Subject: [PATCH 1596/2411] NFS/localio: nfs_uuid_put() fix races with nfs_open/close_local_fh() In order for the wait in nfs_uuid_put() to be safe, it is necessary to ensure that nfs_uuid_add_file() doesn't add a new entry once the nfs_uuid->net has been NULLed out. Also fix up the wake_up_var_locked() / wait_var_event_spinlock() to both use the nfs_uuid address, since nfl, and &nfl->uuid could be used elsewhere. Acked-by: Mike Snitzer Tested-by: Mike Snitzer Link: https://lore.kernel.org/all/175262893035.2234665.1735173020338594784@noble.neil.brown.name/ Fixes: 21fb44034695 ("nfs_localio: protect race between nfs_uuid_put() and nfs_close_local_fh()") Signed-off-by: Trond Myklebust --- fs/nfs_common/nfslocalio.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c index 64949c46c174..f1f1592ac134 100644 --- a/fs/nfs_common/nfslocalio.c +++ b/fs/nfs_common/nfslocalio.c @@ -177,7 +177,7 @@ static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid) /* nfs_close_local_fh() is doing the * close and we must wait. 
until it unlinks */ - wait_var_event_spinlock(nfl, + wait_var_event_spinlock(nfs_uuid, list_first_entry_or_null( &nfs_uuid->files, struct nfs_file_localio, @@ -243,15 +243,20 @@ void nfs_localio_invalidate_clients(struct list_head *nn_local_clients, } EXPORT_SYMBOL_GPL(nfs_localio_invalidate_clients); -static void nfs_uuid_add_file(nfs_uuid_t *nfs_uuid, struct nfs_file_localio *nfl) +static int nfs_uuid_add_file(nfs_uuid_t *nfs_uuid, struct nfs_file_localio *nfl) { + int ret = 0; + /* Add nfl to nfs_uuid->files if it isn't already */ spin_lock(&nfs_uuid->lock); - if (list_empty(&nfl->list)) { + if (rcu_access_pointer(nfs_uuid->net) == NULL) { + ret = -ENXIO; + } else if (list_empty(&nfl->list)) { rcu_assign_pointer(nfl->nfs_uuid, nfs_uuid); list_add_tail(&nfl->list, &nfs_uuid->files); } spin_unlock(&nfs_uuid->lock); + return ret; } /* @@ -285,11 +290,13 @@ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid, } rcu_read_unlock(); /* We have an implied reference to net thanks to nfsd_net_try_get */ - localio = nfs_to->nfsd_open_local_fh(net, uuid->dom, rpc_clnt, - cred, nfs_fh, pnf, fmode); + localio = nfs_to->nfsd_open_local_fh(net, uuid->dom, rpc_clnt, cred, + nfs_fh, pnf, fmode); + if (!IS_ERR(localio) && nfs_uuid_add_file(uuid, nfl) < 0) { + /* Delete the cached file when racing with nfs_uuid_put() */ + nfs_to_nfsd_file_put_local(pnf); + } nfs_to_nfsd_net_put(net); - if (!IS_ERR(localio)) - nfs_uuid_add_file(uuid, nfl); return localio; } @@ -338,7 +345,7 @@ void nfs_close_local_fh(struct nfs_file_localio *nfl) */ spin_lock(&nfs_uuid->lock); list_del_init(&nfl->list); - wake_up_var_locked(&nfl->nfs_uuid, &nfs_uuid->lock); + wake_up_var_locked(nfs_uuid, &nfs_uuid->lock); spin_unlock(&nfs_uuid->lock); } EXPORT_SYMBOL_GPL(nfs_close_local_fh); From 4ec752ce6debd5a0e7e0febf6bcf780ccda6ab5e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Jul 2025 11:29:51 -0700 Subject: [PATCH 1597/2411] NFS/localio: nfs_uuid_put() fix the wake up after unlinking the file 
Use store_release_wake_up() instead of wake_up_var_locked(), because the waiter cannot retake the nfs_uuid->lock. Acked-by: Mike Snitzer Tested-by: Mike Snitzer Suggested-by: NeilBrown Link: https://lore.kernel.org/all/175262948827.2234665.1891349021754495573@noble.neil.brown.name/ Fixes: 21fb44034695 ("nfs_localio: protect race between nfs_uuid_put() and nfs_close_local_fh()") Signed-off-by: Trond Myklebust --- fs/nfs_common/nfslocalio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c index f1f1592ac134..dd715cdb6c04 100644 --- a/fs/nfs_common/nfslocalio.c +++ b/fs/nfs_common/nfslocalio.c @@ -198,8 +198,7 @@ static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid) /* Now we can allow racing nfs_close_local_fh() to * skip the locking. */ - RCU_INIT_POINTER(nfl->nfs_uuid, NULL); - wake_up_var_locked(&nfl->nfs_uuid, &nfs_uuid->lock); + store_release_wake_up(&nfl->nfs_uuid, RCU_INITIALIZER(NULL)); } /* Remove client from nn->local_clients */ From e6d76268813dc64cc0b74ea9c274501f2de05344 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Mon, 4 Aug 2025 16:44:57 +0000 Subject: [PATCH 1598/2411] net: Update threaded state in napi config in netif_set_threaded Commit 2677010e7793 ("Add support to set NAPI threaded for individual NAPI") added support to enable/disable threaded napi using netlink. This also extended the napi config save/restore functionality to set the napi threaded state. This breaks netdev reset for drivers that use napi threaded at device level and also use napi config save/restore on napi_disable/napi_enable. Basically on netdev with napi threaded enabled at device level, a napi_enable call will get stuck trying to stop the napi kthread. This is because the napi->config->threaded is set to disabled when threaded is enabled at device level. The issue can be reproduced on virtio-net device using qemu. 
To reproduce the issue run following, echo 1 > /sys/class/net/threaded ethtool -L eth0 combined 1 Update the threaded state in napi config in netif_set_threaded and add a new test that verifies this scenario. Tested on qemu with virtio-net: NETIF=eth0 ./tools/testing/selftests/drivers/net/napi_threaded.py TAP version 13 1..2 ok 1 napi_threaded.change_num_queues ok 2 napi_threaded.enable_dev_threaded_disable_napi_threaded # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:0 error:0 Fixes: 2677010e7793 ("Add support to set NAPI threaded for individual NAPI") Signed-off-by: Samiullah Khawaja Link: https://patch.msgid.link/20250804164457.2494390-1-skhawaja@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 26 ++-- tools/testing/selftests/drivers/net/Makefile | 1 + .../selftests/drivers/net/napi_threaded.py | 111 ++++++++++++++++++ 3 files changed, 121 insertions(+), 17 deletions(-) create mode 100755 tools/testing/selftests/drivers/net/napi_threaded.py diff --git a/net/core/dev.c b/net/core/dev.c index b28ce68830b2..68dc47d7e700 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6978,6 +6978,12 @@ int napi_set_threaded(struct napi_struct *napi, if (napi->config) napi->config->threaded = threaded; + /* Setting/unsetting threaded mode on a napi might not immediately + * take effect, if the current napi instance is actively being + * polled. In this case, the switch between threaded mode and + * softirq mode will happen in the next round of napi_schedule(). + * This should not cause hiccups/stalls to the live traffic. + */ if (!threaded && napi->thread) { napi_stop_kthread(napi); } else { @@ -7011,23 +7017,9 @@ int netif_set_threaded(struct net_device *dev, WRITE_ONCE(dev->threaded, threaded); - /* Make sure kthread is created before THREADED bit - * is set. - */ - smp_mb__before_atomic(); - - /* Setting/unsetting threaded mode on a napi might not immediately - * take effect, if the current napi instance is actively being - * polled. 
In this case, the switch between threaded mode and - * softirq mode will happen in the next round of napi_schedule(). - * This should not cause hiccups/stalls to the live traffic. - */ - list_for_each_entry(napi, &dev->napi_list, dev_list) { - if (!threaded && napi->thread) - napi_stop_kthread(napi); - else - assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); - } + /* The error should not occur as the kthreads are already created. */ + list_for_each_entry(napi, &dev->napi_list, dev_list) + WARN_ON_ONCE(napi_set_threaded(napi, threaded)); return err; } diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 3556f3563e08..984ece05f7f9 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -11,6 +11,7 @@ TEST_GEN_FILES := \ TEST_PROGS := \ napi_id.py \ + napi_threaded.py \ netcons_basic.sh \ netcons_cmdline.sh \ netcons_fragmented_msg.sh \ diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py new file mode 100755 index 000000000000..b2698db39817 --- /dev/null +++ b/tools/testing/selftests/drivers/net/napi_threaded.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Test napi threaded states. 
+""" + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, ksft_ne, ksft_ge +from lib.py import NetDrvEnv, NetdevFamily +from lib.py import cmd, defer, ethtool + + +def _assert_napi_threaded_enabled(nl, napi_id) -> None: + napi = nl.napi_get({'id': napi_id}) + ksft_eq(napi['threaded'], 'enabled') + ksft_ne(napi.get('pid'), None) + + +def _assert_napi_threaded_disabled(nl, napi_id) -> None: + napi = nl.napi_get({'id': napi_id}) + ksft_eq(napi['threaded'], 'disabled') + ksft_eq(napi.get('pid'), None) + + +def _set_threaded_state(cfg, threaded) -> None: + cmd(f"echo {threaded} > /sys/class/net/{cfg.ifname}/threaded") + + +def _setup_deferred_cleanup(cfg) -> None: + combined = ethtool(f"-l {cfg.ifname}", json=True)[0].get("combined", 0) + ksft_ge(combined, 2) + defer(ethtool, f"-L {cfg.ifname} combined {combined}") + + threaded = cmd(f"cat /sys/class/net/{cfg.ifname}/threaded").stdout + defer(_set_threaded_state, cfg, threaded) + + +def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: + """ + Test that when napi threaded is enabled at device level and + then disabled at napi level for one napi, the threaded state + of all napis is preserved after a change in number of queues. 
+ """ + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + ksft_ge(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + _setup_deferred_cleanup(cfg) + + # set threaded + _set_threaded_state(cfg, 1) + + # check napi threaded is set for both napis + _assert_napi_threaded_enabled(nl, napi0_id) + _assert_napi_threaded_enabled(nl, napi1_id) + + # disable threaded for napi1 + nl.napi_set({'id': napi1_id, 'threaded': 'disabled'}) + + cmd(f"ethtool -L {cfg.ifname} combined 1") + cmd(f"ethtool -L {cfg.ifname} combined 2") + _assert_napi_threaded_enabled(nl, napi0_id) + _assert_napi_threaded_disabled(nl, napi1_id) + + +def change_num_queues(cfg, nl) -> None: + """ + Test that when napi threaded is enabled at device level, + the napi threaded state is preserved after a change in + number of queues. + """ + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + ksft_ge(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + _setup_deferred_cleanup(cfg) + + # set threaded + _set_threaded_state(cfg, 1) + + # check napi threaded is set for both napis + _assert_napi_threaded_enabled(nl, napi0_id) + _assert_napi_threaded_enabled(nl, napi1_id) + + cmd(f"ethtool -L {cfg.ifname} combined 1") + cmd(f"ethtool -L {cfg.ifname} combined 2") + + # check napi threaded is set for both napis + _assert_napi_threaded_enabled(nl, napi0_id) + _assert_napi_threaded_enabled(nl, napi1_id) + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEnv(__file__, queue_count=2) as cfg: + ksft_run([change_num_queues, + enable_dev_threaded_disable_napi_threaded], + args=(cfg, NetdevFamily())) + ksft_exit() + + +if __name__ == "__main__": + main() From d942fe13f72bec92f6c689fbd74c5ec38228c16a Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Sun, 3 Aug 2025 23:32:16 +0530 Subject: [PATCH 1599/2411] net: ti: icssg-prueth: Fix skb handling for XDP_PASS emac_rx_packet() is a common function for handling traffic for both xdp and 
non-xdp use cases. Use common logic for handling skb with or without xdp to prevent any incorrect packet processing. This patch fixes ping working with XDP_PASS for icssg driver. Fixes: 62aa3246f4623 ("net: ti: icssg-prueth: Add XDP support") Signed-off-by: Meghana Malladi Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250803180216.3569139-1-m-malladi@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_common.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 12f25cec6255..57e5f1c88f50 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -706,9 +706,9 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) struct page_pool *pool; struct sk_buff *skb; struct xdp_buff xdp; + int headroom, ret; u32 *psdata; void *pa; - int ret; *xdp_state = 0; pool = rx_chn->pg_pool; @@ -757,22 +757,23 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) xdp_prepare_buff(&xdp, pa, PRUETH_HEADROOM, pkt_len, false); *xdp_state = emac_run_xdp(emac, &xdp, page, &pkt_len); - if (*xdp_state == ICSSG_XDP_PASS) - skb = xdp_build_skb_from_buff(&xdp); - else + if (*xdp_state != ICSSG_XDP_PASS) goto requeue; + headroom = xdp.data - xdp.data_hard_start; + pkt_len = xdp.data_end - xdp.data; } else { - /* prepare skb and send to n/w stack */ - skb = napi_build_skb(pa, PAGE_SIZE); + headroom = PRUETH_HEADROOM; } + /* prepare skb and send to n/w stack */ + skb = napi_build_skb(pa, PAGE_SIZE); if (!skb) { ndev->stats.rx_dropped++; page_pool_recycle_direct(pool, page); goto requeue; } - skb_reserve(skb, PRUETH_HEADROOM); + skb_reserve(skb, headroom); skb_put(skb, pkt_len); skb->dev = ndev; From 1918f983687aa73bf0e5bc73431898994fce35a8 Mon Sep 17 00:00:00 2001 From: Suchit Karunakaran Date: Sun, 27 Jul 2025 01:13:07 +0530 
Subject: [PATCH 1600/2411] kconfig: lxdialog: replace strcpy with snprintf in print_autowrap strcpy() does not perform bounds checking and can lead to buffer overflows if the source string exceeds the destination buffer size. In print_autowrap(), replace strcpy() with snprintf() to safely copy the prompt string into the fixed-size tempstr buffer. Signed-off-by: Suchit Karunakaran Signed-off-by: Masahiro Yamada --- scripts/kconfig/lxdialog/util.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/kconfig/lxdialog/util.c b/scripts/kconfig/lxdialog/util.c index 964139c87fcb..b34000beb294 100644 --- a/scripts/kconfig/lxdialog/util.c +++ b/scripts/kconfig/lxdialog/util.c @@ -345,8 +345,7 @@ void print_autowrap(WINDOW * win, const char *prompt, int width, int y, int x) int prompt_len, room, wlen; char tempstr[MAX_LEN + 1], *word, *sp, *sp2, *newline_separator = 0; - strcpy(tempstr, prompt); - + snprintf(tempstr, sizeof(tempstr), "%s", prompt); prompt_len = strlen(tempstr); if (prompt_len <= width - x * 2) { /* If prompt is short */ From 5ac726653a1029a2eccba93bbe59e01fc9725828 Mon Sep 17 00:00:00 2001 From: Suchit Karunakaran Date: Sun, 27 Jul 2025 22:14:33 +0530 Subject: [PATCH 1601/2411] kconfig: lxdialog: replace strcpy() with strncpy() in inputbox.c strcpy() performs no bounds checking and can lead to buffer overflows if the input string exceeds the destination buffer size. This patch replaces it with strncpy(), and null terminates the input string. 
Signed-off-by: Suchit Karunakaran Reviewed-by: Nicolas Schier Signed-off-by: Masahiro Yamada --- scripts/kconfig/lxdialog/inputbox.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/lxdialog/inputbox.c b/scripts/kconfig/lxdialog/inputbox.c index 3c6e24b20f5b..5e4a131724f2 100644 --- a/scripts/kconfig/lxdialog/inputbox.c +++ b/scripts/kconfig/lxdialog/inputbox.c @@ -39,8 +39,10 @@ int dialog_inputbox(const char *title, const char *prompt, int height, int width if (!init) instr[0] = '\0'; - else - strcpy(instr, init); + else { + strncpy(instr, init, sizeof(dialog_input_result) - 1); + instr[sizeof(dialog_input_result) - 1] = '\0'; + } do_resize: if (getmaxy(stdscr) <= (height - INPUTBOX_HEIGHT_MIN)) From 936599ca514973d44a766b7376c6bbdc96b6a8cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 28 Jul 2025 15:47:37 +0200 Subject: [PATCH 1602/2411] kbuild: userprogs: use correct linker when mixing clang and GNU ld MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The userprogs infrastructure does not expect clang being used with GNU ld and in that case uses /usr/bin/ld for linking, not the configured $(LD). This fallback is problematic as it will break when cross-compiling. Mixing clang and GNU ld is used for example when building for SPARC64, as ld.lld is not sufficient; see Documentation/kbuild/llvm.rst. Relax the check around --ld-path so it gets used for all linkers. 
Fixes: dfc1b168a8c4 ("kbuild: userprogs: use correct lld when linking through clang") Cc: stable@vger.kernel.org Signed-off-by: Thomas Weißschuh Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ba0827a1fccd..f4009f7238c7 100644 --- a/Makefile +++ b/Makefile @@ -1134,7 +1134,7 @@ KBUILD_USERCFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) # userspace programs are linked via the compiler, use the correct linker -ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_LD_IS_LLD),yy) +ifdef CONFIG_CC_IS_CLANG KBUILD_USERLDFLAGS += --ld-path=$(LD) endif From 73d210e9faf85c36d5c9d2e38cb42c2d9837ee51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Tue, 29 Jul 2025 15:24:55 +0200 Subject: [PATCH 1603/2411] kheaders: make it possible to override TAR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 86cdd2fdc4e3 ("kheaders: make headers archive reproducible") introduced a number of options specific to GNU tar to the `tar` invocation in `gen_kheaders.sh` script. This causes the script to fail to work on systems where `tar` is not GNU tar. This can occur e.g. on recent Gentoo Linux installations that support using bsdtar from libarchive instead. Add a `TAR` make variable to make it possible to override the tar executable used, e.g. 
by specifying: make TAR=gtar Link: https://bugs.gentoo.org/884061 Reported-by: Sam James Tested-by: Sam James Co-developed-by: Masahiro Yamada Signed-off-by: Michał Górny Signed-off-by: Sam James Signed-off-by: Masahiro Yamada --- Makefile | 3 ++- kernel/gen_kheaders.sh | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index f4009f7238c7..6bc19b23d28d 100644 --- a/Makefile +++ b/Makefile @@ -543,6 +543,7 @@ LZMA = lzma LZ4 = lz4 XZ = xz ZSTD = zstd +TAR = tar CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void -Wno-unknown-attribute $(CF) @@ -622,7 +623,7 @@ export RUSTC RUSTDOC RUSTFMT RUSTC_OR_CLIPPY_QUIET RUSTC_OR_CLIPPY BINDGEN export HOSTRUSTC KBUILD_HOSTRUSTFLAGS export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX -export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD +export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD TAR export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS KBUILD_PROCMACROLDFLAGS LDFLAGS_MODULE export KBUILD_USERCFLAGS KBUILD_USERLDFLAGS diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index c64e5a00a3d9..896a503dfb29 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -30,8 +30,8 @@ rm -rf "${tmpdir}" mkdir "${tmpdir}" # shellcheck disable=SC2154 # srctree is passed as an env variable -sed "s:^${srctree}/::" "${srclist}" | tar -c -f - -C "${srctree}" -T - | tar -xf - -C "${tmpdir}" -tar -c -f - -T "${objlist}" | tar -xf - -C "${tmpdir}" +sed "s:^${srctree}/::" "${srclist}" | ${TAR} -c -f - -C "${srctree}" -T - | ${TAR} -xf - -C "${tmpdir}" +${TAR} -c -f - -T "${objlist}" | ${TAR} -xf - -C "${tmpdir}" # Remove comments except SDPX lines # Use a temporary file to store directory contents to prevent find/xargs from @@ -43,7 +43,7 @@ xargs -0 -P8 -n1 \ rm -f "${tmpdir}.contents.txt" # Create archive and try to normalize metadata for 
reproducibility. -tar "${timestamp:+--mtime=$timestamp}" \ +${TAR} "${timestamp:+--mtime=$timestamp}" \ --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \ -I "${XZ}" -cf "${tarfile}" -C "${tmpdir}/" . > /dev/null From 8d6841d5cb20dcee7bf9ba98cb6dbcbf5bccfea5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 4 Aug 2025 23:20:07 +0900 Subject: [PATCH 1604/2411] MAINTAINERS: hand over Kbuild maintenance I'm stepping down as the maintainer of Kbuild/Kconfig. It was enjoyable to refactor and improve the kernel build system, but due to personal reasons, I believe it's difficult for me to continue in this role any further. I discussed this off-list with Nathan and Nicolas, and they have kindly agreed to take over the maintenance of Kbuild with Odd Fixes. I'm grateful to them for stepping in. As for Kconfig, there are currently no designated reviewers, so the maintainer position will remain vacant for now. I hope someone will step up to take on the role. Signed-off-by: Masahiro Yamada Acked-by: Nathan Chancellor Acked-by: Nicolas Schier --- CREDITS | 6 ++++++ MAINTAINERS | 13 +++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/CREDITS b/CREDITS index 45446ae322ec..d134a8a63fa4 100644 --- a/CREDITS +++ b/CREDITS @@ -4369,6 +4369,12 @@ S: 542 West 112th Street, 5N S: New York, New York 10025 S: USA +N: Masahiro Yamada +E: masahiroy@kernel.org +D: Kbuild Maintainer 2017-2025 +D: Kconfig Maintainer 2018-2025 +S: Japan + N: Li Yang E: leoli@freescale.com D: Freescale Highspeed USB device driver diff --git a/MAINTAINERS b/MAINTAINERS index 0c1d245bf7b8..af3c328bf33a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12987,11 +12987,9 @@ F: mm/kasan/ F: scripts/Makefile.kasan KCONFIG -M: Masahiro Yamada L: linux-kbuild@vger.kernel.org -S: Maintained +S: Orphan Q: https://patchwork.kernel.org/project/linux-kbuild/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git kbuild F: 
Documentation/kbuild/kconfig* F: scripts/Kconfig.include F: scripts/kconfig/ @@ -13056,13 +13054,12 @@ S: Maintained F: fs/autofs/ KERNEL BUILD + files below scripts/ (unless maintained elsewhere) -M: Masahiro Yamada -R: Nathan Chancellor -R: Nicolas Schier +M: Nathan Chancellor +M: Nicolas Schier L: linux-kbuild@vger.kernel.org -S: Maintained +S: Odd Fixes Q: https://patchwork.kernel.org/project/linux-kbuild/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/kbuild/linux.git F: Documentation/kbuild/ F: Makefile F: scripts/*vmlinux* From 8466d393700f9ccef68134d3349f4e0a087679b9 Mon Sep 17 00:00:00 2001 From: Ammar Faizi Date: Wed, 6 Aug 2025 07:31:05 +0700 Subject: [PATCH 1605/2411] net: usbnet: Fix the wrong netif_carrier_on() call The commit referenced in the Fixes tag causes usbnet to malfunction (identified via git bisect). Post-commit, my external RJ45 LAN cable fails to connect. Linus also reported the same issue after pulling that commit. The code has a logic error: netif_carrier_on() is only called when the link is already on. Fix this by moving the netif_carrier_on() call outside the if-statement entirely. This ensures it is always called when EVENT_LINK_CARRIER_ON is set and properly clears it regardless of the link state. 
Cc: stable@vger.kernel.org Cc: Armando Budianto Reviewed-by: Simon Horman Suggested-by: Linus Torvalds Link: https://lore.kernel.org/all/CAHk-=wjqL4uF0MG_c8+xHX1Vv8==sPYQrtzbdA3kzi96284nuQ@mail.gmail.com Closes: https://lore.kernel.org/netdev/CAHk-=wjKh8X4PT_mU1kD4GQrbjivMfPn-_hXa6han_BTDcXddw@mail.gmail.com Closes: https://lore.kernel.org/netdev/0752dee6-43d6-4e1f-81d2-4248142cccd2@gnuweeb.org Fixes: 0d9cfc9b8cb1 ("net: usbnet: Avoid potential RCU stall on LINK_CHANGE event") Signed-off-by: Ammar Faizi Signed-off-by: Linus Torvalds --- drivers/net/usb/usbnet.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index a38ffbf4b3f0..511c4154cf74 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1120,6 +1120,9 @@ static void __handle_link_change(struct usbnet *dev) if (!test_bit(EVENT_DEV_OPEN, &dev->flags)) return; + if (test_and_clear_bit(EVENT_LINK_CARRIER_ON, &dev->flags)) + netif_carrier_on(dev->net); + if (!netif_carrier_ok(dev->net)) { /* kill URBs for reading packets to save bus bandwidth */ unlink_urbs(dev, &dev->rxq); @@ -1129,9 +1132,6 @@ static void __handle_link_change(struct usbnet *dev) * tx queue is stopped by netcore after link becomes off */ } else { - if (test_and_clear_bit(EVENT_LINK_CARRIER_ON, &dev->flags)) - netif_carrier_on(dev->net); - /* submitting URBs for reading packets */ queue_work(system_bh_wq, &dev->bh_work); } From 034d319c8899e8c5c0a35c6692c7fc7e8c12c374 Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Tue, 29 Jul 2025 04:27:11 +0530 Subject: [PATCH 1606/2411] scsi: ufs: core: Fix interrupt handling for MCQ Mode Commit 3c7ac40d7322 ("scsi: ufs: core: Delegate the interrupt service routine to a threaded IRQ handler") introduced a regression where the UFS interrupt status register (IS) was not cleared in ufshcd_intr() when operating in MCQ mode. As a result, the IS register remained uncleared. 
This led to a persistent issue during UIC interrupts: ufshcd_is_auto_hibern8_error() consistently returned true because the UFSHCD_UIC_HIBERN8_MASK bit was set, while the active command was neither UIC_CMD_DME_HIBER_ENTER nor UIC_CMD_DME_HIBER_EXIT. This caused continuous auto hibern8 enter errors and the device failed to boot. To fix this, ensure that the interrupt status register is properly cleared in the ufshcd_intr() function for MCQ mode with ESI enabled. [ 4.553226] ufshcd-qcom 1d84000.ufs: ufshcd_check_errors: Auto Hibern8 Enter failed - status: 0x00000040, upmcrs: 0x00000001 [ 4.553229] ufshcd-qcom 1d84000.ufs: ufshcd_check_errors: saved_err 0x40 saved_uic_err 0x0 [ 4.553311] host_regs: 00000000: d5c7033f 20e0071f 00000400 00000000 [ 4.553312] host_regs: 00000010: 01000000 00010217 00000c96 00000000 [ 4.553314] host_regs: 00000020: 00000440 00170ef5 00000000 00000000 [ 4.553316] host_regs: 00000030: 0000010f 00000001 00000000 00000000 [ 4.553317] host_regs: 00000040: 00000000 00000000 00000000 00000000 [ 4.553319] host_regs: 00000050: fffdf000 0000000f 00000000 00000000 [ 4.553320] host_regs: 00000060: 00000001 80000000 00000000 00000000 [ 4.553322] host_regs: 00000070: fffde000 0000000f 00000000 00000000 [ 4.553323] host_regs: 00000080: 00000001 00000000 00000000 00000000 [ 4.553325] host_regs: 00000090: 00000002 d0020000 00000000 01930200 Fixes: 3c7ac40d7322 ("scsi: ufs: core: Delegate the interrupt service routine to a threaded IRQ handler") Co-developed-by: Palash Kambar Signed-off-by: Palash Kambar Signed-off-by: Nitin Rawat Link: https://lore.kernel.org/r/20250728225711.29273-1-quic_nitirawa@quicinc.com Tested-by: Neil Armstrong # on SM8650-QRD Reviewed-by: Bart Van Assche Reviewed-by: Peter Wang Signed-off-by: Martin K.
Petersen --- drivers/ufs/core/ufshcd.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 5442bb8540b5..baf28bd748cc 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -7133,14 +7133,19 @@ static irqreturn_t ufshcd_threaded_intr(int irq, void *__hba) static irqreturn_t ufshcd_intr(int irq, void *__hba) { struct ufs_hba *hba = __hba; + u32 intr_status, enabled_intr_status; /* Move interrupt handling to thread when MCQ & ESI are not enabled */ if (!hba->mcq_enabled || !hba->mcq_esi_enabled) return IRQ_WAKE_THREAD; + intr_status = ufshcd_readl(hba, REG_INTERRUPT_STATUS); + enabled_intr_status = intr_status & ufshcd_readl(hba, REG_INTERRUPT_ENABLE); + + ufshcd_writel(hba, intr_status, REG_INTERRUPT_STATUS); + /* Directly handle interrupts since MCQ ESI handlers does the hard job */ - return ufshcd_sl_intr(hba, ufshcd_readl(hba, REG_INTERRUPT_STATUS) & - ufshcd_readl(hba, REG_INTERRUPT_ENABLE)); + return ufshcd_sl_intr(hba, enabled_intr_status); } static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag) From a59976116a01dad1c72460f9ed700bf4b3fdbebd Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 31 Jul 2025 13:33:11 +0200 Subject: [PATCH 1607/2411] scsi: lpfc: Fix wrong function reference in a comment Function scsi_host_remove() doesn't exist, the actual function name is scsi_remove_host(). Signed-off-by: Jean Delvare Link: https://lore.kernel.org/r/20250731133311.52034cc4@endymion Reviewed-by: Justin Tee Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_vport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index 2797aa75a689..aff6c9d5e7c2 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -666,7 +666,7 @@ lpfc_vport_delete(struct fc_vport *fc_vport) * Take early refcount for outstanding I/O requests we schedule during * delete processing for unreg_vpi. Always keep this before * scsi_remove_host() as we can no longer obtain a reference through - * scsi_host_get() after scsi_host_remove as shost is set to SHOST_DEL. + * scsi_host_get() after scsi_remove_host as shost is set to SHOST_DEL. */ if (!scsi_host_get(shost)) return VPORT_INVAL; From 7881cd6886a89eda848192d3f5759ce08672e084 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 5 Aug 2025 08:58:20 -0400 Subject: [PATCH 1608/2411] media: venus: Fix OPP table error handling The venus driver fails to check if dev_pm_opp_find_freq_{ceil,floor}() returns an error pointer before calling dev_pm_opp_put(). This causes a crash when OPP tables are not present in device tree. Unable to handle kernel access to user memory outside uaccess routines at virtual address 000000000000002e ... pc : dev_pm_opp_put+0x1c/0x4c lr : core_clks_enable+0x4c/0x16c [venus_core] Add IS_ERR() checks before calling dev_pm_opp_put() to avoid dereferencing error pointers. 
Fixes: b179234b5e59 ("media: venus: pm_helpers: use opp-table for the frequency") Signed-off-by: Sasha Levin Signed-off-by: Linus Torvalds --- drivers/media/platform/qcom/venus/pm_helpers.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/media/platform/qcom/venus/pm_helpers.c b/drivers/media/platform/qcom/venus/pm_helpers.c index 8dd5a9b0d060..e32f8862a9f9 100644 --- a/drivers/media/platform/qcom/venus/pm_helpers.c +++ b/drivers/media/platform/qcom/venus/pm_helpers.c @@ -48,7 +48,8 @@ static int core_clks_enable(struct venus_core *core) int ret; opp = dev_pm_opp_find_freq_ceil(dev, &freq); - dev_pm_opp_put(opp); + if (!IS_ERR(opp)) + dev_pm_opp_put(opp); for (i = 0; i < res->clks_num; i++) { if (IS_V6(core)) { @@ -660,7 +661,8 @@ static int decide_core(struct venus_inst *inst) /*TODO : divide this inst->load by work_route */ opp = dev_pm_opp_find_freq_floor(dev, &max_freq); - dev_pm_opp_put(opp); + if (!IS_ERR(opp)) + dev_pm_opp_put(opp); min_loaded_core(inst, &min_coreid, &min_load, false); min_loaded_core(inst, &min_lp_coreid, &min_lp_load, true); @@ -1121,7 +1123,8 @@ static int load_scale_v4(struct venus_inst *inst) freq = max(freq_core1, freq_core2); opp = dev_pm_opp_find_freq_floor(dev, &max_freq); - dev_pm_opp_put(opp); + if (!IS_ERR(opp)) + dev_pm_opp_put(opp); if (freq > max_freq) { dev_dbg(dev, VDBGL "requested clock rate: %lu scaling clock rate : %lu\n", @@ -1131,7 +1134,8 @@ static int load_scale_v4(struct venus_inst *inst) } opp = dev_pm_opp_find_freq_ceil(dev, &freq); - dev_pm_opp_put(opp); + if (!IS_ERR(opp)) + dev_pm_opp_put(opp); set_freq: From eea6cafb5890db488fce1c69d05464214616d800 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Fri, 1 Aug 2025 18:52:02 +0000 Subject: [PATCH 1609/2411] scsi: lpfc: Remove redundant assignment to avoid memory leak Remove the redundant assignment if kzalloc() succeeds to avoid memory leak. 
Fixes: bd2cdd5e400f ("scsi: lpfc: NVME Initiator: Add debugfs support") Signed-off-by: Jiasheng Jiang Link: https://lore.kernel.org/r/20250801185202.42631-1-jiashengjiangcool@gmail.com Reviewed-by: Justin Tee Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_debugfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 2c7d876c64c7..47cffdf2a8ac 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -6289,7 +6289,6 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) } phba->nvmeio_trc_on = 1; phba->nvmeio_trc_output_idx = 0; - phba->nvmeio_trc = NULL; } else { nvmeio_off: phba->nvmeio_trc_size = 0; From 7ec2bd6cd2d0ce6d6224519f895cb932ed5af667 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 4 Aug 2025 14:01:54 +0800 Subject: [PATCH 1610/2411] scsi: ufs: mediatek: Fix out-of-bounds access in MCQ IRQ mapping Address a potential out-of-bounds access issue when accessing 'host->mcq_intr_info[q_index]'. The value of 'q_index' might exceed the valid array bounds if 'q_index == nr'. Correct condition to 'q_index >= nr' to prevent accessing invalid memory. Fixes: 66e26a4b8a77 ("scsi: ufs: host: mediatek: Set IRQ affinity policy for MCQ mode") Cc: stable@vger.kernel.org Reported-by: Dan Carpenter Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20250804060249.1387057-1-peter.wang@mediatek.com Signed-off-by: Martin K. 
Petersen --- drivers/ufs/host/ufs-mediatek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 86ae73b89d4d..f902ce08c95a 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -818,7 +818,7 @@ static u32 ufs_mtk_mcq_get_irq(struct ufs_hba *hba, unsigned int cpu) unsigned int q_index; q_index = map->mq_map[cpu]; - if (q_index > nr) { + if (q_index >= nr) { dev_err(hba->dev, "hwq index %d exceed %d\n", q_index, nr); return MTK_MCQ_INVALID_IRQ; From 72fc388d8bc0b49fd038477b74618cc15ce18b56 Mon Sep 17 00:00:00 2001 From: Waqar Hameed Date: Tue, 5 Aug 2025 11:33:36 +0200 Subject: [PATCH 1611/2411] scsi: ufs: core: Remove error print for devm_add_action_or_reset() When devm_add_action_or_reset() fails, it is due to a failed memory allocation and will thus return -ENOMEM. dev_err_probe() doesn't do anything when error is -ENOMEM. Therefore, remove the useless call to dev_err_probe() when devm_add_action_or_reset() fails, and just return the value instead. Signed-off-by: Waqar Hameed Link: https://lore.kernel.org/r/pndtt2mkt8v.a.out@axis.com Signed-off-by: Martin K. 
Petersen --- drivers/ufs/core/ufshcd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index baf28bd748cc..2e1fa8cf83f5 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -10515,8 +10515,7 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle) err = devm_add_action_or_reset(dev, ufshcd_devres_release, host); if (err) - return dev_err_probe(dev, err, - "failed to add ufshcd dealloc action\n"); + return err; host->nr_maps = HCTX_TYPE_POLL + 1; hba = shost_priv(host); From 8cbe564974248ee980562be02f2b1912769562c7 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 6 Aug 2025 01:41:53 +0200 Subject: [PATCH 1612/2411] ALSA: intel_hdmi: Fix off-by-one error in __hdmi_lpe_audio_probe() In __hdmi_lpe_audio_probe(), strscpy() is incorrectly called with the length of the source string (excluding the NUL terminator) rather than the size of the destination buffer. This results in one character less being copied from 'card->shortname' to 'pcm->name'. Use the destination buffer size instead to ensure the card name is copied correctly. 
Cc: stable@vger.kernel.org Fixes: 75b1a8f9d62e ("ALSA: Convert strlcpy to strscpy when return value is unused") Signed-off-by: Thorsten Blum Link: https://patch.msgid.link/20250805234156.60294-1-thorsten.blum@linux.dev Signed-off-by: Takashi Iwai --- sound/x86/intel_hdmi_audio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index cc54539c6030..01f49555c5f6 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1765,7 +1765,7 @@ static int __hdmi_lpe_audio_probe(struct platform_device *pdev) /* setup private data which can be retrieved when required */ pcm->private_data = ctx; pcm->info_flags = 0; - strscpy(pcm->name, card->shortname, strlen(card->shortname)); + strscpy(pcm->name, card->shortname, sizeof(pcm->name)); /* setup the ops for playback */ snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &had_pcm_ops); From cac5f2af13459f6258c4857d2e61ea53d0dfd751 Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Tue, 5 Aug 2025 15:09:45 +0800 Subject: [PATCH 1613/2411] ALSA: hda/tas2781: Support L"SmartAmpCalibrationData" to save calibrated data Some devices save the calibrated data into L"CALI_DATA", and others into L"SmartAmpCalibrationData". Driver code will support both. 
Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20250805070945.524-1-shenghao-ding@ti.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/side-codecs/tas2781_hda.c | 47 +++++++++++++++------- sound/hda/codecs/side-codecs/tas2781_hda.h | 2 +- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/sound/hda/codecs/side-codecs/tas2781_hda.c b/sound/hda/codecs/side-codecs/tas2781_hda.c index 34217ce9f28e..f46d2e06c64f 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda.c +++ b/sound/hda/codecs/side-codecs/tas2781_hda.c @@ -18,6 +18,8 @@ #include "tas2781_hda.h" +#define CALIBRATION_DATA_AREA_NUM 2 + const efi_guid_t tasdev_fct_efi_guid[] = { /* DELL */ EFI_GUID(0xcc92382d, 0x6337, 0x41cb, 0xa8, 0x8b, 0x8e, 0xce, 0x74, @@ -160,36 +162,51 @@ int tas2781_save_calibration(struct tas2781_hda *hda) * manufactory. */ efi_guid_t efi_guid = tasdev_fct_efi_guid[LENOVO]; - static efi_char16_t efi_name[] = TASDEVICE_CALIBRATION_DATA_NAME; + /* + * Some devices save the calibrated data into L"CALI_DATA", + * and others into L"SmartAmpCalibrationData". + */ + static efi_char16_t *efi_name[CALIBRATION_DATA_AREA_NUM] = { + L"CALI_DATA", + L"SmartAmpCalibrationData", + }; struct tasdevice_priv *p = hda->priv; struct calidata *cali_data = &p->cali_data; unsigned long total_sz = 0; unsigned int attr, size; unsigned char *data; efi_status_t status; + int i; if (hda->catlog_id < LENOVO) efi_guid = tasdev_fct_efi_guid[hda->catlog_id]; cali_data->cali_dat_sz_per_dev = 20; size = p->ndev * (cali_data->cali_dat_sz_per_dev + 1); - /* Get real size of UEFI variable */ - status = efi.get_variable(efi_name, &efi_guid, &attr, &total_sz, NULL); - cali_data->total_sz = total_sz > size ? 
total_sz : size; - if (status == EFI_BUFFER_TOO_SMALL) { - /* Allocate data buffer of data_size bytes */ - data = p->cali_data.data = devm_kzalloc(p->dev, - p->cali_data.total_sz, GFP_KERNEL); - if (!data) { - p->cali_data.total_sz = 0; - return -ENOMEM; + for (i = 0; i < CALIBRATION_DATA_AREA_NUM; i++) { + /* Get real size of UEFI variable */ + status = efi.get_variable(efi_name[i], &efi_guid, &attr, + &total_sz, NULL); + cali_data->total_sz = total_sz > size ? total_sz : size; + if (status == EFI_BUFFER_TOO_SMALL) { + /* Allocate data buffer of data_size bytes */ + data = cali_data->data = devm_kzalloc(p->dev, + cali_data->total_sz, GFP_KERNEL); + if (!data) { + status = -ENOMEM; + continue; + } + /* Get variable contents into buffer */ + status = efi.get_variable(efi_name[i], &efi_guid, + &attr, &cali_data->total_sz, data); } - /* Get variable contents into buffer */ - status = efi.get_variable(efi_name, &efi_guid, &attr, - &p->cali_data.total_sz, data); + /* Check whether get the calibrated data */ + if (status == EFI_SUCCESS) + break; } + if (status != EFI_SUCCESS) { - p->cali_data.total_sz = 0; + cali_data->total_sz = 0; return status; } diff --git a/sound/hda/codecs/side-codecs/tas2781_hda.h b/sound/hda/codecs/side-codecs/tas2781_hda.h index 575a701c8dfb..66188909a0bb 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda.h +++ b/sound/hda/codecs/side-codecs/tas2781_hda.h @@ -11,7 +11,7 @@ /* Flag of calibration registers address. */ #define TASDEV_UEFI_CALI_REG_ADDR_FLG BIT(7) -#define TASDEVICE_CALIBRATION_DATA_NAME L"CALI_DATA" + #define TASDEV_CALIB_N 5 /* From 42e42562c9cfcdacf000f1b42284a4fad24f8546 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 4 Aug 2025 11:05:43 +0200 Subject: [PATCH 1614/2411] xfrm: flush all states in xfrm_state_fini While reverting commit f75a2804da39 ("xfrm: destroy xfrm_state synchronously on net exit path"), I incorrectly changed xfrm_state_flush's "proto" argument back to IPSEC_PROTO_ANY. 
This reverts some of the changes in commit dbb2483b2a46 ("xfrm: clean up xfrm protocol checks"), and leads to some states not being removed when we exit the netns. Pass 0 instead of IPSEC_PROTO_ANY from both xfrm_state_fini and xfrm6_tunnel_net_exit, so that xfrm_state_flush deletes all states. Fixes: 2a198bbec691 ("Revert "xfrm: destroy xfrm_state synchronously on net exit path"") Reported-by: syzbot+6641a61fe0e2e89ae8c5@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=6641a61fe0e2e89ae8c5 Tested-by: syzbot+6641a61fe0e2e89ae8c5@syzkaller.appspotmail.com Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_tunnel.c | 2 +- net/xfrm/xfrm_state.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 5120a763da0d..0a0eeaed0591 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -334,7 +334,7 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net) struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); unsigned int i; - xfrm_state_flush(net, IPSEC_PROTO_ANY, false); + xfrm_state_flush(net, 0, false); xfrm_flush_gc(); for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 77db3b5fe4ac..78fcbb89cf32 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -3297,7 +3297,7 @@ void xfrm_state_fini(struct net *net) unsigned int sz; flush_work(&net->xfrm.state_hash_work); - xfrm_state_flush(net, IPSEC_PROTO_ANY, false); + xfrm_state_flush(net, 0, false); flush_work(&xfrm_state_gc_work); WARN_ON(!list_empty(&net->xfrm.state_all)); From 5b65258229117995eb6c4bd74995e15fb5f2cfe3 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 5 Aug 2025 11:32:20 -0700 Subject: [PATCH 1615/2411] genirq/test: Resolve irq lock inversion warnings irq_shutdown_and_deactivate() is normally called with the descriptor lock held, and interrupts
disabled. Nested a few levels down, it grabs the global irq_resend_lock. Lockdep rightfully complains when interrupts are not disabled: CPU0 CPU1 ---- ---- lock(irq_resend_lock); local_irq_disable(); lock(&irq_desc_lock_class); lock(irq_resend_lock); lock(&irq_desc_lock_class); ... _raw_spin_lock+0x2b/0x40 clear_irq_resend+0x14/0x70 irq_shutdown_and_deactivate+0x29/0x80 irq_shutdown_depth_test+0x1ce/0x600 kunit_try_run_case+0x90/0x120 Grab the descriptor lock and disable interrupts, to resolve the problem. Fixes: 66067c3c8a1e ("genirq: Add kunit tests for depth counts") Reported-by: Guenter Roeck Signed-off-by: Brian Norris Signed-off-by: Thomas Gleixner Tested-by: Guenter Roeck Link: https://lore.kernel.org/all/aJJONEIoIiTSDMqc@google.com Closes: https://lore.kernel.org/lkml/31a761e4-8f81-40cf-aaf5-d220ba11911c@roeck-us.net/ --- kernel/irq/irq_test.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/irq/irq_test.c b/kernel/irq/irq_test.c index 5161b56a12f9..a75abebed7f2 100644 --- a/kernel/irq/irq_test.c +++ b/kernel/irq/irq_test.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: LGPL-2.1+ +#include #include #include #include @@ -134,7 +135,8 @@ static void irq_shutdown_depth_test(struct kunit *test) disable_irq(virq); KUNIT_EXPECT_EQ(test, desc->depth, 1); - irq_shutdown_and_deactivate(desc); + scoped_guard(raw_spinlock_irqsave, &desc->lock) + irq_shutdown_and_deactivate(desc); KUNIT_EXPECT_FALSE(test, irqd_is_activated(data)); KUNIT_EXPECT_FALSE(test, irqd_is_started(data)); From 3b6a18f0da8720d612d8a682ea5c55870da068e0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Aug 2025 18:09:49 +0200 Subject: [PATCH 1616/2411] irqchip: Build IMX_MU_MSI only on ARM Compile-testing IMX_MU_MSI on x86 without PCI_MSI support results in a build failure: drivers/gpio/gpio-sprd.c:8: include/linux/gpio/driver.h:41:33: error: field 'msiinfo' has incomplete type drivers/iommu/iommufd/viommu.c:4: include/linux/msi.h:528:33: error: field 'alloc_info' 
has incomplete type Tighten the dependency further to only allow compile testing on Arm. This could be refined further to allow certain x86 configs. This was submitted before to address a different build failure, which was fixed differently, but the problem has now returned in a different form. Fixes: 70afdab904d2d1e6 ("irqchip: Add IMX MU MSI controller driver") Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250805160952.4006075-1-arnd@kernel.org Link: https://lore.kernel.org/all/20221215164109.761427-1-arnd@kernel.org/ --- drivers/irqchip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 39a6ae1d574b..6d12c6ab9ea4 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -554,6 +554,7 @@ config IMX_MU_MSI tristate "i.MX MU used as MSI controller" depends on OF && HAS_IOMEM depends on ARCH_MXC || COMPILE_TEST + depends on ARM || ARM64 default m if ARCH_MXC select IRQ_DOMAIN select IRQ_DOMAIN_HIERARCHY From 0a32e4f0025a74c70dcab4478e9b29c22f5ecf2f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 30 Jul 2025 19:18:37 +0100 Subject: [PATCH 1617/2411] btrfs: fix log tree replay failure due to file with 0 links and extents If we log a new inode (not persisted in a past transaction) that has 0 links and extents, then log another inode with a higher inode number, we end up failing to replay the log tree with -ENOENT. The steps for this are: 1) create new file A 2) write some data to file A 3) open an fd on file A 4) unlink file A 5) fsync file A using the previously open fd 6) create file B (has higher inode number than file A) 7) fsync file B 8) power fail before current transaction commits Now when attempting to mount the fs, the log replay will fail with -ENOENT at replay_one_extent() when attempting to replay the first extent of file A.
The failure comes when trying to open the inode for file A in the subvolume tree, since it doesn't exist. Before commit 5f61b961599a ("btrfs: fix inode lookup error handling during log replay"), the returned error was -EIO instead of -ENOENT, since we converted any errors when attempting to read an inode during log replay to -EIO. The reason for this is that the log replay procedure fails to ignore the current inode when we are at the stage LOG_WALK_REPLAY_ALL, our current inode has 0 links and the last inode we processed in the previous stage has a non-zero link count. In other words, the issue is that at replay_one_buffer() we only update wc->ignore_cur_inode if the current replay stage is LOG_WALK_REPLAY_INODES. Fix this by updating wc->ignore_cur_inode whenever we find an inode item regardless of the current replay stage. This is a simple solution and easy to backport, but later we can do other alternatives like avoiding logging extents or inode items other than the inode item for inodes with a link count of 0. The problem with the wc->ignore_cur_inode logic has been around since commit f2d72f42d5fa ("Btrfs: fix warning when replaying log after fsync of a tmpfile") but it only became frequent to hit since the more recent commit 5e85262e542d ("btrfs: fix fsync of files with no hard links not persisting deletion"), because we stopped skipping inodes with a link count of 0 when logging, while before the problem would only be triggered if trying to replay a log tree created with an older kernel which has a logged inode with 0 links. A test case for fstests will be submitted soon.
Reported-by: Peter Jung Link: https://lore.kernel.org/linux-btrfs/fce139db-4458-4788-bb97-c29acf6cb1df@cachyos.org/ Reported-by: burneddi Link: https://lore.kernel.org/linux-btrfs/lh4W-Lwc0Mbk-QvBhhQyZxf6VbM3E8VtIvU3fPIQgweP_Q1n7wtlUZQc33sYlCKYd-o6rryJQfhHaNAOWWRKxpAXhM8NZPojzsJPyHMf2qY=@protonmail.com/#t Reported-by: Russell Haley Link: https://lore.kernel.org/linux-btrfs/598ecc75-eb80-41b3-83c2-f2317fbb9864@gmail.com/ Fixes: f2d72f42d5fa ("Btrfs: fix warning when replaying log after fsync of a tmpfile") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 48 ++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9f05d454b9df..2186e87fb61b 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -321,8 +321,7 @@ struct walk_control { /* * Ignore any items from the inode currently being processed. Needs - * to be set every time we find a BTRFS_INODE_ITEM_KEY and we are in - * the LOG_WALK_REPLAY_INODES stage. + * to be set every time we find a BTRFS_INODE_ITEM_KEY. */ bool ignore_cur_inode; @@ -2465,23 +2464,30 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, nritems = btrfs_header_nritems(eb); for (i = 0; i < nritems; i++) { + struct btrfs_inode_item *inode_item; + btrfs_item_key_to_cpu(eb, &key, i); - /* inode keys are done during the first stage */ - if (key.type == BTRFS_INODE_ITEM_KEY && - wc->stage == LOG_WALK_REPLAY_INODES) { - struct btrfs_inode_item *inode_item; - u32 mode; - - inode_item = btrfs_item_ptr(eb, i, - struct btrfs_inode_item); + if (key.type == BTRFS_INODE_ITEM_KEY) { + inode_item = btrfs_item_ptr(eb, i, struct btrfs_inode_item); /* - * If we have a tmpfile (O_TMPFILE) that got fsync'ed - * and never got linked before the fsync, skip it, as - * replaying it is pointless since it would be deleted - * later. 
We skip logging tmpfiles, but it's always - * possible we are replaying a log created with a kernel - * that used to log tmpfiles. + * An inode with no links is either: + * + * 1) A tmpfile (O_TMPFILE) that got fsync'ed and never + * got linked before the fsync, skip it, as replaying + * it is pointless since it would be deleted later. + * We skip logging tmpfiles, but it's always possible + * we are replaying a log created with a kernel that + * used to log tmpfiles; + * + * 2) A non-tmpfile which got its last link deleted + * while holding an open fd on it and later got + * fsynced through that fd. We always log the + * parent inodes when inode->last_unlink_trans is + * set to the current transaction, so ignore all the + * inode items for this inode. We will delete the + * inode when processing the parent directory with + * replay_dir_deletes(). */ if (btrfs_inode_nlink(eb, inode_item) == 0) { wc->ignore_cur_inode = true; @@ -2489,8 +2495,14 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, } else { wc->ignore_cur_inode = false; } - ret = replay_xattr_deletes(wc->trans, root, log, - path, key.objectid); + } + + /* Inode keys are done during the first stage. 
*/ + if (key.type == BTRFS_INODE_ITEM_KEY && + wc->stage == LOG_WALK_REPLAY_INODES) { + u32 mode; + + ret = replay_xattr_deletes(wc->trans, root, log, path, key.objectid); if (ret) break; mode = btrfs_inode_mode(eb, inode_item); From 614d416dd8aee2675fb591c598308a901a660db8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Aug 2025 18:04:25 +0200 Subject: [PATCH 1618/2411] ASoC: SOF: Intel: hda-sdw-bpt: fix SND_SOF_SOF_HDA_SDW_BPT dependencies The hda-sdw-bpt code links against the soundwire driver, but that fails when trying to link from built-in code into loadable module: x86_64-linux-ld: vmlinux.o: in function `intel_ace2x_bpt_close_stream.isra.0': intel_ace2x.c:(.text+0x137a531): undefined reference to `hda_sdw_bpt_close' x86_64-linux-ld: vmlinux.o: in function `intel_ace2x_bpt_send_async': intel_ace2x.c:(.text+0x137aa45): undefined reference to `hda_sdw_bpt_open' x86_64-linux-ld: intel_ace2x.c:(.text+0x137ab67): undefined reference to `hda_sdw_bpt_close' x86_64-linux-ld: intel_ace2x.c:(.text+0x137ac30): undefined reference to `hda_sdw_bpt_send_async' x86_64-linux-ld: vmlinux.o: in function `intel_ace2x_bpt_wait': intel_ace2x.c:(.text+0x137aced): undefined reference to `hda_sdw_bpt_wait' Ensure that both SOUNDWIRE_INTEL and SND_SOF_SOF_HDA_SDW_BPT are selected at the same time by SND_SOC_SOF_INTEL_LNL, and that this happens even if SND_SOC_SOF_INTEL_SOUNDWIRE is a loadable module but SND_SOC_SOF_INTEL_LNL is built-in. This follows the same logic as commit c5a61db9bf89 ("ASoC: SOF: fix intel-soundwire link failure"). 
Fixes: 5d5cb86fb46e ("ASoC: SOF: Intel: hda-sdw-bpt: add helpers for SoundWire BPT DMA") Signed-off-by: Arnd Bergmann Reviewed-by: Bard Liao Link: https://patch.msgid.link/20250805160451.4004602-1-arnd@kernel.org Signed-off-by: Mark Brown --- sound/soc/sof/intel/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig index dc1d21de4ab7..4f27f8c8debf 100644 --- a/sound/soc/sof/intel/Kconfig +++ b/sound/soc/sof/intel/Kconfig @@ -266,9 +266,10 @@ config SND_SOC_SOF_METEORLAKE config SND_SOC_SOF_INTEL_LNL tristate + select SOUNDWIRE_INTEL if SND_SOC_SOF_INTEL_SOUNDWIRE != n select SND_SOC_SOF_HDA_GENERIC select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE - select SND_SOF_SOF_HDA_SDW_BPT if SND_SOC_SOF_INTEL_SOUNDWIRE + select SND_SOF_SOF_HDA_SDW_BPT if SND_SOC_SOF_INTEL_SOUNDWIRE != n select SND_SOC_SOF_IPC4 select SND_SOC_SOF_INTEL_MTL From f8f6e72fe28595969829d63db93ecaa56a0c2811 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 5 Aug 2025 20:57:50 +0300 Subject: [PATCH 1619/2411] drm/omap: Pass along the format info from .fb_create() to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plumb the format info from .fb_create() all the way to drm_helper_mode_fill_fb_struct() to avoid the redundant lookup. For the fbdev case a manual drm_get_format_info() lookup is needed. The patch is based on the driver parts of the patchset at Link: below, which missed converting the omap driver. Due to the absence of this change in the patchset at Link:, after the Fixes: commit below, omap_framebuffer_init() -> drm_helper_mode_fill_fb_struct() set drm_framebuffer::format incorrectly to NULL, which led to the !fb->format WARN() in drm_framebuffer_init() and caused framebuffer creation to fail. This patch fixes both of these issues. v2: Amend the commit log mentioning the functional issues the patch fixes.
(Tomi) Cc: Ville Syrjälä Cc: Tomi Valkeinen Cc: Thomas Zimmermann Cc: Maarten Lankhorst Cc: Maxime Ripard Fixes: 41ab92d35ccd ("drm: Make passing of format info to drm_helper_mode_fill_fb_struct() mandatory") Reported-by: Mark Brown Closes: https://lore.kernel.org/all/98b3a62c-91ff-4f91-a58b-e1265f84180b@sirena.org.uk Link: https://lore.kernel.org/all/20250701090722.13645-1-ville.syrjala@linux.intel.com Tested-by: Mark Brown Tested-by: Linux Kernel Functional Testing Acked-by: Alex Deucher Reviewed-by: Tomi Valkeinen Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250805175752.690504-2-imre.deak@intel.com --- drivers/gpu/drm/omapdrm/omap_fb.c | 23 ++++++++++------------- drivers/gpu/drm/omapdrm/omap_fb.h | 2 ++ drivers/gpu/drm/omapdrm/omap_fbdev.c | 5 ++++- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c index 30c81e2e5d6b..bb3105556f19 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.c +++ b/drivers/gpu/drm/omapdrm/omap_fb.c @@ -351,7 +351,7 @@ struct drm_framebuffer *omap_framebuffer_create(struct drm_device *dev, } } - fb = omap_framebuffer_init(dev, mode_cmd, bos); + fb = omap_framebuffer_init(dev, info, mode_cmd, bos); if (IS_ERR(fb)) goto error; @@ -365,9 +365,9 @@ struct drm_framebuffer *omap_framebuffer_create(struct drm_device *dev, } struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos) { - const struct drm_format_info *format = NULL; struct omap_framebuffer *omap_fb = NULL; struct drm_framebuffer *fb = NULL; unsigned int pitch = mode_cmd->pitches[0]; @@ -377,15 +377,12 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, dev, mode_cmd, mode_cmd->width, mode_cmd->height, (char *)&mode_cmd->pixel_format); - format = drm_get_format_info(dev, mode_cmd->pixel_format, - mode_cmd->modifier[0]); - for (i = 0; i < 
ARRAY_SIZE(formats); i++) { if (formats[i] == mode_cmd->pixel_format) break; } - if (!format || i == ARRAY_SIZE(formats)) { + if (i == ARRAY_SIZE(formats)) { dev_dbg(dev->dev, "unsupported pixel format: %4.4s\n", (char *)&mode_cmd->pixel_format); ret = -EINVAL; @@ -399,7 +396,7 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, } fb = &omap_fb->base; - omap_fb->format = format; + omap_fb->format = info; mutex_init(&omap_fb->lock); /* @@ -407,23 +404,23 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, * that the two planes of multiplane formats need the same number of * bytes per pixel. */ - if (format->num_planes == 2 && pitch != mode_cmd->pitches[1]) { + if (info->num_planes == 2 && pitch != mode_cmd->pitches[1]) { dev_dbg(dev->dev, "pitches differ between planes 0 and 1\n"); ret = -EINVAL; goto fail; } - if (pitch % format->cpp[0]) { + if (pitch % info->cpp[0]) { dev_dbg(dev->dev, "buffer pitch (%u bytes) is not a multiple of pixel size (%u bytes)\n", - pitch, format->cpp[0]); + pitch, info->cpp[0]); ret = -EINVAL; goto fail; } - for (i = 0; i < format->num_planes; i++) { + for (i = 0; i < info->num_planes; i++) { struct plane *plane = &omap_fb->planes[i]; - unsigned int vsub = i == 0 ? 1 : format->vsub; + unsigned int vsub = i == 0 ? 
1 : info->vsub; unsigned int size; size = pitch * mode_cmd->height / vsub; @@ -440,7 +437,7 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, plane->dma_addr = 0; } - drm_helper_mode_fill_fb_struct(dev, fb, NULL, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, info, mode_cmd); ret = drm_framebuffer_init(dev, fb, &omap_framebuffer_funcs); if (ret) { diff --git a/drivers/gpu/drm/omapdrm/omap_fb.h b/drivers/gpu/drm/omapdrm/omap_fb.h index 0873f953cf1d..e6010302a22b 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.h +++ b/drivers/gpu/drm/omapdrm/omap_fb.h @@ -13,6 +13,7 @@ struct drm_connector; struct drm_device; struct drm_file; struct drm_framebuffer; +struct drm_format_info; struct drm_gem_object; struct drm_mode_fb_cmd2; struct drm_plane_state; @@ -23,6 +24,7 @@ struct drm_framebuffer *omap_framebuffer_create(struct drm_device *dev, struct drm_file *file, const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos); int omap_framebuffer_pin(struct drm_framebuffer *fb); void omap_framebuffer_unpin(struct drm_framebuffer *fb); diff --git a/drivers/gpu/drm/omapdrm/omap_fbdev.c b/drivers/gpu/drm/omapdrm/omap_fbdev.c index 7b6396890681..948af7ec1130 100644 --- a/drivers/gpu/drm/omapdrm/omap_fbdev.c +++ b/drivers/gpu/drm/omapdrm/omap_fbdev.c @@ -197,7 +197,10 @@ int omap_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, goto fail; } - fb = omap_framebuffer_init(dev, &mode_cmd, &bo); + fb = omap_framebuffer_init(dev, + drm_get_format_info(dev, mode_cmd.pixel_format, + mode_cmd.modifier[0]), + &mode_cmd, &bo); if (IS_ERR(fb)) { dev_err(dev->dev, "failed to allocate fb\n"); /* note: if fb creation failed, we can't rely on fb destroy From d2b524c9064301471e8ffe4ffd85ab8870966aa4 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 5 Aug 2025 20:57:51 +0300 
Subject: [PATCH 1620/2411] drm/nouveau: Pass along the format info from .fb_create() to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plumb the format info from .fb_create() all the way to drm_helper_mode_fill_fb_struct() to avoid the redundant lookup. The patch is based on the driver parts of the patchset at Link: below, which missed converting the nouveau driver. Due to the absence of this change in the patchset at Link:, after the Fixes: commit below, nouveau_framebuffer_new() -> drm_helper_mode_fill_fb_struct() set drm_framebuffer::format incorrectly to NULL, which led to the !fb->format WARN() in drm_framebuffer_init() and caused framebuffer creation to fail. This patch fixes both of these issues. v2: Amend the commit log mentioning the functional issues the patch fixes. (Tomi) Cc: Ville Syrjälä Cc: Lyude Paul Cc: Danilo Krummrich Cc: Thomas Zimmermann Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Tomi Valkeinen Cc: nouveau@lists.freedesktop.org Fixes: 41ab92d35ccd ("drm: Make passing of format info to drm_helper_mode_fill_fb_struct() mandatory") Link: https://lore.kernel.org/all/20250701090722.13645-1-ville.syrjala@linux.intel.com Acked-by: Alex Deucher Acked-by: Danilo Krummrich Reviewed-by: James Jones Tested-by: Linux Kernel Functional Testing Tested-by: James Jones Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250805175752.690504-3-imre.deak@intel.com --- drivers/gpu/drm/nouveau/nouveau_display.c | 9 +++------ drivers/gpu/drm/nouveau/nouveau_display.h | 3 +++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index e1e542126310..805d0a87aa54 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -253,6 +253,7 @@ nouveau_check_bl_size(struct nouveau_drm *drm, struct nouveau_bo *nvbo, int nouveau_framebuffer_new(struct
drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *gem, struct drm_framebuffer **pfb) @@ -260,7 +261,6 @@ nouveau_framebuffer_new(struct drm_device *dev, struct nouveau_drm *drm = nouveau_drm(dev); struct nouveau_bo *nvbo = nouveau_gem_object(gem); struct drm_framebuffer *fb; - const struct drm_format_info *info; unsigned int height, i; uint32_t tile_mode; uint8_t kind; @@ -295,9 +295,6 @@ nouveau_framebuffer_new(struct drm_device *dev, kind = nvbo->kind; } - info = drm_get_format_info(dev, mode_cmd->pixel_format, - mode_cmd->modifier[0]); - for (i = 0; i < info->num_planes; i++) { height = drm_format_info_plane_height(info, mode_cmd->height, @@ -321,7 +318,7 @@ nouveau_framebuffer_new(struct drm_device *dev, if (!(fb = *pfb = kzalloc(sizeof(*fb), GFP_KERNEL))) return -ENOMEM; - drm_helper_mode_fill_fb_struct(dev, fb, NULL, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, info, mode_cmd); fb->obj[0] = gem; ret = drm_framebuffer_init(dev, fb, &nouveau_framebuffer_funcs); @@ -344,7 +341,7 @@ nouveau_user_framebuffer_create(struct drm_device *dev, if (!gem) return ERR_PTR(-ENOENT); - ret = nouveau_framebuffer_new(dev, mode_cmd, gem, &fb); + ret = nouveau_framebuffer_new(dev, info, mode_cmd, gem, &fb); if (ret == 0) return fb; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h index e45f211501f6..470e0910d484 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.h +++ b/drivers/gpu/drm/nouveau/nouveau_display.h @@ -8,8 +8,11 @@ #include +struct drm_format_info; + int nouveau_framebuffer_new(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *gem, struct drm_framebuffer **pfb); From c0a8e4443d768e5c86ddb52a3a744a151e7b72b0 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 5 Aug 2025 20:57:52 +0300 Subject: [PATCH 1621/2411] drm/radeon: Pass along the format info from 
.fb_create() to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plumb the format info from .fb_create() all the way to drm_helper_mode_fill_fb_struct() to avoid the redundant lookup. For the fbdev case a manual drm_get_format_info() lookup is needed. The patch is based on the driver parts of the patchset at Link: below, which missed converting the radeon driver. Due to the absence of this change in the patchset at Link:, after the Fixes: commit below, radeon_framebuffer_init() -> drm_helper_mode_fill_fb_struct() set drm_framebuffer::format incorrectly to NULL, which led to the !fb->format WARN() in drm_framebuffer_init() and caused framebuffer creation to fail. This patch fixes both of these issues. v2: Amend the commit log mentioning the functional issues the patch fixes. (Tomi) Cc: Ville Syrjälä Cc: Alex Deucher Cc: Christian König Cc: Thomas Zimmermann Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: amd-gfx@lists.freedesktop.org Cc: Tomi Valkeinen Fixes: 41ab92d35ccd ("drm: Make passing of format info to drm_helper_mode_fill_fb_struct() mandatory") Link: https://lore.kernel.org/all/20250701090722.13645-1-ville.syrjala@linux.intel.com Acked-by: Alex Deucher Tested-by: Linux Kernel Functional Testing Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250805175752.690504-4-imre.deak@intel.com --- drivers/gpu/drm/radeon/radeon_display.c | 5 +++-- drivers/gpu/drm/radeon/radeon_fbdev.c | 11 ++++++----- drivers/gpu/drm/radeon/radeon_mode.h | 2 ++ 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index b4bf5dfeea2d..4dc77c398617 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -1297,12 +1297,13 @@ static const struct drm_framebuffer_funcs radeon_fb_funcs = { int radeon_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, + const
struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj) { int ret; fb->obj[0] = obj; - drm_helper_mode_fill_fb_struct(dev, fb, NULL, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, info, mode_cmd); ret = drm_framebuffer_init(dev, fb, &radeon_fb_funcs); if (ret) { fb->obj[0] = NULL; @@ -1341,7 +1342,7 @@ radeon_user_framebuffer_create(struct drm_device *dev, return ERR_PTR(-ENOMEM); } - ret = radeon_framebuffer_init(dev, fb, mode_cmd, obj); + ret = radeon_framebuffer_init(dev, fb, info, mode_cmd, obj); if (ret) { kfree(fb); drm_gem_object_put(obj); diff --git a/drivers/gpu/drm/radeon/radeon_fbdev.c b/drivers/gpu/drm/radeon/radeon_fbdev.c index e3a481bbee7b..dc81b0c2dbff 100644 --- a/drivers/gpu/drm/radeon/radeon_fbdev.c +++ b/drivers/gpu/drm/radeon/radeon_fbdev.c @@ -53,10 +53,10 @@ static void radeon_fbdev_destroy_pinned_object(struct drm_gem_object *gobj) } static int radeon_fbdev_create_pinned_object(struct drm_fb_helper *fb_helper, + const struct drm_format_info *info, struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **gobj_p) { - const struct drm_format_info *info; struct radeon_device *rdev = fb_helper->dev->dev_private; struct drm_gem_object *gobj = NULL; struct radeon_bo *rbo = NULL; @@ -67,8 +67,6 @@ static int radeon_fbdev_create_pinned_object(struct drm_fb_helper *fb_helper, int height = mode_cmd->height; u32 cpp; - info = drm_get_format_info(rdev_to_drm(rdev), mode_cmd->pixel_format, - mode_cmd->modifier[0]); cpp = info->cpp[0]; /* need to align pitch with crtc limits */ @@ -206,6 +204,7 @@ int radeon_fbdev_driver_fbdev_probe(struct drm_fb_helper *fb_helper, struct drm_fb_helper_surface_size *sizes) { struct radeon_device *rdev = fb_helper->dev->dev_private; + const struct drm_format_info *format_info; struct drm_mode_fb_cmd2 mode_cmd = { }; struct fb_info *info; struct drm_gem_object *gobj; @@ -224,7 +223,9 @@ int radeon_fbdev_driver_fbdev_probe(struct drm_fb_helper *fb_helper, 
mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth); - ret = radeon_fbdev_create_pinned_object(fb_helper, &mode_cmd, &gobj); + format_info = drm_get_format_info(rdev_to_drm(rdev), mode_cmd.pixel_format, + mode_cmd.modifier[0]); + ret = radeon_fbdev_create_pinned_object(fb_helper, format_info, &mode_cmd, &gobj); if (ret) { DRM_ERROR("failed to create fbcon object %d\n", ret); return ret; @@ -236,7 +237,7 @@ int radeon_fbdev_driver_fbdev_probe(struct drm_fb_helper *fb_helper, ret = -ENOMEM; goto err_radeon_fbdev_destroy_pinned_object; } - ret = radeon_framebuffer_init(rdev_to_drm(rdev), fb, &mode_cmd, gobj); + ret = radeon_framebuffer_init(rdev_to_drm(rdev), fb, format_info, &mode_cmd, gobj); if (ret) { DRM_ERROR("failed to initialize framebuffer %d\n", ret); goto err_kfree; diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 3102f6c2d055..9e34da2cacef 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -40,6 +40,7 @@ struct drm_fb_helper; struct drm_fb_helper_surface_size; +struct drm_format_info; struct edid; struct drm_edid; @@ -890,6 +891,7 @@ extern void radeon_combios_encoder_dpms_scratch_regs(struct drm_encoder *encoder, bool on); int radeon_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *rfb, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj); From a4f8e70d75dd11ab1a01894893e0b03f1d0b61fd Mon Sep 17 00:00:00 2001 From: Tianyu Xu Date: Tue, 5 Aug 2025 09:54:03 +0800 Subject: [PATCH 1622/2411] spi: spi-mem: add spi_mem_adjust_op_freq() in spi_mem_supports_op() The function spi_mem_adjust_op_freq() within spi_mem_exec_op() adjusts the op->max_freq, which informs the SPI controller of the maximum frequency for each operation. This adjustment is based on combined information from the SPI device and the board's wiring conditions.
Similarly, spi_mem_supports_op() will check the capabilities of the SPI controller. It also requires the combined information before it can accurately determine whether the SPI controller supports a given operation. Signed-off-by: Tianyu Xu Reviewed-by: Miquel Raynal Link: https://patch.msgid.link/20250805015403.43928-1-tianyxu@cisco.com Signed-off-by: Mark Brown --- drivers/spi/spi-mem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c index dfa8ab1ec80f..a8f14c608d2d 100644 --- a/drivers/spi/spi-mem.c +++ b/drivers/spi/spi-mem.c @@ -265,6 +265,9 @@ static bool spi_mem_internal_supports_op(struct spi_mem *mem, */ bool spi_mem_supports_op(struct spi_mem *mem, const struct spi_mem_op *op) { + /* Make sure the operation frequency is correct before going futher */ + spi_mem_adjust_op_freq(mem, (struct spi_mem_op *)op); + if (spi_mem_check_op(op)) return false; From 9f320dfb0ffc555aa2eac8331dee0c2c16f67633 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Aug 2025 11:44:22 +0200 Subject: [PATCH 1623/2411] ALSA: hda/ca0132: Fix missing error handling in ca0132_alt_select_out() There are a couple of cases where the error is ignored or the error code isn't propagated in ca0132_alt_select_out(). Fix those. 
Fixes: def3f0a5c700 ("ALSA: hda/ca0132 - Add quirk output selection structures.") Link: https://patch.msgid.link/20250806094423.8843-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/hda/codecs/ca0132.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/hda/codecs/ca0132.c b/sound/hda/codecs/ca0132.c index b716f721f25d..b7d456e16c93 100644 --- a/sound/hda/codecs/ca0132.c +++ b/sound/hda/codecs/ca0132.c @@ -4802,7 +4802,8 @@ static int ca0132_alt_select_out(struct hda_codec *codec) if (err < 0) goto exit; - if (ca0132_alt_select_out_quirk_set(codec) < 0) + err = ca0132_alt_select_out_quirk_set(codec); + if (err < 0) goto exit; switch (spec->cur_out_type) { @@ -4892,6 +4893,8 @@ static int ca0132_alt_select_out(struct hda_codec *codec) spec->bass_redirection_val); else err = ca0132_alt_surround_set_bass_redirection(codec, 0); + if (err < 0) + goto exit; /* Unmute DSP now that we're done with output selection. */ err = dspio_set_uint_param(codec, 0x96, From bee47cb026e762841f3faece47b51f985e215edb Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 29 Jul 2025 12:40:20 -0400 Subject: [PATCH 1624/2411] sunrpc: fix handling of server side tls alerts Scott Mayhew discovered a security exploit in NFS over TLS in tls_alert_recv() due to its assumption it can read data from the msg iterator's kvec. kTLS implementation splits TLS non-data record payload between the control message buffer (which includes the type such as TLS alert or TLS cipher change) and the rest of the payload (say TLS alert's level/description) which goes into the msg payload buffer. This patch proposes to rework how control messages are set up and used by sock_recvmsg(). If no control message structure is set up, kTLS layer will read and process TLS data record types. As soon as it encounters a TLS control message, it would return an error. At that point, NFS can set up a kvec-backed msg buffer and read in the control message such as a TLS alert.
Msg iterator can advance the kvec pointer as a part of the copy process thus we need to revert the iterator before calling into the tls_alert_recv. Reported-by: Scott Mayhew Fixes: 5e052dda121e ("SUNRPC: Recognize control messages in server-side TCP socket code") Suggested-by: Trond Myklebust Cc: stable@vger.kernel.org Signed-off-by: Olga Kornievskaia Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 46c156b121db..e2c5e0e626f9 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -257,20 +257,47 @@ svc_tcp_sock_process_cmsg(struct socket *sock, struct msghdr *msg, } static int -svc_tcp_sock_recv_cmsg(struct svc_sock *svsk, struct msghdr *msg) +svc_tcp_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags) { union { struct cmsghdr cmsg; u8 buf[CMSG_SPACE(sizeof(u8))]; } u; - struct socket *sock = svsk->sk_sock; + u8 alert[2]; + struct kvec alert_kvec = { + .iov_base = alert, + .iov_len = sizeof(alert), + }; + struct msghdr msg = { + .msg_flags = *msg_flags, + .msg_control = &u, + .msg_controllen = sizeof(u), + }; int ret; - msg->msg_control = &u; - msg->msg_controllen = sizeof(u); + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1, + alert_kvec.iov_len); + ret = sock_recvmsg(sock, &msg, MSG_DONTWAIT); + if (ret > 0 && + tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) { + iov_iter_revert(&msg.msg_iter, ret); + ret = svc_tcp_sock_process_cmsg(sock, &msg, &u.cmsg, -EAGAIN); + } + return ret; +} + +static int +svc_tcp_sock_recvmsg(struct svc_sock *svsk, struct msghdr *msg) +{ + int ret; + struct socket *sock = svsk->sk_sock; + ret = sock_recvmsg(sock, msg, MSG_DONTWAIT); - if (unlikely(msg->msg_controllen != sizeof(u))) - ret = svc_tcp_sock_process_cmsg(sock, msg, &u.cmsg, ret); + if (msg->msg_flags & MSG_CTRUNC) { + msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR); + if (ret == 
0 || ret == -EIO) + ret = svc_tcp_sock_recv_cmsg(sock, &msg->msg_flags); + } return ret; } @@ -321,7 +348,7 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen, iov_iter_advance(&msg.msg_iter, seek); buflen -= seek; } - len = svc_tcp_sock_recv_cmsg(svsk, &msg); + len = svc_tcp_sock_recvmsg(svsk, &msg); if (len > 0) svc_flush_bvec(bvec, len, seek); @@ -1018,7 +1045,7 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk, iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen; iov.iov_len = want; iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, want); - len = svc_tcp_sock_recv_cmsg(svsk, &msg); + len = svc_tcp_sock_recvmsg(svsk, &msg); if (len < 0) return len; svsk->sk_tcplen += len; From 9f7488f24c7571d349d938061e0ede7a39b65d6b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Aug 2025 16:53:18 +0200 Subject: [PATCH 1625/2411] irqchip/mvebu-gicp: Use resource_size() for ioremap() 0-day reported an off by one in the ioremap() sizing: drivers/irqchip/irq-mvebu-gicp.c:240:45-48: WARNING: Suspicious code. resource_size is maybe missing with gicp -> res Convert it to resource_size(), which does the right thing. Fixes: 3c3d7dbab2c7 ("irqchip/mvebu-gicp: Clear pending interrupts on init") Reported-by: kernel test robot Signed-off-by: Thomas Gleixner Closes: https://lore.kernel.org/oe-kbuild-all/202508062150.mtFQMTXc-lkp@intel.com/ --- drivers/irqchip/irq-mvebu-gicp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-mvebu-gicp.c b/drivers/irqchip/irq-mvebu-gicp.c index fd85c845e015..54833717f8a7 100644 --- a/drivers/irqchip/irq-mvebu-gicp.c +++ b/drivers/irqchip/irq-mvebu-gicp.c @@ -237,7 +237,7 @@ static int mvebu_gicp_probe(struct platform_device *pdev) return -ENODEV; } - base = ioremap(gicp->res->start, gicp->res->end - gicp->res->start); + base = ioremap(gicp->res->start, resource_size(gicp->res)); if (IS_ERR(base)) { dev_err(&pdev->dev, "ioremap() failed. 
Unable to clear pending interrupts.\n"); } else { From e29409faec87ffd2de2ed20b6109f303f129281b Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Tue, 5 Aug 2025 10:41:33 +0200 Subject: [PATCH 1626/2411] s390/boot: Fix startup debugging log Fix 'kernel image' end address for kaslr case. Fixes: ec6f9f7e5bbf ("s390/boot: Add startup debugging support") Reviewed-by: Alexander Gordeev Signed-off-by: Mikhail Zaslonko Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/startup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 305e6c791071..93684a775716 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -384,7 +384,7 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) kernel_start = round_down(kernel_end - kernel_size, THREAD_SIZE); boot_debug("Randomization range: 0x%016lx-0x%016lx\n", vmax - kaslr_len, vmax); boot_debug("kernel image: 0x%016lx-0x%016lx (kaslr)\n", kernel_start, - kernel_size + kernel_size); + kernel_start + kernel_size); } else if (vmax < __NO_KASLR_END_KERNEL || vsize > __NO_KASLR_END_KERNEL) { kernel_start = round_down(vmax - kernel_size, THREAD_SIZE); boot_debug("kernel image: 0x%016lx-0x%016lx (constrained)\n", kernel_start, From 2baf16f381decee303da406ca5a0991134260270 Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Mon, 4 Aug 2025 11:33:21 +0200 Subject: [PATCH 1627/2411] s390/debug: Fix typo in debug_sprintf_format_fn() comment Signed-off-by: Tigran Mkrtchyan Link: https://lore.kernel.org/r/20250804093321.434674-1-tigran.mkrtchyan@desy.de Signed-off-by: Alexander Gordeev --- arch/s390/kernel/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 2a41be2f7925..c62100dc62c8 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -1677,7 +1677,7 @@ EXPORT_SYMBOL(debug_dflt_header_fn); /* * prints 
debug data sprintf-formatted: - * debug_sprinf_event/exception calls must be used together with this view + * debug_sprintf_event/exception calls must be used together with this view */ #define DEBUG_SPRINTF_MAX_ARGS 10 From 1cdd5a2626d8c9eb059c6b93a628413da833df95 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 5 Aug 2025 17:56:33 -0500 Subject: [PATCH 1628/2411] cifs: Move the SMB1 transport code out of transport.c Shrink the size of cifs.ko when SMB1 is not enabled in the config by moving the SMB1 transport code to different file. Signed-off-by: David Howells cc: linux-cifs@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/Makefile | 2 +- fs/smb/client/cifsproto.h | 15 + fs/smb/client/cifstransport.c | 566 ++++++++++++++++++++++++++++++++++ fs/smb/client/transport.c | 558 +-------------------------------- 4 files changed, 588 insertions(+), 553 deletions(-) create mode 100644 fs/smb/client/cifstransport.c diff --git a/fs/smb/client/Makefile b/fs/smb/client/Makefile index 22023e30915b..4c97b31a25c2 100644 --- a/fs/smb/client/Makefile +++ b/fs/smb/client/Makefile @@ -32,6 +32,6 @@ cifs-$(CONFIG_CIFS_SMB_DIRECT) += smbdirect.o cifs-$(CONFIG_CIFS_ROOT) += cifsroot.o -cifs-$(CONFIG_CIFS_ALLOW_INSECURE_LEGACY) += smb1ops.o cifssmb.o +cifs-$(CONFIG_CIFS_ALLOW_INSECURE_LEGACY) += smb1ops.o cifssmb.o cifstransport.o cifs-$(CONFIG_CIFS_COMPRESSION) += compress.o compress/lz77.o diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 40ec0634377f..c34c533b2efa 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -116,16 +116,31 @@ extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *, int * /* bytes returned */ , const int); extern int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, char *in_buf, int flags); +int cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server); extern struct mid_q_entry *cifs_setup_request(struct cifs_ses *, struct TCP_Server_Info *, 
struct smb_rqst *); extern struct mid_q_entry *cifs_setup_async_request(struct TCP_Server_Info *, struct smb_rqst *); +int __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, + struct smb_rqst *rqst); extern int cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, bool log_error); +int wait_for_free_request(struct TCP_Server_Info *server, const int flags, + unsigned int *instance); extern int cifs_wait_mtu_credits(struct TCP_Server_Info *server, size_t size, size_t *num, struct cifs_credits *credits); + +static inline int +send_cancel(struct TCP_Server_Info *server, struct smb_rqst *rqst, + struct mid_q_entry *mid) +{ + return server->ops->send_cancel ? + server->ops->send_cancel(server, rqst, mid) : 0; +} + +int wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ); extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, struct kvec *, int /* nvec to send */, int * /* type of buf returned */, const int flags, diff --git a/fs/smb/client/cifstransport.c b/fs/smb/client/cifstransport.c new file mode 100644 index 000000000000..352dafb888dd --- /dev/null +++ b/fs/smb/client/cifstransport.c @@ -0,0 +1,566 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * + * Copyright (C) International Business Machines Corp., 2002,2008 + * Author(s): Steve French (sfrench@us.ibm.com) + * Jeremy Allison (jra@samba.org) 2006. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cifspdu.h" +#include "cifsglob.h" +#include "cifsproto.h" +#include "cifs_debug.h" +#include "smb2proto.h" +#include "smbdirect.h" +#include "compress.h" + +/* Max number of iovectors we can use off the stack when sending requests. 
*/ +#define CIFS_MAX_IOV_SIZE 8 + +static struct mid_q_entry * +alloc_mid(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) +{ + struct mid_q_entry *temp; + + if (server == NULL) { + cifs_dbg(VFS, "%s: null TCP session\n", __func__); + return NULL; + } + + temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); + memset(temp, 0, sizeof(struct mid_q_entry)); + kref_init(&temp->refcount); + temp->mid = get_mid(smb_buffer); + temp->pid = current->pid; + temp->command = cpu_to_le16(smb_buffer->Command); + cifs_dbg(FYI, "For smb_command %d\n", smb_buffer->Command); + /* easier to use jiffies */ + /* when mid allocated can be before when sent */ + temp->when_alloc = jiffies; + temp->server = server; + + /* + * The default is for the mid to be synchronous, so the + * default callback just wakes up the current task. + */ + get_task_struct(current); + temp->creator = current; + temp->callback = cifs_wake_up_task; + temp->callback_data = current; + + atomic_inc(&mid_count); + temp->mid_state = MID_REQUEST_ALLOCATED; + return temp; +} + +int +smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer, + unsigned int smb_buf_length) +{ + struct kvec iov[2]; + struct smb_rqst rqst = { .rq_iov = iov, + .rq_nvec = 2 }; + + iov[0].iov_base = smb_buffer; + iov[0].iov_len = 4; + iov[1].iov_base = (char *)smb_buffer + 4; + iov[1].iov_len = smb_buf_length; + + return __smb_send_rqst(server, 1, &rqst); +} + +static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, + struct mid_q_entry **ppmidQ) +{ + spin_lock(&ses->ses_lock); + if (ses->ses_status == SES_NEW) { + if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && + (in_buf->Command != SMB_COM_NEGOTIATE)) { + spin_unlock(&ses->ses_lock); + return -EAGAIN; + } + /* else ok - we are setting up session */ + } + + if (ses->ses_status == SES_EXITING) { + /* check if SMB session is bad because we are setting it up */ + if (in_buf->Command != SMB_COM_LOGOFF_ANDX) { + spin_unlock(&ses->ses_lock); + return 
-EAGAIN; + } + /* else ok - we are shutting down session */ + } + spin_unlock(&ses->ses_lock); + + *ppmidQ = alloc_mid(in_buf, ses->server); + if (*ppmidQ == NULL) + return -ENOMEM; + spin_lock(&ses->server->mid_queue_lock); + list_add_tail(&(*ppmidQ)->qhead, &ses->server->pending_mid_q); + spin_unlock(&ses->server->mid_queue_lock); + return 0; +} + +struct mid_q_entry * +cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst) +{ + int rc; + struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base; + struct mid_q_entry *mid; + + if (rqst->rq_iov[0].iov_len != 4 || + rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base) + return ERR_PTR(-EIO); + + /* enable signing if server requires it */ + if (server->sign) + hdr->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; + + mid = alloc_mid(hdr, server); + if (mid == NULL) + return ERR_PTR(-ENOMEM); + + rc = cifs_sign_rqst(rqst, server, &mid->sequence_number); + if (rc) { + release_mid(mid); + return ERR_PTR(rc); + } + + return mid; +} + +/* + * + * Send an SMB Request. No response info (other than return code) + * needs to be parsed. 
+ * + * flags indicate the type of request buffer and how long to wait + * and whether to log NT STATUS code (error) before mapping it to POSIX error + * + */ +int +SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, + char *in_buf, int flags) +{ + int rc; + struct kvec iov[1]; + struct kvec rsp_iov; + int resp_buf_type; + + iov[0].iov_base = in_buf; + iov[0].iov_len = get_rfc1002_length(in_buf) + 4; + flags |= CIFS_NO_RSP_BUF; + rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov); + cifs_dbg(NOISY, "SendRcvNoRsp flags %d rc %d\n", flags, rc); + + return rc; +} + +int +cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, + bool log_error) +{ + unsigned int len = get_rfc1002_length(mid->resp_buf) + 4; + + dump_smb(mid->resp_buf, min_t(u32, 92, len)); + + /* convert the length into a more usable form */ + if (server->sign) { + struct kvec iov[2]; + int rc = 0; + struct smb_rqst rqst = { .rq_iov = iov, + .rq_nvec = 2 }; + + iov[0].iov_base = mid->resp_buf; + iov[0].iov_len = 4; + iov[1].iov_base = (char *)mid->resp_buf + 4; + iov[1].iov_len = len - 4; + /* FIXME: add code to kill session */ + rc = cifs_verify_signature(&rqst, server, + mid->sequence_number); + if (rc) + cifs_server_dbg(VFS, "SMB signature verification returned error = %d\n", + rc); + } + + /* BB special case reconnect tid and uid here? 
*/ + return map_and_check_smb_error(mid, log_error); +} + +struct mid_q_entry * +cifs_setup_request(struct cifs_ses *ses, struct TCP_Server_Info *ignored, + struct smb_rqst *rqst) +{ + int rc; + struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base; + struct mid_q_entry *mid; + + if (rqst->rq_iov[0].iov_len != 4 || + rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base) + return ERR_PTR(-EIO); + + rc = allocate_mid(ses, hdr, &mid); + if (rc) + return ERR_PTR(rc); + rc = cifs_sign_rqst(rqst, ses->server, &mid->sequence_number); + if (rc) { + delete_mid(mid); + return ERR_PTR(rc); + } + return mid; +} + +int +SendReceive2(const unsigned int xid, struct cifs_ses *ses, + struct kvec *iov, int n_vec, int *resp_buf_type /* ret */, + const int flags, struct kvec *resp_iov) +{ + struct smb_rqst rqst; + struct kvec s_iov[CIFS_MAX_IOV_SIZE], *new_iov; + int rc; + + if (n_vec + 1 > CIFS_MAX_IOV_SIZE) { + new_iov = kmalloc_array(n_vec + 1, sizeof(struct kvec), + GFP_KERNEL); + if (!new_iov) { + /* otherwise cifs_send_recv below sets resp_buf_type */ + *resp_buf_type = CIFS_NO_BUFFER; + return -ENOMEM; + } + } else + new_iov = s_iov; + + /* 1st iov is a RFC1001 length followed by the rest of the packet */ + memcpy(new_iov + 1, iov, (sizeof(struct kvec) * n_vec)); + + new_iov[0].iov_base = new_iov[1].iov_base; + new_iov[0].iov_len = 4; + new_iov[1].iov_base += 4; + new_iov[1].iov_len -= 4; + + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = new_iov; + rqst.rq_nvec = n_vec + 1; + + rc = cifs_send_recv(xid, ses, ses->server, + &rqst, resp_buf_type, flags, resp_iov); + if (n_vec + 1 > CIFS_MAX_IOV_SIZE) + kfree(new_iov); + return rc; +} + +int +SendReceive(const unsigned int xid, struct cifs_ses *ses, + struct smb_hdr *in_buf, struct smb_hdr *out_buf, + int *pbytes_returned, const int flags) +{ + int rc = 0; + struct mid_q_entry *midQ; + unsigned int len = be32_to_cpu(in_buf->smb_buf_length); + struct kvec iov = { .iov_base = in_buf, .iov_len = len }; + 
struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 }; + struct cifs_credits credits = { .value = 1, .instance = 0 }; + struct TCP_Server_Info *server; + + if (ses == NULL) { + cifs_dbg(VFS, "Null smb session\n"); + return -EIO; + } + server = ses->server; + if (server == NULL) { + cifs_dbg(VFS, "Null tcp session\n"); + return -EIO; + } + + spin_lock(&server->srv_lock); + if (server->tcpStatus == CifsExiting) { + spin_unlock(&server->srv_lock); + return -ENOENT; + } + spin_unlock(&server->srv_lock); + + /* Ensure that we do not send more than 50 overlapping requests + to the same server. We may make this configurable later or + use ses->maxReq */ + + if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { + cifs_server_dbg(VFS, "Invalid length, greater than maximum frame, %d\n", + len); + return -EIO; + } + + rc = wait_for_free_request(server, flags, &credits.instance); + if (rc) + return rc; + + /* make sure that we sign in the same order that we send on this socket + and avoid races inside tcp sendmsg code that could cause corruption + of smb data */ + + cifs_server_lock(server); + + rc = allocate_mid(ses, in_buf, &midQ); + if (rc) { + cifs_server_unlock(server); + /* Update # of requests on wire to server */ + add_credits(server, &credits, 0); + return rc; + } + + rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number); + if (rc) { + cifs_server_unlock(server); + goto out; + } + + midQ->mid_state = MID_REQUEST_SUBMITTED; + + rc = smb_send(server, in_buf, len); + cifs_save_when_sent(midQ); + + if (rc < 0) + server->sequence_number -= 2; + + cifs_server_unlock(server); + + if (rc < 0) + goto out; + + rc = wait_for_response(server, midQ); + if (rc != 0) { + send_cancel(server, &rqst, midQ); + spin_lock(&server->mid_queue_lock); + if (midQ->mid_state == MID_REQUEST_SUBMITTED || + midQ->mid_state == MID_RESPONSE_RECEIVED) { + /* no longer considered to be "in-flight" */ + midQ->callback = release_mid; + spin_unlock(&server->mid_queue_lock); + add_credits(server, 
&credits, 0); + return rc; + } + spin_unlock(&server->mid_queue_lock); + } + + rc = cifs_sync_mid_result(midQ, server); + if (rc != 0) { + add_credits(server, &credits, 0); + return rc; + } + + if (!midQ->resp_buf || !out_buf || + midQ->mid_state != MID_RESPONSE_READY) { + rc = -EIO; + cifs_server_dbg(VFS, "Bad MID state?\n"); + goto out; + } + + *pbytes_returned = get_rfc1002_length(midQ->resp_buf); + memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); + rc = cifs_check_receive(midQ, server, 0); +out: + delete_mid(midQ); + add_credits(server, &credits, 0); + + return rc; +} + +/* We send a LOCKINGX_CANCEL_LOCK to cause the Windows + blocking lock to return. */ + +static int +send_lock_cancel(const unsigned int xid, struct cifs_tcon *tcon, + struct smb_hdr *in_buf, + struct smb_hdr *out_buf) +{ + int bytes_returned; + struct cifs_ses *ses = tcon->ses; + LOCK_REQ *pSMB = (LOCK_REQ *)in_buf; + + /* We just modify the current in_buf to change + the type of lock from LOCKING_ANDX_SHARED_LOCK + or LOCKING_ANDX_EXCLUSIVE_LOCK to + LOCKING_ANDX_CANCEL_LOCK. 
*/ + + pSMB->LockType = LOCKING_ANDX_CANCEL_LOCK|LOCKING_ANDX_LARGE_FILES; + pSMB->Timeout = 0; + pSMB->hdr.Mid = get_next_mid(ses->server); + + return SendReceive(xid, ses, in_buf, out_buf, + &bytes_returned, 0); +} + +int +SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, + struct smb_hdr *in_buf, struct smb_hdr *out_buf, + int *pbytes_returned) +{ + int rc = 0; + int rstart = 0; + struct mid_q_entry *midQ; + struct cifs_ses *ses; + unsigned int len = be32_to_cpu(in_buf->smb_buf_length); + struct kvec iov = { .iov_base = in_buf, .iov_len = len }; + struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 }; + unsigned int instance; + struct TCP_Server_Info *server; + + if (tcon == NULL || tcon->ses == NULL) { + cifs_dbg(VFS, "Null smb session\n"); + return -EIO; + } + ses = tcon->ses; + server = ses->server; + + if (server == NULL) { + cifs_dbg(VFS, "Null tcp session\n"); + return -EIO; + } + + spin_lock(&server->srv_lock); + if (server->tcpStatus == CifsExiting) { + spin_unlock(&server->srv_lock); + return -ENOENT; + } + spin_unlock(&server->srv_lock); + + /* Ensure that we do not send more than 50 overlapping requests + to the same server. 
We may make this configurable later or + use ses->maxReq */ + + if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { + cifs_tcon_dbg(VFS, "Invalid length, greater than maximum frame, %d\n", + len); + return -EIO; + } + + rc = wait_for_free_request(server, CIFS_BLOCKING_OP, &instance); + if (rc) + return rc; + + /* make sure that we sign in the same order that we send on this socket + and avoid races inside tcp sendmsg code that could cause corruption + of smb data */ + + cifs_server_lock(server); + + rc = allocate_mid(ses, in_buf, &midQ); + if (rc) { + cifs_server_unlock(server); + return rc; + } + + rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number); + if (rc) { + delete_mid(midQ); + cifs_server_unlock(server); + return rc; + } + + midQ->mid_state = MID_REQUEST_SUBMITTED; + rc = smb_send(server, in_buf, len); + cifs_save_when_sent(midQ); + + if (rc < 0) + server->sequence_number -= 2; + + cifs_server_unlock(server); + + if (rc < 0) { + delete_mid(midQ); + return rc; + } + + /* Wait for a reply - allow signals to interrupt. */ + rc = wait_event_interruptible(server->response_q, + (!(midQ->mid_state == MID_REQUEST_SUBMITTED || + midQ->mid_state == MID_RESPONSE_RECEIVED)) || + ((server->tcpStatus != CifsGood) && + (server->tcpStatus != CifsNew))); + + /* Were we interrupted by a signal ? */ + spin_lock(&server->srv_lock); + if ((rc == -ERESTARTSYS) && + (midQ->mid_state == MID_REQUEST_SUBMITTED || + midQ->mid_state == MID_RESPONSE_RECEIVED) && + ((server->tcpStatus == CifsGood) || + (server->tcpStatus == CifsNew))) { + spin_unlock(&server->srv_lock); + + if (in_buf->Command == SMB_COM_TRANSACTION2) { + /* POSIX lock. We send a NT_CANCEL SMB to cause the + blocking lock to return. */ + rc = send_cancel(server, &rqst, midQ); + if (rc) { + delete_mid(midQ); + return rc; + } + } else { + /* Windows lock. We send a LOCKINGX_CANCEL_LOCK + to cause the blocking lock to return. 
*/ + + rc = send_lock_cancel(xid, tcon, in_buf, out_buf); + + /* If we get -ENOLCK back the lock may have + already been removed. Don't exit in this case. */ + if (rc && rc != -ENOLCK) { + delete_mid(midQ); + return rc; + } + } + + rc = wait_for_response(server, midQ); + if (rc) { + send_cancel(server, &rqst, midQ); + spin_lock(&server->mid_queue_lock); + if (midQ->mid_state == MID_REQUEST_SUBMITTED || + midQ->mid_state == MID_RESPONSE_RECEIVED) { + /* no longer considered to be "in-flight" */ + midQ->callback = release_mid; + spin_unlock(&server->mid_queue_lock); + return rc; + } + spin_unlock(&server->mid_queue_lock); + } + + /* We got the response - restart system call. */ + rstart = 1; + spin_lock(&server->srv_lock); + } + spin_unlock(&server->srv_lock); + + rc = cifs_sync_mid_result(midQ, server); + if (rc != 0) + return rc; + + /* rcvd frame is ok */ + if (out_buf == NULL || midQ->mid_state != MID_RESPONSE_READY) { + rc = -EIO; + cifs_tcon_dbg(VFS, "Bad MID state?\n"); + goto out; + } + + *pbytes_returned = get_rfc1002_length(midQ->resp_buf); + memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); + rc = cifs_check_receive(midQ, server, 0); +out: + delete_mid(midQ); + if (rstart && rc == -EACCES) + return -ERESTARTSYS; + return rc; +} diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index ca9358c24ceb..32d528b4dd83 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -30,9 +30,6 @@ #include "smbdirect.h" #include "compress.h" -/* Max number of iovectors we can use off the stack when sending requests. 
*/ -#define CIFS_MAX_IOV_SIZE 8 - void cifs_wake_up_task(struct mid_q_entry *mid) { @@ -41,42 +38,6 @@ cifs_wake_up_task(struct mid_q_entry *mid) wake_up_process(mid->callback_data); } -static struct mid_q_entry * -alloc_mid(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) -{ - struct mid_q_entry *temp; - - if (server == NULL) { - cifs_dbg(VFS, "%s: null TCP session\n", __func__); - return NULL; - } - - temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); - memset(temp, 0, sizeof(struct mid_q_entry)); - kref_init(&temp->refcount); - temp->mid = get_mid(smb_buffer); - temp->pid = current->pid; - temp->command = cpu_to_le16(smb_buffer->Command); - cifs_dbg(FYI, "For smb_command %d\n", smb_buffer->Command); - /* easier to use jiffies */ - /* when mid allocated can be before when sent */ - temp->when_alloc = jiffies; - temp->server = server; - - /* - * The default is for the mid to be synchronous, so the - * default callback just wakes up the current task. - */ - get_task_struct(current); - temp->creator = current; - temp->callback = cifs_wake_up_task; - temp->callback_data = current; - - atomic_inc(&mid_count); - temp->mid_state = MID_REQUEST_ALLOCATED; - return temp; -} - void __release_mid(struct kref *refcount) { struct mid_q_entry *midEntry = @@ -269,9 +230,8 @@ smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst) return buflen; } -static int -__smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, - struct smb_rqst *rqst) +int __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, + struct smb_rqst *rqst) { int rc; struct kvec *iov; @@ -456,22 +416,6 @@ smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, return rc; } -int -smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer, - unsigned int smb_buf_length) -{ - struct kvec iov[2]; - struct smb_rqst rqst = { .rq_iov = iov, - .rq_nvec = 2 }; - - iov[0].iov_base = smb_buffer; - iov[0].iov_len = 4; - iov[1].iov_base = (char *)smb_buffer + 4; - 
iov[1].iov_len = smb_buf_length; - - return __smb_send_rqst(server, 1, &rqst); -} - static int wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, const int timeout, const int flags, @@ -626,9 +570,8 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, return 0; } -static int -wait_for_free_request(struct TCP_Server_Info *server, const int flags, - unsigned int *instance) +int wait_for_free_request(struct TCP_Server_Info *server, const int flags, + unsigned int *instance) { return wait_for_free_credits(server, 1, -1, flags, instance); @@ -690,40 +633,7 @@ cifs_wait_mtu_credits(struct TCP_Server_Info *server, size_t size, return 0; } -static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, - struct mid_q_entry **ppmidQ) -{ - spin_lock(&ses->ses_lock); - if (ses->ses_status == SES_NEW) { - if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && - (in_buf->Command != SMB_COM_NEGOTIATE)) { - spin_unlock(&ses->ses_lock); - return -EAGAIN; - } - /* else ok - we are setting up session */ - } - - if (ses->ses_status == SES_EXITING) { - /* check if SMB session is bad because we are setting it up */ - if (in_buf->Command != SMB_COM_LOGOFF_ANDX) { - spin_unlock(&ses->ses_lock); - return -EAGAIN; - } - /* else ok - we are shutting down session */ - } - spin_unlock(&ses->ses_lock); - - *ppmidQ = alloc_mid(in_buf, ses->server); - if (*ppmidQ == NULL) - return -ENOMEM; - spin_lock(&ses->server->mid_queue_lock); - list_add_tail(&(*ppmidQ)->qhead, &ses->server->pending_mid_q); - spin_unlock(&ses->server->mid_queue_lock); - return 0; -} - -static int -wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) +int wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) { int error; @@ -737,34 +647,6 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) return 0; } -struct mid_q_entry * -cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst 
*rqst) -{ - int rc; - struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base; - struct mid_q_entry *mid; - - if (rqst->rq_iov[0].iov_len != 4 || - rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base) - return ERR_PTR(-EIO); - - /* enable signing if server requires it */ - if (server->sign) - hdr->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; - - mid = alloc_mid(hdr, server); - if (mid == NULL) - return ERR_PTR(-ENOMEM); - - rc = cifs_sign_rqst(rqst, server, &mid->sequence_number); - if (rc) { - release_mid(mid); - return ERR_PTR(rc); - } - - return mid; -} - /* * Send a SMB request and set the callback function in the mid to handle * the result. Caller is responsible for dealing with timeouts. @@ -845,35 +727,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, return rc; } -/* - * - * Send an SMB Request. No response info (other than return code) - * needs to be parsed. - * - * flags indicate the type of request buffer and how long to wait - * and whether to log NT STATUS code (error) before mapping it to POSIX error - * - */ -int -SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, - char *in_buf, int flags) -{ - int rc; - struct kvec iov[1]; - struct kvec rsp_iov; - int resp_buf_type; - - iov[0].iov_base = in_buf; - iov[0].iov_len = get_rfc1002_length(in_buf) + 4; - flags |= CIFS_NO_RSP_BUF; - rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov); - cifs_dbg(NOISY, "SendRcvNoRsp flags %d rc %d\n", flags, rc); - - return rc; -} - -static int -cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) +int cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) { int rc = 0; @@ -915,68 +769,6 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) return rc; } -static inline int -send_cancel(struct TCP_Server_Info *server, struct smb_rqst *rqst, - struct mid_q_entry *mid) -{ - return server->ops->send_cancel ? 
- server->ops->send_cancel(server, rqst, mid) : 0; -} - -int -cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, - bool log_error) -{ - unsigned int len = get_rfc1002_length(mid->resp_buf) + 4; - - dump_smb(mid->resp_buf, min_t(u32, 92, len)); - - /* convert the length into a more usable form */ - if (server->sign) { - struct kvec iov[2]; - int rc = 0; - struct smb_rqst rqst = { .rq_iov = iov, - .rq_nvec = 2 }; - - iov[0].iov_base = mid->resp_buf; - iov[0].iov_len = 4; - iov[1].iov_base = (char *)mid->resp_buf + 4; - iov[1].iov_len = len - 4; - /* FIXME: add code to kill session */ - rc = cifs_verify_signature(&rqst, server, - mid->sequence_number); - if (rc) - cifs_server_dbg(VFS, "SMB signature verification returned error = %d\n", - rc); - } - - /* BB special case reconnect tid and uid here? */ - return map_and_check_smb_error(mid, log_error); -} - -struct mid_q_entry * -cifs_setup_request(struct cifs_ses *ses, struct TCP_Server_Info *ignored, - struct smb_rqst *rqst) -{ - int rc; - struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base; - struct mid_q_entry *mid; - - if (rqst->rq_iov[0].iov_len != 4 || - rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base) - return ERR_PTR(-EIO); - - rc = allocate_mid(ses, hdr, &mid); - if (rc) - return ERR_PTR(rc); - rc = cifs_sign_rqst(rqst, ses->server, &mid->sequence_number); - if (rc) { - delete_mid(mid); - return ERR_PTR(rc); - } - return mid; -} - static void cifs_compound_callback(struct mid_q_entry *mid) { @@ -1304,344 +1096,6 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses, rqst, resp_buf_type, resp_iov); } -int -SendReceive2(const unsigned int xid, struct cifs_ses *ses, - struct kvec *iov, int n_vec, int *resp_buf_type /* ret */, - const int flags, struct kvec *resp_iov) -{ - struct smb_rqst rqst; - struct kvec s_iov[CIFS_MAX_IOV_SIZE], *new_iov; - int rc; - - if (n_vec + 1 > CIFS_MAX_IOV_SIZE) { - new_iov = kmalloc_array(n_vec + 1, sizeof(struct kvec), - 
GFP_KERNEL); - if (!new_iov) { - /* otherwise cifs_send_recv below sets resp_buf_type */ - *resp_buf_type = CIFS_NO_BUFFER; - return -ENOMEM; - } - } else - new_iov = s_iov; - - /* 1st iov is a RFC1001 length followed by the rest of the packet */ - memcpy(new_iov + 1, iov, (sizeof(struct kvec) * n_vec)); - - new_iov[0].iov_base = new_iov[1].iov_base; - new_iov[0].iov_len = 4; - new_iov[1].iov_base += 4; - new_iov[1].iov_len -= 4; - - memset(&rqst, 0, sizeof(struct smb_rqst)); - rqst.rq_iov = new_iov; - rqst.rq_nvec = n_vec + 1; - - rc = cifs_send_recv(xid, ses, ses->server, - &rqst, resp_buf_type, flags, resp_iov); - if (n_vec + 1 > CIFS_MAX_IOV_SIZE) - kfree(new_iov); - return rc; -} - -int -SendReceive(const unsigned int xid, struct cifs_ses *ses, - struct smb_hdr *in_buf, struct smb_hdr *out_buf, - int *pbytes_returned, const int flags) -{ - int rc = 0; - struct mid_q_entry *midQ; - unsigned int len = be32_to_cpu(in_buf->smb_buf_length); - struct kvec iov = { .iov_base = in_buf, .iov_len = len }; - struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 }; - struct cifs_credits credits = { .value = 1, .instance = 0 }; - struct TCP_Server_Info *server; - - if (ses == NULL) { - cifs_dbg(VFS, "Null smb session\n"); - return -EIO; - } - server = ses->server; - if (server == NULL) { - cifs_dbg(VFS, "Null tcp session\n"); - return -EIO; - } - - spin_lock(&server->srv_lock); - if (server->tcpStatus == CifsExiting) { - spin_unlock(&server->srv_lock); - return -ENOENT; - } - spin_unlock(&server->srv_lock); - - /* Ensure that we do not send more than 50 overlapping requests - to the same server. 
We may make this configurable later or - use ses->maxReq */ - - if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { - cifs_server_dbg(VFS, "Invalid length, greater than maximum frame, %d\n", - len); - return -EIO; - } - - rc = wait_for_free_request(server, flags, &credits.instance); - if (rc) - return rc; - - /* make sure that we sign in the same order that we send on this socket - and avoid races inside tcp sendmsg code that could cause corruption - of smb data */ - - cifs_server_lock(server); - - rc = allocate_mid(ses, in_buf, &midQ); - if (rc) { - cifs_server_unlock(server); - /* Update # of requests on wire to server */ - add_credits(server, &credits, 0); - return rc; - } - - rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number); - if (rc) { - cifs_server_unlock(server); - goto out; - } - - midQ->mid_state = MID_REQUEST_SUBMITTED; - - rc = smb_send(server, in_buf, len); - cifs_save_when_sent(midQ); - - if (rc < 0) - server->sequence_number -= 2; - - cifs_server_unlock(server); - - if (rc < 0) - goto out; - - rc = wait_for_response(server, midQ); - if (rc != 0) { - send_cancel(server, &rqst, midQ); - spin_lock(&server->mid_queue_lock); - if (midQ->mid_state == MID_REQUEST_SUBMITTED || - midQ->mid_state == MID_RESPONSE_RECEIVED) { - /* no longer considered to be "in-flight" */ - midQ->callback = release_mid; - spin_unlock(&server->mid_queue_lock); - add_credits(server, &credits, 0); - return rc; - } - spin_unlock(&server->mid_queue_lock); - } - - rc = cifs_sync_mid_result(midQ, server); - if (rc != 0) { - add_credits(server, &credits, 0); - return rc; - } - - if (!midQ->resp_buf || !out_buf || - midQ->mid_state != MID_RESPONSE_READY) { - rc = -EIO; - cifs_server_dbg(VFS, "Bad MID state?\n"); - goto out; - } - - *pbytes_returned = get_rfc1002_length(midQ->resp_buf); - memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); - rc = cifs_check_receive(midQ, server, 0); -out: - delete_mid(midQ); - add_credits(server, &credits, 0); - - return rc; -} - -/* We send a 
LOCKINGX_CANCEL_LOCK to cause the Windows - blocking lock to return. */ - -static int -send_lock_cancel(const unsigned int xid, struct cifs_tcon *tcon, - struct smb_hdr *in_buf, - struct smb_hdr *out_buf) -{ - int bytes_returned; - struct cifs_ses *ses = tcon->ses; - LOCK_REQ *pSMB = (LOCK_REQ *)in_buf; - - /* We just modify the current in_buf to change - the type of lock from LOCKING_ANDX_SHARED_LOCK - or LOCKING_ANDX_EXCLUSIVE_LOCK to - LOCKING_ANDX_CANCEL_LOCK. */ - - pSMB->LockType = LOCKING_ANDX_CANCEL_LOCK|LOCKING_ANDX_LARGE_FILES; - pSMB->Timeout = 0; - pSMB->hdr.Mid = get_next_mid(ses->server); - - return SendReceive(xid, ses, in_buf, out_buf, - &bytes_returned, 0); -} - -int -SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, - struct smb_hdr *in_buf, struct smb_hdr *out_buf, - int *pbytes_returned) -{ - int rc = 0; - int rstart = 0; - struct mid_q_entry *midQ; - struct cifs_ses *ses; - unsigned int len = be32_to_cpu(in_buf->smb_buf_length); - struct kvec iov = { .iov_base = in_buf, .iov_len = len }; - struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 }; - unsigned int instance; - struct TCP_Server_Info *server; - - if (tcon == NULL || tcon->ses == NULL) { - cifs_dbg(VFS, "Null smb session\n"); - return -EIO; - } - ses = tcon->ses; - server = ses->server; - - if (server == NULL) { - cifs_dbg(VFS, "Null tcp session\n"); - return -EIO; - } - - spin_lock(&server->srv_lock); - if (server->tcpStatus == CifsExiting) { - spin_unlock(&server->srv_lock); - return -ENOENT; - } - spin_unlock(&server->srv_lock); - - /* Ensure that we do not send more than 50 overlapping requests - to the same server. 
We may make this configurable later or - use ses->maxReq */ - - if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { - cifs_tcon_dbg(VFS, "Invalid length, greater than maximum frame, %d\n", - len); - return -EIO; - } - - rc = wait_for_free_request(server, CIFS_BLOCKING_OP, &instance); - if (rc) - return rc; - - /* make sure that we sign in the same order that we send on this socket - and avoid races inside tcp sendmsg code that could cause corruption - of smb data */ - - cifs_server_lock(server); - - rc = allocate_mid(ses, in_buf, &midQ); - if (rc) { - cifs_server_unlock(server); - return rc; - } - - rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number); - if (rc) { - delete_mid(midQ); - cifs_server_unlock(server); - return rc; - } - - midQ->mid_state = MID_REQUEST_SUBMITTED; - rc = smb_send(server, in_buf, len); - cifs_save_when_sent(midQ); - - if (rc < 0) - server->sequence_number -= 2; - - cifs_server_unlock(server); - - if (rc < 0) { - delete_mid(midQ); - return rc; - } - - /* Wait for a reply - allow signals to interrupt. */ - rc = wait_event_interruptible(server->response_q, - (!(midQ->mid_state == MID_REQUEST_SUBMITTED || - midQ->mid_state == MID_RESPONSE_RECEIVED)) || - ((server->tcpStatus != CifsGood) && - (server->tcpStatus != CifsNew))); - - /* Were we interrupted by a signal ? */ - spin_lock(&server->srv_lock); - if ((rc == -ERESTARTSYS) && - (midQ->mid_state == MID_REQUEST_SUBMITTED || - midQ->mid_state == MID_RESPONSE_RECEIVED) && - ((server->tcpStatus == CifsGood) || - (server->tcpStatus == CifsNew))) { - spin_unlock(&server->srv_lock); - - if (in_buf->Command == SMB_COM_TRANSACTION2) { - /* POSIX lock. We send a NT_CANCEL SMB to cause the - blocking lock to return. */ - rc = send_cancel(server, &rqst, midQ); - if (rc) { - delete_mid(midQ); - return rc; - } - } else { - /* Windows lock. We send a LOCKINGX_CANCEL_LOCK - to cause the blocking lock to return. 
*/ - - rc = send_lock_cancel(xid, tcon, in_buf, out_buf); - - /* If we get -ENOLCK back the lock may have - already been removed. Don't exit in this case. */ - if (rc && rc != -ENOLCK) { - delete_mid(midQ); - return rc; - } - } - - rc = wait_for_response(server, midQ); - if (rc) { - send_cancel(server, &rqst, midQ); - spin_lock(&server->mid_queue_lock); - if (midQ->mid_state == MID_REQUEST_SUBMITTED || - midQ->mid_state == MID_RESPONSE_RECEIVED) { - /* no longer considered to be "in-flight" */ - midQ->callback = release_mid; - spin_unlock(&server->mid_queue_lock); - return rc; - } - spin_unlock(&server->mid_queue_lock); - } - - /* We got the response - restart system call. */ - rstart = 1; - spin_lock(&server->srv_lock); - } - spin_unlock(&server->srv_lock); - - rc = cifs_sync_mid_result(midQ, server); - if (rc != 0) - return rc; - - /* rcvd frame is ok */ - if (out_buf == NULL || midQ->mid_state != MID_RESPONSE_READY) { - rc = -EIO; - cifs_tcon_dbg(VFS, "Bad MID state?\n"); - goto out; - } - - *pbytes_returned = get_rfc1002_length(midQ->resp_buf); - memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); - rc = cifs_check_receive(midQ, server, 0); -out: - delete_mid(midQ); - if (rstart && rc == -EACCES) - return -ERESTARTSYS; - return rc; -} /* * Discard any remaining data in the current SMB. To do this, we borrow the From 4f67c41894674d351a4b4e7dd3471380b71b5bb3 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Wed, 6 Aug 2025 09:53:32 -0700 Subject: [PATCH 1629/2411] HID: hid-steam: Use new BTN_GRIP* buttons Make use of the newly defined BTN_GRIP* codes instead of using BTN_TRIGGER_HAPPY* and other less suited button codes. 
Signed-off-by: Vicki Pfau Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20250717000143.1902875-4-vi@endrift.com Signed-off-by: Dmitry Torokhov --- drivers/hid/hid-steam.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index 949d307c66a8..197126d6e081 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -755,15 +755,12 @@ static int steam_input_register(struct steam_device *steam) input_set_capability(input, EV_KEY, BTN_THUMBL); input_set_capability(input, EV_KEY, BTN_THUMB); input_set_capability(input, EV_KEY, BTN_THUMB2); + input_set_capability(input, EV_KEY, BTN_GRIPL); + input_set_capability(input, EV_KEY, BTN_GRIPR); if (steam->quirks & STEAM_QUIRK_DECK) { input_set_capability(input, EV_KEY, BTN_BASE); - input_set_capability(input, EV_KEY, BTN_TRIGGER_HAPPY1); - input_set_capability(input, EV_KEY, BTN_TRIGGER_HAPPY2); - input_set_capability(input, EV_KEY, BTN_TRIGGER_HAPPY3); - input_set_capability(input, EV_KEY, BTN_TRIGGER_HAPPY4); - } else { - input_set_capability(input, EV_KEY, BTN_GEAR_DOWN); - input_set_capability(input, EV_KEY, BTN_GEAR_UP); + input_set_capability(input, EV_KEY, BTN_GRIPL2); + input_set_capability(input, EV_KEY, BTN_GRIPR2); } input_set_abs_params(input, ABS_X, -32767, 32767, 0, 0); @@ -1419,8 +1416,8 @@ static inline s16 steam_le16(u8 *data) * 9.4 | BTN_SELECT | menu left * 9.5 | BTN_MODE | steam logo * 9.6 | BTN_START | menu right - * 9.7 | BTN_GEAR_DOWN | left back lever - * 10.0 | BTN_GEAR_UP | right back lever + * 9.7 | BTN_GRIPL | left back lever + * 10.0 | BTN_GRIPR | right back lever * 10.1 | -- | left-pad clicked * 10.2 | BTN_THUMBR | right-pad clicked * 10.3 | BTN_THUMB | left-pad touched (but see explanation below) @@ -1485,8 +1482,8 @@ static void steam_do_input_event(struct steam_device *steam, input_event(input, EV_KEY, BTN_SELECT, !!(b9 & BIT(4))); input_event(input, EV_KEY, BTN_MODE, !!(b9 & 
BIT(5))); input_event(input, EV_KEY, BTN_START, !!(b9 & BIT(6))); - input_event(input, EV_KEY, BTN_GEAR_DOWN, !!(b9 & BIT(7))); - input_event(input, EV_KEY, BTN_GEAR_UP, !!(b10 & BIT(0))); + input_event(input, EV_KEY, BTN_GRIPL, !!(b9 & BIT(7))); + input_event(input, EV_KEY, BTN_GRIPR, !!(b10 & BIT(0))); input_event(input, EV_KEY, BTN_THUMBR, !!(b10 & BIT(2))); input_event(input, EV_KEY, BTN_THUMBL, !!(b10 & BIT(6))); input_event(input, EV_KEY, BTN_THUMB, lpad_touched || lpad_and_joy); @@ -1547,8 +1544,8 @@ static void steam_do_input_event(struct steam_device *steam, * 9.4 | BTN_SELECT | menu left * 9.5 | BTN_MODE | steam logo * 9.6 | BTN_START | menu right - * 9.7 | BTN_TRIGGER_HAPPY3 | left bottom grip button - * 10.0 | BTN_TRIGGER_HAPPY4 | right bottom grip button + * 9.7 | BTN_GRIPL2 | left bottom grip button + * 10.0 | BTN_GRIPR2 | right bottom grip button * 10.1 | BTN_THUMB | left pad pressed * 10.2 | BTN_THUMB2 | right pad pressed * 10.3 | -- | left pad touched @@ -1573,8 +1570,8 @@ static void steam_do_input_event(struct steam_device *steam, * 12.6 | -- | unknown * 12.7 | -- | unknown * 13.0 | -- | unknown - * 13.1 | BTN_TRIGGER_HAPPY1 | left top grip button - * 13.2 | BTN_TRIGGER_HAPPY2 | right top grip button + * 13.1 | BTN_GRIPL | left top grip button + * 13.2 | BTN_GRIPR | right top grip button * 13.3 | -- | unknown * 13.4 | -- | unknown * 13.5 | -- | unknown @@ -1659,8 +1656,8 @@ static void steam_do_deck_input_event(struct steam_device *steam, input_event(input, EV_KEY, BTN_SELECT, !!(b9 & BIT(4))); input_event(input, EV_KEY, BTN_MODE, !!(b9 & BIT(5))); input_event(input, EV_KEY, BTN_START, !!(b9 & BIT(6))); - input_event(input, EV_KEY, BTN_TRIGGER_HAPPY3, !!(b9 & BIT(7))); - input_event(input, EV_KEY, BTN_TRIGGER_HAPPY4, !!(b10 & BIT(0))); + input_event(input, EV_KEY, BTN_GRIPL2, !!(b9 & BIT(7))); + input_event(input, EV_KEY, BTN_GRIPR2, !!(b10 & BIT(0))); input_event(input, EV_KEY, BTN_THUMBL, !!(b10 & BIT(6))); input_event(input, EV_KEY, 
BTN_THUMBR, !!(b11 & BIT(2))); input_event(input, EV_KEY, BTN_DPAD_UP, !!(b9 & BIT(0))); @@ -1669,8 +1666,8 @@ static void steam_do_deck_input_event(struct steam_device *steam, input_event(input, EV_KEY, BTN_DPAD_DOWN, !!(b9 & BIT(3))); input_event(input, EV_KEY, BTN_THUMB, !!(b10 & BIT(1))); input_event(input, EV_KEY, BTN_THUMB2, !!(b10 & BIT(2))); - input_event(input, EV_KEY, BTN_TRIGGER_HAPPY1, !!(b13 & BIT(1))); - input_event(input, EV_KEY, BTN_TRIGGER_HAPPY2, !!(b13 & BIT(2))); + input_event(input, EV_KEY, BTN_GRIPL, !!(b13 & BIT(1))); + input_event(input, EV_KEY, BTN_GRIPR, !!(b13 & BIT(2))); input_event(input, EV_KEY, BTN_BASE, !!(b14 & BIT(2))); input_sync(input); From 7b306dfa326f70114312b320d083b21fa9481e1e Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 29 Jul 2025 13:41:29 -0500 Subject: [PATCH 1630/2411] x86/sev: Evict cache lines during SNP memory validation An SNP cache coherency vulnerability requires a cache line eviction mitigation when validating memory after a page state change to private. The specific mitigation is to touch the first and last byte of each 4K page that is being validated. There is no need to perform the mitigation when performing a page state change to shared and rescinding validation. CPUID bit Fn8000001F_EBX[31] defines the COHERENCY_SFW_NO CPUID bit that, when set, indicates that the software mitigation for this vulnerability is not needed. Implement the mitigation and invoke it when validating memory (making it private) and the COHERENCY_SFW_NO bit is not set, indicating the SNP guest is vulnerable. 
Co-developed-by: Michael Roth Signed-off-by: Michael Roth Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Acked-by: Thomas Gleixner --- arch/x86/boot/cpuflags.c | 13 +++++++++++++ arch/x86/boot/startup/sev-shared.c | 7 +++++++ arch/x86/coco/sev/core.c | 21 +++++++++++++++++++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/sev.h | 19 +++++++++++++++++++ arch/x86/kernel/cpu/scattered.c | 1 + 6 files changed, 62 insertions(+) diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c index 916bac09b464..63e037e94e4c 100644 --- a/arch/x86/boot/cpuflags.c +++ b/arch/x86/boot/cpuflags.c @@ -106,5 +106,18 @@ void get_cpuflags(void) cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6], &cpu.flags[1]); } + + if (max_amd_level >= 0x8000001f) { + u32 ebx; + + /* + * The X86_FEATURE_COHERENCY_SFW_NO feature bit is in + * the virtualization flags entry (word 8) and set by + * scattered.c, so the bit needs to be explicitly set. + */ + cpuid(0x8000001f, &ignored, &ebx, &ignored, &ignored); + if (ebx & BIT(31)) + set_bit(X86_FEATURE_COHERENCY_SFW_NO, cpu.flags); + } } } diff --git a/arch/x86/boot/startup/sev-shared.c b/arch/x86/boot/startup/sev-shared.c index 7a706db87b93..ac7dfd21ddd4 100644 --- a/arch/x86/boot/startup/sev-shared.c +++ b/arch/x86/boot/startup/sev-shared.c @@ -810,6 +810,13 @@ static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr, if (ret) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); } + + /* + * If validating memory (making it private) and affected by the + * cache-coherency vulnerability, perform the cache eviction mitigation. 
+ */ + if (validate && !has_cpuflag(X86_FEATURE_COHERENCY_SFW_NO)) + sev_evict_cache((void *)vaddr, 1); } /* diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index fc59ce78c477..400a6ab75d45 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -358,10 +358,31 @@ static void svsm_pval_pages(struct snp_psc_desc *desc) static void pvalidate_pages(struct snp_psc_desc *desc) { + struct psc_entry *e; + unsigned int i; + if (snp_vmpl) svsm_pval_pages(desc); else pval_pages(desc); + + /* + * If not affected by the cache-coherency vulnerability there is no need + * to perform the cache eviction mitigation. + */ + if (cpu_feature_enabled(X86_FEATURE_COHERENCY_SFW_NO)) + return; + + for (i = 0; i <= desc->hdr.end_entry; i++) { + e = &desc->entries[i]; + + /* + * If validating memory (making it private) perform the cache + * eviction mitigation. + */ + if (e->operation == SNP_PAGE_STATE_PRIVATE) + sev_evict_cache(pfn_to_kaddr(e->gfn), e->pagesize ? 512 : 1); + } } static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 602957dd2609..06fc0479a23f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -218,6 +218,7 @@ #define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */ #define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */ #define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */ +#define X86_FEATURE_COHERENCY_SFW_NO ( 8*32+ 4) /* SNP cache coherency software work around not needed */ #define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */ #define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 89075ff19afa..02236962fdb1 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -619,6 +619,24 @@ int 
rmp_make_shared(u64 pfn, enum pg_level level); void snp_leak_pages(u64 pfn, unsigned int npages); void kdump_sev_callback(void); void snp_fixup_e820_tables(void); + +static inline void sev_evict_cache(void *va, int npages) +{ + volatile u8 val __always_unused; + u8 *bytes = va; + int page_idx; + + /* + * For SEV guests, a read from the first/last cache-lines of a 4K page + * using the guest key is sufficient to cause a flush of all cache-lines + * associated with that 4K page without incurring all the overhead of a + * full CLFLUSH sequence. + */ + for (page_idx = 0; page_idx < npages; page_idx++) { + val = bytes[page_idx * PAGE_SIZE]; + val = bytes[page_idx * PAGE_SIZE + PAGE_SIZE - 1]; + } +} #else static inline bool snp_probe_rmptable_info(void) { return false; } static inline int snp_rmptable_init(void) { return -ENOSYS; } @@ -634,6 +652,7 @@ static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV static inline void snp_leak_pages(u64 pfn, unsigned int npages) {} static inline void kdump_sev_callback(void) { } static inline void snp_fixup_e820_tables(void) {} +static inline void sev_evict_cache(void *va, int npages) {} #endif #endif diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index b4a1f6732a3a..6b868afb26c3 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -48,6 +48,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, { X86_FEATURE_AMD_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 }, { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, + { X86_FEATURE_COHERENCY_SFW_NO, CPUID_EBX, 31, 0x8000001f, 0 }, { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, From 472f8a3fccbb579cb98c1821da4cb9cbd51ee3e4 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 25 Jun 2025 11:15:01 +0300 Subject: [PATCH 1631/2411] mailbox: 
mtk-cmdq: Switch to pm_runtime_put_autosuspend() __pm_runtime_put_autosuspend() was meant to be used by callers that needed to put the Runtime PM usage_count without marking the device's last busy timestamp. It was however seen that the Runtime PM autosuspend related functions should include that call. Thus switch the driver to use pm_runtime_put_autosuspend(). Signed-off-by: Sakari Ailus Signed-off-by: Jassi Brar --- drivers/mailbox/mtk-cmdq-mailbox.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c index ab4e8d1954a1..532929916e99 100644 --- a/drivers/mailbox/mtk-cmdq-mailbox.c +++ b/drivers/mailbox/mtk-cmdq-mailbox.c @@ -390,7 +390,7 @@ static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data) task = kzalloc(sizeof(*task), GFP_ATOMIC); if (!task) { - __pm_runtime_put_autosuspend(cmdq->mbox.dev); + pm_runtime_put_autosuspend(cmdq->mbox.dev); return -ENOMEM; } @@ -440,7 +440,7 @@ static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data) list_move_tail(&task->list_entry, &thread->task_busy_list); pm_runtime_mark_last_busy(cmdq->mbox.dev); - __pm_runtime_put_autosuspend(cmdq->mbox.dev); + pm_runtime_put_autosuspend(cmdq->mbox.dev); return 0; } @@ -488,7 +488,7 @@ static void cmdq_mbox_shutdown(struct mbox_chan *chan) spin_unlock_irqrestore(&thread->chan->lock, flags); pm_runtime_mark_last_busy(cmdq->mbox.dev); - __pm_runtime_put_autosuspend(cmdq->mbox.dev); + pm_runtime_put_autosuspend(cmdq->mbox.dev); } static int cmdq_mbox_flush(struct mbox_chan *chan, unsigned long timeout) @@ -528,7 +528,7 @@ static int cmdq_mbox_flush(struct mbox_chan *chan, unsigned long timeout) out: spin_unlock_irqrestore(&thread->chan->lock, flags); pm_runtime_mark_last_busy(cmdq->mbox.dev); - __pm_runtime_put_autosuspend(cmdq->mbox.dev); + pm_runtime_put_autosuspend(cmdq->mbox.dev); return 0; @@ -543,7 +543,7 @@ static int cmdq_mbox_flush(struct mbox_chan *chan, unsigned 
long timeout) return -EFAULT; } pm_runtime_mark_last_busy(cmdq->mbox.dev); - __pm_runtime_put_autosuspend(cmdq->mbox.dev); + pm_runtime_put_autosuspend(cmdq->mbox.dev); return 0; } From fd3a4decb4f8ff1362db16fe42fc1af77d0259c3 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 11 Jun 2025 12:43:39 +0200 Subject: [PATCH 1632/2411] mailbox: Use dev_fwnode() irq_domain_create_tree() takes fwnode as the first argument. It can be extracted from the struct device using the dev_fwnode() helper instead of using of_node with of_fwnode_handle(). So use the dev_fwnode() helper. Signed-off-by: Jiri Slaby (SUSE) Cc: Manivannan Sadhasivam Cc: Jassi Brar Signed-off-by: Jassi Brar --- drivers/mailbox/qcom-ipcc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mailbox/qcom-ipcc.c b/drivers/mailbox/qcom-ipcc.c index ea44ffb5ce1a..d957d989c0ce 100644 --- a/drivers/mailbox/qcom-ipcc.c +++ b/drivers/mailbox/qcom-ipcc.c @@ -312,8 +312,7 @@ static int qcom_ipcc_probe(struct platform_device *pdev) if (!name) return -ENOMEM; - ipcc->irq_domain = irq_domain_create_tree(of_fwnode_handle(pdev->dev.of_node), - &qcom_ipcc_irq_ops, ipcc); + ipcc->irq_domain = irq_domain_create_tree(dev_fwnode(&pdev->dev), &qcom_ipcc_irq_ops, ipcc); if (!ipcc->irq_domain) return -ENOMEM; From dfa477b6e674d51f87b342f1d31d9316e44d67d3 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Mon, 2 Jun 2025 15:23:10 -0700 Subject: [PATCH 1633/2411] dt-bindings: mailbox: Add support for bcm74110 Add devicetree YAML binding for brcmstb bcm74110 mailbox used for communicating with a co-processor.
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Justin Chen Reviewed-by: Florian Fainelli Signed-off-by: Jassi Brar --- .../bindings/mailbox/brcm,bcm74110-mbox.yaml | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 Documentation/devicetree/bindings/mailbox/brcm,bcm74110-mbox.yaml diff --git a/Documentation/devicetree/bindings/mailbox/brcm,bcm74110-mbox.yaml b/Documentation/devicetree/bindings/mailbox/brcm,bcm74110-mbox.yaml new file mode 100644 index 000000000000..750cc96edb46 --- /dev/null +++ b/Documentation/devicetree/bindings/mailbox/brcm,bcm74110-mbox.yaml @@ -0,0 +1,64 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mailbox/brcm,bcm74110-mbox.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Broadcom BCM74110 Mailbox + +maintainers: + - Justin Chen + - Florian Fainelli + +description: Broadcom mailbox hardware first introduced with 74110 + +properties: + compatible: + enum: + - brcm,bcm74110-mbox + + reg: + maxItems: 1 + + interrupts: + items: + - description: RX doorbell and watermark interrupts + - description: TX doorbell and watermark interrupts + + "#mbox-cells": + const: 2 + description: + The first cell is channel type and second cell is shared memory slot + + brcm,rx: + $ref: /schemas/types.yaml#/definitions/uint32 + description: RX Mailbox number + + brcm,tx: + $ref: /schemas/types.yaml#/definitions/uint32 + description: TX Mailbox number + +required: + - compatible + - reg + - interrupts + - "#mbox-cells" + - brcm,rx + - brcm,tx + +additionalProperties: false + +examples: + - | + #include + #include + + mailbox@a552000 { + compatible = "brcm,bcm74110-mbox"; + reg = <0xa552000 0x1104>; + interrupts = , + ; + #mbox-cells = <0x2>; + brcm,rx = <0x7>; + brcm,tx = <0x6>; + }; From 52436007b862a90348ac8efc3a89eaceb2234f53 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Mon, 2 Jun 2025 15:23:11 -0700 Subject: [PATCH 1634/2411] mailbox: Add 
support for bcm74110 The bcm74110 mailbox driver is used to communicate with a co-processor for various power management and firmware related tasks. Signed-off-by: Justin Chen Reviewed-by: Florian Fainelli Tested-by: Florian Fainelli Signed-off-by: Jassi Brar --- drivers/mailbox/Kconfig | 10 + drivers/mailbox/Makefile | 2 + drivers/mailbox/bcm74110-mailbox.c | 656 +++++++++++++++++++++++++++++ 3 files changed, 668 insertions(+) create mode 100644 drivers/mailbox/bcm74110-mailbox.c diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig index 4fef4797b110..9abf193acd0b 100644 --- a/drivers/mailbox/Kconfig +++ b/drivers/mailbox/Kconfig @@ -350,4 +350,14 @@ config CIX_MBOX is unidirectional. Say Y here if you want to use the CIX Mailbox support. +config BCM74110_MAILBOX + tristate "Brcmstb BCM74110 Mailbox" + depends on ARCH_BRCMSTB || COMPILE_TEST + default ARCH_BRCMSTB + help + Broadcom STB mailbox driver present starting with brcmstb bcm74110 + SoCs. The mailbox is a communication channel between the host + processor and coprocessor that handles various power management tasks + and more.
+ endif diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile index 786a46587ba1..c0a4278aa129 100644 --- a/drivers/mailbox/Makefile +++ b/drivers/mailbox/Makefile @@ -74,3 +74,5 @@ obj-$(CONFIG_QCOM_IPCC) += qcom-ipcc.o obj-$(CONFIG_THEAD_TH1520_MBOX) += mailbox-th1520.o obj-$(CONFIG_CIX_MBOX) += cix-mailbox.o + +obj-$(CONFIG_BCM74110_MAILBOX) += bcm74110-mailbox.o diff --git a/drivers/mailbox/bcm74110-mailbox.c b/drivers/mailbox/bcm74110-mailbox.c new file mode 100644 index 000000000000..0680be8dc18f --- /dev/null +++ b/drivers/mailbox/bcm74110-mailbox.c @@ -0,0 +1,656 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Broadcom BCM74110 Mailbox Driver + * + * Copyright (c) 2025 Broadcom + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BCM_MBOX_BASE(sel) ((sel) * 0x40) +#define BCM_MBOX_IRQ_BASE(sel) (((sel) * 0x20) + 0x800) + +#define BCM_MBOX_CFGA 0x0 +#define BCM_MBOX_CFGB 0x4 +#define BCM_MBOX_CFGC 0x8 +#define BCM_MBOX_CFGD 0xc +#define BCM_MBOX_CTRL 0x10 +#define BCM_MBOX_CTRL_EN BIT(0) +#define BCM_MBOX_CTRL_CLR BIT(1) +#define BCM_MBOX_STATUS0 0x14 +#define BCM_MBOX_STATUS0_NOT_EMPTY BIT(28) +#define BCM_MBOX_STATUS0_FULL BIT(29) +#define BCM_MBOX_STATUS1 0x18 +#define BCM_MBOX_STATUS2 0x1c +#define BCM_MBOX_WDATA 0x20 +#define BCM_MBOX_RDATA 0x28 + +#define BCM_MBOX_IRQ_STATUS 0x0 +#define BCM_MBOX_IRQ_SET 0x4 +#define BCM_MBOX_IRQ_CLEAR 0x8 +#define BCM_MBOX_IRQ_MASK_STATUS 0xc +#define BCM_MBOX_IRQ_MASK_SET 0x10 +#define BCM_MBOX_IRQ_MASK_CLEAR 0x14 +#define BCM_MBOX_IRQ_TIMEOUT BIT(0) +#define BCM_MBOX_IRQ_NOT_EMPTY BIT(1) +#define BCM_MBOX_IRQ_FULL BIT(2) +#define BCM_MBOX_IRQ_LOW_WM BIT(3) +#define BCM_MBOX_IRQ_HIGH_WM BIT(4) + +#define BCM_LINK_CODE0 0xbe0 +#define BCM_LINK_CODE1 0xbe1 +#define BCM_LINK_CODE2 0xbe2 + +enum { + BCM_MSG_FUNC_LINK_START = 0, + BCM_MSG_FUNC_LINK_STOP, + BCM_MSG_FUNC_SHMEM_TX, + BCM_MSG_FUNC_SHMEM_RX, + BCM_MSG_FUNC_SHMEM_STOP, 
+ BCM_MSG_FUNC_MAX, +}; + +enum { + BCM_MSG_SVC_INIT = 0, + BCM_MSG_SVC_PMC, + BCM_MSG_SVC_SCMI, + BCM_MSG_SVC_DPFE, + BCM_MSG_SVC_MAX, +}; + +struct bcm74110_mbox_msg { + struct list_head list_entry; +#define BCM_MSG_VERSION_MASK GENMASK(31, 29) +#define BCM_MSG_VERSION 0x1 +#define BCM_MSG_REQ_MASK BIT(28) +#define BCM_MSG_RPLY_MASK BIT(27) +#define BCM_MSG_SVC_MASK GENMASK(26, 24) +#define BCM_MSG_FUNC_MASK GENMASK(23, 16) +#define BCM_MSG_LENGTH_MASK GENMASK(15, 4) +#define BCM_MSG_SLOT_MASK GENMASK(3, 0) + +#define BCM_MSG_SET_FIELD(hdr, field, val) \ + do { \ + hdr &= ~BCM_MSG_##field##_MASK; \ + hdr |= FIELD_PREP(BCM_MSG_##field##_MASK, val); \ + } while (0) + +#define BCM_MSG_GET_FIELD(hdr, field) \ + FIELD_GET(BCM_MSG_##field##_MASK, hdr) + u32 msg; +}; + +struct bcm74110_mbox_chan { + struct bcm74110_mbox *mbox; + bool en; + int slot; + int type; +}; + +struct bcm74110_mbox { + struct platform_device *pdev; + void __iomem *base; + + int tx_chan; + int rx_chan; + int rx_irq; + struct list_head rx_svc_init_list; + spinlock_t rx_svc_list_lock; + + struct mbox_controller controller; + struct bcm74110_mbox_chan *mbox_chan; +}; + +#define BCM74110_OFFSET_IO_WRITEL_MACRO(name, offset_base) \ +static void bcm74110_##name##_writel(struct bcm74110_mbox *mbox,\ + u32 val, u32 off) \ +{ \ + writel_relaxed(val, mbox->base + offset_base + off); \ +} +BCM74110_OFFSET_IO_WRITEL_MACRO(tx, BCM_MBOX_BASE(mbox->tx_chan)); +BCM74110_OFFSET_IO_WRITEL_MACRO(irq, BCM_MBOX_IRQ_BASE(mbox->rx_chan)); + +#define BCM74110_OFFSET_IO_READL_MACRO(name, offset_base) \ +static u32 bcm74110_##name##_readl(struct bcm74110_mbox *mbox, \ + u32 off) \ +{ \ + return readl_relaxed(mbox->base + offset_base + off); \ +} +BCM74110_OFFSET_IO_READL_MACRO(tx, BCM_MBOX_BASE(mbox->tx_chan)); +BCM74110_OFFSET_IO_READL_MACRO(rx, BCM_MBOX_BASE(mbox->rx_chan)); +BCM74110_OFFSET_IO_READL_MACRO(irq, BCM_MBOX_IRQ_BASE(mbox->rx_chan)); + +static inline struct bcm74110_mbox *bcm74110_mbox_from_cntrl( + struct 
mbox_controller *cntrl) +{ + return container_of(cntrl, struct bcm74110_mbox, controller); +} + +static void bcm74110_rx_push_init_msg(struct bcm74110_mbox *mbox, u32 val) +{ + struct bcm74110_mbox_msg *msg; + + msg = kzalloc(sizeof(*msg), GFP_ATOMIC); + if (!msg) + return; + + INIT_LIST_HEAD(&msg->list_entry); + msg->msg = val; + + spin_lock(&mbox->rx_svc_list_lock); + list_add_tail(&msg->list_entry, &mbox->rx_svc_init_list); + spin_unlock(&mbox->rx_svc_list_lock); +} + +static void bcm74110_rx_process_msg(struct bcm74110_mbox *mbox) +{ + struct device *dev = &mbox->pdev->dev; + struct bcm74110_mbox_chan *chan_priv; + struct mbox_chan *chan; + u32 msg, status; + int type; + + do { + msg = bcm74110_rx_readl(mbox, BCM_MBOX_RDATA); + status = bcm74110_rx_readl(mbox, BCM_MBOX_STATUS0); + + dev_dbg(dev, "rx: [{req=%lu|rply=%lu|srv=%lu|fn=%lu|length=%lu|slot=%lu]\n", + BCM_MSG_GET_FIELD(msg, REQ), BCM_MSG_GET_FIELD(msg, RPLY), + BCM_MSG_GET_FIELD(msg, SVC), BCM_MSG_GET_FIELD(msg, FUNC), + BCM_MSG_GET_FIELD(msg, LENGTH), BCM_MSG_GET_FIELD(msg, SLOT)); + + type = BCM_MSG_GET_FIELD(msg, SVC); + switch (type) { + case BCM_MSG_SVC_INIT: + bcm74110_rx_push_init_msg(mbox, msg); + break; + case BCM_MSG_SVC_PMC: + case BCM_MSG_SVC_SCMI: + case BCM_MSG_SVC_DPFE: + chan = &mbox->controller.chans[type]; + chan_priv = chan->con_priv; + if (chan_priv->en) + mbox_chan_received_data(chan, NULL); + else + dev_warn(dev, "Channel not enabled\n"); + break; + default: + dev_warn(dev, "Unsupported msg received\n"); + } + } while (status & BCM_MBOX_STATUS0_NOT_EMPTY); +} + +static irqreturn_t bcm74110_mbox_isr(int irq, void *data) +{ + struct bcm74110_mbox *mbox = data; + u32 status; + + status = bcm74110_irq_readl(mbox, BCM_MBOX_IRQ_STATUS); + + bcm74110_irq_writel(mbox, 0xffffffff, BCM_MBOX_IRQ_CLEAR); + + if (status & BCM_MBOX_IRQ_NOT_EMPTY) + bcm74110_rx_process_msg(mbox); + else + dev_warn(&mbox->pdev->dev, "Spurious interrupt\n"); + + return IRQ_HANDLED; +} + +static void 
bcm74110_mbox_mask_and_clear(struct bcm74110_mbox *mbox) +{ + bcm74110_irq_writel(mbox, 0xffffffff, BCM_MBOX_IRQ_MASK_SET); + bcm74110_irq_writel(mbox, 0xffffffff, BCM_MBOX_IRQ_CLEAR); +} + +static int bcm74110_rx_pop_init_msg(struct bcm74110_mbox *mbox, u32 func_type, + u32 *val) +{ + struct bcm74110_mbox_msg *msg, *msg_tmp; + unsigned long flags; + bool found = false; + + spin_lock_irqsave(&mbox->rx_svc_list_lock, flags); + list_for_each_entry_safe(msg, msg_tmp, &mbox->rx_svc_init_list, + list_entry) { + if (BCM_MSG_GET_FIELD(msg->msg, FUNC) == func_type) { + list_del(&msg->list_entry); + found = true; + break; + } + } + spin_unlock_irqrestore(&mbox->rx_svc_list_lock, flags); + + if (!found) + return -EINVAL; + + *val = msg->msg; + kfree(msg); + + return 0; +} + +static void bcm74110_rx_flush_msg(struct bcm74110_mbox *mbox) +{ + struct bcm74110_mbox_msg *msg, *msg_tmp; + LIST_HEAD(list_temp); + unsigned long flags; + + spin_lock_irqsave(&mbox->rx_svc_list_lock, flags); + list_splice_init(&mbox->rx_svc_init_list, &list_temp); + spin_unlock_irqrestore(&mbox->rx_svc_list_lock, flags); + + list_for_each_entry_safe(msg, msg_tmp, &list_temp, list_entry) { + list_del(&msg->list_entry); + kfree(msg); + } +} + +#define BCM_DEQUEUE_TIMEOUT_MS 30 +static int bcm74110_rx_pop_init_msg_block(struct bcm74110_mbox *mbox, u32 func_type, + u32 *val) +{ + int ret, timeout = 0; + + do { + ret = bcm74110_rx_pop_init_msg(mbox, func_type, val); + + if (!ret) + return 0; + + /* TODO: Figure out what is a good sleep here. 
*/ + usleep_range(1000, 2000); + timeout++; + } while (timeout < BCM_DEQUEUE_TIMEOUT_MS); + + dev_warn(&mbox->pdev->dev, "Timeout waiting for service init response\n"); + return -ETIMEDOUT; +} + +static int bcm74110_mbox_create_msg(int req, int rply, int svc, int func, + int length, int slot) +{ + u32 msg = 0; + + BCM_MSG_SET_FIELD(msg, REQ, req); + BCM_MSG_SET_FIELD(msg, RPLY, rply); + BCM_MSG_SET_FIELD(msg, SVC, svc); + BCM_MSG_SET_FIELD(msg, FUNC, func); + BCM_MSG_SET_FIELD(msg, LENGTH, length); + BCM_MSG_SET_FIELD(msg, SLOT, slot); + + return msg; +} + +static int bcm74110_mbox_tx_msg(struct bcm74110_mbox *mbox, u32 msg) +{ + int val; + + /* We can potentially poll with timeout here instead */ + val = bcm74110_tx_readl(mbox, BCM_MBOX_STATUS0); + if (val & BCM_MBOX_STATUS0_FULL) { + dev_err(&mbox->pdev->dev, "Mailbox full\n"); + return -EINVAL; + } + + dev_dbg(&mbox->pdev->dev, "tx: [{req=%lu|rply=%lu|srv=%lu|fn=%lu|length=%lu|slot=%lu]\n", + BCM_MSG_GET_FIELD(msg, REQ), BCM_MSG_GET_FIELD(msg, RPLY), + BCM_MSG_GET_FIELD(msg, SVC), BCM_MSG_GET_FIELD(msg, FUNC), + BCM_MSG_GET_FIELD(msg, LENGTH), BCM_MSG_GET_FIELD(msg, SLOT)); + + bcm74110_tx_writel(mbox, msg, BCM_MBOX_WDATA); + + return 0; +} + +#define BCM_MBOX_LINK_TRAINING_RETRIES 5 +static int bcm74110_mbox_link_training(struct bcm74110_mbox *mbox) +{ + int ret, retries = 0; + u32 msg = 0, orig_len = 0, len = BCM_LINK_CODE0; + + do { + switch (len) { + case 0: + retries++; + dev_warn(&mbox->pdev->dev, + "Link train failed, trying again... 
%d\n", + retries); + if (retries > BCM_MBOX_LINK_TRAINING_RETRIES) + return -EINVAL; + len = BCM_LINK_CODE0; + fallthrough; + case BCM_LINK_CODE0: + case BCM_LINK_CODE1: + case BCM_LINK_CODE2: + msg = bcm74110_mbox_create_msg(1, 0, BCM_MSG_SVC_INIT, + BCM_MSG_FUNC_LINK_START, + len, BCM_MSG_SVC_INIT); + break; + default: + break; + } + + bcm74110_mbox_tx_msg(mbox, msg); + + /* No response expected for LINK_CODE2 */ + if (len == BCM_LINK_CODE2) + return 0; + + orig_len = len; + + ret = bcm74110_rx_pop_init_msg_block(mbox, + BCM_MSG_GET_FIELD(msg, FUNC), + &msg); + if (ret) { + len = 0; + continue; + } + + if ((BCM_MSG_GET_FIELD(msg, SVC) != BCM_MSG_SVC_INIT) || + (BCM_MSG_GET_FIELD(msg, FUNC) != BCM_MSG_FUNC_LINK_START) || + (BCM_MSG_GET_FIELD(msg, SLOT) != 0) || + (BCM_MSG_GET_FIELD(msg, RPLY) != 1) || + (BCM_MSG_GET_FIELD(msg, REQ) != 0)) { + len = 0; + continue; + } + + len = BCM_MSG_GET_FIELD(msg, LENGTH); + + /* Make sure sequence is good */ + if (len != (orig_len + 1)) { + len = 0; + continue; + } + } while (1); + + return -EINVAL; +} + +static int bcm74110_mbox_tx_msg_and_wait_ack(struct bcm74110_mbox *mbox, u32 msg) +{ + int ret; + u32 recv_msg; + + ret = bcm74110_mbox_tx_msg(mbox, msg); + if (ret) + return ret; + + ret = bcm74110_rx_pop_init_msg_block(mbox, BCM_MSG_GET_FIELD(msg, FUNC), + &recv_msg); + if (ret) + return ret; + + /* + * Modify tx message to verify rx ack. + * Flip RPLY/REQ for synchronous messages + */ + if (BCM_MSG_GET_FIELD(msg, REQ) == 1) { + BCM_MSG_SET_FIELD(msg, RPLY, 1); + BCM_MSG_SET_FIELD(msg, REQ, 0); + } + + if (msg != recv_msg) { + dev_err(&mbox->pdev->dev, "Found ack, but ack is invalid\n"); + return -EINVAL; + } + + return 0; +} + +/* Each index points to 0x100 of HAB MEM. 
IDX size counts from 0 */ +#define BCM_MBOX_HAB_MEM_IDX_START 0x30 +#define BCM_MBOX_HAB_MEM_IDX_SIZE 0x0 +static int bcm74110_mbox_shmem_init(struct bcm74110_mbox *mbox) +{ + u32 msg = 0; + int ret; + + msg = bcm74110_mbox_create_msg(1, 0, BCM_MSG_SVC_INIT, + BCM_MSG_FUNC_SHMEM_STOP, + 0, BCM_MSG_SVC_INIT); + ret = bcm74110_mbox_tx_msg_and_wait_ack(mbox, msg); + if (ret) + return -EINVAL; + + msg = bcm74110_mbox_create_msg(1, 0, BCM_MSG_SVC_INIT, + BCM_MSG_FUNC_SHMEM_TX, + BCM_MBOX_HAB_MEM_IDX_START, + BCM_MBOX_HAB_MEM_IDX_SIZE); + ret = bcm74110_mbox_tx_msg_and_wait_ack(mbox, msg); + if (ret) + return -EINVAL; + + msg = bcm74110_mbox_create_msg(1, 0, BCM_MSG_SVC_INIT, + BCM_MSG_FUNC_SHMEM_RX, + BCM_MBOX_HAB_MEM_IDX_START, + BCM_MBOX_HAB_MEM_IDX_SIZE); + ret = bcm74110_mbox_tx_msg_and_wait_ack(mbox, msg); + if (ret) + return -EINVAL; + + return 0; +} + +static int bcm74110_mbox_init(struct bcm74110_mbox *mbox) +{ + int ret = 0; + + /* Disable queues tx/rx */ + bcm74110_tx_writel(mbox, 0x0, BCM_MBOX_CTRL); + + /* Clear status & restart tx/rx*/ + bcm74110_tx_writel(mbox, BCM_MBOX_CTRL_EN | BCM_MBOX_CTRL_CLR, + BCM_MBOX_CTRL); + + /* Unmask irq */ + bcm74110_irq_writel(mbox, BCM_MBOX_IRQ_NOT_EMPTY, BCM_MBOX_IRQ_MASK_CLEAR); + + ret = bcm74110_mbox_link_training(mbox); + if (ret) { + dev_err(&mbox->pdev->dev, "Training failed\n"); + return ret; + } + + return bcm74110_mbox_shmem_init(mbox); +} + +static int bcm74110_mbox_send_data(struct mbox_chan *chan, void *data) +{ + struct bcm74110_mbox_chan *chan_priv = chan->con_priv; + u32 msg; + + switch (chan_priv->type) { + case BCM_MSG_SVC_PMC: + case BCM_MSG_SVC_SCMI: + case BCM_MSG_SVC_DPFE: + msg = bcm74110_mbox_create_msg(1, 0, chan_priv->type, 0, + 128 + 28, chan_priv->slot); + break; + default: + return -EINVAL; + }; + + return bcm74110_mbox_tx_msg(chan_priv->mbox, msg); +} + +static int bcm74110_mbox_chan_startup(struct mbox_chan *chan) +{ + struct bcm74110_mbox_chan *chan_priv = chan->con_priv; + + chan_priv->en = 
true; + + return 0; +} + +static void bcm74110_mbox_chan_shutdown(struct mbox_chan *chan) +{ + struct bcm74110_mbox_chan *chan_priv = chan->con_priv; + + chan_priv->en = false; +} + +static const struct mbox_chan_ops bcm74110_mbox_chan_ops = { + .send_data = bcm74110_mbox_send_data, + .startup = bcm74110_mbox_chan_startup, + .shutdown = bcm74110_mbox_chan_shutdown, +}; + +static void bcm74110_mbox_shutdown(struct platform_device *pdev) +{ + struct bcm74110_mbox *mbox = dev_get_drvdata(&pdev->dev); + u32 msg; + + msg = bcm74110_mbox_create_msg(1, 0, BCM_MSG_SVC_INIT, + BCM_MSG_FUNC_LINK_STOP, + 0, 0); + + bcm74110_mbox_tx_msg_and_wait_ack(mbox, msg); + + /* Even if we don't receive ACK, lets shut it down */ + + bcm74110_mbox_mask_and_clear(mbox); + + /* Disable queues tx/rx */ + bcm74110_tx_writel(mbox, 0x0, BCM_MBOX_CTRL); + + /* Flush queues */ + bcm74110_rx_flush_msg(mbox); +} + +static struct mbox_chan *bcm74110_mbox_of_xlate(struct mbox_controller *cntrl, + const struct of_phandle_args *p) +{ + struct bcm74110_mbox *mbox = bcm74110_mbox_from_cntrl(cntrl); + struct device *dev = &mbox->pdev->dev; + struct bcm74110_mbox_chan *chan_priv; + int slot, type; + + if (p->args_count != 2) { + dev_err(dev, "Invalid arguments\n"); + return ERR_PTR(-EINVAL); + } + + type = p->args[0]; + slot = p->args[1]; + + switch (type) { + case BCM_MSG_SVC_PMC: + case BCM_MSG_SVC_SCMI: + case BCM_MSG_SVC_DPFE: + if (slot > BCM_MBOX_HAB_MEM_IDX_SIZE) { + dev_err(dev, "Not enough shared memory\n"); + return ERR_PTR(-EINVAL); + } + chan_priv = cntrl->chans[type].con_priv; + chan_priv->slot = slot; + chan_priv->type = type; + break; + default: + dev_err(dev, "Invalid channel type: %d\n", type); + return ERR_PTR(-EINVAL); + }; + + return &cntrl->chans[type]; +} + +static int bcm74110_mbox_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct bcm74110_mbox *mbox; + int i, ret; + + mbox = devm_kzalloc(dev, sizeof(*mbox), GFP_KERNEL); + if (!mbox) + return -ENOMEM; 
+ + mbox->pdev = pdev; + platform_set_drvdata(pdev, mbox); + + mbox->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(mbox->base)) + return dev_err_probe(dev, PTR_ERR(mbox->base), "Failed to iomap\n"); + + ret = of_property_read_u32(dev->of_node, "brcm,tx", &mbox->tx_chan); + if (ret) + return dev_err_probe(dev, ret, "Failed to find tx channel\n"); + + ret = of_property_read_u32(dev->of_node, "brcm,rx", &mbox->rx_chan); + if (ret) + return dev_err_probe(dev, ret, "Failed to find rx channel\n"); + + mbox->rx_irq = platform_get_irq(pdev, 0); + if (mbox->rx_irq < 0) + return mbox->rx_irq; + + INIT_LIST_HEAD(&mbox->rx_svc_init_list); + spin_lock_init(&mbox->rx_svc_list_lock); + bcm74110_mbox_mask_and_clear(mbox); + + ret = devm_request_irq(dev, mbox->rx_irq, bcm74110_mbox_isr, + IRQF_NO_SUSPEND, pdev->name, mbox); + if (ret) + return dev_err_probe(dev, ret, "Failed to request irq\n"); + + mbox->controller.ops = &bcm74110_mbox_chan_ops; + mbox->controller.dev = dev; + mbox->controller.num_chans = BCM_MSG_SVC_MAX; + mbox->controller.of_xlate = &bcm74110_mbox_of_xlate; + mbox->controller.chans = devm_kcalloc(dev, BCM_MSG_SVC_MAX, + sizeof(*mbox->controller.chans), + GFP_KERNEL); + if (!mbox->controller.chans) + return -ENOMEM; + + mbox->mbox_chan = devm_kcalloc(dev, BCM_MSG_SVC_MAX, + sizeof(*mbox->mbox_chan), + GFP_KERNEL); + if (!mbox->mbox_chan) + return -ENOMEM; + + for (i = 0; i < BCM_MSG_SVC_MAX; i++) { + mbox->mbox_chan[i].mbox = mbox; + mbox->controller.chans[i].con_priv = &mbox->mbox_chan[i]; + } + + ret = devm_mbox_controller_register(dev, &mbox->controller); + if (ret) + return ret; + + ret = bcm74110_mbox_init(mbox); + if (ret) + return ret; + + return 0; +} + +static const struct of_device_id bcm74110_mbox_of_match[] = { + { .compatible = "brcm,bcm74110-mbox", }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, bcm74110_mbox_of_match); + +static struct platform_driver bcm74110_mbox_driver = { + .driver = { + .name = "bcm74110-mbox", + 
.of_match_table = bcm74110_mbox_of_match, + }, + .probe = bcm74110_mbox_probe, + .shutdown = bcm74110_mbox_shutdown, +}; +module_platform_driver(bcm74110_mbox_driver); + +MODULE_AUTHOR("Justin Chen "); +MODULE_DESCRIPTION("BCM74110 mailbox driver"); +MODULE_LICENSE("GPL"); From 9bdaf9a96d04c9c520e720ebb2211550331f15ac Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Sun, 13 Jul 2025 10:05:28 +0200 Subject: [PATCH 1635/2411] dt-bindings: mailbox: qcom-ipcc: document the Milos Inter-Processor Communication Controller Document the Inter-Processor Communication Controller on the Milos SoC. Signed-off-by: Luca Weiss Acked-by: Rob Herring (Arm) Signed-off-by: Jassi Brar --- Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml b/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml index f69c0ec5d19d..e5c423130db6 100644 --- a/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml +++ b/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml @@ -24,6 +24,7 @@ properties: compatible: items: - enum: + - qcom,milos-ipcc - qcom,qcs8300-ipcc - qcom,qdu1000-ipcc - qcom,sa8255p-ipcc From b8fa5e827f2056c207f5c447e48e33af28bc1c19 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Jun 2025 13:57:08 +0200 Subject: [PATCH 1636/2411] dt-bindings: mailbox: amlogic,meson-gxbb-mhu: Add missing interrupts maxItems Lists should have fixed constraint, so add missing maxItems to the "interrupts" property. Since minItems=maxItems, the minItems is implied by dtschema so can be dropped. 
Signed-off-by: Krzysztof Kozlowski Reviewed-by: Martin Blumenstingl Acked-by: Rob Herring (Arm) Signed-off-by: Jassi Brar --- .../devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml b/Documentation/devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml index 385809ed1569..0849799ee0c5 100644 --- a/Documentation/devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml +++ b/Documentation/devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml @@ -27,7 +27,7 @@ properties: maxItems: 1 interrupts: - minItems: 3 + maxItems: 3 description: Contains the interrupt information corresponding to each of the 3 links of MHU. From f869e8f7da4d73a6d0d106d6891d81937b52de12 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Jun 2025 13:57:09 +0200 Subject: [PATCH 1637/2411] dt-bindings: mailbox: ti,secure-proxy: Add missing reg maxItems Lists should have fixed constraint, so add missing maxItems to the "reg" property. Since minItems=maxItems, the minItems is implied by dtschema so can be dropped. 
Signed-off-by: Krzysztof Kozlowski Reviewed-by: Nishanth Menon Acked-by: Rob Herring (Arm) Signed-off-by: Jassi Brar --- Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml b/Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml index eea822861804..682ccd76f5c2 100644 --- a/Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml +++ b/Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml @@ -36,7 +36,7 @@ properties: - const: scfg reg: - minItems: 3 + maxItems: 3 interrupt-names: minItems: 1 From 5682a215daae506af20d2caf3e9811ccfb24caf3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Jun 2025 13:57:10 +0200 Subject: [PATCH 1638/2411] dt-bindings: mailbox: Correct example indentation DTS example in the bindings should be indented with 2- or 4-spaces, so correct a mixture of different styles to keep consistent 4-spaces. 
Signed-off-by: Krzysztof Kozlowski Reviewed-by: Nishanth Menon Reviewed-by: Sven Peter Acked-by: Rob Herring (Arm) Signed-off-by: Jassi Brar --- .../mailbox/allwinner,sun6i-a31-msgbox.yaml | 14 +++++++------- .../bindings/mailbox/amlogic,meson-gxbb-mhu.yaml | 8 ++++---- .../bindings/mailbox/apple,mailbox.yaml | 16 ++++++++-------- .../bindings/mailbox/ti,secure-proxy.yaml | 16 ++++++++-------- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/Documentation/devicetree/bindings/mailbox/allwinner,sun6i-a31-msgbox.yaml b/Documentation/devicetree/bindings/mailbox/allwinner,sun6i-a31-msgbox.yaml index 75d5d97305e1..87d31963c1b7 100644 --- a/Documentation/devicetree/bindings/mailbox/allwinner,sun6i-a31-msgbox.yaml +++ b/Documentation/devicetree/bindings/mailbox/allwinner,sun6i-a31-msgbox.yaml @@ -68,13 +68,13 @@ examples: #include msgbox: mailbox@1c17000 { - compatible = "allwinner,sun8i-h3-msgbox", - "allwinner,sun6i-a31-msgbox"; - reg = <0x01c17000 0x1000>; - clocks = <&ccu CLK_BUS_MSGBOX>; - resets = <&ccu RST_BUS_MSGBOX>; - interrupts = ; - #mbox-cells = <1>; + compatible = "allwinner,sun8i-h3-msgbox", + "allwinner,sun6i-a31-msgbox"; + reg = <0x01c17000 0x1000>; + clocks = <&ccu CLK_BUS_MSGBOX>; + resets = <&ccu RST_BUS_MSGBOX>; + interrupts = ; + #mbox-cells = <1>; }; ... 
diff --git a/Documentation/devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml b/Documentation/devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml index 0849799ee0c5..79963c9878ba 100644 --- a/Documentation/devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml +++ b/Documentation/devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml @@ -46,8 +46,8 @@ additionalProperties: false examples: - | mailbox@c883c404 { - compatible = "amlogic,meson-gxbb-mhu"; - reg = <0xc883c404 0x4c>; - interrupts = <208>, <209>, <210>; - #mbox-cells = <1>; + compatible = "amlogic,meson-gxbb-mhu"; + reg = <0xc883c404 0x4c>; + interrupts = <208>, <209>, <210>; + #mbox-cells = <1>; }; diff --git a/Documentation/devicetree/bindings/mailbox/apple,mailbox.yaml b/Documentation/devicetree/bindings/mailbox/apple,mailbox.yaml index 4c0668e5f0bd..474c1a0f99f3 100644 --- a/Documentation/devicetree/bindings/mailbox/apple,mailbox.yaml +++ b/Documentation/devicetree/bindings/mailbox/apple,mailbox.yaml @@ -78,11 +78,11 @@ additionalProperties: false examples: - | - mailbox@77408000 { - compatible = "apple,t8103-asc-mailbox", "apple,asc-mailbox-v4"; - reg = <0x77408000 0x4000>; - interrupts = <1 583 4>, <1 584 4>, <1 585 4>, <1 586 4>; - interrupt-names = "send-empty", "send-not-empty", - "recv-empty", "recv-not-empty"; - #mbox-cells = <0>; - }; + mailbox@77408000 { + compatible = "apple,t8103-asc-mailbox", "apple,asc-mailbox-v4"; + reg = <0x77408000 0x4000>; + interrupts = <1 583 4>, <1 584 4>, <1 585 4>, <1 586 4>; + interrupt-names = "send-empty", "send-not-empty", + "recv-empty", "recv-not-empty"; + #mbox-cells = <0>; + }; diff --git a/Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml b/Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml index 682ccd76f5c2..c321b69f0ccd 100644 --- a/Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml +++ b/Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml @@ -68,12 +68,12 @@ examples: - | #include 
secure_proxy: mailbox@32c00000 { - compatible = "ti,am654-secure-proxy"; - #mbox-cells = <1>; - reg-names = "target_data", "rt", "scfg"; - reg = <0x32c00000 0x100000>, - <0x32400000 0x100000>, - <0x32800000 0x100000>; - interrupt-names = "rx_011"; - interrupts = ; + compatible = "ti,am654-secure-proxy"; + #mbox-cells = <1>; + reg-names = "target_data", "rt", "scfg"; + reg = <0x32c00000 0x100000>, + <0x32400000 0x100000>, + <0x32800000 0x100000>; + interrupt-names = "rx_011"; + interrupts = ; }; From cc0dce769bcedb621f2c5535c31570cd51bc0235 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Jun 2025 13:57:11 +0200 Subject: [PATCH 1639/2411] dt-bindings: mailbox: nvidia,tegra186-hsp: Use generic node name According to Devicetree specifications, device node names should be generic, thus Mailbox provider should be called "mailbox", not "hsp". Signed-off-by: Krzysztof Kozlowski Acked-by: Thierry Reding Acked-by: Rob Herring (Arm) Signed-off-by: Jassi Brar --- .../devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml b/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml index d97050e40fbf..307dea3fd83f 100644 --- a/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml +++ b/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml @@ -59,9 +59,6 @@ description: | properties: - $nodename: - pattern: "^hsp@[0-9a-f]+$" - compatible: oneOf: - enum: @@ -131,7 +128,7 @@ examples: #include #include - hsp_top0: hsp@3c00000 { + hsp_top0: mailbox@3c00000 { compatible = "nvidia,tegra186-hsp"; reg = <0x03c00000 0xa0000>; interrupts = ; From b92f05bc61e23de4f4bb83647c295a4b84047eef Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Jun 2025 13:57:12 +0200 Subject: [PATCH 1640/2411] dt-bindings: mailbox: Drop consumers example DTS Providers DTS examples should not contain consumer nodes, 
because they are completely redundant, obvious (defined in common schema) and add unnecessary bloat. Drop consumer examples and unneeded node labels. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Nishanth Menon Acked-by: Rob Herring (Arm) Signed-off-by: Jassi Brar --- .../bindings/mailbox/nvidia,tegra186-hsp.yaml | 6 +----- .../bindings/mailbox/qcom,apcs-kpss-global.yaml | 9 +-------- .../devicetree/bindings/mailbox/ti,omap-mailbox.yaml | 10 +++------- 3 files changed, 5 insertions(+), 20 deletions(-) diff --git a/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml b/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml index 307dea3fd83f..f833b845de0d 100644 --- a/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml +++ b/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml @@ -128,14 +128,10 @@ examples: #include #include - hsp_top0: mailbox@3c00000 { + mailbox@3c00000 { compatible = "nvidia,tegra186-hsp"; reg = <0x03c00000 0xa0000>; interrupts = ; interrupt-names = "doorbell"; #mbox-cells = <2>; }; - - client { - mboxes = <&hsp_top0 TEGRA_HSP_MBOX_TYPE_DB TEGRA_HSP_DB_MASTER_CCPLEX>; - }; diff --git a/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml b/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml index ac726136f7e5..615ed103b7e6 100644 --- a/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml +++ b/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml @@ -251,7 +251,7 @@ examples: # Example apcs with msm8996 - | #include - apcs_glb: mailbox@9820000 { + mailbox@9820000 { compatible = "qcom,msm8996-apcs-hmss-global"; reg = <0x9820000 0x1000>; @@ -259,13 +259,6 @@ examples: #clock-cells = <0>; }; - rpm-glink { - compatible = "qcom,glink-rpm"; - interrupts = ; - qcom,rpm-msg-ram = <&rpm_msg_ram>; - mboxes = <&apcs_glb 0>; - }; - # Example apcs with qcs404 - | #define GCC_APSS_AHB_CLK_SRC 1 diff --git 
a/Documentation/devicetree/bindings/mailbox/ti,omap-mailbox.yaml b/Documentation/devicetree/bindings/mailbox/ti,omap-mailbox.yaml index 1a2001e58880..8504ceb64806 100644 --- a/Documentation/devicetree/bindings/mailbox/ti,omap-mailbox.yaml +++ b/Documentation/devicetree/bindings/mailbox/ti,omap-mailbox.yaml @@ -242,7 +242,7 @@ examples: - | /* OMAP4 */ #include - mailbox: mailbox@4a0f4000 { + mailbox@4a0f4000 { compatible = "ti,omap4-mailbox"; reg = <0x4a0f4000 0x200>; interrupts = ; @@ -260,13 +260,9 @@ examples: }; }; - dsp { - mboxes = <&mailbox &mbox_dsp>; - }; - - | /* AM33xx */ - mailbox1: mailbox@480c8000 { + mailbox@480c8000 { compatible = "ti,omap4-mailbox"; reg = <0x480c8000 0x200>; interrupts = <77>; @@ -283,7 +279,7 @@ examples: - | /* AM65x */ - mailbox0_cluster0: mailbox@31f80000 { + mailbox@31f80000 { compatible = "ti,am654-mailbox"; reg = <0x31f80000 0x200>; #mbox-cells = <1>; From 7d33dd2d0e6825d4d2a61d06ae609dce17b56a3a Mon Sep 17 00:00:00 2001 From: Jammy Huang Date: Tue, 22 Jul 2025 09:31:16 +0800 Subject: [PATCH 1641/2411] dt-bindings: mailbox: Add ASPEED AST2700 series SoC Introduce the mailbox module for AST27XX series SoC, which is responsible for interchanging messages between asymmetric processors. 
Signed-off-by: Jammy Huang Reviewed-by: Krzysztof Kozlowski Reviewed-by: Andrew Jeffery Signed-off-by: Jassi Brar --- .../mailbox/aspeed,ast2700-mailbox.yaml | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 Documentation/devicetree/bindings/mailbox/aspeed,ast2700-mailbox.yaml diff --git a/Documentation/devicetree/bindings/mailbox/aspeed,ast2700-mailbox.yaml b/Documentation/devicetree/bindings/mailbox/aspeed,ast2700-mailbox.yaml new file mode 100644 index 000000000000..600e2d63fccd --- /dev/null +++ b/Documentation/devicetree/bindings/mailbox/aspeed,ast2700-mailbox.yaml @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mailbox/aspeed,ast2700-mailbox.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ASPEED AST2700 mailbox controller + +maintainers: + - Jammy Huang + +description: > + ASPEED AST2700 has multiple processors that need to communicate with each + other. The mailbox controller provides a way for these processors to send + messages to each other. It is a hardware-based inter-processor communication + mechanism that allows processors to send and receive messages through + dedicated channels. + + The mailbox's tx/rx are independent, meaning that one processor can send a + message while another processor is receiving a message simultaneously. + There are 4 channels available for both tx and rx operations. Each channel + has a FIFO buffer that can hold messages of a fixed size (32 bytes in this + case). + + The mailbox controller also supports interrupt generation, allowing + processors to notify each other when a message is available or when an event + occurs. 
+ +properties: + compatible: + const: aspeed,ast2700-mailbox + + reg: + items: + - description: TX control register + - description: RX control register + + reg-names: + items: + - const: tx + - const: rx + + interrupts: + maxItems: 1 + + "#mbox-cells": + const: 1 + +required: + - compatible + - reg + - reg-names + - interrupts + - "#mbox-cells" + +additionalProperties: false + +examples: + - | + #include + + mailbox@12c1c200 { + compatible = "aspeed,ast2700-mailbox"; + reg = <0x12c1c200 0x100>, <0x12c1c300 0x100>; + reg-names = "tx", "rx"; + interrupts = ; + #mbox-cells = <1>; + }; From ae524eb766460a9f7957bf2db0968c9cccb71d90 Mon Sep 17 00:00:00 2001 From: Jammy Huang Date: Tue, 22 Jul 2025 09:31:17 +0800 Subject: [PATCH 1642/2411] mailbox: aspeed: add mailbox driver for AST27XX series SoC Add mailbox controller driver for AST27XX SoCs, which provides independent tx/rx mailbox between different processors. There are 4 channels for each tx/rx mailbox and each channel has an 32-byte FIFO. Signed-off-by: Jammy Huang Reviewed-by: Andrew Jeffery Signed-off-by: Jassi Brar --- drivers/mailbox/Kconfig | 9 ++ drivers/mailbox/Makefile | 2 + drivers/mailbox/ast2700-mailbox.c | 235 ++++++++++++++++++++++++++++++ 3 files changed, 246 insertions(+) create mode 100644 drivers/mailbox/ast2700-mailbox.c diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig index 9abf193acd0b..02432d4a5ccd 100644 --- a/drivers/mailbox/Kconfig +++ b/drivers/mailbox/Kconfig @@ -36,6 +36,15 @@ config ARM_MHU_V3 that provides different means of transports: supported extensions will be discovered and possibly managed at probe-time. +config AST2700_MBOX + tristate "ASPEED AST2700 IPC driver" + depends on ARCH_ASPEED || COMPILE_TEST + help + Mailbox driver implementation for ASPEED AST27XX SoCs. This driver + can be used to send message between different processors in SoC. + The driver provides mailbox support for sending interrupts to the + clients. Say Y here if you want to build this driver. 
+ config CV1800_MBOX tristate "cv1800 mailbox" depends on ARCH_SOPHGO || COMPILE_TEST diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile index c0a4278aa129..98a68f838486 100644 --- a/drivers/mailbox/Makefile +++ b/drivers/mailbox/Makefile @@ -11,6 +11,8 @@ obj-$(CONFIG_ARM_MHU_V2) += arm_mhuv2.o obj-$(CONFIG_ARM_MHU_V3) += arm_mhuv3.o +obj-$(CONFIG_AST2700_MBOX) += ast2700-mailbox.o + obj-$(CONFIG_CV1800_MBOX) += cv1800-mailbox.o obj-$(CONFIG_EXYNOS_MBOX) += exynos-mailbox.o diff --git a/drivers/mailbox/ast2700-mailbox.c b/drivers/mailbox/ast2700-mailbox.c new file mode 100644 index 000000000000..83c6afe5411f --- /dev/null +++ b/drivers/mailbox/ast2700-mailbox.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright Aspeed Technology Inc. (C) 2025. All rights reserved + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Each bit in the register represents an IPC ID */ +#define IPCR_TX_TRIG 0x00 +#define IPCR_ENABLE 0x04 +#define IPCR_STATUS 0x08 +#define RX_IRQ(n) BIT(n) +#define RX_IRQ_MASK 0xf +#define IPCR_DATA 0x10 + +struct ast2700_mbox_data { + u8 num_chans; + u8 msg_size; +}; + +struct ast2700_mbox { + struct mbox_controller mbox; + u8 msg_size; + void __iomem *tx_regs; + void __iomem *rx_regs; + spinlock_t lock; +}; + +static inline int ch_num(struct mbox_chan *chan) +{ + return chan - chan->mbox->chans; +} + +static inline bool ast2700_mbox_tx_done(struct ast2700_mbox *mb, int idx) +{ + return !(readl(mb->tx_regs + IPCR_STATUS) & BIT(idx)); +} + +static irqreturn_t ast2700_mbox_irq(int irq, void *p) +{ + struct ast2700_mbox *mb = p; + void __iomem *data_reg; + int num_words = mb->msg_size / sizeof(u32); + u32 *word_data; + u32 status; + int n, i; + + /* Only examine channels that are currently enabled. 
*/ + status = readl(mb->rx_regs + IPCR_ENABLE) & + readl(mb->rx_regs + IPCR_STATUS); + + if (!(status & RX_IRQ_MASK)) + return IRQ_NONE; + + for (n = 0; n < mb->mbox.num_chans; ++n) { + struct mbox_chan *chan = &mb->mbox.chans[n]; + + if (!(status & RX_IRQ(n))) + continue; + + data_reg = mb->rx_regs + IPCR_DATA + mb->msg_size * n; + word_data = chan->con_priv; + /* Read the message data */ + for (i = 0; i < num_words; i++) + word_data[i] = readl(data_reg + i * sizeof(u32)); + + mbox_chan_received_data(chan, chan->con_priv); + + /* The IRQ can be cleared only once the FIFO is empty. */ + writel(RX_IRQ(n), mb->rx_regs + IPCR_STATUS); + } + + return IRQ_HANDLED; +} + +static int ast2700_mbox_send_data(struct mbox_chan *chan, void *data) +{ + struct ast2700_mbox *mb = dev_get_drvdata(chan->mbox->dev); + int idx = ch_num(chan); + void __iomem *data_reg = mb->tx_regs + IPCR_DATA + mb->msg_size * idx; + u32 *word_data = data; + int num_words = mb->msg_size / sizeof(u32); + int i; + + if (!(readl(mb->tx_regs + IPCR_ENABLE) & BIT(idx))) { + dev_warn(mb->mbox.dev, "%s: Ch-%d not enabled yet\n", __func__, idx); + return -ENODEV; + } + + if (!(ast2700_mbox_tx_done(mb, idx))) { + dev_warn(mb->mbox.dev, "%s: Ch-%d last data has not finished\n", __func__, idx); + return -EBUSY; + } + + /* Write the message data */ + for (i = 0 ; i < num_words; i++) + writel(word_data[i], data_reg + i * sizeof(u32)); + + writel(BIT(idx), mb->tx_regs + IPCR_TX_TRIG); + dev_dbg(mb->mbox.dev, "%s: Ch-%d sent\n", __func__, idx); + + return 0; +} + +static int ast2700_mbox_startup(struct mbox_chan *chan) +{ + struct ast2700_mbox *mb = dev_get_drvdata(chan->mbox->dev); + int idx = ch_num(chan); + void __iomem *reg = mb->rx_regs + IPCR_ENABLE; + unsigned long flags; + + spin_lock_irqsave(&mb->lock, flags); + writel(readl(reg) | BIT(idx), reg); + spin_unlock_irqrestore(&mb->lock, flags); + + return 0; +} + +static void ast2700_mbox_shutdown(struct mbox_chan *chan) +{ + struct ast2700_mbox *mb = 
dev_get_drvdata(chan->mbox->dev); + int idx = ch_num(chan); + void __iomem *reg = mb->rx_regs + IPCR_ENABLE; + unsigned long flags; + + spin_lock_irqsave(&mb->lock, flags); + writel(readl(reg) & ~BIT(idx), reg); + spin_unlock_irqrestore(&mb->lock, flags); +} + +static bool ast2700_mbox_last_tx_done(struct mbox_chan *chan) +{ + struct ast2700_mbox *mb = dev_get_drvdata(chan->mbox->dev); + int idx = ch_num(chan); + + return ast2700_mbox_tx_done(mb, idx); +} + +static const struct mbox_chan_ops ast2700_mbox_chan_ops = { + .send_data = ast2700_mbox_send_data, + .startup = ast2700_mbox_startup, + .shutdown = ast2700_mbox_shutdown, + .last_tx_done = ast2700_mbox_last_tx_done, +}; + +static int ast2700_mbox_probe(struct platform_device *pdev) +{ + struct ast2700_mbox *mb; + const struct ast2700_mbox_data *dev_data; + struct device *dev = &pdev->dev; + int irq, ret; + + if (!pdev->dev.of_node) + return -ENODEV; + + dev_data = device_get_match_data(&pdev->dev); + + mb = devm_kzalloc(dev, sizeof(*mb), GFP_KERNEL); + if (!mb) + return -ENOMEM; + + mb->mbox.chans = devm_kcalloc(&pdev->dev, dev_data->num_chans, + sizeof(*mb->mbox.chans), GFP_KERNEL); + if (!mb->mbox.chans) + return -ENOMEM; + + /* con_priv of each channel is used to store the message received */ + for (int i = 0; i < dev_data->num_chans; i++) { + mb->mbox.chans[i].con_priv = devm_kcalloc(dev, dev_data->msg_size, + sizeof(u8), GFP_KERNEL); + if (!mb->mbox.chans[i].con_priv) + return -ENOMEM; + } + + platform_set_drvdata(pdev, mb); + + mb->tx_regs = devm_platform_ioremap_resource_byname(pdev, "tx"); + if (IS_ERR(mb->tx_regs)) + return PTR_ERR(mb->tx_regs); + + mb->rx_regs = devm_platform_ioremap_resource_byname(pdev, "rx"); + if (IS_ERR(mb->rx_regs)) + return PTR_ERR(mb->rx_regs); + + mb->msg_size = dev_data->msg_size; + mb->mbox.dev = dev; + mb->mbox.num_chans = dev_data->num_chans; + mb->mbox.ops = &ast2700_mbox_chan_ops; + mb->mbox.txdone_irq = false; + mb->mbox.txdone_poll = true; + mb->mbox.txpoll_period = 
5; + spin_lock_init(&mb->lock); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + ret = devm_request_irq(dev, irq, ast2700_mbox_irq, 0, dev_name(dev), mb); + if (ret) + return ret; + + return devm_mbox_controller_register(dev, &mb->mbox); +} + +static const struct ast2700_mbox_data ast2700_dev_data = { + .num_chans = 4, + .msg_size = 0x20, +}; + +static const struct of_device_id ast2700_mbox_of_match[] = { + { .compatible = "aspeed,ast2700-mailbox", .data = &ast2700_dev_data }, + {} +}; +MODULE_DEVICE_TABLE(of, ast2700_mbox_of_match); + +static struct platform_driver ast2700_mbox_driver = { + .driver = { + .name = "ast2700-mailbox", + .of_match_table = ast2700_mbox_of_match, + }, + .probe = ast2700_mbox_probe, +}; +module_platform_driver(ast2700_mbox_driver); + +MODULE_AUTHOR("Jammy Huang "); +MODULE_DESCRIPTION("ASPEED AST2700 IPC driver"); +MODULE_LICENSE("GPL"); From 7fbb5a5672cce49dc0e1d54fd15621eec9d48448 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 25 Jul 2025 15:56:10 +0800 Subject: [PATCH 1643/2411] mailbox: bcm74110: remove unneeded semicolon No functional modification involved. ./drivers/mailbox/bcm74110-mailbox.c:483:2-3: Unneeded semicolon. ./drivers/mailbox/bcm74110-mailbox.c:563:2-3: Unneeded semicolon. 
Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=22936 Signed-off-by: Jiapeng Chong Signed-off-by: Jassi Brar --- drivers/mailbox/bcm74110-mailbox.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mailbox/bcm74110-mailbox.c b/drivers/mailbox/bcm74110-mailbox.c index 0680be8dc18f..623ece0b2c1e 100644 --- a/drivers/mailbox/bcm74110-mailbox.c +++ b/drivers/mailbox/bcm74110-mailbox.c @@ -480,7 +480,7 @@ static int bcm74110_mbox_send_data(struct mbox_chan *chan, void *data) break; default: return -EINVAL; - }; + } return bcm74110_mbox_tx_msg(chan_priv->mbox, msg); } @@ -560,7 +560,7 @@ static struct mbox_chan *bcm74110_mbox_of_xlate(struct mbox_controller *cntrl, default: dev_err(dev, "Invalid channel type: %d\n", type); return ERR_PTR(-EINVAL); - }; + } return &cntrl->chans[type]; } From 75f1fbc9fd409a0c232dc78871ee7df186da9d57 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 23 Jul 2025 10:53:15 +0100 Subject: [PATCH 1644/2411] mailbox: bcm74110: Fix spelling mistake There is a spelling mistake in the author's email address. Fix it. 
Signed-off-by: Colin Ian King Reviewed-by: Justin Chen Signed-off-by: Jassi Brar --- drivers/mailbox/bcm74110-mailbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mailbox/bcm74110-mailbox.c b/drivers/mailbox/bcm74110-mailbox.c index 623ece0b2c1e..2e7e86f3e6a4 100644 --- a/drivers/mailbox/bcm74110-mailbox.c +++ b/drivers/mailbox/bcm74110-mailbox.c @@ -651,6 +651,6 @@ static struct platform_driver bcm74110_mbox_driver = { }; module_platform_driver(bcm74110_mbox_driver); -MODULE_AUTHOR("Justin Chen "); +MODULE_AUTHOR("Justin Chen "); MODULE_DESCRIPTION("BCM74110 mailbox driver"); MODULE_LICENSE("GPL"); From 01027a62b508c48c762096f347de925eedcbd008 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 4 Aug 2025 14:15:50 +0200 Subject: [PATCH 1645/2411] smb: server: remove separate empty_recvmsg_queue There's no need to maintain two lists, we can just have a single list of receive buffers, which are free to use. Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 60 +++++----------------------------- 1 file changed, 8 insertions(+), 52 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index c6cbe0d56e32..393254109fc4 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -129,9 +129,6 @@ struct smb_direct_transport { spinlock_t recvmsg_queue_lock; struct list_head recvmsg_queue; - spinlock_t empty_recvmsg_queue_lock; - struct list_head empty_recvmsg_queue; - int send_credit_target; atomic_t send_credits; spinlock_t lock_new_recv_credits; @@ -276,32 +273,6 @@ static void put_recvmsg(struct smb_direct_transport *t, spin_unlock(&t->recvmsg_queue_lock); } -static struct -smb_direct_recvmsg 
*get_empty_recvmsg(struct smb_direct_transport *t) -{ - struct smb_direct_recvmsg *recvmsg = NULL; - - spin_lock(&t->empty_recvmsg_queue_lock); - if (!list_empty(&t->empty_recvmsg_queue)) { - recvmsg = list_first_entry(&t->empty_recvmsg_queue, - struct smb_direct_recvmsg, list); - list_del(&recvmsg->list); - } - spin_unlock(&t->empty_recvmsg_queue_lock); - return recvmsg; -} - -static void put_empty_recvmsg(struct smb_direct_transport *t, - struct smb_direct_recvmsg *recvmsg) -{ - ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr, - recvmsg->sge.length, DMA_FROM_DEVICE); - - spin_lock(&t->empty_recvmsg_queue_lock); - list_add_tail(&recvmsg->list, &t->empty_recvmsg_queue); - spin_unlock(&t->empty_recvmsg_queue_lock); -} - static void enqueue_reassembly(struct smb_direct_transport *t, struct smb_direct_recvmsg *recvmsg, int data_length) @@ -386,9 +357,6 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) spin_lock_init(&t->recvmsg_queue_lock); INIT_LIST_HEAD(&t->recvmsg_queue); - spin_lock_init(&t->empty_recvmsg_queue_lock); - INIT_LIST_HEAD(&t->empty_recvmsg_queue); - init_waitqueue_head(&t->wait_send_pending); atomic_set(&t->send_pending, 0); @@ -554,7 +522,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) wc->opcode); smb_direct_disconnect_rdma_connection(t); } - put_empty_recvmsg(t, recvmsg); + put_recvmsg(t, recvmsg); return; } @@ -568,7 +536,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) switch (recvmsg->type) { case SMB_DIRECT_MSG_NEGOTIATE_REQ: if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) { - put_empty_recvmsg(t, recvmsg); + put_recvmsg(t, recvmsg); return; } t->negotiation_requested = true; @@ -585,7 +553,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) if (wc->byte_len < offsetof(struct smb_direct_data_transfer, padding)) { - put_empty_recvmsg(t, recvmsg); + put_recvmsg(t, recvmsg); return; } @@ -593,7 +561,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc 
*wc) if (data_length) { if (wc->byte_len < sizeof(struct smb_direct_data_transfer) + (u64)data_length) { - put_empty_recvmsg(t, recvmsg); + put_recvmsg(t, recvmsg); return; } @@ -613,7 +581,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) avail_recvmsg_count = t->count_avail_recvmsg; spin_unlock(&t->receive_credit_lock); } else { - put_empty_recvmsg(t, recvmsg); + put_recvmsg(t, recvmsg); spin_lock(&t->receive_credit_lock); receive_credits = --(t->recv_credits); @@ -811,7 +779,6 @@ static void smb_direct_post_recv_credits(struct work_struct *work) struct smb_direct_recvmsg *recvmsg; int receive_credits, credits = 0; int ret; - int use_free = 1; spin_lock(&t->receive_credit_lock); receive_credits = t->recv_credits; @@ -819,18 +786,9 @@ static void smb_direct_post_recv_credits(struct work_struct *work) if (receive_credits < t->recv_credit_target) { while (true) { - if (use_free) - recvmsg = get_free_recvmsg(t); - else - recvmsg = get_empty_recvmsg(t); - if (!recvmsg) { - if (use_free) { - use_free = 0; - continue; - } else { - break; - } - } + recvmsg = get_free_recvmsg(t); + if (!recvmsg) + break; recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER; recvmsg->first_segment = false; @@ -1806,8 +1764,6 @@ static void smb_direct_destroy_pools(struct smb_direct_transport *t) while ((recvmsg = get_free_recvmsg(t))) mempool_free(recvmsg, t->recvmsg_mempool); - while ((recvmsg = get_empty_recvmsg(t))) - mempool_free(recvmsg, t->recvmsg_mempool); mempool_destroy(t->recvmsg_mempool); t->recvmsg_mempool = NULL; From afb4108c92898350e66b9a009692230bcdd2ac73 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 4 Aug 2025 14:15:51 +0200 Subject: [PATCH 1646/2411] smb: server: make sure we call ib_dma_unmap_single() only if we called ib_dma_map_single already In case of failures either ib_dma_map_single() might not be called yet or ib_dma_unmap_single() was already called. We should make sure put_recvmsg() only calls ib_dma_unmap_single() if needed. 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 393254109fc4..fac82e60ff80 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -265,8 +265,13 @@ smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t) static void put_recvmsg(struct smb_direct_transport *t, struct smb_direct_recvmsg *recvmsg) { - ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr, - recvmsg->sge.length, DMA_FROM_DEVICE); + if (likely(recvmsg->sge.length != 0)) { + ib_dma_unmap_single(t->cm_id->device, + recvmsg->sge.addr, + recvmsg->sge.length, + DMA_FROM_DEVICE); + recvmsg->sge.length = 0; + } spin_lock(&t->recvmsg_queue_lock); list_add(&recvmsg->list, &t->recvmsg_queue); @@ -638,6 +643,7 @@ static int smb_direct_post_recv(struct smb_direct_transport *t, ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr, recvmsg->sge.length, DMA_FROM_DEVICE); + recvmsg->sge.length = 0; smb_direct_disconnect_rdma_connection(t); return ret; } @@ -1819,6 +1825,7 @@ static int smb_direct_create_pools(struct smb_direct_transport *t) if (!recvmsg) goto err; recvmsg->transport = t; + recvmsg->sge.length = 0; list_add(&recvmsg->list, &t->recvmsg_queue); } t->count_avail_recvmsg = t->recv_credit_max; From cfe76fdbb9729c650f3505d9cfb2f70ddda2dbdc Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 4 Aug 2025 14:15:52 +0200 Subject: [PATCH 1647/2411] smb: server: let recv_done() consistently call put_recvmsg/smb_direct_disconnect_rdma_connection We should call put_recvmsg() before smb_direct_disconnect_rdma_connection() in order to call it 
before waking up the callers. In all error cases we should call smb_direct_disconnect_rdma_connection() in order to avoid stale connections. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index fac82e60ff80..cd8a92fe372b 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -521,13 +521,13 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) t = recvmsg->transport; if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { + put_recvmsg(t, recvmsg); if (wc->status != IB_WC_WR_FLUSH_ERR) { pr_err("Recv error. status='%s (%d)' opcode=%d\n", ib_wc_status_msg(wc->status), wc->status, wc->opcode); smb_direct_disconnect_rdma_connection(t); } - put_recvmsg(t, recvmsg); return; } @@ -542,6 +542,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) case SMB_DIRECT_MSG_NEGOTIATE_REQ: if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) { put_recvmsg(t, recvmsg); + smb_direct_disconnect_rdma_connection(t); return; } t->negotiation_requested = true; @@ -549,7 +550,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) t->status = SMB_DIRECT_CS_CONNECTED; enqueue_reassembly(t, recvmsg, 0); wake_up_interruptible(&t->wait_status); - break; + return; case SMB_DIRECT_MSG_DATA_TRANSFER: { struct smb_direct_data_transfer *data_transfer = (struct smb_direct_data_transfer *)recvmsg->packet; @@ -559,6 +560,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) if (wc->byte_len < offsetof(struct smb_direct_data_transfer, padding)) { put_recvmsg(t, recvmsg); + 
smb_direct_disconnect_rdma_connection(t); return; } @@ -567,6 +569,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) if (wc->byte_len < sizeof(struct smb_direct_data_transfer) + (u64)data_length) { put_recvmsg(t, recvmsg); + smb_direct_disconnect_rdma_connection(t); return; } @@ -609,11 +612,16 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count)) mod_delayed_work(smb_direct_wq, &t->post_recv_credits_work, 0); - break; + return; } - default: - break; } + + /* + * This is an internal error! + */ + WARN_ON_ONCE(recvmsg->type != SMB_DIRECT_MSG_DATA_TRANSFER); + put_recvmsg(t, recvmsg); + smb_direct_disconnect_rdma_connection(t); } static int smb_direct_post_recv(struct smb_direct_transport *t, From a6c015b7ac2d8c5233337e5793f50d04fac17669 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 4 Aug 2025 14:15:53 +0200 Subject: [PATCH 1648/2411] smb: server: let recv_done() avoid touching data_transfer after cleanup/move Calling enqueue_reassembly() and wake_up_interruptible(&t->wait_reassembly_queue) or put_receive_buffer() means the recvmsg/data_transfer pointer might get re-used by another thread, which means these should be the last operations before calling return. 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index cd8a92fe372b..8d366db5f605 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -581,16 +581,11 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) else t->full_packet_received = true; - enqueue_reassembly(t, recvmsg, (int)data_length); - wake_up_interruptible(&t->wait_reassembly_queue); - spin_lock(&t->receive_credit_lock); receive_credits = --(t->recv_credits); avail_recvmsg_count = t->count_avail_recvmsg; spin_unlock(&t->receive_credit_lock); } else { - put_recvmsg(t, recvmsg); - spin_lock(&t->receive_credit_lock); receive_credits = --(t->recv_credits); avail_recvmsg_count = ++(t->count_avail_recvmsg); @@ -612,6 +607,13 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count)) mod_delayed_work(smb_direct_wq, &t->post_recv_credits_work, 0); + + if (data_length) { + enqueue_reassembly(t, recvmsg, (int)data_length); + wake_up_interruptible(&t->wait_reassembly_queue); + } else + put_recvmsg(t, recvmsg); + return; } } From b0b73329ebeeb727913f07b5b6bb85e66e03d156 Mon Sep 17 00:00:00 2001 From: Yunseong Kim Date: Wed, 6 Aug 2025 13:22:12 +0000 Subject: [PATCH 1649/2411] cifs: Fix null-ptr-deref by static initializing global lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A kernel panic can be triggered by reading /proc/fs/cifs/debug_dirs. 
The crash is a null-ptr-deref inside spin_lock(), caused by the use of the uninitialized global spinlock cifs_tcp_ses_lock. init_cifs() └── cifs_proc_init() └── // User can access /proc/fs/cifs/debug_dirs here └── cifs_debug_dirs_proc_show() └── spin_lock(&cifs_tcp_ses_lock); // Uninitialized! KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] Mem abort info: ESR = 0x0000000096000005 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x05: level 1 translation fault Data abort info: ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [dfff800000000000] address between user and kernel address ranges Internal error: Oops: 0000000096000005 [#1] SMP Modules linked in: CPU: 3 UID: 0 PID: 16435 Comm: stress-ng-procf Not tainted 6.16.0-10385-g79f14b5d84c6 #37 PREEMPT Hardware name: QEMU KVM Virtual Machine, BIOS 2025.02-8ubuntu1 06/11/2025 pstate: 23400005 (nzCv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--) pc : do_raw_spin_lock+0x84/0x2cc lr : _raw_spin_lock+0x24/0x34 sp : ffff8000966477e0 x29: ffff800096647860 x28: ffff800096647b88 x27: ffff0001c0c22070 x26: ffff0003eb2b60c8 x25: ffff0001c0c22018 x24: dfff800000000000 x23: ffff0000f624e000 x22: ffff0003eb2b6020 x21: ffff0000f624e768 x20: 0000000000000004 x19: 0000000000000000 x18: 0000000000000000 x17: 0000000000000000 x16: ffff8000804b9600 x15: ffff700012cc8f04 x14: 1ffff00012cc8f04 x13: 0000000000000004 x12: ffffffffffffffff x11: 1ffff00012cc8f00 x10: ffff80008d9af0d2 x9 : f3f3f304f1f1f1f1 x8 : 0000000000000000 x7 : 7365733c203e6469 x6 : 20656572743c2023 x5 : ffff0000e0ce0044 x4 : ffff80008a4deb6e x3 : ffff8000804b9718 x2 : 0000000000000001 x1 : 0000000000000000 x0 : 0000000000000000 Call trace: do_raw_spin_lock+0x84/0x2cc (P) _raw_spin_lock+0x24/0x34 cifs_debug_dirs_proc_show+0x1ac/0x4c0 seq_read_iter+0x3b0/0xc28 proc_reg_read_iter+0x178/0x2a8 vfs_read+0x5f8/0x88c ksys_read+0x120/0x210 
__arm64_sys_read+0x7c/0x90 invoke_syscall+0x98/0x2b8 el0_svc_common+0x130/0x23c do_el0_svc+0x48/0x58 el0_svc+0x40/0x140 el0t_64_sync_handler+0x84/0x12c el0t_64_sync+0x1ac/0x1b0 Code: aa0003f3 f9000feb f2fe7e69 f8386969 (38f86908) ---[ end trace 0000000000000000 ]--- The root cause is an initialization order problem. The lock is declared as a global variable and intended to be initialized during module startup. However, the procfs entry that uses this lock can be accessed by userspace before the spin_lock_init() call has run. This creates a race window where reading the proc file will attempt to use the lock before it is initialized, leading to the crash. For a global lock with a static lifetime, the correct and robust approach is to use compile-time initialization. Fixes: 844e5c0eb176 ("smb3 client: add way to show directory leases for improved debugging") Signed-off-by: Yunseong Kim Signed-off-by: Steve French --- fs/smb/client/cifsfs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 31930b7266db..3bd85ab2deb1 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -77,7 +77,7 @@ unsigned int global_secflags = CIFSSEC_DEF; unsigned int GlobalCurrentXid; /* protected by GlobalMid_Lock */ unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Lock */ unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Lock */ -spinlock_t GlobalMid_Lock; /* protects above & list operations on midQ entries */ +DEFINE_SPINLOCK(GlobalMid_Lock); /* protects above & list operations on midQ entries */ /* * Global counters, updated atomically @@ -97,7 +97,7 @@ atomic_t total_buf_alloc_count; atomic_t total_small_buf_alloc_count; #endif/* STATS2 */ struct list_head cifs_tcp_ses_list; -spinlock_t cifs_tcp_ses_lock; +DEFINE_SPINLOCK(cifs_tcp_ses_lock); static const struct super_operations cifs_super_ops; unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE; module_param(CIFSMaxBufSize, uint, 0444); @@ 
-1863,8 +1863,6 @@ init_cifs(void) GlobalCurrentXid = 0; GlobalTotalActiveXid = 0; GlobalMaxActiveXid = 0; - spin_lock_init(&cifs_tcp_ses_lock); - spin_lock_init(&GlobalMid_Lock); cifs_lock_secret = get_random_u32(); From 5349ae5e05fa37409fd48a1eb483b199c32c889b Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 4 Aug 2025 14:10:12 +0200 Subject: [PATCH 1650/2411] smb: client: let send_done() cleanup before calling smbd_disconnect_rdma_connection() We should call ib_dma_unmap_single() and mempool_free() before calling smbd_disconnect_rdma_connection(). And smbd_disconnect_rdma_connection() needs to be the last function to call as all other state might already be gone after it returns. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: f198186aa9bb ("CIFS: SMBD: Establish SMB Direct connection") Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 754e94a0e07f..e99e783f1b0e 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -281,18 +281,20 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) log_rdma_send(INFO, "smbd_request 0x%p completed wc->status=%d\n", request, wc->status); - if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { - log_rdma_send(ERR, "wc->status=%d wc->opcode=%d\n", - wc->status, wc->opcode); - smbd_disconnect_rdma_connection(request->info); - } - for (i = 0; i < request->num_sge; i++) ib_dma_unmap_single(sc->ib.dev, request->sge[i].addr, request->sge[i].length, DMA_TO_DEVICE); + if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { + log_rdma_send(ERR, "wc->status=%d wc->opcode=%d\n", + wc->status, wc->opcode); + mempool_free(request, info->request_mempool); + smbd_disconnect_rdma_connection(info); + return; + } + if 
(atomic_dec_and_test(&request->info->send_pending)) wake_up(&request->info->wait_send_pending); From 24b6afc36db748467e853e166a385df07e443859 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 4 Aug 2025 14:10:13 +0200 Subject: [PATCH 1651/2411] smb: client: remove separate empty_packet_queue There's no need to maintain two lists, we can just have a single list of receive buffers, which are free to use. It just added unneeded complexity and resulted in ib_dma_unmap_single() not being called from recv_done() for empty keepalive packets. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: f198186aa9bb ("CIFS: SMBD: Establish SMB Direct connection") Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 6 ++-- fs/smb/client/smbdirect.c | 62 +++----------------------------------- fs/smb/client/smbdirect.h | 4 --- 3 files changed, 7 insertions(+), 65 deletions(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 80d6a51b8c11..65421703593a 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -481,10 +481,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) server->smbd_conn->receive_credit_target); seq_printf(m, "\nPending send_pending: %x ", atomic_read(&server->smbd_conn->send_pending)); - seq_printf(m, "\nReceive buffers count_receive_queue: %x " - "count_empty_packet_queue: %x", - server->smbd_conn->count_receive_queue, - server->smbd_conn->count_empty_packet_queue); + seq_printf(m, "\nReceive buffers count_receive_queue: %x ", + server->smbd_conn->count_receive_queue); seq_printf(m, "\nMR responder_resources: %x " "max_frmr_depth: %x mr_type: %x", server->smbd_conn->responder_resources, diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index e99e783f1b0e..0ab490c0a9b0 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -13,8 +13,6 @@ #include 
"cifsproto.h" #include "smb2proto.h" -static struct smbd_response *get_empty_queue_buffer( - struct smbd_connection *info); static struct smbd_response *get_receive_buffer( struct smbd_connection *info); static void put_receive_buffer( @@ -23,8 +21,6 @@ static void put_receive_buffer( static int allocate_receive_buffers(struct smbd_connection *info, int num_buf); static void destroy_receive_buffers(struct smbd_connection *info); -static void put_empty_packet( - struct smbd_connection *info, struct smbd_response *response); static void enqueue_reassembly( struct smbd_connection *info, struct smbd_response *response, int data_length); @@ -393,7 +389,6 @@ static bool process_negotiation_response( static void smbd_post_send_credits(struct work_struct *work) { int ret = 0; - int use_receive_queue = 1; int rc; struct smbd_response *response; struct smbd_connection *info = @@ -409,18 +404,9 @@ static void smbd_post_send_credits(struct work_struct *work) if (info->receive_credit_target > atomic_read(&info->receive_credits)) { while (true) { - if (use_receive_queue) - response = get_receive_buffer(info); - else - response = get_empty_queue_buffer(info); - if (!response) { - /* now switch to empty packet queue */ - if (use_receive_queue) { - use_receive_queue = 0; - continue; - } else - break; - } + response = get_receive_buffer(info); + if (!response) + break; response->type = SMBD_TRANSFER_DATA; response->first_segment = false; @@ -511,7 +497,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) response, data_length); } else - put_empty_packet(info, response); + put_receive_buffer(info, response); if (data_length) wake_up_interruptible(&info->wait_reassembly_queue); @@ -1115,17 +1101,6 @@ static int smbd_negotiate(struct smbd_connection *info) return rc; } -static void put_empty_packet( - struct smbd_connection *info, struct smbd_response *response) -{ - spin_lock(&info->empty_packet_queue_lock); - list_add_tail(&response->list, &info->empty_packet_queue); - 
info->count_empty_packet_queue++; - spin_unlock(&info->empty_packet_queue_lock); - - queue_work(info->workqueue, &info->post_send_credits_work); -} - /* * Implement Connection.FragmentReassemblyBuffer defined in [MS-SMBD] 3.1.1.1 * This is a queue for reassembling upper layer payload and present to upper @@ -1174,25 +1149,6 @@ static struct smbd_response *_get_first_reassembly(struct smbd_connection *info) return ret; } -static struct smbd_response *get_empty_queue_buffer( - struct smbd_connection *info) -{ - struct smbd_response *ret = NULL; - unsigned long flags; - - spin_lock_irqsave(&info->empty_packet_queue_lock, flags); - if (!list_empty(&info->empty_packet_queue)) { - ret = list_first_entry( - &info->empty_packet_queue, - struct smbd_response, list); - list_del(&ret->list); - info->count_empty_packet_queue--; - } - spin_unlock_irqrestore(&info->empty_packet_queue_lock, flags); - - return ret; -} - /* * Get a receive buffer * For each remote send, we need to post a receive. The receive buffers are @@ -1257,10 +1213,6 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) spin_lock_init(&info->receive_queue_lock); info->count_receive_queue = 0; - INIT_LIST_HEAD(&info->empty_packet_queue); - spin_lock_init(&info->empty_packet_queue_lock); - info->count_empty_packet_queue = 0; - init_waitqueue_head(&info->wait_receive_queues); for (i = 0; i < num_buf; i++) { @@ -1294,9 +1246,6 @@ static void destroy_receive_buffers(struct smbd_connection *info) while ((response = get_receive_buffer(info))) mempool_free(response, info->response_mempool); - - while ((response = get_empty_queue_buffer(info))) - mempool_free(response, info->response_mempool); } /* Implement idle connection timer [MS-SMBD] 3.1.6.2 */ @@ -1383,8 +1332,7 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "free receive buffers\n"); wait_event(info->wait_receive_queues, - info->count_receive_queue + info->count_empty_packet_queue - == 
sp->recv_credit_max); + info->count_receive_queue == sp->recv_credit_max); destroy_receive_buffers(info); /* diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 75b3f491c3ad..ea04ce8a9763 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -110,10 +110,6 @@ struct smbd_connection { int count_receive_queue; spinlock_t receive_queue_lock; - struct list_head empty_packet_queue; - int count_empty_packet_queue; - spinlock_t empty_packet_queue_lock; - wait_queue_head_t wait_receive_queues; /* Reassembly queue */ From 047682c370b6f18fec818b57b0ed8b501bdb79f8 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 4 Aug 2025 14:10:14 +0200 Subject: [PATCH 1652/2411] smb: client: make sure we call ib_dma_unmap_single() only if we called ib_dma_map_single already In case of failures either ib_dma_map_single() might not be called yet or ib_dma_unmap_single() was already called. We should make sure put_receive_buffer() only calls ib_dma_unmap_single() if needed. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: f198186aa9bb ("CIFS: SMBD: Establish SMB Direct connection") Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 0ab490c0a9b0..5690e8b3d101 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1057,6 +1057,7 @@ static int smbd_post_recv( if (rc) { ib_dma_unmap_single(sc->ib.dev, response->sge.addr, response->sge.length, DMA_FROM_DEVICE); + response->sge.length = 0; smbd_disconnect_rdma_connection(info); log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc); } @@ -1186,8 +1187,13 @@ static void put_receive_buffer( struct smbdirect_socket *sc = &info->socket; unsigned long flags; - ib_dma_unmap_single(sc->ib.dev, response->sge.addr, - response->sge.length, DMA_FROM_DEVICE); + if (likely(response->sge.length != 0)) { + ib_dma_unmap_single(sc->ib.dev, + response->sge.addr, + response->sge.length, + DMA_FROM_DEVICE); + response->sge.length = 0; + } spin_lock_irqsave(&info->receive_queue_lock, flags); list_add_tail(&response->list, &info->receive_queue); @@ -1221,6 +1227,7 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) goto allocate_failed; response->info = info; + response->sge.length = 0; list_add_tail(&response->list, &info->receive_queue); info->count_receive_queue++; } From bdd7afc6dca5e0ebbb75583484aa6ea9e03fbb13 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 4 Aug 2025 14:10:15 +0200 Subject: [PATCH 1653/2411] smb: client: let recv_done() cleanup before notifying the callers. We should call put_receive_buffer() before waking up the callers. For the internal error case of response->type being unexpected, we now also call smbd_disconnect_rdma_connection() instead of not waking up the callers at all. 
Note that the SMBD_TRANSFER_DATA case still has problems, which will be addressed in the next commit in order to make it easier to review this one. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: f198186aa9bb ("CIFS: SMBD: Establish SMB Direct connection") Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 5690e8b3d101..d26b8cef82d6 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -454,7 +454,6 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { log_rdma_recv(INFO, "wc->status=%d opcode=%d\n", wc->status, wc->opcode); - smbd_disconnect_rdma_connection(info); goto error; } @@ -471,8 +470,9 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) info->full_packet_received = true; info->negotiate_done = process_negotiation_response(response, wc->byte_len); + put_receive_buffer(info, response); complete(&info->negotiate_completion); - break; + return; /* SMBD data transfer packet */ case SMBD_TRANSFER_DATA: @@ -529,14 +529,16 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) } return; - - default: - log_rdma_recv(ERR, - "unexpected response type=%d\n", response->type); } + /* + * This is an internal error! 
+ */ + log_rdma_recv(ERR, "unexpected response type=%d\n", response->type); + WARN_ON_ONCE(response->type != SMBD_TRANSFER_DATA); error: put_receive_buffer(info, response); + smbd_disconnect_rdma_connection(info); } static struct rdma_cm_id *smbd_create_id( From 24eff17887cb45c25a427e662dda352973c5c171 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 4 Aug 2025 14:10:16 +0200 Subject: [PATCH 1654/2411] smb: client: let recv_done() avoid touching data_transfer after cleanup/move Calling enqueue_reassembly() and wake_up_interruptible(&info->wait_reassembly_queue) or put_receive_buffer() means the response/data_transfer pointer might get re-used by another thread, which means these should be the last operations before calling return. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: f198186aa9bb ("CIFS: SMBD: Establish SMB Direct connection") Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index d26b8cef82d6..47f2a6cc1c0c 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -479,10 +479,6 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) data_transfer = smbd_response_payload(response); data_length = le32_to_cpu(data_transfer->data_length); - /* - * If this is a packet with data playload place the data in - * reassembly queue and wake up the reading thread - */ if (data_length) { if (info->full_packet_received) response->first_segment = true; @@ -491,16 +487,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) info->full_packet_received = false; else info->full_packet_received = true; - - enqueue_reassembly( - info, - response, - data_length); - } else - put_receive_buffer(info, response); - - if (data_length) - 
wake_up_interruptible(&info->wait_reassembly_queue); + } atomic_dec(&info->receive_credits); info->receive_credit_target = @@ -528,6 +515,16 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) info->keep_alive_requested = KEEP_ALIVE_PENDING; } + /* + * If this is a packet with data playload place the data in + * reassembly queue and wake up the reading thread + */ + if (data_length) { + enqueue_reassembly(info, response, data_length); + wake_up_interruptible(&info->wait_reassembly_queue); + } else + put_receive_buffer(info, response); + return; } From 0edf9fc0a34436e9f257e8508e795b2caddc74d6 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 5 Aug 2025 18:11:29 +0200 Subject: [PATCH 1655/2411] smb: client: remove unused smbd_connection->fragment_reassembly_remaining Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 2 -- fs/smb/client/smbdirect.h | 1 - 2 files changed, 3 deletions(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 65421703593a..c7cbaf12c16f 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -465,13 +465,11 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, "\nRead Queue count_reassembly_queue: %x " "count_enqueue_reassembly_queue: %x " "count_dequeue_reassembly_queue: %x " - "fragment_reassembly_remaining: %x " "reassembly_data_length: %x " "reassembly_queue_length: %x", server->smbd_conn->count_reassembly_queue, server->smbd_conn->count_enqueue_reassembly_queue, server->smbd_conn->count_dequeue_reassembly_queue, - server->smbd_conn->fragment_reassembly_remaining, server->smbd_conn->reassembly_data_length, server->smbd_conn->reassembly_queue_length); seq_printf(m, "\nCurrent Credits send_credits: %x " diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 
ea04ce8a9763..a2026c542989 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -75,7 +75,6 @@ struct smbd_connection { atomic_t send_credits; atomic_t receive_credits; int receive_credit_target; - int fragment_reassembly_remaining; /* Memory registrations */ /* Maximum number of RDMA read/write outstanding on this connection */ From 33dd53a90e3419ea260e9ff2b4aa107385cdf7fa Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 5 Aug 2025 18:11:30 +0200 Subject: [PATCH 1656/2411] smb: smbdirect: introduce smbdirect_socket.recv_io.expected The expected message type can be global, as it never changes after the negotiation process. This will replace smbd_response->type and smb_direct_recvmsg->type in future. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index e5b15cc44a7b..5db7815b614f 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -38,6 +38,20 @@ struct smbdirect_socket { } ib; struct smbdirect_socket_parameters parameters; + + /* + * The state for posted receive buffers + */ + struct { + /* + * The type of PDU we are expecting + */ + enum { + SMBDIRECT_EXPECT_NEGOTIATE_REQ = 1, + SMBDIRECT_EXPECT_NEGOTIATE_REP = 2, + SMBDIRECT_EXPECT_DATA_TRANSFER = 3, + } expected; + } recv_io; }; #endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */ From bbdbd9ae47155da65aa0c1641698a44d85c2faa2 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 5 Aug 2025 18:11:31 +0200 Subject: [PATCH 1657/2411] smb: client: make use of smbdirect_socket->recv_io.expected The expected incoming message type can be per connection. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 22 ++++++++++++++-------- fs/smb/client/smbdirect.h | 7 ------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 47f2a6cc1c0c..a189973df1be 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -383,6 +383,7 @@ static bool process_negotiation_response( info->max_frmr_depth * PAGE_SIZE); info->max_frmr_depth = sp->max_read_write_size / PAGE_SIZE; + sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER; return true; } @@ -408,7 +409,6 @@ static void smbd_post_send_credits(struct work_struct *work) if (!response) break; - response->type = SMBD_TRANSFER_DATA; response->first_segment = false; rc = smbd_post_recv(info, response); if (rc) { @@ -445,10 +445,11 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) struct smbd_response *response = container_of(wc->wr_cqe, struct smbd_response, cqe); struct smbd_connection *info = response->info; + struct smbdirect_socket *sc = &info->socket; int data_length = 0; log_rdma_recv(INFO, "response=0x%p type=%d wc status=%d wc opcode %d byte_len=%d pkey_index=%u\n", - response, response->type, wc->status, wc->opcode, + response, sc->recv_io.expected, wc->status, wc->opcode, wc->byte_len, wc->pkey_index); if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { @@ -463,9 +464,9 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) response->sge.length, DMA_FROM_DEVICE); - switch (response->type) { + switch (sc->recv_io.expected) { /* SMBD negotiation response */ - case SMBD_NEGOTIATE_RESP: + case SMBDIRECT_EXPECT_NEGOTIATE_REP: dump_smbdirect_negotiate_resp(smbd_response_payload(response)); info->full_packet_received = true; info->negotiate_done = @@ -475,7 +476,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc 
*wc) return; /* SMBD data transfer packet */ - case SMBD_TRANSFER_DATA: + case SMBDIRECT_EXPECT_DATA_TRANSFER: data_transfer = smbd_response_payload(response); data_length = le32_to_cpu(data_transfer->data_length); @@ -526,13 +527,17 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) put_receive_buffer(info, response); return; + + case SMBDIRECT_EXPECT_NEGOTIATE_REQ: + /* Only server... */ + break; } /* * This is an internal error! */ - log_rdma_recv(ERR, "unexpected response type=%d\n", response->type); - WARN_ON_ONCE(response->type != SMBD_TRANSFER_DATA); + log_rdma_recv(ERR, "unexpected response type=%d\n", sc->recv_io.expected); + WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_DATA_TRANSFER); error: put_receive_buffer(info, response); smbd_disconnect_rdma_connection(info); @@ -1067,10 +1072,11 @@ static int smbd_post_recv( /* Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2 */ static int smbd_negotiate(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; int rc; struct smbd_response *response = get_receive_buffer(info); - response->type = SMBD_NEGOTIATE_RESP; + sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REP; rc = smbd_post_recv(info, response); log_rdma_event(INFO, "smbd_post_recv rc=%d iov.addr=0x%llx iov.length=%u iov.lkey=0x%x\n", rc, response->sge.addr, diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index a2026c542989..dbb138900973 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -156,11 +156,6 @@ struct smbd_connection { unsigned int count_send_empty; }; -enum smbd_message_type { - SMBD_NEGOTIATE_RESP, - SMBD_TRANSFER_DATA, -}; - /* Maximum number of SGEs used by smbdirect.c in any send work request */ #define SMBDIRECT_MAX_SEND_SGE 6 @@ -186,8 +181,6 @@ struct smbd_response { struct ib_cqe cqe; struct ib_sge sge; - enum smbd_message_type type; - /* Link to receive queue or reassembly queue */ struct list_head list; From 
60812d20da82606f0620904c281579a9af0ab452 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 5 Aug 2025 18:11:32 +0200 Subject: [PATCH 1658/2411] smb: smbdirect: introduce struct smbdirect_recv_io This will be used in client and server soon in order to replace smbd_response/smb_direct_recvmsg. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 5db7815b614f..a7ad31c471a7 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -54,4 +54,19 @@ struct smbdirect_socket { } recv_io; }; +struct smbdirect_recv_io { + struct smbdirect_socket *socket; + struct ib_cqe cqe; + struct ib_sge sge; + + /* Link to free or reassembly list */ + struct list_head list; + + /* Indicate if this is the 1st packet of a payload */ + bool first_segment; + + /* SMBD packet header and payload follows this structure */ + u8 packet[]; +}; + #endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */ From 5dddf0497445d247e995306daf3b76dd0633831c Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 5 Aug 2025 18:11:33 +0200 Subject: [PATCH 1659/2411] smb: client: make use of struct smbdirect_recv_io This is the shared structure that will be used in the server too and will allow us to move helper functions into common code soon. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 79 ++++++++++++++++++++------------------- fs/smb/client/smbdirect.h | 16 -------- 2 files changed, 41 insertions(+), 54 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index a189973df1be..2589834882cb 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -13,23 +13,23 @@ #include "cifsproto.h" #include "smb2proto.h" -static struct smbd_response *get_receive_buffer( +static struct smbdirect_recv_io *get_receive_buffer( struct smbd_connection *info); static void put_receive_buffer( struct smbd_connection *info, - struct smbd_response *response); + struct smbdirect_recv_io *response); static int allocate_receive_buffers(struct smbd_connection *info, int num_buf); static void destroy_receive_buffers(struct smbd_connection *info); static void enqueue_reassembly( struct smbd_connection *info, - struct smbd_response *response, int data_length); -static struct smbd_response *_get_first_reassembly( + struct smbdirect_recv_io *response, int data_length); +static struct smbdirect_recv_io *_get_first_reassembly( struct smbd_connection *info); static int smbd_post_recv( struct smbd_connection *info, - struct smbd_response *response); + struct smbdirect_recv_io *response); static int smbd_post_send_empty(struct smbd_connection *info); @@ -260,7 +260,7 @@ static inline void *smbd_request_payload(struct smbd_request *request) return (void *)request->packet; } -static inline void *smbd_response_payload(struct smbd_response *response) +static inline void *smbdirect_recv_io_payload(struct smbdirect_recv_io *response) { return (void *)response->packet; } @@ -315,12 +315,13 @@ static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp) * return value: true if negotiation is a success, false if failed */ static 
bool process_negotiation_response( - struct smbd_response *response, int packet_length) + struct smbdirect_recv_io *response, int packet_length) { - struct smbd_connection *info = response->info; - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket *sc = response->socket; + struct smbd_connection *info = + container_of(sc, struct smbd_connection, socket); struct smbdirect_socket_parameters *sp = &sc->parameters; - struct smbdirect_negotiate_resp *packet = smbd_response_payload(response); + struct smbdirect_negotiate_resp *packet = smbdirect_recv_io_payload(response); if (packet_length < sizeof(struct smbdirect_negotiate_resp)) { log_rdma_event(ERR, @@ -391,7 +392,7 @@ static void smbd_post_send_credits(struct work_struct *work) { int ret = 0; int rc; - struct smbd_response *response; + struct smbdirect_recv_io *response; struct smbd_connection *info = container_of(work, struct smbd_connection, post_send_credits_work); @@ -442,10 +443,11 @@ static void smbd_post_send_credits(struct work_struct *work) static void recv_done(struct ib_cq *cq, struct ib_wc *wc) { struct smbdirect_data_transfer *data_transfer; - struct smbd_response *response = - container_of(wc->wr_cqe, struct smbd_response, cqe); - struct smbd_connection *info = response->info; - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_recv_io *response = + container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); + struct smbdirect_socket *sc = response->socket; + struct smbd_connection *info = + container_of(sc, struct smbd_connection, socket); int data_length = 0; log_rdma_recv(INFO, "response=0x%p type=%d wc status=%d wc opcode %d byte_len=%d pkey_index=%u\n", @@ -467,7 +469,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) switch (sc->recv_io.expected) { /* SMBD negotiation response */ case SMBDIRECT_EXPECT_NEGOTIATE_REP: - dump_smbdirect_negotiate_resp(smbd_response_payload(response)); + dump_smbdirect_negotiate_resp(smbdirect_recv_io_payload(response)); 
info->full_packet_received = true; info->negotiate_done = process_negotiation_response(response, wc->byte_len); @@ -477,7 +479,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) /* SMBD data transfer packet */ case SMBDIRECT_EXPECT_DATA_TRANSFER: - data_transfer = smbd_response_payload(response); + data_transfer = smbdirect_recv_io_payload(response); data_length = le32_to_cpu(data_transfer->data_length); if (data_length) { @@ -1034,7 +1036,7 @@ static int smbd_post_send_full_iter(struct smbd_connection *info, * The interaction is controlled by send/receive credit system */ static int smbd_post_recv( - struct smbd_connection *info, struct smbd_response *response) + struct smbd_connection *info, struct smbdirect_recv_io *response) { struct smbdirect_socket *sc = &info->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; @@ -1074,7 +1076,7 @@ static int smbd_negotiate(struct smbd_connection *info) { struct smbdirect_socket *sc = &info->socket; int rc; - struct smbd_response *response = get_receive_buffer(info); + struct smbdirect_recv_io *response = get_receive_buffer(info); sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REP; rc = smbd_post_recv(info, response); @@ -1119,7 +1121,7 @@ static int smbd_negotiate(struct smbd_connection *info) */ static void enqueue_reassembly( struct smbd_connection *info, - struct smbd_response *response, + struct smbdirect_recv_io *response, int data_length) { spin_lock(&info->reassembly_queue_lock); @@ -1143,14 +1145,14 @@ static void enqueue_reassembly( * Caller is responsible for locking * return value: the first entry if any, NULL if queue is empty */ -static struct smbd_response *_get_first_reassembly(struct smbd_connection *info) +static struct smbdirect_recv_io *_get_first_reassembly(struct smbd_connection *info) { - struct smbd_response *ret = NULL; + struct smbdirect_recv_io *ret = NULL; if (!list_empty(&info->reassembly_queue)) { ret = list_first_entry( &info->reassembly_queue, - struct 
smbd_response, list); + struct smbdirect_recv_io, list); } return ret; } @@ -1161,16 +1163,16 @@ static struct smbd_response *_get_first_reassembly(struct smbd_connection *info) * pre-allocated in advance. * return value: the receive buffer, NULL if none is available */ -static struct smbd_response *get_receive_buffer(struct smbd_connection *info) +static struct smbdirect_recv_io *get_receive_buffer(struct smbd_connection *info) { - struct smbd_response *ret = NULL; + struct smbdirect_recv_io *ret = NULL; unsigned long flags; spin_lock_irqsave(&info->receive_queue_lock, flags); if (!list_empty(&info->receive_queue)) { ret = list_first_entry( &info->receive_queue, - struct smbd_response, list); + struct smbdirect_recv_io, list); list_del(&ret->list); info->count_receive_queue--; info->count_get_receive_buffer++; @@ -1187,7 +1189,7 @@ static struct smbd_response *get_receive_buffer(struct smbd_connection *info) * receive buffer is returned. */ static void put_receive_buffer( - struct smbd_connection *info, struct smbd_response *response) + struct smbd_connection *info, struct smbdirect_recv_io *response) { struct smbdirect_socket *sc = &info->socket; unsigned long flags; @@ -1212,8 +1214,9 @@ static void put_receive_buffer( /* Preallocate all receive buffer on transport establishment */ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) { + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_recv_io *response; int i; - struct smbd_response *response; INIT_LIST_HEAD(&info->reassembly_queue); spin_lock_init(&info->reassembly_queue_lock); @@ -1231,7 +1234,7 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) if (!response) goto allocate_failed; - response->info = info; + response->socket = sc; response->sge.length = 0; list_add_tail(&response->list, &info->receive_queue); info->count_receive_queue++; @@ -1243,7 +1246,7 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) 
while (!list_empty(&info->receive_queue)) { response = list_first_entry( &info->receive_queue, - struct smbd_response, list); + struct smbdirect_recv_io, list); list_del(&response->list); info->count_receive_queue--; @@ -1254,7 +1257,7 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) static void destroy_receive_buffers(struct smbd_connection *info) { - struct smbd_response *response; + struct smbdirect_recv_io *response; while ((response = get_receive_buffer(info))) mempool_free(response, info->response_mempool); @@ -1295,7 +1298,7 @@ void smbd_destroy(struct TCP_Server_Info *server) struct smbd_connection *info = server->smbd_conn; struct smbdirect_socket *sc; struct smbdirect_socket_parameters *sp; - struct smbd_response *response; + struct smbdirect_recv_io *response; unsigned long flags; if (!info) { @@ -1456,17 +1459,17 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) if (!info->request_mempool) goto out1; - scnprintf(name, MAX_NAME_LEN, "smbd_response_%p", info); + scnprintf(name, MAX_NAME_LEN, "smbdirect_recv_io_%p", info); struct kmem_cache_args response_args = { - .align = __alignof__(struct smbd_response), - .useroffset = (offsetof(struct smbd_response, packet) + + .align = __alignof__(struct smbdirect_recv_io), + .useroffset = (offsetof(struct smbdirect_recv_io, packet) + sizeof(struct smbdirect_data_transfer)), .usersize = sp->max_recv_size - sizeof(struct smbdirect_data_transfer), }; info->response_cache = kmem_cache_create(name, - sizeof(struct smbd_response) + sp->max_recv_size, + sizeof(struct smbdirect_recv_io) + sp->max_recv_size, &response_args, SLAB_HWCACHE_ALIGN); if (!info->response_cache) goto out2; @@ -1756,7 +1759,7 @@ struct smbd_connection *smbd_get_connection( int smbd_recv(struct smbd_connection *info, struct msghdr *msg) { struct smbdirect_socket *sc = &info->socket; - struct smbd_response *response; + struct smbdirect_recv_io *response; struct smbdirect_data_transfer 
*data_transfer; size_t size = iov_iter_count(&msg->msg_iter); int to_copy, to_read, data_read, offset; @@ -1792,7 +1795,7 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) offset = info->first_entry_offset; while (data_read < size) { response = _get_first_reassembly(info); - data_transfer = smbd_response_payload(response); + data_transfer = smbdirect_recv_io_payload(response); data_length = le32_to_cpu(data_transfer->data_length); remaining_data_length = le32_to_cpu( diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index dbb138900973..f53781f98e64 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -175,22 +175,6 @@ struct smbd_request { /* Maximum number of SGEs used by smbdirect.c in any receive work request */ #define SMBDIRECT_MAX_RECV_SGE 1 -/* The context for a SMBD response */ -struct smbd_response { - struct smbd_connection *info; - struct ib_cqe cqe; - struct ib_sge sge; - - /* Link to receive queue or reassembly queue */ - struct list_head list; - - /* Indicate if this is the 1st packet of a payload */ - bool first_segment; - - /* SMBD packet header and payload follows this structure */ - u8 packet[]; -}; - /* Create a SMBDirect session */ struct smbd_connection *smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr); From d0df32a3025c2de47d7eb8766aaee82644a53581 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 5 Aug 2025 18:11:34 +0200 Subject: [PATCH 1660/2411] smb: smbdirect: introduce smbdirect_socket.recv_io.free.{list,lock} This will allow the list of free smbdirect_recv_io messages including the spinlock to be in common between client and server in order to split out common helper functions in future. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index a7ad31c471a7..21a58e6078cb 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -51,6 +51,15 @@ struct smbdirect_socket { SMBDIRECT_EXPECT_NEGOTIATE_REP = 2, SMBDIRECT_EXPECT_DATA_TRANSFER = 3, } expected; + + /* + * The list of free smbdirect_recv_io + * structures + */ + struct { + struct list_head list; + spinlock_t lock; + } free; } recv_io; }; From 59500450843a5af9ca4fdba0ac2808e929a47584 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 5 Aug 2025 18:11:35 +0200 Subject: [PATCH 1661/2411] smb: client: make use of smb: smbdirect_socket.recv_io.free.{list,lock} This will be used by the server too in order to have common helper functions in future. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 25 +++++++++++++------------ fs/smb/client/smbdirect.h | 3 --- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 2589834882cb..073331080e3a 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1165,19 +1165,20 @@ static struct smbdirect_recv_io *_get_first_reassembly(struct smbd_connection *i */ static struct smbdirect_recv_io *get_receive_buffer(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; struct smbdirect_recv_io *ret = NULL; unsigned long flags; - spin_lock_irqsave(&info->receive_queue_lock, flags); - if (!list_empty(&info->receive_queue)) { + spin_lock_irqsave(&sc->recv_io.free.lock, flags); + if (!list_empty(&sc->recv_io.free.list)) { ret = list_first_entry( - &info->receive_queue, + &sc->recv_io.free.list, struct smbdirect_recv_io, list); list_del(&ret->list); info->count_receive_queue--; info->count_get_receive_buffer++; } - spin_unlock_irqrestore(&info->receive_queue_lock, flags); + spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); return ret; } @@ -1202,11 +1203,11 @@ static void put_receive_buffer( response->sge.length = 0; } - spin_lock_irqsave(&info->receive_queue_lock, flags); - list_add_tail(&response->list, &info->receive_queue); + spin_lock_irqsave(&sc->recv_io.free.lock, flags); + list_add_tail(&response->list, &sc->recv_io.free.list); info->count_receive_queue++; info->count_put_receive_buffer++; - spin_unlock_irqrestore(&info->receive_queue_lock, flags); + spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); queue_work(info->workqueue, &info->post_send_credits_work); } @@ -1223,8 +1224,8 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) info->reassembly_data_length = 0; 
info->reassembly_queue_length = 0; - INIT_LIST_HEAD(&info->receive_queue); - spin_lock_init(&info->receive_queue_lock); + INIT_LIST_HEAD(&sc->recv_io.free.list); + spin_lock_init(&sc->recv_io.free.lock); info->count_receive_queue = 0; init_waitqueue_head(&info->wait_receive_queues); @@ -1236,16 +1237,16 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) response->socket = sc; response->sge.length = 0; - list_add_tail(&response->list, &info->receive_queue); + list_add_tail(&response->list, &sc->recv_io.free.list); info->count_receive_queue++; } return 0; allocate_failed: - while (!list_empty(&info->receive_queue)) { + while (!list_empty(&sc->recv_io.free.list)) { response = list_first_entry( - &info->receive_queue, + &sc->recv_io.free.list, struct smbdirect_recv_io, list); list_del(&response->list); info->count_receive_queue--; diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index f53781f98e64..3381e01f5b83 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -105,10 +105,7 @@ struct smbd_connection { wait_queue_head_t wait_post_send; /* Receive queue */ - struct list_head receive_queue; int count_receive_queue; - spinlock_t receive_queue_lock; - wait_queue_head_t wait_receive_queues; /* Reassembly queue */ From b7ffb4d2a0360043d821b612040088ae9299aa8c Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 5 Aug 2025 18:11:36 +0200 Subject: [PATCH 1662/2411] smb: smbdirect: introduce smbdirect_socket.recv_io.reassembly.* This will be used in common between client and server soon. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 21a58e6078cb..3ae834ca3af1 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -60,6 +60,32 @@ struct smbdirect_socket { struct list_head list; spinlock_t lock; } free; + + /* + * The list of arrived non-empty smbdirect_recv_io + * structures + * + * This represents the reassembly queue. + */ + struct { + struct list_head list; + spinlock_t lock; + wait_queue_head_t wait_queue; + /* total data length of reassembly queue */ + int data_length; + int queue_length; + /* the offset to first buffer in reassembly queue */ + int first_entry_offset; + /* + * Indicate if we have received a full packet on the + * connection This is used to identify the first SMBD + * packet of a assembled payload (SMB packet) in + * reassembly queue so we can return a RFC1002 length to + * upper layer to indicate the length of the SMB packet + * received + */ + bool full_packet_received; + } reassembly; } recv_io; }; From 61b4918e4ea17f3ce14ad13e4c2757c0b6afe708 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 5 Aug 2025 18:11:37 +0200 Subject: [PATCH 1663/2411] smb: client: make use of smbdirect_socket.recv_io.reassembly.* This will be used by the server too and will allow us to create common helper functions. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 8 ++-- fs/smb/client/smbdirect.c | 79 ++++++++++++++++++++------------------ fs/smb/client/smbdirect.h | 20 ---------- 3 files changed, 46 insertions(+), 61 deletions(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index c7cbaf12c16f..beb4f18f05ef 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -412,6 +412,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { #ifdef CONFIG_CIFS_SMB_DIRECT + struct smbdirect_socket *sc; struct smbdirect_socket_parameters *sp; #endif @@ -436,7 +437,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, "\nSMBDirect transport not available"); goto skip_rdma; } - sp = &server->smbd_conn->socket.parameters; + sc = &server->smbd_conn->socket; + sp = &sc->parameters; seq_printf(m, "\nSMBDirect (in hex) protocol version: %x " "transport status: %x", @@ -470,8 +472,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) server->smbd_conn->count_reassembly_queue, server->smbd_conn->count_enqueue_reassembly_queue, server->smbd_conn->count_dequeue_reassembly_queue, - server->smbd_conn->reassembly_data_length, - server->smbd_conn->reassembly_queue_length); + sc->recv_io.reassembly.data_length, + sc->recv_io.reassembly.queue_length); seq_printf(m, "\nCurrent Credits send_credits: %x " "receive_credits: %x receive_credit_target: %x", atomic_read(&server->smbd_conn->send_credits), diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 073331080e3a..5217a8122a94 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -224,7 +224,7 @@ static int smbd_conn_upcall( sc->status = SMBDIRECT_SOCKET_DISCONNECTED; 
wake_up_interruptible(&info->disconn_wait); - wake_up_interruptible(&info->wait_reassembly_queue); + wake_up_interruptible(&sc->recv_io.reassembly.wait_queue); wake_up_interruptible_all(&info->wait_send_queue); break; @@ -470,7 +470,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) /* SMBD negotiation response */ case SMBDIRECT_EXPECT_NEGOTIATE_REP: dump_smbdirect_negotiate_resp(smbdirect_recv_io_payload(response)); - info->full_packet_received = true; + sc->recv_io.reassembly.full_packet_received = true; info->negotiate_done = process_negotiation_response(response, wc->byte_len); put_receive_buffer(info, response); @@ -483,13 +483,13 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) data_length = le32_to_cpu(data_transfer->data_length); if (data_length) { - if (info->full_packet_received) + if (sc->recv_io.reassembly.full_packet_received) response->first_segment = true; if (le32_to_cpu(data_transfer->remaining_data_length)) - info->full_packet_received = false; + sc->recv_io.reassembly.full_packet_received = false; else - info->full_packet_received = true; + sc->recv_io.reassembly.full_packet_received = true; } atomic_dec(&info->receive_credits); @@ -524,7 +524,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) */ if (data_length) { enqueue_reassembly(info, response, data_length); - wake_up_interruptible(&info->wait_reassembly_queue); + wake_up_interruptible(&sc->recv_io.reassembly.wait_queue); } else put_receive_buffer(info, response); @@ -1124,9 +1124,11 @@ static void enqueue_reassembly( struct smbdirect_recv_io *response, int data_length) { - spin_lock(&info->reassembly_queue_lock); - list_add_tail(&response->list, &info->reassembly_queue); - info->reassembly_queue_length++; + struct smbdirect_socket *sc = &info->socket; + + spin_lock(&sc->recv_io.reassembly.lock); + list_add_tail(&response->list, &sc->recv_io.reassembly.list); + sc->recv_io.reassembly.queue_length++; /* * Make sure reassembly_data_length is updated after 
list and * reassembly_queue_length are updated. On the dequeue side @@ -1134,8 +1136,8 @@ static void enqueue_reassembly( * if reassembly_queue_length and list is up to date */ virt_wmb(); - info->reassembly_data_length += data_length; - spin_unlock(&info->reassembly_queue_lock); + sc->recv_io.reassembly.data_length += data_length; + spin_unlock(&sc->recv_io.reassembly.lock); info->count_reassembly_queue++; info->count_enqueue_reassembly_queue++; } @@ -1147,11 +1149,12 @@ static void enqueue_reassembly( */ static struct smbdirect_recv_io *_get_first_reassembly(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; struct smbdirect_recv_io *ret = NULL; - if (!list_empty(&info->reassembly_queue)) { + if (!list_empty(&sc->recv_io.reassembly.list)) { ret = list_first_entry( - &info->reassembly_queue, + &sc->recv_io.reassembly.list, struct smbdirect_recv_io, list); } return ret; @@ -1219,10 +1222,10 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) struct smbdirect_recv_io *response; int i; - INIT_LIST_HEAD(&info->reassembly_queue); - spin_lock_init(&info->reassembly_queue_lock); - info->reassembly_data_length = 0; - info->reassembly_queue_length = 0; + INIT_LIST_HEAD(&sc->recv_io.reassembly.list); + spin_lock_init(&sc->recv_io.reassembly.lock); + sc->recv_io.reassembly.data_length = 0; + sc->recv_io.reassembly.queue_length = 0; INIT_LIST_HEAD(&sc->recv_io.free.list); spin_lock_init(&sc->recv_io.free.lock); @@ -1333,18 +1336,18 @@ void smbd_destroy(struct TCP_Server_Info *server) /* It's not possible for upper layer to get to reassembly */ log_rdma_event(INFO, "drain the reassembly queue\n"); do { - spin_lock_irqsave(&info->reassembly_queue_lock, flags); + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); response = _get_first_reassembly(info); if (response) { list_del(&response->list); spin_unlock_irqrestore( - &info->reassembly_queue_lock, flags); + &sc->recv_io.reassembly.lock, flags); 
put_receive_buffer(info, response); } else spin_unlock_irqrestore( - &info->reassembly_queue_lock, flags); + &sc->recv_io.reassembly.lock, flags); } while (response); - info->reassembly_data_length = 0; + sc->recv_io.reassembly.data_length = 0; log_rdma_event(INFO, "free receive buffers\n"); wait_event(info->wait_receive_queues, @@ -1639,7 +1642,7 @@ static struct smbd_connection *_smbd_get_connection( init_waitqueue_head(&info->conn_wait); init_waitqueue_head(&info->disconn_wait); - init_waitqueue_head(&info->wait_reassembly_queue); + init_waitqueue_head(&sc->recv_io.reassembly.wait_queue); rc = rdma_connect(sc->rdma.cm_id, &conn_param); if (rc) { log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc); @@ -1776,9 +1779,9 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) * the only one reading from the front of the queue. The transport * may add more entries to the back of the queue at the same time */ - log_read(INFO, "size=%zd info->reassembly_data_length=%d\n", size, - info->reassembly_data_length); - if (info->reassembly_data_length >= size) { + log_read(INFO, "size=%zd sc->recv_io.reassembly.data_length=%d\n", size, + sc->recv_io.reassembly.data_length); + if (sc->recv_io.reassembly.data_length >= size) { int queue_length; int queue_removed = 0; @@ -1790,10 +1793,10 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) * updated in SOFTIRQ as more data is received */ virt_rmb(); - queue_length = info->reassembly_queue_length; + queue_length = sc->recv_io.reassembly.queue_length; data_read = 0; to_read = size; - offset = info->first_entry_offset; + offset = sc->recv_io.reassembly.first_entry_offset; while (data_read < size) { response = _get_first_reassembly(info); data_transfer = smbdirect_recv_io_payload(response); @@ -1841,10 +1844,10 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) list_del(&response->list); else { spin_lock_irq( - &info->reassembly_queue_lock); + &sc->recv_io.reassembly.lock); 
list_del(&response->list); spin_unlock_irq( - &info->reassembly_queue_lock); + &sc->recv_io.reassembly.lock); } queue_removed++; info->count_reassembly_queue--; @@ -1863,23 +1866,23 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) to_read, data_read, offset); } - spin_lock_irq(&info->reassembly_queue_lock); - info->reassembly_data_length -= data_read; - info->reassembly_queue_length -= queue_removed; - spin_unlock_irq(&info->reassembly_queue_lock); + spin_lock_irq(&sc->recv_io.reassembly.lock); + sc->recv_io.reassembly.data_length -= data_read; + sc->recv_io.reassembly.queue_length -= queue_removed; + spin_unlock_irq(&sc->recv_io.reassembly.lock); - info->first_entry_offset = offset; + sc->recv_io.reassembly.first_entry_offset = offset; log_read(INFO, "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", - data_read, info->reassembly_data_length, - info->first_entry_offset); + data_read, sc->recv_io.reassembly.data_length, + sc->recv_io.reassembly.first_entry_offset); read_rfc1002_done: return data_read; } log_read(INFO, "wait_event on more data\n"); rc = wait_event_interruptible( - info->wait_reassembly_queue, - info->reassembly_data_length >= size || + sc->recv_io.reassembly.wait_queue, + sc->recv_io.reassembly.data_length >= size || sc->status != SMBDIRECT_SOCKET_CONNECTED); /* Don't return any data if interrupted */ if (rc) diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 3381e01f5b83..9df434f6bb8c 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -108,30 +108,10 @@ struct smbd_connection { int count_receive_queue; wait_queue_head_t wait_receive_queues; - /* Reassembly queue */ - struct list_head reassembly_queue; - spinlock_t reassembly_queue_lock; - wait_queue_head_t wait_reassembly_queue; - - /* total data length of reassembly queue */ - int reassembly_data_length; - int reassembly_queue_length; - /* the offset to first buffer in reassembly queue */ - int 
first_entry_offset; - bool send_immediate; wait_queue_head_t wait_send_queue; - /* - * Indicate if we have received a full packet on the connection - * This is used to identify the first SMBD packet of a assembled - * payload (SMB packet) in reassembly queue so we can return a - * RFC1002 length to upper layer to indicate the length of the SMB - * packet received - */ - bool full_packet_received; - struct workqueue_struct *workqueue; struct delayed_work idle_timer_work; From b126645b79547bd68fb8353ad27841b7408c08bb Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 6 Aug 2025 19:35:47 +0200 Subject: [PATCH 1664/2411] smb: client: remove unused enum smbd_connection_status Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 9df434f6bb8c..0463fde1bf26 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -33,16 +33,6 @@ enum keep_alive_status { KEEP_ALIVE_SENT, }; -enum smbd_connection_status { - SMBD_CREATED, - SMBD_CONNECTING, - SMBD_CONNECTED, - SMBD_NEGOTIATE_FAILED, - SMBD_DISCONNECTING, - SMBD_DISCONNECTED, - SMBD_DESTROYED -}; - /* * The context for the SMBDirect transport * Everything related to the transport is here. It has several logical parts From 3515aa6e43077157cf6070ad2cd181b179964856 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 6 Aug 2025 19:35:48 +0200 Subject: [PATCH 1665/2411] smb: smbdirect: add SMBDIRECT_RECV_IO_MAX_SGE This will allow the client and server specific defines to be replaced. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 3ae834ca3af1..7270fcee1048 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -92,6 +92,13 @@ struct smbdirect_socket { struct smbdirect_recv_io { struct smbdirect_socket *socket; struct ib_cqe cqe; + + /* + * For now we only use a single SGE + * as we have just one large buffer + * per posted recv. + */ +#define SMBDIRECT_RECV_IO_MAX_SGE 1 struct ib_sge sge; /* Link to free or reassembly list */ From 8b5964a1188f659afda0c72f7c7b7c9d54aa73cc Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 6 Aug 2025 19:35:49 +0200 Subject: [PATCH 1666/2411] smb: client: make use of SMBDIRECT_RECV_IO_MAX_SGE Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 4 ++-- fs/smb/client/smbdirect.h | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 5217a8122a94..5d1fa83583f6 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1563,7 +1563,7 @@ static struct smbd_connection *_smbd_get_connection( sp->keepalive_interval_msec = smbd_keep_alive_interval * 1000; if (sc->ib.dev->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE || - sc->ib.dev->attrs.max_recv_sge < SMBDIRECT_MAX_RECV_SGE) { + sc->ib.dev->attrs.max_recv_sge < SMBDIRECT_RECV_IO_MAX_SGE) { log_rdma_event(ERR, "device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", IB_DEVICE_NAME_MAX, @@ -1595,7 +1595,7 @@ static struct smbd_connection 
*_smbd_get_connection( qp_attr.cap.max_send_wr = sp->send_credit_target; qp_attr.cap.max_recv_wr = sp->recv_credit_max; qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SEND_SGE; - qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_RECV_SGE; + qp_attr.cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; qp_attr.cap.max_inline_data = 0; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; qp_attr.qp_type = IB_QPT_RC; diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 0463fde1bf26..81b55c0de552 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -139,9 +139,6 @@ struct smbd_request { u8 packet[]; }; -/* Maximum number of SGEs used by smbdirect.c in any receive work request */ -#define SMBDIRECT_MAX_RECV_SGE 1 - /* Create a SMBDirect session */ struct smbd_connection *smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr); From 92ac696be763461b575022f06bbd843aadbe9593 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 6 Aug 2025 19:35:50 +0200 Subject: [PATCH 1667/2411] smb: smbdirect: introduce struct smbdirect_send_io This will be used in client and server soon in order to replace smbd_request/smb_direct_sendmsg. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 7270fcee1048..4660c05c358f 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -89,6 +89,30 @@ struct smbdirect_socket { } recv_io; }; +struct smbdirect_send_io { + struct smbdirect_socket *socket; + struct ib_cqe cqe; + + /* + * The SGE entries for this work request + * + * The first points to the packet header + */ +#define SMBDIRECT_SEND_IO_MAX_SGE 6 + size_t num_sge; + struct ib_sge sge[SMBDIRECT_SEND_IO_MAX_SGE]; + + /* + * Link to the list of sibling smbdirect_send_io + * messages. + */ + struct list_head sibling_list; + struct ib_send_wr wr; + + /* SMBD packet header follows this structure */ + u8 packet[]; +}; + struct smbdirect_recv_io { struct smbdirect_socket *socket; struct ib_cqe cqe; From 977ea06fddda493a8e16e609a408fdd08233793d Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 6 Aug 2025 19:35:51 +0200 Subject: [PATCH 1668/2411] smb: client: make use of struct smbdirect_send_io The server will also use this soon, so that we can split out common helper functions in future. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 45 ++++++++++++++++++++------------------- fs/smb/client/smbdirect.h | 16 -------------- 2 files changed, 23 insertions(+), 38 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 5d1fa83583f6..c367efef8c7a 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -255,7 +255,7 @@ smbd_qp_async_error_upcall(struct ib_event *event, void *context) } } -static inline void *smbd_request_payload(struct smbd_request *request) +static inline void *smbdirect_send_io_payload(struct smbdirect_send_io *request) { return (void *)request->packet; } @@ -269,12 +269,13 @@ static inline void *smbdirect_recv_io_payload(struct smbdirect_recv_io *response static void send_done(struct ib_cq *cq, struct ib_wc *wc) { int i; - struct smbd_request *request = - container_of(wc->wr_cqe, struct smbd_request, cqe); - struct smbd_connection *info = request->info; - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_send_io *request = + container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); + struct smbdirect_socket *sc = request->socket; + struct smbd_connection *info = + container_of(sc, struct smbd_connection, socket); - log_rdma_send(INFO, "smbd_request 0x%p completed wc->status=%d\n", + log_rdma_send(INFO, "smbdirect_send_io 0x%p completed wc->status=%d\n", request, wc->status); for (i = 0; i < request->num_sge; i++) @@ -291,12 +292,12 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) return; } - if (atomic_dec_and_test(&request->info->send_pending)) - wake_up(&request->info->wait_send_pending); + if (atomic_dec_and_test(&info->send_pending)) + wake_up(&info->wait_send_pending); - wake_up(&request->info->wait_post_send); + wake_up(&info->wait_post_send); - mempool_free(request, request->info->request_mempool); + 
mempool_free(request, info->request_mempool); } static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp) @@ -688,16 +689,16 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) struct smbdirect_socket_parameters *sp = &sc->parameters; struct ib_send_wr send_wr; int rc = -ENOMEM; - struct smbd_request *request; + struct smbdirect_send_io *request; struct smbdirect_negotiate_req *packet; request = mempool_alloc(info->request_mempool, GFP_KERNEL); if (!request) return rc; - request->info = info; + request->socket = sc; - packet = smbd_request_payload(request); + packet = smbdirect_send_io_payload(request); packet->min_version = cpu_to_le16(SMBDIRECT_V1); packet->max_version = cpu_to_le16(SMBDIRECT_V1); packet->reserved = 0; @@ -794,7 +795,7 @@ static int manage_keep_alive_before_sending(struct smbd_connection *info) /* Post the send request */ static int smbd_post_send(struct smbd_connection *info, - struct smbd_request *request) + struct smbdirect_send_io *request) { struct smbdirect_socket *sc = &info->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; @@ -843,7 +844,7 @@ static int smbd_post_send_iter(struct smbd_connection *info, int i, rc; int header_length; int data_length; - struct smbd_request *request; + struct smbdirect_send_io *request; struct smbdirect_data_transfer *packet; int new_credits = 0; @@ -888,14 +889,14 @@ static int smbd_post_send_iter(struct smbd_connection *info, goto err_alloc; } - request->info = info; + request->socket = sc; memset(request->sge, 0, sizeof(request->sge)); /* Fill in the data payload to find out how much data we can add */ if (iter) { struct smb_extract_to_rdma extract = { .nr_sge = 1, - .max_sge = SMBDIRECT_MAX_SEND_SGE, + .max_sge = SMBDIRECT_SEND_IO_MAX_SGE, .sge = request->sge, .device = sc->ib.dev, .local_dma_lkey = sc->ib.pd->local_dma_lkey, @@ -917,7 +918,7 @@ static int smbd_post_send_iter(struct smbd_connection *info, } /* Fill in the packet header */ - 
packet = smbd_request_payload(request); + packet = smbdirect_send_io_payload(request); packet->credits_requested = cpu_to_le16(sp->send_credit_target); new_credits = manage_credits_prior_sending(info); @@ -1447,11 +1448,11 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) if (WARN_ON_ONCE(sp->max_recv_size < sizeof(struct smbdirect_data_transfer))) return -ENOMEM; - scnprintf(name, MAX_NAME_LEN, "smbd_request_%p", info); + scnprintf(name, MAX_NAME_LEN, "smbdirect_send_io_%p", info); info->request_cache = kmem_cache_create( name, - sizeof(struct smbd_request) + + sizeof(struct smbdirect_send_io) + sizeof(struct smbdirect_data_transfer), 0, SLAB_HWCACHE_ALIGN, NULL); if (!info->request_cache) @@ -1562,7 +1563,7 @@ static struct smbd_connection *_smbd_get_connection( sp->max_recv_size = smbd_max_receive_size; sp->keepalive_interval_msec = smbd_keep_alive_interval * 1000; - if (sc->ib.dev->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE || + if (sc->ib.dev->attrs.max_send_sge < SMBDIRECT_SEND_IO_MAX_SGE || sc->ib.dev->attrs.max_recv_sge < SMBDIRECT_RECV_IO_MAX_SGE) { log_rdma_event(ERR, "device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", @@ -1594,7 +1595,7 @@ static struct smbd_connection *_smbd_get_connection( qp_attr.qp_context = info; qp_attr.cap.max_send_wr = sp->send_credit_target; qp_attr.cap.max_recv_wr = sp->recv_credit_max; - qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SEND_SGE; + qp_attr.cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; qp_attr.cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; qp_attr.cap.max_inline_data = 0; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 81b55c0de552..a8380bccf623 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -123,22 +123,6 @@ struct smbd_connection { unsigned int count_send_empty; }; -/* Maximum number of SGEs used by smbdirect.c in any send work request */ -#define SMBDIRECT_MAX_SEND_SGE 6 - -/* The context for a 
SMBD request */ -struct smbd_request { - struct smbd_connection *info; - struct ib_cqe cqe; - - /* the SGE entries for this work request */ - struct ib_sge sge[SMBDIRECT_MAX_SEND_SGE]; - int num_sge; - - /* SMBD packet header follows this structure */ - u8 packet[]; -}; - /* Create a SMBDirect session */ struct smbd_connection *smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr); From 5ef8278e3734a8817fb0b8d302572dc2f2f5c46c Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 6 Aug 2025 19:35:52 +0200 Subject: [PATCH 1669/2411] smb: smbdirect: add smbdirect_socket.{send,recv}_io.mem.{cache,pool} This will be the common location memory caches and pools. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 4660c05c358f..3c4a8d627aa3 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -39,6 +39,20 @@ struct smbdirect_socket { struct smbdirect_socket_parameters parameters; + /* + * The state for posted send buffers + */ + struct { + /* + * Memory pools for preallocating + * smbdirect_send_io buffers + */ + struct { + struct kmem_cache *cache; + mempool_t *pool; + } mem; + } send_io; + /* * The state for posted receive buffers */ @@ -52,6 +66,15 @@ struct smbdirect_socket { SMBDIRECT_EXPECT_DATA_TRANSFER = 3, } expected; + /* + * Memory pools for preallocating + * smbdirect_recv_io buffers + */ + struct { + struct kmem_cache *cache; + mempool_t *pool; + } mem; + /* * The list of free smbdirect_recv_io * structures From bef82d5848dadb10c2671970fae2cc4cd2c6a123 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 6 Aug 2025 19:35:53 
+0200 Subject: [PATCH 1670/2411] smb: client: make use of smbdirect_socket.{send,recv}_io.mem.{cache,pool} This will allow common helper functions to be created later. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 65 ++++++++++++++++++++------------------- fs/smb/client/smbdirect.h | 9 ------ 2 files changed, 34 insertions(+), 40 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index c367efef8c7a..6c2af00be44c 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -287,7 +287,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { log_rdma_send(ERR, "wc->status=%d wc->opcode=%d\n", wc->status, wc->opcode); - mempool_free(request, info->request_mempool); + mempool_free(request, sc->send_io.mem.pool); smbd_disconnect_rdma_connection(info); return; } @@ -297,7 +297,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) wake_up(&info->wait_post_send); - mempool_free(request, info->request_mempool); + mempool_free(request, sc->send_io.mem.pool); } static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp) @@ -692,7 +692,7 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) struct smbdirect_send_io *request; struct smbdirect_negotiate_req *packet; - request = mempool_alloc(info->request_mempool, GFP_KERNEL); + request = mempool_alloc(sc->send_io.mem.pool, GFP_KERNEL); if (!request) return rc; @@ -751,7 +751,7 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) smbd_disconnect_rdma_connection(info); dma_mapping_failed: - mempool_free(request, info->request_mempool); + mempool_free(request, sc->send_io.mem.pool); return rc; } @@ -883,7 +883,7 @@ static int smbd_post_send_iter(struct smbd_connection *info, goto wait_send_queue; } 
- request = mempool_alloc(info->request_mempool, GFP_KERNEL); + request = mempool_alloc(sc->send_io.mem.pool, GFP_KERNEL); if (!request) { rc = -ENOMEM; goto err_alloc; @@ -977,7 +977,7 @@ static int smbd_post_send_iter(struct smbd_connection *info, request->sge[i].addr, request->sge[i].length, DMA_TO_DEVICE); - mempool_free(request, info->request_mempool); + mempool_free(request, sc->send_io.mem.pool); /* roll back receive credits and credits to be offered */ spin_lock(&info->lock_new_credits_offered); @@ -1235,7 +1235,7 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) init_waitqueue_head(&info->wait_receive_queues); for (i = 0; i < num_buf; i++) { - response = mempool_alloc(info->response_mempool, GFP_KERNEL); + response = mempool_alloc(sc->recv_io.mem.pool, GFP_KERNEL); if (!response) goto allocate_failed; @@ -1255,17 +1255,18 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) list_del(&response->list); info->count_receive_queue--; - mempool_free(response, info->response_mempool); + mempool_free(response, sc->recv_io.mem.pool); } return -ENOMEM; } static void destroy_receive_buffers(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; struct smbdirect_recv_io *response; while ((response = get_receive_buffer(info))) - mempool_free(response, info->response_mempool); + mempool_free(response, sc->recv_io.mem.pool); } /* Implement idle connection timer [MS-SMBD] 3.1.6.2 */ @@ -1377,11 +1378,11 @@ void smbd_destroy(struct TCP_Server_Info *server) rdma_destroy_id(sc->rdma.cm_id); /* free mempools */ - mempool_destroy(info->request_mempool); - kmem_cache_destroy(info->request_cache); + mempool_destroy(sc->send_io.mem.pool); + kmem_cache_destroy(sc->send_io.mem.cache); - mempool_destroy(info->response_mempool); - kmem_cache_destroy(info->response_cache); + mempool_destroy(sc->recv_io.mem.pool); + kmem_cache_destroy(sc->recv_io.mem.cache); sc->status = SMBDIRECT_SOCKET_DESTROYED; @@ 
-1429,12 +1430,14 @@ int smbd_reconnect(struct TCP_Server_Info *server) static void destroy_caches_and_workqueue(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; + destroy_receive_buffers(info); destroy_workqueue(info->workqueue); - mempool_destroy(info->response_mempool); - kmem_cache_destroy(info->response_cache); - mempool_destroy(info->request_mempool); - kmem_cache_destroy(info->request_cache); + mempool_destroy(sc->recv_io.mem.pool); + kmem_cache_destroy(sc->recv_io.mem.cache); + mempool_destroy(sc->send_io.mem.pool); + kmem_cache_destroy(sc->send_io.mem.cache); } #define MAX_NAME_LEN 80 @@ -1449,19 +1452,19 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) return -ENOMEM; scnprintf(name, MAX_NAME_LEN, "smbdirect_send_io_%p", info); - info->request_cache = + sc->send_io.mem.cache = kmem_cache_create( name, sizeof(struct smbdirect_send_io) + sizeof(struct smbdirect_data_transfer), 0, SLAB_HWCACHE_ALIGN, NULL); - if (!info->request_cache) + if (!sc->send_io.mem.cache) return -ENOMEM; - info->request_mempool = + sc->send_io.mem.pool = mempool_create(sp->send_credit_target, mempool_alloc_slab, - mempool_free_slab, info->request_cache); - if (!info->request_mempool) + mempool_free_slab, sc->send_io.mem.cache); + if (!sc->send_io.mem.pool) goto out1; scnprintf(name, MAX_NAME_LEN, "smbdirect_recv_io_%p", info); @@ -1472,17 +1475,17 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) sizeof(struct smbdirect_data_transfer)), .usersize = sp->max_recv_size - sizeof(struct smbdirect_data_transfer), }; - info->response_cache = + sc->recv_io.mem.cache = kmem_cache_create(name, sizeof(struct smbdirect_recv_io) + sp->max_recv_size, &response_args, SLAB_HWCACHE_ALIGN); - if (!info->response_cache) + if (!sc->recv_io.mem.cache) goto out2; - info->response_mempool = + sc->recv_io.mem.pool = mempool_create(sp->recv_credit_max, mempool_alloc_slab, - mempool_free_slab, info->response_cache); - if 
(!info->response_mempool) + mempool_free_slab, sc->recv_io.mem.cache); + if (!sc->recv_io.mem.pool) goto out3; scnprintf(name, MAX_NAME_LEN, "smbd_%p", info); @@ -1501,13 +1504,13 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) out5: destroy_workqueue(info->workqueue); out4: - mempool_destroy(info->response_mempool); + mempool_destroy(sc->recv_io.mem.pool); out3: - kmem_cache_destroy(info->response_cache); + kmem_cache_destroy(sc->recv_io.mem.cache); out2: - mempool_destroy(info->request_mempool); + mempool_destroy(sc->send_io.mem.pool); out1: - kmem_cache_destroy(info->request_cache); + kmem_cache_destroy(sc->send_io.mem.cache); return -ENOMEM; } diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index a8380bccf623..0d4d45428c85 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -105,15 +105,6 @@ struct smbd_connection { struct workqueue_struct *workqueue; struct delayed_work idle_timer_work; - /* Memory pool for preallocating buffers */ - /* request pool for RDMA send */ - struct kmem_cache *request_cache; - mempool_t *request_mempool; - - /* response pool for RDMA receive */ - struct kmem_cache *response_cache; - mempool_t *response_mempool; - /* for debug purposes */ unsigned int count_get_receive_buffer; unsigned int count_put_receive_buffer; From da274853fe7dbc7124e2dd84dad802be52a09321 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 29 Jul 2025 15:12:32 -0400 Subject: [PATCH 1671/2411] cpu: Remove obsolete comment from takedown_cpu() takedown_cpu() has a comment about "all preempt/rcu users must observe !cpu_active()" which is kind of meaningless in this function. This comment was originally introduced by commit 6acce3ef8452 ("sched: Remove get_online_cpus() usage") when _cpu_down() was setting cpu_active_mask and synchronize_rcu()/synchronize_sched() were added after that. 
Later commit 40190a78f85f ("sched/hotplug: Convert cpu_[in]active notifiers to state machine") added a new CPUHP_AP_ACTIVE hotplug state to set/clear cpu_active_mask. The following commit b2454caa8977 ("sched/hotplug: Move sync_rcu to be with set_cpu_active(false)") moved the synchronize_*() calls to sched_cpu_deactivate() associated with the new hotplug state, but left the comment behind. Remove this comment as it is no longer relevant in takedown_cpu(). Signed-off-by: Waiman Long Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250729191232.664931-1-longman@redhat.com --- kernel/cpu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index faf0f23fc5d8..db9f6c539b28 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1309,9 +1309,6 @@ static int takedown_cpu(unsigned int cpu) */ irq_lock_sparse(); - /* - * So now all preempt/rcu users must observe !cpu_active(). - */ err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu)); if (err) { /* CPU refused to die */ From 928587381b54b1b6c62736486b1dc6cb16c568c2 Mon Sep 17 00:00:00 2001 From: Xaver Hugl Date: Fri, 1 Aug 2025 00:49:51 +0200 Subject: [PATCH 1672/2411] amdgpu/amdgpu_discovery: increase timeout limit for IFWI init With a timeout of only 1 second, my rx 5700XT fails to initialize, so this increases the timeout to 2s.
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3697 Signed-off-by: Xaver Hugl Signed-off-by: Alex Deucher (cherry picked from commit 9ed3d7bdf2dcdf1a1196630fab89a124526e9cc2) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 81b3443c8d7f..047a63496441 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -276,7 +276,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, u32 msg; if (!amdgpu_sriov_vf(adev)) { - /* It can take up to a second for IFWI init to complete on some dGPUs, + /* It can take up to two second for IFWI init to complete on some dGPUs, * but generally it should be in the 60-100ms range. Normally this starts * as soon as the device gets power so by the time the OS loads this has long * completed. However, when a card is hotplugged via e.g., USB4, we need to @@ -284,7 +284,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, * continue. */ - for (i = 0; i < 1000; i++) { + for (i = 0; i < 2000; i++) { msg = RREG32(mmMP0_SMN_C2PMSG_33); if (msg & 0x80000000) break; From 2e58401a24e7b2d4ec619104e1a76590c1284a4c Mon Sep 17 00:00:00 2001 From: Amber Lin Date: Thu, 31 Jul 2025 20:45:00 -0400 Subject: [PATCH 1673/2411] drm/amdkfd: Destroy KFD debugfs after destroy KFD wq Since KFD proc content was moved to kernel debugfs, we can't destroy KFD debugfs before kfd_process_destroy_wq. Move kfd_process_destroy_wq prior to kfd_debugfs_fini to fix a kernel NULL pointer problem. It happens when /sys/kernel/debug/kfd was already destroyed in kfd_debugfs_fini but kfd_process_destroy_wq calls kfd_debugfs_remove_process. 
This line debugfs_remove_recursive(entry->proc_dentry); tries to remove /sys/kernel/debug/kfd/proc/ while /sys/kernel/debug/kfd is already gone. It hangs the kernel with a NULL pointer dereference. Signed-off-by: Amber Lin Reviewed-by: Eric Huang Signed-off-by: Alex Deucher (cherry picked from commit 0333052d90683d88531558dcfdbf2525cc37c233) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index aee2212e52f6..33aa23450b3f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -78,8 +78,8 @@ static int kfd_init(void) static void kfd_exit(void) { kfd_cleanup_processes(); - kfd_debugfs_fini(); kfd_process_destroy_wq(); + kfd_debugfs_fini(); kfd_procfs_shutdown(); kfd_topology_shutdown(); kfd_chardev_exit(); From 514678da56da089b756b4d433efd964fa22b2079 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Jul 2025 11:16:05 -0400 Subject: [PATCH 1674/2411] drm/amdgpu/discovery: fix fw based ip discovery We only need the fw based discovery table for sysfs. No need to parse it. Additionally parsing some of the board specific tables may result in incorrect data on some boards. Just load the binary and don't parse it on those boards.
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4441 Fixes: 80a0e8282933 ("drm/amdgpu/discovery: optionally use fw based ip discovery") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Alex Deucher (cherry picked from commit 62eedd150fa11aefc2d377fc746633fdb1baeb55) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 72 ++++++++++--------- 2 files changed, 41 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a43ce3404849..aa223f6d4b59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2570,9 +2570,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) adev->firmware.gpu_info_fw = NULL; - if (adev->mman.discovery_bin) - return 0; - switch (adev->asic_type) { default: return 0; @@ -2594,6 +2591,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) chip_name = "arcturus"; break; case CHIP_NAVI12: + if (adev->mman.discovery_bin) + return 0; chip_name = "navi12"; break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 047a63496441..efe0058b48ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -2555,40 +2555,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_RAVEN: - case CHIP_VEGA20: - case CHIP_ARCTURUS: - case CHIP_ALDEBARAN: - /* this is not fatal. We have a fallback below - * if the new firmwares are not present. some of - * this will be overridden below to keep things - * consistent with the current behavior. + /* This is not fatal. We only need the discovery + * binary for sysfs. We don't need it for a + * functional system. 
*/ - r = amdgpu_discovery_reg_base_init(adev); - if (!r) { - amdgpu_discovery_harvest_ip(adev); - amdgpu_discovery_get_gfx_info(adev); - amdgpu_discovery_get_mall_info(adev); - amdgpu_discovery_get_vcn_info(adev); - } - break; - default: - r = amdgpu_discovery_reg_base_init(adev); - if (r) { - drm_err(&adev->ddev, "discovery failed: %d\n", r); - return r; - } - - amdgpu_discovery_harvest_ip(adev); - amdgpu_discovery_get_gfx_info(adev); - amdgpu_discovery_get_mall_info(adev); - amdgpu_discovery_get_vcn_info(adev); - break; - } - - switch (adev->asic_type) { - case CHIP_VEGA10: + amdgpu_discovery_init(adev); vega10_reg_base_init(adev); adev->sdma.num_instances = 2; adev->gmc.num_umc = 4; @@ -2611,6 +2582,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 0); break; case CHIP_VEGA12: + /* This is not fatal. We only need the discovery + * binary for sysfs. We don't need it for a + * functional system. + */ + amdgpu_discovery_init(adev); vega10_reg_base_init(adev); adev->sdma.num_instances = 2; adev->gmc.num_umc = 4; @@ -2633,6 +2609,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 1); break; case CHIP_RAVEN: + /* This is not fatal. We only need the discovery + * binary for sysfs. We don't need it for a + * functional system. + */ + amdgpu_discovery_init(adev); vega10_reg_base_init(adev); adev->sdma.num_instances = 1; adev->vcn.num_vcn_inst = 1; @@ -2674,6 +2655,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) } break; case CHIP_VEGA20: + /* This is not fatal. We only need the discovery + * binary for sysfs. We don't need it for a + * functional system. 
+ */ + amdgpu_discovery_init(adev); vega20_reg_base_init(adev); adev->sdma.num_instances = 2; adev->gmc.num_umc = 8; @@ -2697,6 +2683,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 1, 0); break; case CHIP_ARCTURUS: + /* This is not fatal. We only need the discovery + * binary for sysfs. We don't need it for a + * functional system. + */ + amdgpu_discovery_init(adev); arct_reg_base_init(adev); adev->sdma.num_instances = 8; adev->vcn.num_vcn_inst = 2; @@ -2725,6 +2716,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 5, 0); break; case CHIP_ALDEBARAN: + /* This is not fatal. We only need the discovery + * binary for sysfs. We don't need it for a + * functional system. + */ + amdgpu_discovery_init(adev); aldebaran_reg_base_init(adev); adev->sdma.num_instances = 5; adev->vcn.num_vcn_inst = 2; @@ -2751,6 +2747,16 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0); break; default: + r = amdgpu_discovery_reg_base_init(adev); + if (r) { + drm_err(&adev->ddev, "discovery failed: %d\n", r); + return r; + } + + amdgpu_discovery_harvest_ip(adev); + amdgpu_discovery_get_gfx_info(adev); + amdgpu_discovery_get_mall_info(adev); + amdgpu_discovery_get_vcn_info(adev); break; } From 81699fe81b0be287fb28b6210324db48e8458d9f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 4 Aug 2025 11:40:20 -0400 Subject: [PATCH 1675/2411] drm/amdgpu: add missing vram lost check for LEGACY RESET Legacy resets reset the memory controllers so VRAM contents may be unreliable after reset. 
Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit aae94897b6661a2a4b1de2d328090fc388b3e0af) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index aa223f6d4b59..e3022b396e30 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3270,6 +3270,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) * always assumed to be lost. */ switch (amdgpu_asic_reset_method(adev)) { + case AMD_RESET_METHOD_LEGACY: case AMD_RESET_METHOD_LINK: case AMD_RESET_METHOD_BACO: case AMD_RESET_METHOD_MODE1: From 234d1eff5d4987024be9d40ac07b918a5ae8db1a Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 4 Aug 2025 11:26:25 +0200 Subject: [PATCH 1676/2411] xfrm: restore GSO for SW crypto Commit 49431af6c4ef incorrectly assumes that the GSO path is only used by HW offload, but it's also useful for SW crypto. This patch re-enables GSO for SW crypto. It's not an exact revert to preserve the other changes made to xfrm_dev_offload_ok afterwards, but it reverts all of its effects. 
Fixes: 49431af6c4ef ("xfrm: rely on XFRM offload") Signed-off-by: Sabrina Dubroca Reviewed-by: Leon Romanovsky Reviewed-by: Zhu Yanjun Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_device.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index d2819baea414..1f88472aaac0 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -415,10 +415,12 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) struct net_device *dev = x->xso.dev; bool check_tunnel_size; - if (x->xso.type == XFRM_DEV_OFFLOAD_UNSPECIFIED) + if (!x->type_offload || + (x->xso.type == XFRM_DEV_OFFLOAD_UNSPECIFIED && x->encap)) return false; - if ((dev == xfrm_dst_path(dst)->dev) && !xdst->child->xfrm) { + if ((!dev || dev == xfrm_dst_path(dst)->dev) && + !xdst->child->xfrm) { mtu = xfrm_state_mtu(x, xdst->child_mtu_cached); if (skb->len <= mtu) goto ok; @@ -430,6 +432,9 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) return false; ok: + if (!dev) + return true; + check_tunnel_size = x->xso.type == XFRM_DEV_OFFLOAD_PACKET && x->props.mode == XFRM_MODE_TUNNEL; switch (x->props.family) { From 65f079a6c446a939eefe71e6d5957d5d6365fcf9 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 4 Aug 2025 11:26:26 +0200 Subject: [PATCH 1677/2411] xfrm: bring back device check in validate_xmit_xfrm This is partial revert of commit d53dda291bbd993a29b84d358d282076e3d01506. This change causes traffic using GSO with SW crypto running through a NIC capable of HW offload to no longer get segmented during validate_xmit_xfrm, and is unrelated to the bonding use case mentioned in the commit. 
Fixes: d53dda291bbd ("xfrm: Remove unneeded device check from validate_xmit_xfrm") Signed-off-by: Sabrina Dubroca Reviewed-by: Cosmin Ratiu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 1f88472aaac0..c7a1f080d2de 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -155,7 +155,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur return skb; } - if (skb_is_gso(skb) && unlikely(xmit_xfrm_check_overflow(skb))) { + if (skb_is_gso(skb) && (unlikely(x->xso.dev != dev) || + unlikely(xmit_xfrm_check_overflow(skb)))) { struct sk_buff *segs; /* Packet got rerouted, fixup features and segment it. */ From 1118aaa3b35157777890fffab91d8c1da841b20b Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 4 Aug 2025 11:26:27 +0200 Subject: [PATCH 1678/2411] udp: also consider secpath when evaluating ipsec use for checksumming Commit b40c5f4fde22 ("udp: disable inner UDP checksum offloads in IPsec case") tried to fix checksumming in UFO when the packets are going through IPsec, so that we can't rely on offloads because the UDP header and payload will be encrypted. But when doing a TCP test over VXLAN going through IPsec transport mode with GSO enabled (esp4_offload module loaded), I'm seeing broken UDP checksums on the encap after successful decryption. The skbs get to udp4_ufo_fragment/__skb_udp_tunnel_segment via __dev_queue_xmit -> validate_xmit_skb -> skb_gso_segment and at this point we've already dropped the dst (unless the device sets IFF_XMIT_DST_RELEASE, which is not common), so need_ipsec is false and we proceed with checksum offload. Make need_ipsec also check the secpath, which is not dropped on this callpath. 
Fixes: b40c5f4fde22 ("udp: disable inner UDP checksum offloads in IPsec case") Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/ipv4/udp_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 5128e2a5b00a..b1f3fd302e9d 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -217,7 +217,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM); skb->remcsum_offload = remcsum; - need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb)); + need_ipsec = (skb_dst(skb) && dst_xfrm(skb_dst(skb))) || skb_sec_path(skb); /* Try to offload checksum if possible */ offload_csum = !!(need_csum && !need_ipsec && From 6bd05db76751c872970c63be41c97172cfbec4c1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Aug 2025 21:25:37 +0200 Subject: [PATCH 1679/2411] ALSA: hda/realtek: Restrict prompt only for CONFIG_EXPERT The split of Realtek HD-audio codec driver may cause confusions especially when migrating from the previous kernel configurations because it's hard to know which driver to be enabled. Although we've already set default=y for those codec drivers, it may still make people changing the stuff unnecessarily without knowing its side effect. This patch is for avoiding such pitfalls by marking the prompt of each Realtek codec driver with CONFIG_EXPERT. For "normal" users (that is, unless CONFIG_EXPERT is set), all Realtek HD-audio codecs are enabled together with CONFIG_SND_HDA_CODEC_REALTEK; this is the very same situation like the previous kernels, after all. For users who really care about the minimalistic configuration, they can turn each driver on/off individually after setting CONFIG_EXPERT=y. The patch also adds the missing help text to the top-level CONFIG_SND_HDA_CODEC_REALTEK together with the explanation of individual choices, too. 
Fixes: aeeb85f26c3b ("ALSA: hda: Split Realtek HD-audio codec driver") Link: https://lore.kernel.org/10172c80-daec-4e20-ab57-a483cf1afc02@molgen.mpg.de Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250806192541.21949-2-tiwai@suse.de --- sound/hda/codecs/realtek/Kconfig | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/sound/hda/codecs/realtek/Kconfig b/sound/hda/codecs/realtek/Kconfig index 20899f3fc051..cdc6d9509a01 100644 --- a/sound/hda/codecs/realtek/Kconfig +++ b/sound/hda/codecs/realtek/Kconfig @@ -2,6 +2,12 @@ menuconfig SND_HDA_CODEC_REALTEK tristate "Realtek HD-audio codec support" + help + Say Y or M here to include Realtek HD-audio codec support. + + This will enable all Realtek HD-audio codec drivers as default, + but you can enable/disable each codec driver individually, too + (only when CONFIG_EXPERT is set). if SND_HDA_CODEC_REALTEK @@ -12,7 +18,7 @@ config SND_HDA_CODEC_REALTEK_LIB select SND_HDA_SCODEC_COMPONENT config SND_HDA_CODEC_ALC260 - tristate "Build Realtek ALC260 HD-audio codec support" + tristate "Build Realtek ALC260 HD-audio codec support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y @@ -20,7 +26,7 @@ config SND_HDA_CODEC_ALC260 Say Y or M here to include Realtek ALC260 HD-audio codec support config SND_HDA_CODEC_ALC262 - tristate "Build Realtek ALC262 HD-audio codec support" + tristate "Build Realtek ALC262 HD-audio codec support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y @@ -28,7 +34,7 @@ config SND_HDA_CODEC_ALC262 Say Y or M here to include Realtek ALC262 HD-audio codec support config SND_HDA_CODEC_ALC268 - tristate "Build Realtek ALC268 HD-audio codec support" + tristate "Build Realtek ALC268 HD-audio codec support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y @@ -37,7 +43,7 @@ config SND_HDA_CODEC_ALC268 codec support config SND_HDA_CODEC_ALC269 - tristate "Build Realtek ALC269 HD-audio codecs 
support" + tristate "Build Realtek ALC269 HD-audio codecs support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y @@ -46,7 +52,7 @@ config SND_HDA_CODEC_ALC269 codec support config SND_HDA_CODEC_ALC662 - tristate "Build Realtek ALC662 HD-audio codecs support" + tristate "Build Realtek ALC662 HD-audio codecs support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y @@ -55,7 +61,7 @@ config SND_HDA_CODEC_ALC662 codec support config SND_HDA_CODEC_ALC680 - tristate "Build Realtek ALC680 HD-audio codecs support" + tristate "Build Realtek ALC680 HD-audio codecs support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y @@ -63,7 +69,7 @@ config SND_HDA_CODEC_ALC680 Say Y or M here to include Realtek ALC680 HD-audio codec support config SND_HDA_CODEC_ALC861 - tristate "Build Realtek ALC861 HD-audio codecs support" + tristate "Build Realtek ALC861 HD-audio codecs support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y @@ -71,7 +77,7 @@ config SND_HDA_CODEC_ALC861 Say Y or M here to include Realtek ALC861 HD-audio codec support config SND_HDA_CODEC_ALC861VD - tristate "Build Realtek ALC861-VD HD-audio codecs support" + tristate "Build Realtek ALC861-VD HD-audio codecs support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y @@ -79,7 +85,7 @@ config SND_HDA_CODEC_ALC861VD Say Y or M here to include Realtek ALC861-VD HD-audio codec support config SND_HDA_CODEC_ALC880 - tristate "Build Realtek ALC880 HD-audio codecs support" + tristate "Build Realtek ALC880 HD-audio codecs support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y @@ -87,7 +93,7 @@ config SND_HDA_CODEC_ALC880 Say Y or M here to include Realtek ALC880 HD-audio codec support config SND_HDA_CODEC_ALC882 - tristate "Build Realtek ALC882 HD-audio codecs support" + tristate "Build Realtek ALC882 HD-audio codecs support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y 
@@ -96,5 +102,3 @@ config SND_HDA_CODEC_ALC882 codec support endif - - From 606fcab9aa212ba7d99a259663411d665070e317 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Aug 2025 21:25:38 +0200 Subject: [PATCH 1680/2411] ALSA: hda/hdmi: Restrict prompt only for CONFIG_EXPERT The split of HDMI codec driver may confuse users when migrating from the previous kernel configs and leave some drivers disabled unexpectedly. Although we've already set y to all HDMI codec drivers as default, it's still safer to paper over the wrong choices. This patch marks the prompt of each HDMI codec driver with CONFIG_EXPERT, so that they are all enabled when the top-level CONFIG_SND_HDA_CODEC_HDMI is set. For users who really care about the minimalistic configuration, they can turn each driver on/off individually after setting CONFIG_EXPERT=y. The patch also adds the missing help text to the top-level CONFIG_SND_HDA_CODEC_HDMI together with the explanation of individual choices, too. Fixes: 73cd0490819d ("ALSA: hda/hdmi: Split vendor codec drivers") Link: https://lore.kernel.org/10172c80-daec-4e20-ab57-a483cf1afc02@molgen.mpg.de Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250806192541.21949-3-tiwai@suse.de --- sound/hda/codecs/hdmi/Kconfig | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/sound/hda/codecs/hdmi/Kconfig b/sound/hda/codecs/hdmi/Kconfig index 973ca4ca077b..6ea3553ba9f8 100644 --- a/sound/hda/codecs/hdmi/Kconfig +++ b/sound/hda/codecs/hdmi/Kconfig @@ -2,11 +2,17 @@ menuconfig SND_HDA_CODEC_HDMI tristate "HD-audio HDMI codec support" + help + Say Y or M here to include HD-audio HDMI/DislayPort codec support. + + This will enable all HDMI/DP codec drivers as default, but you can + enable/disable each codec driver individually, too (only when + CONFIG_EXPERT is set). 
if SND_HDA_CODEC_HDMI config SND_HDA_CODEC_HDMI_GENERIC - tristate "Generic HDMI/DisplayPort HD-audio codec support" + tristate "Generic HDMI/DisplayPort HD-audio codec support" if EXPERT select SND_DYNAMIC_MINORS select SND_PCM_ELD default y @@ -18,14 +24,14 @@ config SND_HDA_CODEC_HDMI_GENERIC to assure the multiple streams for DP-MST support. config SND_HDA_CODEC_HDMI_SIMPLE - tristate "Simple HDMI/DisplayPort HD-audio codec support" + tristate "Simple HDMI/DisplayPort HD-audio codec support" if EXPERT default y help Say Y or M here to include Simple HDMI and DisplayPort HD-audio codec support for VIA and other codecs. config SND_HDA_CODEC_HDMI_INTEL - tristate "Intel HDMI/DisplayPort HD-audio codec support" + tristate "Intel HDMI/DisplayPort HD-audio codec support" if EXPERT select SND_HDA_CODEC_HDMI_GENERIC default y help @@ -48,7 +54,7 @@ config SND_HDA_INTEL_HDMI_SILENT_STREAM are kept reserved both at transmitter and receiver. config SND_HDA_CODEC_HDMI_ATI - tristate "AMD/ATI HDMI/DisplayPort HD-audio codec support" + tristate "AMD/ATI HDMI/DisplayPort HD-audio codec support" if EXPERT select SND_HDA_CODEC_HDMI_GENERIC default y help @@ -56,7 +62,7 @@ config SND_HDA_CODEC_HDMI_ATI HD-audio codec support. config SND_HDA_CODEC_HDMI_NVIDIA - tristate "Nvidia HDMI/DisplayPort HD-audio codec support" + tristate "Nvidia HDMI/DisplayPort HD-audio codec support" if EXPERT select SND_HDA_CODEC_HDMI_GENERIC default y help @@ -64,7 +70,7 @@ config SND_HDA_CODEC_HDMI_NVIDIA support for the recent Nvidia graphics cards. config SND_HDA_CODEC_HDMI_NVIDIA_MCP - tristate "Legacy Nvidia HDMI/DisplayPort HD-audio codec support" + tristate "Legacy Nvidia HDMI/DisplayPort HD-audio codec support" if EXPERT select SND_HDA_CODEC_HDMI_SIMPLE default y help @@ -72,7 +78,7 @@ config SND_HDA_CODEC_HDMI_NVIDIA_MCP support for the legacy Nvidia graphics like MCP73, MCP67, MCP77/78. 
config SND_HDA_CODEC_HDMI_TEGRA - tristate "Nvidia Tegra HDMI/DisplayPort HD-audio codec support" + tristate "Nvidia Tegra HDMI/DisplayPort HD-audio codec support" if EXPERT select SND_HDA_CODEC_HDMI_GENERIC default y help From e8e4f3c242cc26de9d69bd8b3a678d1e50980abe Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Aug 2025 21:25:39 +0200 Subject: [PATCH 1681/2411] ALSA: hda/cirrus: Restrict prompt only for CONFIG_EXPERT The split of Cirrus HD-audio codec driver may confuse users when migrating from the previous kernel configs and leave the needed drivers disabled. Although we've already set y as default, it's still safer to paper over the wrong choices. This patch marks the prompt of split CS420x and CS421x codec drivers with CONFIG_EXPERT, so that they are all enabled when the top-level CONFIG_SND_HDA_CODEC_CIRRUS is set. For users who really care about the minimalistic configuration, they can turn each driver on/off individually after setting CONFIG_EXPERT=y. This patch adds the missing help text to the top-level CONFIG_SND_HDA_CODEC_CIRRUS together with the explanation of individual choices, and corrects the help texts that don't fit well nowadays, too. Fixes: 1cb8744a36c7 ("ALSA: hda/cirrus: Split to cs420x and cs421x drivers") Link: https://lore.kernel.org/10172c80-daec-4e20-ab57-a483cf1afc02@molgen.mpg.de Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250806192541.21949-4-tiwai@suse.de --- sound/hda/codecs/cirrus/Kconfig | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/sound/hda/codecs/cirrus/Kconfig b/sound/hda/codecs/cirrus/Kconfig index 33cfe52713bc..ec6cbcaf64f0 100644 --- a/sound/hda/codecs/cirrus/Kconfig +++ b/sound/hda/codecs/cirrus/Kconfig @@ -2,27 +2,31 @@ menuconfig SND_HDA_CODEC_CIRRUS tristate "Cirrus Logic HD-audio codec support" + help + Say Y or M here to include Cirrus Logic HD-audio codec support.
+ + This will enable both CS420x and CS421x HD-audio codec drivers + as default, but you can enable/disable each codec driver + individually, too (only when CONFIG_EXPERT is set). if SND_HDA_CODEC_CIRRUS config SND_HDA_CODEC_CS420X - tristate "Build Cirrus Logic CS420x codec support" + tristate "Build Cirrus Logic CS420x codec support" if EXPERT select SND_HDA_GENERIC default y help - Say Y or M here to include Cirrus Logic CS420x codec support in - snd-hda-intel driver + Say Y or M here to include Cirrus Logic CS420x codec support comment "Set to Y if you want auto-loading the codec driver" depends on SND_HDA=y && SND_HDA_CODEC_CS420X=m config SND_HDA_CODEC_CS421X - tristate "Build Cirrus Logic CS421x codec support" + tristate "Build Cirrus Logic CS421x codec support" if EXPERT select SND_HDA_GENERIC default y help - Say Y or M here to include Cirrus Logic CS421x codec support in - snd-hda-intel driver + Say Y or M here to include Cirrus Logic CS421x codec support comment "Set to Y if you want auto-loading the codec driver" depends on SND_HDA=y && SND_HDA_CODEC_CS421X=m @@ -31,8 +35,8 @@ config SND_HDA_CODEC_CS8409 tristate "Build Cirrus Logic HDA bridge support" select SND_HDA_GENERIC help - Say Y or M here to include Cirrus Logic HDA bridge support in - snd-hda-intel driver, such as CS8409. + Say Y or M here to include Cirrus Logic HDA bridge support + such as CS8409. comment "Set to Y if you want auto-loading the codec driver" depends on SND_HDA=y && SND_HDA_CODEC_CS8409=m From 397a46c9aa3343e8efe6847bdaa124945bab1de4 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 25 Jul 2025 09:46:50 +0200 Subject: [PATCH 1682/2411] gpio: remove legacy GPIO line value setter callbacks With no more users of the legacy GPIO line value setters - .set() and .set_multiple() - we can now remove them from the kernel. 
Link: https://lore.kernel.org/r/20250725074651.14002-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 27 ++++++--------------------- include/linux/gpio/driver.h | 7 ------- 2 files changed, 6 insertions(+), 28 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index a93d2a9355e2..9ac4c23d656a 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1037,11 +1037,6 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, int base = 0; int ret; - /* Only allow one set() and one set_multiple(). */ - if ((gc->set && gc->set_rv) || - (gc->set_multiple && gc->set_multiple_rv)) - return -EINVAL; - /* * First: allocate and populate the internal stat container, and * set up the struct device. @@ -2891,19 +2886,14 @@ static int gpiochip_set(struct gpio_chip *gc, unsigned int offset, int value) lockdep_assert_held(&gc->gpiodev->srcu); - if (WARN_ON(unlikely(!gc->set && !gc->set_rv))) + if (WARN_ON(unlikely(!gc->set_rv))) return -EOPNOTSUPP; - if (gc->set_rv) { - ret = gc->set_rv(gc, offset, value); - if (ret > 0) - ret = -EBADE; + ret = gc->set_rv(gc, offset, value); + if (ret > 0) + ret = -EBADE; - return ret; - } - - gc->set(gc, offset, value); - return 0; + return ret; } static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value) @@ -2919,7 +2909,7 @@ static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value) * output-only, but if there is then not even a .set() operation it * is pretty tricky to drive the output line. 
*/ - if (!guard.gc->set && !guard.gc->set_rv && !guard.gc->direction_output) { + if (!guard.gc->set_rv && !guard.gc->direction_output) { gpiod_warn(desc, "%s: missing set() and direction_output() operations\n", __func__); @@ -3673,11 +3663,6 @@ static int gpiochip_set_multiple(struct gpio_chip *gc, return ret; } - if (gc->set_multiple) { - gc->set_multiple(gc, mask, bits); - return 0; - } - /* set outputs if the corresponding mask bit is set */ for_each_set_bit(i, mask, gc->ngpio) { ret = gpiochip_set(gc, i, test_bit(i, bits)); diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 4b984e8f8fcd..90567dde7d8e 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -347,8 +347,6 @@ struct gpio_irq_chip { * @get: returns value for signal "offset", 0=low, 1=high, or negative error * @get_multiple: reads values for multiple signals defined by "mask" and * stores them in "bits", returns 0 on success or negative error - * @set: **DEPRECATED** - please use set_rv() instead - * @set_multiple: **DEPRECATED** - please use set_multiple_rv() instead * @set_rv: assigns output value for signal "offset", returns 0 on success or * negative error value * @set_multiple_rv: assigns output values for multiple signals defined by @@ -445,11 +443,6 @@ struct gpio_chip { int (*get_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); - void (*set)(struct gpio_chip *gc, - unsigned int offset, int value); - void (*set_multiple)(struct gpio_chip *gc, - unsigned long *mask, - unsigned long *bits); int (*set_rv)(struct gpio_chip *gc, unsigned int offset, int value); From d9d87d90cc0b10cd56ae353f50b11417e7d21712 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 17 Jul 2025 15:21:26 +0200 Subject: [PATCH 1683/2411] treewide: rename GPIO set callbacks back to their original names The conversion of all GPIO drivers to using the .set_rv() and .set_multiple_rv() callbacks from struct gpio_chip (which - unlike their 
predecessors - return an integer and allow the controller drivers to indicate failures to users) is now complete and the legacy ones have been removed. Rename the new callbacks back to their original names in one sweeping change. Signed-off-by: Bartosz Golaszewski --- arch/arm/common/sa1111.c | 4 ++-- arch/arm/common/scoop.c | 2 +- arch/arm/mach-s3c/gpio-samsung.c | 2 +- arch/arm/mach-sa1100/assabet.c | 2 +- arch/arm/mach-sa1100/neponset.c | 2 +- arch/arm/plat-orion/gpio.c | 2 +- arch/m68k/coldfire/gpio.c | 2 +- arch/mips/alchemy/common/gpiolib.c | 6 ++--- arch/mips/bcm63xx/gpio.c | 2 +- arch/mips/kernel/gpio_txx9.c | 2 +- arch/mips/rb532/gpio.c | 2 +- arch/mips/txx9/generic/setup.c | 2 +- arch/powerpc/platforms/44x/gpio.c | 2 +- arch/powerpc/platforms/52xx/mpc52xx_gpt.c | 2 +- .../powerpc/platforms/83xx/mcu_mpc8349emitx.c | 2 +- arch/powerpc/platforms/8xx/cpm1.c | 4 ++-- arch/powerpc/sysdev/cpm_common.c | 2 +- drivers/bcma/driver_gpio.c | 2 +- drivers/gpio/gpio-74x164.c | 4 ++-- drivers/gpio/gpio-adnp.c | 2 +- drivers/gpio/gpio-adp5520.c | 2 +- drivers/gpio/gpio-adp5585.c | 2 +- drivers/gpio/gpio-aggregator.c | 4 ++-- drivers/gpio/gpio-altera-a10sr.c | 2 +- drivers/gpio/gpio-altera.c | 2 +- drivers/gpio/gpio-amd-fch.c | 2 +- drivers/gpio/gpio-amd8111.c | 2 +- drivers/gpio/gpio-arizona.c | 2 +- drivers/gpio/gpio-aspeed-sgpio.c | 2 +- drivers/gpio/gpio-aspeed.c | 2 +- drivers/gpio/gpio-bcm-kona.c | 2 +- drivers/gpio/gpio-bd71815.c | 2 +- drivers/gpio/gpio-bd71828.c | 2 +- drivers/gpio/gpio-bd9571mwv.c | 2 +- drivers/gpio/gpio-bt8xx.c | 2 +- drivers/gpio/gpio-cgbc.c | 2 +- drivers/gpio/gpio-creg-snps.c | 2 +- drivers/gpio/gpio-cros-ec.c | 2 +- drivers/gpio/gpio-crystalcove.c | 2 +- drivers/gpio/gpio-cs5535.c | 2 +- drivers/gpio/gpio-da9052.c | 2 +- drivers/gpio/gpio-da9055.c | 2 +- drivers/gpio/gpio-davinci.c | 2 +- drivers/gpio/gpio-dln2.c | 2 +- drivers/gpio/gpio-eic-sprd.c | 2 +- drivers/gpio/gpio-em.c | 2 +- drivers/gpio/gpio-exar.c | 2 +- 
drivers/gpio/gpio-f7188x.c | 2 +- drivers/gpio/gpio-graniterapids.c | 2 +- drivers/gpio/gpio-gw-pld.c | 2 +- drivers/gpio/gpio-htc-egpio.c | 2 +- drivers/gpio/gpio-ich.c | 2 +- drivers/gpio/gpio-imx-scu.c | 2 +- drivers/gpio/gpio-it87.c | 2 +- drivers/gpio/gpio-janz-ttl.c | 2 +- drivers/gpio/gpio-kempld.c | 2 +- drivers/gpio/gpio-latch.c | 4 ++-- drivers/gpio/gpio-ljca.c | 2 +- drivers/gpio/gpio-logicvc.c | 2 +- drivers/gpio/gpio-loongson-64bit.c | 2 +- drivers/gpio/gpio-loongson.c | 2 +- drivers/gpio/gpio-lp3943.c | 2 +- drivers/gpio/gpio-lp873x.c | 2 +- drivers/gpio/gpio-lp87565.c | 2 +- drivers/gpio/gpio-lpc18xx.c | 2 +- drivers/gpio/gpio-lpc32xx.c | 10 ++++---- drivers/gpio/gpio-macsmc.c | 2 +- drivers/gpio/gpio-madera.c | 2 +- drivers/gpio/gpio-max730x.c | 2 +- drivers/gpio/gpio-max732x.c | 4 ++-- drivers/gpio/gpio-max77620.c | 2 +- drivers/gpio/gpio-max77650.c | 2 +- drivers/gpio/gpio-max77759.c | 2 +- drivers/gpio/gpio-mb86s7x.c | 2 +- drivers/gpio/gpio-mc33880.c | 2 +- drivers/gpio/gpio-ml-ioh.c | 2 +- drivers/gpio/gpio-mm-lantiq.c | 2 +- drivers/gpio/gpio-mmio.c | 24 +++++++++---------- drivers/gpio/gpio-mockup.c | 4 ++-- drivers/gpio/gpio-moxtet.c | 2 +- drivers/gpio/gpio-mpc5200.c | 4 ++-- drivers/gpio/gpio-mpfs.c | 2 +- drivers/gpio/gpio-mpsse.c | 4 ++-- drivers/gpio/gpio-msc313.c | 2 +- drivers/gpio/gpio-mvebu.c | 2 +- drivers/gpio/gpio-nomadik.c | 2 +- drivers/gpio/gpio-npcm-sgpio.c | 4 ++-- drivers/gpio/gpio-octeon.c | 2 +- drivers/gpio/gpio-omap.c | 4 ++-- drivers/gpio/gpio-palmas.c | 2 +- drivers/gpio/gpio-pca953x.c | 4 ++-- drivers/gpio/gpio-pca9570.c | 2 +- drivers/gpio/gpio-pcf857x.c | 4 ++-- drivers/gpio/gpio-pch.c | 2 +- drivers/gpio/gpio-pl061.c | 2 +- drivers/gpio/gpio-pxa.c | 2 +- drivers/gpio/gpio-raspberrypi-exp.c | 2 +- drivers/gpio/gpio-rc5t583.c | 2 +- drivers/gpio/gpio-rcar.c | 4 ++-- drivers/gpio/gpio-rdc321x.c | 2 +- drivers/gpio/gpio-reg.c | 6 ++--- drivers/gpio/gpio-regmap.c | 4 ++-- drivers/gpio/gpio-rockchip.c | 2 +- 
drivers/gpio/gpio-rtd.c | 2 +- drivers/gpio/gpio-sa1100.c | 2 +- drivers/gpio/gpio-sama5d2-piobu.c | 2 +- drivers/gpio/gpio-sch.c | 2 +- drivers/gpio/gpio-sch311x.c | 2 +- drivers/gpio/gpio-sim.c | 4 ++-- drivers/gpio/gpio-siox.c | 2 +- drivers/gpio/gpio-spear-spics.c | 2 +- drivers/gpio/gpio-sprd.c | 2 +- drivers/gpio/gpio-stmpe.c | 2 +- drivers/gpio/gpio-stp-xway.c | 2 +- drivers/gpio/gpio-syscon.c | 4 ++-- drivers/gpio/gpio-tangier.c | 2 +- drivers/gpio/gpio-tc3589x.c | 2 +- drivers/gpio/gpio-tegra.c | 2 +- drivers/gpio/gpio-tegra186.c | 2 +- drivers/gpio/gpio-thunderx.c | 4 ++-- drivers/gpio/gpio-timberdale.c | 2 +- drivers/gpio/gpio-tpic2810.c | 4 ++-- drivers/gpio/gpio-tps65086.c | 2 +- drivers/gpio/gpio-tps65218.c | 2 +- drivers/gpio/gpio-tps65219.c | 4 ++-- drivers/gpio/gpio-tps6586x.c | 2 +- drivers/gpio/gpio-tps65910.c | 2 +- drivers/gpio/gpio-tps65912.c | 2 +- drivers/gpio/gpio-tps68470.c | 2 +- drivers/gpio/gpio-tqmx86.c | 2 +- drivers/gpio/gpio-ts4900.c | 2 +- drivers/gpio/gpio-ts5500.c | 2 +- drivers/gpio/gpio-twl4030.c | 2 +- drivers/gpio/gpio-twl6040.c | 2 +- drivers/gpio/gpio-uniphier.c | 4 ++-- drivers/gpio/gpio-viperboard.c | 4 ++-- drivers/gpio/gpio-virtio.c | 2 +- drivers/gpio/gpio-vx855.c | 2 +- drivers/gpio/gpio-wcd934x.c | 2 +- drivers/gpio/gpio-wcove.c | 2 +- drivers/gpio/gpio-winbond.c | 2 +- drivers/gpio/gpio-wm831x.c | 2 +- drivers/gpio/gpio-wm8350.c | 2 +- drivers/gpio/gpio-wm8994.c | 2 +- drivers/gpio/gpio-xgene.c | 2 +- drivers/gpio/gpio-xilinx.c | 4 ++-- drivers/gpio/gpio-xlp.c | 2 +- drivers/gpio/gpio-xra1403.c | 2 +- drivers/gpio/gpio-xtensa.c | 2 +- drivers/gpio/gpio-zevio.c | 2 +- drivers/gpio/gpio-zynq.c | 2 +- drivers/gpio/gpio-zynqmp-modepin.c | 2 +- drivers/gpio/gpiolib.c | 10 ++++---- drivers/gpu/drm/bridge/ti-sn65dsi86.c | 2 +- drivers/hid/hid-cp2112.c | 2 +- drivers/hid/hid-mcp2200.c | 4 ++-- drivers/hid/hid-mcp2221.c | 2 +- drivers/hwmon/ltc2992.c | 4 ++-- drivers/hwmon/pmbus/ucd9000.c | 2 +- 
drivers/i2c/muxes/i2c-mux-ltc4306.c | 2 +- drivers/iio/adc/ad4130.c | 2 +- drivers/iio/adc/ad4170-4.c | 2 +- drivers/iio/adc/ad7768-1.c | 2 +- drivers/iio/adc/rohm-bd79124.c | 4 ++-- drivers/iio/adc/ti-ads7950.c | 2 +- drivers/iio/addac/ad74115.c | 2 +- drivers/iio/addac/ad74413r.c | 4 ++-- drivers/iio/dac/ad5592r-base.c | 2 +- drivers/input/keyboard/adp5588-keys.c | 2 +- drivers/input/touchscreen/ad7879.c | 2 +- drivers/leds/blink/leds-lgm-sso.c | 2 +- drivers/leds/leds-pca9532.c | 2 +- drivers/leds/leds-pca955x.c | 2 +- drivers/leds/leds-tca6507.c | 2 +- drivers/media/dvb-frontends/cxd2820r_core.c | 2 +- drivers/media/i2c/ds90ub913.c | 2 +- drivers/media/i2c/ds90ub953.c | 2 +- drivers/media/i2c/max9286.c | 2 +- drivers/media/i2c/max96717.c | 2 +- drivers/media/pci/solo6x10/solo6x10-gpio.c | 2 +- drivers/mfd/sm501.c | 2 +- drivers/mfd/tps65010.c | 2 +- drivers/mfd/ucb1x00-core.c | 2 +- .../misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c | 2 +- drivers/misc/ti_fpc202.c | 2 +- drivers/net/can/spi/mcp251x.c | 4 ++-- drivers/net/dsa/mt7530.c | 2 +- drivers/net/dsa/vitesse-vsc73xx-core.c | 2 +- drivers/net/phy/qcom/qca807x.c | 2 +- drivers/pinctrl/actions/pinctrl-owl.c | 2 +- drivers/pinctrl/bcm/pinctrl-bcm2835.c | 4 ++-- drivers/pinctrl/bcm/pinctrl-iproc-gpio.c | 2 +- drivers/pinctrl/bcm/pinctrl-nsp-gpio.c | 2 +- drivers/pinctrl/cirrus/pinctrl-cs42l43.c | 2 +- drivers/pinctrl/cirrus/pinctrl-lochnagar.c | 2 +- drivers/pinctrl/intel/pinctrl-baytrail.c | 2 +- drivers/pinctrl/intel/pinctrl-cherryview.c | 2 +- drivers/pinctrl/intel/pinctrl-intel.c | 2 +- drivers/pinctrl/intel/pinctrl-lynxpoint.c | 2 +- drivers/pinctrl/mediatek/pinctrl-airoha.c | 2 +- drivers/pinctrl/mediatek/pinctrl-moore.c | 2 +- drivers/pinctrl/mediatek/pinctrl-mtk-common.c | 2 +- drivers/pinctrl/mediatek/pinctrl-paris.c | 2 +- drivers/pinctrl/meson/pinctrl-amlogic-a4.c | 2 +- drivers/pinctrl/meson/pinctrl-meson.c | 2 +- drivers/pinctrl/mvebu/pinctrl-armada-37xx.c | 2 +- drivers/pinctrl/nomadik/pinctrl-abx500.c 
| 2 +- drivers/pinctrl/nuvoton/pinctrl-ma35.c | 2 +- drivers/pinctrl/pinctrl-amd.c | 2 +- drivers/pinctrl/pinctrl-amdisp.c | 2 +- drivers/pinctrl/pinctrl-apple-gpio.c | 2 +- drivers/pinctrl/pinctrl-as3722.c | 2 +- drivers/pinctrl/pinctrl-at91-pio4.c | 4 ++-- drivers/pinctrl/pinctrl-at91.c | 4 ++-- drivers/pinctrl/pinctrl-aw9523.c | 4 ++-- drivers/pinctrl/pinctrl-axp209.c | 4 ++-- drivers/pinctrl/pinctrl-cy8c95x0.c | 4 ++-- drivers/pinctrl/pinctrl-da9062.c | 2 +- drivers/pinctrl/pinctrl-digicolor.c | 2 +- drivers/pinctrl/pinctrl-ingenic.c | 2 +- drivers/pinctrl/pinctrl-keembay.c | 2 +- drivers/pinctrl/pinctrl-mcp23s08.c | 4 ++-- drivers/pinctrl/pinctrl-microchip-sgpio.c | 2 +- drivers/pinctrl/pinctrl-ocelot.c | 2 +- drivers/pinctrl/pinctrl-pic32.c | 2 +- drivers/pinctrl/pinctrl-pistachio.c | 2 +- drivers/pinctrl/pinctrl-rk805.c | 2 +- drivers/pinctrl/pinctrl-rp1.c | 2 +- drivers/pinctrl/pinctrl-st.c | 2 +- drivers/pinctrl/pinctrl-stmfx.c | 2 +- drivers/pinctrl/pinctrl-sx150x.c | 4 ++-- drivers/pinctrl/pinctrl-xway.c | 2 +- drivers/pinctrl/qcom/pinctrl-lpass-lpi.c | 2 +- drivers/pinctrl/qcom/pinctrl-msm.c | 2 +- drivers/pinctrl/qcom/pinctrl-spmi-gpio.c | 2 +- drivers/pinctrl/qcom/pinctrl-spmi-mpp.c | 2 +- drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c | 2 +- drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c | 2 +- drivers/pinctrl/renesas/gpio.c | 2 +- drivers/pinctrl/renesas/pinctrl-rza1.c | 2 +- drivers/pinctrl/renesas/pinctrl-rza2.c | 2 +- drivers/pinctrl/renesas/pinctrl-rzg2l.c | 2 +- drivers/pinctrl/renesas/pinctrl-rzv2m.c | 2 +- drivers/pinctrl/samsung/pinctrl-samsung.c | 2 +- drivers/pinctrl/spear/pinctrl-plgpio.c | 2 +- .../starfive/pinctrl-starfive-jh7100.c | 2 +- .../starfive/pinctrl-starfive-jh7110.c | 2 +- drivers/pinctrl/stm32/pinctrl-stm32.c | 2 +- drivers/pinctrl/sunplus/sppctl.c | 2 +- drivers/pinctrl/sunxi/pinctrl-sunxi.c | 2 +- drivers/pinctrl/vt8500/pinctrl-wmt.c | 2 +- .../platform/cznic/turris-omnia-mcu-gpio.c | 4 ++-- drivers/platform/x86/barco-p50-gpio.c | 2 +- 
drivers/platform/x86/intel/int0002_vgpio.c | 2 +- drivers/platform/x86/portwell-ec.c | 4 ++-- drivers/platform/x86/silicom-platform.c | 2 +- drivers/pwm/pwm-pca9685.c | 2 +- .../regulator/rpi-panel-attiny-regulator.c | 2 +- drivers/soc/fsl/qe/gpio.c | 4 ++-- drivers/soc/renesas/pwc-rzv2m.c | 2 +- drivers/spi/spi-xcomm.c | 2 +- drivers/ssb/driver_gpio.c | 4 ++-- drivers/staging/greybus/gpio.c | 2 +- drivers/tty/serial/max310x.c | 2 +- drivers/tty/serial/sc16is7xx.c | 2 +- drivers/usb/serial/cp210x.c | 2 +- drivers/usb/serial/ftdi_sio.c | 4 ++-- drivers/video/fbdev/via/via-gpio.c | 2 +- include/linux/gpio/driver.h | 19 +++++++-------- include/linux/gpio/generic.h | 4 ++-- .../codecs/side-codecs/cirrus_scodec_test.c | 2 +- sound/soc/codecs/idt821034.c | 2 +- sound/soc/codecs/peb2466.c | 2 +- sound/soc/codecs/rt5677.c | 2 +- sound/soc/codecs/tlv320adc3xxx.c | 2 +- sound/soc/codecs/wm5100.c | 2 +- sound/soc/codecs/wm8903.c | 2 +- sound/soc/codecs/wm8962.c | 2 +- sound/soc/codecs/wm8996.c | 2 +- sound/soc/codecs/zl38060.c | 2 +- sound/soc/soc-ac97.c | 2 +- sound/soc/ti/davinci-mcasp.c | 2 +- 282 files changed, 355 insertions(+), 356 deletions(-) diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index d7e2ea27ce59..3389a70e4d49 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -617,8 +617,8 @@ static int sa1111_setup_gpios(struct sa1111 *sachip) sachip->gc.direction_input = sa1111_gpio_direction_input; sachip->gc.direction_output = sa1111_gpio_direction_output; sachip->gc.get = sa1111_gpio_get; - sachip->gc.set_rv = sa1111_gpio_set; - sachip->gc.set_multiple_rv = sa1111_gpio_set_multiple; + sachip->gc.set = sa1111_gpio_set; + sachip->gc.set_multiple = sa1111_gpio_set_multiple; sachip->gc.to_irq = sa1111_gpio_to_irq; sachip->gc.base = -1; sachip->gc.ngpio = 18; diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c index 2d3ee76c8e17..dddb73c96826 100644 --- a/arch/arm/common/scoop.c +++ b/arch/arm/common/scoop.c @@ -218,7 
+218,7 @@ static int scoop_probe(struct platform_device *pdev) devptr->gpio.label = dev_name(&pdev->dev); devptr->gpio.base = inf->gpio_base; devptr->gpio.ngpio = 12; /* PA11 = 0, PA12 = 1, etc. up to PA22 = 11 */ - devptr->gpio.set_rv = scoop_gpio_set; + devptr->gpio.set = scoop_gpio_set; devptr->gpio.get = scoop_gpio_get; devptr->gpio.direction_input = scoop_gpio_direction_input; devptr->gpio.direction_output = scoop_gpio_direction_output; diff --git a/arch/arm/mach-s3c/gpio-samsung.c b/arch/arm/mach-s3c/gpio-samsung.c index 206a492fbaf5..81e198e5a6d3 100644 --- a/arch/arm/mach-s3c/gpio-samsung.c +++ b/arch/arm/mach-s3c/gpio-samsung.c @@ -517,7 +517,7 @@ static void __init samsung_gpiolib_add(struct samsung_gpio_chip *chip) if (!gc->direction_output) gc->direction_output = samsung_gpiolib_2bit_output; if (!gc->set) - gc->set_rv = samsung_gpiolib_set; + gc->set = samsung_gpiolib_set; if (!gc->get) gc->get = samsung_gpiolib_get; diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c index bad8aa661e9d..2b833aa0212b 100644 --- a/arch/arm/mach-sa1100/assabet.c +++ b/arch/arm/mach-sa1100/assabet.c @@ -80,7 +80,7 @@ void ASSABET_BCR_frob(unsigned int mask, unsigned int val) { unsigned long m = mask, v = val; - assabet_bcr_gc->set_multiple_rv(assabet_bcr_gc, &m, &v); + assabet_bcr_gc->set_multiple(assabet_bcr_gc, &m, &v); } EXPORT_SYMBOL(ASSABET_BCR_frob); diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c index 6516598c8a71..88fe79f0a4ed 100644 --- a/arch/arm/mach-sa1100/neponset.c +++ b/arch/arm/mach-sa1100/neponset.c @@ -126,7 +126,7 @@ void neponset_ncr_frob(unsigned int mask, unsigned int val) unsigned long m = mask, v = val; if (nep) - n->gpio[0]->set_multiple_rv(n->gpio[0], &m, &v); + n->gpio[0]->set_multiple(n->gpio[0], &m, &v); else WARN(1, "nep unset\n"); } diff --git a/arch/arm/plat-orion/gpio.c b/arch/arm/plat-orion/gpio.c index 6f09f65e3d95..49e29b7894a3 100644 --- a/arch/arm/plat-orion/gpio.c +++ 
b/arch/arm/plat-orion/gpio.c @@ -540,7 +540,7 @@ void __init orion_gpio_init(int gpio_base, int ngpio, ochip->chip.direction_input = orion_gpio_direction_input; ochip->chip.get = orion_gpio_get; ochip->chip.direction_output = orion_gpio_direction_output; - ochip->chip.set_rv = orion_gpio_set; + ochip->chip.set = orion_gpio_set; ochip->chip.to_irq = orion_gpio_to_irq; ochip->chip.base = gpio_base; ochip->chip.ngpio = ngpio; diff --git a/arch/m68k/coldfire/gpio.c b/arch/m68k/coldfire/gpio.c index 30e5a4ed799d..e2f7af1facb2 100644 --- a/arch/m68k/coldfire/gpio.c +++ b/arch/m68k/coldfire/gpio.c @@ -160,7 +160,7 @@ static struct gpio_chip mcfgpio_chip = { .direction_input = mcfgpio_direction_input, .direction_output = mcfgpio_direction_output, .get = mcfgpio_get_value, - .set_rv = mcfgpio_set_value, + .set = mcfgpio_set_value, .to_irq = mcfgpio_to_irq, .base = 0, .ngpio = MCFGPIO_PIN_MAX, diff --git a/arch/mips/alchemy/common/gpiolib.c b/arch/mips/alchemy/common/gpiolib.c index 194034eba75f..e79e26ffac99 100644 --- a/arch/mips/alchemy/common/gpiolib.c +++ b/arch/mips/alchemy/common/gpiolib.c @@ -101,7 +101,7 @@ struct gpio_chip alchemy_gpio_chip[] = { .direction_input = gpio1_direction_input, .direction_output = gpio1_direction_output, .get = gpio1_get, - .set_rv = gpio1_set, + .set = gpio1_set, .to_irq = gpio1_to_irq, .base = ALCHEMY_GPIO1_BASE, .ngpio = ALCHEMY_GPIO1_NUM, @@ -111,7 +111,7 @@ struct gpio_chip alchemy_gpio_chip[] = { .direction_input = gpio2_direction_input, .direction_output = gpio2_direction_output, .get = gpio2_get, - .set_rv = gpio2_set, + .set = gpio2_set, .to_irq = gpio2_to_irq, .base = ALCHEMY_GPIO2_BASE, .ngpio = ALCHEMY_GPIO2_NUM, @@ -151,7 +151,7 @@ static struct gpio_chip au1300_gpiochip = { .direction_input = alchemy_gpic_dir_input, .direction_output = alchemy_gpic_dir_output, .get = alchemy_gpic_get, - .set_rv = alchemy_gpic_set, + .set = alchemy_gpic_set, .to_irq = alchemy_gpic_gpio_to_irq, .base = AU1300_GPIO_BASE, .ngpio = 
AU1300_GPIO_NUM, diff --git a/arch/mips/bcm63xx/gpio.c b/arch/mips/bcm63xx/gpio.c index e7a53cd0dec5..ff45a6989c3a 100644 --- a/arch/mips/bcm63xx/gpio.c +++ b/arch/mips/bcm63xx/gpio.c @@ -131,7 +131,7 @@ static struct gpio_chip bcm63xx_gpio_chip = { .direction_input = bcm63xx_gpio_direction_input, .direction_output = bcm63xx_gpio_direction_output, .get = bcm63xx_gpio_get, - .set_rv = bcm63xx_gpio_set, + .set = bcm63xx_gpio_set, .base = 0, }; diff --git a/arch/mips/kernel/gpio_txx9.c b/arch/mips/kernel/gpio_txx9.c index 027fb57d0d79..96ac40d20c23 100644 --- a/arch/mips/kernel/gpio_txx9.c +++ b/arch/mips/kernel/gpio_txx9.c @@ -70,7 +70,7 @@ static int txx9_gpio_dir_out(struct gpio_chip *chip, unsigned int offset, static struct gpio_chip txx9_gpio_chip = { .get = txx9_gpio_get, - .set_rv = txx9_gpio_set, + .set = txx9_gpio_set, .direction_input = txx9_gpio_dir_in, .direction_output = txx9_gpio_dir_out, .label = "TXx9", diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c index 0e47cd59b6cb..9aa5ef374465 100644 --- a/arch/mips/rb532/gpio.c +++ b/arch/mips/rb532/gpio.c @@ -164,7 +164,7 @@ static struct rb532_gpio_chip rb532_gpio_chip[] = { .direction_input = rb532_gpio_direction_input, .direction_output = rb532_gpio_direction_output, .get = rb532_gpio_get, - .set_rv = rb532_gpio_set, + .set = rb532_gpio_set, .to_irq = rb532_gpio_to_irq, .base = 0, .ngpio = 32, diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index 5a37e8b234a3..5dc867ea2c69 100644 --- a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -655,7 +655,7 @@ void __init txx9_iocled_init(unsigned long baseaddr, if (!iocled->mmioaddr) goto out_free; iocled->chip.get = txx9_iocled_get; - iocled->chip.set_rv = txx9_iocled_set; + iocled->chip.set = txx9_iocled_set; iocled->chip.direction_input = txx9_iocled_dir_in; iocled->chip.direction_output = txx9_iocled_dir_out; iocled->chip.label = "iocled"; diff --git a/arch/powerpc/platforms/44x/gpio.c 
b/arch/powerpc/platforms/44x/gpio.c index d540e261d85a..08ab76582568 100644 --- a/arch/powerpc/platforms/44x/gpio.c +++ b/arch/powerpc/platforms/44x/gpio.c @@ -180,7 +180,7 @@ static int __init ppc4xx_add_gpiochips(void) gc->direction_input = ppc4xx_gpio_dir_in; gc->direction_output = ppc4xx_gpio_dir_out; gc->get = ppc4xx_gpio_get; - gc->set_rv = ppc4xx_gpio_set; + gc->set = ppc4xx_gpio_set; ret = of_mm_gpiochip_add_data(np, mm_gc, ppc4xx_gc); if (ret) diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index bda707d848a6..7748b6641a3c 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -336,7 +336,7 @@ static void mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt) gpt->gc.direction_input = mpc52xx_gpt_gpio_dir_in; gpt->gc.direction_output = mpc52xx_gpt_gpio_dir_out; gpt->gc.get = mpc52xx_gpt_gpio_get; - gpt->gc.set_rv = mpc52xx_gpt_gpio_set; + gpt->gc.set = mpc52xx_gpt_gpio_set; gpt->gc.base = -1; gpt->gc.parent = gpt->dev; diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index 6e37dfc6c5c9..cb7b9498f291 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -126,7 +126,7 @@ static int mcu_gpiochip_add(struct mcu *mcu) gc->can_sleep = 1; gc->ngpio = MCU_NUM_GPIO; gc->base = -1; - gc->set_rv = mcu_gpio_set; + gc->set = mcu_gpio_set; gc->direction_output = mcu_gpio_dir_out; gc->parent = dev; diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index 7462c221115c..7433be7d66ee 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -499,7 +499,7 @@ int cpm1_gpiochip_add16(struct device *dev) gc->direction_input = cpm1_gpio16_dir_in; gc->direction_output = cpm1_gpio16_dir_out; gc->get = cpm1_gpio16_get; - gc->set_rv = cpm1_gpio16_set; + gc->set = cpm1_gpio16_set; gc->to_irq = 
cpm1_gpio16_to_irq; gc->parent = dev; gc->owner = THIS_MODULE; @@ -622,7 +622,7 @@ int cpm1_gpiochip_add32(struct device *dev) gc->direction_input = cpm1_gpio32_dir_in; gc->direction_output = cpm1_gpio32_dir_out; gc->get = cpm1_gpio32_get; - gc->set_rv = cpm1_gpio32_set; + gc->set = cpm1_gpio32_set; gc->parent = dev; gc->owner = THIS_MODULE; diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c index e22fc638dbc7..f469f6a9f6e0 100644 --- a/arch/powerpc/sysdev/cpm_common.c +++ b/arch/powerpc/sysdev/cpm_common.c @@ -210,7 +210,7 @@ int cpm2_gpiochip_add32(struct device *dev) gc->direction_input = cpm2_gpio32_dir_in; gc->direction_output = cpm2_gpio32_dir_out; gc->get = cpm2_gpio32_get; - gc->set_rv = cpm2_gpio32_set; + gc->set = cpm2_gpio32_set; gc->parent = dev; gc->owner = THIS_MODULE; diff --git a/drivers/bcma/driver_gpio.c b/drivers/bcma/driver_gpio.c index f021e27644e0..658c7e2ac8bf 100644 --- a/drivers/bcma/driver_gpio.c +++ b/drivers/bcma/driver_gpio.c @@ -186,7 +186,7 @@ int bcma_gpio_init(struct bcma_drv_cc *cc) chip->request = bcma_gpio_request; chip->free = bcma_gpio_free; chip->get = bcma_gpio_get_value; - chip->set_rv = bcma_gpio_set_value; + chip->set = bcma_gpio_set_value; chip->direction_input = bcma_gpio_direction_input; chip->direction_output = bcma_gpio_direction_output; chip->parent = bus->dev; diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c index 4dd5c2c330bb..c226524efeba 100644 --- a/drivers/gpio/gpio-74x164.c +++ b/drivers/gpio/gpio-74x164.c @@ -141,8 +141,8 @@ static int gen_74x164_probe(struct spi_device *spi) chip->gpio_chip.label = spi->modalias; chip->gpio_chip.direction_output = gen_74x164_direction_output; chip->gpio_chip.get = gen_74x164_get_value; - chip->gpio_chip.set_rv = gen_74x164_set_value; - chip->gpio_chip.set_multiple_rv = gen_74x164_set_multiple; + chip->gpio_chip.set = gen_74x164_set_value; + chip->gpio_chip.set_multiple = gen_74x164_set_multiple; chip->gpio_chip.base = -1; 
chip->gpio_chip.ngpio = GEN_74X164_NUMBER_GPIOS * chip->registers; chip->gpio_chip.can_sleep = true; diff --git a/drivers/gpio/gpio-adnp.c b/drivers/gpio/gpio-adnp.c index dc2b941c3726..e5ac2d211013 100644 --- a/drivers/gpio/gpio-adnp.c +++ b/drivers/gpio/gpio-adnp.c @@ -430,7 +430,7 @@ static int adnp_gpio_setup(struct adnp *adnp, unsigned int num_gpios, chip->direction_input = adnp_gpio_direction_input; chip->direction_output = adnp_gpio_direction_output; chip->get = adnp_gpio_get; - chip->set_rv = adnp_gpio_set; + chip->set = adnp_gpio_set; chip->can_sleep = true; if (IS_ENABLED(CONFIG_DEBUG_FS)) diff --git a/drivers/gpio/gpio-adp5520.c b/drivers/gpio/gpio-adp5520.c index 57d12c10cbda..6305c8b7dc05 100644 --- a/drivers/gpio/gpio-adp5520.c +++ b/drivers/gpio/gpio-adp5520.c @@ -122,7 +122,7 @@ static int adp5520_gpio_probe(struct platform_device *pdev) gc->direction_input = adp5520_gpio_direction_input; gc->direction_output = adp5520_gpio_direction_output; gc->get = adp5520_gpio_get_value; - gc->set_rv = adp5520_gpio_set_value; + gc->set = adp5520_gpio_set_value; gc->can_sleep = true; gc->base = pdata->gpio_start; diff --git a/drivers/gpio/gpio-adp5585.c b/drivers/gpio/gpio-adp5585.c index b2c8836c5f84..0fd3cc26d017 100644 --- a/drivers/gpio/gpio-adp5585.c +++ b/drivers/gpio/gpio-adp5585.c @@ -428,7 +428,7 @@ static int adp5585_gpio_probe(struct platform_device *pdev) gc->direction_input = adp5585_gpio_direction_input; gc->direction_output = adp5585_gpio_direction_output; gc->get = adp5585_gpio_get_value; - gc->set_rv = adp5585_gpio_set_value; + gc->set = adp5585_gpio_set_value; gc->set_config = adp5585_gpio_set_config; gc->request = adp5585_gpio_request; gc->free = adp5585_gpio_free; diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c index 6f941db02c04..af9d8b3a711d 100644 --- a/drivers/gpio/gpio-aggregator.c +++ b/drivers/gpio/gpio-aggregator.c @@ -534,8 +534,8 @@ static struct gpiochip_fwd *gpiochip_fwd_create(struct device *dev, 
chip->direction_output = gpio_fwd_direction_output; chip->get = gpio_fwd_get; chip->get_multiple = gpio_fwd_get_multiple_locked; - chip->set_rv = gpio_fwd_set; - chip->set_multiple_rv = gpio_fwd_set_multiple_locked; + chip->set = gpio_fwd_set; + chip->set_multiple = gpio_fwd_set_multiple_locked; chip->to_irq = gpio_fwd_to_irq; chip->base = -1; chip->ngpio = ngpios; diff --git a/drivers/gpio/gpio-altera-a10sr.c b/drivers/gpio/gpio-altera-a10sr.c index 77a674cf99e4..4524c18a87e7 100644 --- a/drivers/gpio/gpio-altera-a10sr.c +++ b/drivers/gpio/gpio-altera-a10sr.c @@ -69,7 +69,7 @@ static const struct gpio_chip altr_a10sr_gc = { .label = "altr_a10sr_gpio", .owner = THIS_MODULE, .get = altr_a10sr_gpio_get, - .set_rv = altr_a10sr_gpio_set, + .set = altr_a10sr_gpio_set, .direction_input = altr_a10sr_gpio_direction_input, .direction_output = altr_a10sr_gpio_direction_output, .can_sleep = true, diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c index 1b28525726d7..9508d764cce4 100644 --- a/drivers/gpio/gpio-altera.c +++ b/drivers/gpio/gpio-altera.c @@ -259,7 +259,7 @@ static int altera_gpio_probe(struct platform_device *pdev) altera_gc->gc.direction_input = altera_gpio_direction_input; altera_gc->gc.direction_output = altera_gpio_direction_output; altera_gc->gc.get = altera_gpio_get; - altera_gc->gc.set_rv = altera_gpio_set; + altera_gc->gc.set = altera_gpio_set; altera_gc->gc.owner = THIS_MODULE; altera_gc->gc.parent = &pdev->dev; altera_gc->gc.base = -1; diff --git a/drivers/gpio/gpio-amd-fch.c b/drivers/gpio/gpio-amd-fch.c index f8d0cea46049..e6c6c3ec7656 100644 --- a/drivers/gpio/gpio-amd-fch.c +++ b/drivers/gpio/gpio-amd-fch.c @@ -165,7 +165,7 @@ static int amd_fch_gpio_probe(struct platform_device *pdev) priv->gc.direction_output = amd_fch_gpio_direction_output; priv->gc.get_direction = amd_fch_gpio_get_direction; priv->gc.get = amd_fch_gpio_get; - priv->gc.set_rv = amd_fch_gpio_set; + priv->gc.set = amd_fch_gpio_set; spin_lock_init(&priv->lock); 
diff --git a/drivers/gpio/gpio-amd8111.c b/drivers/gpio/gpio-amd8111.c index 425d8472f744..15fd5e210d74 100644 --- a/drivers/gpio/gpio-amd8111.c +++ b/drivers/gpio/gpio-amd8111.c @@ -165,7 +165,7 @@ static struct amd_gpio gp = { .ngpio = 32, .request = amd_gpio_request, .free = amd_gpio_free, - .set_rv = amd_gpio_set, + .set = amd_gpio_set, .get = amd_gpio_get, .direction_output = amd_gpio_dirout, .direction_input = amd_gpio_dirin, diff --git a/drivers/gpio/gpio-arizona.c b/drivers/gpio/gpio-arizona.c index 89ffde693019..a7e98d395d8e 100644 --- a/drivers/gpio/gpio-arizona.c +++ b/drivers/gpio/gpio-arizona.c @@ -138,7 +138,7 @@ static const struct gpio_chip template_chip = { .direction_input = arizona_gpio_direction_in, .get = arizona_gpio_get, .direction_output = arizona_gpio_direction_out, - .set_rv = arizona_gpio_set, + .set = arizona_gpio_set, .can_sleep = true, }; diff --git a/drivers/gpio/gpio-aspeed-sgpio.c b/drivers/gpio/gpio-aspeed-sgpio.c index 00b31497ecff..7622f9e9f54a 100644 --- a/drivers/gpio/gpio-aspeed-sgpio.c +++ b/drivers/gpio/gpio-aspeed-sgpio.c @@ -596,7 +596,7 @@ static int __init aspeed_sgpio_probe(struct platform_device *pdev) gpio->chip.request = NULL; gpio->chip.free = NULL; gpio->chip.get = aspeed_sgpio_get; - gpio->chip.set_rv = aspeed_sgpio_set; + gpio->chip.set = aspeed_sgpio_set; gpio->chip.set_config = aspeed_sgpio_set_config; gpio->chip.label = dev_name(&pdev->dev); gpio->chip.base = -1; diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c index 2d340a343a17..7953a9c4e36d 100644 --- a/drivers/gpio/gpio-aspeed.c +++ b/drivers/gpio/gpio-aspeed.c @@ -1352,7 +1352,7 @@ static int aspeed_gpio_probe(struct platform_device *pdev) gpio->chip.request = aspeed_gpio_request; gpio->chip.free = aspeed_gpio_free; gpio->chip.get = aspeed_gpio_get; - gpio->chip.set_rv = aspeed_gpio_set; + gpio->chip.set = aspeed_gpio_set; gpio->chip.set_config = aspeed_gpio_set_config; gpio->chip.label = dev_name(&pdev->dev); gpio->chip.base = -1; 
diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index 8f22cb36004d..208b71c59d58 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -339,7 +339,7 @@ static const struct gpio_chip template_chip = { .direction_input = bcm_kona_gpio_direction_input, .get = bcm_kona_gpio_get, .direction_output = bcm_kona_gpio_direction_output, - .set_rv = bcm_kona_gpio_set, + .set = bcm_kona_gpio_set, .set_config = bcm_kona_gpio_set_config, .to_irq = bcm_kona_gpio_to_irq, .base = 0, diff --git a/drivers/gpio/gpio-bd71815.c b/drivers/gpio/gpio-bd71815.c index 36701500925e..afb18a5a9d79 100644 --- a/drivers/gpio/gpio-bd71815.c +++ b/drivers/gpio/gpio-bd71815.c @@ -85,7 +85,7 @@ static const struct gpio_chip bd71815gpo_chip = { .owner = THIS_MODULE, .get = bd71815gpo_get, .get_direction = bd71815gpo_direction_get, - .set_rv = bd71815gpo_set, + .set = bd71815gpo_set, .set_config = bd71815_gpio_set_config, .can_sleep = true, }; diff --git a/drivers/gpio/gpio-bd71828.c b/drivers/gpio/gpio-bd71828.c index 4ba151e5cf25..e439dbfffc62 100644 --- a/drivers/gpio/gpio-bd71828.c +++ b/drivers/gpio/gpio-bd71828.c @@ -109,7 +109,7 @@ static int bd71828_probe(struct platform_device *pdev) bdgpio->gpio.set_config = bd71828_gpio_set_config; bdgpio->gpio.can_sleep = true; bdgpio->gpio.get = bd71828_gpio_get; - bdgpio->gpio.set_rv = bd71828_gpio_set; + bdgpio->gpio.set = bd71828_gpio_set; bdgpio->gpio.base = -1; /* diff --git a/drivers/gpio/gpio-bd9571mwv.c b/drivers/gpio/gpio-bd9571mwv.c index 8df1361e3e84..7c95bb36511e 100644 --- a/drivers/gpio/gpio-bd9571mwv.c +++ b/drivers/gpio/gpio-bd9571mwv.c @@ -88,7 +88,7 @@ static const struct gpio_chip template_chip = { .direction_input = bd9571mwv_gpio_direction_input, .direction_output = bd9571mwv_gpio_direction_output, .get = bd9571mwv_gpio_get, - .set_rv = bd9571mwv_gpio_set, + .set = bd9571mwv_gpio_set, .base = -1, .ngpio = 2, .can_sleep = true, diff --git a/drivers/gpio/gpio-bt8xx.c 
b/drivers/gpio/gpio-bt8xx.c index 7c9e81fea37a..05401da03ca3 100644 --- a/drivers/gpio/gpio-bt8xx.c +++ b/drivers/gpio/gpio-bt8xx.c @@ -145,7 +145,7 @@ static void bt8xxgpio_gpio_setup(struct bt8xxgpio *bg) c->direction_input = bt8xxgpio_gpio_direction_input; c->get = bt8xxgpio_gpio_get; c->direction_output = bt8xxgpio_gpio_direction_output; - c->set_rv = bt8xxgpio_gpio_set; + c->set = bt8xxgpio_gpio_set; c->dbg_show = NULL; c->base = modparam_gpiobase; c->ngpio = BT8XXGPIO_NR_GPIOS; diff --git a/drivers/gpio/gpio-cgbc.c b/drivers/gpio/gpio-cgbc.c index 1495bec62456..0efa1b61001a 100644 --- a/drivers/gpio/gpio-cgbc.c +++ b/drivers/gpio/gpio-cgbc.c @@ -171,7 +171,7 @@ static int cgbc_gpio_probe(struct platform_device *pdev) chip->direction_output = cgbc_gpio_direction_output; chip->get_direction = cgbc_gpio_get_direction; chip->get = cgbc_gpio_get; - chip->set_rv = cgbc_gpio_set; + chip->set = cgbc_gpio_set; chip->ngpio = CGBC_GPIO_NGPIO; ret = devm_mutex_init(dev, &gpio->lock); diff --git a/drivers/gpio/gpio-creg-snps.c b/drivers/gpio/gpio-creg-snps.c index 8b49f02c7896..f8ea961fa1de 100644 --- a/drivers/gpio/gpio-creg-snps.c +++ b/drivers/gpio/gpio-creg-snps.c @@ -167,7 +167,7 @@ static int creg_gpio_probe(struct platform_device *pdev) hcg->gc.label = dev_name(dev); hcg->gc.base = -1; hcg->gc.ngpio = ngpios; - hcg->gc.set_rv = creg_gpio_set; + hcg->gc.set = creg_gpio_set; hcg->gc.direction_output = creg_gpio_dir_out; ret = devm_gpiochip_add_data(dev, &hcg->gc, hcg); diff --git a/drivers/gpio/gpio-cros-ec.c b/drivers/gpio/gpio-cros-ec.c index 53cd5ff6247b..435483826c6e 100644 --- a/drivers/gpio/gpio-cros-ec.c +++ b/drivers/gpio/gpio-cros-ec.c @@ -188,7 +188,7 @@ static int cros_ec_gpio_probe(struct platform_device *pdev) gc->can_sleep = true; gc->label = dev_name(dev); gc->base = -1; - gc->set_rv = cros_ec_gpio_set; + gc->set = cros_ec_gpio_set; gc->get = cros_ec_gpio_get; gc->get_direction = cros_ec_gpio_get_direction; diff --git a/drivers/gpio/gpio-crystalcove.c 
b/drivers/gpio/gpio-crystalcove.c index 8db7cca3a060..0fb5c06d0886 100644 --- a/drivers/gpio/gpio-crystalcove.c +++ b/drivers/gpio/gpio-crystalcove.c @@ -349,7 +349,7 @@ static int crystalcove_gpio_probe(struct platform_device *pdev) cg->chip.direction_input = crystalcove_gpio_dir_in; cg->chip.direction_output = crystalcove_gpio_dir_out; cg->chip.get = crystalcove_gpio_get; - cg->chip.set_rv = crystalcove_gpio_set; + cg->chip.set = crystalcove_gpio_set; cg->chip.base = -1; cg->chip.ngpio = CRYSTALCOVE_VGPIO_NUM; cg->chip.can_sleep = true; diff --git a/drivers/gpio/gpio-cs5535.c b/drivers/gpio/gpio-cs5535.c index 143d1f4173a6..8affe4e9f90e 100644 --- a/drivers/gpio/gpio-cs5535.c +++ b/drivers/gpio/gpio-cs5535.c @@ -296,7 +296,7 @@ static struct cs5535_gpio_chip cs5535_gpio_chip = { .request = chip_gpio_request, .get = chip_gpio_get, - .set_rv = chip_gpio_set, + .set = chip_gpio_set, .direction_input = chip_direction_input, .direction_output = chip_direction_output, diff --git a/drivers/gpio/gpio-da9052.c b/drivers/gpio/gpio-da9052.c index 6482c5b267db..495f0ee58505 100644 --- a/drivers/gpio/gpio-da9052.c +++ b/drivers/gpio/gpio-da9052.c @@ -172,7 +172,7 @@ static const struct gpio_chip reference_gp = { .label = "da9052-gpio", .owner = THIS_MODULE, .get = da9052_gpio_get, - .set_rv = da9052_gpio_set, + .set = da9052_gpio_set, .direction_input = da9052_gpio_direction_input, .direction_output = da9052_gpio_direction_output, .to_irq = da9052_gpio_to_irq, diff --git a/drivers/gpio/gpio-da9055.c b/drivers/gpio/gpio-da9055.c index 3d9d0c700100..a09bd6eb93cf 100644 --- a/drivers/gpio/gpio-da9055.c +++ b/drivers/gpio/gpio-da9055.c @@ -116,7 +116,7 @@ static const struct gpio_chip reference_gp = { .label = "da9055-gpio", .owner = THIS_MODULE, .get = da9055_gpio_get, - .set_rv = da9055_gpio_set, + .set = da9055_gpio_set, .direction_input = da9055_gpio_direction_input, .direction_output = da9055_gpio_direction_output, .to_irq = da9055_gpio_to_irq, diff --git 
a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c index 8f3a36d0191d..538f27209ce7 100644 --- a/drivers/gpio/gpio-davinci.c +++ b/drivers/gpio/gpio-davinci.c @@ -202,7 +202,7 @@ static int davinci_gpio_probe(struct platform_device *pdev) chips->chip.direction_input = davinci_direction_in; chips->chip.get = davinci_gpio_get; chips->chip.direction_output = davinci_direction_out; - chips->chip.set_rv = davinci_gpio_set; + chips->chip.set = davinci_gpio_set; chips->chip.ngpio = ngpio; chips->chip.base = -1; diff --git a/drivers/gpio/gpio-dln2.c b/drivers/gpio/gpio-dln2.c index 4bd3c47eaf93..4670ffd7ea7f 100644 --- a/drivers/gpio/gpio-dln2.c +++ b/drivers/gpio/gpio-dln2.c @@ -469,7 +469,7 @@ static int dln2_gpio_probe(struct platform_device *pdev) dln2->gpio.base = -1; dln2->gpio.ngpio = pins; dln2->gpio.can_sleep = true; - dln2->gpio.set_rv = dln2_gpio_set; + dln2->gpio.set = dln2_gpio_set; dln2->gpio.get = dln2_gpio_get; dln2->gpio.request = dln2_gpio_request; dln2->gpio.free = dln2_gpio_free; diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c index f2973d0b7138..50fafeda8d7e 100644 --- a/drivers/gpio/gpio-eic-sprd.c +++ b/drivers/gpio/gpio-eic-sprd.c @@ -663,7 +663,7 @@ static int sprd_eic_probe(struct platform_device *pdev) sprd_eic->chip.request = sprd_eic_request; sprd_eic->chip.free = sprd_eic_free; sprd_eic->chip.set_config = sprd_eic_set_config; - sprd_eic->chip.set_rv = sprd_eic_set; + sprd_eic->chip.set = sprd_eic_set; fallthrough; case SPRD_EIC_ASYNC: case SPRD_EIC_SYNC: diff --git a/drivers/gpio/gpio-em.c b/drivers/gpio/gpio-em.c index 015f1ac32dd9..a214b0672726 100644 --- a/drivers/gpio/gpio-em.c +++ b/drivers/gpio/gpio-em.c @@ -306,7 +306,7 @@ static int em_gio_probe(struct platform_device *pdev) gpio_chip->direction_input = em_gio_direction_input; gpio_chip->get = em_gio_get; gpio_chip->direction_output = em_gio_direction_output; - gpio_chip->set_rv = em_gio_set; + gpio_chip->set = em_gio_set; gpio_chip->to_irq = 
em_gio_to_irq; gpio_chip->request = pinctrl_gpio_request; gpio_chip->free = em_gio_free; diff --git a/drivers/gpio/gpio-exar.c b/drivers/gpio/gpio-exar.c index beb98286d13e..9053662f1817 100644 --- a/drivers/gpio/gpio-exar.c +++ b/drivers/gpio/gpio-exar.c @@ -211,7 +211,7 @@ static int gpio_exar_probe(struct platform_device *pdev) exar_gpio->gpio_chip.direction_input = exar_direction_input; exar_gpio->gpio_chip.get_direction = exar_get_direction; exar_gpio->gpio_chip.get = exar_get_value; - exar_gpio->gpio_chip.set_rv = exar_set_value; + exar_gpio->gpio_chip.set = exar_set_value; exar_gpio->gpio_chip.base = -1; exar_gpio->gpio_chip.ngpio = ngpios; exar_gpio->index = index; diff --git a/drivers/gpio/gpio-f7188x.c b/drivers/gpio/gpio-f7188x.c index dfcd3634f279..4d5b927ad70f 100644 --- a/drivers/gpio/gpio-f7188x.c +++ b/drivers/gpio/gpio-f7188x.c @@ -173,7 +173,7 @@ static int f7188x_gpio_set_config(struct gpio_chip *chip, unsigned offset, .direction_input = f7188x_gpio_direction_in, \ .get = f7188x_gpio_get, \ .direction_output = f7188x_gpio_direction_out, \ - .set_rv = f7188x_gpio_set, \ + .set = f7188x_gpio_set, \ .set_config = f7188x_gpio_set_config, \ .base = -1, \ .ngpio = _ngpio, \ diff --git a/drivers/gpio/gpio-graniterapids.c b/drivers/gpio/gpio-graniterapids.c index f25283e5239d..121bf29a27f5 100644 --- a/drivers/gpio/gpio-graniterapids.c +++ b/drivers/gpio/gpio-graniterapids.c @@ -159,7 +159,7 @@ static const struct gpio_chip gnr_gpio_chip = { .owner = THIS_MODULE, .request = gnr_gpio_request, .get = gnr_gpio_get, - .set_rv = gnr_gpio_set, + .set = gnr_gpio_set, .get_direction = gnr_gpio_get_direction, .direction_input = gnr_gpio_direction_input, .direction_output = gnr_gpio_direction_output, diff --git a/drivers/gpio/gpio-gw-pld.c b/drivers/gpio/gpio-gw-pld.c index a40ba99a3aea..2e5d97b7363f 100644 --- a/drivers/gpio/gpio-gw-pld.c +++ b/drivers/gpio/gpio-gw-pld.c @@ -86,7 +86,7 @@ static int gw_pld_probe(struct i2c_client *client) gw->chip.direction_input 
= gw_pld_input8; gw->chip.get = gw_pld_get8; gw->chip.direction_output = gw_pld_output8; - gw->chip.set_rv = gw_pld_set8; + gw->chip.set = gw_pld_set8; gw->client = client; /* diff --git a/drivers/gpio/gpio-htc-egpio.c b/drivers/gpio/gpio-htc-egpio.c index b1844a676c7c..2eaed83214d8 100644 --- a/drivers/gpio/gpio-htc-egpio.c +++ b/drivers/gpio/gpio-htc-egpio.c @@ -324,7 +324,7 @@ static int __init egpio_probe(struct platform_device *pdev) chip->parent = &pdev->dev; chip->owner = THIS_MODULE; chip->get = egpio_get; - chip->set_rv = egpio_set; + chip->set = egpio_set; chip->direction_input = egpio_direction_input; chip->direction_output = egpio_direction_output; chip->get_direction = egpio_get_direction; diff --git a/drivers/gpio/gpio-ich.c b/drivers/gpio/gpio-ich.c index 67089b2423d8..1802c9116ffe 100644 --- a/drivers/gpio/gpio-ich.c +++ b/drivers/gpio/gpio-ich.c @@ -273,7 +273,7 @@ static void ichx_gpiolib_setup(struct gpio_chip *chip) chip->get = ichx_priv.desc->get ? ichx_priv.desc->get : ichx_gpio_get; - chip->set_rv = ichx_gpio_set; + chip->set = ichx_gpio_set; chip->get_direction = ichx_gpio_get_direction; chip->direction_input = ichx_gpio_direction_input; chip->direction_output = ichx_gpio_direction_output; diff --git a/drivers/gpio/gpio-imx-scu.c b/drivers/gpio/gpio-imx-scu.c index 1693dbf1b777..0a75afecf9f8 100644 --- a/drivers/gpio/gpio-imx-scu.c +++ b/drivers/gpio/gpio-imx-scu.c @@ -102,7 +102,7 @@ static int imx_scu_gpio_probe(struct platform_device *pdev) gc->ngpio = ARRAY_SIZE(scu_rsrc_arr); gc->label = dev_name(dev); gc->get = imx_scu_gpio_get; - gc->set_rv = imx_scu_gpio_set; + gc->set = imx_scu_gpio_set; gc->get_direction = imx_scu_gpio_get_direction; platform_set_drvdata(pdev, priv); diff --git a/drivers/gpio/gpio-it87.c b/drivers/gpio/gpio-it87.c index d8184b527bac..5d677bcfccf2 100644 --- a/drivers/gpio/gpio-it87.c +++ b/drivers/gpio/gpio-it87.c @@ -267,7 +267,7 @@ static const struct gpio_chip it87_template_chip = { .request = it87_gpio_request, 
.get = it87_gpio_get, .direction_input = it87_gpio_direction_in, - .set_rv = it87_gpio_set, + .set = it87_gpio_set, .direction_output = it87_gpio_direction_out, .base = -1 }; diff --git a/drivers/gpio/gpio-janz-ttl.c b/drivers/gpio/gpio-janz-ttl.c index 9f548eda3888..b0c4a3346e7d 100644 --- a/drivers/gpio/gpio-janz-ttl.c +++ b/drivers/gpio/gpio-janz-ttl.c @@ -171,7 +171,7 @@ static int ttl_probe(struct platform_device *pdev) gpio->parent = &pdev->dev; gpio->label = pdev->name; gpio->get = ttl_get_value; - gpio->set_rv = ttl_set_value; + gpio->set = ttl_set_value; gpio->owner = THIS_MODULE; /* request dynamic allocation */ diff --git a/drivers/gpio/gpio-kempld.c b/drivers/gpio/gpio-kempld.c index e38e604baa22..923aad3ab4d4 100644 --- a/drivers/gpio/gpio-kempld.c +++ b/drivers/gpio/gpio-kempld.c @@ -169,7 +169,7 @@ static int kempld_gpio_probe(struct platform_device *pdev) chip->direction_output = kempld_gpio_direction_output; chip->get_direction = kempld_gpio_get_direction; chip->get = kempld_gpio_get; - chip->set_rv = kempld_gpio_set; + chip->set = kempld_gpio_set; chip->ngpio = kempld_gpio_pincount(pld); if (chip->ngpio == 0) { dev_err(dev, "No GPIO pins detected\n"); diff --git a/drivers/gpio/gpio-latch.c b/drivers/gpio/gpio-latch.c index 3d0ff09284fb..c64aaa896766 100644 --- a/drivers/gpio/gpio-latch.c +++ b/drivers/gpio/gpio-latch.c @@ -166,11 +166,11 @@ static int gpio_latch_probe(struct platform_device *pdev) if (gpio_latch_can_sleep(priv, n_latches)) { priv->gc.can_sleep = true; - priv->gc.set_rv = gpio_latch_set_can_sleep; + priv->gc.set = gpio_latch_set_can_sleep; mutex_init(&priv->mutex); } else { priv->gc.can_sleep = false; - priv->gc.set_rv = gpio_latch_set; + priv->gc.set = gpio_latch_set; spin_lock_init(&priv->spinlock); } diff --git a/drivers/gpio/gpio-ljca.c b/drivers/gpio/gpio-ljca.c index 61524a9ba765..3b4f8830c741 100644 --- a/drivers/gpio/gpio-ljca.c +++ b/drivers/gpio/gpio-ljca.c @@ -437,7 +437,7 @@ static int ljca_gpio_probe(struct 
auxiliary_device *auxdev, ljca_gpio->gc.direction_output = ljca_gpio_direction_output; ljca_gpio->gc.get_direction = ljca_gpio_get_direction; ljca_gpio->gc.get = ljca_gpio_get_value; - ljca_gpio->gc.set_rv = ljca_gpio_set_value; + ljca_gpio->gc.set = ljca_gpio_set_value; ljca_gpio->gc.set_config = ljca_gpio_set_config; ljca_gpio->gc.init_valid_mask = ljca_gpio_init_valid_mask; ljca_gpio->gc.can_sleep = true; diff --git a/drivers/gpio/gpio-logicvc.c b/drivers/gpio/gpio-logicvc.c index 19cd2847467c..cb9dbcc290ad 100644 --- a/drivers/gpio/gpio-logicvc.c +++ b/drivers/gpio/gpio-logicvc.c @@ -134,7 +134,7 @@ static int logicvc_gpio_probe(struct platform_device *pdev) logicvc->chip.ngpio = LOGICVC_CTRL_GPIO_BITS + LOGICVC_POWER_CTRL_GPIO_BITS; logicvc->chip.get = logicvc_gpio_get; - logicvc->chip.set_rv = logicvc_gpio_set; + logicvc->chip.set = logicvc_gpio_set; logicvc->chip.direction_output = logicvc_gpio_direction_output; return devm_gpiochip_add_data(dev, &logicvc->chip, logicvc); diff --git a/drivers/gpio/gpio-loongson-64bit.c b/drivers/gpio/gpio-loongson-64bit.c index add09971d26a..818c606fbc51 100644 --- a/drivers/gpio/gpio-loongson-64bit.c +++ b/drivers/gpio/gpio-loongson-64bit.c @@ -157,7 +157,7 @@ static int loongson_gpio_init(struct device *dev, struct loongson_gpio_chip *lgp lgpio->chip.get = loongson_gpio_get; lgpio->chip.get_direction = loongson_gpio_get_direction; lgpio->chip.direction_output = loongson_gpio_direction_output; - lgpio->chip.set_rv = loongson_gpio_set; + lgpio->chip.set = loongson_gpio_set; lgpio->chip.parent = dev; spin_lock_init(&lgpio->lock); } diff --git a/drivers/gpio/gpio-loongson.c b/drivers/gpio/gpio-loongson.c index 8f3668169ebf..f3e0559f969d 100644 --- a/drivers/gpio/gpio-loongson.c +++ b/drivers/gpio/gpio-loongson.c @@ -106,7 +106,7 @@ static int loongson_gpio_probe(struct platform_device *pdev) gc->base = 0; gc->ngpio = LOONGSON_N_GPIO; gc->get = loongson_gpio_get_value; - gc->set_rv = loongson_gpio_set_value; + gc->set = 
loongson_gpio_set_value; gc->direction_input = loongson_gpio_direction_input; gc->direction_output = loongson_gpio_direction_output; diff --git a/drivers/gpio/gpio-lp3943.c b/drivers/gpio/gpio-lp3943.c index 52ab3ac4844c..e8e00daff7df 100644 --- a/drivers/gpio/gpio-lp3943.c +++ b/drivers/gpio/gpio-lp3943.c @@ -184,7 +184,7 @@ static const struct gpio_chip lp3943_gpio_chip = { .direction_input = lp3943_gpio_direction_input, .get = lp3943_gpio_get, .direction_output = lp3943_gpio_direction_output, - .set_rv = lp3943_gpio_set, + .set = lp3943_gpio_set, .base = -1, .ngpio = LP3943_MAX_GPIO, .can_sleep = 1, diff --git a/drivers/gpio/gpio-lp873x.c b/drivers/gpio/gpio-lp873x.c index 1908ed302e92..5376708a81bf 100644 --- a/drivers/gpio/gpio-lp873x.c +++ b/drivers/gpio/gpio-lp873x.c @@ -124,7 +124,7 @@ static const struct gpio_chip template_chip = { .direction_input = lp873x_gpio_direction_input, .direction_output = lp873x_gpio_direction_output, .get = lp873x_gpio_get, - .set_rv = lp873x_gpio_set, + .set = lp873x_gpio_set, .set_config = lp873x_gpio_set_config, .base = -1, .ngpio = 2, diff --git a/drivers/gpio/gpio-lp87565.c b/drivers/gpio/gpio-lp87565.c index 8ea687d5d028..0f337c1283b2 100644 --- a/drivers/gpio/gpio-lp87565.c +++ b/drivers/gpio/gpio-lp87565.c @@ -139,7 +139,7 @@ static const struct gpio_chip template_chip = { .direction_input = lp87565_gpio_direction_input, .direction_output = lp87565_gpio_direction_output, .get = lp87565_gpio_get, - .set_rv = lp87565_gpio_set, + .set = lp87565_gpio_set, .set_config = lp87565_gpio_set_config, .base = -1, .ngpio = 3, diff --git a/drivers/gpio/gpio-lpc18xx.c b/drivers/gpio/gpio-lpc18xx.c index 2dbfbf90176c..37a2342eb2e6 100644 --- a/drivers/gpio/gpio-lpc18xx.c +++ b/drivers/gpio/gpio-lpc18xx.c @@ -327,7 +327,7 @@ static const struct gpio_chip lpc18xx_chip = { .free = gpiochip_generic_free, .direction_input = lpc18xx_gpio_direction_input, .direction_output = lpc18xx_gpio_direction_output, - .set_rv = lpc18xx_gpio_set, + .set = 
lpc18xx_gpio_set, .get = lpc18xx_gpio_get, .ngpio = LPC18XX_MAX_PORTS * LPC18XX_PINS_PER_PORT, .owner = THIS_MODULE, diff --git a/drivers/gpio/gpio-lpc32xx.c b/drivers/gpio/gpio-lpc32xx.c index 6668b8bd9f1e..37fc54fc7385 100644 --- a/drivers/gpio/gpio-lpc32xx.c +++ b/drivers/gpio/gpio-lpc32xx.c @@ -407,7 +407,7 @@ static struct lpc32xx_gpio_chip lpc32xx_gpiochip[] = { .direction_input = lpc32xx_gpio_dir_input_p012, .get = lpc32xx_gpio_get_value_p012, .direction_output = lpc32xx_gpio_dir_output_p012, - .set_rv = lpc32xx_gpio_set_value_p012, + .set = lpc32xx_gpio_set_value_p012, .request = lpc32xx_gpio_request, .to_irq = lpc32xx_gpio_to_irq_p01, .base = LPC32XX_GPIO_P0_GRP, @@ -423,7 +423,7 @@ static struct lpc32xx_gpio_chip lpc32xx_gpiochip[] = { .direction_input = lpc32xx_gpio_dir_input_p012, .get = lpc32xx_gpio_get_value_p012, .direction_output = lpc32xx_gpio_dir_output_p012, - .set_rv = lpc32xx_gpio_set_value_p012, + .set = lpc32xx_gpio_set_value_p012, .request = lpc32xx_gpio_request, .to_irq = lpc32xx_gpio_to_irq_p01, .base = LPC32XX_GPIO_P1_GRP, @@ -439,7 +439,7 @@ static struct lpc32xx_gpio_chip lpc32xx_gpiochip[] = { .direction_input = lpc32xx_gpio_dir_input_p012, .get = lpc32xx_gpio_get_value_p012, .direction_output = lpc32xx_gpio_dir_output_p012, - .set_rv = lpc32xx_gpio_set_value_p012, + .set = lpc32xx_gpio_set_value_p012, .request = lpc32xx_gpio_request, .base = LPC32XX_GPIO_P2_GRP, .ngpio = LPC32XX_GPIO_P2_MAX, @@ -454,7 +454,7 @@ static struct lpc32xx_gpio_chip lpc32xx_gpiochip[] = { .direction_input = lpc32xx_gpio_dir_input_p3, .get = lpc32xx_gpio_get_value_p3, .direction_output = lpc32xx_gpio_dir_output_p3, - .set_rv = lpc32xx_gpio_set_value_p3, + .set = lpc32xx_gpio_set_value_p3, .request = lpc32xx_gpio_request, .to_irq = lpc32xx_gpio_to_irq_gpio_p3, .base = LPC32XX_GPIO_P3_GRP, @@ -482,7 +482,7 @@ static struct lpc32xx_gpio_chip lpc32xx_gpiochip[] = { .chip = { .label = "gpo_p3", .direction_output = lpc32xx_gpio_dir_out_always, - .set_rv = 
lpc32xx_gpo_set_value, + .set = lpc32xx_gpo_set_value, .get = lpc32xx_gpo_get_value, .request = lpc32xx_gpio_request, .base = LPC32XX_GPO_P3_GRP, diff --git a/drivers/gpio/gpio-macsmc.c b/drivers/gpio/gpio-macsmc.c index 7570d9e89adf..30ef258e7655 100644 --- a/drivers/gpio/gpio-macsmc.c +++ b/drivers/gpio/gpio-macsmc.c @@ -261,7 +261,7 @@ static int macsmc_gpio_probe(struct platform_device *pdev) smcgp->gc.label = "macsmc-pmu-gpio"; smcgp->gc.owner = THIS_MODULE; smcgp->gc.get = macsmc_gpio_get; - smcgp->gc.set_rv = macsmc_gpio_set; + smcgp->gc.set = macsmc_gpio_set; smcgp->gc.get_direction = macsmc_gpio_get_direction; smcgp->gc.init_valid_mask = macsmc_gpio_init_valid_mask; smcgp->gc.can_sleep = true; diff --git a/drivers/gpio/gpio-madera.c b/drivers/gpio/gpio-madera.c index e73e72d62bc8..551faf9655b2 100644 --- a/drivers/gpio/gpio-madera.c +++ b/drivers/gpio/gpio-madera.c @@ -109,7 +109,7 @@ static const struct gpio_chip madera_gpio_chip = { .direction_input = madera_gpio_direction_in, .get = madera_gpio_get, .direction_output = madera_gpio_direction_out, - .set_rv = madera_gpio_set, + .set = madera_gpio_set, .set_config = gpiochip_generic_config, .can_sleep = true, }; diff --git a/drivers/gpio/gpio-max730x.c b/drivers/gpio/gpio-max730x.c index 75d414d8c992..84c7c2dca822 100644 --- a/drivers/gpio/gpio-max730x.c +++ b/drivers/gpio/gpio-max730x.c @@ -188,7 +188,7 @@ int __max730x_probe(struct max7301 *ts) ts->chip.direction_input = max7301_direction_input; ts->chip.get = max7301_get; ts->chip.direction_output = max7301_direction_output; - ts->chip.set_rv = max7301_set; + ts->chip.set = max7301_set; ts->chip.ngpio = PIN_NUMBER; ts->chip.can_sleep = true; diff --git a/drivers/gpio/gpio-max732x.c b/drivers/gpio/gpio-max732x.c index d5ffedb086af..a61d670ceeda 100644 --- a/drivers/gpio/gpio-max732x.c +++ b/drivers/gpio/gpio-max732x.c @@ -585,8 +585,8 @@ static int max732x_setup_gpio(struct max732x_chip *chip, gc->direction_input = max732x_gpio_direction_input; if 
(chip->dir_output) { gc->direction_output = max732x_gpio_direction_output; - gc->set_rv = max732x_gpio_set_value; - gc->set_multiple_rv = max732x_gpio_set_multiple; + gc->set = max732x_gpio_set_value; + gc->set_multiple = max732x_gpio_set_multiple; } gc->get = max732x_gpio_get_value; gc->can_sleep = true; diff --git a/drivers/gpio/gpio-max77620.c b/drivers/gpio/gpio-max77620.c index af7af8e40afe..02eca400b307 100644 --- a/drivers/gpio/gpio-max77620.c +++ b/drivers/gpio/gpio-max77620.c @@ -311,7 +311,7 @@ static int max77620_gpio_probe(struct platform_device *pdev) mgpio->gpio_chip.direction_input = max77620_gpio_dir_input; mgpio->gpio_chip.get = max77620_gpio_get; mgpio->gpio_chip.direction_output = max77620_gpio_dir_output; - mgpio->gpio_chip.set_rv = max77620_gpio_set; + mgpio->gpio_chip.set = max77620_gpio_set; mgpio->gpio_chip.set_config = max77620_gpio_set_config; mgpio->gpio_chip.ngpio = MAX77620_GPIO_NR; mgpio->gpio_chip.can_sleep = 1; diff --git a/drivers/gpio/gpio-max77650.c b/drivers/gpio/gpio-max77650.c index a553e141059f..4540da4c1418 100644 --- a/drivers/gpio/gpio-max77650.c +++ b/drivers/gpio/gpio-max77650.c @@ -166,7 +166,7 @@ static int max77650_gpio_probe(struct platform_device *pdev) chip->gc.direction_input = max77650_gpio_direction_input; chip->gc.direction_output = max77650_gpio_direction_output; - chip->gc.set_rv = max77650_gpio_set_value; + chip->gc.set = max77650_gpio_set_value; chip->gc.get = max77650_gpio_get_value; chip->gc.get_direction = max77650_gpio_get_direction; chip->gc.set_config = max77650_gpio_set_config; diff --git a/drivers/gpio/gpio-max77759.c b/drivers/gpio/gpio-max77759.c index 7fe8e6f697d0..5e48eb03e7b3 100644 --- a/drivers/gpio/gpio-max77759.c +++ b/drivers/gpio/gpio-max77759.c @@ -469,7 +469,7 @@ static int max77759_gpio_probe(struct platform_device *pdev) chip->gc.direction_input = max77759_gpio_direction_input; chip->gc.direction_output = max77759_gpio_direction_output; chip->gc.get = max77759_gpio_get_value; - 
chip->gc.set_rv = max77759_gpio_set_value; + chip->gc.set = max77759_gpio_set_value; girq = &chip->gc.irq; gpio_irq_chip_set_chip(girq, &max77759_gpio_irq_chip); diff --git a/drivers/gpio/gpio-mb86s7x.c b/drivers/gpio/gpio-mb86s7x.c index 5ee2991ecdfd..581a71872eab 100644 --- a/drivers/gpio/gpio-mb86s7x.c +++ b/drivers/gpio/gpio-mb86s7x.c @@ -180,7 +180,7 @@ static int mb86s70_gpio_probe(struct platform_device *pdev) gchip->gc.request = mb86s70_gpio_request; gchip->gc.free = mb86s70_gpio_free; gchip->gc.get = mb86s70_gpio_get; - gchip->gc.set_rv = mb86s70_gpio_set; + gchip->gc.set = mb86s70_gpio_set; gchip->gc.to_irq = mb86s70_gpio_to_irq; gchip->gc.label = dev_name(&pdev->dev); gchip->gc.ngpio = 32; diff --git a/drivers/gpio/gpio-mc33880.c b/drivers/gpio/gpio-mc33880.c index e68956104161..9a40e9579e95 100644 --- a/drivers/gpio/gpio-mc33880.c +++ b/drivers/gpio/gpio-mc33880.c @@ -103,7 +103,7 @@ static int mc33880_probe(struct spi_device *spi) mc->spi = spi; mc->chip.label = DRIVER_NAME; - mc->chip.set_rv = mc33880_set; + mc->chip.set = mc33880_set; mc->chip.base = pdata->base; mc->chip.ngpio = PIN_NUMBER; mc->chip.can_sleep = true; diff --git a/drivers/gpio/gpio-ml-ioh.c b/drivers/gpio/gpio-ml-ioh.c index 12cf36f9ca63..f6af81bf2b13 100644 --- a/drivers/gpio/gpio-ml-ioh.c +++ b/drivers/gpio/gpio-ml-ioh.c @@ -224,7 +224,7 @@ static void ioh_gpio_setup(struct ioh_gpio *chip, int num_port) gpio->direction_input = ioh_gpio_direction_input; gpio->get = ioh_gpio_get; gpio->direction_output = ioh_gpio_direction_output; - gpio->set_rv = ioh_gpio_set; + gpio->set = ioh_gpio_set; gpio->dbg_show = NULL; gpio->base = -1; gpio->ngpio = num_port; diff --git a/drivers/gpio/gpio-mm-lantiq.c b/drivers/gpio/gpio-mm-lantiq.c index 897a1e004681..8f1405733d98 100644 --- a/drivers/gpio/gpio-mm-lantiq.c +++ b/drivers/gpio/gpio-mm-lantiq.c @@ -111,7 +111,7 @@ static int ltq_mm_probe(struct platform_device *pdev) chip->mmchip.gc.ngpio = 16; chip->mmchip.gc.direction_output = 
ltq_mm_dir_out; - chip->mmchip.gc.set_rv = ltq_mm_set; + chip->mmchip.gc.set = ltq_mm_set; chip->mmchip.save_regs = ltq_mm_save_regs; /* store the shadow value if one was passed by the devicetree */ diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c index cf878c2ea6bf..021ad62778c2 100644 --- a/drivers/gpio/gpio-mmio.c +++ b/drivers/gpio/gpio-mmio.c @@ -367,7 +367,7 @@ static int bgpio_dir_out_err(struct gpio_chip *gc, unsigned int gpio, static int bgpio_simple_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) { - gc->set_rv(gc, gpio, val); + gc->set(gc, gpio, val); return bgpio_dir_return(gc, gpio, true); } @@ -432,14 +432,14 @@ static int bgpio_dir_out_dir_first(struct gpio_chip *gc, unsigned int gpio, int val) { bgpio_dir_out(gc, gpio, val); - gc->set_rv(gc, gpio, val); + gc->set(gc, gpio, val); return bgpio_dir_return(gc, gpio, true); } static int bgpio_dir_out_val_first(struct gpio_chip *gc, unsigned int gpio, int val) { - gc->set_rv(gc, gpio, val); + gc->set(gc, gpio, val); bgpio_dir_out(gc, gpio, val); return bgpio_dir_return(gc, gpio, true); } @@ -528,18 +528,18 @@ static int bgpio_setup_io(struct gpio_chip *gc, if (set && clr) { gc->reg_set = set; gc->reg_clr = clr; - gc->set_rv = bgpio_set_with_clear; - gc->set_multiple_rv = bgpio_set_multiple_with_clear; + gc->set = bgpio_set_with_clear; + gc->set_multiple = bgpio_set_multiple_with_clear; } else if (set && !clr) { gc->reg_set = set; - gc->set_rv = bgpio_set_set; - gc->set_multiple_rv = bgpio_set_multiple_set; + gc->set = bgpio_set_set; + gc->set_multiple = bgpio_set_multiple_set; } else if (flags & BGPIOF_NO_OUTPUT) { - gc->set_rv = bgpio_set_none; - gc->set_multiple_rv = NULL; + gc->set = bgpio_set_none; + gc->set_multiple = NULL; } else { - gc->set_rv = bgpio_set; - gc->set_multiple_rv = bgpio_set_multiple; + gc->set = bgpio_set; + gc->set_multiple = bgpio_set_multiple; } if (!(flags & BGPIOF_UNREADABLE_REG_SET) && @@ -676,7 +676,7 @@ int bgpio_init(struct gpio_chip *gc, struct 
device *dev, } gc->bgpio_data = gc->read_reg(gc->reg_dat); - if (gc->set_rv == bgpio_set_set && + if (gc->set == bgpio_set_set && !(flags & BGPIOF_UNREADABLE_REG_SET)) gc->bgpio_data = gc->read_reg(gc->reg_set); diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c index 266c0953d914..a7d69f3835c1 100644 --- a/drivers/gpio/gpio-mockup.c +++ b/drivers/gpio/gpio-mockup.c @@ -449,9 +449,9 @@ static int gpio_mockup_probe(struct platform_device *pdev) gc->owner = THIS_MODULE; gc->parent = dev; gc->get = gpio_mockup_get; - gc->set_rv = gpio_mockup_set; + gc->set = gpio_mockup_set; gc->get_multiple = gpio_mockup_get_multiple; - gc->set_multiple_rv = gpio_mockup_set_multiple; + gc->set_multiple = gpio_mockup_set_multiple; gc->direction_output = gpio_mockup_dirout; gc->direction_input = gpio_mockup_dirin; gc->get_direction = gpio_mockup_get_direction; diff --git a/drivers/gpio/gpio-moxtet.c b/drivers/gpio/gpio-moxtet.c index 27dd9c3e7b77..4eb9f1a2779b 100644 --- a/drivers/gpio/gpio-moxtet.c +++ b/drivers/gpio/gpio-moxtet.c @@ -140,7 +140,7 @@ static int moxtet_gpio_probe(struct device *dev) chip->gpio_chip.direction_input = moxtet_gpio_direction_input; chip->gpio_chip.direction_output = moxtet_gpio_direction_output; chip->gpio_chip.get = moxtet_gpio_get_value; - chip->gpio_chip.set_rv = moxtet_gpio_set_value; + chip->gpio_chip.set = moxtet_gpio_set_value; chip->gpio_chip.base = -1; chip->gpio_chip.ngpio = MOXTET_GPIO_NGPIOS; diff --git a/drivers/gpio/gpio-mpc5200.c b/drivers/gpio/gpio-mpc5200.c index 40d587176a75..dad0eca1ca2e 100644 --- a/drivers/gpio/gpio-mpc5200.c +++ b/drivers/gpio/gpio-mpc5200.c @@ -153,7 +153,7 @@ static int mpc52xx_wkup_gpiochip_probe(struct platform_device *ofdev) gc->direction_input = mpc52xx_wkup_gpio_dir_in; gc->direction_output = mpc52xx_wkup_gpio_dir_out; gc->get = mpc52xx_wkup_gpio_get; - gc->set_rv = mpc52xx_wkup_gpio_set; + gc->set = mpc52xx_wkup_gpio_set; ret = of_mm_gpiochip_add_data(ofdev->dev.of_node, &chip->mmchip, chip); 
if (ret) @@ -315,7 +315,7 @@ static int mpc52xx_simple_gpiochip_probe(struct platform_device *ofdev) gc->direction_input = mpc52xx_simple_gpio_dir_in; gc->direction_output = mpc52xx_simple_gpio_dir_out; gc->get = mpc52xx_simple_gpio_get; - gc->set_rv = mpc52xx_simple_gpio_set; + gc->set = mpc52xx_simple_gpio_set; ret = of_mm_gpiochip_add_data(ofdev->dev.of_node, &chip->mmchip, chip); if (ret) diff --git a/drivers/gpio/gpio-mpfs.c b/drivers/gpio/gpio-mpfs.c index 3415cb7ebb0f..82d557a7e5d8 100644 --- a/drivers/gpio/gpio-mpfs.c +++ b/drivers/gpio/gpio-mpfs.c @@ -150,7 +150,7 @@ static int mpfs_gpio_probe(struct platform_device *pdev) mpfs_gpio->gc.direction_output = mpfs_gpio_direction_output; mpfs_gpio->gc.get_direction = mpfs_gpio_get_direction; mpfs_gpio->gc.get = mpfs_gpio_get; - mpfs_gpio->gc.set_rv = mpfs_gpio_set; + mpfs_gpio->gc.set = mpfs_gpio_set; mpfs_gpio->gc.base = -1; mpfs_gpio->gc.ngpio = ngpios; mpfs_gpio->gc.label = dev_name(dev); diff --git a/drivers/gpio/gpio-mpsse.c b/drivers/gpio/gpio-mpsse.c index b17de08e9e03..9f42bb30b4ec 100644 --- a/drivers/gpio/gpio-mpsse.c +++ b/drivers/gpio/gpio-mpsse.c @@ -448,9 +448,9 @@ static int gpio_mpsse_probe(struct usb_interface *interface, priv->gpio.direction_input = gpio_mpsse_direction_input; priv->gpio.direction_output = gpio_mpsse_direction_output; priv->gpio.get = gpio_mpsse_gpio_get; - priv->gpio.set_rv = gpio_mpsse_gpio_set; + priv->gpio.set = gpio_mpsse_gpio_set; priv->gpio.get_multiple = gpio_mpsse_get_multiple; - priv->gpio.set_multiple_rv = gpio_mpsse_set_multiple; + priv->gpio.set_multiple = gpio_mpsse_set_multiple; priv->gpio.base = -1; priv->gpio.ngpio = 16; priv->gpio.offset = priv->intf_id * priv->gpio.ngpio; diff --git a/drivers/gpio/gpio-msc313.c b/drivers/gpio/gpio-msc313.c index 992339a89d19..b0cccd856840 100644 --- a/drivers/gpio/gpio-msc313.c +++ b/drivers/gpio/gpio-msc313.c @@ -658,7 +658,7 @@ static int msc313_gpio_probe(struct platform_device *pdev) gpiochip->direction_input = 
msc313_gpio_direction_input; gpiochip->direction_output = msc313_gpio_direction_output; gpiochip->get = msc313_gpio_get; - gpiochip->set_rv = msc313_gpio_set; + gpiochip->set = msc313_gpio_set; gpiochip->base = -1; gpiochip->ngpio = gpio->gpio_data->num; gpiochip->names = gpio->gpio_data->names; diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 24792b8eb083..5e3f54cb8bc4 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -1168,7 +1168,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev) mvchip->chip.direction_input = mvebu_gpio_direction_input; mvchip->chip.get = mvebu_gpio_get; mvchip->chip.direction_output = mvebu_gpio_direction_output; - mvchip->chip.set_rv = mvebu_gpio_set; + mvchip->chip.set = mvebu_gpio_set; if (have_irqs) mvchip->chip.to_irq = mvebu_gpio_to_irq; mvchip->chip.base = id * MVEBU_MAX_GPIO_PER_BANK; diff --git a/drivers/gpio/gpio-nomadik.c b/drivers/gpio/gpio-nomadik.c index 296d13845b30..bcf4b07dd458 100644 --- a/drivers/gpio/gpio-nomadik.c +++ b/drivers/gpio/gpio-nomadik.c @@ -674,7 +674,7 @@ static int nmk_gpio_probe(struct platform_device *pdev) chip->direction_input = nmk_gpio_make_input; chip->get = nmk_gpio_get_input; chip->direction_output = nmk_gpio_make_output; - chip->set_rv = nmk_gpio_set_output; + chip->set = nmk_gpio_set_output; chip->dbg_show = nmk_gpio_dbg_show; chip->can_sleep = false; chip->owner = THIS_MODULE; diff --git a/drivers/gpio/gpio-npcm-sgpio.c b/drivers/gpio/gpio-npcm-sgpio.c index 25b203a89e38..83c77a2c0623 100644 --- a/drivers/gpio/gpio-npcm-sgpio.c +++ b/drivers/gpio/gpio-npcm-sgpio.c @@ -211,7 +211,7 @@ static int npcm_sgpio_dir_in(struct gpio_chip *gc, unsigned int offset) static int npcm_sgpio_dir_out(struct gpio_chip *gc, unsigned int offset, int val) { - return gc->set_rv(gc, offset, val); + return gc->set(gc, offset, val); } static int npcm_sgpio_get_direction(struct gpio_chip *gc, unsigned int offset) @@ -546,7 +546,7 @@ static int 
npcm_sgpio_probe(struct platform_device *pdev) gpio->chip.direction_output = npcm_sgpio_dir_out; gpio->chip.get_direction = npcm_sgpio_get_direction; gpio->chip.get = npcm_sgpio_get; - gpio->chip.set_rv = npcm_sgpio_set; + gpio->chip.set = npcm_sgpio_set; gpio->chip.label = dev_name(&pdev->dev); gpio->chip.base = -1; diff --git a/drivers/gpio/gpio-octeon.c b/drivers/gpio/gpio-octeon.c index 24966161742a..777e20c608dc 100644 --- a/drivers/gpio/gpio-octeon.c +++ b/drivers/gpio/gpio-octeon.c @@ -108,7 +108,7 @@ static int octeon_gpio_probe(struct platform_device *pdev) chip->direction_input = octeon_gpio_dir_in; chip->get = octeon_gpio_get; chip->direction_output = octeon_gpio_dir_out; - chip->set_rv = octeon_gpio_set; + chip->set = octeon_gpio_set; err = devm_gpiochip_add_data(&pdev->dev, chip, gpio); if (err) return err; diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index ed5c88a5c520..a268c76bdca6 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1046,8 +1046,8 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct device *pm_dev) bank->chip.get_multiple = omap_gpio_get_multiple; bank->chip.direction_output = omap_gpio_output; bank->chip.set_config = omap_gpio_set_config; - bank->chip.set_rv = omap_gpio_set; - bank->chip.set_multiple_rv = omap_gpio_set_multiple; + bank->chip.set = omap_gpio_set; + bank->chip.set_multiple = omap_gpio_set_multiple; if (bank->is_mpuio) { bank->chip.label = "mpuio"; if (bank->regs->wkup_en) diff --git a/drivers/gpio/gpio-palmas.c b/drivers/gpio/gpio-palmas.c index 9329d8ce8f59..e377f6dd4ccf 100644 --- a/drivers/gpio/gpio-palmas.c +++ b/drivers/gpio/gpio-palmas.c @@ -166,7 +166,7 @@ static int palmas_gpio_probe(struct platform_device *pdev) palmas_gpio->gpio_chip.direction_input = palmas_gpio_input; palmas_gpio->gpio_chip.direction_output = palmas_gpio_output; palmas_gpio->gpio_chip.to_irq = palmas_gpio_to_irq; - palmas_gpio->gpio_chip.set_rv = palmas_gpio_set; + 
palmas_gpio->gpio_chip.set = palmas_gpio_set; palmas_gpio->gpio_chip.get = palmas_gpio_get; palmas_gpio->gpio_chip.parent = &pdev->dev; diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 69906a9af7e6..b46927f55038 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -789,10 +789,10 @@ static void pca953x_setup_gpio(struct pca953x_chip *chip, int gpios) gc->direction_input = pca953x_gpio_direction_input; gc->direction_output = pca953x_gpio_direction_output; gc->get = pca953x_gpio_get_value; - gc->set_rv = pca953x_gpio_set_value; + gc->set = pca953x_gpio_set_value; gc->get_direction = pca953x_gpio_get_direction; gc->get_multiple = pca953x_gpio_get_multiple; - gc->set_multiple_rv = pca953x_gpio_set_multiple; + gc->set_multiple = pca953x_gpio_set_multiple; gc->set_config = pca953x_gpio_set_config; gc->can_sleep = true; diff --git a/drivers/gpio/gpio-pca9570.c b/drivers/gpio/gpio-pca9570.c index a33246f20fd8..c5a1287079a0 100644 --- a/drivers/gpio/gpio-pca9570.c +++ b/drivers/gpio/gpio-pca9570.c @@ -126,7 +126,7 @@ static int pca9570_probe(struct i2c_client *client) gpio->chip.owner = THIS_MODULE; gpio->chip.get_direction = pca9570_get_direction; gpio->chip.get = pca9570_get; - gpio->chip.set_rv = pca9570_set; + gpio->chip.set = pca9570_set; gpio->chip.base = -1; gpio->chip_data = device_get_match_data(&client->dev); gpio->chip.ngpio = gpio->chip_data->ngpio; diff --git a/drivers/gpio/gpio-pcf857x.c b/drivers/gpio/gpio-pcf857x.c index a04203680333..3b9de8c3d924 100644 --- a/drivers/gpio/gpio-pcf857x.c +++ b/drivers/gpio/gpio-pcf857x.c @@ -295,8 +295,8 @@ static int pcf857x_probe(struct i2c_client *client) gpio->chip.owner = THIS_MODULE; gpio->chip.get = pcf857x_get; gpio->chip.get_multiple = pcf857x_get_multiple; - gpio->chip.set_rv = pcf857x_set; - gpio->chip.set_multiple_rv = pcf857x_set_multiple; + gpio->chip.set = pcf857x_set; + gpio->chip.set_multiple = pcf857x_set_multiple; gpio->chip.direction_input = 
pcf857x_input; gpio->chip.direction_output = pcf857x_output; gpio->chip.ngpio = (uintptr_t)i2c_get_match_data(client); diff --git a/drivers/gpio/gpio-pch.c b/drivers/gpio/gpio-pch.c index c6f313342ba0..9925687e05fb 100644 --- a/drivers/gpio/gpio-pch.c +++ b/drivers/gpio/gpio-pch.c @@ -219,7 +219,7 @@ static void pch_gpio_setup(struct pch_gpio *chip) gpio->direction_input = pch_gpio_direction_input; gpio->get = pch_gpio_get; gpio->direction_output = pch_gpio_direction_output; - gpio->set_rv = pch_gpio_set; + gpio->set = pch_gpio_set; gpio->base = -1; gpio->ngpio = gpio_pins[chip->ioh]; gpio->can_sleep = false; diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c index 98cfac4eac85..02e4ffcf5a6f 100644 --- a/drivers/gpio/gpio-pl061.c +++ b/drivers/gpio/gpio-pl061.c @@ -330,7 +330,7 @@ static int pl061_probe(struct amba_device *adev, const struct amba_id *id) pl061->gc.direction_input = pl061_direction_input; pl061->gc.direction_output = pl061_direction_output; pl061->gc.get = pl061_get_value; - pl061->gc.set_rv = pl061_set_value; + pl061->gc.set = pl061_set_value; pl061->gc.ngpio = PL061_GPIO_NR; pl061->gc.label = dev_name(dev); pl061->gc.parent = dev; diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index cbcdd416f8b9..fa22f3faa163 100644 --- a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -355,7 +355,7 @@ static int pxa_init_gpio_chip(struct pxa_gpio_chip *pchip, int ngpio, void __iom pchip->chip.direction_input = pxa_gpio_direction_input; pchip->chip.direction_output = pxa_gpio_direction_output; pchip->chip.get = pxa_gpio_get; - pchip->chip.set_rv = pxa_gpio_set; + pchip->chip.set = pxa_gpio_set; pchip->chip.to_irq = pxa_gpio_to_irq; pchip->chip.ngpio = ngpio; pchip->chip.request = gpiochip_generic_request; diff --git a/drivers/gpio/gpio-raspberrypi-exp.c b/drivers/gpio/gpio-raspberrypi-exp.c index b4b607515a04..40413e06b69c 100644 --- a/drivers/gpio/gpio-raspberrypi-exp.c +++ b/drivers/gpio/gpio-raspberrypi-exp.c @@ -232,7 
+232,7 @@ static int rpi_exp_gpio_probe(struct platform_device *pdev) rpi_gpio->gc.direction_output = rpi_exp_gpio_dir_out; rpi_gpio->gc.get_direction = rpi_exp_gpio_get_direction; rpi_gpio->gc.get = rpi_exp_gpio_get; - rpi_gpio->gc.set_rv = rpi_exp_gpio_set; + rpi_gpio->gc.set = rpi_exp_gpio_set; rpi_gpio->gc.can_sleep = true; return devm_gpiochip_add_data(dev, &rpi_gpio->gc, rpi_gpio); diff --git a/drivers/gpio/gpio-rc5t583.c b/drivers/gpio/gpio-rc5t583.c index cf3e91d235df..5a69e4534591 100644 --- a/drivers/gpio/gpio-rc5t583.c +++ b/drivers/gpio/gpio-rc5t583.c @@ -118,7 +118,7 @@ static int rc5t583_gpio_probe(struct platform_device *pdev) rc5t583_gpio->gpio_chip.free = rc5t583_gpio_free, rc5t583_gpio->gpio_chip.direction_input = rc5t583_gpio_dir_input, rc5t583_gpio->gpio_chip.direction_output = rc5t583_gpio_dir_output, - rc5t583_gpio->gpio_chip.set_rv = rc5t583_gpio_set, + rc5t583_gpio->gpio_chip.set = rc5t583_gpio_set, rc5t583_gpio->gpio_chip.get = rc5t583_gpio_get, rc5t583_gpio->gpio_chip.to_irq = rc5t583_gpio_to_irq, rc5t583_gpio->gpio_chip.ngpio = RC5T583_MAX_GPIO, diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index cd31580effa9..86777e097fd8 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -535,8 +535,8 @@ static int gpio_rcar_probe(struct platform_device *pdev) gpio_chip->get = gpio_rcar_get; gpio_chip->get_multiple = gpio_rcar_get_multiple; gpio_chip->direction_output = gpio_rcar_direction_output; - gpio_chip->set_rv = gpio_rcar_set; - gpio_chip->set_multiple_rv = gpio_rcar_set_multiple; + gpio_chip->set = gpio_rcar_set; + gpio_chip->set_multiple = gpio_rcar_set_multiple; gpio_chip->label = name; gpio_chip->parent = dev; gpio_chip->owner = THIS_MODULE; diff --git a/drivers/gpio/gpio-rdc321x.c b/drivers/gpio/gpio-rdc321x.c index a75ed8021de5..ba62b81aa8ae 100644 --- a/drivers/gpio/gpio-rdc321x.c +++ b/drivers/gpio/gpio-rdc321x.c @@ -159,7 +159,7 @@ static int rdc321x_gpio_probe(struct platform_device *pdev) 
rdc321x_gpio_dev->chip.direction_input = rdc_gpio_direction_input; rdc321x_gpio_dev->chip.direction_output = rdc_gpio_config; rdc321x_gpio_dev->chip.get = rdc_gpio_get_value; - rdc321x_gpio_dev->chip.set_rv = rdc_gpio_set_value; + rdc321x_gpio_dev->chip.set = rdc_gpio_set_value; rdc321x_gpio_dev->chip.base = 0; rdc321x_gpio_dev->chip.ngpio = pdata->max_gpios; diff --git a/drivers/gpio/gpio-reg.c b/drivers/gpio/gpio-reg.c index d8da99f97385..f2238196faf1 100644 --- a/drivers/gpio/gpio-reg.c +++ b/drivers/gpio/gpio-reg.c @@ -46,7 +46,7 @@ static int gpio_reg_direction_output(struct gpio_chip *gc, unsigned offset, if (r->direction & BIT(offset)) return -ENOTSUPP; - gc->set_rv(gc, offset, value); + gc->set(gc, offset, value); return 0; } @@ -161,9 +161,9 @@ struct gpio_chip *gpio_reg_init(struct device *dev, void __iomem *reg, r->gc.get_direction = gpio_reg_get_direction; r->gc.direction_input = gpio_reg_direction_input; r->gc.direction_output = gpio_reg_direction_output; - r->gc.set_rv = gpio_reg_set; + r->gc.set = gpio_reg_set; r->gc.get = gpio_reg_get; - r->gc.set_multiple_rv = gpio_reg_set_multiple; + r->gc.set_multiple = gpio_reg_set_multiple; if (irqs) r->gc.to_irq = gpio_reg_to_irq; r->gc.base = base; diff --git a/drivers/gpio/gpio-regmap.c b/drivers/gpio/gpio-regmap.c index 87c4225784cf..e8a32dfebdcb 100644 --- a/drivers/gpio/gpio-regmap.c +++ b/drivers/gpio/gpio-regmap.c @@ -260,9 +260,9 @@ struct gpio_regmap *gpio_regmap_register(const struct gpio_regmap_config *config chip->free = gpiochip_generic_free; chip->get = gpio_regmap_get; if (gpio->reg_set_base && gpio->reg_clr_base) - chip->set_rv = gpio_regmap_set_with_clear; + chip->set = gpio_regmap_set_with_clear; else if (gpio->reg_set_base) - chip->set_rv = gpio_regmap_set; + chip->set = gpio_regmap_set; chip->get_direction = gpio_regmap_get_direction; if (gpio->reg_dir_in_base || gpio->reg_dir_out_base) { diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c index 
ecd60ff9e1dd..bcfc323a8315 100644 --- a/drivers/gpio/gpio-rockchip.c +++ b/drivers/gpio/gpio-rockchip.c @@ -327,7 +327,7 @@ static int rockchip_gpio_to_irq(struct gpio_chip *gc, unsigned int offset) static const struct gpio_chip rockchip_gpiolib_chip = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, - .set_rv = rockchip_gpio_set, + .set = rockchip_gpio_set, .get = rockchip_gpio_get, .get_direction = rockchip_gpio_get_direction, .direction_input = rockchip_gpio_direction_input, diff --git a/drivers/gpio/gpio-rtd.c b/drivers/gpio/gpio-rtd.c index 25bbd749b019..d46b40dd5283 100644 --- a/drivers/gpio/gpio-rtd.c +++ b/drivers/gpio/gpio-rtd.c @@ -565,7 +565,7 @@ static int rtd_gpio_probe(struct platform_device *pdev) data->gpio_chip.get_direction = rtd_gpio_get_direction; data->gpio_chip.direction_input = rtd_gpio_direction_input; data->gpio_chip.direction_output = rtd_gpio_direction_output; - data->gpio_chip.set_rv = rtd_gpio_set; + data->gpio_chip.set = rtd_gpio_set; data->gpio_chip.get = rtd_gpio_get; data->gpio_chip.set_config = rtd_gpio_set_config; data->gpio_chip.parent = dev; diff --git a/drivers/gpio/gpio-sa1100.c b/drivers/gpio/gpio-sa1100.c index e9d054d78ccb..7f6a62f5d1ee 100644 --- a/drivers/gpio/gpio-sa1100.c +++ b/drivers/gpio/gpio-sa1100.c @@ -99,7 +99,7 @@ static struct sa1100_gpio_chip sa1100_gpio_chip = { .get_direction = sa1100_get_direction, .direction_input = sa1100_direction_input, .direction_output = sa1100_direction_output, - .set_rv = sa1100_gpio_set, + .set = sa1100_gpio_set, .get = sa1100_gpio_get, .to_irq = sa1100_to_irq, .base = 0, diff --git a/drivers/gpio/gpio-sama5d2-piobu.c b/drivers/gpio/gpio-sama5d2-piobu.c index c31244cf5e89..5005688f6e67 100644 --- a/drivers/gpio/gpio-sama5d2-piobu.c +++ b/drivers/gpio/gpio-sama5d2-piobu.c @@ -196,7 +196,7 @@ static int sama5d2_piobu_probe(struct platform_device *pdev) piobu->chip.direction_input = sama5d2_piobu_direction_input; piobu->chip.direction_output = 
sama5d2_piobu_direction_output; piobu->chip.get = sama5d2_piobu_get; - piobu->chip.set_rv = sama5d2_piobu_set; + piobu->chip.set = sama5d2_piobu_set; piobu->chip.base = -1; piobu->chip.ngpio = PIOBU_NUM; piobu->chip.can_sleep = 0; diff --git a/drivers/gpio/gpio-sch.c b/drivers/gpio/gpio-sch.c index 833ffdd98d74..966d16a6d515 100644 --- a/drivers/gpio/gpio-sch.c +++ b/drivers/gpio/gpio-sch.c @@ -167,7 +167,7 @@ static const struct gpio_chip sch_gpio_chip = { .direction_input = sch_gpio_direction_in, .get = sch_gpio_get, .direction_output = sch_gpio_direction_out, - .set_rv = sch_gpio_set, + .set = sch_gpio_set, .get_direction = sch_gpio_get_direction, }; diff --git a/drivers/gpio/gpio-sch311x.c b/drivers/gpio/gpio-sch311x.c index 44fb5fc21fb8..f95566998d30 100644 --- a/drivers/gpio/gpio-sch311x.c +++ b/drivers/gpio/gpio-sch311x.c @@ -297,7 +297,7 @@ static int sch311x_gpio_probe(struct platform_device *pdev) block->chip.get_direction = sch311x_gpio_get_direction; block->chip.set_config = sch311x_gpio_set_config; block->chip.get = sch311x_gpio_get; - block->chip.set_rv = sch311x_gpio_set; + block->chip.set = sch311x_gpio_set; block->chip.ngpio = 8; block->chip.parent = &pdev->dev; block->chip.base = sch311x_gpio_blocks[i].base; diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index 9503296422fd..050092583f79 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -486,9 +486,9 @@ static int gpio_sim_add_bank(struct fwnode_handle *swnode, struct device *dev) gc->parent = dev; gc->fwnode = swnode; gc->get = gpio_sim_get; - gc->set_rv = gpio_sim_set; + gc->set = gpio_sim_set; gc->get_multiple = gpio_sim_get_multiple; - gc->set_multiple_rv = gpio_sim_set_multiple; + gc->set_multiple = gpio_sim_set_multiple; gc->direction_output = gpio_sim_direction_output; gc->direction_input = gpio_sim_direction_input; gc->get_direction = gpio_sim_get_direction; diff --git a/drivers/gpio/gpio-siox.c b/drivers/gpio/gpio-siox.c index 95355dda621b..958034b9f3f3 
100644 --- a/drivers/gpio/gpio-siox.c +++ b/drivers/gpio/gpio-siox.c @@ -237,7 +237,7 @@ static int gpio_siox_probe(struct siox_device *sdevice) gc->parent = dev; gc->owner = THIS_MODULE; gc->get = gpio_siox_get; - gc->set_rv = gpio_siox_set; + gc->set = gpio_siox_set; gc->direction_input = gpio_siox_direction_input; gc->direction_output = gpio_siox_direction_output; gc->get_direction = gpio_siox_get_direction; diff --git a/drivers/gpio/gpio-spear-spics.c b/drivers/gpio/gpio-spear-spics.c index 55f0e8afa291..96a0e1211500 100644 --- a/drivers/gpio/gpio-spear-spics.c +++ b/drivers/gpio/gpio-spear-spics.c @@ -140,7 +140,7 @@ static int spics_gpio_probe(struct platform_device *pdev) spics->chip.request = spics_request; spics->chip.free = spics_free; spics->chip.direction_output = spics_direction_output; - spics->chip.set_rv = spics_set_value; + spics->chip.set = spics_set_value; spics->chip.label = dev_name(&pdev->dev); spics->chip.parent = &pdev->dev; spics->chip.owner = THIS_MODULE; diff --git a/drivers/gpio/gpio-sprd.c b/drivers/gpio/gpio-sprd.c index bbd5bf51c088..413bcd0a4240 100644 --- a/drivers/gpio/gpio-sprd.c +++ b/drivers/gpio/gpio-sprd.c @@ -245,7 +245,7 @@ static int sprd_gpio_probe(struct platform_device *pdev) sprd_gpio->chip.request = sprd_gpio_request; sprd_gpio->chip.free = sprd_gpio_free; sprd_gpio->chip.get = sprd_gpio_get; - sprd_gpio->chip.set_rv = sprd_gpio_set; + sprd_gpio->chip.set = sprd_gpio_set; sprd_gpio->chip.direction_input = sprd_gpio_direction_input; sprd_gpio->chip.direction_output = sprd_gpio_direction_output; diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c index 0a270156e0be..5dd4c21a8e60 100644 --- a/drivers/gpio/gpio-stmpe.c +++ b/drivers/gpio/gpio-stmpe.c @@ -136,7 +136,7 @@ static const struct gpio_chip template_chip = { .direction_input = stmpe_gpio_direction_input, .get = stmpe_gpio_get, .direction_output = stmpe_gpio_direction_output, - .set_rv = stmpe_gpio_set, + .set = stmpe_gpio_set, .request = 
stmpe_gpio_request, .can_sleep = true, }; diff --git a/drivers/gpio/gpio-stp-xway.c b/drivers/gpio/gpio-stp-xway.c index fdda8de6ca36..493c027afdd6 100644 --- a/drivers/gpio/gpio-stp-xway.c +++ b/drivers/gpio/gpio-stp-xway.c @@ -249,7 +249,7 @@ static int xway_stp_probe(struct platform_device *pdev) chip->gc.label = "stp-xway"; chip->gc.direction_output = xway_stp_dir_out; chip->gc.get = xway_stp_get; - chip->gc.set_rv = xway_stp_set; + chip->gc.set = xway_stp_set; chip->gc.request = xway_stp_request; chip->gc.base = -1; chip->gc.owner = THIS_MODULE; diff --git a/drivers/gpio/gpio-syscon.c b/drivers/gpio/gpio-syscon.c index f86f78655c24..40064d4cf47f 100644 --- a/drivers/gpio/gpio-syscon.c +++ b/drivers/gpio/gpio-syscon.c @@ -115,7 +115,7 @@ static int syscon_gpio_dir_out(struct gpio_chip *chip, unsigned offset, int val) BIT(offs % SYSCON_REG_BITS)); } - return chip->set_rv(chip, offset, val); + return chip->set(chip, offset, val); } static const struct syscon_gpio_data clps711x_mctrl_gpio = { @@ -251,7 +251,7 @@ static int syscon_gpio_probe(struct platform_device *pdev) if (priv->data->flags & GPIO_SYSCON_FEAT_IN) priv->chip.direction_input = syscon_gpio_dir_in; if (priv->data->flags & GPIO_SYSCON_FEAT_OUT) { - priv->chip.set_rv = priv->data->set ? : syscon_gpio_set; + priv->chip.set = priv->data->set ? 
: syscon_gpio_set; priv->chip.direction_output = syscon_gpio_dir_out; } diff --git a/drivers/gpio/gpio-tangier.c b/drivers/gpio/gpio-tangier.c index ce17b98e0623..ba5a8ede8912 100644 --- a/drivers/gpio/gpio-tangier.c +++ b/drivers/gpio/gpio-tangier.c @@ -430,7 +430,7 @@ int devm_tng_gpio_probe(struct device *dev, struct tng_gpio *gpio) gpio->chip.direction_input = tng_gpio_direction_input; gpio->chip.direction_output = tng_gpio_direction_output; gpio->chip.get = tng_gpio_get; - gpio->chip.set_rv = tng_gpio_set; + gpio->chip.set = tng_gpio_set; gpio->chip.get_direction = tng_gpio_get_direction; gpio->chip.set_config = tng_gpio_set_config; gpio->chip.base = info->base; diff --git a/drivers/gpio/gpio-tc3589x.c b/drivers/gpio/gpio-tc3589x.c index 0bd32809fd68..90d048f9da08 100644 --- a/drivers/gpio/gpio-tc3589x.c +++ b/drivers/gpio/gpio-tc3589x.c @@ -149,7 +149,7 @@ static const struct gpio_chip template_chip = { .label = "tc3589x", .owner = THIS_MODULE, .get = tc3589x_gpio_get, - .set_rv = tc3589x_gpio_set, + .set = tc3589x_gpio_set, .direction_output = tc3589x_gpio_direction_output, .direction_input = tc3589x_gpio_direction_input, .get_direction = tc3589x_gpio_get_direction, diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index 126fd12550aa..15a5762a82c2 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -720,7 +720,7 @@ static int tegra_gpio_probe(struct platform_device *pdev) tgi->gc.direction_input = tegra_gpio_direction_input; tgi->gc.get = tegra_gpio_get; tgi->gc.direction_output = tegra_gpio_direction_output; - tgi->gc.set_rv = tegra_gpio_set; + tgi->gc.set = tegra_gpio_set; tgi->gc.get_direction = tegra_gpio_get_direction; tgi->gc.base = 0; tgi->gc.ngpio = tgi->bank_count * 32; diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c index f902da15c419..5fd3ec3e2c53 100644 --- a/drivers/gpio/gpio-tegra186.c +++ b/drivers/gpio/gpio-tegra186.c @@ -891,7 +891,7 @@ static int tegra186_gpio_probe(struct 
platform_device *pdev) gpio->gpio.direction_input = tegra186_gpio_direction_input; gpio->gpio.direction_output = tegra186_gpio_direction_output; gpio->gpio.get = tegra186_gpio_get; - gpio->gpio.set_rv = tegra186_gpio_set; + gpio->gpio.set = tegra186_gpio_set; gpio->gpio.set_config = tegra186_gpio_set_config; gpio->gpio.add_pin_ranges = tegra186_gpio_add_pin_ranges; gpio->gpio.init_valid_mask = tegra186_init_valid_mask; diff --git a/drivers/gpio/gpio-thunderx.c b/drivers/gpio/gpio-thunderx.c index eb6a1f0279c0..be96853063ba 100644 --- a/drivers/gpio/gpio-thunderx.c +++ b/drivers/gpio/gpio-thunderx.c @@ -533,8 +533,8 @@ static int thunderx_gpio_probe(struct pci_dev *pdev, chip->direction_input = thunderx_gpio_dir_in; chip->get = thunderx_gpio_get; chip->direction_output = thunderx_gpio_dir_out; - chip->set_rv = thunderx_gpio_set; - chip->set_multiple_rv = thunderx_gpio_set_multiple; + chip->set = thunderx_gpio_set; + chip->set_multiple = thunderx_gpio_set_multiple; chip->set_config = thunderx_gpio_set_config; girq = &chip->irq; gpio_irq_chip_set_chip(girq, &thunderx_gpio_irq_chip); diff --git a/drivers/gpio/gpio-timberdale.c b/drivers/gpio/gpio-timberdale.c index fbb883089189..679e27f00ff6 100644 --- a/drivers/gpio/gpio-timberdale.c +++ b/drivers/gpio/gpio-timberdale.c @@ -253,7 +253,7 @@ static int timbgpio_probe(struct platform_device *pdev) gc->direction_input = timbgpio_gpio_direction_input; gc->get = timbgpio_gpio_get; gc->direction_output = timbgpio_gpio_direction_output; - gc->set_rv = timbgpio_gpio_set; + gc->set = timbgpio_gpio_set; gc->to_irq = (irq >= 0 && tgpio->irq_base > 0) ? 
timbgpio_to_irq : NULL; gc->dbg_show = NULL; gc->base = pdata->gpio_base; diff --git a/drivers/gpio/gpio-tpic2810.c b/drivers/gpio/gpio-tpic2810.c index d5b8568ab061..866ff2d436d5 100644 --- a/drivers/gpio/gpio-tpic2810.c +++ b/drivers/gpio/gpio-tpic2810.c @@ -80,8 +80,8 @@ static const struct gpio_chip template_chip = { .owner = THIS_MODULE, .get_direction = tpic2810_get_direction, .direction_output = tpic2810_direction_output, - .set_rv = tpic2810_set, - .set_multiple_rv = tpic2810_set_multiple, + .set = tpic2810_set, + .set_multiple = tpic2810_set_multiple, .base = -1, .ngpio = 8, .can_sleep = true, diff --git a/drivers/gpio/gpio-tps65086.c b/drivers/gpio/gpio-tps65086.c index 08fa061b73ef..84b17b83476f 100644 --- a/drivers/gpio/gpio-tps65086.c +++ b/drivers/gpio/gpio-tps65086.c @@ -69,7 +69,7 @@ static const struct gpio_chip template_chip = { .direction_input = tps65086_gpio_direction_input, .direction_output = tps65086_gpio_direction_output, .get = tps65086_gpio_get, - .set_rv = tps65086_gpio_set, + .set = tps65086_gpio_set, .base = -1, .ngpio = 4, .can_sleep = true, diff --git a/drivers/gpio/gpio-tps65218.c b/drivers/gpio/gpio-tps65218.c index 49cd7754ed05..3b4c41f5ef55 100644 --- a/drivers/gpio/gpio-tps65218.c +++ b/drivers/gpio/gpio-tps65218.c @@ -169,7 +169,7 @@ static const struct gpio_chip template_chip = { .request = tps65218_gpio_request, .direction_output = tps65218_gpio_output, .get = tps65218_gpio_get, - .set_rv = tps65218_gpio_set, + .set = tps65218_gpio_set, .set_config = tps65218_gpio_set_config, .can_sleep = true, .ngpio = 3, diff --git a/drivers/gpio/gpio-tps65219.c b/drivers/gpio/gpio-tps65219.c index c0177088c54c..158f63bcf10c 100644 --- a/drivers/gpio/gpio-tps65219.c +++ b/drivers/gpio/gpio-tps65219.c @@ -203,7 +203,7 @@ static const struct gpio_chip tps65214_template_chip = { .direction_input = tps65219_gpio_direction_input, .direction_output = tps65219_gpio_direction_output, .get = tps65219_gpio_get, - .set_rv = tps65219_gpio_set, + .set = 
tps65219_gpio_set, .base = -1, .ngpio = 2, .can_sleep = true, @@ -216,7 +216,7 @@ static const struct gpio_chip tps65219_template_chip = { .direction_input = tps65219_gpio_direction_input, .direction_output = tps65219_gpio_direction_output, .get = tps65219_gpio_get, - .set_rv = tps65219_gpio_set, + .set = tps65219_gpio_set, .base = -1, .ngpio = 3, .can_sleep = true, diff --git a/drivers/gpio/gpio-tps6586x.c b/drivers/gpio/gpio-tps6586x.c index f1ced092f38a..aaacbb54bf5d 100644 --- a/drivers/gpio/gpio-tps6586x.c +++ b/drivers/gpio/gpio-tps6586x.c @@ -98,7 +98,7 @@ static int tps6586x_gpio_probe(struct platform_device *pdev) /* FIXME: add handling of GPIOs as dedicated inputs */ tps6586x_gpio->gpio_chip.direction_output = tps6586x_gpio_output; - tps6586x_gpio->gpio_chip.set_rv = tps6586x_gpio_set; + tps6586x_gpio->gpio_chip.set = tps6586x_gpio_set; tps6586x_gpio->gpio_chip.get = tps6586x_gpio_get; tps6586x_gpio->gpio_chip.to_irq = tps6586x_gpio_to_irq; diff --git a/drivers/gpio/gpio-tps65910.c b/drivers/gpio/gpio-tps65910.c index 3204f55394cf..25e9f41efe78 100644 --- a/drivers/gpio/gpio-tps65910.c +++ b/drivers/gpio/gpio-tps65910.c @@ -139,7 +139,7 @@ static int tps65910_gpio_probe(struct platform_device *pdev) tps65910_gpio->gpio_chip.can_sleep = true; tps65910_gpio->gpio_chip.direction_input = tps65910_gpio_input; tps65910_gpio->gpio_chip.direction_output = tps65910_gpio_output; - tps65910_gpio->gpio_chip.set_rv = tps65910_gpio_set; + tps65910_gpio->gpio_chip.set = tps65910_gpio_set; tps65910_gpio->gpio_chip.get = tps65910_gpio_get; tps65910_gpio->gpio_chip.parent = &pdev->dev; diff --git a/drivers/gpio/gpio-tps65912.c b/drivers/gpio/gpio-tps65912.c index d586ccfbfc56..7a2c5685c2fd 100644 --- a/drivers/gpio/gpio-tps65912.c +++ b/drivers/gpio/gpio-tps65912.c @@ -92,7 +92,7 @@ static const struct gpio_chip template_chip = { .direction_input = tps65912_gpio_direction_input, .direction_output = tps65912_gpio_direction_output, .get = tps65912_gpio_get, - .set_rv = 
tps65912_gpio_set, + .set = tps65912_gpio_set, .base = -1, .ngpio = 5, .can_sleep = true, diff --git a/drivers/gpio/gpio-tps68470.c b/drivers/gpio/gpio-tps68470.c index 3b8805c854f7..d4fbdf90e190 100644 --- a/drivers/gpio/gpio-tps68470.c +++ b/drivers/gpio/gpio-tps68470.c @@ -142,7 +142,7 @@ static int tps68470_gpio_probe(struct platform_device *pdev) tps68470_gpio->gc.direction_output = tps68470_gpio_output; tps68470_gpio->gc.get = tps68470_gpio_get; tps68470_gpio->gc.get_direction = tps68470_gpio_get_direction; - tps68470_gpio->gc.set_rv = tps68470_gpio_set; + tps68470_gpio->gc.set = tps68470_gpio_set; tps68470_gpio->gc.can_sleep = true; tps68470_gpio->gc.names = tps68470_names; tps68470_gpio->gc.ngpio = TPS68470_N_GPIO; diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c index 056799ecce6a..27dd09273292 100644 --- a/drivers/gpio/gpio-tqmx86.c +++ b/drivers/gpio/gpio-tqmx86.c @@ -370,7 +370,7 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) chip->direction_output = tqmx86_gpio_direction_output; chip->get_direction = tqmx86_gpio_get_direction; chip->get = tqmx86_gpio_get; - chip->set_rv = tqmx86_gpio_set; + chip->set = tqmx86_gpio_set; chip->ngpio = TQMX86_NGPIO; chip->parent = pdev->dev.parent; diff --git a/drivers/gpio/gpio-ts4900.c b/drivers/gpio/gpio-ts4900.c index 35dd2d09b4d4..d9ee8fc77ccd 100644 --- a/drivers/gpio/gpio-ts4900.c +++ b/drivers/gpio/gpio-ts4900.c @@ -119,7 +119,7 @@ static const struct gpio_chip template_chip = { .direction_input = ts4900_gpio_direction_input, .direction_output = ts4900_gpio_direction_output, .get = ts4900_gpio_get, - .set_rv = ts4900_gpio_set, + .set = ts4900_gpio_set, .base = -1, .can_sleep = true, }; diff --git a/drivers/gpio/gpio-ts5500.c b/drivers/gpio/gpio-ts5500.c index bb432ed73698..3c7f2efe10fd 100644 --- a/drivers/gpio/gpio-ts5500.c +++ b/drivers/gpio/gpio-ts5500.c @@ -340,7 +340,7 @@ static int ts5500_dio_probe(struct platform_device *pdev) priv->gpio_chip.direction_input = 
ts5500_gpio_input; priv->gpio_chip.direction_output = ts5500_gpio_output; priv->gpio_chip.get = ts5500_gpio_get; - priv->gpio_chip.set_rv = ts5500_gpio_set; + priv->gpio_chip.set = ts5500_gpio_set; priv->gpio_chip.to_irq = ts5500_gpio_to_irq; priv->gpio_chip.base = -1; diff --git a/drivers/gpio/gpio-twl4030.c b/drivers/gpio/gpio-twl4030.c index e39e39e3ef85..a33dc7c7e7a0 100644 --- a/drivers/gpio/gpio-twl4030.c +++ b/drivers/gpio/gpio-twl4030.c @@ -419,7 +419,7 @@ static const struct gpio_chip template_chip = { .direction_output = twl_direction_out, .get_direction = twl_get_direction, .get = twl_get, - .set_rv = twl_set, + .set = twl_set, .to_irq = twl_to_irq, .can_sleep = true, }; diff --git a/drivers/gpio/gpio-twl6040.c b/drivers/gpio/gpio-twl6040.c index b2196b62b528..4ec9bcd40439 100644 --- a/drivers/gpio/gpio-twl6040.c +++ b/drivers/gpio/gpio-twl6040.c @@ -69,7 +69,7 @@ static struct gpio_chip twl6040gpo_chip = { .get = twl6040gpo_get, .direction_output = twl6040gpo_direction_out, .get_direction = twl6040gpo_get_direction, - .set_rv = twl6040gpo_set, + .set = twl6040gpo_set, .can_sleep = true, }; diff --git a/drivers/gpio/gpio-uniphier.c b/drivers/gpio/gpio-uniphier.c index 8939556f42b6..197bb1d22b3c 100644 --- a/drivers/gpio/gpio-uniphier.c +++ b/drivers/gpio/gpio-uniphier.c @@ -386,8 +386,8 @@ static int uniphier_gpio_probe(struct platform_device *pdev) chip->direction_input = uniphier_gpio_direction_input; chip->direction_output = uniphier_gpio_direction_output; chip->get = uniphier_gpio_get; - chip->set_rv = uniphier_gpio_set; - chip->set_multiple_rv = uniphier_gpio_set_multiple; + chip->set = uniphier_gpio_set; + chip->set_multiple = uniphier_gpio_set_multiple; chip->to_irq = uniphier_gpio_to_irq; chip->base = -1; chip->ngpio = ngpios; diff --git a/drivers/gpio/gpio-viperboard.c b/drivers/gpio/gpio-viperboard.c index e8e906b54d51..15e495c109d2 100644 --- a/drivers/gpio/gpio-viperboard.c +++ b/drivers/gpio/gpio-viperboard.c @@ -408,7 +408,7 @@ static int 
vprbrd_gpio_probe(struct platform_device *pdev) vb_gpio->gpioa.base = -1; vb_gpio->gpioa.ngpio = 16; vb_gpio->gpioa.can_sleep = true; - vb_gpio->gpioa.set_rv = vprbrd_gpioa_set; + vb_gpio->gpioa.set = vprbrd_gpioa_set; vb_gpio->gpioa.get = vprbrd_gpioa_get; vb_gpio->gpioa.direction_input = vprbrd_gpioa_direction_input; vb_gpio->gpioa.direction_output = vprbrd_gpioa_direction_output; @@ -424,7 +424,7 @@ static int vprbrd_gpio_probe(struct platform_device *pdev) vb_gpio->gpiob.base = -1; vb_gpio->gpiob.ngpio = 16; vb_gpio->gpiob.can_sleep = true; - vb_gpio->gpiob.set_rv = vprbrd_gpiob_set; + vb_gpio->gpiob.set = vprbrd_gpiob_set; vb_gpio->gpiob.get = vprbrd_gpiob_get; vb_gpio->gpiob.direction_input = vprbrd_gpiob_direction_input; vb_gpio->gpiob.direction_output = vprbrd_gpiob_direction_output; diff --git a/drivers/gpio/gpio-virtio.c b/drivers/gpio/gpio-virtio.c index 07552611da98..17e040991e46 100644 --- a/drivers/gpio/gpio-virtio.c +++ b/drivers/gpio/gpio-virtio.c @@ -567,7 +567,7 @@ static int virtio_gpio_probe(struct virtio_device *vdev) vgpio->gc.direction_input = virtio_gpio_direction_input; vgpio->gc.direction_output = virtio_gpio_direction_output; vgpio->gc.get = virtio_gpio_get; - vgpio->gc.set_rv = virtio_gpio_set; + vgpio->gc.set = virtio_gpio_set; vgpio->gc.ngpio = ngpio; vgpio->gc.base = -1; /* Allocate base dynamically */ vgpio->gc.label = dev_name(dev); diff --git a/drivers/gpio/gpio-vx855.c b/drivers/gpio/gpio-vx855.c index a3bceac7854c..84b3a973a503 100644 --- a/drivers/gpio/gpio-vx855.c +++ b/drivers/gpio/gpio-vx855.c @@ -216,7 +216,7 @@ static void vx855gpio_gpio_setup(struct vx855_gpio *vg) c->direction_input = vx855gpio_direction_input; c->direction_output = vx855gpio_direction_output; c->get = vx855gpio_get; - c->set_rv = vx855gpio_set; + c->set = vx855gpio_set; c->set_config = vx855gpio_set_config; c->dbg_show = NULL; c->base = 0; diff --git a/drivers/gpio/gpio-wcd934x.c b/drivers/gpio/gpio-wcd934x.c index c89da9a22016..4af504c23e6f 100644 --- 
a/drivers/gpio/gpio-wcd934x.c +++ b/drivers/gpio/gpio-wcd934x.c @@ -98,7 +98,7 @@ static int wcd_gpio_probe(struct platform_device *pdev) chip->direction_output = wcd_gpio_direction_output; chip->get_direction = wcd_gpio_get_direction; chip->get = wcd_gpio_get; - chip->set_rv = wcd_gpio_set; + chip->set = wcd_gpio_set; chip->parent = dev; chip->base = -1; chip->ngpio = WCD934X_NPINS; diff --git a/drivers/gpio/gpio-wcove.c b/drivers/gpio/gpio-wcove.c index f7df3d5fc71c..4a5e20e936a9 100644 --- a/drivers/gpio/gpio-wcove.c +++ b/drivers/gpio/gpio-wcove.c @@ -439,7 +439,7 @@ static int wcove_gpio_probe(struct platform_device *pdev) wg->chip.direction_output = wcove_gpio_dir_out; wg->chip.get_direction = wcove_gpio_get_direction; wg->chip.get = wcove_gpio_get; - wg->chip.set_rv = wcove_gpio_set; + wg->chip.set = wcove_gpio_set; wg->chip.set_config = wcove_gpio_set_config; wg->chip.base = -1; wg->chip.ngpio = WCOVE_VGPIO_NUM; diff --git a/drivers/gpio/gpio-winbond.c b/drivers/gpio/gpio-winbond.c index 421655b5d4c2..dcfda738fd69 100644 --- a/drivers/gpio/gpio-winbond.c +++ b/drivers/gpio/gpio-winbond.c @@ -494,7 +494,7 @@ static struct gpio_chip winbond_gpio_chip = { .can_sleep = true, .get = winbond_gpio_get, .direction_input = winbond_gpio_direction_in, - .set_rv = winbond_gpio_set, + .set = winbond_gpio_set, .direction_output = winbond_gpio_direction_out, }; diff --git a/drivers/gpio/gpio-wm831x.c b/drivers/gpio/gpio-wm831x.c index ab58aa7c0b99..f03c0e808fab 100644 --- a/drivers/gpio/gpio-wm831x.c +++ b/drivers/gpio/gpio-wm831x.c @@ -253,7 +253,7 @@ static const struct gpio_chip template_chip = { .direction_input = wm831x_gpio_direction_in, .get = wm831x_gpio_get, .direction_output = wm831x_gpio_direction_out, - .set_rv = wm831x_gpio_set, + .set = wm831x_gpio_set, .to_irq = wm831x_gpio_to_irq, .set_config = wm831x_set_config, .dbg_show = wm831x_gpio_dbg_show, diff --git a/drivers/gpio/gpio-wm8350.c b/drivers/gpio/gpio-wm8350.c index 9a7677f841fc..46923b23a72e 100644 
--- a/drivers/gpio/gpio-wm8350.c +++ b/drivers/gpio/gpio-wm8350.c @@ -93,7 +93,7 @@ static const struct gpio_chip template_chip = { .direction_input = wm8350_gpio_direction_in, .get = wm8350_gpio_get, .direction_output = wm8350_gpio_direction_out, - .set_rv = wm8350_gpio_set, + .set = wm8350_gpio_set, .to_irq = wm8350_gpio_to_irq, .can_sleep = true, }; diff --git a/drivers/gpio/gpio-wm8994.c b/drivers/gpio/gpio-wm8994.c index ccc005628dd2..df47a27f508d 100644 --- a/drivers/gpio/gpio-wm8994.c +++ b/drivers/gpio/gpio-wm8994.c @@ -256,7 +256,7 @@ static const struct gpio_chip template_chip = { .direction_input = wm8994_gpio_direction_in, .get = wm8994_gpio_get, .direction_output = wm8994_gpio_direction_out, - .set_rv = wm8994_gpio_set, + .set = wm8994_gpio_set, .set_config = wm8994_gpio_set_config, .to_irq = wm8994_gpio_to_irq, .dbg_show = wm8994_gpio_dbg_show, diff --git a/drivers/gpio/gpio-xgene.c b/drivers/gpio/gpio-xgene.c index 28f794e5eb26..4f627de3f56c 100644 --- a/drivers/gpio/gpio-xgene.c +++ b/drivers/gpio/gpio-xgene.c @@ -178,7 +178,7 @@ static int xgene_gpio_probe(struct platform_device *pdev) gpio->chip.direction_input = xgene_gpio_dir_in; gpio->chip.direction_output = xgene_gpio_dir_out; gpio->chip.get = xgene_gpio_get; - gpio->chip.set_rv = xgene_gpio_set; + gpio->chip.set = xgene_gpio_set; gpio->chip.label = dev_name(&pdev->dev); gpio->chip.base = -1; diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c index 36d91cacc2d9..83675ac81077 100644 --- a/drivers/gpio/gpio-xilinx.c +++ b/drivers/gpio/gpio-xilinx.c @@ -604,10 +604,10 @@ static int xgpio_probe(struct platform_device *pdev) chip->gc.direction_input = xgpio_dir_in; chip->gc.direction_output = xgpio_dir_out; chip->gc.get = xgpio_get; - chip->gc.set_rv = xgpio_set; + chip->gc.set = xgpio_set; chip->gc.request = xgpio_request; chip->gc.free = xgpio_free; - chip->gc.set_multiple_rv = xgpio_set_multiple; + chip->gc.set_multiple = xgpio_set_multiple; chip->gc.label = dev_name(dev); diff 
--git a/drivers/gpio/gpio-xlp.c b/drivers/gpio/gpio-xlp.c index bcd2dfec462d..aede6324387f 100644 --- a/drivers/gpio/gpio-xlp.c +++ b/drivers/gpio/gpio-xlp.c @@ -274,7 +274,7 @@ static int xlp_gpio_probe(struct platform_device *pdev) gc->ngpio = 70; gc->direction_output = xlp_gpio_dir_output; gc->direction_input = xlp_gpio_dir_input; - gc->set_rv = xlp_gpio_set; + gc->set = xlp_gpio_set; gc->get = xlp_gpio_get; spin_lock_init(&priv->lock); diff --git a/drivers/gpio/gpio-xra1403.c b/drivers/gpio/gpio-xra1403.c index 70402c6b5407..faadcb4b0b2d 100644 --- a/drivers/gpio/gpio-xra1403.c +++ b/drivers/gpio/gpio-xra1403.c @@ -164,7 +164,7 @@ static int xra1403_probe(struct spi_device *spi) xra->chip.direction_output = xra1403_direction_output; xra->chip.get_direction = xra1403_get_direction; xra->chip.get = xra1403_get; - xra->chip.set_rv = xra1403_set; + xra->chip.set = xra1403_set; xra->chip.dbg_show = xra1403_dbg_show; diff --git a/drivers/gpio/gpio-xtensa.c b/drivers/gpio/gpio-xtensa.c index e7ff3c60324d..4418947a10e5 100644 --- a/drivers/gpio/gpio-xtensa.c +++ b/drivers/gpio/gpio-xtensa.c @@ -132,7 +132,7 @@ static struct gpio_chip expstate_chip = { .ngpio = 32, .get_direction = xtensa_expstate_get_direction, .get = xtensa_expstate_get_value, - .set_rv = xtensa_expstate_set_value, + .set = xtensa_expstate_set_value, }; static int xtensa_gpio_probe(struct platform_device *pdev) diff --git a/drivers/gpio/gpio-zevio.c b/drivers/gpio/gpio-zevio.c index 0799f7976710..29375bea2289 100644 --- a/drivers/gpio/gpio-zevio.c +++ b/drivers/gpio/gpio-zevio.c @@ -161,7 +161,7 @@ static int zevio_gpio_to_irq(struct gpio_chip *chip, unsigned pin) static const struct gpio_chip zevio_gpio_chip = { .direction_input = zevio_gpio_direction_input, .direction_output = zevio_gpio_direction_output, - .set_rv = zevio_gpio_set, + .set = zevio_gpio_set, .get = zevio_gpio_get, .to_irq = zevio_gpio_to_irq, .base = 0, diff --git a/drivers/gpio/gpio-zynq.c b/drivers/gpio/gpio-zynq.c index 
b22b4e25c68d..0ffd76e8951f 100644 --- a/drivers/gpio/gpio-zynq.c +++ b/drivers/gpio/gpio-zynq.c @@ -932,7 +932,7 @@ static int zynq_gpio_probe(struct platform_device *pdev) chip->owner = THIS_MODULE; chip->parent = &pdev->dev; chip->get = zynq_gpio_get_value; - chip->set_rv = zynq_gpio_set_value; + chip->set = zynq_gpio_set_value; chip->request = zynq_gpio_request; chip->free = zynq_gpio_free; chip->direction_input = zynq_gpio_dir_in; diff --git a/drivers/gpio/gpio-zynqmp-modepin.c b/drivers/gpio/gpio-zynqmp-modepin.c index 6dc5d7acb89c..5e651482e985 100644 --- a/drivers/gpio/gpio-zynqmp-modepin.c +++ b/drivers/gpio/gpio-zynqmp-modepin.c @@ -130,7 +130,7 @@ static int modepin_gpio_probe(struct platform_device *pdev) chip->owner = THIS_MODULE; chip->parent = &pdev->dev; chip->get = modepin_gpio_get_value; - chip->set_rv = modepin_gpio_set_value; + chip->set = modepin_gpio_set_value; chip->direction_input = modepin_gpio_dir_in; chip->direction_output = modepin_gpio_dir_out; chip->label = dev_name(&pdev->dev); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 9ac4c23d656a..0d2b470a252e 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2886,10 +2886,10 @@ static int gpiochip_set(struct gpio_chip *gc, unsigned int offset, int value) lockdep_assert_held(&gc->gpiodev->srcu); - if (WARN_ON(unlikely(!gc->set_rv))) + if (WARN_ON(unlikely(!gc->set))) return -EOPNOTSUPP; - ret = gc->set_rv(gc, offset, value); + ret = gc->set(gc, offset, value); if (ret > 0) ret = -EBADE; @@ -2909,7 +2909,7 @@ static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value) * output-only, but if there is then not even a .set() operation it * is pretty tricky to drive the output line. 
*/ - if (!guard.gc->set_rv && !guard.gc->direction_output) { + if (!guard.gc->set && !guard.gc->direction_output) { gpiod_warn(desc, "%s: missing set() and direction_output() operations\n", __func__); @@ -3655,8 +3655,8 @@ static int gpiochip_set_multiple(struct gpio_chip *gc, lockdep_assert_held(&gc->gpiodev->srcu); - if (gc->set_multiple_rv) { - ret = gc->set_multiple_rv(gc, mask, bits); + if (gc->set_multiple) { + ret = gc->set_multiple(gc, mask, bits); if (ret > 0) ret = -EBADE; diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index e3a8c0c0c945..464390372b34 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -1836,7 +1836,7 @@ static int ti_sn_gpio_probe(struct auxiliary_device *adev, pdata->gchip.direction_input = ti_sn_bridge_gpio_direction_input; pdata->gchip.direction_output = ti_sn_bridge_gpio_direction_output; pdata->gchip.get = ti_sn_bridge_gpio_get; - pdata->gchip.set_rv = ti_sn_bridge_gpio_set; + pdata->gchip.set = ti_sn_bridge_gpio_set; pdata->gchip.can_sleep = true; pdata->gchip.names = ti_sn_bridge_gpio_names; pdata->gchip.ngpio = SN_NUM_GPIOS; diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 234fa82eab07..482f62a78c41 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -1288,7 +1288,7 @@ static int cp2112_probe(struct hid_device *hdev, const struct hid_device_id *id) dev->gc.label = "cp2112_gpio"; dev->gc.direction_input = cp2112_gpio_direction_input; dev->gc.direction_output = cp2112_gpio_direction_output; - dev->gc.set_rv = cp2112_gpio_set; + dev->gc.set = cp2112_gpio_set; dev->gc.get = cp2112_gpio_get; dev->gc.base = -1; dev->gc.ngpio = CP2112_GPIO_MAX_GPIO; diff --git a/drivers/hid/hid-mcp2200.c b/drivers/hid/hid-mcp2200.c index e6ea0a2140eb..dafdd5b4a079 100644 --- a/drivers/hid/hid-mcp2200.c +++ b/drivers/hid/hid-mcp2200.c @@ -279,8 +279,8 @@ static const struct gpio_chip template_chip = { .get_direction = 
mcp_get_direction, .direction_input = mcp_direction_input, .direction_output = mcp_direction_output, - .set_rv = mcp_set, - .set_multiple_rv = mcp_set_multiple, + .set = mcp_set, + .set_multiple = mcp_set_multiple, .get = mcp_get, .get_multiple = mcp_get_multiple, .base = -1, diff --git a/drivers/hid/hid-mcp2221.c b/drivers/hid/hid-mcp2221.c index fcfe9370a887..475ac352df30 100644 --- a/drivers/hid/hid-mcp2221.c +++ b/drivers/hid/hid-mcp2221.c @@ -1298,7 +1298,7 @@ static int mcp2221_probe(struct hid_device *hdev, mcp->gc->direction_input = mcp_gpio_direction_input; mcp->gc->direction_output = mcp_gpio_direction_output; mcp->gc->get_direction = mcp_gpio_get_direction; - mcp->gc->set_rv = mcp_gpio_set; + mcp->gc->set = mcp_gpio_set; mcp->gc->get = mcp_gpio_get; mcp->gc->ngpio = MCP_NGPIO; mcp->gc->base = -1; diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c index a07e2eb93c71..1fcd320d6161 100644 --- a/drivers/hwmon/ltc2992.c +++ b/drivers/hwmon/ltc2992.c @@ -339,8 +339,8 @@ static int ltc2992_config_gpio(struct ltc2992_state *st) st->gc.ngpio = ARRAY_SIZE(st->gpio_names); st->gc.get = ltc2992_gpio_get; st->gc.get_multiple = ltc2992_gpio_get_multiple; - st->gc.set_rv = ltc2992_gpio_set; - st->gc.set_multiple_rv = ltc2992_gpio_set_multiple; + st->gc.set = ltc2992_gpio_set; + st->gc.set_multiple = ltc2992_gpio_set_multiple; ret = devm_gpiochip_add_data(&st->client->dev, &st->gc, st); if (ret) diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c index 52d4000902d5..55e7af3a5f98 100644 --- a/drivers/hwmon/pmbus/ucd9000.c +++ b/drivers/hwmon/pmbus/ucd9000.c @@ -364,7 +364,7 @@ static void ucd9000_probe_gpio(struct i2c_client *client, data->gpio.direction_input = ucd9000_gpio_direction_input; data->gpio.direction_output = ucd9000_gpio_direction_output; data->gpio.get = ucd9000_gpio_get; - data->gpio.set_rv = ucd9000_gpio_set; + data->gpio.set = ucd9000_gpio_set; data->gpio.can_sleep = true; data->gpio.base = -1; data->gpio.parent = 
&client->dev; diff --git a/drivers/i2c/muxes/i2c-mux-ltc4306.c b/drivers/i2c/muxes/i2c-mux-ltc4306.c index c688af270a11..50fbc0d06e62 100644 --- a/drivers/i2c/muxes/i2c-mux-ltc4306.c +++ b/drivers/i2c/muxes/i2c-mux-ltc4306.c @@ -164,7 +164,7 @@ static int ltc4306_gpio_init(struct ltc4306 *data) data->gpiochip.direction_input = ltc4306_gpio_direction_input; data->gpiochip.direction_output = ltc4306_gpio_direction_output; data->gpiochip.get = ltc4306_gpio_get; - data->gpiochip.set_rv = ltc4306_gpio_set; + data->gpiochip.set = ltc4306_gpio_set; data->gpiochip.set_config = ltc4306_gpio_set_config; data->gpiochip.owner = THIS_MODULE; diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c index 6cf790ff3eb5..dcdb5778f7d6 100644 --- a/drivers/iio/adc/ad4130.c +++ b/drivers/iio/adc/ad4130.c @@ -2064,7 +2064,7 @@ static int ad4130_probe(struct spi_device *spi) st->gc.can_sleep = true; st->gc.init_valid_mask = ad4130_gpio_init_valid_mask; st->gc.get_direction = ad4130_gpio_get_direction; - st->gc.set_rv = ad4130_gpio_set; + st->gc.set = ad4130_gpio_set; ret = devm_gpiochip_add_data(dev, &st->gc, st); if (ret) diff --git a/drivers/iio/adc/ad4170-4.c b/drivers/iio/adc/ad4170-4.c index 6cd84d6fb08b..efaed92191f1 100644 --- a/drivers/iio/adc/ad4170-4.c +++ b/drivers/iio/adc/ad4170-4.c @@ -1807,7 +1807,7 @@ static int ad4170_gpio_init(struct iio_dev *indio_dev) st->gpiochip.direction_input = ad4170_gpio_direction_input; st->gpiochip.direction_output = ad4170_gpio_direction_output; st->gpiochip.get = ad4170_gpio_get; - st->gpiochip.set_rv = ad4170_gpio_set; + st->gpiochip.set = ad4170_gpio_set; st->gpiochip.owner = THIS_MODULE; return devm_gpiochip_add_data(&st->spi->dev, &st->gpiochip, indio_dev); diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c index a2e061f0cb08..ca8fa91796ca 100644 --- a/drivers/iio/adc/ad7768-1.c +++ b/drivers/iio/adc/ad7768-1.c @@ -673,7 +673,7 @@ static int ad7768_gpio_init(struct iio_dev *indio_dev) .direction_input = 
ad7768_gpio_direction_input, .direction_output = ad7768_gpio_direction_output, .get = ad7768_gpio_get, - .set_rv = ad7768_gpio_set, + .set = ad7768_gpio_set, .owner = THIS_MODULE, }; diff --git a/drivers/iio/adc/rohm-bd79124.c b/drivers/iio/adc/rohm-bd79124.c index bb7c93ae4055..06c55c8da93f 100644 --- a/drivers/iio/adc/rohm-bd79124.c +++ b/drivers/iio/adc/rohm-bd79124.c @@ -246,8 +246,8 @@ static int bd79124_init_valid_mask(struct gpio_chip *gc, static const struct gpio_chip bd79124gpo_chip = { .label = "bd79124-gpo", .get_direction = bd79124gpo_direction_get, - .set_rv = bd79124gpo_set, - .set_multiple_rv = bd79124gpo_set_multiple, + .set = bd79124gpo_set, + .set_multiple = bd79124gpo_set_multiple, .init_valid_mask = bd79124_init_valid_mask, .can_sleep = true, .ngpio = 8, diff --git a/drivers/iio/adc/ti-ads7950.c b/drivers/iio/adc/ti-ads7950.c index 0356ccf23fea..bbe1ce577789 100644 --- a/drivers/iio/adc/ti-ads7950.c +++ b/drivers/iio/adc/ti-ads7950.c @@ -648,7 +648,7 @@ static int ti_ads7950_probe(struct spi_device *spi) st->chip.direction_input = ti_ads7950_direction_input; st->chip.direction_output = ti_ads7950_direction_output; st->chip.get = ti_ads7950_get; - st->chip.set_rv = ti_ads7950_set; + st->chip.set = ti_ads7950_set; ret = gpiochip_add_data(&st->chip, st); if (ret) { diff --git a/drivers/iio/addac/ad74115.c b/drivers/iio/addac/ad74115.c index 4d8b64048e4f..f8b04d86b01f 100644 --- a/drivers/iio/addac/ad74115.c +++ b/drivers/iio/addac/ad74115.c @@ -1577,7 +1577,7 @@ static int ad74115_setup_gpio_chip(struct ad74115_state *st) .direction_input = ad74115_gpio_direction_input, .direction_output = ad74115_gpio_direction_output, .get = ad74115_gpio_get, - .set_rv = ad74115_gpio_set, + .set = ad74115_gpio_set, }; return devm_gpiochip_add_data(dev, &st->gc, st); diff --git a/drivers/iio/addac/ad74413r.c b/drivers/iio/addac/ad74413r.c index a0bb1dbcb7ad..a20b4d48c5f7 100644 --- a/drivers/iio/addac/ad74413r.c +++ b/drivers/iio/addac/ad74413r.c @@ -1425,8 
+1425,8 @@ static int ad74413r_probe(struct spi_device *spi) st->gpo_gpiochip.ngpio = st->num_gpo_gpios; st->gpo_gpiochip.parent = st->dev; st->gpo_gpiochip.can_sleep = true; - st->gpo_gpiochip.set_rv = ad74413r_gpio_set; - st->gpo_gpiochip.set_multiple_rv = ad74413r_gpio_set_multiple; + st->gpo_gpiochip.set = ad74413r_gpio_set; + st->gpo_gpiochip.set_multiple = ad74413r_gpio_set_multiple; st->gpo_gpiochip.set_config = ad74413r_gpio_set_gpo_config; st->gpo_gpiochip.get_direction = ad74413r_gpio_get_gpo_direction; diff --git a/drivers/iio/dac/ad5592r-base.c b/drivers/iio/dac/ad5592r-base.c index 5f2cd51723f6..4720733d66b2 100644 --- a/drivers/iio/dac/ad5592r-base.c +++ b/drivers/iio/dac/ad5592r-base.c @@ -129,7 +129,7 @@ static int ad5592r_gpio_init(struct ad5592r_state *st) st->gpiochip.direction_input = ad5592r_gpio_direction_input; st->gpiochip.direction_output = ad5592r_gpio_direction_output; st->gpiochip.get = ad5592r_gpio_get; - st->gpiochip.set_rv = ad5592r_gpio_set; + st->gpiochip.set = ad5592r_gpio_set; st->gpiochip.request = ad5592r_gpio_request; st->gpiochip.owner = THIS_MODULE; st->gpiochip.names = ad5592r_gpio_names; diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c index 2b2aca08423a..414fbef4abf9 100644 --- a/drivers/input/keyboard/adp5588-keys.c +++ b/drivers/input/keyboard/adp5588-keys.c @@ -425,7 +425,7 @@ static int adp5588_gpio_add(struct adp5588_kpad *kpad) kpad->gc.direction_input = adp5588_gpio_direction_input; kpad->gc.direction_output = adp5588_gpio_direction_output; kpad->gc.get = adp5588_gpio_get_value; - kpad->gc.set_rv = adp5588_gpio_set_value; + kpad->gc.set = adp5588_gpio_set_value; kpad->gc.set_config = adp5588_gpio_set_config; kpad->gc.can_sleep = 1; diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c index d2a3a5e016b6..8b4f3e3660b8 100644 --- a/drivers/input/touchscreen/ad7879.c +++ b/drivers/input/touchscreen/ad7879.c @@ -475,7 +475,7 @@ static int 
ad7879_gpio_add(struct ad7879 *ts) ts->gc.direction_input = ad7879_gpio_direction_input; ts->gc.direction_output = ad7879_gpio_direction_output; ts->gc.get = ad7879_gpio_get_value; - ts->gc.set_rv = ad7879_gpio_set_value; + ts->gc.set = ad7879_gpio_set_value; ts->gc.can_sleep = 1; ts->gc.base = -1; ts->gc.ngpio = 1; diff --git a/drivers/leds/blink/leds-lgm-sso.c b/drivers/leds/blink/leds-lgm-sso.c index c9027f9c4bb7..8923d2df4704 100644 --- a/drivers/leds/blink/leds-lgm-sso.c +++ b/drivers/leds/blink/leds-lgm-sso.c @@ -471,7 +471,7 @@ static int sso_gpio_gc_init(struct device *dev, struct sso_led_priv *priv) gc->get_direction = sso_gpio_get_dir; gc->direction_output = sso_gpio_dir_out; gc->get = sso_gpio_get; - gc->set_rv = sso_gpio_set; + gc->set = sso_gpio_set; gc->label = "lgm-sso"; gc->base = -1; diff --git a/drivers/leds/leds-pca9532.c b/drivers/leds/leds-pca9532.c index 7d4c071a6cd0..0344189bb991 100644 --- a/drivers/leds/leds-pca9532.c +++ b/drivers/leds/leds-pca9532.c @@ -473,7 +473,7 @@ static int pca9532_configure(struct i2c_client *client, data->gpio.label = "gpio-pca9532"; data->gpio.direction_input = pca9532_gpio_direction_input; data->gpio.direction_output = pca9532_gpio_direction_output; - data->gpio.set_rv = pca9532_gpio_set_value; + data->gpio.set = pca9532_gpio_set_value; data->gpio.get = pca9532_gpio_get_value; data->gpio.request = pca9532_gpio_request_pin; data->gpio.can_sleep = 1; diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c index 70d109246088..2007fe6217ec 100644 --- a/drivers/leds/leds-pca955x.c +++ b/drivers/leds/leds-pca955x.c @@ -737,7 +737,7 @@ static int pca955x_probe(struct i2c_client *client) pca955x->gpio.label = "gpio-pca955x"; pca955x->gpio.direction_input = pca955x_gpio_direction_input; pca955x->gpio.direction_output = pca955x_gpio_direction_output; - pca955x->gpio.set_rv = pca955x_gpio_set_value; + pca955x->gpio.set = pca955x_gpio_set_value; pca955x->gpio.get = pca955x_gpio_get_value; 
pca955x->gpio.request = pca955x_gpio_request_pin; pca955x->gpio.free = pca955x_gpio_free_pin; diff --git a/drivers/leds/leds-tca6507.c b/drivers/leds/leds-tca6507.c index 89c165c8ee9c..fd0e8bab9a4b 100644 --- a/drivers/leds/leds-tca6507.c +++ b/drivers/leds/leds-tca6507.c @@ -637,7 +637,7 @@ static int tca6507_probe_gpios(struct device *dev, tca->gpio.base = -1; tca->gpio.owner = THIS_MODULE; tca->gpio.direction_output = tca6507_gpio_direction_output; - tca->gpio.set_rv = tca6507_gpio_set_value; + tca->gpio.set = tca6507_gpio_set_value; tca->gpio.parent = dev; err = devm_gpiochip_add_data(dev, &tca->gpio, tca); if (err) { diff --git a/drivers/media/dvb-frontends/cxd2820r_core.c b/drivers/media/dvb-frontends/cxd2820r_core.c index a31a8a6a4946..5aa3d45a691a 100644 --- a/drivers/media/dvb-frontends/cxd2820r_core.c +++ b/drivers/media/dvb-frontends/cxd2820r_core.c @@ -651,7 +651,7 @@ static int cxd2820r_probe(struct i2c_client *client) priv->gpio_chip.parent = &client->dev; priv->gpio_chip.owner = THIS_MODULE; priv->gpio_chip.direction_output = cxd2820r_gpio_direction_output; - priv->gpio_chip.set_rv = cxd2820r_gpio_set; + priv->gpio_chip.set = cxd2820r_gpio_set; priv->gpio_chip.get = cxd2820r_gpio_get; priv->gpio_chip.base = -1; /* Dynamic allocation */ priv->gpio_chip.ngpio = GPIO_COUNT; diff --git a/drivers/media/i2c/ds90ub913.c b/drivers/media/i2c/ds90ub913.c index bc74499b0a96..a80da2b4a8fa 100644 --- a/drivers/media/i2c/ds90ub913.c +++ b/drivers/media/i2c/ds90ub913.c @@ -235,7 +235,7 @@ static int ub913_gpiochip_probe(struct ub913_data *priv) gc->ngpio = UB913_NUM_GPIOS; gc->get_direction = ub913_gpio_get_direction; gc->direction_output = ub913_gpio_direction_out; - gc->set_rv = ub913_gpio_set; + gc->set = ub913_gpio_set; gc->of_xlate = ub913_gpio_of_xlate; gc->of_gpio_n_cells = 2; diff --git a/drivers/media/i2c/ds90ub953.c b/drivers/media/i2c/ds90ub953.c index a865bfc89500..e3fc9d66970a 100644 --- a/drivers/media/i2c/ds90ub953.c +++ 
b/drivers/media/i2c/ds90ub953.c @@ -361,7 +361,7 @@ static int ub953_gpiochip_probe(struct ub953_data *priv) gc->direction_input = ub953_gpio_direction_in; gc->direction_output = ub953_gpio_direction_out; gc->get = ub953_gpio_get; - gc->set_rv = ub953_gpio_set; + gc->set = ub953_gpio_set; gc->of_xlate = ub953_gpio_of_xlate; gc->of_gpio_n_cells = 2; diff --git a/drivers/media/i2c/max9286.c b/drivers/media/i2c/max9286.c index 1d0b5f56f989..7c0961688d61 100644 --- a/drivers/media/i2c/max9286.c +++ b/drivers/media/i2c/max9286.c @@ -1220,7 +1220,7 @@ static int max9286_register_gpio(struct max9286_priv *priv) gpio->owner = THIS_MODULE; gpio->ngpio = 2; gpio->base = -1; - gpio->set_rv = max9286_gpiochip_set; + gpio->set = max9286_gpiochip_set; gpio->get = max9286_gpiochip_get; gpio->can_sleep = true; diff --git a/drivers/media/i2c/max96717.c b/drivers/media/i2c/max96717.c index 015e42fbe246..c8ae7890d9fa 100644 --- a/drivers/media/i2c/max96717.c +++ b/drivers/media/i2c/max96717.c @@ -355,7 +355,7 @@ static int max96717_gpiochip_probe(struct max96717_priv *priv) gc->get_direction = max96717_gpio_get_direction; gc->direction_input = max96717_gpio_direction_in; gc->direction_output = max96717_gpio_direction_out; - gc->set_rv = max96717_gpiochip_set; + gc->set = max96717_gpiochip_set; gc->get = max96717_gpiochip_get; /* Disable GPIO forwarding */ diff --git a/drivers/media/pci/solo6x10/solo6x10-gpio.c b/drivers/media/pci/solo6x10/solo6x10-gpio.c index b16a8453a62a..71848741c55c 100644 --- a/drivers/media/pci/solo6x10/solo6x10-gpio.c +++ b/drivers/media/pci/solo6x10/solo6x10-gpio.c @@ -158,7 +158,7 @@ int solo_gpio_init(struct solo_dev *solo_dev) solo_dev->gpio_dev.get_direction = solo_gpiochip_get_direction; solo_dev->gpio_dev.get = solo_gpiochip_get; - solo_dev->gpio_dev.set_rv = solo_gpiochip_set; + solo_dev->gpio_dev.set = solo_gpiochip_set; ret = gpiochip_add_data(&solo_dev->gpio_dev, solo_dev); diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c index 
a5f9241fa3f2..50bf3260f65d 100644 --- a/drivers/mfd/sm501.c +++ b/drivers/mfd/sm501.c @@ -965,7 +965,7 @@ static const struct gpio_chip gpio_chip_template = { .ngpio = 32, .direction_input = sm501_gpio_input, .direction_output = sm501_gpio_output, - .set_rv = sm501_gpio_set, + .set = sm501_gpio_set, .get = sm501_gpio_get, }; diff --git a/drivers/mfd/tps65010.c b/drivers/mfd/tps65010.c index 03bd5cd66798..8a144ec52201 100644 --- a/drivers/mfd/tps65010.c +++ b/drivers/mfd/tps65010.c @@ -620,7 +620,7 @@ static int tps65010_probe(struct i2c_client *client) tps->chip.parent = &client->dev; tps->chip.owner = THIS_MODULE; - tps->chip.set_rv = tps65010_gpio_set; + tps->chip.set = tps65010_gpio_set; tps->chip.direction_output = tps65010_output; /* NOTE: only partial support for inputs; nyet IRQs */ diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c index fd71ba29f6b5..4b450d78a65f 100644 --- a/drivers/mfd/ucb1x00-core.c +++ b/drivers/mfd/ucb1x00-core.c @@ -570,7 +570,7 @@ static int ucb1x00_probe(struct mcp *mcp) ucb->gpio.owner = THIS_MODULE; ucb->gpio.base = pdata->gpio_base; ucb->gpio.ngpio = 10; - ucb->gpio.set_rv = ucb1x00_gpio_set; + ucb->gpio.set = ucb1x00_gpio_set; ucb->gpio.get = ucb1x00_gpio_get; ucb->gpio.direction_input = ucb1x00_gpio_direction_input; ucb->gpio.direction_output = ucb1x00_gpio_direction_output; diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c index ff8f4404d10f..8eddbaa1fccd 100644 --- a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c +++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c @@ -438,7 +438,7 @@ static int pci1xxxx_gpio_setup(struct pci1xxxx_gpio *priv, int irq) gchip->direction_output = pci1xxxx_gpio_direction_output; gchip->get_direction = pci1xxxx_gpio_get_direction; gchip->get = pci1xxxx_gpio_get; - gchip->set_rv = pci1xxxx_gpio_set; + gchip->set = pci1xxxx_gpio_set; gchip->set_config = pci1xxxx_gpio_set_config; gchip->dbg_show = NULL; gchip->base = 
-1; diff --git a/drivers/misc/ti_fpc202.c b/drivers/misc/ti_fpc202.c index 0b1a6350c02b..7964e46c7448 100644 --- a/drivers/misc/ti_fpc202.c +++ b/drivers/misc/ti_fpc202.c @@ -333,7 +333,7 @@ static int fpc202_probe(struct i2c_client *client) priv->gpio.base = -1; priv->gpio.direction_input = fpc202_gpio_direction_input; priv->gpio.direction_output = fpc202_gpio_direction_output; - priv->gpio.set_rv = fpc202_gpio_set; + priv->gpio.set = fpc202_gpio_set; priv->gpio.get = fpc202_gpio_get; priv->gpio.ngpio = FPC202_GPIO_COUNT; priv->gpio.parent = dev; diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index 5a95877b7419..313e1d241f01 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -607,8 +607,8 @@ static int mcp251x_gpio_setup(struct mcp251x_priv *priv) gpio->get_direction = mcp251x_gpio_get_direction; gpio->get = mcp251x_gpio_get; gpio->get_multiple = mcp251x_gpio_get_multiple; - gpio->set_rv = mcp251x_gpio_set; - gpio->set_multiple_rv = mcp251x_gpio_set_multiple; + gpio->set = mcp251x_gpio_set; + gpio->set_multiple = mcp251x_gpio_set_multiple; gpio->base = -1; gpio->ngpio = ARRAY_SIZE(mcp251x_gpio_names); gpio->names = mcp251x_gpio_names; diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index e5bed4237ff4..548b85befbf4 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2187,7 +2187,7 @@ mt7530_setup_gpio(struct mt7530_priv *priv) gc->direction_input = mt7530_gpio_direction_input; gc->direction_output = mt7530_gpio_direction_output; gc->get = mt7530_gpio_get; - gc->set_rv = mt7530_gpio_set; + gc->set = mt7530_gpio_set; gc->base = -1; gc->ngpio = 15; gc->can_sleep = true; diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c index 4f9687ab3b2b..9d31b8258268 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-core.c +++ b/drivers/net/dsa/vitesse-vsc73xx-core.c @@ -2317,7 +2317,7 @@ static int vsc73xx_gpio_probe(struct vsc73xx *vsc) 
vsc->gc.parent = vsc->dev; vsc->gc.base = -1; vsc->gc.get = vsc73xx_gpio_get; - vsc->gc.set_rv = vsc73xx_gpio_set; + vsc->gc.set = vsc73xx_gpio_set; vsc->gc.direction_input = vsc73xx_gpio_direction_input; vsc->gc.direction_output = vsc73xx_gpio_direction_output; vsc->gc.get_direction = vsc73xx_gpio_get_direction; diff --git a/drivers/net/phy/qcom/qca807x.c b/drivers/net/phy/qcom/qca807x.c index 04e84ebb646c..070dc8c00835 100644 --- a/drivers/net/phy/qcom/qca807x.c +++ b/drivers/net/phy/qcom/qca807x.c @@ -427,7 +427,7 @@ static int qca807x_gpio(struct phy_device *phydev) gc->get_direction = qca807x_gpio_get_direction; gc->direction_output = qca807x_gpio_dir_out; gc->get = qca807x_gpio_get; - gc->set_rv = qca807x_gpio_set; + gc->set = qca807x_gpio_set; return devm_gpiochip_add_data(dev, gc, priv); } diff --git a/drivers/pinctrl/actions/pinctrl-owl.c b/drivers/pinctrl/actions/pinctrl-owl.c index 86f3d5c69e36..1f0ef4727ba7 100644 --- a/drivers/pinctrl/actions/pinctrl-owl.c +++ b/drivers/pinctrl/actions/pinctrl-owl.c @@ -962,7 +962,7 @@ int owl_pinctrl_probe(struct platform_device *pdev, pctrl->chip.direction_input = owl_gpio_direction_input; pctrl->chip.direction_output = owl_gpio_direction_output; pctrl->chip.get = owl_gpio_get; - pctrl->chip.set_rv = owl_gpio_set; + pctrl->chip.set = owl_gpio_set; pctrl->chip.request = owl_gpio_request; pctrl->chip.free = owl_gpio_free; diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c index 826827800474..7dbf079739bc 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c @@ -397,7 +397,7 @@ static const struct gpio_chip bcm2835_gpio_chip = { .direction_output = bcm2835_gpio_direction_output, .get_direction = bcm2835_gpio_get_direction, .get = bcm2835_gpio_get, - .set_rv = bcm2835_gpio_set, + .set = bcm2835_gpio_set, .set_config = gpiochip_generic_config, .base = -1, .ngpio = BCM2835_NUM_GPIOS, @@ -414,7 +414,7 @@ static const struct gpio_chip 
bcm2711_gpio_chip = { .direction_output = bcm2835_gpio_direction_output, .get_direction = bcm2835_gpio_get_direction, .get = bcm2835_gpio_get, - .set_rv = bcm2835_gpio_set, + .set = bcm2835_gpio_set, .set_config = gpiochip_generic_config, .base = -1, .ngpio = BCM2711_NUM_GPIOS, diff --git a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c index 1d08b8d4cdd7..8c353676f2af 100644 --- a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c @@ -865,7 +865,7 @@ static int iproc_gpio_probe(struct platform_device *pdev) gc->direction_input = iproc_gpio_direction_input; gc->direction_output = iproc_gpio_direction_output; gc->get_direction = iproc_gpio_get_direction; - gc->set_rv = iproc_gpio_set; + gc->set = iproc_gpio_set; gc->get = iproc_gpio_get; chip->pinmux_is_supported = of_property_read_bool(dev->of_node, diff --git a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c index b08f8480ddc6..b425ecacd1b0 100644 --- a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c @@ -656,7 +656,7 @@ static int nsp_gpio_probe(struct platform_device *pdev) gc->direction_input = nsp_gpio_direction_input; gc->direction_output = nsp_gpio_direction_output; gc->get_direction = nsp_gpio_get_direction; - gc->set_rv = nsp_gpio_set; + gc->set = nsp_gpio_set; gc->get = nsp_gpio_get; /* optional GPIO interrupt support */ diff --git a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c index 4e47710eb3d5..68abb6d6cecd 100644 --- a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c +++ b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c @@ -555,7 +555,7 @@ static int cs42l43_pin_probe(struct platform_device *pdev) priv->gpio_chip.direction_output = cs42l43_gpio_direction_out; priv->gpio_chip.add_pin_ranges = cs42l43_gpio_add_pin_ranges; priv->gpio_chip.get = cs42l43_gpio_get; - priv->gpio_chip.set_rv = cs42l43_gpio_set; + priv->gpio_chip.set = 
cs42l43_gpio_set; priv->gpio_chip.label = dev_name(priv->dev); priv->gpio_chip.parent = priv->dev; priv->gpio_chip.can_sleep = true; diff --git a/drivers/pinctrl/cirrus/pinctrl-lochnagar.c b/drivers/pinctrl/cirrus/pinctrl-lochnagar.c index dcc0a2f3c7dd..ca6ae566082b 100644 --- a/drivers/pinctrl/cirrus/pinctrl-lochnagar.c +++ b/drivers/pinctrl/cirrus/pinctrl-lochnagar.c @@ -1161,7 +1161,7 @@ static int lochnagar_pin_probe(struct platform_device *pdev) priv->gpio_chip.request = gpiochip_generic_request; priv->gpio_chip.free = gpiochip_generic_free; priv->gpio_chip.direction_output = lochnagar_gpio_direction_out; - priv->gpio_chip.set_rv = lochnagar_gpio_set; + priv->gpio_chip.set = lochnagar_gpio_set; priv->gpio_chip.can_sleep = true; priv->gpio_chip.parent = dev; priv->gpio_chip.base = -1; diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 6eb649f1ffd6..5fd107a00ef8 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -1231,7 +1231,7 @@ static const struct gpio_chip byt_gpio_chip = { .direction_input = byt_gpio_direction_input, .direction_output = byt_gpio_direction_output, .get = byt_gpio_get, - .set_rv = byt_gpio_set, + .set = byt_gpio_set, .set_config = gpiochip_generic_config, .dbg_show = byt_gpio_dbg_show, }; diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 769e8c4102a5..f81f7929cd3b 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1168,7 +1168,7 @@ static const struct gpio_chip chv_gpio_chip = { .direction_input = chv_gpio_direction_input, .direction_output = chv_gpio_direction_output, .get = chv_gpio_get, - .set_rv = chv_gpio_set, + .set = chv_gpio_set, }; static void chv_gpio_irq_ack(struct irq_data *d) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index f2ff71e5ea6f..d68cef4ec52a 100644 --- 
a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -1114,7 +1114,7 @@ static const struct gpio_chip intel_gpio_chip = { .direction_input = intel_gpio_direction_input, .direction_output = intel_gpio_direction_output, .get = intel_gpio_get, - .set_rv = intel_gpio_set, + .set = intel_gpio_set, .set_config = gpiochip_generic_config, }; diff --git a/drivers/pinctrl/intel/pinctrl-lynxpoint.c b/drivers/pinctrl/intel/pinctrl-lynxpoint.c index 5d4a5dd493d1..3fb628309fb2 100644 --- a/drivers/pinctrl/intel/pinctrl-lynxpoint.c +++ b/drivers/pinctrl/intel/pinctrl-lynxpoint.c @@ -777,7 +777,7 @@ static int lp_gpio_probe(struct platform_device *pdev) gc->direction_input = lp_gpio_direction_input; gc->direction_output = lp_gpio_direction_output; gc->get = lp_gpio_get; - gc->set_rv = lp_gpio_set; + gc->set = lp_gpio_set; gc->set_config = gpiochip_generic_config; gc->get_direction = lp_gpio_get_direction; gc->base = -1; diff --git a/drivers/pinctrl/mediatek/pinctrl-airoha.c b/drivers/pinctrl/mediatek/pinctrl-airoha.c index 1737b88530c3..5f1ec9e0de21 100644 --- a/drivers/pinctrl/mediatek/pinctrl-airoha.c +++ b/drivers/pinctrl/mediatek/pinctrl-airoha.c @@ -2418,7 +2418,7 @@ static int airoha_pinctrl_add_gpiochip(struct airoha_pinctrl *pinctrl, gc->free = gpiochip_generic_free; gc->direction_input = pinctrl_gpio_direction_input; gc->direction_output = airoha_gpio_direction_output; - gc->set_rv = airoha_gpio_set; + gc->set = airoha_gpio_set; gc->get = airoha_gpio_get; gc->base = -1; gc->ngpio = AIROHA_NUM_PINS; diff --git a/drivers/pinctrl/mediatek/pinctrl-moore.c b/drivers/pinctrl/mediatek/pinctrl-moore.c index ba0d6f880c6e..6e4f6c07a509 100644 --- a/drivers/pinctrl/mediatek/pinctrl-moore.c +++ b/drivers/pinctrl/mediatek/pinctrl-moore.c @@ -569,7 +569,7 @@ static int mtk_build_gpiochip(struct mtk_pinctrl *hw) chip->direction_input = pinctrl_gpio_direction_input; chip->direction_output = mtk_gpio_direction_output; chip->get = mtk_gpio_get; - 
chip->set_rv = mtk_gpio_set; + chip->set = mtk_gpio_set; chip->to_irq = mtk_gpio_to_irq; chip->set_config = mtk_gpio_set_config; chip->base = -1; diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index a4cb6d511fcd..d10306024111 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -898,7 +898,7 @@ static const struct gpio_chip mtk_gpio_chip = { .direction_input = pinctrl_gpio_direction_input, .direction_output = mtk_gpio_direction_output, .get = mtk_gpio_get, - .set_rv = mtk_gpio_set, + .set = mtk_gpio_set, .to_irq = mtk_gpio_to_irq, .set_config = mtk_gpio_set_config, }; diff --git a/drivers/pinctrl/mediatek/pinctrl-paris.c b/drivers/pinctrl/mediatek/pinctrl-paris.c index 89ef4e530fcc..3e714554789d 100644 --- a/drivers/pinctrl/mediatek/pinctrl-paris.c +++ b/drivers/pinctrl/mediatek/pinctrl-paris.c @@ -949,7 +949,7 @@ static int mtk_build_gpiochip(struct mtk_pinctrl *hw) chip->direction_input = mtk_gpio_direction_input; chip->direction_output = mtk_gpio_direction_output; chip->get = mtk_gpio_get; - chip->set_rv = mtk_gpio_set; + chip->set = mtk_gpio_set; chip->to_irq = mtk_gpio_to_irq; chip->set_config = mtk_gpio_set_config; chip->base = -1; diff --git a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c index c8958222df8c..e34e984c2b38 100644 --- a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c +++ b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c @@ -888,7 +888,7 @@ static const struct gpio_chip aml_gpio_template = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, .set_config = gpiochip_generic_config, - .set_rv = aml_gpio_set, + .set = aml_gpio_set, .get = aml_gpio_get, .direction_input = aml_gpio_direction_input, .direction_output = aml_gpio_direction_output, diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index f5be61f2ede4..277e9c40490d 100644 --- 
a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -616,7 +616,7 @@ static int meson_gpiolib_register(struct meson_pinctrl *pc) pc->chip.direction_input = meson_gpio_direction_input; pc->chip.direction_output = meson_gpio_direction_output; pc->chip.get = meson_gpio_get; - pc->chip.set_rv = meson_gpio_set; + pc->chip.set = meson_gpio_set; pc->chip.base = -1; pc->chip.ngpio = pc->data->num_pins; pc->chip.can_sleep = false; diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index a6b106984e12..881df5e08f61 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -518,7 +518,7 @@ static const struct pinmux_ops armada_37xx_pmx_ops = { static const struct gpio_chip armada_37xx_gpiolib_chip = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, - .set_rv = armada_37xx_gpio_set, + .set = armada_37xx_gpio_set, .get = armada_37xx_gpio_get, .get_direction = armada_37xx_gpio_get_direction, .direction_input = armada_37xx_gpio_direction_input, diff --git a/drivers/pinctrl/nomadik/pinctrl-abx500.c b/drivers/pinctrl/nomadik/pinctrl-abx500.c index 2f55f83127cf..7b5f94d8cb23 100644 --- a/drivers/pinctrl/nomadik/pinctrl-abx500.c +++ b/drivers/pinctrl/nomadik/pinctrl-abx500.c @@ -536,7 +536,7 @@ static const struct gpio_chip abx500gpio_chip = { .direction_input = abx500_gpio_direction_input, .get = abx500_gpio_get, .direction_output = abx500_gpio_direction_output, - .set_rv = abx500_gpio_set, + .set = abx500_gpio_set, .to_irq = abx500_gpio_to_irq, .dbg_show = abx500_gpio_dbg_show, }; diff --git a/drivers/pinctrl/nuvoton/pinctrl-ma35.c b/drivers/pinctrl/nuvoton/pinctrl-ma35.c index da5220da5149..54652bfbe6ac 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-ma35.c +++ b/drivers/pinctrl/nuvoton/pinctrl-ma35.c @@ -526,7 +526,7 @@ static int ma35_gpiolib_register(struct platform_device *pdev, struct ma35_pinct bank->chip.direction_input = 
ma35_gpio_core_direction_in; bank->chip.direction_output = ma35_gpio_core_direction_out; bank->chip.get = ma35_gpio_core_get; - bank->chip.set_rv = ma35_gpio_core_set; + bank->chip.set = ma35_gpio_core_set; bank->chip.base = -1; bank->chip.ngpio = bank->nr_pins; bank->chip.can_sleep = false; diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index b90ef3a26ae8..09a5425d54ba 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -1187,7 +1187,7 @@ static int amd_gpio_probe(struct platform_device *pdev) gpio_dev->gc.direction_input = amd_gpio_direction_input; gpio_dev->gc.direction_output = amd_gpio_direction_output; gpio_dev->gc.get = amd_gpio_get_value; - gpio_dev->gc.set_rv = amd_gpio_set_value; + gpio_dev->gc.set = amd_gpio_set_value; gpio_dev->gc.set_config = amd_gpio_set_config; gpio_dev->gc.dbg_show = amd_gpio_dbg_show; diff --git a/drivers/pinctrl/pinctrl-amdisp.c b/drivers/pinctrl/pinctrl-amdisp.c index 2e706bf8bcde..efbf40c776ea 100644 --- a/drivers/pinctrl/pinctrl-amdisp.c +++ b/drivers/pinctrl/pinctrl-amdisp.c @@ -151,7 +151,7 @@ static int amdisp_gpiochip_add(struct platform_device *pdev, gc->direction_input = amdisp_gpio_direction_input; gc->direction_output = amdisp_gpio_direction_output; gc->get = amdisp_gpio_get; - gc->set_rv = amdisp_gpio_set; + gc->set = amdisp_gpio_set; gc->base = -1; gc->ngpio = ARRAY_SIZE(amdisp_range_pins); diff --git a/drivers/pinctrl/pinctrl-apple-gpio.c b/drivers/pinctrl/pinctrl-apple-gpio.c index dcf3a921b4df..a09daa72bfe4 100644 --- a/drivers/pinctrl/pinctrl-apple-gpio.c +++ b/drivers/pinctrl/pinctrl-apple-gpio.c @@ -378,7 +378,7 @@ static int apple_gpio_register(struct apple_gpio_pinctrl *pctl) pctl->gpio_chip.direction_input = apple_gpio_direction_input; pctl->gpio_chip.direction_output = apple_gpio_direction_output; pctl->gpio_chip.get = apple_gpio_get; - pctl->gpio_chip.set_rv = apple_gpio_set; + pctl->gpio_chip.set = apple_gpio_set; pctl->gpio_chip.base = -1; 
pctl->gpio_chip.ngpio = pctl->pinctrl_desc.npins; pctl->gpio_chip.parent = pctl->dev; diff --git a/drivers/pinctrl/pinctrl-as3722.c b/drivers/pinctrl/pinctrl-as3722.c index 30ed758bbe9d..e713dea98aa8 100644 --- a/drivers/pinctrl/pinctrl-as3722.c +++ b/drivers/pinctrl/pinctrl-as3722.c @@ -529,7 +529,7 @@ static const struct gpio_chip as3722_gpio_chip = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, .get = as3722_gpio_get, - .set_rv = as3722_gpio_set, + .set = as3722_gpio_set, .direction_input = pinctrl_gpio_direction_input, .direction_output = as3722_gpio_direction_output, .to_irq = as3722_gpio_to_irq, diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c index 57f105ac962d..35ea3414cb96 100644 --- a/drivers/pinctrl/pinctrl-at91-pio4.c +++ b/drivers/pinctrl/pinctrl-at91-pio4.c @@ -442,8 +442,8 @@ static struct gpio_chip atmel_gpio_chip = { .get = atmel_gpio_get, .get_multiple = atmel_gpio_get_multiple, .direction_output = atmel_gpio_direction_output, - .set_rv = atmel_gpio_set, - .set_multiple_rv = atmel_gpio_set_multiple, + .set = atmel_gpio_set, + .set_multiple = atmel_gpio_set_multiple, .to_irq = atmel_gpio_to_irq, .base = 0, }; diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index 6c2727bd55bc..0a57ed51d4c9 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -1801,8 +1801,8 @@ static const struct gpio_chip at91_gpio_template = { .direction_input = at91_gpio_direction_input, .get = at91_gpio_get, .direction_output = at91_gpio_direction_output, - .set_rv = at91_gpio_set, - .set_multiple_rv = at91_gpio_set_multiple, + .set = at91_gpio_set, + .set_multiple = at91_gpio_set_multiple, .dbg_show = at91_gpio_dbg_show, .can_sleep = false, .ngpio = MAX_NB_GPIO_PER_BANK, diff --git a/drivers/pinctrl/pinctrl-aw9523.c b/drivers/pinctrl/pinctrl-aw9523.c index 9570ef346af6..890b83fddea3 100644 --- a/drivers/pinctrl/pinctrl-aw9523.c +++ 
b/drivers/pinctrl/pinctrl-aw9523.c @@ -785,8 +785,8 @@ static int aw9523_init_gpiochip(struct aw9523 *awi, unsigned int npins) gc->direction_output = aw9523_direction_output; gc->get = aw9523_gpio_get; gc->get_multiple = aw9523_gpio_get_multiple; - gc->set_rv = aw9523_gpio_set; - gc->set_multiple_rv = aw9523_gpio_set_multiple; + gc->set = aw9523_gpio_set; + gc->set_multiple = aw9523_gpio_set_multiple; gc->set_config = gpiochip_generic_config; gc->parent = dev; gc->owner = THIS_MODULE; diff --git a/drivers/pinctrl/pinctrl-axp209.c b/drivers/pinctrl/pinctrl-axp209.c index fff408b60c4a..2bd8487484a8 100644 --- a/drivers/pinctrl/pinctrl-axp209.c +++ b/drivers/pinctrl/pinctrl-axp209.c @@ -192,7 +192,7 @@ static int axp20x_gpio_get_direction(struct gpio_chip *chip, static int axp20x_gpio_output(struct gpio_chip *chip, unsigned int offset, int value) { - return chip->set_rv(chip, offset, value); + return chip->set(chip, offset, value); } static int axp20x_gpio_set(struct gpio_chip *chip, unsigned int offset, @@ -463,7 +463,7 @@ static int axp20x_pctl_probe(struct platform_device *pdev) pctl->chip.owner = THIS_MODULE; pctl->chip.get = axp20x_gpio_get; pctl->chip.get_direction = axp20x_gpio_get_direction; - pctl->chip.set_rv = axp20x_gpio_set; + pctl->chip.set = axp20x_gpio_set; pctl->chip.direction_input = pinctrl_gpio_direction_input; pctl->chip.direction_output = axp20x_gpio_output; diff --git a/drivers/pinctrl/pinctrl-cy8c95x0.c b/drivers/pinctrl/pinctrl-cy8c95x0.c index 8a2fd632bdd4..cf7f80497fde 100644 --- a/drivers/pinctrl/pinctrl-cy8c95x0.c +++ b/drivers/pinctrl/pinctrl-cy8c95x0.c @@ -939,10 +939,10 @@ static int cy8c95x0_setup_gpiochip(struct cy8c95x0_pinctrl *chip) gc->direction_input = cy8c95x0_gpio_direction_input; gc->direction_output = cy8c95x0_gpio_direction_output; gc->get = cy8c95x0_gpio_get_value; - gc->set_rv = cy8c95x0_gpio_set_value; + gc->set = cy8c95x0_gpio_set_value; gc->get_direction = cy8c95x0_gpio_get_direction; gc->get_multiple = 
cy8c95x0_gpio_get_multiple; - gc->set_multiple_rv = cy8c95x0_gpio_set_multiple; + gc->set_multiple = cy8c95x0_gpio_set_multiple; gc->set_config = gpiochip_generic_config; gc->can_sleep = true; gc->add_pin_ranges = cy8c95x0_add_pin_ranges; diff --git a/drivers/pinctrl/pinctrl-da9062.c b/drivers/pinctrl/pinctrl-da9062.c index 3295b09dfc3d..53298cbcc5cf 100644 --- a/drivers/pinctrl/pinctrl-da9062.c +++ b/drivers/pinctrl/pinctrl-da9062.c @@ -233,7 +233,7 @@ static int da9062_gpio_to_irq(struct gpio_chip *gc, unsigned int offset) static const struct gpio_chip reference_gc = { .owner = THIS_MODULE, .get = da9062_gpio_get, - .set_rv = da9062_gpio_set, + .set = da9062_gpio_set, .get_direction = da9062_gpio_get_direction, .direction_input = da9062_gpio_direction_input, .direction_output = da9062_gpio_direction_output, diff --git a/drivers/pinctrl/pinctrl-digicolor.c b/drivers/pinctrl/pinctrl-digicolor.c index 1676cb3cc4c9..2e16f09aeb47 100644 --- a/drivers/pinctrl/pinctrl-digicolor.c +++ b/drivers/pinctrl/pinctrl-digicolor.c @@ -248,7 +248,7 @@ static int dc_gpiochip_add(struct dc_pinmap *pmap) chip->direction_input = dc_gpio_direction_input; chip->direction_output = dc_gpio_direction_output; chip->get = dc_gpio_get; - chip->set_rv = dc_gpio_set; + chip->set = dc_gpio_set; chip->base = -1; chip->ngpio = PINS_COUNT; diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c index 79119cf20efc..2900513467fa 100644 --- a/drivers/pinctrl/pinctrl-ingenic.c +++ b/drivers/pinctrl/pinctrl-ingenic.c @@ -4451,7 +4451,7 @@ static int __init ingenic_gpio_probe(struct ingenic_pinctrl *jzpc, jzgc->gc.fwnode = fwnode; jzgc->gc.owner = THIS_MODULE; - jzgc->gc.set_rv = ingenic_gpio_set; + jzgc->gc.set = ingenic_gpio_set; jzgc->gc.get = ingenic_gpio_get; jzgc->gc.direction_input = pinctrl_gpio_direction_input; jzgc->gc.direction_output = ingenic_gpio_direction_output; diff --git a/drivers/pinctrl/pinctrl-keembay.c b/drivers/pinctrl/pinctrl-keembay.c index 
30e641571cfe..60cf017498b3 100644 --- a/drivers/pinctrl/pinctrl-keembay.c +++ b/drivers/pinctrl/pinctrl-keembay.c @@ -1481,7 +1481,7 @@ static int keembay_gpiochip_probe(struct keembay_pinctrl *kpc, gc->direction_input = keembay_gpio_set_direction_in; gc->direction_output = keembay_gpio_set_direction_out; gc->get = keembay_gpio_get; - gc->set_rv = keembay_gpio_set; + gc->set = keembay_gpio_set; gc->set_config = gpiochip_generic_config; gc->base = -1; gc->ngpio = kpc->npins; diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c index c8027ef03ecc..a17fcaddf490 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08.c +++ b/drivers/pinctrl/pinctrl-mcp23s08.c @@ -632,8 +632,8 @@ int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, mcp->chip.get = mcp23s08_get; mcp->chip.get_multiple = mcp23s08_get_multiple; mcp->chip.direction_output = mcp23s08_direction_output; - mcp->chip.set_rv = mcp23s08_set; - mcp->chip.set_multiple_rv = mcp23s08_set_multiple; + mcp->chip.set = mcp23s08_set; + mcp->chip.set_multiple = mcp23s08_set_multiple; mcp->chip.base = base; mcp->chip.can_sleep = true; diff --git a/drivers/pinctrl/pinctrl-microchip-sgpio.c b/drivers/pinctrl/pinctrl-microchip-sgpio.c index 88c2f14cfc6b..6191e5c13815 100644 --- a/drivers/pinctrl/pinctrl-microchip-sgpio.c +++ b/drivers/pinctrl/pinctrl-microchip-sgpio.c @@ -858,7 +858,7 @@ static int microchip_sgpio_register_bank(struct device *dev, gc->direction_input = microchip_sgpio_direction_input; gc->direction_output = microchip_sgpio_direction_output; gc->get = microchip_sgpio_get_value; - gc->set_rv = microchip_sgpio_set_value; + gc->set = microchip_sgpio_set_value; gc->request = gpiochip_generic_request; gc->free = gpiochip_generic_free; gc->of_xlate = microchip_sgpio_of_xlate; diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c index fbb3d43746bb..b82bf83fed25 100644 --- a/drivers/pinctrl/pinctrl-ocelot.c +++ b/drivers/pinctrl/pinctrl-ocelot.c @@ -1997,7 
+1997,7 @@ static int ocelot_gpio_direction_output(struct gpio_chip *chip, static const struct gpio_chip ocelot_gpiolib_chip = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, - .set_rv = ocelot_gpio_set, + .set = ocelot_gpio_set, .get = ocelot_gpio_get, .get_direction = ocelot_gpio_get_direction, .direction_input = pinctrl_gpio_direction_input, diff --git a/drivers/pinctrl/pinctrl-pic32.c b/drivers/pinctrl/pinctrl-pic32.c index 6d64cab97e81..37c2bf752154 100644 --- a/drivers/pinctrl/pinctrl-pic32.c +++ b/drivers/pinctrl/pinctrl-pic32.c @@ -2120,7 +2120,7 @@ static void pic32_gpio_irq_handler(struct irq_desc *desc) .direction_input = pic32_gpio_direction_input, \ .direction_output = pic32_gpio_direction_output, \ .get = pic32_gpio_get, \ - .set_rv = pic32_gpio_set, \ + .set = pic32_gpio_set, \ .ngpio = _npins, \ .base = GPIO_BANK_START(_bank), \ .owner = THIS_MODULE, \ diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c index 7f8b562c81c9..0b33b01dbaad 100644 --- a/drivers/pinctrl/pinctrl-pistachio.c +++ b/drivers/pinctrl/pinctrl-pistachio.c @@ -1331,7 +1331,7 @@ static void pistachio_gpio_irq_handler(struct irq_desc *desc) .direction_input = pistachio_gpio_direction_input, \ .direction_output = pistachio_gpio_direction_output, \ .get = pistachio_gpio_get, \ - .set_rv = pistachio_gpio_set, \ + .set = pistachio_gpio_set, \ .base = _pin_base, \ .ngpio = _npins, \ }, \ diff --git a/drivers/pinctrl/pinctrl-rk805.c b/drivers/pinctrl/pinctrl-rk805.c index fc0e330b1d11..3acf770316c1 100644 --- a/drivers/pinctrl/pinctrl-rk805.c +++ b/drivers/pinctrl/pinctrl-rk805.c @@ -378,7 +378,7 @@ static const struct gpio_chip rk805_gpio_chip = { .free = gpiochip_generic_free, .get_direction = rk805_gpio_get_direction, .get = rk805_gpio_get, - .set_rv = rk805_gpio_set, + .set = rk805_gpio_set, .direction_input = pinctrl_gpio_direction_input, .direction_output = rk805_gpio_direction_output, .can_sleep = true, diff --git 
a/drivers/pinctrl/pinctrl-rp1.c b/drivers/pinctrl/pinctrl-rp1.c index 6080b57a5d87..dadafc935dbb 100644 --- a/drivers/pinctrl/pinctrl-rp1.c +++ b/drivers/pinctrl/pinctrl-rp1.c @@ -851,7 +851,7 @@ static const struct gpio_chip rp1_gpio_chip = { .direction_output = rp1_gpio_direction_output, .get_direction = rp1_gpio_get_direction, .get = rp1_gpio_get, - .set_rv = rp1_gpio_set, + .set = rp1_gpio_set, .base = -1, .set_config = rp1_gpio_set_config, .ngpio = RP1_NUM_GPIOS, diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index 574fe2cbfbec..d3cea3437d7f 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -1467,7 +1467,7 @@ static const struct gpio_chip st_gpio_template = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, .get = st_gpio_get, - .set_rv = st_gpio_set, + .set = st_gpio_set, .direction_input = pinctrl_gpio_direction_input, .direction_output = st_gpio_direction_output, .get_direction = st_gpio_get_direction, diff --git a/drivers/pinctrl/pinctrl-stmfx.c b/drivers/pinctrl/pinctrl-stmfx.c index f4fdcaa043e6..c89b99003b71 100644 --- a/drivers/pinctrl/pinctrl-stmfx.c +++ b/drivers/pinctrl/pinctrl-stmfx.c @@ -697,7 +697,7 @@ static int stmfx_pinctrl_probe(struct platform_device *pdev) pctl->gpio_chip.direction_input = stmfx_gpio_direction_input; pctl->gpio_chip.direction_output = stmfx_gpio_direction_output; pctl->gpio_chip.get = stmfx_gpio_get; - pctl->gpio_chip.set_rv = stmfx_gpio_set; + pctl->gpio_chip.set = stmfx_gpio_set; pctl->gpio_chip.set_config = gpiochip_generic_config; pctl->gpio_chip.base = -1; pctl->gpio_chip.ngpio = pctl->pctl_desc.npins; diff --git a/drivers/pinctrl/pinctrl-sx150x.c b/drivers/pinctrl/pinctrl-sx150x.c index d3a12c1c0de2..53cf8168b274 100644 --- a/drivers/pinctrl/pinctrl-sx150x.c +++ b/drivers/pinctrl/pinctrl-sx150x.c @@ -1176,7 +1176,7 @@ static int sx150x_probe(struct i2c_client *client) pctl->gpio.direction_input = sx150x_gpio_direction_input; 
pctl->gpio.direction_output = sx150x_gpio_direction_output; pctl->gpio.get = sx150x_gpio_get; - pctl->gpio.set_rv = sx150x_gpio_set; + pctl->gpio.set = sx150x_gpio_set; pctl->gpio.set_config = gpiochip_generic_config; pctl->gpio.parent = dev; pctl->gpio.can_sleep = true; @@ -1191,7 +1191,7 @@ static int sx150x_probe(struct i2c_client *client) * would require locking that is not in place at this time. */ if (pctl->data->model != SX150X_789) - pctl->gpio.set_multiple_rv = sx150x_gpio_set_multiple; + pctl->gpio.set_multiple = sx150x_gpio_set_multiple; /* Add Interrupt support if an irq is specified */ if (client->irq > 0) { diff --git a/drivers/pinctrl/pinctrl-xway.c b/drivers/pinctrl/pinctrl-xway.c index 53c6c22ff24d..3d4ad61d0da9 100644 --- a/drivers/pinctrl/pinctrl-xway.c +++ b/drivers/pinctrl/pinctrl-xway.c @@ -1354,7 +1354,7 @@ static struct gpio_chip xway_chip = { .direction_input = xway_gpio_dir_in, .direction_output = xway_gpio_dir_out, .get = xway_gpio_get, - .set_rv = xway_gpio_set, + .set = xway_gpio_set, .request = gpiochip_generic_request, .free = gpiochip_generic_free, .to_irq = xway_gpio_to_irq, diff --git a/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c b/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c index 57fefeb603f0..54c77e0b96e9 100644 --- a/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c +++ b/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c @@ -398,7 +398,7 @@ static const struct gpio_chip lpi_gpio_template = { .direction_input = lpi_gpio_direction_input, .direction_output = lpi_gpio_direction_output, .get = lpi_gpio_get, - .set_rv = lpi_gpio_set, + .set = lpi_gpio_set, .request = gpiochip_generic_request, .free = gpiochip_generic_free, .dbg_show = lpi_gpio_dbg_show, diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index f713c80d7f3e..83eb075b6bfa 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -792,7 +792,7 @@ static const struct gpio_chip msm_gpio_template = { .direction_output = 
msm_gpio_direction_output, .get_direction = msm_gpio_get_direction, .get = msm_gpio_get, - .set_rv = msm_gpio_set, + .set = msm_gpio_set, .request = gpiochip_generic_request, .free = gpiochip_generic_free, .dbg_show = msm_gpio_dbg_show, diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c index 606becc160eb..b7b15874e488 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c @@ -802,7 +802,7 @@ static const struct gpio_chip pmic_gpio_gpio_template = { .direction_input = pmic_gpio_direction_input, .direction_output = pmic_gpio_direction_output, .get = pmic_gpio_get, - .set_rv = pmic_gpio_set, + .set = pmic_gpio_set, .request = gpiochip_generic_request, .free = gpiochip_generic_free, .of_xlate = pmic_gpio_of_xlate, diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c index ba9084978f90..22d76b1013a3 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c @@ -638,7 +638,7 @@ static const struct gpio_chip pmic_mpp_gpio_template = { .direction_input = pmic_mpp_direction_input, .direction_output = pmic_mpp_direction_output, .get = pmic_mpp_get, - .set_rv = pmic_mpp_set, + .set = pmic_mpp_set, .request = gpiochip_generic_request, .free = gpiochip_generic_free, .of_xlate = pmic_mpp_of_xlate, diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c index 3a8014ebf064..fb37b1c1acb4 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c @@ -597,7 +597,7 @@ static const struct gpio_chip pm8xxx_gpio_template = { .direction_input = pm8xxx_gpio_direction_input, .direction_output = pm8xxx_gpio_direction_output, .get = pm8xxx_gpio_get, - .set_rv = pm8xxx_gpio_set, + .set = pm8xxx_gpio_set, .of_xlate = pm8xxx_gpio_of_xlate, .dbg_show = pm8xxx_gpio_dbg_show, .owner = THIS_MODULE, diff --git 
a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c index 087c37d304fc..6103849af042 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c @@ -634,7 +634,7 @@ static const struct gpio_chip pm8xxx_mpp_template = { .direction_input = pm8xxx_mpp_direction_input, .direction_output = pm8xxx_mpp_direction_output, .get = pm8xxx_mpp_get, - .set_rv = pm8xxx_mpp_set, + .set = pm8xxx_mpp_set, .of_xlate = pm8xxx_mpp_of_xlate, .dbg_show = pm8xxx_mpp_dbg_show, .owner = THIS_MODULE, diff --git a/drivers/pinctrl/renesas/gpio.c b/drivers/pinctrl/renesas/gpio.c index 8efbdc1b0078..2293af642849 100644 --- a/drivers/pinctrl/renesas/gpio.c +++ b/drivers/pinctrl/renesas/gpio.c @@ -234,7 +234,7 @@ static int gpio_pin_setup(struct sh_pfc_chip *chip) gc->direction_input = gpio_pin_direction_input; gc->get = gpio_pin_get; gc->direction_output = gpio_pin_direction_output; - gc->set_rv = gpio_pin_set; + gc->set = gpio_pin_set; gc->to_irq = gpio_pin_to_irq; gc->label = pfc->info->name; diff --git a/drivers/pinctrl/renesas/pinctrl-rza1.c b/drivers/pinctrl/renesas/pinctrl-rza1.c index 3d8492c91710..23812116ef42 100644 --- a/drivers/pinctrl/renesas/pinctrl-rza1.c +++ b/drivers/pinctrl/renesas/pinctrl-rza1.c @@ -846,7 +846,7 @@ static const struct gpio_chip rza1_gpiochip_template = { .direction_input = rza1_gpio_direction_input, .direction_output = rza1_gpio_direction_output, .get = rza1_gpio_get, - .set_rv = rza1_gpio_set, + .set = rza1_gpio_set, }; /* ---------------------------------------------------------------------------- * pinctrl operations diff --git a/drivers/pinctrl/renesas/pinctrl-rza2.c b/drivers/pinctrl/renesas/pinctrl-rza2.c index 7a0b268d3eb9..b78b5b4ec5af 100644 --- a/drivers/pinctrl/renesas/pinctrl-rza2.c +++ b/drivers/pinctrl/renesas/pinctrl-rza2.c @@ -237,7 +237,7 @@ static struct gpio_chip chip = { .direction_input = rza2_chip_direction_input, .direction_output = rza2_chip_direction_output, .get = 
rza2_chip_get, - .set_rv = rza2_chip_set, + .set = rza2_chip_set, }; static int rza2_gpio_register(struct rza2_pinctrl_priv *priv) diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c index 2a10ae0bf5bd..c52263c2a7b0 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c +++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c @@ -2795,7 +2795,7 @@ static int rzg2l_gpio_register(struct rzg2l_pinctrl *pctrl) chip->direction_input = rzg2l_gpio_direction_input; chip->direction_output = rzg2l_gpio_direction_output; chip->get = rzg2l_gpio_get; - chip->set_rv = rzg2l_gpio_set; + chip->set = rzg2l_gpio_set; chip->label = name; chip->parent = pctrl->dev; chip->owner = THIS_MODULE; diff --git a/drivers/pinctrl/renesas/pinctrl-rzv2m.c b/drivers/pinctrl/renesas/pinctrl-rzv2m.c index a17b68b4c466..daaa986d994d 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzv2m.c +++ b/drivers/pinctrl/renesas/pinctrl-rzv2m.c @@ -957,7 +957,7 @@ static int rzv2m_gpio_register(struct rzv2m_pinctrl *pctrl) chip->direction_input = rzv2m_gpio_direction_input; chip->direction_output = rzv2m_gpio_direction_output; chip->get = rzv2m_gpio_get; - chip->set_rv = rzv2m_gpio_set; + chip->set = rzv2m_gpio_set; chip->label = name; chip->parent = pctrl->dev; chip->owner = THIS_MODULE; diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c index fe1ac82b9d79..24745e1d78ce 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.c +++ b/drivers/pinctrl/samsung/pinctrl-samsung.c @@ -1067,7 +1067,7 @@ static int samsung_gpio_set_config(struct gpio_chip *gc, unsigned int offset, static const struct gpio_chip samsung_gpiolib_chip = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, - .set_rv = samsung_gpio_set, + .set = samsung_gpio_set, .get = samsung_gpio_get, .direction_input = samsung_gpio_direction_input, .direction_output = samsung_gpio_direction_output, diff --git a/drivers/pinctrl/spear/pinctrl-plgpio.c 
b/drivers/pinctrl/spear/pinctrl-plgpio.c index e8234d2156da..1ec22010a3f9 100644 --- a/drivers/pinctrl/spear/pinctrl-plgpio.c +++ b/drivers/pinctrl/spear/pinctrl-plgpio.c @@ -582,7 +582,7 @@ static int plgpio_probe(struct platform_device *pdev) plgpio->chip.direction_input = plgpio_direction_input; plgpio->chip.direction_output = plgpio_direction_output; plgpio->chip.get = plgpio_get_value; - plgpio->chip.set_rv = plgpio_set_value; + plgpio->chip.set = plgpio_set_value; plgpio->chip.label = dev_name(&pdev->dev); plgpio->chip.parent = &pdev->dev; plgpio->chip.owner = THIS_MODULE; diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c index b729ca4de422..7fa13f282b85 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c @@ -1302,7 +1302,7 @@ static int starfive_probe(struct platform_device *pdev) sfp->gc.direction_input = starfive_gpio_direction_input; sfp->gc.direction_output = starfive_gpio_direction_output; sfp->gc.get = starfive_gpio_get; - sfp->gc.set_rv = starfive_gpio_set; + sfp->gc.set = starfive_gpio_set; sfp->gc.set_config = starfive_gpio_set_config; sfp->gc.add_pin_ranges = starfive_gpio_add_pin_ranges; sfp->gc.base = -1; diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c index 082bb1c6cea9..05e3af75b09f 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c @@ -935,7 +935,7 @@ int jh7110_pinctrl_probe(struct platform_device *pdev) sfp->gc.direction_input = jh7110_gpio_direction_input; sfp->gc.direction_output = jh7110_gpio_direction_output; sfp->gc.get = jh7110_gpio_get; - sfp->gc.set_rv = jh7110_gpio_set; + sfp->gc.set = jh7110_gpio_set; sfp->gc.set_config = jh7110_gpio_set_config; sfp->gc.add_pin_ranges = jh7110_gpio_add_pin_ranges; sfp->gc.base = info->gc_base; diff --git 
a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index f47c4e6f12b4..823c8fe758e2 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -433,7 +433,7 @@ static const struct gpio_chip stm32_gpio_template = { .request = stm32_gpio_request, .free = stm32_gpio_free, .get = stm32_gpio_get, - .set_rv = stm32_gpio_set, + .set = stm32_gpio_set, .direction_input = pinctrl_gpio_direction_input, .direction_output = stm32_gpio_direction_output, .to_irq = stm32_gpio_to_irq, diff --git a/drivers/pinctrl/sunplus/sppctl.c b/drivers/pinctrl/sunplus/sppctl.c index 3c3357f80889..3e924aa86cc2 100644 --- a/drivers/pinctrl/sunplus/sppctl.c +++ b/drivers/pinctrl/sunplus/sppctl.c @@ -547,7 +547,7 @@ static int sppctl_gpio_new(struct platform_device *pdev, struct sppctl_pdata *pc gchip->direction_input = sppctl_gpio_direction_input; gchip->direction_output = sppctl_gpio_direction_output; gchip->get = sppctl_gpio_get; - gchip->set_rv = sppctl_gpio_set; + gchip->set = sppctl_gpio_set; gchip->set_config = sppctl_gpio_set_config; gchip->dbg_show = IS_ENABLED(CONFIG_DEBUG_FS) ? 
sppctl_gpio_dbg_show : NULL; diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index 0db8429a013f..0fb057a07dcc 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -1604,7 +1604,7 @@ int sunxi_pinctrl_init_with_flags(struct platform_device *pdev, pctl->chip->direction_input = sunxi_pinctrl_gpio_direction_input; pctl->chip->direction_output = sunxi_pinctrl_gpio_direction_output; pctl->chip->get = sunxi_pinctrl_gpio_get; - pctl->chip->set_rv = sunxi_pinctrl_gpio_set; + pctl->chip->set = sunxi_pinctrl_gpio_set; pctl->chip->of_xlate = sunxi_pinctrl_gpio_of_xlate; pctl->chip->to_irq = sunxi_pinctrl_gpio_to_irq; pctl->chip->of_gpio_n_cells = 3; diff --git a/drivers/pinctrl/vt8500/pinctrl-wmt.c b/drivers/pinctrl/vt8500/pinctrl-wmt.c index 767c6808a463..7213a8d4bf09 100644 --- a/drivers/pinctrl/vt8500/pinctrl-wmt.c +++ b/drivers/pinctrl/vt8500/pinctrl-wmt.c @@ -549,7 +549,7 @@ static const struct gpio_chip wmt_gpio_chip = { .direction_input = pinctrl_gpio_direction_input, .direction_output = wmt_gpio_direction_output, .get = wmt_gpio_get_value, - .set_rv = wmt_gpio_set_value, + .set = wmt_gpio_set_value, .can_sleep = false, }; diff --git a/drivers/platform/cznic/turris-omnia-mcu-gpio.c b/drivers/platform/cznic/turris-omnia-mcu-gpio.c index 77184c8b42ea..7f0ada4fa606 100644 --- a/drivers/platform/cznic/turris-omnia-mcu-gpio.c +++ b/drivers/platform/cznic/turris-omnia-mcu-gpio.c @@ -1024,8 +1024,8 @@ int omnia_mcu_register_gpiochip(struct omnia_mcu *mcu) mcu->gc.direction_output = omnia_gpio_direction_output; mcu->gc.get = omnia_gpio_get; mcu->gc.get_multiple = omnia_gpio_get_multiple; - mcu->gc.set_rv = omnia_gpio_set; - mcu->gc.set_multiple_rv = omnia_gpio_set_multiple; + mcu->gc.set = omnia_gpio_set; + mcu->gc.set_multiple = omnia_gpio_set_multiple; mcu->gc.init_valid_mask = omnia_gpio_init_valid_mask; mcu->gc.can_sleep = true; mcu->gc.names = omnia_mcu_gpio_names; diff --git 
a/drivers/platform/x86/barco-p50-gpio.c b/drivers/platform/x86/barco-p50-gpio.c index bb3393bbfb89..28012eebdb10 100644 --- a/drivers/platform/x86/barco-p50-gpio.c +++ b/drivers/platform/x86/barco-p50-gpio.c @@ -316,7 +316,7 @@ static int p50_gpio_probe(struct platform_device *pdev) p50->gc.base = -1; p50->gc.get_direction = p50_gpio_get_direction; p50->gc.get = p50_gpio_get; - p50->gc.set_rv = p50_gpio_set; + p50->gc.set = p50_gpio_set; /* reset mbox */ diff --git a/drivers/platform/x86/intel/int0002_vgpio.c b/drivers/platform/x86/intel/int0002_vgpio.c index 9bc24ed19c64..6f5629dc3f8d 100644 --- a/drivers/platform/x86/intel/int0002_vgpio.c +++ b/drivers/platform/x86/intel/int0002_vgpio.c @@ -193,7 +193,7 @@ static int int0002_probe(struct platform_device *pdev) chip->parent = dev; chip->owner = THIS_MODULE; chip->get = int0002_gpio_get; - chip->set_rv = int0002_gpio_set; + chip->set = int0002_gpio_set; chip->direction_input = int0002_gpio_get; chip->direction_output = int0002_gpio_direction_output; chip->base = -1; diff --git a/drivers/platform/x86/portwell-ec.c b/drivers/platform/x86/portwell-ec.c index 3e019c51913e..322f296e9315 100644 --- a/drivers/platform/x86/portwell-ec.c +++ b/drivers/platform/x86/portwell-ec.c @@ -86,7 +86,7 @@ static int pwec_gpio_get(struct gpio_chip *chip, unsigned int offset) return pwec_read(PORTWELL_GPIO_VAL_REG) & BIT(offset) ? 
1 : 0; } -static int pwec_gpio_set_rv(struct gpio_chip *chip, unsigned int offset, int val) +static int pwec_gpio_set(struct gpio_chip *chip, unsigned int offset, int val) { u8 tmp = pwec_read(PORTWELL_GPIO_VAL_REG); @@ -130,7 +130,7 @@ static struct gpio_chip pwec_gpio_chip = { .direction_input = pwec_gpio_direction_input, .direction_output = pwec_gpio_direction_output, .get = pwec_gpio_get, - .set_rv = pwec_gpio_set_rv, + .set = pwec_gpio_set, .base = -1, .ngpio = PORTWELL_GPIO_PINS, }; diff --git a/drivers/platform/x86/silicom-platform.c b/drivers/platform/x86/silicom-platform.c index 63b5da410ed5..266f7bc5e416 100644 --- a/drivers/platform/x86/silicom-platform.c +++ b/drivers/platform/x86/silicom-platform.c @@ -466,7 +466,7 @@ static struct gpio_chip silicom_gpio_chip = { .direction_input = silicom_gpio_direction_input, .direction_output = silicom_gpio_direction_output, .get = silicom_gpio_get, - .set_rv = silicom_gpio_set, + .set = silicom_gpio_set, .base = -1, .ngpio = ARRAY_SIZE(plat_0222_gpio_channels), .names = plat_0222_gpio_names, diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index eb03ccd5b688..9ce75704a15f 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -323,7 +323,7 @@ static int pca9685_pwm_gpio_probe(struct pwm_chip *chip) pca->gpio.direction_input = pca9685_pwm_gpio_direction_input; pca->gpio.direction_output = pca9685_pwm_gpio_direction_output; pca->gpio.get = pca9685_pwm_gpio_get; - pca->gpio.set_rv = pca9685_pwm_gpio_set; + pca->gpio.set = pca9685_pwm_gpio_set; pca->gpio.base = -1; pca->gpio.ngpio = PCA9685_MAXCHAN; pca->gpio.can_sleep = true; diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c index 58dbf8bffa5d..3020839b9ef1 100644 --- a/drivers/regulator/rpi-panel-attiny-regulator.c +++ b/drivers/regulator/rpi-panel-attiny-regulator.c @@ -351,7 +351,7 @@ static int attiny_i2c_probe(struct i2c_client *i2c) state->gc.base = -1; 
state->gc.ngpio = NUM_GPIO; - state->gc.set_rv = attiny_gpio_set; + state->gc.set = attiny_gpio_set; state->gc.get_direction = attiny_gpio_get_direction; state->gc.can_sleep = true; diff --git a/drivers/soc/fsl/qe/gpio.c b/drivers/soc/fsl/qe/gpio.c index 710a3a03758b..8df1e8fa86a5 100644 --- a/drivers/soc/fsl/qe/gpio.c +++ b/drivers/soc/fsl/qe/gpio.c @@ -321,8 +321,8 @@ static int __init qe_add_gpiochips(void) gc->direction_input = qe_gpio_dir_in; gc->direction_output = qe_gpio_dir_out; gc->get = qe_gpio_get; - gc->set_rv = qe_gpio_set; - gc->set_multiple_rv = qe_gpio_set_multiple; + gc->set = qe_gpio_set; + gc->set_multiple = qe_gpio_set_multiple; ret = of_mm_gpiochip_add_data(np, mm_gc, qe_gc); if (ret) diff --git a/drivers/soc/renesas/pwc-rzv2m.c b/drivers/soc/renesas/pwc-rzv2m.c index 4dbcb3d4a90c..6209168b3734 100644 --- a/drivers/soc/renesas/pwc-rzv2m.c +++ b/drivers/soc/renesas/pwc-rzv2m.c @@ -64,7 +64,7 @@ static const struct gpio_chip rzv2m_pwc_gc = { .label = "gpio_rzv2m_pwc", .owner = THIS_MODULE, .get = rzv2m_pwc_gpio_get, - .set_rv = rzv2m_pwc_gpio_set, + .set = rzv2m_pwc_gpio_set, .direction_output = rzv2m_pwc_gpio_direction_output, .can_sleep = false, .ngpio = 2, diff --git a/drivers/spi/spi-xcomm.c b/drivers/spi/spi-xcomm.c index 1a40c4866ce1..33b78c537520 100644 --- a/drivers/spi/spi-xcomm.c +++ b/drivers/spi/spi-xcomm.c @@ -70,7 +70,7 @@ static int spi_xcomm_gpio_add(struct spi_xcomm *spi_xcomm) return 0; spi_xcomm->gc.get_direction = spi_xcomm_gpio_get_direction; - spi_xcomm->gc.set_rv = spi_xcomm_gpio_set_value; + spi_xcomm->gc.set = spi_xcomm_gpio_set_value; spi_xcomm->gc.can_sleep = 1; spi_xcomm->gc.base = -1; spi_xcomm->gc.ngpio = 1; diff --git a/drivers/ssb/driver_gpio.c b/drivers/ssb/driver_gpio.c index e1f5f0a9c8a2..905657c925bc 100644 --- a/drivers/ssb/driver_gpio.c +++ b/drivers/ssb/driver_gpio.c @@ -225,7 +225,7 @@ static int ssb_gpio_chipco_init(struct ssb_bus *bus) chip->request = ssb_gpio_chipco_request; chip->free = 
ssb_gpio_chipco_free; chip->get = ssb_gpio_chipco_get_value; - chip->set_rv = ssb_gpio_chipco_set_value; + chip->set = ssb_gpio_chipco_set_value; chip->direction_input = ssb_gpio_chipco_direction_input; chip->direction_output = ssb_gpio_chipco_direction_output; #if IS_ENABLED(CONFIG_SSB_EMBEDDED) @@ -422,7 +422,7 @@ static int ssb_gpio_extif_init(struct ssb_bus *bus) chip->label = "ssb_extif_gpio"; chip->owner = THIS_MODULE; chip->get = ssb_gpio_extif_get_value; - chip->set_rv = ssb_gpio_extif_set_value; + chip->set = ssb_gpio_extif_set_value; chip->direction_input = ssb_gpio_extif_direction_input; chip->direction_output = ssb_gpio_extif_direction_output; #if IS_ENABLED(CONFIG_SSB_EMBEDDED) diff --git a/drivers/staging/greybus/gpio.c b/drivers/staging/greybus/gpio.c index 1280530c8987..ac62b932e6a4 100644 --- a/drivers/staging/greybus/gpio.c +++ b/drivers/staging/greybus/gpio.c @@ -551,7 +551,7 @@ static int gb_gpio_probe(struct gbphy_device *gbphy_dev, gpio->direction_input = gb_gpio_direction_input; gpio->direction_output = gb_gpio_direction_output; gpio->get = gb_gpio_get; - gpio->set_rv = gb_gpio_set; + gpio->set = gb_gpio_set; gpio->set_config = gb_gpio_set_config; gpio->base = -1; /* Allocate base dynamically */ gpio->ngpio = ggc->line_max + 1; diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 541c790c0109..ce260e9949c3 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1414,7 +1414,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty s->gpio.direction_input = max310x_gpio_direction_input; s->gpio.get = max310x_gpio_get; s->gpio.direction_output= max310x_gpio_direction_output; - s->gpio.set_rv = max310x_gpio_set; + s->gpio.set = max310x_gpio_set; s->gpio.set_config = max310x_gpio_set_config; s->gpio.base = -1; s->gpio.ngpio = devtype->nr * 4; diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 5ea8aadb6e69..3f38fba8f6ea 100644 --- 
a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1425,7 +1425,7 @@ static int sc16is7xx_setup_gpio_chip(struct sc16is7xx_port *s) s->gpio.direction_input = sc16is7xx_gpio_direction_input; s->gpio.get = sc16is7xx_gpio_get; s->gpio.direction_output = sc16is7xx_gpio_direction_output; - s->gpio.set_rv = sc16is7xx_gpio_set; + s->gpio.set = sc16is7xx_gpio_set; s->gpio.base = -1; s->gpio.ngpio = s->devtype->nr_gpio; s->gpio.can_sleep = 1; diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index cfa1d68c7919..36b25418b214 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -1962,7 +1962,7 @@ static int cp210x_gpio_init(struct usb_serial *serial) priv->gc.direction_input = cp210x_gpio_direction_input; priv->gc.direction_output = cp210x_gpio_direction_output; priv->gc.get = cp210x_gpio_get; - priv->gc.set_rv = cp210x_gpio_set; + priv->gc.set = cp210x_gpio_set; priv->gc.set_config = cp210x_gpio_set_config; priv->gc.init_valid_mask = cp210x_gpio_init_valid_mask; priv->gc.owner = THIS_MODULE; diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 7737285a84ba..49666c33b41f 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -2150,9 +2150,9 @@ static int ftdi_gpio_init(struct usb_serial_port *port) priv->gc.direction_output = ftdi_gpio_direction_output; priv->gc.init_valid_mask = ftdi_gpio_init_valid_mask; priv->gc.get = ftdi_gpio_get; - priv->gc.set_rv = ftdi_gpio_set; + priv->gc.set = ftdi_gpio_set; priv->gc.get_multiple = ftdi_gpio_get_multiple; - priv->gc.set_multiple_rv = ftdi_gpio_set_multiple; + priv->gc.set_multiple = ftdi_gpio_set_multiple; priv->gc.owner = THIS_MODULE; priv->gc.parent = &serial->interface->dev; priv->gc.base = -1; diff --git a/drivers/video/fbdev/via/via-gpio.c b/drivers/video/fbdev/via/via-gpio.c index 72302384bf77..45c0a4a6f85c 100644 --- a/drivers/video/fbdev/via/via-gpio.c +++ b/drivers/video/fbdev/via/via-gpio.c @@ -145,7 
+145,7 @@ static struct viafb_gpio_cfg viafb_gpio_config = { .label = "VIAFB onboard GPIO", .owner = THIS_MODULE, .direction_output = via_gpio_dir_out, - .set_rv = via_gpio_set, + .set = via_gpio_set, .direction_input = via_gpio_dir_input, .get = via_gpio_get, .base = -1, diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 90567dde7d8e..667f8fd58a79 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -347,10 +347,10 @@ struct gpio_irq_chip { * @get: returns value for signal "offset", 0=low, 1=high, or negative error * @get_multiple: reads values for multiple signals defined by "mask" and * stores them in "bits", returns 0 on success or negative error - * @set_rv: assigns output value for signal "offset", returns 0 on success or - * negative error value - * @set_multiple_rv: assigns output values for multiple signals defined by - * "mask", returns 0 on success or negative error value + * @set: assigns output value for signal "offset", returns 0 on success or + * negative error value + * @set_multiple: assigns output values for multiple signals defined by + * "mask", returns 0 on success or negative error value * @set_config: optional hook for all kinds of settings. Uses the same * packed config format as generic pinconf. Must return 0 on success and * a negative error number on failure. 
@@ -443,12 +443,11 @@ struct gpio_chip { int (*get_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); - int (*set_rv)(struct gpio_chip *gc, - unsigned int offset, - int value); - int (*set_multiple_rv)(struct gpio_chip *gc, - unsigned long *mask, - unsigned long *bits); + int (*set)(struct gpio_chip *gc, + unsigned int offset, int value); + int (*set_multiple)(struct gpio_chip *gc, + unsigned long *mask, + unsigned long *bits); int (*set_config)(struct gpio_chip *gc, unsigned int offset, unsigned long config); diff --git a/include/linux/gpio/generic.h b/include/linux/gpio/generic.h index b511acd58ab0..f3a8db4598bb 100644 --- a/include/linux/gpio/generic.h +++ b/include/linux/gpio/generic.h @@ -88,10 +88,10 @@ static inline int gpio_generic_chip_set(struct gpio_generic_chip *chip, unsigned int offset, int value) { - if (WARN_ON(!chip->gc.set_rv)) + if (WARN_ON(!chip->gc.set)) return -EOPNOTSUPP; - return chip->gc.set_rv(&chip->gc, offset, value); + return chip->gc.set(&chip->gc, offset, value); } #define gpio_generic_chip_lock(gen_gc) \ diff --git a/sound/hda/codecs/side-codecs/cirrus_scodec_test.c b/sound/hda/codecs/side-codecs/cirrus_scodec_test.c index 93b9cbf1f08a..9ba14c09c07f 100644 --- a/sound/hda/codecs/side-codecs/cirrus_scodec_test.c +++ b/sound/hda/codecs/side-codecs/cirrus_scodec_test.c @@ -86,7 +86,7 @@ static const struct gpio_chip cirrus_scodec_test_gpio_chip = { .direction_input = cirrus_scodec_test_gpio_direction_in, .get = cirrus_scodec_test_gpio_get, .direction_output = cirrus_scodec_test_gpio_direction_out, - .set_rv = cirrus_scodec_test_gpio_set, + .set = cirrus_scodec_test_gpio_set, .set_config = cirrus_scodec_test_gpio_set_config, .base = -1, .ngpio = 32, diff --git a/sound/soc/codecs/idt821034.c b/sound/soc/codecs/idt821034.c index 55e90604bbaa..6738cf21983b 100644 --- a/sound/soc/codecs/idt821034.c +++ b/sound/soc/codecs/idt821034.c @@ -1117,7 +1117,7 @@ static int idt821034_gpio_init(struct idt821034 *idt821034) 
idt821034->gpio_chip.direction_input = idt821034_chip_direction_input; idt821034->gpio_chip.direction_output = idt821034_chip_direction_output; idt821034->gpio_chip.get = idt821034_chip_gpio_get; - idt821034->gpio_chip.set_rv = idt821034_chip_gpio_set; + idt821034->gpio_chip.set = idt821034_chip_gpio_set; idt821034->gpio_chip.can_sleep = true; return devm_gpiochip_add_data(&idt821034->spi->dev, &idt821034->gpio_chip, diff --git a/sound/soc/codecs/peb2466.c b/sound/soc/codecs/peb2466.c index b8905c03445e..c0c5b3c3e98b 100644 --- a/sound/soc/codecs/peb2466.c +++ b/sound/soc/codecs/peb2466.c @@ -1945,7 +1945,7 @@ static int peb2466_gpio_init(struct peb2466 *peb2466) peb2466->gpio.gpio_chip.direction_input = peb2466_chip_direction_input; peb2466->gpio.gpio_chip.direction_output = peb2466_chip_direction_output; peb2466->gpio.gpio_chip.get = peb2466_chip_gpio_get; - peb2466->gpio.gpio_chip.set_rv = peb2466_chip_gpio_set; + peb2466->gpio.gpio_chip.set = peb2466_chip_gpio_set; peb2466->gpio.gpio_chip.can_sleep = true; return devm_gpiochip_add_data(&peb2466->spi->dev, &peb2466->gpio.gpio_chip, diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c index 69a0fb8d7f77..6b6c690a9e45 100644 --- a/sound/soc/codecs/rt5677.c +++ b/sound/soc/codecs/rt5677.c @@ -4835,7 +4835,7 @@ static const struct gpio_chip rt5677_template_chip = { .label = RT5677_DRV_NAME, .owner = THIS_MODULE, .direction_output = rt5677_gpio_direction_out, - .set_rv = rt5677_gpio_set, + .set = rt5677_gpio_set, .direction_input = rt5677_gpio_direction_in, .get = rt5677_gpio_get, .to_irq = rt5677_to_irq, diff --git a/sound/soc/codecs/tlv320adc3xxx.c b/sound/soc/codecs/tlv320adc3xxx.c index 1035ba17dc5d..258fbcaf345a 100644 --- a/sound/soc/codecs/tlv320adc3xxx.c +++ b/sound/soc/codecs/tlv320adc3xxx.c @@ -1052,7 +1052,7 @@ static const struct gpio_chip adc3xxx_gpio_chip = { .owner = THIS_MODULE, .request = adc3xxx_gpio_request, .direction_output = adc3xxx_gpio_direction_out, - .set_rv = adc3xxx_gpio_set, 
+ .set = adc3xxx_gpio_set, .get = adc3xxx_gpio_get, .can_sleep = 1, }; diff --git a/sound/soc/codecs/wm5100.c b/sound/soc/codecs/wm5100.c index fb5ed4ba7f60..2d0a20f2fd8c 100644 --- a/sound/soc/codecs/wm5100.c +++ b/sound/soc/codecs/wm5100.c @@ -2290,7 +2290,7 @@ static const struct gpio_chip wm5100_template_chip = { .label = "wm5100", .owner = THIS_MODULE, .direction_output = wm5100_gpio_direction_out, - .set_rv = wm5100_gpio_set, + .set = wm5100_gpio_set, .direction_input = wm5100_gpio_direction_in, .get = wm5100_gpio_get, .can_sleep = 1, diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index 2ed9f493d507..f7d726e3052c 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -1843,7 +1843,7 @@ static const struct gpio_chip wm8903_template_chip = { .direction_input = wm8903_gpio_direction_in, .get = wm8903_gpio_get, .direction_output = wm8903_gpio_direction_out, - .set_rv = wm8903_gpio_set, + .set = wm8903_gpio_set, .can_sleep = 1, }; diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index d69aa8b15629..7bf6b88c056c 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -3442,7 +3442,7 @@ static const struct gpio_chip wm8962_template_chip = { .owner = THIS_MODULE, .request = wm8962_gpio_request, .direction_output = wm8962_gpio_direction_out, - .set_rv = wm8962_gpio_set, + .set = wm8962_gpio_set, .can_sleep = 1, }; diff --git a/sound/soc/codecs/wm8996.c b/sound/soc/codecs/wm8996.c index e364d0da9044..459b39998307 100644 --- a/sound/soc/codecs/wm8996.c +++ b/sound/soc/codecs/wm8996.c @@ -2186,7 +2186,7 @@ static const struct gpio_chip wm8996_template_chip = { .label = "wm8996", .owner = THIS_MODULE, .direction_output = wm8996_gpio_direction_out, - .set_rv = wm8996_gpio_set, + .set = wm8996_gpio_set, .direction_input = wm8996_gpio_direction_in, .get = wm8996_gpio_get, .can_sleep = 1, diff --git a/sound/soc/codecs/zl38060.c b/sound/soc/codecs/zl38060.c index 180d45a349ac..7de4014e626d 100644 --- 
a/sound/soc/codecs/zl38060.c +++ b/sound/soc/codecs/zl38060.c @@ -440,7 +440,7 @@ static const struct gpio_chip template_chip = { .direction_input = chip_direction_input, .direction_output = chip_direction_output, .get = chip_gpio_get, - .set_rv = chip_gpio_set, + .set = chip_gpio_set, .can_sleep = true, }; diff --git a/sound/soc/soc-ac97.c b/sound/soc/soc-ac97.c index 29790807d785..37486d6a438e 100644 --- a/sound/soc/soc-ac97.c +++ b/sound/soc/soc-ac97.c @@ -125,7 +125,7 @@ static const struct gpio_chip snd_soc_ac97_gpio_chip = { .direction_input = snd_soc_ac97_gpio_direction_in, .get = snd_soc_ac97_gpio_get, .direction_output = snd_soc_ac97_gpio_direction_out, - .set_rv = snd_soc_ac97_gpio_set, + .set = snd_soc_ac97_gpio_set, .can_sleep = 1, }; diff --git a/sound/soc/ti/davinci-mcasp.c b/sound/soc/ti/davinci-mcasp.c index caf1887cc9d1..621a9d5f9377 100644 --- a/sound/soc/ti/davinci-mcasp.c +++ b/sound/soc/ti/davinci-mcasp.c @@ -2218,7 +2218,7 @@ static const struct gpio_chip davinci_mcasp_template_chip = { .request = davinci_mcasp_gpio_request, .free = davinci_mcasp_gpio_free, .direction_output = davinci_mcasp_gpio_direction_out, - .set_rv = davinci_mcasp_gpio_set, + .set = davinci_mcasp_gpio_set, .direction_input = davinci_mcasp_gpio_direction_in, .get = davinci_mcasp_gpio_get, .get_direction = davinci_mcasp_gpio_get_direction, From f752adfaf5f7d796007f9c1a867b9bdccc15cc2c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 25 Jul 2025 14:31:34 +0200 Subject: [PATCH 1684/2411] MAINTAINERS: resurrect my netfilter maintainer entry This reverts commit b5048d27872a9734d142540ea23c3e897e47e05c. 
It's been more than a year, hope my motivation lasts a bit longer than last time :-) Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index b968bc6959d1..cd9415702b28 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17313,6 +17313,7 @@ F: drivers/net/ethernet/neterion/ NETFILTER M: Pablo Neira Ayuso M: Jozsef Kadlecsik +M: Florian Westphal L: netfilter-devel@vger.kernel.org L: coreteam@netfilter.org S: Maintained From 25a8b88f000c33a1d580c317e93e40b953dc2fa5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 30 Jul 2025 23:45:32 +0200 Subject: [PATCH 1685/2411] netfilter: add back NETFILTER_XTABLES dependencies Some Kconfig symbols were changed to depend on the 'bool' symbol NETFILTER_XTABLES_LEGACY, which means they can now be set to built-in when the xtables code itself is in a loadable module: x86_64-linux-ld: vmlinux.o: in function `arpt_unregister_table_pre_exit': (.text+0x1831987): undefined reference to `xt_find_table' x86_64-linux-ld: vmlinux.o: in function `get_info.constprop.0': arp_tables.c:(.text+0x1831aab): undefined reference to `xt_request_find_table_lock' x86_64-linux-ld: arp_tables.c:(.text+0x1831bea): undefined reference to `xt_table_unlock' x86_64-linux-ld: vmlinux.o: in function `do_arpt_get_ctl': arp_tables.c:(.text+0x183205d): undefined reference to `xt_find_table_lock' x86_64-linux-ld: arp_tables.c:(.text+0x18320c1): undefined reference to `xt_table_unlock' x86_64-linux-ld: arp_tables.c:(.text+0x183219a): undefined reference to `xt_recseq' Change these to depend on both NETFILTER_XTABLES and NETFILTER_XTABLES_LEGACY. 
Fixes: 9fce66583f06 ("netfilter: Exclude LEGACY TABLES on PREEMPT_RT.") Signed-off-by: Arnd Bergmann Acked-by: Florian Westphal Tested-by: Breno Leitao Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/Kconfig | 1 + net/ipv4/netfilter/Kconfig | 3 +++ net/ipv6/netfilter/Kconfig | 1 + 3 files changed, 5 insertions(+) diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 60f28e4fb5c0..4fd5a6ea26b4 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -43,6 +43,7 @@ config NF_CONNTRACK_BRIDGE config BRIDGE_NF_EBTABLES_LEGACY tristate "Legacy EBTABLES support" depends on BRIDGE && NETFILTER_XTABLES_LEGACY + depends on NETFILTER_XTABLES default n help Legacy ebtables packet/frame classifier. diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 2c438b140e88..7dc9772fe2d8 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -14,6 +14,7 @@ config NF_DEFRAG_IPV4 config IP_NF_IPTABLES_LEGACY tristate "Legacy IP tables support" depends on NETFILTER_XTABLES_LEGACY + depends on NETFILTER_XTABLES default m if NETFILTER_XTABLES_LEGACY help iptables is a legacy packet classifier. @@ -326,6 +327,7 @@ endif # IP_NF_IPTABLES config IP_NF_ARPTABLES tristate "Legacy ARPTABLES support" depends on NETFILTER_XTABLES_LEGACY + depends on NETFILTER_XTABLES default n help arptables is a legacy packet classifier. 
@@ -343,6 +345,7 @@ config IP_NF_ARPFILTER select IP_NF_ARPTABLES select NETFILTER_FAMILY_ARP depends on NETFILTER_XTABLES_LEGACY + depends on NETFILTER_XTABLES help ARP packet filtering defines a table `filter', which has a series of rules for simple ARP packet filtering at local input and diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 276860f65baa..81daf82ddc2d 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -10,6 +10,7 @@ menu "IPv6: Netfilter Configuration" config IP6_NF_IPTABLES_LEGACY tristate "Legacy IP6 tables support" depends on INET && IPV6 && NETFILTER_XTABLES_LEGACY + depends on NETFILTER_XTABLES default m if NETFILTER_XTABLES_LEGACY help ip6tables is a legacy packet classifier. From de788b2e6227462b6dcd0e07474e72c089008f74 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Aug 2025 17:25:08 +0200 Subject: [PATCH 1686/2411] netfilter: ctnetlink: fix refcount leak on table dump There is a reference count leak in ctnetlink_dump_table(): if (res < 0) { nf_conntrack_get(&ct->ct_general); // HERE cb->args[1] = (unsigned long)ct; ... While it's very unlikely, it's possible that ct == last. If this happens, then the refcount of ct was already incremented. This 2nd increment is never undone. This prevents the conntrack object from being released, which in turn prevents cnet->count from dropping back to 0. This will then block the netns dismantle (or conntrack rmmod) as nf_conntrack_cleanup_net_list() will wait forever. This can be reproduced by running conntrack_resize.sh selftest in a loop. It takes ~20 minutes for me on a preemptible kernel on average before I see a runaway kworker spinning in nf_conntrack_cleanup_net_list. One fix would be to change this to: if (res < 0) { if (ct != last) nf_conntrack_get(&ct->ct_general); But this reference counting isn't needed in the first place. We can just store a cookie value instead. 
A followup patch will do the same for ctnetlink_exp_dump_table, it looks to me as if this has the same problem and like ctnetlink_dump_table, we only need a 'skip hint', not the actual object so we can apply the same cookie strategy there as well. Fixes: d205dc40798d ("[NETFILTER]: ctnetlink: fix deadlock in table dumping") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 486d52b45fe5..f403acd82437 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -884,8 +884,6 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) static int ctnetlink_done(struct netlink_callback *cb) { - if (cb->args[1]) - nf_ct_put((struct nf_conn *)cb->args[1]); kfree(cb->data); return 0; } @@ -1208,19 +1206,26 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) return 0; } +static unsigned long ctnetlink_get_id(const struct nf_conn *ct) +{ + unsigned long id = nf_ct_get_id(ct); + + return id ? id : 1; +} + static int ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { unsigned int flags = cb->data ? 
NLM_F_DUMP_FILTERED : 0; struct net *net = sock_net(skb->sk); - struct nf_conn *ct, *last; + unsigned long last_id = cb->args[1]; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; struct nf_conn *nf_ct_evict[8]; + struct nf_conn *ct; int res, i; spinlock_t *lockp; - last = (struct nf_conn *)cb->args[1]; i = 0; local_bh_disable(); @@ -1257,7 +1262,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) continue; if (cb->args[1]) { - if (ct != last) + if (ctnetlink_get_id(ct) != last_id) continue; cb->args[1] = 0; } @@ -1270,8 +1275,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) NFNL_MSG_TYPE(cb->nlh->nlmsg_type), ct, true, flags); if (res < 0) { - nf_conntrack_get(&ct->ct_general); - cb->args[1] = (unsigned long)ct; + cb->args[1] = ctnetlink_get_id(ct); spin_unlock(lockp); goto out; } @@ -1284,12 +1288,10 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) } out: local_bh_enable(); - if (last) { + if (last_id) { /* nf ct hash resize happened, now clear the leftover. */ - if ((struct nf_conn *)cb->args[1] == last) + if (cb->args[1] == last_id) cb->args[1] = 0; - - nf_ct_put(last); } while (i) { From 1492e3dcb2be3aa46d1963da96aa9593e4e4db5a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Aug 2025 17:25:09 +0200 Subject: [PATCH 1687/2411] netfilter: ctnetlink: remove refcounting in expectation dumpers Same pattern as previous patch: do not keep the expectation object alive via refcount, only store a cookie value and then use that as the skip hint for dump resumption. AFAICS this has the same issue as the one resolved in the conntrack dumper, when we do if (!refcount_inc_not_zero(&exp->use)) to increment the refcount, there is a chance that exp == last, which causes a double-increment of the refcount and subsequent memory leak. 
Fixes: cf6994c2b981 ("[NETFILTER]: nf_conntrack_netlink: sync expectation dumping with conntrack table dumping") Fixes: e844a928431f ("netfilter: ctnetlink: allow to dump expectation per master conntrack") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 41 ++++++++++++---------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index f403acd82437..50fd6809380f 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3170,23 +3170,27 @@ ctnetlink_expect_event(unsigned int events, const struct nf_exp_event *item) return 0; } #endif -static int ctnetlink_exp_done(struct netlink_callback *cb) + +static unsigned long ctnetlink_exp_id(const struct nf_conntrack_expect *exp) { - if (cb->args[1]) - nf_ct_expect_put((struct nf_conntrack_expect *)cb->args[1]); - return 0; + unsigned long id = (unsigned long)exp; + + id += nf_ct_get_id(exp->master); + id += exp->class; + + return id ? 
id : 1; } static int ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - struct nf_conntrack_expect *exp, *last; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); u_int8_t l3proto = nfmsg->nfgen_family; + unsigned long last_id = cb->args[1]; + struct nf_conntrack_expect *exp; rcu_read_lock(); - last = (struct nf_conntrack_expect *)cb->args[1]; for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { restart: hlist_for_each_entry_rcu(exp, &nf_ct_expect_hash[cb->args[0]], @@ -3198,7 +3202,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) continue; if (cb->args[1]) { - if (exp != last) + if (ctnetlink_exp_id(exp) != last_id) continue; cb->args[1] = 0; } @@ -3207,9 +3211,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp) < 0) { - if (!refcount_inc_not_zero(&exp->use)) - continue; - cb->args[1] = (unsigned long)exp; + cb->args[1] = ctnetlink_exp_id(exp); goto out; } } @@ -3220,32 +3222,30 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) } out: rcu_read_unlock(); - if (last) - nf_ct_expect_put(last); - return skb->len; } static int ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { - struct nf_conntrack_expect *exp, *last; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); struct nf_conn *ct = cb->data; struct nf_conn_help *help = nfct_help(ct); u_int8_t l3proto = nfmsg->nfgen_family; + unsigned long last_id = cb->args[1]; + struct nf_conntrack_expect *exp; if (cb->args[0]) return 0; rcu_read_lock(); - last = (struct nf_conntrack_expect *)cb->args[1]; + restart: hlist_for_each_entry_rcu(exp, &help->expectations, lnode) { if (l3proto && exp->tuple.src.l3num != l3proto) continue; if (cb->args[1]) { - if (exp != last) + if (ctnetlink_exp_id(exp) != last_id) continue; cb->args[1] = 0; } @@ -3253,9 +3253,7 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, 
struct netlink_callback *cb) cb->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp) < 0) { - if (!refcount_inc_not_zero(&exp->use)) - continue; - cb->args[1] = (unsigned long)exp; + cb->args[1] = ctnetlink_exp_id(exp); goto out; } } @@ -3266,9 +3264,6 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0] = 1; out: rcu_read_unlock(); - if (last) - nf_ct_expect_put(last); - return skb->len; } @@ -3287,7 +3282,6 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl, struct nf_conntrack_zone zone; struct netlink_dump_control c = { .dump = ctnetlink_exp_ct_dump_table, - .done = ctnetlink_exp_done, }; err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, @@ -3337,7 +3331,6 @@ static int ctnetlink_get_expect(struct sk_buff *skb, else { struct netlink_dump_control c = { .dump = ctnetlink_exp_dump_table, - .done = ctnetlink_exp_done, }; return netlink_dump_start(info->sk, skb, info->nlh, &c); } From c8a7c2c608180f3b4e51dc958b3861242dcdd76d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 Aug 2025 12:10:41 +0200 Subject: [PATCH 1688/2411] netfilter: nft_set_pipapo: don't return bogus extension pointer Dan Carpenter says: Commit 17a20e09f086 ("netfilter: nft_set: remove one argument from lookup and update functions") [..] leads to the following Smatch static checker warning: net/netfilter/nft_set_pipapo_avx2.c:1269 nft_pipapo_avx2_lookup() error: uninitialized symbol 'ext'. Fix this by initing ext to NULL and set it only once we've found a match. 
Fixes: 17a20e09f086 ("netfilter: nft_set: remove one argument from lookup and update functions") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/netfilter-devel/aJBzc3V5wk-yPOnH@stanley.mountain/ Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo_avx2.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index db5d367e43c4..2f090e253caf 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -1150,12 +1150,12 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, const u32 *key) { struct nft_pipapo *priv = nft_set_priv(set); + const struct nft_set_ext *ext = NULL; struct nft_pipapo_scratch *scratch; u8 genmask = nft_genmask_cur(net); const struct nft_pipapo_match *m; const struct nft_pipapo_field *f; const u8 *rp = (const u8 *)key; - const struct nft_set_ext *ext; unsigned long *res, *fill; bool map_index; int i; @@ -1246,13 +1246,13 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, goto out; if (last) { - ext = &f->mt[ret].e->ext; - if (unlikely(nft_set_elem_expired(ext) || - !nft_set_elem_active(ext, genmask))) { - ext = NULL; - goto next_match; - } + const struct nft_set_ext *e = &f->mt[ret].e->ext; + if (unlikely(nft_set_elem_expired(e) || + !nft_set_elem_active(e, genmask))) + goto next_match; + + ext = e; goto out; } From f54186df806fb1e9cb262d553f4ff942f9467cf1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 4 Aug 2025 13:35:15 +0300 Subject: [PATCH 1689/2411] netfilter: conntrack: clean up returns in nf_conntrack_log_invalid_sysctl() Smatch complains that these look like error paths with missing error codes, especially the one where we return if nf_log_is_registered() is true: net/netfilter/nf_conntrack_standalone.c:575 nf_conntrack_log_invalid_sysctl() warn: missing error code? 
'ret' In fact, all these return zero deliberately. Change them to return a literal instead which helps readability as well as silencing the warning. Fixes: e89a68046687 ("netfilter: load nf_log_syslog on enabling nf_conntrack_log_invalid") Signed-off-by: Dan Carpenter Acked-by: Lance Yang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 9b8b10a85233..1f14ef0436c6 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -567,16 +567,16 @@ nf_conntrack_log_invalid_sysctl(const struct ctl_table *table, int write, return ret; if (*(u8 *)table->data == 0) - return ret; + return 0; /* Load nf_log_syslog only if no logger is currently registered */ for (i = 0; i < NFPROTO_NUMPROTO; i++) { if (nf_log_is_registered(i)) - return ret; + return 0; } request_module("%s", "nf_log_syslog"); - return ret; + return 0; } static struct ctl_table_header *nf_ct_netfilter_header; From 1dee968d22eaeb3eede70df513ab3f8dd1712e3e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 7 Aug 2025 12:02:42 +0200 Subject: [PATCH 1690/2411] netfilter: nft_socket: remove WARN_ON_ONCE with huge level value syzbot managed to reach this WARN_ON_ONCE by passing a huge level value, remove it. 
WARNING: CPU: 0 PID: 5853 at net/netfilter/nft_socket.c:220 nft_socket_init+0x2f4/0x3d0 net/netfilter/nft_socket.c:220 Reported-by: syzbot+a225fea35d7baf8dbdc3@syzkaller.appspotmail.com Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 35d0409b0095..36affbb697c2 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -217,7 +217,7 @@ static int nft_socket_init(const struct nft_ctx *ctx, level += err; /* Implies a giant cgroup tree */ - if (WARN_ON_ONCE(level > 255)) + if (level > 255) return -EOPNOTSUPP; priv->level = level; From 80f21806b8e34ae1e24c0fc6a0f0dfd9b055e130 Mon Sep 17 00:00:00 2001 From: Mohamed Khalfella Date: Wed, 6 Aug 2025 22:35:07 -0700 Subject: [PATCH 1691/2411] nvmet: exit debugfs after discovery subsystem exits Commit 528589947c180 ("nvmet: initialize discovery subsys after debugfs is initialized") changed nvmet_init() to initialize nvme discovery after "nvmet" debugfs directory is initialized. The change broke nvmet_exit() because discovery subsystem now depends on debugfs. Debugfs should be destroyed after discovery subsystem. Fix nvmet_exit() to do that. 
Reported-by: Yi Zhang Closes: https://lore.kernel.org/all/CAHj4cs96AfFQpyDKF_MdfJsnOEo=2V7dQgqjFv+k3t7H-=yGhA@mail.gmail.com/ Fixes: 528589947c180 ("nvmet: initialize discovery subsys after debugfs is initialized") Signed-off-by: Mohamed Khalfella Reviewed-by: Hannes Reinecke Reviewed-by: Daniel Wagner Link: https://lore.kernel.org/r/20250807053507.2794335-1-mkhalfella@purestorage.com Signed-off-by: Jens Axboe --- drivers/nvme/target/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 83f3d2f8ef2d..0dd7bd99afa3 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1992,8 +1992,8 @@ static int __init nvmet_init(void) static void __exit nvmet_exit(void) { nvmet_exit_configfs(); - nvmet_exit_debugfs(); nvmet_exit_discovery(); + nvmet_exit_debugfs(); ida_destroy(&cntlid_ida); destroy_workqueue(nvmet_wq); destroy_workqueue(buffered_io_wq); From 42e6c6ce03fd3e41e39a0f93f9b1a1d9fa664338 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 7 Aug 2025 11:24:12 +0800 Subject: [PATCH 1692/2411] lib/sbitmap: convert shallow_depth from one word to the whole sbitmap Currently elevators will record internal 'async_depth' to throttle asynchronous requests, and they both calculate shallow_depth based on sb->shift, with the respect that sb->shift is the available tags in one word. However, sb->shift is not the available tags in the last word, see __map_depth: if (index == sb->map_nr - 1) return sb->depth - (index << sb->shift); As a consequence, if the last word is used, more tags can be obtained than expected, for example, assume nr_requests=256 and there are four words, in the worst case if user set nr_requests=32, then the first word is the last word, and still use bits per word, which is 64, to calculate async_depth is wrong. 
On the other hand, due to cgroup qos, bfq can allow only one request to be allocated, and setting shallow_depth=1 will still allow the number of words request to be allocated. Fix these problems by using shallow_depth to the whole sbitmap instead of per word; also change kyber, mq-deadline and bfq to follow this. A new helper __map_depth_with_shallow() is introduced to calculate available bits in each word. Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20250807032413.1469456-2-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 35 ++++++++++++-------------- block/bfq-iosched.h | 3 +-- block/kyber-iosched.c | 9 ++----- block/mq-deadline.c | 16 +----------- include/linux/sbitmap.h | 6 ++--- lib/sbitmap.c | 56 +++++++++++++++++++++-------------------- 6 files changed, 52 insertions(+), 73 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index aca9886c9ee3..3bf76902f07f 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -694,17 +694,13 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) { struct bfq_data *bfqd = data->q->elevator->elevator_data; struct bfq_io_cq *bic = bfq_bic_lookup(data->q); - int depth; - unsigned limit = data->q->nr_requests; - unsigned int act_idx; + unsigned int limit, act_idx; /* Sync reads have full depth available */ - if (op_is_sync(opf) && !op_is_write(opf)) { - depth = 0; - } else { - depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(opf)]; - limit = (limit * depth) >> bfqd->full_depth_shift; - } + if (op_is_sync(opf) && !op_is_write(opf)) + limit = data->q->nr_requests; + else + limit = bfqd->async_depths[!!bfqd->wr_busy_queues][op_is_sync(opf)]; for (act_idx = 0; bic && act_idx < bfqd->num_actuators; act_idx++) { /* Fast path to check if bfqq is already allocated. */ @@ -718,14 +714,16 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) * available requests and thus starve other entities. 
*/ if (bfqq_request_over_limit(bfqd, bic, opf, act_idx, limit)) { - depth = 1; + limit = 1; break; } } + bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u", - __func__, bfqd->wr_busy_queues, op_is_sync(opf), depth); - if (depth) - data->shallow_depth = depth; + __func__, bfqd->wr_busy_queues, op_is_sync(opf), limit); + + if (limit < data->q->nr_requests) + data->shallow_depth = limit; } static struct bfq_queue * @@ -7114,9 +7112,8 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) */ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt) { - unsigned int depth = 1U << bt->sb.shift; + unsigned int nr_requests = bfqd->queue->nr_requests; - bfqd->full_depth_shift = bt->sb.shift; /* * In-word depths if no bfq_queue is being weight-raised: * leaving 25% of tags only for sync reads. @@ -7128,13 +7125,13 @@ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt) * limit 'something'. */ /* no more than 50% of tags for async I/O */ - bfqd->word_depths[0][0] = max(depth >> 1, 1U); + bfqd->async_depths[0][0] = max(nr_requests >> 1, 1U); /* * no more than 75% of tags for sync writes (25% extra tags * w.r.t. async I/O, to prevent async I/O from starving sync * writes) */ - bfqd->word_depths[0][1] = max((depth * 3) >> 2, 1U); + bfqd->async_depths[0][1] = max((nr_requests * 3) >> 2, 1U); /* * In-word depths in case some bfq_queue is being weight- @@ -7144,9 +7141,9 @@ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt) * shortage. 
*/ /* no more than ~18% of tags for async I/O */ - bfqd->word_depths[1][0] = max((depth * 3) >> 4, 1U); + bfqd->async_depths[1][0] = max((nr_requests * 3) >> 4, 1U); /* no more than ~37% of tags for sync writes (~20% extra tags) */ - bfqd->word_depths[1][1] = max((depth * 6) >> 4, 1U); + bfqd->async_depths[1][1] = max((nr_requests * 6) >> 4, 1U); } static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx) diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 0b4704932d72..34a498e6b2a5 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -813,8 +813,7 @@ struct bfq_data { * Depth limits used in bfq_limit_depth (see comments on the * function) */ - unsigned int word_depths[2][2]; - unsigned int full_depth_shift; + unsigned int async_depths[2][2]; /* * Number of independent actuators. This is equal to 1 in diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 7b6832cb3a8d..70cbc7b2deb4 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -157,10 +157,7 @@ struct kyber_queue_data { */ struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS]; - /* - * Async request percentage, converted to per-word depth for - * sbitmap_get_shallow(). - */ + /* Number of allowed async requests. 
*/ unsigned int async_depth; struct kyber_cpu_latency __percpu *cpu_latency; @@ -447,10 +444,8 @@ static void kyber_depth_updated(struct blk_mq_hw_ctx *hctx) { struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data; struct blk_mq_tags *tags = hctx->sched_tags; - unsigned int shift = tags->bitmap_tags.sb.shift; - - kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U; + kqd->async_depth = hctx->queue->nr_requests * KYBER_ASYNC_PERCENT / 100U; sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, kqd->async_depth); } diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 7b6caf30e00a..b9b7cdf1d3c9 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -487,20 +487,6 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) return rq; } -/* - * 'depth' is a number in the range 1..INT_MAX representing a number of - * requests. Scale it with a factor (1 << bt->sb.shift) / q->nr_requests since - * 1..(1 << bt->sb.shift) is the range expected by sbitmap_get_shallow(). - * Values larger than q->nr_requests have the same effect as q->nr_requests. - */ -static int dd_to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth) -{ - struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags; - const unsigned int nrr = hctx->queue->nr_requests; - - return ((qdepth << bt->sb.shift) + nrr - 1) / nrr; -} - /* * Called by __blk_mq_alloc_request(). The shallow_depth value set by this * function is used by __blk_mq_get_tag(). @@ -517,7 +503,7 @@ static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) * Throttle asynchronous requests and writes such that these requests * do not block the allocation of synchronous requests. */ - data->shallow_depth = dd_to_word_depth(data->hctx, dd->async_depth); + data->shallow_depth = dd->async_depth; } /* Called by blk_mq_update_nr_requests(). 
*/ diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 189140bf11fc..4adf4b364fcd 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -213,12 +213,12 @@ int sbitmap_get(struct sbitmap *sb); * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, * limiting the depth used from each word. * @sb: Bitmap to allocate from. - * @shallow_depth: The maximum number of bits to allocate from a single word. + * @shallow_depth: The maximum number of bits to allocate from the bitmap. * * This rather specific operation allows for having multiple users with * different allocation limits. E.g., there can be a high-priority class that * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() - * with a @shallow_depth of (1 << (@sb->shift - 1)). Then, the low-priority + * with a @shallow_depth of (sb->depth >> 1). Then, the low-priority * class can only allocate half of the total bits in the bitmap, preventing it * from starving out the high-priority class. * @@ -478,7 +478,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, * sbitmap_queue, limiting the depth used from each word, with preemption * already disabled. * @sbq: Bitmap queue to allocate from. - * @shallow_depth: The maximum number of bits to allocate from a single word. + * @shallow_depth: The maximum number of bits to allocate from the queue. * See sbitmap_get_shallow(). 
* * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after diff --git a/lib/sbitmap.c b/lib/sbitmap.c index d3412984170c..c07e3cd82e29 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -208,8 +208,28 @@ static int sbitmap_find_bit_in_word(struct sbitmap_word *map, return nr; } +static unsigned int __map_depth_with_shallow(const struct sbitmap *sb, + int index, + unsigned int shallow_depth) +{ + u64 shallow_word_depth; + unsigned int word_depth, reminder; + + word_depth = __map_depth(sb, index); + if (shallow_depth >= sb->depth) + return word_depth; + + shallow_word_depth = word_depth * shallow_depth; + reminder = do_div(shallow_word_depth, sb->depth); + + if (reminder >= (index + 1) * word_depth) + shallow_word_depth++; + + return (unsigned int)shallow_word_depth; +} + static int sbitmap_find_bit(struct sbitmap *sb, - unsigned int depth, + unsigned int shallow_depth, unsigned int index, unsigned int alloc_hint, bool wrap) @@ -218,12 +238,12 @@ static int sbitmap_find_bit(struct sbitmap *sb, int nr = -1; for (i = 0; i < sb->map_nr; i++) { - nr = sbitmap_find_bit_in_word(&sb->map[index], - min_t(unsigned int, - __map_depth(sb, index), - depth), - alloc_hint, wrap); + unsigned int depth = __map_depth_with_shallow(sb, index, + shallow_depth); + if (depth) + nr = sbitmap_find_bit_in_word(&sb->map[index], depth, + alloc_hint, wrap); if (nr != -1) { nr += index << sb->shift; break; @@ -406,27 +426,9 @@ EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq, unsigned int depth) { - unsigned int wake_batch; - unsigned int shallow_depth; - - /* - * Each full word of the bitmap has bits_per_word bits, and there might - * be a partial word. There are depth / bits_per_word full words and - * depth % bits_per_word bits left over. 
In bitwise arithmetic: - * - * bits_per_word = 1 << shift - * depth / bits_per_word = depth >> shift - * depth % bits_per_word = depth & ((1 << shift) - 1) - * - * Each word can be limited to sbq->min_shallow_depth bits. - */ - shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth); - depth = ((depth >> sbq->sb.shift) * shallow_depth + - min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth)); - wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1, - SBQ_WAKE_BATCH); - - return wake_batch; + return clamp_t(unsigned int, + min(depth, sbq->min_shallow_depth) / SBQ_WAIT_QUEUES, + 1, SBQ_WAKE_BATCH); } int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, From 45fa9f97e65231a9fd4f9429489cb74c10ccd0fd Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 7 Aug 2025 11:24:13 +0800 Subject: [PATCH 1693/2411] lib/sbitmap: make sbitmap_get_shallow() internal Because it's only used in sbitmap.c Signed-off-by: Yu Kuai Reviewed-by: Damien Le Moal Reviewed-by: Jan Kara Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20250807032413.1469456-3-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 17 ----------------- lib/sbitmap.c | 18 ++++++++++++++++-- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 4adf4b364fcd..ffb9907c7070 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -209,23 +209,6 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth); */ int sbitmap_get(struct sbitmap *sb); -/** - * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, - * limiting the depth used from each word. - * @sb: Bitmap to allocate from. - * @shallow_depth: The maximum number of bits to allocate from the bitmap. - * - * This rather specific operation allows for having multiple users with - * different allocation limits. 
E.g., there can be a high-priority class that - * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() - * with a @shallow_depth of (sb->depth >> 1). Then, the low-priority - * class can only allocate half of the total bits in the bitmap, preventing it - * from starving out the high-priority class. - * - * Return: Non-negative allocated bit number if successful, -1 otherwise. - */ -int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth); - /** * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap. * @sb: Bitmap to check. diff --git a/lib/sbitmap.c b/lib/sbitmap.c index c07e3cd82e29..4d188d05db15 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -307,7 +307,22 @@ static int __sbitmap_get_shallow(struct sbitmap *sb, return sbitmap_find_bit(sb, shallow_depth, index, alloc_hint, true); } -int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth) +/** + * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, + * limiting the depth used from each word. + * @sb: Bitmap to allocate from. + * @shallow_depth: The maximum number of bits to allocate from the bitmap. + * + * This rather specific operation allows for having multiple users with + * different allocation limits. E.g., there can be a high-priority class that + * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() + * with a @shallow_depth of (sb->depth >> 1). Then, the low-priority + * class can only allocate half of the total bits in the bitmap, preventing it + * from starving out the high-priority class. + * + * Return: Non-negative allocated bit number if successful, -1 otherwise. 
+ */ +static int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth) { int nr; unsigned int hint, depth; @@ -322,7 +337,6 @@ int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth) return nr; } -EXPORT_SYMBOL_GPL(sbitmap_get_shallow); bool sbitmap_any_bit_set(const struct sbitmap *sb) { From af357a6a3b7d685e7aa621c6fb1d4ed6c349ec9e Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Thu, 7 Aug 2025 12:07:42 +0200 Subject: [PATCH 1694/2411] spi: spi-fsl-lpspi: Clamp too high speed_hz Currently the driver is not able to handle the case that a SPI device specifies a higher spi-max-frequency than half of per-clk: per-clk should be at least two times of transfer speed Fix this by clamping to the max possible value and use the minimum SCK period of 2 cycles. Fixes: 77736a98b859 ("spi: lpspi: add the error info of transfer speed setting") Signed-off-by: Stefan Wahren Link: https://patch.msgid.link/20250807100742.9917-1-wahrenst@gmx.net Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-lpspi.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index 67d4000c3cef..313e444a34f3 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -330,13 +330,11 @@ static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi) } if (config.speed_hz > perclk_rate / 2) { - dev_err(fsl_lpspi->dev, - "per-clk should be at least two times of transfer speed"); - return -EINVAL; + div = 2; + } else { + div = DIV_ROUND_UP(perclk_rate, config.speed_hz); } - div = DIV_ROUND_UP(perclk_rate, config.speed_hz); - for (prescale = 0; prescale <= prescale_max; prescale++) { scldiv = div / (1 << prescale) - 2; if (scldiv >= 0 && scldiv < 256) { From 13d0fe84a214658254a7412b2b46ec1507dc51f0 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Tue, 5 Aug 2025 18:05:42 +0200 Subject: [PATCH 1695/2411] spi: spi-qpic-snand: fix calculating of ECC OOB regions' properties The OOB 
layout used by the driver has two distinct regions which contain hardware specific ECC data, yet the qcom_spi_ooblayout_ecc() function sets the same offset and length values for both regions which is clearly wrong. Change the code to calculate the correct values for both regions. For reference, the following table shows the computed offset and length values for various OOB size/ECC strength configurations: +-----------------+-----------------+ |before the change| after the change| +-------+----------+--------+--------+--------+--------+--------+ | OOB | ECC | region | region | region | region | region | | size | strength | index | offset | length | offset | length | +-------+----------+--------+--------+--------+--------+--------+ | 128 | 8 | 0 | 113 | 15 | 0 | 49 | | | | 1 | 113 | 15 | 65 | 63 | +-------+----------+--------+--------+--------+--------+--------+ | 128 | 4 | 0 | 117 | 11 | 0 | 37 | | | | 1 | 117 | 11 | 53 | 75 | +-------+----------+--------+--------+--------+--------+--------+ | 64 | 4 | 0 | 53 | 11 | 0 | 37 | | | | 1 | 53 | 11 | 53 | 11 | +-------+----------+--------+--------+--------+--------+--------+ Fixes: 7304d1909080 ("spi: spi-qpic: add driver for QCOM SPI NAND flash Interface") Signed-off-by: Gabor Juhos Reviewed-by: Konrad Dybcio Link: https://patch.msgid.link/20250805-qpic-snand-oob-ecc-fix-v2-1-e6f811c70d6f@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-qpic-snand.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index 5216d60e01aa..7b76d2c82a52 100644 --- a/drivers/spi/spi-qpic-snand.c +++ b/drivers/spi/spi-qpic-snand.c @@ -210,13 +210,21 @@ static int qcom_spi_ooblayout_ecc(struct mtd_info *mtd, int section, struct qcom_nand_controller *snandc = nand_to_qcom_snand(nand); struct qpic_ecc *qecc = snandc->qspi->ecc; - if (section > 1) - return -ERANGE; + switch (section) { + case 0: + oobregion->offset = 0; + oobregion->length =
qecc->bytes * (qecc->steps - 1) + + qecc->bbm_size; + return 0; + case 1: + oobregion->offset = qecc->bytes * (qecc->steps - 1) + + qecc->bbm_size + + qecc->steps * 4; + oobregion->length = mtd->oobsize - oobregion->offset; + return 0; + } - oobregion->length = qecc->ecc_bytes_hw + qecc->spare_bytes; - oobregion->offset = mtd->oobsize - oobregion->length; - - return 0; + return -ERANGE; } static int qcom_spi_ooblayout_free(struct mtd_info *mtd, int section, From ae633388cae349886f1a3cfb27aa092854b24c1b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Aug 2025 14:21:46 +0000 Subject: [PATCH 1696/2411] pptp: fix pptp_xmit() error path I accidentally added a bug in pptp_xmit() that syzbot caught for us. Only call ip_rt_put() if a route has been allocated. BUG: unable to handle page fault for address: ffffffffffffffdb PGD df3b067 P4D df3b067 PUD df3d067 PMD 0 Oops: Oops: 0002 [#1] SMP KASAN PTI CPU: 1 UID: 0 PID: 6346 Comm: syz.0.336 Not tainted 6.16.0-next-20250804-syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 RIP: 0010:arch_atomic_add_return arch/x86/include/asm/atomic.h:85 [inline] RIP: 0010:raw_atomic_sub_return_release include/linux/atomic/atomic-arch-fallback.h:846 [inline] RIP: 0010:atomic_sub_return_release include/linux/atomic/atomic-instrumented.h:327 [inline] RIP: 0010:__rcuref_put include/linux/rcuref.h:109 [inline] RIP: 0010:rcuref_put+0x172/0x210 include/linux/rcuref.h:173 Call Trace: dst_release+0x24/0x1b0 net/core/dst.c:167 ip_rt_put include/net/route.h:285 [inline] pptp_xmit+0x14b/0x1a90 drivers/net/ppp/pptp.c:267 __ppp_channel_push+0xf2/0x1c0 drivers/net/ppp/ppp_generic.c:2166 ppp_channel_push+0x123/0x660 drivers/net/ppp/ppp_generic.c:2198 ppp_write+0x2b0/0x400 drivers/net/ppp/ppp_generic.c:544 vfs_write+0x27b/0xb30 fs/read_write.c:684 ksys_write+0x145/0x250 fs/read_write.c:738 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 
arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: de9c4861fb42 ("pptp: ensure minimal skb length in pptp_xmit()") Reported-by: syzbot+27d7cfbc93457e472e00@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/689095a5.050a0220.1fc43d.0009.GAE@google.com/ Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250807142146.2877060-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ppp/pptp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 4cd6f67bd5d3..90737cb71892 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -159,17 +159,17 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) int len; unsigned char *data; __u32 seq_recv; - struct rtable *rt = NULL; + struct rtable *rt; struct net_device *tdev; struct iphdr *iph; int max_headroom; if (sk_pppox(po)->sk_state & PPPOX_DEAD) - goto tx_error; + goto tx_drop; rt = pptp_route_output(po, &fl4); if (IS_ERR(rt)) - goto tx_error; + goto tx_drop; tdev = rt->dst.dev; @@ -265,6 +265,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) tx_error: ip_rt_put(rt); +tx_drop: kfree_skb(skb); return 1; } From ad580dfa388fabb52af033e3f8cc5d04be985e54 Mon Sep 17 00:00:00 2001 From: Leo Martins Date: Mon, 21 Jul 2025 10:49:16 -0700 Subject: [PATCH 1697/2411] btrfs: fix subpage deadlock in try_release_subpage_extent_buffer() There is a potential deadlock that can happen in try_release_subpage_extent_buffer() because the irq-safe xarray spin lock fs_info->buffer_tree is being acquired before the irq-unsafe eb->refs_lock. 
This leads to the potential race: // T1 (random eb->refs user) // T2 (release folio) spin_lock(&eb->refs_lock); // interrupt end_bbio_meta_write() btrfs_meta_folio_clear_writeback() btree_release_folio() folio_test_writeback() //false try_release_extent_buffer() try_release_subpage_extent_buffer() xa_lock_irq(&fs_info->buffer_tree) spin_lock(&eb->refs_lock); // blocked; held by T1 buffer_tree_clear_mark() xas_lock_irqsave() // blocked; held by T2 I believe that the spin lock can safely be replaced by an rcu_read_lock. The xa_for_each loop does not need the spin lock as it's already internally protected by the rcu_read_lock. The extent buffer is also protected by the rcu_read_lock so it won't be freed before we take the eb->refs_lock and check the ref count. The rcu_read_lock is taken and released every iteration, just like the spin lock, which means we're not protected against concurrent insertions into the xarray. This is fine because we rely on folio->private to detect if there are any ebs remaining in the folio. There is already some precedent for this with find_extent_buffer_nolock, which loads an extent buffer from the xarray with only rcu_read_lock. lockdep warning: ===================================================== WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected 6.16.0-0_fbk701_debug_rc0_123_g4c06e63b9203 #1 Tainted: G E N ----------------------------------------------------- kswapd0/66 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: ffff000011ffd600 (&eb->refs_lock){+.+.}-{3:3}, at: try_release_extent_buffer+0x18c/0x560 and this task is already holding: ffff0000c1d91b88 (&buffer_xa_class){-.-.}-{3:3}, at: try_release_extent_buffer+0x13c/0x560 which would create a new lock dependency: (&buffer_xa_class){-.-.}-{3:3} -> (&eb->refs_lock){+.+.}-{3:3} but this new dependency connects a HARDIRQ-irq-safe lock: (&buffer_xa_class){-.-.}-{3:3} ... 
which became HARDIRQ-irq-safe at: lock_acquire+0x178/0x358 _raw_spin_lock_irqsave+0x60/0x88 buffer_tree_clear_mark+0xc4/0x160 end_bbio_meta_write+0x238/0x398 btrfs_bio_end_io+0x1f8/0x330 btrfs_orig_write_end_io+0x1c4/0x2c0 bio_endio+0x63c/0x678 blk_update_request+0x1c4/0xa00 blk_mq_end_request+0x54/0x88 virtblk_request_done+0x124/0x1d0 blk_mq_complete_request+0x84/0xa0 virtblk_done+0x130/0x238 vring_interrupt+0x130/0x288 __handle_irq_event_percpu+0x1e8/0x708 handle_irq_event+0x98/0x1b0 handle_fasteoi_irq+0x264/0x7c0 generic_handle_domain_irq+0xa4/0x108 gic_handle_irq+0x7c/0x1a0 do_interrupt_handler+0xe4/0x148 el1_interrupt+0x30/0x50 el1h_64_irq_handler+0x14/0x20 el1h_64_irq+0x6c/0x70 _raw_spin_unlock_irq+0x38/0x70 __run_timer_base+0xdc/0x5e0 run_timer_softirq+0xa0/0x138 handle_softirqs.llvm.13542289750107964195+0x32c/0xbd0 ____do_softirq.llvm.17674514681856217165+0x18/0x28 call_on_irq_stack+0x24/0x30 __irq_exit_rcu+0x164/0x430 irq_exit_rcu+0x18/0x88 el1_interrupt+0x34/0x50 el1h_64_irq_handler+0x14/0x20 el1h_64_irq+0x6c/0x70 arch_local_irq_enable+0x4/0x8 do_idle+0x1a0/0x3b8 cpu_startup_entry+0x60/0x80 rest_init+0x204/0x228 start_kernel+0x394/0x3f0 __primary_switched+0x8c/0x8958 to a HARDIRQ-irq-unsafe lock: (&eb->refs_lock){+.+.}-{3:3} ... which became HARDIRQ-irq-unsafe at: ... 
lock_acquire+0x178/0x358 _raw_spin_lock+0x4c/0x68 free_extent_buffer_stale+0x2c/0x170 btrfs_read_sys_array+0x1b0/0x338 open_ctree+0xeb0/0x1df8 btrfs_get_tree+0xb60/0x1110 vfs_get_tree+0x8c/0x250 fc_mount+0x20/0x98 btrfs_get_tree+0x4a4/0x1110 vfs_get_tree+0x8c/0x250 do_new_mount+0x1e0/0x6c0 path_mount+0x4ec/0xa58 __arm64_sys_mount+0x370/0x490 invoke_syscall+0x6c/0x208 el0_svc_common+0x14c/0x1b8 do_el0_svc+0x4c/0x60 el0_svc+0x4c/0x160 el0t_64_sync_handler+0x70/0x100 el0t_64_sync+0x168/0x170 other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&eb->refs_lock); local_irq_disable(); lock(&buffer_xa_class); lock(&eb->refs_lock); lock(&buffer_xa_class); *** DEADLOCK *** 2 locks held by kswapd0/66: #0: ffff800085506e40 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0xe8/0xe50 #1: ffff0000c1d91b88 (&buffer_xa_class){-.-.}-{3:3}, at: try_release_extent_buffer+0x13c/0x560 Link: https://www.kernel.org/doc/Documentation/locking/lockdep-design.rst#:~:text=Multi%2Dlock%20dependency%20rules%3A Fixes: 19d7f65f032f ("btrfs: convert the buffer_radix to an xarray") CC: stable@vger.kernel.org # 6.16+ Reviewed-by: Boris Burkov Reviewed-by: Qu Wenruo Signed-off-by: Leo Martins Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 835b0deef9bb..f23d75986947 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4331,15 +4331,18 @@ static int try_release_subpage_extent_buffer(struct folio *folio) unsigned long end = index + (PAGE_SIZE >> fs_info->nodesize_bits) - 1; int ret; - xa_lock_irq(&fs_info->buffer_tree); + rcu_read_lock(); xa_for_each_range(&fs_info->buffer_tree, index, eb, start, end) { /* * The same as try_release_extent_buffer(), to ensure the eb * won't disappear out from under us. 
*/ spin_lock(&eb->refs_lock); + rcu_read_unlock(); + if (refcount_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) { spin_unlock(&eb->refs_lock); + rcu_read_lock(); continue; } @@ -4358,11 +4361,10 @@ static int try_release_subpage_extent_buffer(struct folio *folio) * check the folio private at the end. And * release_extent_buffer() will release the refs_lock. */ - xa_unlock_irq(&fs_info->buffer_tree); release_extent_buffer(eb); - xa_lock_irq(&fs_info->buffer_tree); + rcu_read_lock(); } - xa_unlock_irq(&fs_info->buffer_tree); + rcu_read_unlock(); /* * Finally to check if we have cleared folio private, as if we have @@ -4375,7 +4377,6 @@ static int try_release_subpage_extent_buffer(struct folio *folio) ret = 0; spin_unlock(&folio->mapping->i_private_lock); return ret; - } int try_release_extent_buffer(struct folio *folio) From 15fc0bec883c95007a4901fe75f247bd0ca21651 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sun, 20 Jul 2025 07:56:48 +0930 Subject: [PATCH 1698/2411] btrfs: make btrfs_cleanup_ordered_extents() support large folios When hitting a large folio, btrfs_cleanup_ordered_extents() will get the same large folio multiple times, and clearing the same range again and again. Thankfully this is not causing anything wrong, just inefficiency. This is caused by the fact that we're iterating folios using the old page index, thus can hit the same large folio again and again. Enhance it by increasing @index to the index of the folio end, and only increase @index by 1 if we failed to grab a folio. 
Reviewed-by: Boris Burkov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b77dd22b8cdb..a2de289e662b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -401,10 +401,12 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode, while (index <= end_index) { folio = filemap_get_folio(inode->vfs_inode.i_mapping, index); - index++; - if (IS_ERR(folio)) + if (IS_ERR(folio)) { + index++; continue; + } + index = folio_end(folio) >> PAGE_SHIFT; /* * Here we just clear all Ordered bits for every page in the * range, then btrfs_mark_ordered_io_finished() will handle From deaf895212da74635a7f0a420e1ecf8f5eca1fe5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sun, 20 Jul 2025 15:01:39 +0930 Subject: [PATCH 1699/2411] btrfs: fix wrong length parameter for btrfs_cleanup_ordered_extents() Inside nocow_one_range(), if the checksum cloning for data reloc inode failed, we call btrfs_cleanup_ordered_extents() to cleanup the just allocated ordered extents. But unlike extent_clear_unlock_delalloc(), btrfs_cleanup_ordered_extents() requires a length, not an inclusive end bytenr. This can be problematic, as the @end is normally way larger than @len. This means btrfs_cleanup_ordered_extents() can be called on folios out of the correct range, and if the out-of-range folio is under writeback, we can incorrectly clear the ordered flag of the folio, and trigger the DEBUG_WARN() inside btrfs_writepage_cow_fixup(). Fix the wrong parameter with correct length instead. 
Fixes: 94f6c5c17e52 ("btrfs: move ordered extent cleanup to where they are allocated") CC: stable@vger.kernel.org # 6.15+ Reviewed-by: Boris Burkov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a2de289e662b..d740910e071a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2015,7 +2015,7 @@ static int nocow_one_range(struct btrfs_inode *inode, struct folio *locked_folio * cleaered by the caller. */ if (ret < 0) - btrfs_cleanup_ordered_extents(inode, file_pos, end); + btrfs_cleanup_ordered_extents(inode, file_pos, len); return ret; } From fc5799986fbca957e2e3c0480027f249951b7bcf Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 16 Jul 2025 11:41:21 +0100 Subject: [PATCH 1700/2411] btrfs: error on missing block group when unaccounting log tree extent buffers Currently we only log an error message if we can't find the block group for a log tree extent buffer when unaccounting it (while freeing a log tree). A missing block group means something is seriously wrong and we end up leaking space from the metadata space info. So return -ENOENT in case we don't find the block group. 
CC: stable@vger.kernel.org # 6.12+ Reviewed-by: Boris Burkov Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 2186e87fb61b..69e11557fd13 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2605,14 +2605,14 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, /* * Correctly adjust the reserved bytes occupied by a log tree extent buffer */ -static void unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start) +static int unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start) { struct btrfs_block_group *cache; cache = btrfs_lookup_block_group(fs_info, start); if (!cache) { btrfs_err(fs_info, "unable to find block group for %llu", start); - return; + return -ENOENT; } spin_lock(&cache->space_info->lock); @@ -2623,27 +2623,22 @@ static void unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start) spin_unlock(&cache->space_info->lock); btrfs_put_block_group(cache); + + return 0; } static int clean_log_buffer(struct btrfs_trans_handle *trans, struct extent_buffer *eb) { - int ret; - btrfs_tree_lock(eb); btrfs_clear_buffer_dirty(trans, eb); wait_on_extent_buffer_writeback(eb); btrfs_tree_unlock(eb); - if (trans) { - ret = btrfs_pin_reserved_extent(trans, eb); - if (ret) - return ret; - } else { - unaccount_log_buffer(eb->fs_info, eb->start); - } + if (trans) + return btrfs_pin_reserved_extent(trans, eb); - return 0; + return unaccount_log_buffer(eb->fs_info, eb->start); } static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, From 4289b494ac553e74e86fed1c66b2bf9530bc1082 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 25 Jul 2025 20:33:25 +0930 Subject: [PATCH 1701/2411] btrfs: do not allow relocation of partially dropped subvolumes [BUG] There is an internal report that balance 
triggered transaction abort, with the following call trace: item 85 key (594509824 169 0) itemoff 12599 itemsize 33 extent refs 1 gen 197740 flags 2 ref#0: tree block backref root 7 item 86 key (594558976 169 0) itemoff 12566 itemsize 33 extent refs 1 gen 197522 flags 2 ref#0: tree block backref root 7 ... BTRFS error (device loop0): extent item not found for insert, bytenr 594526208 num_bytes 16384 parent 449921024 root_objectid 934 owner 1 offset 0 BTRFS error (device loop0): failed to run delayed ref for logical 594526208 num_bytes 16384 type 182 action 1 ref_mod 1: -117 ------------[ cut here ]------------ BTRFS: Transaction aborted (error -117) WARNING: CPU: 1 PID: 6963 at ../fs/btrfs/extent-tree.c:2168 btrfs_run_delayed_refs+0xfa/0x110 [btrfs] And btrfs check doesn't report anything wrong related to the extent tree. [CAUSE] The cause is a little complex, firstly the extent tree indeed doesn't have the backref for 594526208. The extent tree only has the following two backrefs around that bytenr on-disk: item 65 key (594509824 METADATA_ITEM 0) itemoff 13880 itemsize 33 refs 1 gen 197740 flags TREE_BLOCK tree block skinny level 0 (176 0x7) tree block backref root CSUM_TREE item 66 key (594558976 METADATA_ITEM 0) itemoff 13847 itemsize 33 refs 1 gen 197522 flags TREE_BLOCK tree block skinny level 0 (176 0x7) tree block backref root CSUM_TREE But such a missing backref item is not a corruption on disk, as the offending delayed ref belongs to subvolume 934, and that subvolume is being dropped: item 0 key (934 ROOT_ITEM 198229) itemoff 15844 itemsize 439 generation 198229 root_dirid 256 bytenr 10741039104 byte_limit 0 bytes_used 345571328 last_snapshot 198229 flags 0x1000000000001(RDONLY) refs 0 drop_progress key (206324 EXTENT_DATA 2711650304) drop_level 2 level 2 generation_v2 198229 And that offending tree block 594526208 is inside the dropped range of that subvolume.
That explains why there is no backref item for that bytenr and why btrfs check is not reporting anything wrong. But this also shows another problem, as btrfs will do all the orphan subvolume cleanup at a read-write mount. So half-dropped subvolume should not exist after an RW mount, and balance itself is also exclusive to subvolume cleanup, meaning we shouldn't hit a subvolume half-dropped during relocation. The root cause is, there is no orphan item for this subvolume. In fact there are 5 subvolumes from around 2021 that have the same problem. It looks like the original report has some older kernels running, and caused those zombie subvolumes. Thankfully upstream commit 8d488a8c7ba2 ("btrfs: fix subvolume/snapshot deletion not triggered on mount") has long fixed the bug. [ENHANCEMENT] For repairing such old fs, btrfs-progs will be enhanced. Considering how delayed the problem will show up (at run delayed ref time) and at that time we have to abort transaction already, it is too late. Instead here we reject any half-dropped subvolume for reloc tree at the earliest time, preventing confusion and extra time wasted on debugging similar bugs. CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e58151933844..7256f6748c8f 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -602,6 +602,25 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, if (btrfs_root_id(root) == objectid) { u64 commit_root_gen; + /* + * Relocation will wait for cleaner thread, and any half-dropped + * subvolume will be fully cleaned up at mount time. + * So here we shouldn't hit a subvolume with non-zero drop_progress. 
+ * + * If this isn't the case, error out since it can make us attempt to + * drop references for extents that were already dropped before. + */ + if (unlikely(btrfs_disk_key_objectid(&root->root_item.drop_progress))) { + struct btrfs_key cpu_key; + + btrfs_disk_key_to_cpu(&cpu_key, &root->root_item.drop_progress); + btrfs_err(fs_info, + "cannot relocate partially dropped subvolume %llu, drop progress key (%llu %u %llu)", + objectid, cpu_key.objectid, cpu_key.type, cpu_key.offset); + ret = -EUCLEAN; + goto fail; + } + /* called by btrfs_init_reloc_root */ ret = btrfs_copy_root(trans, root, root->commit_root, &eb, BTRFS_TREE_RELOC_OBJECTID); From 3a931e9b39c7ff8066657042f5f00d3b7e6ad315 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 16 Jul 2025 16:59:52 +0900 Subject: [PATCH 1702/2411] btrfs: zoned: do not select metadata BG as finish target We call btrfs_zone_finish_one_bg() to zone finish one block group and make room to activate another block group. Currently, we can choose a metadata block group as a target. But, as we reserve an active metadata block group, we no longer want to select a metadata block group. So, skip it in the loop. 
CC: stable@vger.kernel.org # 6.6+ Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 245e813ecd78..db11b5b5f0e6 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -2650,7 +2650,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info) spin_lock(&block_group->lock); if (block_group->reserved || block_group->alloc_offset == 0 || - (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) || + !(block_group->flags & BTRFS_BLOCK_GROUP_DATA) || test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) { spin_unlock(&block_group->lock); continue; From 7b632596188e1973c6b3ac1c9f8252f735e1039f Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Wed, 30 Jul 2025 09:29:23 -0700 Subject: [PATCH 1703/2411] btrfs: fix iteration bug in __qgroup_excl_accounting() __qgroup_excl_accounting() uses the qgroup iterator machinery to update the account of one qgroup's usage for all its parent hierarchy, when we either add or remove a relation and have only exclusive usage. However, there is a small bug there: we loop with an extra iteration temporary qgroup called `cur` but never actually refer to that in the body of the loop. As a result, we redundantly account the same usage to the first qgroup in the list.
This can be reproduced in the following way: mkfs.btrfs -f -O squota mount btrfs subvol create /sv dd if=/dev/zero of=/sv/f bs=1M count=1 sync btrfs qgroup create 1/100 btrfs qgroup create 2/200 btrfs qgroup assign 1/100 2/200 btrfs qgroup assign 0/256 1/100 btrfs qgroup show and the broken result is (note the 2MiB on 1/100 and 0MiB on 2/100): Qgroupid Referenced Exclusive Path -------- ---------- --------- ---- 0/5 16.00KiB 16.00KiB 0/256 1.02MiB 1.02MiB sv Qgroupid Referenced Exclusive Path -------- ---------- --------- ---- 0/5 16.00KiB 16.00KiB 0/256 1.02MiB 1.02MiB sv 1/100 2.03MiB 2.03MiB 2/100<1 member qgroup> 2/100 0.00B 0.00B <0 member qgroups> With this fix, which simply re-uses `qgroup` as the iteration variable, we see the expected result: Qgroupid Referenced Exclusive Path -------- ---------- --------- ---- 0/5 16.00KiB 16.00KiB 0/256 1.02MiB 1.02MiB sv Qgroupid Referenced Exclusive Path -------- ---------- --------- ---- 0/5 16.00KiB 16.00KiB 0/256 1.02MiB 1.02MiB sv 1/100 1.02MiB 1.02MiB 2/100<1 member qgroup> 2/100 1.02MiB 1.02MiB <0 member qgroups> The existing fstests did not exercise two-layer inheritance so this bug was missed. I intend to add that testing there, as well.
Fixes: a0bdc04b0732 ("btrfs: qgroup: use qgroup_iterator in __qgroup_excl_accounting()") CC: stable@vger.kernel.org # 6.12+ Reviewed-by: Filipe Manana Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1a5972178b3a..ccaa9a3cf1ce 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1453,7 +1453,6 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, u64 ref_root, struct btrfs_qgroup *src, int sign) { struct btrfs_qgroup *qgroup; - struct btrfs_qgroup *cur; LIST_HEAD(qgroup_list); u64 num_bytes = src->excl; int ret = 0; @@ -1463,7 +1462,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, u64 ref_root, goto out; qgroup_iterator_add(&qgroup_list, qgroup); - list_for_each_entry(cur, &qgroup_list, iterator) { + list_for_each_entry(qgroup, &qgroup_list, iterator) { struct btrfs_qgroup_list *glist; qgroup->rfer += sign * num_bytes; From 9474e27a24a41e55d0ac2b77d8171fddec7dbb87 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 6 Aug 2025 18:22:41 +0200 Subject: [PATCH 1704/2411] libbpf: Add the ability to suppress perf event enablement Automatically enabling a perf event after attaching a BPF prog to it is not always desirable. Add a new "dont_enable" field to struct bpf_perf_event_opts. While introducing "enable" instead would be nicer in that it would avoid a double negation in the implementation, it would make DECLARE_LIBBPF_OPTS() less efficient. 
Acked-by: Eduard Zingerman Suggested-by: Jiri Olsa Tested-by: Thomas Richter Co-developed-by: Thomas Richter Signed-off-by: Thomas Richter Signed-off-by: Ilya Leoshkevich Link: https://lore.kernel.org/r/20250806162417.19666-2-iii@linux.ibm.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 13 ++++++++----- tools/lib/bpf/libbpf.h | 4 +++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index fb4d92c5c339..8f5a81b672e1 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10965,11 +10965,14 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p } link->link.fd = pfd; } - if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { - err = -errno; - pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", - prog->name, pfd, errstr(err)); - goto err_out; + + if (!OPTS_GET(opts, dont_enable, false)) { + if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { + err = -errno; + pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", + prog->name, pfd, errstr(err)); + goto err_out; + } } return &link->link; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index d1cf813a057b..455a957cb702 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -499,9 +499,11 @@ struct bpf_perf_event_opts { __u64 bpf_cookie; /* don't use BPF link when attach BPF program */ bool force_ioctl_attach; + /* don't automatically enable the event */ + bool dont_enable; size_t :0; }; -#define bpf_perf_event_opts__last_field force_ioctl_attach +#define bpf_perf_event_opts__last_field dont_enable LIBBPF_API struct bpf_link * bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd); From 5e2ac8e8571df54d0a9c9d08f287e006269a6674 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 6 Aug 2025 18:22:42 +0200 Subject: [PATCH 1705/2411] perf bpf-filter: Enable events manually On s390, and, in general, on all platforms where the respective event supports 
auxiliary data gathering, the command: # ./perf record -u 0 -aB --synth=no -- ./perf test -w thloop [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.011 MB perf.data ] # ./perf report --stats | grep SAMPLE # does not generate samples in the perf.data file. On x86 the command: # sudo perf record -e intel_pt// -u 0 ls is broken too. Looking at the sequence of calls in 'perf record' reveals this behavior: 1. The event 'cycles' is created and enabled: record__open() +-> evlist__apply_filters() +-> perf_bpf_filter__prepare() +-> bpf_program.attach_perf_event() +-> bpf_program.attach_perf_event_opts() +-> __GI___ioctl(..., PERF_EVENT_IOC_ENABLE, ...) The event 'cycles' is enabled and active now. However the event's ring-buffer to store the samples generated by hardware is not allocated yet. 2. The event's fd is mmap()ed to create the ring buffer: record__open() +-> record__mmap() +-> record__mmap_evlist() +-> evlist__mmap_ex() +-> perf_evlist__mmap_ops() +-> mmap_per_cpu() +-> mmap_per_evsel() +-> mmap__mmap() +-> perf_mmap__mmap() +-> mmap() This allocates the ring buffer for the event 'cycles'. With mmap() the kernel creates the ring buffer: perf_mmap(): kernel function to create the event's ring | buffer to save the sampled data. | +-> ring_buffer_attach(): Allocates memory for ring buffer. | The PMU has auxiliary data setup function. The | has_aux(event) condition is true and the PMU's | stop() is called to stop sampling. It is not | restarted: | | if (has_aux(event)) | perf_event_stop(event, 0); | +-> cpumsf_pmu_stop(): Hardware sampling is stopped. No samples are generated and saved anymore. 3. After the event 'cycles' has been mapped, the event is enabled a second time in: __cmd_record() +-> evlist__enable() +-> __evlist__enable() +-> evsel__enable_cpu() +-> perf_evsel__enable_cpu() +-> perf_evsel__run_ioctl() +-> perf_evsel__ioctl() +-> __GI___ioctl(., PERF_EVENT_IOC_ENABLE, .) 
The second ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); is just a NOP in this case. The first invocation in (1.) sets the event::state to PERF_EVENT_STATE_ACTIVE. The kernel functions perf_ioctl() +-> _perf_ioctl() +-> _perf_event_enable() +-> __perf_event_enable() return immediately because event::state is already set to PERF_EVENT_STATE_ACTIVE. This happens on s390, because the event 'cycles' offers the possibility to save auxiliary data. The PMU callbacks setup_aux() and free_aux() are defined. Without both callback functions, cpumsf_pmu_stop() is not invoked and sampling continues. To remedy this, remove the first invocation of ioctl(..., PERF_EVENT_IOC_ENABLE, ...) in step (1.). Create the event in step (1.) and enable it in step (3.) after the ring buffer has been mapped. Output after: # ./perf record -aB --synth=no -u 0 -- ./perf test -w thloop 2 [ perf record: Woken up 3 times to write data ] [ perf record: Captured and wrote 0.876 MB perf.data ] # ./perf report --stats | grep SAMPLE SAMPLE events: 16200 (99.5%) SAMPLE events: 16200 # The software event succeeded both before and after the patch: # ./perf record -e cpu-clock -aB --synth=no -u 0 -- \ ./perf test -w thloop 2 [ perf record: Woken up 7 times to write data ] [ perf record: Captured and wrote 2.870 MB perf.data ] # ./perf report --stats | grep SAMPLE SAMPLE events: 53506 (99.8%) SAMPLE events: 53506 # Fixes: b4c658d4d63d61 ("perf target: Remove uid from target") Suggested-by: Jiri Olsa Tested-by: Thomas Richter Acked-by: Namhyung Kim Co-developed-by: Thomas Richter Signed-off-by: Thomas Richter Signed-off-by: Ilya Leoshkevich Link: https://lore.kernel.org/r/20250806162417.19666-3-iii@linux.ibm.com Signed-off-by: Alexei Starovoitov --- tools/perf/util/bpf-filter.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c index d0e013eeb0f7..a0b11f35395f 100644 --- a/tools/perf/util/bpf-filter.c +++ b/tools/perf/util/bpf-filter.c @@
-451,6 +451,8 @@ int perf_bpf_filter__prepare(struct evsel *evsel, struct target *target) struct bpf_link *link; struct perf_bpf_filter_entry *entry; bool needs_idx_hash = !target__has_cpu(target); + DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts, + .dont_enable = true); entry = calloc(MAX_FILTERS, sizeof(*entry)); if (entry == NULL) @@ -522,7 +524,8 @@ int perf_bpf_filter__prepare(struct evsel *evsel, struct target *target) prog = skel->progs.perf_sample_filter; for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) { for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) { - link = bpf_program__attach_perf_event(prog, FD(evsel, x, y)); + link = bpf_program__attach_perf_event_opts(prog, FD(evsel, x, y), + &pe_opts); if (IS_ERR(link)) { pr_err("Failed to attach perf sample-filter program\n"); ret = PTR_ERR(link); From 08c5b422807435cdb79bee60da84262102e5f26a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 23 Jul 2025 13:28:23 -0700 Subject: [PATCH 1706/2411] drm/msm: Defer fd_install in VM_BIND ioctl Avoid fd_install() until there are no more potential error paths, to avoid put_unused_fd() after the fd is made visible to userspace. 
Fixes: 2e6a8a1fe2b2 ("drm/msm: Add VM_BIND ioctl") Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/665365/ --- drivers/gpu/drm/msm/msm_gem_vma.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 3cd8562a5109..dc54c693b28d 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -1460,12 +1460,8 @@ msm_ioctl_vm_bind(struct drm_device *dev, void *data, struct drm_file *file) if (args->flags & MSM_VM_BIND_FENCE_FD_OUT) { sync_file = sync_file_create(job->fence); - if (!sync_file) { + if (!sync_file) ret = -ENOMEM; - } else { - fd_install(out_fence_fd, sync_file->file); - args->fence_fd = out_fence_fd; - } } if (ret) @@ -1494,10 +1490,14 @@ msm_ioctl_vm_bind(struct drm_device *dev, void *data, struct drm_file *file) out_unlock: mutex_unlock(&queue->lock); out_post_unlock: - if (ret && (out_fence_fd >= 0)) { - put_unused_fd(out_fence_fd); + if (ret) { + if (out_fence_fd >= 0) + put_unused_fd(out_fence_fd); if (sync_file) fput(sync_file->file); + } else if (sync_file) { + fd_install(out_fence_fd, sync_file->file); + args->fence_fd = out_fence_fd; } if (!IS_ERR_OR_NULL(job)) { From d02d50cb062737f2b0c689fa24ef8b86f14756e5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 30 Jul 2025 15:29:05 +0100 Subject: [PATCH 1707/2411] drm/msm: Fix dereference of pointer minor before null check Currently the pointer minor is being dereferenced before it is null checked, leading to a potential null pointer dereference issue. Fix this by dereferencing the pointer only after it has been null checked. Also Replace minor->dev with dev. 
Fixes: 4f89cf40d01e ("drm/msm: bail out late_init_minor() if it is not a GPU device") Signed-off-by: Colin Ian King Reviewed-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/666259/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_debugfs.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index bbda865addae..97dc70876442 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -325,25 +325,28 @@ static struct drm_info_list msm_debugfs_list[] = { static int late_init_minor(struct drm_minor *minor) { - struct drm_device *dev = minor->dev; - struct msm_drm_private *priv = dev->dev_private; + struct drm_device *dev; + struct msm_drm_private *priv; int ret; if (!minor) return 0; + dev = minor->dev; + priv = dev->dev_private; + if (!priv->gpu_pdev) return 0; ret = msm_rd_debugfs_init(minor); if (ret) { - DRM_DEV_ERROR(minor->dev->dev, "could not install rd debugfs\n"); + DRM_DEV_ERROR(dev->dev, "could not install rd debugfs\n"); return ret; } ret = msm_perf_debugfs_init(minor); if (ret) { - DRM_DEV_ERROR(minor->dev->dev, "could not install perf debugfs\n"); + DRM_DEV_ERROR(dev->dev, "could not install perf debugfs\n"); return ret; } From 4a00bf1fd5add1e0da37009cba5b5ffb4de255d9 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 1 Aug 2025 10:59:58 -0700 Subject: [PATCH 1708/2411] drm/msm: Add missing "location"s to devcoredump This is needed to properly interpret some of the sections. 
v2: Fix missing \n Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/666651/ --- drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index faca2a0243ab..8420cbe129c0 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -1796,6 +1796,7 @@ static void a7xx_show_shader(struct a6xx_gpu_state_obj *obj, print_name(p, " - type: ", a7xx_statetype_names[block->statetype]); print_name(p, " - pipe: ", a7xx_pipe_names[block->pipeid]); + drm_printf(p, " - location: %d\n", block->location); for (i = 0; i < block->num_sps; i++) { drm_printf(p, " - sp: %d\n", i); @@ -1873,6 +1874,7 @@ static void a7xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj, print_name(p, " - pipe: ", a7xx_pipe_names[dbgahb->pipe_id]); print_name(p, " - cluster-name: ", a7xx_cluster_names[dbgahb->cluster_id]); drm_printf(p, " - context: %d\n", dbgahb->context_id); + drm_printf(p, " - location: %d\n", dbgahb->location_id); a7xx_show_registers_indented(dbgahb->regs, obj->data, p, 4); } } From 9466b45c19f6646787d6249116e52a9c4382e6ad Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 1 Aug 2025 10:59:59 -0700 Subject: [PATCH 1709/2411] drm/msm: Fix section names and sizes The section names randomly appended _DATA or _ADDR in many cases, and/or didn't match the reg names. Fix them so crashdec can properly resolve the section names back to reg names. 
Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/666654/ --- drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h | 38 +++++++++---------- .../drm/msm/adreno/adreno_gen7_9_0_snapshot.h | 24 ++++++------ 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h index 95d93ac6812a..1c18499b60bb 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h @@ -419,47 +419,47 @@ static const struct a6xx_indexed_registers a6xx_indexed_reglist[] = { REG_A6XX_CP_SQE_STAT_DATA, 0x33, NULL }, { "CP_DRAW_STATE", REG_A6XX_CP_DRAW_STATE_ADDR, REG_A6XX_CP_DRAW_STATE_DATA, 0x100, NULL }, - { "CP_UCODE_DBG_DATA", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, + { "CP_SQE_UCODE_DBG", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, REG_A6XX_CP_SQE_UCODE_DBG_DATA, 0x8000, NULL }, - { "CP_ROQ", REG_A6XX_CP_ROQ_DBG_ADDR, + { "CP_ROQ_DBG", REG_A6XX_CP_ROQ_DBG_ADDR, REG_A6XX_CP_ROQ_DBG_DATA, 0, a6xx_get_cp_roq_size}, }; static const struct a6xx_indexed_registers a7xx_indexed_reglist[] = { { "CP_SQE_STAT", REG_A6XX_CP_SQE_STAT_ADDR, - REG_A6XX_CP_SQE_STAT_DATA, 0x33, NULL }, + REG_A6XX_CP_SQE_STAT_DATA, 0x40, NULL }, { "CP_DRAW_STATE", REG_A6XX_CP_DRAW_STATE_ADDR, REG_A6XX_CP_DRAW_STATE_DATA, 0x100, NULL }, - { "CP_UCODE_DBG_DATA", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, + { "CP_SQE_UCODE_DBG", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, REG_A6XX_CP_SQE_UCODE_DBG_DATA, 0x8000, NULL }, - { "CP_BV_SQE_STAT_ADDR", REG_A7XX_CP_BV_SQE_STAT_ADDR, - REG_A7XX_CP_BV_SQE_STAT_DATA, 0x33, NULL }, - { "CP_BV_DRAW_STATE_ADDR", REG_A7XX_CP_BV_DRAW_STATE_ADDR, + { "CP_BV_SQE_STAT", REG_A7XX_CP_BV_SQE_STAT_ADDR, + REG_A7XX_CP_BV_SQE_STAT_DATA, 0x40, NULL }, + { "CP_BV_DRAW_STATE", REG_A7XX_CP_BV_DRAW_STATE_ADDR, REG_A7XX_CP_BV_DRAW_STATE_DATA, 0x100, NULL }, - { "CP_BV_SQE_UCODE_DBG_ADDR", REG_A7XX_CP_BV_SQE_UCODE_DBG_ADDR, + { "CP_BV_SQE_UCODE_DBG", REG_A7XX_CP_BV_SQE_UCODE_DBG_ADDR, 
REG_A7XX_CP_BV_SQE_UCODE_DBG_DATA, 0x8000, NULL }, - { "CP_SQE_AC_STAT_ADDR", REG_A7XX_CP_SQE_AC_STAT_ADDR, - REG_A7XX_CP_SQE_AC_STAT_DATA, 0x33, NULL }, - { "CP_LPAC_DRAW_STATE_ADDR", REG_A7XX_CP_LPAC_DRAW_STATE_ADDR, + { "CP_SQE_AC_STAT", REG_A7XX_CP_SQE_AC_STAT_ADDR, + REG_A7XX_CP_SQE_AC_STAT_DATA, 0x40, NULL }, + { "CP_LPAC_DRAW_STATE", REG_A7XX_CP_LPAC_DRAW_STATE_ADDR, REG_A7XX_CP_LPAC_DRAW_STATE_DATA, 0x100, NULL }, - { "CP_SQE_AC_UCODE_DBG_ADDR", REG_A7XX_CP_SQE_AC_UCODE_DBG_ADDR, + { "CP_SQE_AC_UCODE_DBG", REG_A7XX_CP_SQE_AC_UCODE_DBG_ADDR, REG_A7XX_CP_SQE_AC_UCODE_DBG_DATA, 0x8000, NULL }, - { "CP_LPAC_FIFO_DBG_ADDR", REG_A7XX_CP_LPAC_FIFO_DBG_ADDR, + { "CP_LPAC_FIFO_DBG", REG_A7XX_CP_LPAC_FIFO_DBG_ADDR, REG_A7XX_CP_LPAC_FIFO_DBG_DATA, 0x40, NULL }, - { "CP_ROQ", REG_A6XX_CP_ROQ_DBG_ADDR, + { "CP_ROQ_DBG", REG_A6XX_CP_ROQ_DBG_ADDR, REG_A6XX_CP_ROQ_DBG_DATA, 0, a7xx_get_cp_roq_size }, }; static const struct a6xx_indexed_registers a6xx_cp_mempool_indexed = { - "CP_MEMPOOL", REG_A6XX_CP_MEM_POOL_DBG_ADDR, + "CP_MEM_POOL_DBG", REG_A6XX_CP_MEM_POOL_DBG_ADDR, REG_A6XX_CP_MEM_POOL_DBG_DATA, 0x2060, NULL, }; static const struct a6xx_indexed_registers a7xx_cp_bv_mempool_indexed[] = { - { "CP_MEMPOOL", REG_A6XX_CP_MEM_POOL_DBG_ADDR, - REG_A6XX_CP_MEM_POOL_DBG_DATA, 0x2100, NULL }, - { "CP_BV_MEMPOOL", REG_A7XX_CP_BV_MEM_POOL_DBG_ADDR, - REG_A7XX_CP_BV_MEM_POOL_DBG_DATA, 0x2100, NULL }, + { "CP_MEM_POOL_DBG", REG_A6XX_CP_MEM_POOL_DBG_ADDR, + REG_A6XX_CP_MEM_POOL_DBG_DATA, 0x2200, NULL }, + { "CP_BV_MEM_POOL_DBG", REG_A7XX_CP_BV_MEM_POOL_DBG_ADDR, + REG_A7XX_CP_BV_MEM_POOL_DBG_DATA, 0x2200, NULL }, }; #define DEBUGBUS(_id, _count) { .id = _id, .name = #_id, .count = _count } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h b/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h index e02cabb39f19..b1f8bbf1d843 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h @@ -1299,29 
+1299,29 @@ static struct a6xx_indexed_registers gen7_9_0_cp_indexed_reg_list[] = { REG_A6XX_CP_SQE_STAT_DATA, 0x00040}, { "CP_DRAW_STATE", REG_A6XX_CP_DRAW_STATE_ADDR, REG_A6XX_CP_DRAW_STATE_DATA, 0x00200}, - { "CP_ROQ", REG_A6XX_CP_ROQ_DBG_ADDR, + { "CP_ROQ_DBG", REG_A6XX_CP_ROQ_DBG_ADDR, REG_A6XX_CP_ROQ_DBG_DATA, 0x00800}, - { "CP_UCODE_DBG_DATA", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, + { "CP_SQE_UCODE_DBG", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, REG_A6XX_CP_SQE_UCODE_DBG_DATA, 0x08000}, - { "CP_BV_DRAW_STATE_ADDR", REG_A7XX_CP_BV_DRAW_STATE_ADDR, + { "CP_BV_DRAW_STATE", REG_A7XX_CP_BV_DRAW_STATE_ADDR, REG_A7XX_CP_BV_DRAW_STATE_DATA, 0x00200}, - { "CP_BV_ROQ_DBG_ADDR", REG_A7XX_CP_BV_ROQ_DBG_ADDR, + { "CP_BV_ROQ_DBG", REG_A7XX_CP_BV_ROQ_DBG_ADDR, REG_A7XX_CP_BV_ROQ_DBG_DATA, 0x00800}, - { "CP_BV_SQE_UCODE_DBG_ADDR", REG_A7XX_CP_BV_SQE_UCODE_DBG_ADDR, + { "CP_BV_SQE_UCODE_DBG", REG_A7XX_CP_BV_SQE_UCODE_DBG_ADDR, REG_A7XX_CP_BV_SQE_UCODE_DBG_DATA, 0x08000}, - { "CP_BV_SQE_STAT_ADDR", REG_A7XX_CP_BV_SQE_STAT_ADDR, + { "CP_BV_SQE_STAT", REG_A7XX_CP_BV_SQE_STAT_ADDR, REG_A7XX_CP_BV_SQE_STAT_DATA, 0x00040}, - { "CP_RESOURCE_TBL", REG_A7XX_CP_RESOURCE_TABLE_DBG_ADDR, + { "CP_RESOURCE_TABLE_DBG", REG_A7XX_CP_RESOURCE_TABLE_DBG_ADDR, REG_A7XX_CP_RESOURCE_TABLE_DBG_DATA, 0x04100}, - { "CP_LPAC_DRAW_STATE_ADDR", REG_A7XX_CP_LPAC_DRAW_STATE_ADDR, + { "CP_LPAC_DRAW_STATE", REG_A7XX_CP_LPAC_DRAW_STATE_ADDR, REG_A7XX_CP_LPAC_DRAW_STATE_DATA, 0x00200}, - { "CP_LPAC_ROQ", REG_A7XX_CP_LPAC_ROQ_DBG_ADDR, + { "CP_LPAC_ROQ_DBG", REG_A7XX_CP_LPAC_ROQ_DBG_ADDR, REG_A7XX_CP_LPAC_ROQ_DBG_DATA, 0x00200}, - { "CP_SQE_AC_UCODE_DBG_ADDR", REG_A7XX_CP_SQE_AC_UCODE_DBG_ADDR, + { "CP_SQE_AC_UCODE_DBG", REG_A7XX_CP_SQE_AC_UCODE_DBG_ADDR, REG_A7XX_CP_SQE_AC_UCODE_DBG_DATA, 0x08000}, - { "CP_SQE_AC_STAT_ADDR", REG_A7XX_CP_SQE_AC_STAT_ADDR, + { "CP_SQE_AC_STAT", REG_A7XX_CP_SQE_AC_STAT_ADDR, REG_A7XX_CP_SQE_AC_STAT_DATA, 0x00040}, - { "CP_LPAC_FIFO_DBG_ADDR", REG_A7XX_CP_LPAC_FIFO_DBG_ADDR, + { 
"CP_LPAC_FIFO_DBG", REG_A7XX_CP_LPAC_FIFO_DBG_ADDR, REG_A7XX_CP_LPAC_FIFO_DBG_DATA, 0x00040}, { "CP_AQE_ROQ_0", REG_A7XX_CP_AQE_ROQ_DBG_ADDR_0, REG_A7XX_CP_AQE_ROQ_DBG_DATA_0, 0x00100}, From a506578d8909e7e6f0d545af9850ccd4318bf6cf Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 1 Aug 2025 11:00:00 -0700 Subject: [PATCH 1710/2411] drm/msm: Fix order of selector programming in cluster snapshot Program the selector _after_ selecting the aperture. This aligns with the downstream driver, and fixes a case where we were failing to capture ctx0 regs (and presumably what we thought were ctx1 regs were actually ctx0). Suggested-by: Akhil P Oommen Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/666655/ --- drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index 8420cbe129c0..b4c8583dd6ca 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -759,15 +759,15 @@ static void a7xx_get_cluster(struct msm_gpu *gpu, size_t datasize; int i, regcount = 0; - /* Some clusters need a selector register to be programmed too */ - if (cluster->sel) - in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val); - in += CRASHDUMP_WRITE(in, REG_A7XX_CP_APERTURE_CNTL_CD, A7XX_CP_APERTURE_CNTL_CD_PIPE(cluster->pipe_id) | A7XX_CP_APERTURE_CNTL_CD_CLUSTER(cluster->cluster_id) | A7XX_CP_APERTURE_CNTL_CD_CONTEXT(cluster->context_id)); + /* Some clusters need a selector register to be programmed too */ + if (cluster->sel) + in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val); + for (i = 0; cluster->regs[i] != UINT_MAX; i += 2) { int count = RANGE(cluster->regs, i); From 2f2cc939ad672361ca81fcb27d76dc8154b17a1c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 1 Aug 2025 11:00:01 -0700 Subject: [PATCH 1711/2411] drm/msm: Constify 
snapshot tables A bit of divergence from the downstream driver from which these headers were imported. But no need for these tables not to be const. Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/666656/ --- drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 2 +- drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h | 8 ++++---- drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h | 8 ++++---- drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h | 10 +++++----- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index b4c8583dd6ca..7fc450ab8c12 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -11,7 +11,7 @@ static const unsigned int *gen7_0_0_external_core_regs[] __always_unused; static const unsigned int *gen7_2_0_external_core_regs[] __always_unused; static const unsigned int *gen7_9_0_external_core_regs[] __always_unused; -static struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] __always_unused; +static const struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] __always_unused; static const u32 gen7_9_0_cx_debugbus_blocks[] __always_unused; #include "adreno_gen7_0_0_snapshot.h" diff --git a/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h b/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h index cb66ece6606b..afcc7498983f 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h @@ -81,7 +81,7 @@ static const u32 gen7_0_0_debugbus_blocks[] = { A7XX_DBGBUS_USPTP_7, }; -static struct gen7_shader_block gen7_0_0_shader_blocks[] = { +static const struct gen7_shader_block gen7_0_0_shader_blocks[] = { {A7XX_TP0_TMO_DATA, 0x200, 4, 2, A7XX_PIPE_BR, A7XX_USPTP}, {A7XX_TP0_SMO_DATA, 0x80, 4, 2, A7XX_PIPE_BR, A7XX_USPTP}, {A7XX_TP0_MIPMAP_BASE_DATA, 0x3c0, 4, 2, A7XX_PIPE_BR, A7XX_USPTP}, @@ 
-695,7 +695,7 @@ static const struct gen7_sel_reg gen7_0_0_rb_rbp_sel = { .val = 0x9, }; -static struct gen7_cluster_registers gen7_0_0_clusters[] = { +static const struct gen7_cluster_registers gen7_0_0_clusters[] = { { A7XX_CLUSTER_NONE, A7XX_PIPE_BR, STATE_NON_CONTEXT, gen7_0_0_noncontext_pipe_br_registers, }, { A7XX_CLUSTER_NONE, A7XX_PIPE_BV, STATE_NON_CONTEXT, @@ -764,7 +764,7 @@ static struct gen7_cluster_registers gen7_0_0_clusters[] = { gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers, }, }; -static struct gen7_sptp_cluster_registers gen7_0_0_sptp_clusters[] = { +static const struct gen7_sptp_cluster_registers gen7_0_0_sptp_clusters[] = { { A7XX_CLUSTER_NONE, A7XX_SP_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_HLSQ_STATE, gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers, 0xae00 }, { A7XX_CLUSTER_NONE, A7XX_SP_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_SP_TOP, @@ -914,7 +914,7 @@ static const u32 gen7_0_0_dpm_registers[] = { }; static_assert(IS_ALIGNED(sizeof(gen7_0_0_dpm_registers), 8)); -static struct gen7_reg_list gen7_0_0_reg_list[] = { +static const struct gen7_reg_list gen7_0_0_reg_list[] = { { gen7_0_0_gpu_registers, NULL }, { gen7_0_0_cx_misc_registers, NULL }, { gen7_0_0_dpm_registers, NULL }, diff --git a/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h b/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h index 6f8ad50f32ce..6569f12bf12f 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h @@ -95,7 +95,7 @@ static const u32 gen7_2_0_debugbus_blocks[] = { A7XX_DBGBUS_CCHE_2, }; -static struct gen7_shader_block gen7_2_0_shader_blocks[] = { +static const struct gen7_shader_block gen7_2_0_shader_blocks[] = { {A7XX_TP0_TMO_DATA, 0x200, 6, 2, A7XX_PIPE_BR, A7XX_USPTP}, {A7XX_TP0_SMO_DATA, 0x80, 6, 2, A7XX_PIPE_BR, A7XX_USPTP}, {A7XX_TP0_MIPMAP_BASE_DATA, 0x3c0, 6, 2, A7XX_PIPE_BR, A7XX_USPTP}, @@ -489,7 +489,7 @@ static const struct gen7_sel_reg gen7_2_0_rb_rbp_sel = { .val = 0x9, }; -static 
struct gen7_cluster_registers gen7_2_0_clusters[] = { +static const struct gen7_cluster_registers gen7_2_0_clusters[] = { { A7XX_CLUSTER_NONE, A7XX_PIPE_BR, STATE_NON_CONTEXT, gen7_2_0_noncontext_pipe_br_registers, }, { A7XX_CLUSTER_NONE, A7XX_PIPE_BV, STATE_NON_CONTEXT, @@ -558,7 +558,7 @@ static struct gen7_cluster_registers gen7_2_0_clusters[] = { gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers, }, }; -static struct gen7_sptp_cluster_registers gen7_2_0_sptp_clusters[] = { +static const struct gen7_sptp_cluster_registers gen7_2_0_sptp_clusters[] = { { A7XX_CLUSTER_NONE, A7XX_SP_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_HLSQ_STATE, gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers, 0xae00 }, { A7XX_CLUSTER_NONE, A7XX_SP_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_SP_TOP, @@ -737,7 +737,7 @@ static const u32 gen7_2_0_dpm_registers[] = { }; static_assert(IS_ALIGNED(sizeof(gen7_2_0_dpm_registers), 8)); -static struct gen7_reg_list gen7_2_0_reg_list[] = { +static const struct gen7_reg_list gen7_2_0_reg_list[] = { { gen7_2_0_gpu_registers, NULL }, { gen7_2_0_cx_misc_registers, NULL }, { gen7_2_0_dpm_registers, NULL }, diff --git a/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h b/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h index b1f8bbf1d843..0956dfca1f05 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h @@ -117,7 +117,7 @@ static const u32 gen7_9_0_cx_debugbus_blocks[] = { A7XX_DBGBUS_GBIF_CX, }; -static struct gen7_shader_block gen7_9_0_shader_blocks[] = { +static const struct gen7_shader_block gen7_9_0_shader_blocks[] = { { A7XX_TP0_TMO_DATA, 0x0200, 6, 2, A7XX_PIPE_BR, A7XX_USPTP }, { A7XX_TP0_SMO_DATA, 0x0080, 6, 2, A7XX_PIPE_BR, A7XX_USPTP }, { A7XX_TP0_MIPMAP_BASE_DATA, 0x03C0, 6, 2, A7XX_PIPE_BR, A7XX_USPTP }, @@ -1116,7 +1116,7 @@ static const struct gen7_sel_reg gen7_9_0_rb_rbp_sel = { .val = 0x9, }; -static struct gen7_cluster_registers gen7_9_0_clusters[] = { +static const struct 
gen7_cluster_registers gen7_9_0_clusters[] = { { A7XX_CLUSTER_NONE, A7XX_PIPE_BR, STATE_NON_CONTEXT, gen7_9_0_non_context_pipe_br_registers, }, { A7XX_CLUSTER_NONE, A7XX_PIPE_BV, STATE_NON_CONTEXT, @@ -1185,7 +1185,7 @@ static struct gen7_cluster_registers gen7_9_0_clusters[] = { gen7_9_0_vpc_pipe_bv_cluster_vpc_ps_registers, }, }; -static struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] = { +static const struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] = { { A7XX_CLUSTER_NONE, A7XX_SP_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_HLSQ_STATE, gen7_9_0_non_context_sp_pipe_br_hlsq_state_registers, 0xae00}, { A7XX_CLUSTER_NONE, A7XX_SP_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_SP_TOP, @@ -1294,7 +1294,7 @@ static struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] = { gen7_9_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers, 0xb000}, }; -static struct a6xx_indexed_registers gen7_9_0_cp_indexed_reg_list[] = { +static const struct a6xx_indexed_registers gen7_9_0_cp_indexed_reg_list[] = { { "CP_SQE_STAT", REG_A6XX_CP_SQE_STAT_ADDR, REG_A6XX_CP_SQE_STAT_DATA, 0x00040}, { "CP_DRAW_STATE", REG_A6XX_CP_DRAW_STATE_ADDR, @@ -1337,7 +1337,7 @@ static struct a6xx_indexed_registers gen7_9_0_cp_indexed_reg_list[] = { REG_A7XX_CP_AQE_STAT_DATA_1, 0x00040}, }; -static struct gen7_reg_list gen7_9_0_reg_list[] = { +static const struct gen7_reg_list gen7_9_0_reg_list[] = { { gen7_9_0_gpu_registers, NULL}, { gen7_9_0_cx_misc_registers, NULL}, { gen7_9_0_cx_dbgc_registers, NULL}, From 13ed0a1af263b56a5ebbf38ab7163cbc9dcb009e Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 1 Aug 2025 11:00:02 -0700 Subject: [PATCH 1712/2411] drm/msm: Fix a7xx debugbus read The bitfield positions changed in a7xx. 
v2: Don't open-code the bitfield building v3: Also fix cx_debugbus Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/666659/ --- drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 32 ++++++++++++++----- drivers/gpu/drm/msm/registers/adreno/a6xx.xml | 14 +++++++- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index 7fc450ab8c12..75f93213e114 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -174,8 +174,15 @@ static int a6xx_crashdumper_run(struct msm_gpu *gpu, static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset, u32 *data) { - u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) | - A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block); + u32 reg; + + if (to_adreno_gpu(gpu)->info->family >= ADRENO_7XX_GEN1) { + reg = A7XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) | + A7XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block); + } else { + reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) | + A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block); + } gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg); gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg); @@ -198,11 +205,18 @@ static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset, readl((ptr) + ((offset) << 2)) /* read a value from the CX debug bus */ -static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset, +static int cx_debugbus_read(struct msm_gpu *gpu, void __iomem *cxdbg, u32 block, u32 offset, u32 *data) { - u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) | - A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block); + u32 reg; + + if (to_adreno_gpu(gpu)->info->family >= ADRENO_7XX_GEN1) { + reg = A7XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) | + A7XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block); + } else { + reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) | + 
A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block); + } cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg); cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg); @@ -315,7 +329,8 @@ static void a6xx_get_debugbus_block(struct msm_gpu *gpu, ptr += debugbus_read(gpu, block->id, i, ptr); } -static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg, +static void a6xx_get_cx_debugbus_block(struct msm_gpu *gpu, + void __iomem *cxdbg, struct a6xx_gpu_state *a6xx_state, const struct a6xx_debugbus_block *block, struct a6xx_gpu_state_obj *obj) @@ -330,7 +345,7 @@ static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg, obj->handle = block; for (ptr = obj->data, i = 0; i < block->count; i++) - ptr += cx_debugbus_read(cxdbg, block->id, i, ptr); + ptr += cx_debugbus_read(gpu, cxdbg, block->id, i, ptr); } static void a6xx_get_debugbus_blocks(struct msm_gpu *gpu, @@ -526,7 +541,8 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu, int i; for (i = 0; i < nr_cx_debugbus_blocks; i++) - a6xx_get_cx_debugbus_block(cxdbg, + a6xx_get_cx_debugbus_block(gpu, + cxdbg, a6xx_state, &cx_debugbus_blocks[i], &a6xx_state->cx_debugbus[i]); diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml index d860fd94feae..86fab2750ba7 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml @@ -594,10 +594,14 @@ by a particular renderpass/blit. - + + + + + @@ -3796,6 +3800,14 @@ by a particular renderpass/blit. + + + + + + + + From a814ba2d7b847cff15565bbab781df89e190619c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 1 Aug 2025 11:00:03 -0700 Subject: [PATCH 1713/2411] drm/msm: Fix debugbus snapshot We weren't setting the # of captured debugbus blocks. 
Reported-by: Connor Abbott Suggested-by: Connor Abbott Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/666660/ --- drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index 75f93213e114..d5d1271fce61 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -438,8 +438,9 @@ static void a7xx_get_debugbus_blocks(struct msm_gpu *gpu, a6xx_state, &a7xx_debugbus_blocks[gbif_debugbus_blocks[i]], &a6xx_state->debugbus[i + debugbus_blocks_count]); } - } + a6xx_state->nr_debugbus = total_debugbus_blocks; + } } static void a6xx_get_debugbus(struct msm_gpu *gpu, From e9621ef610c4a600678da5d8020d4a0dfe686faa Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 1 Aug 2025 11:00:04 -0700 Subject: [PATCH 1714/2411] drm/msm: Fix a7xx TPL1 cluster snapshot Later gens have both a PIPE_BR and PIPE_NONE section. The snapshot tool seems to expect this for x1-85 as well. I guess this was just a bug in downstream kgsl, which went unnoticed? 
Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/666662/ --- drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h | 11 +++++++++-- drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h b/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h index afcc7498983f..04b49d385f9d 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h @@ -668,12 +668,19 @@ static const u32 gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers[] = { }; static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers), 8)); -/* Block: TPl1 Cluster: noncontext Pipeline: A7XX_PIPE_BR */ -static const u32 gen7_0_0_tpl1_noncontext_pipe_br_registers[] = { +/* Block: TPl1 Cluster: noncontext Pipeline: A7XX_PIPE_NONE */ +static const u32 gen7_0_0_tpl1_noncontext_pipe_none_registers[] = { 0x0b600, 0x0b600, 0x0b602, 0x0b602, 0x0b604, 0x0b604, 0x0b608, 0x0b60c, 0x0b60f, 0x0b621, 0x0b630, 0x0b633, UINT_MAX, UINT_MAX, }; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_noncontext_pipe_none_registers), 8)); + +/* Block: TPl1 Cluster: noncontext Pipeline: A7XX_PIPE_BR */ +static const u32 gen7_0_0_tpl1_noncontext_pipe_br_registers[] = { + 0x0b600, 0x0b600, + UINT_MAX, UINT_MAX, +}; static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_noncontext_pipe_br_registers), 8)); /* Block: TPl1 Cluster: noncontext Pipeline: A7XX_PIPE_LPAC */ diff --git a/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h b/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h index 6569f12bf12f..772652eb61f3 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h @@ -573,6 +573,8 @@ static const struct gen7_sptp_cluster_registers gen7_2_0_sptp_clusters[] = { gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers, 0xaf80 }, { A7XX_CLUSTER_NONE, 
A7XX_TP0_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_USPTP, gen7_0_0_tpl1_noncontext_pipe_br_registers, 0xb600 }, + { A7XX_CLUSTER_NONE, A7XX_TP0_NCTX_REG, A7XX_PIPE_NONE, 0, A7XX_USPTP, + gen7_0_0_tpl1_noncontext_pipe_none_registers, 0xb600 }, { A7XX_CLUSTER_NONE, A7XX_TP0_NCTX_REG, A7XX_PIPE_LPAC, 0, A7XX_USPTP, gen7_0_0_tpl1_noncontext_pipe_lpac_registers, 0xb780 }, { A7XX_CLUSTER_SP_PS, A7XX_SP_CTX0_3D_CPS_REG, A7XX_PIPE_BR, 0, A7XX_HLSQ_STATE, From ba3afadeb81ebb92ab23546fbd2ec7d9dfe216f8 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 5 Aug 2025 09:43:08 -0700 Subject: [PATCH 1715/2411] drm/msm: Fix a few comments Fix a couple comments which had become (partially) obsolete or incorrect with the gpuvm conversion. Signed-off-by: Rob Clark Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/667237/ --- drivers/gpu/drm/msm/msm_gem.h | 2 +- drivers/gpu/drm/msm/msm_gem_vma.c | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 88239da1cd72..751c3b4965bc 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -100,7 +100,7 @@ struct msm_gem_vm { * * Only used for kernel managed VMs, unused for user managed VMs. * - * Protected by @mm_lock. + * Protected by vm lock. See msm_gem_lock_vm_and_obj(), for ex. */ struct drm_mm mm; diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index dc54c693b28d..d1f5bb2e0a16 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -319,13 +319,10 @@ msm_gem_vma_map(struct drm_gpuva *vma, int prot, struct sg_table *sgt) mutex_lock(&vm->mmu_lock); /* - * NOTE: iommu/io-pgtable can allocate pages, so we cannot hold + * NOTE: if not using pgtable preallocation, we cannot hold * a lock across map/unmap which is also used in the job_run() * path, as this can cause deadlock in job_run() vs shrinker/ * reclaim. 
- * - * Revisit this if we can come up with a scheme to pre-alloc pages - * for the pgtable in map/unmap ops. */ ret = vm_map_op(vm, &(struct msm_vm_map_op){ .iova = vma->va.addr, From fe2f3b1c702f0e02906419c662ca9446cc789354 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 5 Aug 2025 09:44:31 -0700 Subject: [PATCH 1716/2411] drm/msm: Handle in-place remaps Detect and handle the special case of a MAP op simply updating the vma flags of an existing vma, and skip the pgtable updates. This allows turnip to set the MSM_VMA_DUMP flag on an existing mapping without requiring additional synchronization against commands running on the GPU. Signed-off-by: Rob Clark Tested-by: Connor Abbott Patchwork: https://patchwork.freedesktop.org/patch/667238/ --- drivers/gpu/drm/msm/msm_gem_vma.c | 41 ++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index d1f5bb2e0a16..00d0f3b7ba32 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -451,6 +451,8 @@ msm_gem_vm_bo_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) struct op_arg { unsigned flags; struct msm_vm_bind_job *job; + const struct msm_vm_bind_op *op; + bool kept; }; static void @@ -472,14 +474,18 @@ vma_from_op(struct op_arg *arg, struct drm_gpuva_op_map *op) } static int -msm_gem_vm_sm_step_map(struct drm_gpuva_op *op, void *arg) +msm_gem_vm_sm_step_map(struct drm_gpuva_op *op, void *_arg) { - struct msm_vm_bind_job *job = ((struct op_arg *)arg)->job; + struct op_arg *arg = _arg; + struct msm_vm_bind_job *job = arg->job; struct drm_gem_object *obj = op->map.gem.obj; struct drm_gpuva *vma; struct sg_table *sgt; unsigned prot; + if (arg->kept) + return 0; + vma = vma_from_op(arg, &op->map); if (WARN_ON(IS_ERR(vma))) return PTR_ERR(vma); @@ -599,15 +605,41 @@ msm_gem_vm_sm_step_remap(struct drm_gpuva_op *op, void *arg) } static int -msm_gem_vm_sm_step_unmap(struct 
drm_gpuva_op *op, void *arg) +msm_gem_vm_sm_step_unmap(struct drm_gpuva_op *op, void *_arg) { - struct msm_vm_bind_job *job = ((struct op_arg *)arg)->job; + struct op_arg *arg = _arg; + struct msm_vm_bind_job *job = arg->job; struct drm_gpuva *vma = op->unmap.va; struct msm_gem_vma *msm_vma = to_msm_vma(vma); vm_dbg("%p:%p:%p: %016llx %016llx", vma->vm, vma, vma->gem.obj, vma->va.addr, vma->va.range); + /* + * Detect in-place remap. Turnip does this to change the vma flags, + * in particular MSM_VMA_DUMP. In this case we want to avoid actually + * touching the page tables, as that would require synchronization + * against SUBMIT jobs running on the GPU. + */ + if (op->unmap.keep && + (arg->op->op == MSM_VM_BIND_OP_MAP) && + (vma->gem.obj == arg->op->obj) && + (vma->gem.offset == arg->op->obj_offset) && + (vma->va.addr == arg->op->iova) && + (vma->va.range == arg->op->range)) { + /* We are only expecting a single in-place unmap+map cb pair: */ + WARN_ON(arg->kept); + + /* Leave the existing VMA in place, but signal that to the map cb: */ + arg->kept = true; + + /* Only flags are changing, so update that in-place: */ + unsigned orig_flags = vma->flags & (DRM_GPUVA_USERBITS - 1); + vma->flags = orig_flags | arg->flags; + + return 0; + } + if (!msm_vma->mapped) goto out_close; @@ -1268,6 +1300,7 @@ vm_bind_job_prepare(struct msm_vm_bind_job *job) const struct msm_vm_bind_op *op = &job->ops[i]; struct op_arg arg = { .job = job, + .op = op, }; switch (op->op) { From 42464c51ccccb6343a932a7ea8bc9181e589f270 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 7 Aug 2025 09:10:58 -0400 Subject: [PATCH 1717/2411] drm/msm: Fix objtool warning in submit_lock_objects() Split the vmbind case into a separate helper function submit_lock_objects_vmbind() to fix objtool warning: drivers/gpu/drm/msm/msm.o: warning: objtool: submit_lock_objects+0x451: sibling call from callable instruction with modified stack frame The drm_exec_until_all_locked() macro uses computed gotos internally 
for its retry loop. Having return statements inside this macro, or immediately after it in certain code paths, confuses objtool's static analysis of stack frames, causing it to incorrectly flag tail call optimizations. Fixes: 92395af63a99 ("drm/msm: Add VM_BIND submitqueue") Signed-off-by: Sasha Levin Patchwork: https://patchwork.freedesktop.org/patch/667539/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 53 +++++++++++++++------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index bfea19baf6d9..3ab3b27134f9 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -271,32 +271,37 @@ static int submit_lookup_cmds(struct msm_gem_submit *submit, return ret; } +static int submit_lock_objects_vmbind(struct msm_gem_submit *submit) +{ + unsigned flags = DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES; + struct drm_exec *exec = &submit->exec; + int ret = 0; + + drm_exec_init(&submit->exec, flags, submit->nr_bos); + + drm_exec_until_all_locked (&submit->exec) { + ret = drm_gpuvm_prepare_vm(submit->vm, exec, 1); + drm_exec_retry_on_contention(exec); + if (ret) + break; + + ret = drm_gpuvm_prepare_objects(submit->vm, exec, 1); + drm_exec_retry_on_contention(exec); + if (ret) + break; + } + + return ret; +} + /* This is where we make sure all the bo's are reserved and pin'd: */ static int submit_lock_objects(struct msm_gem_submit *submit) { unsigned flags = DRM_EXEC_INTERRUPTIBLE_WAIT; - struct drm_exec *exec = &submit->exec; - int ret; + int ret = 0; - if (msm_context_is_vmbind(submit->queue->ctx)) { - flags |= DRM_EXEC_IGNORE_DUPLICATES; - - drm_exec_init(&submit->exec, flags, submit->nr_bos); - - drm_exec_until_all_locked (&submit->exec) { - ret = drm_gpuvm_prepare_vm(submit->vm, exec, 1); - drm_exec_retry_on_contention(exec); - if (ret) - return ret; - - ret = drm_gpuvm_prepare_objects(submit->vm, exec, 1); 
- drm_exec_retry_on_contention(exec); - if (ret) - return ret; - } - - return 0; - } + if (msm_context_is_vmbind(submit->queue->ctx)) + return submit_lock_objects_vmbind(submit); drm_exec_init(&submit->exec, flags, submit->nr_bos); @@ -305,17 +310,17 @@ static int submit_lock_objects(struct msm_gem_submit *submit) drm_gpuvm_resv_obj(submit->vm)); drm_exec_retry_on_contention(&submit->exec); if (ret) - return ret; + break; for (unsigned i = 0; i < submit->nr_bos; i++) { struct drm_gem_object *obj = submit->bos[i].obj; ret = drm_exec_prepare_obj(&submit->exec, obj, 1); drm_exec_retry_on_contention(&submit->exec); if (ret) - return ret; + break; } } - return 0; + return ret; } static int submit_fence_sync(struct msm_gem_submit *submit) From d5c647b08ee02cb7fa50d89414ed0f5dc7c1ca0e Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 7 Aug 2025 10:10:51 +0200 Subject: [PATCH 1718/2411] PCI: vmd: Fix wrong kfree() in vmd_msi_free() vmd_msi_alloc() allocates struct vmd_irq and stashes it into irq_data->chip_data associated with the VMD's interrupt domain. vmd_msi_free() extracts the pointer by calling irq_get_chip_data() and frees it. irq_get_chip_data() returns the chip_data associated with the top interrupt domain. This worked in the past because VMD's interrupt domain was the top domain. But d7d8ab87e3e7 ("PCI: vmd: Switch to msi_create_parent_irq_domain()") changed the interrupt domain hierarchy so VMD's interrupt domain is not the top domain anymore. irq_get_chip_data() now returns the chip_data at the MSI devices' interrupt domains. It is therefore broken for vmd_msi_free() to kfree() this chip_data. Fix by extracting the chip_data associated with the VMD's interrupt domain. 
Fixes: d7d8ab87e3e7 ("PCI: vmd: Switch to msi_create_parent_irq_domain()") Reported-by: Kenneth Crudup Closes: https://lore.kernel.org/linux-pci/dfa40e48-8840-4e61-9fda-25cdb3ad81c1@panix.com/ Reported-by: Ammar Faizi Closes: https://lore.kernel.org/linux-pci/ed53280ed15d1140700b96cca2734bf327ee92539e5eb68e80f5bbbf0f01@linux.gnuweeb.org/ Tested-by: Ammar Faizi Tested-by: Kenneth Crudup Signed-off-by: Nam Cao Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner Reviewed-by: Jinjie Ruan Acked-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250807081051.2253962-1-namcao@linutronix.de --- drivers/pci/controller/vmd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index 9bbb0ff4cc15..b679c7f28f51 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -280,10 +280,12 @@ static int vmd_msi_alloc(struct irq_domain *domain, unsigned int virq, static void vmd_msi_free(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { + struct irq_data *irq_data; struct vmd_irq *vmdirq; for (int i = 0; i < nr_irqs; ++i) { - vmdirq = irq_get_chip_data(virq + i); + irq_data = irq_domain_get_irq_data(domain, virq + i); + vmdirq = irq_data->chip_data; synchronize_srcu(&vmdirq->irq->srcu); From 03537826f77f1c829d0593d211b38b9c876c1722 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 7 Aug 2025 18:12:11 +0200 Subject: [PATCH 1719/2411] smb: client: return an error if rdma_connect does not return within 5 seconds This matches the timeout for tcp connections. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: f198186aa9bb ("CIFS: SMBD: Establish SMB Direct connection") Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 6c2af00be44c..181349eda7a3 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1653,8 +1653,10 @@ static struct smbd_connection *_smbd_get_connection( goto rdma_connect_failed; } - wait_event_interruptible( - info->conn_wait, sc->status != SMBDIRECT_SOCKET_CONNECTING); + wait_event_interruptible_timeout( + info->conn_wait, + sc->status != SMBDIRECT_SOCKET_CONNECTING, + msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT)); if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { log_rdma_event(ERR, "rdma_connect failed port=%d\n", port); From 761399745710fd37fa4312af1b675b2fe73c1c52 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 7 Aug 2025 18:12:12 +0200 Subject: [PATCH 1720/2411] smb: client: improve logging in smbd_conn_upcall() Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 181349eda7a3..8ed4ab6f1d3a 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -178,9 +178,10 @@ static int smbd_conn_upcall( { struct smbd_connection *info = id->context; struct smbdirect_socket *sc = &info->socket; + const char *event_name = rdma_event_msg(event->event); - log_rdma_event(INFO, "event=%d status=%d\n", - event->event, event->status); + log_rdma_event(INFO, "event=%s status=%d\n", + event_name, event->status); switch (event->event) { case 
RDMA_CM_EVENT_ADDR_RESOLVED: @@ -190,17 +191,19 @@ static int smbd_conn_upcall( break; case RDMA_CM_EVENT_ADDR_ERROR: + log_rdma_event(ERR, "connecting failed event=%s\n", event_name); info->ri_rc = -EHOSTUNREACH; complete(&info->ri_done); break; case RDMA_CM_EVENT_ROUTE_ERROR: + log_rdma_event(ERR, "connecting failed event=%s\n", event_name); info->ri_rc = -ENETUNREACH; complete(&info->ri_done); break; case RDMA_CM_EVENT_ESTABLISHED: - log_rdma_event(INFO, "connected event=%d\n", event->event); + log_rdma_event(INFO, "connected event=%s\n", event_name); sc->status = SMBDIRECT_SOCKET_CONNECTED; wake_up_interruptible(&info->conn_wait); break; @@ -208,7 +211,7 @@ static int smbd_conn_upcall( case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_REJECTED: - log_rdma_event(INFO, "connecting failed event=%d\n", event->event); + log_rdma_event(ERR, "connecting failed event=%s\n", event_name); sc->status = SMBDIRECT_SOCKET_DISCONNECTED; wake_up_interruptible(&info->conn_wait); break; @@ -217,6 +220,7 @@ static int smbd_conn_upcall( case RDMA_CM_EVENT_DISCONNECTED: /* This happens when we fail the negotiation */ if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_FAILED) { + log_rdma_event(ERR, "event=%s during negotiation\n", event_name); sc->status = SMBDIRECT_SOCKET_DISCONNECTED; wake_up(&info->conn_wait); break; @@ -229,6 +233,8 @@ static int smbd_conn_upcall( break; default: + log_rdma_event(ERR, "unexpected event=%s status=%d\n", + event_name, event->status); break; } From 550a194c5998e4e77affc6235e80d3766dc2d27e Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 7 Aug 2025 18:12:13 +0200 Subject: [PATCH 1721/2411] smb: client: don't call init_waitqueue_head(&info->conn_wait) twice in _smbd_get_connection It is already called long before we may hit this cleanup code path. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 8ed4ab6f1d3a..c819cc6dcc4f 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1716,7 +1716,6 @@ static struct smbd_connection *_smbd_get_connection( cancel_delayed_work_sync(&info->idle_timer_work); destroy_caches_and_workqueue(info); sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; - init_waitqueue_head(&info->conn_wait); rdma_disconnect(sc->rdma.cm_id); wait_event(info->conn_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); From dfe6f14aedbf59bfb7145de5c7da908583ae50fd Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 7 Aug 2025 18:12:14 +0200 Subject: [PATCH 1722/2411] smb: client: only use a single wait_queue to monitor smbdirect connection status There's no need for separate conn_wait and disconn_wait queues. This will simplify the move to common code; the server code already uses a single wait_queue for this.
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 17 ++++++++--------- fs/smb/client/smbdirect.h | 3 +-- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index c819cc6dcc4f..c628e91c328b 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -205,7 +205,7 @@ static int smbd_conn_upcall( case RDMA_CM_EVENT_ESTABLISHED: log_rdma_event(INFO, "connected event=%s\n", event_name); sc->status = SMBDIRECT_SOCKET_CONNECTED; - wake_up_interruptible(&info->conn_wait); + wake_up_interruptible(&info->status_wait); break; case RDMA_CM_EVENT_CONNECT_ERROR: @@ -213,7 +213,7 @@ static int smbd_conn_upcall( case RDMA_CM_EVENT_REJECTED: log_rdma_event(ERR, "connecting failed event=%s\n", event_name); sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - wake_up_interruptible(&info->conn_wait); + wake_up_interruptible(&info->status_wait); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: @@ -222,12 +222,12 @@ static int smbd_conn_upcall( if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_FAILED) { log_rdma_event(ERR, "event=%s during negotiation\n", event_name); sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - wake_up(&info->conn_wait); + wake_up(&info->status_wait); break; } sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - wake_up_interruptible(&info->disconn_wait); + wake_up_interruptible(&info->status_wait); wake_up_interruptible(&sc->recv_io.reassembly.wait_queue); wake_up_interruptible_all(&info->wait_send_queue); break; @@ -1325,7 +1325,7 @@ void smbd_destroy(struct TCP_Server_Info *server) rdma_disconnect(sc->rdma.cm_id); log_rdma_event(INFO, "wait for transport being disconnected\n"); wait_event_interruptible( - info->disconn_wait, + info->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); } @@ -1650,8 +1650,7 @@ static struct smbd_connection 
*_smbd_get_connection( log_rdma_event(INFO, "connecting to IP %pI4 port %d\n", &addr_in->sin_addr, port); - init_waitqueue_head(&info->conn_wait); - init_waitqueue_head(&info->disconn_wait); + init_waitqueue_head(&info->status_wait); init_waitqueue_head(&sc->recv_io.reassembly.wait_queue); rc = rdma_connect(sc->rdma.cm_id, &conn_param); if (rc) { @@ -1660,7 +1659,7 @@ static struct smbd_connection *_smbd_get_connection( } wait_event_interruptible_timeout( - info->conn_wait, + info->status_wait, sc->status != SMBDIRECT_SOCKET_CONNECTING, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT)); @@ -1717,7 +1716,7 @@ static struct smbd_connection *_smbd_get_connection( destroy_caches_and_workqueue(info); sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; rdma_disconnect(sc->rdma.cm_id); - wait_event(info->conn_wait, + wait_event(info->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); allocate_cache_failed: diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 0d4d45428c85..e45aa9ddd71d 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -47,8 +47,7 @@ struct smbd_connection { int ri_rc; struct completion ri_done; - wait_queue_head_t conn_wait; - wait_queue_head_t disconn_wait; + wait_queue_head_t status_wait; struct completion negotiate_completion; bool negotiate_done; From e6bb9193974059ddbb0ce7763fa3882bd60d4dc3 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 5 Aug 2025 18:13:13 +0900 Subject: [PATCH 1723/2411] ksmbd: limit repeated connections from clients with the same IP Repeated connections from clients with the same IP address may exhaust the max connections and prevent other normal client connections. This patch limits repeated connections from clients with the same IP.
Reported-by: tianshuo han Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.h | 1 + fs/smb/server/transport_tcp.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index dd3e0e3f7bf0..31dd1caac1e8 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -46,6 +46,7 @@ struct ksmbd_conn { struct mutex srv_mutex; int status; unsigned int cli_cap; + __be32 inet_addr; char *request_buf; struct ksmbd_transport *transport; struct nls_table *local_nls; diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index 4e9f98db9ff4..d72588f33b9c 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -87,6 +87,7 @@ static struct tcp_transport *alloc_transport(struct socket *client_sk) return NULL; } + conn->inet_addr = inet_sk(client_sk->sk)->inet_daddr; conn->transport = KSMBD_TRANS(t); KSMBD_TRANS(t)->conn = conn; KSMBD_TRANS(t)->ops = &ksmbd_tcp_transport_ops; @@ -230,6 +231,8 @@ static int ksmbd_kthread_fn(void *p) { struct socket *client_sk = NULL; struct interface *iface = (struct interface *)p; + struct inet_sock *csk_inet; + struct ksmbd_conn *conn; int ret; while (!kthread_should_stop()) { @@ -248,6 +251,20 @@ static int ksmbd_kthread_fn(void *p) continue; } + /* + * Limits repeated connections from clients with the same IP. 
+ */ + csk_inet = inet_sk(client_sk->sk); + down_read(&conn_list_lock); + list_for_each_entry(conn, &conn_list, conns_list) + if (csk_inet->inet_daddr == conn->inet_addr) { + ret = -EAGAIN; + break; + } + up_read(&conn_list_lock); + if (ret == -EAGAIN) + continue; + if (server_conf.max_connections && atomic_inc_return(&active_num_conn) >= server_conf.max_connections) { pr_info_ratelimited("Limit the maximum number of connections(%u)\n", From 8e7d178d06e8937454b6d2f2811fa6a15656a214 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 6 Aug 2025 03:03:49 +0200 Subject: [PATCH 1724/2411] smb: server: Fix extension string in ksmbd_extract_shortname() In ksmbd_extract_shortname(), strscpy() is incorrectly called with the length of the source string (excluding the NUL terminator) rather than the size of the destination buffer. This results in "__" being copied to 'extension' rather than "___" (two underscores instead of three). Use the destination buffer size instead to ensure that the string "___" (three underscores) is copied correctly. 
Cc: stable@vger.kernel.org Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Signed-off-by: Thorsten Blum Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index 425c756bcfb8..b23203a1c286 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -515,7 +515,7 @@ int ksmbd_extract_shortname(struct ksmbd_conn *conn, const char *longname, p = strrchr(longname, '.'); if (p == longname) { /*name starts with a dot*/ - strscpy(extension, "___", strlen("___")); + strscpy(extension, "___", sizeof(extension)); } else { if (p) { p++; From 5378bdf6a611a32500fccf13d14156f219bb0c85 Mon Sep 17 00:00:00 2001 From: Adam Young Date: Mon, 14 Jul 2025 20:10:07 -0400 Subject: [PATCH 1725/2411] mailbox/pcc: support mailbox management of the shared buffer Define a new, optional, callback that allows the driver to specify how the return data buffer is allocated. If that callback is set, mailbox/pcc.c is now responsible for reading from and writing to the PCC shared buffer. This also allows for proper checks of the Command complete flag between the PCC sender and receiver. For Type 4 channels, initialize the command complete flag prior to accepting messages. Since the mailbox does not know what memory allocation scheme to use for response messages, the client now has an optional callback that allows it to allocate the buffer for a response message. When an outbound message is written to the buffer, the mailbox checks for the flag indicating the client wants a tx complete notification via IRQ. Upon receipt of the interrupt it will pair it with the outgoing message. The expected use is to free the kernel memory buffer for the previous outgoing message.
Signed-off-by: Adam Young Signed-off-by: Jassi Brar --- drivers/mailbox/pcc.c | 102 ++++++++++++++++++++++++++++++++++++++++-- include/acpi/pcc.h | 29 ++++++++++++ 2 files changed, 127 insertions(+), 4 deletions(-) diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c index f6714c233f5a..0a00719b2482 100644 --- a/drivers/mailbox/pcc.c +++ b/drivers/mailbox/pcc.c @@ -306,6 +306,22 @@ static void pcc_chan_acknowledge(struct pcc_chan_info *pchan) pcc_chan_reg_read_modify_write(&pchan->db); } +static void *write_response(struct pcc_chan_info *pchan) +{ + struct pcc_header pcc_header; + void *buffer; + int data_len; + + memcpy_fromio(&pcc_header, pchan->chan.shmem, + sizeof(pcc_header)); + data_len = pcc_header.length - sizeof(u32) + sizeof(struct pcc_header); + + buffer = pchan->chan.rx_alloc(pchan->chan.mchan->cl, data_len); + if (buffer != NULL) + memcpy_fromio(buffer, pchan->chan.shmem, data_len); + return buffer; +} + /** * pcc_mbox_irq - PCC mailbox interrupt handler * @irq: interrupt number @@ -317,6 +333,8 @@ static irqreturn_t pcc_mbox_irq(int irq, void *p) { struct pcc_chan_info *pchan; struct mbox_chan *chan = p; + struct pcc_header *pcc_header = chan->active_req; + void *handle = NULL; pchan = chan->con_priv; @@ -340,7 +358,17 @@ static irqreturn_t pcc_mbox_irq(int irq, void *p) * required to avoid any possible race in updatation of this flag. 
*/ pchan->chan_in_use = false; - mbox_chan_received_data(chan, NULL); + + if (pchan->chan.rx_alloc) + handle = write_response(pchan); + + if (chan->active_req) { + pcc_header = chan->active_req; + if (pcc_header->flags & PCC_CMD_COMPLETION_NOTIFY) + mbox_chan_txdone(chan, 0); + } + + mbox_chan_received_data(chan, handle); pcc_chan_acknowledge(pchan); @@ -384,9 +412,24 @@ pcc_mbox_request_channel(struct mbox_client *cl, int subspace_id) pcc_mchan = &pchan->chan; pcc_mchan->shmem = acpi_os_ioremap(pcc_mchan->shmem_base_addr, pcc_mchan->shmem_size); - if (pcc_mchan->shmem) - return pcc_mchan; + if (!pcc_mchan->shmem) + goto err; + pcc_mchan->manage_writes = false; + + /* This indicates that the channel is ready to accept messages. + * This needs to happen after the channel has registered + * its callback. There is no access point to do that in + * the mailbox API. That implies that the mailbox client must + * have set the allocate callback function prior to + * sending any messages. + */ + if (pchan->type == ACPI_PCCT_TYPE_EXT_PCC_SLAVE_SUBSPACE) + pcc_chan_reg_read_modify_write(&pchan->cmd_update); + + return pcc_mchan; + +err: mbox_free_channel(chan); return ERR_PTR(-ENXIO); } @@ -417,8 +460,38 @@ void pcc_mbox_free_channel(struct pcc_mbox_chan *pchan) } EXPORT_SYMBOL_GPL(pcc_mbox_free_channel); +static int pcc_write_to_buffer(struct mbox_chan *chan, void *data) +{ + struct pcc_chan_info *pchan = chan->con_priv; + struct pcc_mbox_chan *pcc_mbox_chan = &pchan->chan; + struct pcc_header *pcc_header = data; + + if (!pchan->chan.manage_writes) + return 0; + + /* The PCC header length includes the command field + * but not the other values from the header. 
+ */ + int len = pcc_header->length - sizeof(u32) + sizeof(struct pcc_header); + u64 val; + + pcc_chan_reg_read(&pchan->cmd_complete, &val); + if (!val) { + pr_info("%s pchan->cmd_complete not set", __func__); + return -1; + } + memcpy_toio(pcc_mbox_chan->shmem, data, len); + return 0; +} + + /** - * pcc_send_data - Called from Mailbox Controller code. Used + * pcc_send_data - Called from Mailbox Controller code. If + * pchan->chan.rx_alloc is set, then the command complete + * flag is checked and the data is written to the shared + * buffer io memory. + * + * If pchan->chan.rx_alloc is not set, then it is used * here only to ring the channel doorbell. The PCC client * specific read/write is done in the client driver in * order to maintain atomicity over PCC channel once @@ -434,17 +507,37 @@ static int pcc_send_data(struct mbox_chan *chan, void *data) int ret; struct pcc_chan_info *pchan = chan->con_priv; + ret = pcc_write_to_buffer(chan, data); + if (ret) + return ret; + ret = pcc_chan_reg_read_modify_write(&pchan->cmd_update); if (ret) return ret; ret = pcc_chan_reg_read_modify_write(&pchan->db); + if (!ret && pchan->plat_irq > 0) pchan->chan_in_use = true; return ret; } + +static bool pcc_last_tx_done(struct mbox_chan *chan) +{ + struct pcc_chan_info *pchan = chan->con_priv; + u64 val; + + pcc_chan_reg_read(&pchan->cmd_complete, &val); + if (!val) + return false; + else + return true; +} + + + /** * pcc_startup - Called from Mailbox Controller code. Used here * to request the interrupt. 
@@ -490,6 +583,7 @@ static const struct mbox_chan_ops pcc_chan_ops = { .send_data = pcc_send_data, .startup = pcc_startup, .shutdown = pcc_shutdown, + .last_tx_done = pcc_last_tx_done, }; /** diff --git a/include/acpi/pcc.h b/include/acpi/pcc.h index 840bfc95bae3..9af3b502f839 100644 --- a/include/acpi/pcc.h +++ b/include/acpi/pcc.h @@ -17,6 +17,35 @@ struct pcc_mbox_chan { u32 latency; u32 max_access_rate; u16 min_turnaround_time; + + /* Set to true to indicate that the mailbox should manage + * writing the dat to the shared buffer. This differs from + * the case where the drivesr are writing to the buffer and + * using send_data only to ring the doorbell. If this flag + * is set, then the void * data parameter of send_data must + * point to a kernel-memory buffer formatted in accordance with + * the PCC specification. + * + * The active buffer management will include reading the + * notify_on_completion flag, and will then + * call mbox_chan_txdone when the acknowledgment interrupt is + * received. + */ + bool manage_writes; + + /* Optional callback that allows the driver + * to allocate the memory used for receiving + * messages. The return value is the location + * inside the buffer where the mailbox should write the data. + */ + void *(*rx_alloc)(struct mbox_client *cl, int size); +}; + +struct pcc_header { + u32 signature; + u32 flags; + u32 length; + u32 command; }; /* Generic Communications Channel Shared Memory Region */ From 33503c083fda048c77903460ac0429e1e2c0e341 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 8 Aug 2025 06:35:14 -0600 Subject: [PATCH 1726/2411] io_uring/memmap: cast nr_pages to size_t before shifting If the allocated size exceeds UINT_MAX, then it's necessary to cast the mr->nr_pages value to size_t to prevent it from overflowing. In practice this isn't much of a concern as the required memory size will have been validated upfront, and accounted to the user. 
And > 4GB sizes will be necessary to make the lack of a cast a problem, which greatly exceeds normal user locked_vm settings that are generally in the kb to mb range. However, if root is used, then accounting isn't done, and then it's possible to hit this issue. Link: https://lore.kernel.org/all/6895b298.050a0220.7f033.0059.GAE@google.com/ Cc: stable@vger.kernel.org Reported-by: syzbot+23727438116feb13df15@syzkaller.appspotmail.com Fixes: 087f997870a9 ("io_uring/memmap: implement mmap for regions") Signed-off-by: Jens Axboe --- io_uring/memmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/memmap.c b/io_uring/memmap.c index 725dc0bec24c..2e99dffddfc5 100644 --- a/io_uring/memmap.c +++ b/io_uring/memmap.c @@ -156,7 +156,7 @@ static int io_region_allocate_pages(struct io_ring_ctx *ctx, unsigned long mmap_offset) { gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN; - unsigned long size = mr->nr_pages << PAGE_SHIFT; + size_t size = (size_t) mr->nr_pages << PAGE_SHIFT; unsigned long nr_allocated; struct page **pages; void *p; From c875503a9b9082928d7d3fc60b5400d16fbfae4e Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Wed, 6 Aug 2025 18:27:56 +0800 Subject: [PATCH 1727/2411] net: hibmcge: fix rtnl deadlock issue Currently, the hibmcge netdev acquires the rtnl_lock in pci_error_handlers.reset_prepare() and releases it in pci_error_handlers.reset_done(). 
However, in the PCI framework: pci_reset_bus - __pci_reset_slot - pci_slot_save_and_disable_locked - pci_dev_save_and_disable - err_handler->reset_prepare(dev); In pci_slot_save_and_disable_locked(): list_for_each_entry(dev, &slot->bus->devices, bus_list) { if (!dev->slot || dev->slot!= slot) continue; pci_dev_save_and_disable(dev); if (dev->subordinate) pci_bus_save_and_disable_locked(dev->subordinate); } This will iterate through all devices under the current bus and execute err_handler->reset_prepare(), causing two devices of the hibmcge driver to sequentially request the rtnl_lock, leading to a deadlock. Since the driver now executes netif_device_detach() before the reset process, it will not run concurrently with other netdev APIs, so there is no need to hold the rtnl_lock now. Therefore, this patch removes the rtnl_lock during the reset process and adjusts the position of HBG_NIC_STATE_RESETTING to ensure that multiple resets are not executed concurrently. Fixes: 3f5a61f6d504f ("net: hibmcge: Add reset supported in this module") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c index 503cfbfb4a8a..83cf75bf7a17 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c @@ -53,9 +53,11 @@ static int hbg_reset_prepare(struct hbg_priv *priv, enum hbg_reset_type type) { int ret; - ASSERT_RTNL(); + if (test_and_set_bit(HBG_NIC_STATE_RESETTING, &priv->state)) + return -EBUSY; if (netif_running(priv->netdev)) { + clear_bit(HBG_NIC_STATE_RESETTING, &priv->state); dev_warn(&priv->pdev->dev, "failed to reset because port is up\n"); return -EBUSY; @@ -64,7 +66,6 @@ static int hbg_reset_prepare(struct hbg_priv *priv, enum hbg_reset_type type)
netif_device_detach(priv->netdev); priv->reset_type = type; - set_bit(HBG_NIC_STATE_RESETTING, &priv->state); clear_bit(HBG_NIC_STATE_RESET_FAIL, &priv->state); ret = hbg_hw_event_notify(priv, HBG_HW_EVENT_RESET); if (ret) { @@ -84,29 +85,26 @@ static int hbg_reset_done(struct hbg_priv *priv, enum hbg_reset_type type) type != priv->reset_type) return 0; - ASSERT_RTNL(); - - clear_bit(HBG_NIC_STATE_RESETTING, &priv->state); ret = hbg_rebuild(priv); if (ret) { priv->stats.reset_fail_cnt++; set_bit(HBG_NIC_STATE_RESET_FAIL, &priv->state); + clear_bit(HBG_NIC_STATE_RESETTING, &priv->state); dev_err(&priv->pdev->dev, "failed to rebuild after reset\n"); return ret; } netif_device_attach(priv->netdev); + clear_bit(HBG_NIC_STATE_RESETTING, &priv->state); dev_info(&priv->pdev->dev, "reset done\n"); return ret; } -/* must be protected by rtnl lock */ int hbg_reset(struct hbg_priv *priv) { int ret; - ASSERT_RTNL(); ret = hbg_reset_prepare(priv, HBG_RESET_TYPE_FUNCTION); if (ret) return ret; @@ -171,7 +169,6 @@ static void hbg_pci_err_reset_prepare(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct hbg_priv *priv = netdev_priv(netdev); - rtnl_lock(); hbg_reset_prepare(priv, HBG_RESET_TYPE_FLR); } @@ -181,7 +178,6 @@ static void hbg_pci_err_reset_done(struct pci_dev *pdev) struct hbg_priv *priv = netdev_priv(netdev); hbg_reset_done(priv, HBG_RESET_TYPE_FLR); - rtnl_unlock(); } static const struct pci_error_handlers hbg_pci_err_handler = { From 7004b26f0b64331143eb0b312e77a357a11427ce Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Wed, 6 Aug 2025 18:27:57 +0800 Subject: [PATCH 1728/2411] net: hibmcge: fix the division by zero issue When the network port is down, the queue is released, and ring->len is 0. In debugfs, hbg_get_queue_used_num() will be called, which may lead to a division by zero issue. This patch adds a check, if ring->len is 0, hbg_get_queue_used_num() directly returns 0. 
Fixes: 40735e7543f9 ("net: hibmcge: Implement .ndo_start_xmit function") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.h index 2883a5899ae2..8b6110599e10 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.h @@ -29,7 +29,12 @@ static inline bool hbg_fifo_is_full(struct hbg_priv *priv, enum hbg_dir dir) static inline u32 hbg_get_queue_used_num(struct hbg_ring *ring) { - return (ring->ntu + ring->len - ring->ntc) % ring->len; + u32 len = READ_ONCE(ring->len); + + if (!len) + return 0; + + return (READ_ONCE(ring->ntu) + len - READ_ONCE(ring->ntc)) % len; } netdev_tx_t hbg_net_start_xmit(struct sk_buff *skb, struct net_device *netdev); From 62c50180ffda01468e640ac14925503796f255e2 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Wed, 6 Aug 2025 18:27:58 +0800 Subject: [PATCH 1729/2411] net: hibmcge: fix the np_link_fail error reporting issue Currently, after modifying device port mode, the np_link_ok state is immediately checked. At this point, the device may not yet be ready, leading to the querying of an intermediate state. This patch will poll to check if np_link is ok after modifying device port mode, and only report np_link_fail upon timeout.
Fixes: e0306637e85d ("net: hibmcge: Add support for mac link exception handling feature") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c index 8cca8316ba40..d0aa0661ecd4 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c @@ -12,6 +12,8 @@ #define HBG_HW_EVENT_WAIT_TIMEOUT_US (2 * 1000 * 1000) #define HBG_HW_EVENT_WAIT_INTERVAL_US (10 * 1000) +#define HBG_MAC_LINK_WAIT_TIMEOUT_US (500 * 1000) +#define HBG_MAC_LINK_WAIT_INTERVAL_US (5 * 1000) /* little endian or big endian. * ctrl means packet description, data means skb packet data */ @@ -228,6 +230,9 @@ void hbg_hw_fill_buffer(struct hbg_priv *priv, u32 buffer_dma_addr) void hbg_hw_adjust_link(struct hbg_priv *priv, u32 speed, u32 duplex) { + u32 link_status; + int ret; + hbg_hw_mac_enable(priv, HBG_STATUS_DISABLE); hbg_reg_write_field(priv, HBG_REG_PORT_MODE_ADDR, @@ -239,8 +244,14 @@ void hbg_hw_adjust_link(struct hbg_priv *priv, u32 speed, u32 duplex) hbg_hw_mac_enable(priv, HBG_STATUS_ENABLE); - if (!hbg_reg_read_field(priv, HBG_REG_AN_NEG_STATE_ADDR, - HBG_REG_AN_NEG_STATE_NP_LINK_OK_B)) + /* wait MAC link up */ + ret = readl_poll_timeout(priv->io_base + HBG_REG_AN_NEG_STATE_ADDR, + link_status, + FIELD_GET(HBG_REG_AN_NEG_STATE_NP_LINK_OK_B, + link_status), + HBG_MAC_LINK_WAIT_INTERVAL_US, + HBG_MAC_LINK_WAIT_TIMEOUT_US); + if (ret) hbg_np_link_fail_task_schedule(priv); } From 06feac15406f4f66f4c0c6ea60b10d44775d4133 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Tue, 5 Aug 2025 23:08:12 +0530 Subject: [PATCH 1730/2411] net: ti: icssg-prueth: Fix emac link speed handling When link settings are changed emac->speed is populated by emac_adjust_link(). 
The link speed and other settings are then written into the DRAM. However if both ports are brought down after this and brought up again or if the operating mode is changed and a firmware reload is needed, the DRAM is cleared by icssg_config(). As a result the link settings are lost. Fix this by calling emac_adjust_link() after icssg_config(). This repopulates the settings in the DRAM after a new firmware load. Fixes: 9facce84f406 ("net: ti: icssg-prueth: Fix firmware load sequence.") Signed-off-by: MD Danish Anwar Reviewed-by: Andrew Lunn Message-ID: <20250805173812.2183161-1-danishanwar@ti.com> Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 2b973d6e2341..6c7d776ae4ee 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -50,6 +50,8 @@ /* CTRLMMR_ICSSG_RGMII_CTRL register bits */ #define ICSSG_CTRL_RGMII_ID_MODE BIT(24) +static void emac_adjust_link(struct net_device *ndev); + static int emac_get_tx_ts(struct prueth_emac *emac, struct emac_tx_ts_response *rsp) { @@ -229,6 +231,10 @@ static int prueth_emac_common_start(struct prueth *prueth) ret = icssg_config(prueth, emac, slice); if (ret) goto disable_class; + + mutex_lock(&emac->ndev->phydev->lock); + emac_adjust_link(emac->ndev); + mutex_unlock(&emac->ndev->phydev->lock); } ret = prueth_emac_start(prueth); From 64fdaa94bfe0cca3a0f4b2dd922486c5f59fe678 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Aug 2025 17:36:54 -0700 Subject: [PATCH 1731/2411] net: page_pool: allow enabling recycling late, fix false positive warning Page pool can have pages "directly" (locklessly) recycled to it, if the NAPI that owns the page pool is scheduled to run on the same CPU. To make this safe we check that the NAPI is disabled while we destroy the page pool.
In most cases NAPI and page pool lifetimes are tied together so this happens naturally. The queue API expects the following order of calls: -> mem_alloc alloc new pp -> stop napi_disable -> start napi_enable -> mem_free free old pp Here we allocate the page pool in ->mem_alloc and free in ->mem_free. But the NAPIs are only stopped between ->stop and ->start. We created page_pool_disable_direct_recycling() to safely shut down the recycling in ->stop. This way the page_pool_destroy() call in ->mem_free doesn't have to worry about recycling any more. Unfortunately, the page_pool_disable_direct_recycling() is not enough to deal with failures which necessitate freeing the _new_ page pool. If we hit a failure in ->mem_alloc or ->stop the new page pool has to be freed while the NAPI is active (assuming driver attaches the page pool to an existing NAPI instance and doesn't reallocate NAPIs). Freeing the new page pool is technically safe because it hasn't been used for any packets, yet, so there can be no recycling. But the check in napi_assert_will_not_race() has no way of knowing that. We could check if page pool is empty but that'd make the check much less likely to trigger during development. Add page_pool_enable_direct_recycling(), pairing with page_pool_disable_direct_recycling(). It will allow us to create the new page pools in "disabled" state and only enable recycling when we know the reconfig operation will not fail. Coincidentally it will also let us re-enable the recycling for the old pool, if the reconfig failed: -> mem_alloc (new) -> stop (old) # disables direct recycling for old -> start (new) # fail!! -> start (old) # go back to old pp but direct recycling is lost :( -> mem_free (new) The new helper is idempotent to make the life easier for drivers, which can operate in HDS mode and support zero-copy Rx. The driver can call the helper twice whether there are two pools or it has multiple references to a single pool. 
Fixes: 40eca00ae605 ("bnxt_en: unlink page pool when stopping Rx queue") Tested-by: David Wei Link: https://patch.msgid.link/20250805003654.2944974-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 ++++++- include/net/page_pool/types.h | 2 ++ net/core/page_pool.c | 29 +++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5578ddcb465d..76a4c5ae8000 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3819,7 +3819,6 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp, if (BNXT_RX_PAGE_MODE(bp)) pp.pool_size += bp->rx_ring_size / rx_size_fac; pp.nid = numa_node; - pp.napi = &rxr->bnapi->napi; pp.netdev = bp->dev; pp.dev = &bp->pdev->dev; pp.dma_dir = bp->rx_dir; @@ -3851,6 +3850,12 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp, return PTR_ERR(pool); } +static void bnxt_enable_rx_page_pool(struct bnxt_rx_ring_info *rxr) +{ + page_pool_enable_direct_recycling(rxr->head_pool, &rxr->bnapi->napi); + page_pool_enable_direct_recycling(rxr->page_pool, &rxr->bnapi->napi); +} + static int bnxt_alloc_rx_agg_bmap(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) { u16 mem_size; @@ -3889,6 +3894,7 @@ static int bnxt_alloc_rx_rings(struct bnxt *bp) rc = bnxt_alloc_rx_page_pool(bp, rxr, cpu_node); if (rc) return rc; + bnxt_enable_rx_page_pool(rxr); rc = xdp_rxq_info_reg(&rxr->xdp_rxq, bp->dev, i, 0); if (rc < 0) @@ -16031,6 +16037,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) goto err_reset; } + bnxt_enable_rx_page_pool(rxr); napi_enable_locked(&bnapi->napi); bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons); diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 431b593de709..1509a536cb85 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -265,6 +265,8 @@ struct 
page_pool *page_pool_create_percpu(const struct page_pool_params *params, struct xdp_mem_info; #ifdef CONFIG_PAGE_POOL +void page_pool_enable_direct_recycling(struct page_pool *pool, + struct napi_struct *napi); void page_pool_disable_direct_recycling(struct page_pool *pool); void page_pool_destroy(struct page_pool *pool); void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 05e2e22a8f7c..343a6cac21e3 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -1201,6 +1201,35 @@ void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), pool->xdp_mem_id = mem->id; } +/** + * page_pool_enable_direct_recycling() - mark page pool as owned by NAPI + * @pool: page pool to modify + * @napi: NAPI instance to associate the page pool with + * + * Associate a page pool with a NAPI instance for lockless page recycling. + * This is useful when a new page pool has to be added to a NAPI instance + * without disabling that NAPI instance, to mark the point at which control + * path "hands over" the page pool to the NAPI instance. In most cases driver + * can simply set the @napi field in struct page_pool_params, and does not + * have to call this helper. + * + * The function is idempotent, but does not implement any refcounting. + * Single page_pool_disable_direct_recycling() will disable recycling, + * no matter how many times enable was called. + */ +void page_pool_enable_direct_recycling(struct page_pool *pool, + struct napi_struct *napi) +{ + if (READ_ONCE(pool->p.napi) == napi) + return; + WARN_ON(!napi || pool->p.napi); + + mutex_lock(&page_pools_lock); + WRITE_ONCE(pool->p.napi, napi); + mutex_unlock(&page_pools_lock); +} +EXPORT_SYMBOL(page_pool_enable_direct_recycling); + void page_pool_disable_direct_recycling(struct page_pool *pool) { /* Disable direct recycling based on pool->cpuid. 
From 5f1d1d14db7dabce9c815e7d7cd351f8d58b8585 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Tue, 5 Aug 2025 07:23:18 -0700 Subject: [PATCH 1732/2411] net: ti: icss-iep: Fix incorrect type for return value in extts_enable() The variable ret in icss_iep_extts_enable() was incorrectly declared as u32, while the function returns int and may return negative error codes. This will cause sign extension issues and incorrect error propagation. Update ret to be int to fix error handling. This change corrects the declaration to avoid potential type mismatch. Fixes: c1e0230eeaab ("net: ti: icss-iep: Add IEP driver") Signed-off-by: Alok Tiwari Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250805142323.1949406-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icss_iep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c index 50bfbc2779e4..d8c9fe1d98c4 100644 --- a/drivers/net/ethernet/ti/icssg/icss_iep.c +++ b/drivers/net/ethernet/ti/icssg/icss_iep.c @@ -621,7 +621,8 @@ static int icss_iep_pps_enable(struct icss_iep *iep, int on) static int icss_iep_extts_enable(struct icss_iep *iep, u32 index, int on) { - u32 val, cap, ret = 0; + u32 val, cap; + int ret = 0; mutex_lock(&iep->ptp_clk_mutex); From aba0c94f61ec05315fa7815d21aefa4c87f6a9f4 Mon Sep 17 00:00:00 2001 From: Budimir Markovic Date: Thu, 7 Aug 2025 04:18:11 +0000 Subject: [PATCH 1733/2411] vsock: Do not allow binding to VMADDR_PORT_ANY It is possible for a vsock to autobind to VMADDR_PORT_ANY. This can cause a use-after-free when a connection is made to the bound socket. The socket returned by accept() also has port VMADDR_PORT_ANY but is not on the list of unbound sockets. Binding it will result in an extra refcount decrement similar to the one fixed in fcdd2242c023 (vsock: Keep the binding until socket destruction). 
Modify the check in __vsock_bind_connectible() to also prevent binding to VMADDR_PORT_ANY. Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") Reported-by: Budimir Markovic Signed-off-by: Budimir Markovic Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20250807041811.678-1-markovicbudimir@gmail.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/af_vsock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index ead6a3c14b87..bebb355f3ffe 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -689,7 +689,8 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk, unsigned int i; for (i = 0; i < MAX_PORT_RETRIES; i++) { - if (port <= LAST_RESERVED_PORT) + if (port == VMADDR_PORT_ANY || + port <= LAST_RESERVED_PORT) port = LAST_RESERVED_PORT + 1; new_addr.svm_port = port++; From d44c40e4e30f6aa1ca8ed1c8b715d4c5829f0560 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 10 Jun 2025 18:07:49 -0400 Subject: [PATCH 1734/2411] tools/power turbostat: verify arguments to params --show and --hide $ sudo turbostat --quiet --show junk turbostat: Counter 'junk' can not be added. 
Previously, invalid arguments to --show and --hide were silently ignored Acked-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 33 +++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 33a54a9e0781..4056b7e26a0f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2310,6 +2310,8 @@ char *deferred_add_names[MAX_DEFERRED]; char *deferred_skip_names[MAX_DEFERRED]; int deferred_add_index; int deferred_skip_index; +unsigned int deferred_add_consumed; +unsigned int deferred_skip_consumed; /* * HIDE_LIST - hide this list of counters, show the rest [default] @@ -10512,8 +10514,10 @@ int is_deferred_add(char *name) int i; for (i = 0; i < deferred_add_index; ++i) - if (!strcmp(name, deferred_add_names[i])) + if (!strcmp(name, deferred_add_names[i])) { + deferred_add_consumed |= (1 << i); return 1; + } return 0; } @@ -10522,11 +10526,34 @@ int is_deferred_skip(char *name) int i; for (i = 0; i < deferred_skip_index; ++i) - if (!strcmp(name, deferred_skip_names[i])) + if (!strcmp(name, deferred_skip_names[i])) { + deferred_skip_consumed |= (1 << i); return 1; + } return 0; } +void verify_deferred_consumed(void) +{ + int i; + int fail = 0; + + for (i = 0; i < deferred_add_index; ++i) { + if (!(deferred_add_consumed & (1 << i))) { + warnx("Counter '%s' can not be added.", deferred_add_names[i]); + fail++; + } + } + for (i = 0; i < deferred_skip_index; ++i) { + if (!(deferred_skip_consumed & (1 << i))) { + warnx("Counter '%s' can not be skipped.", deferred_skip_names[i]); + fail++; + } + } + if (fail) + exit(-EINVAL); +} + void probe_cpuidle_residency(void) { char path[64]; @@ -10885,6 +10912,8 @@ int main(int argc, char **argv) probe_cpuidle_residency(); probe_cpuidle_counts(); + verify_deferred_consumed(); + if (!getuid()) set_rlimit(); From 6ea0ec1b958a84aff9f03fb0ae4613a4d5bed3ea 
Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Fri, 13 Jun 2025 09:54:23 -0700 Subject: [PATCH 1735/2411] tools/power turbostat: Fix build with musl turbostat.c: In function 'parse_int_file': turbostat.c:5567:19: error: 'PATH_MAX' undeclared (first use in this function) 5567 | char path[PATH_MAX]; | ^~~~~~~~ turbostat.c: In function 'probe_graphics': turbostat.c:6787:19: error: 'PATH_MAX' undeclared (first use in this function) 6787 | char path[PATH_MAX]; | ^~~~~~~~ Signed-off-by: Calvin Owens Reviewed-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4056b7e26a0f..778aee48c6e6 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -67,6 +67,7 @@ #include #include #include +#include #define UNUSED(x) (void)(x) From d34fe509f5f76d9dc36291242d67c6528027ebbd Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Fri, 13 Jun 2025 19:20:28 -0700 Subject: [PATCH 1736/2411] tools/power turbostat: Handle cap_get_proc() ENOSYS Kernels configured with CONFIG_MULTIUSER=n have no cap_get_proc(). Check for ENOSYS to recognize this case, and continue on to attempt to access the requested MSRs (such as temperature). 
Signed-off-by: Calvin Owens Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 778aee48c6e6..fa81e273d3a2 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -6574,8 +6574,16 @@ int check_for_cap_sys_rawio(void) int ret = 0; caps = cap_get_proc(); - if (caps == NULL) + if (caps == NULL) { + /* + * CONFIG_MULTIUSER=n kernels have no cap_get_proc() + * Allow them to continue and attempt to access MSRs + */ + if (errno == ENOSYS) + return 0; + return 1; + } if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) { ret = 1; From 44207567fa64e995d4f2ec2d45af4c947cb1a465 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 17 Jun 2025 20:48:59 +0800 Subject: [PATCH 1737/2411] tools/power turbostat: Fix bogus SysWatt for forked program Similar to delta_cpu(), delta_platform() is called in turbostat main loop. This ensures accurate SysWatt readings in periodic monitoring mode $ sudo turbostat -S -q --show power -i 1 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt RAMWatt PKG_% RAM_% SysWatt 60 61 6.21 1.13 0.16 0.00 0.00 0.00 13.07 58 61 6.00 1.07 0.18 0.00 0.00 0.00 12.75 58 61 5.74 1.05 0.17 0.00 0.00 0.00 12.22 58 60 6.27 1.11 0.24 0.00 0.00 0.00 13.55 However, delta_platform() is missing for forked program and causes bogus SysWatt reporting, $ sudo turbostat -S -q --show power sleep 1 1.004736 sec CoreTmp PkgTmp PkgWatt CorWatt GFXWatt RAMWatt PKG_% RAM_% SysWatt 57 58 6.05 1.02 0.16 0.00 0.00 0.00 0.03 Add missing delta_platform() for forked program. 
Fixes: e5f687b89bc2 ("tools/power turbostat: Add RAPL psys as a built-in counter") Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index fa81e273d3a2..4cb3f1aa3a88 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -9827,6 +9827,7 @@ int fork_it(char **argv) timersub(&tv_odd, &tv_even, &tv_delta); if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) fprintf(outf, "%s: Counter reset detected\n", progname); + delta_platform(&platform_counters_odd, &platform_counters_even); compute_average(EVEN_COUNTERS); format_all_counters(EVEN_COUNTERS); From fd60d8a086191fe33c2d719732d2482052fa6805 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 7 Aug 2025 15:40:11 -0400 Subject: [PATCH 1738/2411] sctp: linearize cloned gso packets in sctp_rcv A cloned head skb still shares these frag skbs in fraglist with the original head skb. It's not safe to access these frag skbs. 
syzbot reported two use-of-uninitialized-memory bugs caused by this: BUG: KMSAN: uninit-value in sctp_inq_pop+0x15b7/0x1920 net/sctp/inqueue.c:211 sctp_inq_pop+0x15b7/0x1920 net/sctp/inqueue.c:211 sctp_assoc_bh_rcv+0x1a7/0xc50 net/sctp/associola.c:998 sctp_inq_push+0x2ef/0x380 net/sctp/inqueue.c:88 sctp_backlog_rcv+0x397/0xdb0 net/sctp/input.c:331 sk_backlog_rcv+0x13b/0x420 include/net/sock.h:1122 __release_sock+0x1da/0x330 net/core/sock.c:3106 release_sock+0x6b/0x250 net/core/sock.c:3660 sctp_wait_for_connect+0x487/0x820 net/sctp/socket.c:9360 sctp_sendmsg_to_asoc+0x1ec1/0x1f00 net/sctp/socket.c:1885 sctp_sendmsg+0x32b9/0x4a80 net/sctp/socket.c:2031 inet_sendmsg+0x25a/0x280 net/ipv4/af_inet.c:851 sock_sendmsg_nosec net/socket.c:718 [inline] and BUG: KMSAN: uninit-value in sctp_assoc_bh_rcv+0x34e/0xbc0 net/sctp/associola.c:987 sctp_assoc_bh_rcv+0x34e/0xbc0 net/sctp/associola.c:987 sctp_inq_push+0x2a3/0x350 net/sctp/inqueue.c:88 sctp_backlog_rcv+0x3c7/0xda0 net/sctp/input.c:331 sk_backlog_rcv+0x142/0x420 include/net/sock.h:1148 __release_sock+0x1d3/0x330 net/core/sock.c:3213 release_sock+0x6b/0x270 net/core/sock.c:3767 sctp_wait_for_connect+0x458/0x820 net/sctp/socket.c:9367 sctp_sendmsg_to_asoc+0x223a/0x2260 net/sctp/socket.c:1886 sctp_sendmsg+0x3910/0x49f0 net/sctp/socket.c:2032 inet_sendmsg+0x269/0x2a0 net/ipv4/af_inet.c:851 sock_sendmsg_nosec net/socket.c:712 [inline] This patch fixes it by linearizing cloned gso packets in sctp_rcv(). 
Fixes: 90017accff61 ("sctp: Add GSO support") Reported-by: syzbot+773e51afe420baaf0e2b@syzkaller.appspotmail.com Reported-by: syzbot+70a42f45e76bede082be@syzkaller.appspotmail.com Signed-off-by: Xin Long Reviewed-by: Marcelo Ricardo Leitner Link: https://patch.msgid.link/dd7dc337b99876d4132d0961f776913719f7d225.1754595611.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/sctp/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 2dc2666988fb..7e99894778d4 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -117,7 +117,7 @@ int sctp_rcv(struct sk_buff *skb) * it's better to just linearize it otherwise crc computing * takes longer. */ - if ((!is_gso && skb_linearize(skb)) || + if (((!is_gso || skb_cloned(skb)) && skb_linearize(skb)) || !pskb_may_pull(skb, sizeof(struct sctphdr))) goto discard_it; From 829f45f9d992019b49f08ab425ca11288b084aed Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Wed, 6 Aug 2025 17:54:53 -0700 Subject: [PATCH 1739/2411] net: dsa: microchip: Fix KSZ8863 reset problem ksz8873_valid_regs[] was added for register access for KSZ8863/KSZ8873 switches, but the reset register is not in the list so ksz8_reset_switch() does not take any effect. Replace regmap_update_bits() using ksz_regmap_8 with ksz_rmw8() so that an error message will be given if the register is not defined. A side effect of not resetting the switch is the static MAC table is not cleared. Further additions to the table will show write error as there are only 8 entries in the table. 
Fixes: d0dec3333040 ("net: dsa: microchip: Add register access control for KSZ8873 chip") Signed-off-by: Tristram Ha Reviewed-by: Oleksij Rempel Link: https://patch.msgid.link/20250807005453.8306-1-Tristram.Ha@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz8.c | 20 +++++++++++--------- drivers/net/dsa/microchip/ksz_common.c | 1 + 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz8.c b/drivers/net/dsa/microchip/ksz8.c index 76e490070e9c..c354abdafc1b 100644 --- a/drivers/net/dsa/microchip/ksz8.c +++ b/drivers/net/dsa/microchip/ksz8.c @@ -36,15 +36,14 @@ static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set) { - regmap_update_bits(ksz_regmap_8(dev), addr, bits, set ? bits : 0); + ksz_rmw8(dev, addr, bits, set ? bits : 0); } static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits, bool set) { - regmap_update_bits(ksz_regmap_8(dev), - dev->dev_ops->get_port_addr(port, offset), - bits, set ? bits : 0); + ksz_rmw8(dev, dev->dev_ops->get_port_addr(port, offset), bits, + set ? bits : 0); } /** @@ -1955,16 +1954,19 @@ int ksz8_setup(struct dsa_switch *ds) ksz_cfg(dev, S_LINK_AGING_CTRL, SW_LINK_AUTO_AGING, true); /* Enable aggressive back off algorithm in half duplex mode. */ - regmap_update_bits(ksz_regmap_8(dev), REG_SW_CTRL_1, - SW_AGGR_BACKOFF, SW_AGGR_BACKOFF); + ret = ksz_rmw8(dev, REG_SW_CTRL_1, SW_AGGR_BACKOFF, SW_AGGR_BACKOFF); + if (ret) + return ret; /* * Make sure unicast VLAN boundary is set as default and * enable no excessive collision drop. 
*/ - regmap_update_bits(ksz_regmap_8(dev), REG_SW_CTRL_2, - UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP, - UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP); + ret = ksz_rmw8(dev, REG_SW_CTRL_2, + UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP, + UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP); + if (ret) + return ret; ksz_cfg(dev, S_REPLACE_VID_CTRL, SW_REPLACE_VID, false); diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 7292bfe2f7ca..4cb14288ff0f 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -1447,6 +1447,7 @@ static const struct regmap_range ksz8873_valid_regs[] = { regmap_reg_range(0x3f, 0x3f), /* advanced control registers */ + regmap_reg_range(0x43, 0x43), regmap_reg_range(0x60, 0x6f), regmap_reg_range(0x70, 0x75), regmap_reg_range(0x76, 0x78), From 53898ebabe843bfa7baea9dae152797d5d0563c9 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 6 Aug 2025 14:37:25 -0700 Subject: [PATCH 1740/2411] net: lapbether: ignore ops-locked netdevs Syzkaller managed to trigger lock dependency in xsk_notify via register_netdevice. As discussed in [0], using register_netdevice in the notifiers is problematic so skip adding lapbeth for ops-locked devices. 
xsk_notifier+0xa4/0x280 net/xdp/xsk.c:1645 notifier_call_chain+0xbc/0x410 kernel/notifier.c:85 call_netdevice_notifiers_info+0xbe/0x140 net/core/dev.c:2230 call_netdevice_notifiers_extack net/core/dev.c:2268 [inline] call_netdevice_notifiers net/core/dev.c:2282 [inline] unregister_netdevice_many_notify+0xf9d/0x2700 net/core/dev.c:12077 unregister_netdevice_many net/core/dev.c:12140 [inline] unregister_netdevice_queue+0x305/0x3f0 net/core/dev.c:11984 register_netdevice+0x18f1/0x2270 net/core/dev.c:11149 lapbeth_new_device drivers/net/wan/lapbether.c:420 [inline] lapbeth_device_event+0x5b1/0xbe0 drivers/net/wan/lapbether.c:462 notifier_call_chain+0xbc/0x410 kernel/notifier.c:85 call_netdevice_notifiers_info+0xbe/0x140 net/core/dev.c:2230 call_netdevice_notifiers_extack net/core/dev.c:2268 [inline] call_netdevice_notifiers net/core/dev.c:2282 [inline] __dev_notify_flags+0x12c/0x2e0 net/core/dev.c:9497 netif_change_flags+0x108/0x160 net/core/dev.c:9526 dev_change_flags+0xba/0x250 net/core/dev_api.c:68 devinet_ioctl+0x11d5/0x1f50 net/ipv4/devinet.c:1200 inet_ioctl+0x3a7/0x3f0 net/ipv4/af_inet.c:1001 0: https://lore.kernel.org/netdev/20250625140357.6203d0af@kernel.org/ Fixes: 4c975fd70002 ("net: hold instance lock during NETDEV_REGISTER/UP") Suggested-by: Jakub Kicinski Reported-by: syzbot+e67ea9c235b13b4f0020@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e67ea9c235b13b4f0020 Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250806213726.1383379-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/net/wan/lapbether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 995a7207bdf8..f357a7ac70ac 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -81,7 +81,7 @@ static struct lapbethdev *lapbeth_get_x25_dev(struct net_device *dev) static __inline__ int dev_is_ethdev(struct net_device *dev) { - return dev->type == 
ARPHRD_ETHER && strncmp(dev->name, "dummy", 5); + return dev->type == ARPHRD_ETHER && !netdev_need_ops_lock(dev); } /* ------------------------------------------------------------------------ */ From c64237960819aee1766d03f446ae6de94b1e3f73 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 6 Aug 2025 14:37:26 -0700 Subject: [PATCH 1741/2411] hamradio: ignore ops-locked netdevs Syzkaller managed to trigger lock dependency in xsk_notify via register_netdevice. As discussed in [0], using register_netdevice in the notifiers is problematic so skip adding hamradio for ops-locked devices. xsk_notifier+0x89/0x230 net/xdp/xsk.c:1664 notifier_call_chain+0x1b6/0x3e0 kernel/notifier.c:85 call_netdevice_notifiers_extack net/core/dev.c:2267 [inline] call_netdevice_notifiers net/core/dev.c:2281 [inline] unregister_netdevice_many_notify+0x14d7/0x1ff0 net/core/dev.c:12156 unregister_netdevice_many net/core/dev.c:12219 [inline] unregister_netdevice_queue+0x33c/0x380 net/core/dev.c:12063 register_netdevice+0x1689/0x1ae0 net/core/dev.c:11241 bpq_new_device drivers/net/hamradio/bpqether.c:481 [inline] bpq_device_event+0x491/0x600 drivers/net/hamradio/bpqether.c:523 notifier_call_chain+0x1b6/0x3e0 kernel/notifier.c:85 call_netdevice_notifiers_extack net/core/dev.c:2267 [inline] call_netdevice_notifiers net/core/dev.c:2281 [inline] __dev_notify_flags+0x18d/0x2e0 net/core/dev.c:-1 netif_change_flags+0xe8/0x1a0 net/core/dev.c:9608 dev_change_flags+0x130/0x260 net/core/dev_api.c:68 devinet_ioctl+0xbb4/0x1b50 net/ipv4/devinet.c:1200 inet_ioctl+0x3c0/0x4c0 net/ipv4/af_inet.c:1001 0: https://lore.kernel.org/netdev/20250625140357.6203d0af@kernel.org/ Fixes: 4c975fd70002 ("net: hold instance lock during NETDEV_REGISTER/UP") Suggested-by: Jakub Kicinski Reported-by: syzbot+e6300f66a999a6612477@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e6300f66a999a6612477 Signed-off-by: Stanislav Fomichev Link: 
https://patch.msgid.link/20250806213726.1383379-2-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/net/hamradio/bpqether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 0e0fe32d2da4..045c5177262e 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -138,7 +138,7 @@ static inline struct net_device *bpq_get_ax25_dev(struct net_device *dev) static inline int dev_is_ethdev(struct net_device *dev) { - return dev->type == ARPHRD_ETHER && strncmp(dev->name, "dummy", 5); + return dev->type == ARPHRD_ETHER && !netdev_need_ops_lock(dev); } /* ------------------------------------------------------------------------ */ From 33caa208dba6fa639e8a92fd0c8320b652e5550c Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Wed, 6 Aug 2025 13:21:51 -0700 Subject: [PATCH 1742/2411] hv_netvsc: Fix panic during namespace deletion with VF The existing code moves the VF NIC to a new namespace when NETDEV_REGISTER is received on netvsc NIC. During deletion of the namespace, default_device_exit_batch() >> default_device_exit_net() is called. When netvsc NIC is moved back and registered to the default namespace, it automatically brings VF NIC back to the default namespace.
This will cause the default_device_exit_net() >> for_each_netdev_safe loop unable to detect the list end, and hit NULL ptr: [ 231.449420] mana 7870:00:00.0 enP30832s1: Moved VF to namespace with: eth0 [ 231.449656] BUG: kernel NULL pointer dereference, address: 0000000000000010 [ 231.450246] #PF: supervisor read access in kernel mode [ 231.450579] #PF: error_code(0x0000) - not-present page [ 231.450916] PGD 17b8a8067 P4D 0 [ 231.451163] Oops: Oops: 0000 [#1] SMP NOPTI [ 231.451450] CPU: 82 UID: 0 PID: 1394 Comm: kworker/u768:1 Not tainted 6.16.0-rc4+ #3 VOLUNTARY [ 231.452042] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 11/21/2024 [ 231.452692] Workqueue: netns cleanup_net [ 231.452947] RIP: 0010:default_device_exit_batch+0x16c/0x3f0 [ 231.453326] Code: c0 0c f5 b3 e8 d5 db fe ff 48 85 c0 74 15 48 c7 c2 f8 fd ca b2 be 10 00 00 00 48 8d 7d c0 e8 7b 77 25 00 49 8b 86 28 01 00 00 <48> 8b 50 10 4c 8b 2a 4c 8d 62 f0 49 83 ed 10 4c 39 e0 0f 84 d6 00 [ 231.454294] RSP: 0018:ff75fc7c9bf9fd00 EFLAGS: 00010246 [ 231.454610] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 61c8864680b583eb [ 231.455094] RDX: ff1fa9f71462d800 RSI: ff75fc7c9bf9fd38 RDI: 0000000030766564 [ 231.455686] RBP: ff75fc7c9bf9fd78 R08: 0000000000000000 R09: 0000000000000000 [ 231.456126] R10: 0000000000000001 R11: 0000000000000004 R12: ff1fa9f70088e340 [ 231.456621] R13: ff1fa9f70088e340 R14: ffffffffb3f50c20 R15: ff1fa9f7103e6340 [ 231.457161] FS: 0000000000000000(0000) GS:ff1faa6783a08000(0000) knlGS:0000000000000000 [ 231.457707] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 231.458031] CR2: 0000000000000010 CR3: 0000000179ab2006 CR4: 0000000000b73ef0 [ 231.458434] Call Trace: [ 231.458600] [ 231.458777] ops_undo_list+0x100/0x220 [ 231.459015] cleanup_net+0x1b8/0x300 [ 231.459285] process_one_work+0x184/0x340 To fix it, move the ns change to a workqueue, and take rtnl_lock to avoid changing the netdev list when default_device_exit_net() is 
using it. Cc: stable@vger.kernel.org Fixes: 4c262801ea60 ("hv_netvsc: Fix VF namespace also in synthetic NIC NETDEV_REGISTER event") Signed-off-by: Haiyang Zhang Link: https://patch.msgid.link/1754511711-11188-1-git-send-email-haiyangz@linux.microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/hyperv_net.h | 3 +++ drivers/net/hyperv/netvsc_drv.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index cb6f5482d203..7397c693f984 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -1061,6 +1061,7 @@ struct net_device_context { struct net_device __rcu *vf_netdev; struct netvsc_vf_pcpu_stats __percpu *vf_stats; struct delayed_work vf_takeover; + struct delayed_work vfns_work; /* 1: allocated, serial number is valid. 0: not allocated */ u32 vf_alloc; @@ -1075,6 +1076,8 @@ struct net_device_context { struct netvsc_device_info *saved_netvsc_dev_info; }; +void netvsc_vfns_work(struct work_struct *w); + /* Azure hosts don't support non-TCP port numbers in hashing for fragmented * packets. We can use ethtool to change UDP hash level when necessary. 
*/ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index f44753756358..39c892e46cb0 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2522,6 +2522,7 @@ static int netvsc_probe(struct hv_device *dev, spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup); + INIT_DELAYED_WORK(&net_device_ctx->vfns_work, netvsc_vfns_work); net_device_ctx->vf_stats = netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats); @@ -2666,6 +2667,8 @@ static void netvsc_remove(struct hv_device *dev) cancel_delayed_work_sync(&ndev_ctx->dwork); rtnl_lock(); + cancel_delayed_work_sync(&ndev_ctx->vfns_work); + nvdev = rtnl_dereference(ndev_ctx->nvdev); if (nvdev) { cancel_work_sync(&nvdev->subchan_work); @@ -2707,6 +2710,7 @@ static int netvsc_suspend(struct hv_device *dev) cancel_delayed_work_sync(&ndev_ctx->dwork); rtnl_lock(); + cancel_delayed_work_sync(&ndev_ctx->vfns_work); nvdev = rtnl_dereference(ndev_ctx->nvdev); if (nvdev == NULL) { @@ -2800,6 +2804,27 @@ static void netvsc_event_set_vf_ns(struct net_device *ndev) } } +void netvsc_vfns_work(struct work_struct *w) +{ + struct net_device_context *ndev_ctx = + container_of(w, struct net_device_context, vfns_work.work); + struct net_device *ndev; + + if (!rtnl_trylock()) { + schedule_delayed_work(&ndev_ctx->vfns_work, 1); + return; + } + + ndev = hv_get_drvdata(ndev_ctx->device_ctx); + if (!ndev) + goto out; + + netvsc_event_set_vf_ns(ndev); + +out: + rtnl_unlock(); +} + /* * On Hyper-V, every VF interface is matched with a corresponding * synthetic interface. 
The synthetic interface is presented first @@ -2810,10 +2835,12 @@ static int netvsc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + struct net_device_context *ndev_ctx; int ret = 0; if (event_dev->netdev_ops == &device_ops && event == NETDEV_REGISTER) { - netvsc_event_set_vf_ns(event_dev); + ndev_ctx = netdev_priv(event_dev); + schedule_delayed_work(&ndev_ctx->vfns_work, 0); return NOTIFY_DONE; } From bb8aeaa3191b617c6faf8ae937252e059673b7ea Mon Sep 17 00:00:00 2001 From: Fanhua Li Date: Mon, 28 Jul 2025 19:50:27 +0800 Subject: [PATCH 1743/2411] drm/nouveau/nvif: Fix potential memory leak in nvif_vmm_ctor(). When the nvif_vmm_type is invalid, we return an error directly without freeing the args in nvif_vmm_ctor(), which leads to a memory leak. Fix it by setting ret to -EINVAL and jumping to done. Reported-by: kernel test robot Closes: https://lore.kernel.org/all/202312040659.4pJpMafN-lkp@intel.com/ Fixes: 6b252cf42281 ("drm/nouveau: nvkm/vmm: implement raw ops to manage uvmm") Signed-off-by: Fanhua Li Link: https://lore.kernel.org/r/20250728115027.50878-1-lifanhua5@huawei.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nvif/vmm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvif/vmm.c b/drivers/gpu/drm/nouveau/nvif/vmm.c index 99296f03371a..07c1ebc2a941 100644 --- a/drivers/gpu/drm/nouveau/nvif/vmm.c +++ b/drivers/gpu/drm/nouveau/nvif/vmm.c @@ -219,7 +219,8 @@ nvif_vmm_ctor(struct nvif_mmu *mmu, const char *name, s32 oclass, case RAW: args->type = NVIF_VMM_V0_TYPE_RAW; break; default: WARN_ON(1); - return -EINVAL; + ret = -EINVAL; + goto done; } memcpy(args->data, argv, argc); From 01c2afe7358385a5381835293dfb6901f11b1691 Mon Sep 17 00:00:00 2001 From: Madhur Kumar Date: Fri, 8 Aug 2025 13:08:40 +0530 Subject: [PATCH 1744/2411] drm/nouveau: fix typos in comments Fixed three spelling mistakes in nouveau_exec.c
comments: - alloctor -> allocator - exectued -> executed - depent -> depend No functional changes. Fixes: b88baab82871 ("drm/nouveau: implement new VM_BIND uAPI") Signed-off-by: Madhur Kumar Link: https://lore.kernel.org/r/20250808073840.376764-1-madhurkumar004@gmail.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_exec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c index 41b7c608c905..46294134f294 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.c +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c @@ -60,14 +60,14 @@ * virtual address in the GPU's VA space there is no guarantee that the actual * mappings are created in the GPU's MMU. If the given memory is swapped out * at the time the bind operation is executed the kernel will stash the mapping - * details into it's internal alloctor and create the actual MMU mappings once + * details into it's internal allocator and create the actual MMU mappings once * the memory is swapped back in. While this is transparent for userspace, it is * guaranteed that all the backing memory is swapped back in and all the memory * mappings, as requested by userspace previously, are actually mapped once the * DRM_NOUVEAU_EXEC ioctl is called to submit an exec job. * * A VM_BIND job can be executed either synchronously or asynchronously. If - * exectued asynchronously, userspace may provide a list of syncobjs this job + * executed asynchronously, userspace may provide a list of syncobjs this job * will wait for and/or a list of syncobj the kernel will signal once the * VM_BIND job finished execution. If executed synchronously the ioctl will * block until the bind job is finished. For synchronous jobs the kernel will @@ -82,7 +82,7 @@ * Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs do have * an up to date view of the VA space. However, the actual mappings might still * be pending. 
Hence, EXEC jobs require to have the particular fences - of - * the corresponding VM_BIND jobs they depent on - attached to them. + * the corresponding VM_BIND jobs they depend on - attached to them. */ static int From d240b441b5cbb389f90fce37edb9ef76a3c9a42b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Mon, 23 Jun 2025 13:24:25 -0700 Subject: [PATCH 1745/2411] tools/power turbostat.8: Document Totl%C0, Any%C0, GFX%C0, CPUGFX% columns Explain the meaning of the Totl%C0, Any%C0, GFX%C0, CPUGFX% columns. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index fb11108aaf42..db3888b8af12 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -186,6 +186,14 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBSAMAMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/drm/card0/gt/gt1/rps_act_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/act_freq depending on the graphics driver being used. .PP +\fBTotl%C0\fP Weighted percentage of time that CPUs are busy. If N CPUs are busy during an interval, the percentage is N * 100%. +.PP +\fBAny%C0\fP Percentage of time that at least one CPU is busy. +.PP +\fBGFX%C0\fP Percentage of time that at least one GFX compute engine is busy. +.PP +\fBCPUGFX%\fP Percentage of time that at least one CPU is busy at the same time as at least one Graphics compute enginer is busy. +.PP \fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters. .PP \fBPkgWatt\fP Watts consumed by the whole package. 
From 65f97cc81b0adc5f49cf6cff5d874be0058e3f41 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 6 Aug 2025 13:24:28 -0400 Subject: [PATCH 1746/2411] cgroup/cpuset: Use static_branch_enable_cpuslocked() on cpusets_insane_config_key The following lockdep splat was observed. [ 812.359086] ============================================ [ 812.359089] WARNING: possible recursive locking detected [ 812.359097] -------------------------------------------- [ 812.359100] runtest.sh/30042 is trying to acquire lock: [ 812.359105] ffffffffa7f27420 (cpu_hotplug_lock){++++}-{0:0}, at: static_key_enable+0xe/0x20 [ 812.359131] [ 812.359131] but task is already holding lock: [ 812.359134] ffffffffa7f27420 (cpu_hotplug_lock){++++}-{0:0}, at: cpuset_write_resmask+0x98/0xa70 : [ 812.359267] Call Trace: [ 812.359272] [ 812.359367] cpus_read_lock+0x3c/0xe0 [ 812.359382] static_key_enable+0xe/0x20 [ 812.359389] check_insane_mems_config.part.0+0x11/0x30 [ 812.359398] cpuset_write_resmask+0x9f2/0xa70 [ 812.359411] cgroup_file_write+0x1c7/0x660 [ 812.359467] kernfs_fop_write_iter+0x358/0x530 [ 812.359479] vfs_write+0xabe/0x1250 [ 812.359529] ksys_write+0xf9/0x1d0 [ 812.359558] do_syscall_64+0x5f/0xe0 Since commit d74b27d63a8b ("cgroup/cpuset: Change cpuset_rwsem and hotplug lock order"), the ordering of cpu hotplug lock and cpuset_mutex had been reversed. That patch correctly used the cpuslocked version of the static branch API to enable cpusets_pre_enable_key and cpusets_enabled_key, but it didn't do the same for cpusets_insane_config_key. The cpusets_insane_config_key can be enabled in the check_insane_mems_config() which is called from update_nodemask() or cpuset_hotplug_update_tasks() with both cpu hotplug lock and cpuset_mutex held. Deadlock can happen with a pending hotplug event that tries to acquire the cpu hotplug write lock which will block further cpus_read_lock() attempt from check_insane_mems_config(). Fix that by switching to use static_branch_enable_cpuslocked(). 
Fixes: d74b27d63a8b ("cgroup/cpuset: Change cpuset_rwsem and hotplug lock order") Signed-off-by: Waiman Long Reviewed-by: Juri Lelli Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index f74d04429a29..bf149246e001 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -280,7 +280,7 @@ static inline void check_insane_mems_config(nodemask_t *nodes) { if (!cpusets_insane_config() && movable_only_nodes(nodes)) { - static_branch_enable(&cpusets_insane_config_key); + static_branch_enable_cpuslocked(&cpusets_insane_config_key); pr_info("Unsupported (movable nodes only) cpuset configuration detected (nmask=%*pbl)!\n" "Cpuset allocations might fail even with a lot of memory available.\n", nodemask_pr_args(nodes)); From 150e298ae0ccbecff2357a72fbabd80f8849ea6e Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 6 Aug 2025 13:24:29 -0400 Subject: [PATCH 1747/2411] cgroup/cpuset: Fix a partition error with CPU hotplug It was found during testing that an invalid leaf partition with an empty effective exclusive CPU list can become a valid empty partition with no CPU after an offline/online operation of an unrelated CPU. An empty partition root is allowed in the special case that it has no task in its cgroup and has distributed out all its CPUs to its child partitions. That is certainly not the case here. The problem is in the cpumask_subset() test in the hotplug case (update with no new mask) of update_parent_effective_cpumask() as it also returns true if the effective exclusive CPU list is empty. Fix that by adding the cpumask_empty() test to root out this exception case. Also add the cpumask_empty() test in cpuset_hotplug_update_tasks() to avoid calling update_parent_effective_cpumask() for this special case.
Fixes: 0c7f293efc87 ("cgroup/cpuset: Add cpuset.cpus.exclusive.effective for v2") Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index bf149246e001..d993e058a663 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1843,7 +1843,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, if (is_partition_valid(cs)) adding = cpumask_and(tmp->addmask, xcpus, parent->effective_xcpus); - } else if (is_partition_invalid(cs) && + } else if (is_partition_invalid(cs) && !cpumask_empty(xcpus) && cpumask_subset(xcpus, parent->effective_xcpus)) { struct cgroup_subsys_state *css; struct cpuset *child; @@ -3870,9 +3870,10 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) partcmd = partcmd_invalidate; /* * On the other hand, an invalid partition root may be transitioned - * back to a regular one. + * back to a regular one with a non-empty effective xcpus. */ - else if (is_partition_valid(parent) && is_partition_invalid(cs)) + else if (is_partition_valid(parent) && is_partition_invalid(cs) && + !cpumask_empty(cs->effective_xcpus)) partcmd = partcmd_update; if (partcmd >= 0) { From 87eba5bc5ab1d99e31c9d3b2c386187da94a5ab1 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 6 Aug 2025 13:24:30 -0400 Subject: [PATCH 1748/2411] cgroup/cpuset: Remove the unnecessary css_get/put() in cpuset_partition_write() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The css_get/put() calls in cpuset_partition_write() are unnecessary as an active reference of the kernfs node will be taken which will prevent its removal and guarantee the existence of the css. Only the online check is needed. 
Signed-off-by: Waiman Long Reviewed-by: Michal Koutný Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index d993e058a663..27adb04df675 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3358,14 +3358,12 @@ static ssize_t cpuset_partition_write(struct kernfs_open_file *of, char *buf, else return -EINVAL; - css_get(&cs->css); cpus_read_lock(); mutex_lock(&cpuset_mutex); if (is_cpuset_online(cs)) retval = update_prstate(cs, val); mutex_unlock(&cpuset_mutex); cpus_read_unlock(); - css_put(&cs->css); return retval ?: nbytes; } From eea51c6e3f6675b795f6439eaa960eb2948d6905 Mon Sep 17 00:00:00 2001 From: JP Kobryn Date: Wed, 6 Aug 2025 17:33:50 -0700 Subject: [PATCH 1749/2411] cgroup: avoid null de-ref in css_rstat_exit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit css_rstat_exit() may be called asynchronously in scenarios where preceding calls to css_rstat_init() have not completed. One such example is this sequence below: css_create(...) { ... init_and_link_css(css, ...); err = percpu_ref_init(...); if (err) goto err_free_css; err = cgroup_idr_alloc(...); if (err) goto err_free_css; err = css_rstat_init(css, ...); if (err) goto err_free_css; ... err_free_css: INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); return ERR_PTR(err); } If any of the three goto jumps are taken, async cleanup will begin and css_rstat_exit() will be invoked on an uninitialized css->rstat_cpu. Avoid accessing the uninitialized field by returning early in css_rstat_exit() if this is the case.
Signed-off-by: JP Kobryn Suggested-by: Michal Koutný Fixes: 5da3bfa029d68 ("cgroup: use separate rstat trees for each subsystem") Cc: stable@vger.kernel.org # v6.16 Reported-by: syzbot+8d052e8b99e40bc625ed@syzkaller.appspotmail.com Acked-by: Shakeel Butt Signed-off-by: Tejun Heo --- kernel/cgroup/rstat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 981e2f77ad4e..a198e40c799b 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -479,6 +479,9 @@ void css_rstat_exit(struct cgroup_subsys_state *css) if (!css_uses_rstat(css)) return; + if (!css->rstat_cpu) + return; + css_rstat_flush(css); /* sanity check */ From 8d14a098b47cc7e5cfa703b9e015d6ca1074489a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 12 Jul 2025 16:16:56 -0400 Subject: [PATCH 1750/2411] tools/power turbostat: Support more than 64 built-in-counters We have out-grown the ability to use a 64-bit memory location to inventory every possible built-in counter. Leverage the CPU_SET(3) macros to break this barrier. Also, break the Joules & Watts counters into two, since we can no longer 'or' them together... 
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 552 +++++++++++++++++++------- 1 file changed, 402 insertions(+), 150 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4cb3f1aa3a88..ee948e671741 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -210,91 +210,236 @@ struct msr_counter bic[] = { { 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 }, }; -#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) -#define BIC_USEC (1ULL << 0) -#define BIC_TOD (1ULL << 1) -#define BIC_Package (1ULL << 2) -#define BIC_Node (1ULL << 3) -#define BIC_Avg_MHz (1ULL << 4) -#define BIC_Busy (1ULL << 5) -#define BIC_Bzy_MHz (1ULL << 6) -#define BIC_TSC_MHz (1ULL << 7) -#define BIC_IRQ (1ULL << 8) -#define BIC_SMI (1ULL << 9) -#define BIC_cpuidle (1ULL << 10) -#define BIC_CPU_c1 (1ULL << 11) -#define BIC_CPU_c3 (1ULL << 12) -#define BIC_CPU_c6 (1ULL << 13) -#define BIC_CPU_c7 (1ULL << 14) -#define BIC_ThreadC (1ULL << 15) -#define BIC_CoreTmp (1ULL << 16) -#define BIC_CoreCnt (1ULL << 17) -#define BIC_PkgTmp (1ULL << 18) -#define BIC_GFX_rc6 (1ULL << 19) -#define BIC_GFXMHz (1ULL << 20) -#define BIC_Pkgpc2 (1ULL << 21) -#define BIC_Pkgpc3 (1ULL << 22) -#define BIC_Pkgpc6 (1ULL << 23) -#define BIC_Pkgpc7 (1ULL << 24) -#define BIC_Pkgpc8 (1ULL << 25) -#define BIC_Pkgpc9 (1ULL << 26) -#define BIC_Pkgpc10 (1ULL << 27) -#define BIC_CPU_LPI (1ULL << 28) -#define BIC_SYS_LPI (1ULL << 29) -#define BIC_PkgWatt (1ULL << 30) -#define BIC_CorWatt (1ULL << 31) -#define BIC_GFXWatt (1ULL << 32) -#define BIC_PkgCnt (1ULL << 33) -#define BIC_RAMWatt (1ULL << 34) -#define BIC_PKG__ (1ULL << 35) -#define BIC_RAM__ (1ULL << 36) -#define BIC_Pkg_J (1ULL << 37) -#define BIC_Cor_J (1ULL << 38) -#define BIC_GFX_J (1ULL << 39) -#define BIC_RAM_J (1ULL << 40) -#define BIC_Mod_c6 (1ULL << 41) -#define BIC_Totl_c0 (1ULL << 42) -#define BIC_Any_c0 (1ULL << 43) -#define BIC_GFX_c0 (1ULL << 
44) -#define BIC_CPUGFX (1ULL << 45) -#define BIC_Core (1ULL << 46) -#define BIC_CPU (1ULL << 47) -#define BIC_APIC (1ULL << 48) -#define BIC_X2APIC (1ULL << 49) -#define BIC_Die (1ULL << 50) -#define BIC_GFXACTMHz (1ULL << 51) -#define BIC_IPC (1ULL << 52) -#define BIC_CORE_THROT_CNT (1ULL << 53) -#define BIC_UNCORE_MHZ (1ULL << 54) -#define BIC_SAM_mc6 (1ULL << 55) -#define BIC_SAMMHz (1ULL << 56) -#define BIC_SAMACTMHz (1ULL << 57) -#define BIC_Diec6 (1ULL << 58) -#define BIC_SysWatt (1ULL << 59) -#define BIC_Sys_J (1ULL << 60) -#define BIC_NMI (1ULL << 61) -#define BIC_CPU_c1e (1ULL << 62) -#define BIC_pct_idle (1ULL << 63) +/* n.b. bic_names must match the order in bic[], above */ +enum bic_names { + BIC_USEC, + BIC_TOD, + BIC_Package, + BIC_Node, + BIC_Avg_MHz, + BIC_Busy, + BIC_Bzy_MHz, + BIC_TSC_MHz, + BIC_IRQ, + BIC_SMI, + BIC_cpuidle, + BIC_CPU_c1, + BIC_CPU_c3, + BIC_CPU_c6, + BIC_CPU_c7, + BIC_ThreadC, + BIC_CoreTmp, + BIC_CoreCnt, + BIC_PkgTmp, + BIC_GFX_rc6, + BIC_GFXMHz, + BIC_Pkgpc2, + BIC_Pkgpc3, + BIC_Pkgpc6, + BIC_Pkgpc7, + BIC_Pkgpc8, + BIC_Pkgpc9, + BIC_Pkgpc10, + BIC_CPU_LPI, + BIC_SYS_LPI, + BIC_PkgWatt, + BIC_CorWatt, + BIC_GFXWatt, + BIC_PkgCnt, + BIC_RAMWatt, + BIC_PKG__, + BIC_RAM__, + BIC_Pkg_J, + BIC_Cor_J, + BIC_GFX_J, + BIC_RAM_J, + BIC_Mod_c6, + BIC_Totl_c0, + BIC_Any_c0, + BIC_GFX_c0, + BIC_CPUGFX, + BIC_Core, + BIC_CPU, + BIC_APIC, + BIC_X2APIC, + BIC_Die, + BIC_GFXACTMHz, + BIC_IPC, + BIC_CORE_THROT_CNT, + BIC_UNCORE_MHZ, + BIC_SAM_mc6, + BIC_SAMMHz, + BIC_SAMACTMHz, + BIC_Diec6, + BIC_SysWatt, + BIC_Sys_J, + BIC_NMI, + BIC_CPU_c1e, + BIC_pct_idle, + MAX_BIC +}; -#define BIC_GROUP_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) -#define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) -#define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | 
BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_GROUP_HW_IDLE (BIC_Busy | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) -#define BIC_GROUP_SW_IDLE (BIC_Busy | BIC_cpuidle | BIC_pct_idle ) -#define BIC_GROUP_IDLE (BIC_GROUP_HW_IDLE | BIC_pct_idle) -#define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) +void print_bic_set(char *s, cpu_set_t *set) +{ + int i; -#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_cpuidle) + assert(MAX_BIC < CPU_SETSIZE); -unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); -unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_cpuidle | BIC_pct_idle | BIC_APIC | BIC_X2APIC; + printf("%s:", s); -#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) -#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) -#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) -#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) -#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) -#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT) + for (i = 0; i <= MAX_BIC; ++i) { + + if (CPU_ISSET(i, set)) { + assert(i < MAX_BIC); + printf(" %s", bic[i].name); + } + } + putchar('\n'); +} + +static cpu_set_t bic_group_topology; +static cpu_set_t bic_group_thermal_pwr; +static cpu_set_t bic_group_frequency; +static cpu_set_t bic_group_hw_idle; +static cpu_set_t bic_group_sw_idle; +static cpu_set_t bic_group_idle; +static cpu_set_t bic_group_other; +static cpu_set_t bic_group_disabled_by_default; +static cpu_set_t bic_enabled; +static cpu_set_t bic_present; + +/* modify */ +#define BIC_INIT(set) CPU_ZERO(set) + +#define SET_BIC(COUNTER_NUMBER, set) 
CPU_SET(COUNTER_NUMBER, set) +#define CLR_BIC(COUNTER_NUMBER, set) CPU_CLR(COUNTER_NUMBER, set) + +#define BIC_PRESENT(COUNTER_NUMBER) SET_BIC(COUNTER_NUMBER, &bic_present) +#define BIC_NOT_PRESENT(COUNTER_NUMBER) CPU_CLR(COUNTER_NUMBER, &bic_present) + +/* test */ +#define BIC_IS_ENABLED(COUNTER_NUMBER) CPU_ISSET(COUNTER_NUMBER, &bic_enabled) +#define DO_BIC_READ(COUNTER_NUMBER) CPU_ISSET(COUNTER_NUMBER, &bic_present) +#define DO_BIC(COUNTER_NUMBER) (CPU_ISSET(COUNTER_NUMBER, &bic_enabled) && CPU_ISSET(COUNTER_NUMBER, &bic_present)) + +static void bic_set_all(cpu_set_t *set) +{ + int i; + + assert(MAX_BIC < CPU_SETSIZE); + + for (i = 0; i < MAX_BIC; ++i) + SET_BIC(i, set); +} + +/* + * bic_clear_bits() + * clear all the bits from "clr" in "dst" + */ +static void bic_clear_bits(cpu_set_t *dst, cpu_set_t *clr) +{ + int i; + + assert(MAX_BIC < CPU_SETSIZE); + + for (i = 0; i < MAX_BIC; ++i) + if (CPU_ISSET(i, clr)) + CLR_BIC(i, dst); +} + +static void bic_groups_init(void) +{ + BIC_INIT(&bic_group_topology); + SET_BIC(BIC_Package, &bic_group_topology); + SET_BIC(BIC_Node, &bic_group_topology); + SET_BIC(BIC_CoreCnt, &bic_group_topology); + SET_BIC(BIC_PkgCnt, &bic_group_topology); + SET_BIC(BIC_Core, &bic_group_topology); + SET_BIC(BIC_CPU, &bic_group_topology); + SET_BIC(BIC_Die, &bic_group_topology); + + BIC_INIT(&bic_group_thermal_pwr); + SET_BIC(BIC_CoreTmp, &bic_group_thermal_pwr); + SET_BIC(BIC_PkgTmp, &bic_group_thermal_pwr); + SET_BIC(BIC_PkgWatt, &bic_group_thermal_pwr); + SET_BIC(BIC_CorWatt, &bic_group_thermal_pwr); + SET_BIC(BIC_GFXWatt, &bic_group_thermal_pwr); + SET_BIC(BIC_RAMWatt, &bic_group_thermal_pwr); + SET_BIC(BIC_PKG__, &bic_group_thermal_pwr); + SET_BIC(BIC_RAM__, &bic_group_thermal_pwr); + SET_BIC(BIC_SysWatt, &bic_group_thermal_pwr); + + BIC_INIT(&bic_group_frequency); + SET_BIC(BIC_Avg_MHz, &bic_group_frequency); + SET_BIC(BIC_Busy, &bic_group_frequency); + SET_BIC(BIC_Bzy_MHz, &bic_group_frequency); + SET_BIC(BIC_TSC_MHz, 
&bic_group_frequency); + SET_BIC(BIC_GFXMHz, &bic_group_frequency); + SET_BIC(BIC_GFXACTMHz, &bic_group_frequency); + SET_BIC(BIC_SAMMHz, &bic_group_frequency); + SET_BIC(BIC_SAMACTMHz, &bic_group_frequency); + SET_BIC(BIC_UNCORE_MHZ, &bic_group_frequency); + + BIC_INIT(&bic_group_hw_idle); + SET_BIC(BIC_Busy, &bic_group_hw_idle); + SET_BIC(BIC_CPU_c1, &bic_group_hw_idle); + SET_BIC(BIC_CPU_c3, &bic_group_hw_idle); + SET_BIC(BIC_CPU_c6, &bic_group_hw_idle); + SET_BIC(BIC_CPU_c7, &bic_group_hw_idle); + SET_BIC(BIC_GFX_rc6, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc2, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc3, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc6, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc7, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc8, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc9, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc10, &bic_group_hw_idle); + SET_BIC(BIC_CPU_LPI, &bic_group_hw_idle); + SET_BIC(BIC_SYS_LPI, &bic_group_hw_idle); + SET_BIC(BIC_Mod_c6, &bic_group_hw_idle); + SET_BIC(BIC_Totl_c0, &bic_group_hw_idle); + SET_BIC(BIC_Any_c0, &bic_group_hw_idle); + SET_BIC(BIC_GFX_c0, &bic_group_hw_idle); + SET_BIC(BIC_CPUGFX, &bic_group_hw_idle); + SET_BIC(BIC_SAM_mc6, &bic_group_hw_idle); + SET_BIC(BIC_Diec6, &bic_group_hw_idle); + + BIC_INIT(&bic_group_sw_idle); + SET_BIC(BIC_Busy, &bic_group_sw_idle); + SET_BIC(BIC_cpuidle, &bic_group_sw_idle); + SET_BIC(BIC_pct_idle, &bic_group_sw_idle); + + BIC_INIT(&bic_group_idle); + CPU_OR(&bic_group_idle, &bic_group_idle, &bic_group_hw_idle); + SET_BIC(BIC_pct_idle, &bic_group_idle); + + BIC_INIT(&bic_group_other); + SET_BIC(BIC_IRQ, &bic_group_other); + SET_BIC(BIC_NMI, &bic_group_other); + SET_BIC(BIC_SMI, &bic_group_other); + SET_BIC(BIC_ThreadC, &bic_group_other); + SET_BIC(BIC_CoreTmp, &bic_group_other); + SET_BIC(BIC_IPC, &bic_group_other); + + BIC_INIT(&bic_group_disabled_by_default); + SET_BIC(BIC_USEC, &bic_group_disabled_by_default); + SET_BIC(BIC_TOD, &bic_group_disabled_by_default); + SET_BIC(BIC_cpuidle, 
&bic_group_disabled_by_default); + SET_BIC(BIC_APIC, &bic_group_disabled_by_default); + SET_BIC(BIC_X2APIC, &bic_group_disabled_by_default); + + BIC_INIT(&bic_enabled); + bic_set_all(&bic_enabled); + bic_clear_bits(&bic_enabled, &bic_group_disabled_by_default); + + BIC_INIT(&bic_present); + SET_BIC(BIC_USEC, &bic_present); + SET_BIC(BIC_TOD, &bic_present); + SET_BIC(BIC_cpuidle, &bic_present); + SET_BIC(BIC_APIC, &bic_present); + SET_BIC(BIC_X2APIC, &bic_present); + SET_BIC(BIC_pct_idle, &bic_present); +} /* * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit: @@ -1205,7 +1350,7 @@ struct rapl_counter_arch_info { int msr_shift; /* Positive mean shift right, negative mean shift left */ double *platform_rapl_msr_scale; /* Scale applied to values read by MSR (platform dependent, filled at runtime) */ unsigned int rci_index; /* Maps data from perf counters to global variables */ - unsigned long long bic; + unsigned int bic_number; double compat_scale; /* Some counters require constant scaling to be in the same range as other, similar ones */ unsigned long long flags; }; @@ -1220,7 +1365,20 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_energy_units, .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, - .bic = BIC_PkgWatt | BIC_Pkg_J, + .bic_number = BIC_PkgWatt, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_PKG, + .perf_subsys = "power", + .perf_name = "energy-pkg", + .msr = MSR_PKG_ENERGY_STATUS, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, + .bic_number = BIC_Pkg_J, .compat_scale = 1.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1233,7 +1391,20 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_energy_units, .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, - .bic = BIC_PkgWatt | 
BIC_Pkg_J, + .bic_number = BIC_PkgWatt, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_AMD_F17H, + .perf_subsys = "power", + .perf_name = "energy-pkg", + .msr = MSR_PKG_ENERGY_STAT, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, + .bic_number = BIC_Pkg_J, .compat_scale = 1.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1246,7 +1417,20 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_energy_units, .rci_index = RAPL_RCI_INDEX_ENERGY_CORES, - .bic = BIC_CorWatt | BIC_Cor_J, + .bic_number = BIC_CorWatt, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_CORE_ENERGY_STATUS, + .perf_subsys = "power", + .perf_name = "energy-cores", + .msr = MSR_PP0_ENERGY_STATUS, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_ENERGY_CORES, + .bic_number = BIC_Cor_J, .compat_scale = 1.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1259,7 +1443,20 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_dram_energy_units, .rci_index = RAPL_RCI_INDEX_DRAM, - .bic = BIC_RAMWatt | BIC_RAM_J, + .bic_number = BIC_RAMWatt, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_DRAM, + .perf_subsys = "power", + .perf_name = "energy-ram", + .msr = MSR_DRAM_ENERGY_STATUS, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_dram_energy_units, + .rci_index = RAPL_RCI_INDEX_DRAM, + .bic_number = BIC_RAM_J, .compat_scale = 1.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1272,7 +1469,20 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = 
&rapl_energy_units, .rci_index = RAPL_RCI_INDEX_GFX, - .bic = BIC_GFXWatt | BIC_GFX_J, + .bic_number = BIC_GFXWatt, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_GFX, + .perf_subsys = "power", + .perf_name = "energy-gpu", + .msr = MSR_PP1_ENERGY_STATUS, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_GFX, + .bic_number = BIC_GFX_J, .compat_scale = 1.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1285,7 +1495,7 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_time_units, .rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS, - .bic = BIC_PKG__, + .bic_number = BIC_PKG__, .compat_scale = 100.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1298,7 +1508,7 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_time_units, .rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS, - .bic = BIC_RAM__, + .bic_number = BIC_RAM__, .compat_scale = 100.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1311,7 +1521,20 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_energy_units, .rci_index = RAPL_RCI_INDEX_CORE_ENERGY, - .bic = BIC_CorWatt | BIC_Cor_J, + .bic_number = BIC_CorWatt, + .compat_scale = 1.0, + .flags = 0, + }, + { + .feature_mask = RAPL_AMD_F17H, + .perf_subsys = NULL, + .perf_name = NULL, + .msr = MSR_CORE_ENERGY_STAT, + .msr_mask = 0xFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_CORE_ENERGY, + .bic_number = BIC_Cor_J, .compat_scale = 1.0, .flags = 0, }, @@ -1324,7 +1547,20 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_psys_energy_units, .rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM, - .bic = 
BIC_SysWatt | BIC_Sys_J, + .bic_number = BIC_SysWatt, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_PLATFORM_COUNTER | RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_PSYS, + .perf_subsys = "power", + .perf_name = "energy-psys", + .msr = MSR_PLATFORM_ENERGY_STATUS, + .msr_mask = 0x00000000FFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_psys_energy_units, + .rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM, + .bic_number = BIC_Sys_J, .compat_scale = 1.0, .flags = RAPL_COUNTER_FLAG_PLATFORM_COUNTER | RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1373,7 +1609,7 @@ struct cstate_counter_arch_info { const char *perf_name; unsigned long long msr; unsigned int rci_index; /* Maps data from perf counters to global variables */ - unsigned long long bic; + unsigned int bic_number; unsigned long long flags; int pkg_cstate_limit; }; @@ -1385,7 +1621,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c1-residency", .msr = MSR_CORE_C1_RES, .rci_index = CCSTATE_RCI_INDEX_C1_RESIDENCY, - .bic = BIC_CPU_c1, + .bic_number = BIC_CPU_c1, .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD, .pkg_cstate_limit = 0, }, @@ -1395,7 +1631,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c3-residency", .msr = MSR_CORE_C3_RESIDENCY, .rci_index = CCSTATE_RCI_INDEX_C3_RESIDENCY, - .bic = BIC_CPU_c3, + .bic_number = BIC_CPU_c3, .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY, .pkg_cstate_limit = 0, }, @@ -1405,7 +1641,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c6-residency", .msr = MSR_CORE_C6_RESIDENCY, .rci_index = CCSTATE_RCI_INDEX_C6_RESIDENCY, - .bic = BIC_CPU_c6, + .bic_number = BIC_CPU_c6, .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY, .pkg_cstate_limit = 0, }, @@ -1415,7 +1651,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = 
"c7-residency", .msr = MSR_CORE_C7_RESIDENCY, .rci_index = CCSTATE_RCI_INDEX_C7_RESIDENCY, - .bic = BIC_CPU_c7, + .bic_number = BIC_CPU_c7, .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY, .pkg_cstate_limit = 0, }, @@ -1425,7 +1661,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c2-residency", .msr = MSR_PKG_C2_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C2_RESIDENCY, - .bic = BIC_Pkgpc2, + .bic_number = BIC_Pkgpc2, .flags = 0, .pkg_cstate_limit = PCL__2, }, @@ -1435,7 +1671,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c3-residency", .msr = MSR_PKG_C3_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C3_RESIDENCY, - .bic = BIC_Pkgpc3, + .bic_number = BIC_Pkgpc3, .flags = 0, .pkg_cstate_limit = PCL__3, }, @@ -1445,7 +1681,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c6-residency", .msr = MSR_PKG_C6_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C6_RESIDENCY, - .bic = BIC_Pkgpc6, + .bic_number = BIC_Pkgpc6, .flags = 0, .pkg_cstate_limit = PCL__6, }, @@ -1455,7 +1691,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c7-residency", .msr = MSR_PKG_C7_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C7_RESIDENCY, - .bic = BIC_Pkgpc7, + .bic_number = BIC_Pkgpc7, .flags = 0, .pkg_cstate_limit = PCL__7, }, @@ -1465,7 +1701,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c8-residency", .msr = MSR_PKG_C8_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C8_RESIDENCY, - .bic = BIC_Pkgpc8, + .bic_number = BIC_Pkgpc8, .flags = 0, .pkg_cstate_limit = PCL__8, }, @@ -1475,7 +1711,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c9-residency", .msr = MSR_PKG_C9_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C9_RESIDENCY, - .bic = BIC_Pkgpc9, + .bic_number = BIC_Pkgpc9, .flags = 0, .pkg_cstate_limit = PCL__9, }, @@ -1485,7 
+1721,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c10-residency", .msr = MSR_PKG_C10_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C10_RESIDENCY, - .bic = BIC_Pkgpc10, + .bic_number = BIC_Pkgpc10, .flags = 0, .pkg_cstate_limit = PCL_10, }, @@ -2180,10 +2416,13 @@ int get_msr_fd(int cpu) static void bic_disable_msr_access(void) { - const unsigned long bic_msrs = BIC_Mod_c6 | BIC_CoreTmp | - BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_PkgTmp; - - bic_enabled &= ~bic_msrs; + CLR_BIC(BIC_Mod_c6, &bic_enabled); + CLR_BIC(BIC_CoreTmp, &bic_enabled); + CLR_BIC(BIC_Totl_c0, &bic_enabled); + CLR_BIC(BIC_Any_c0, &bic_enabled); + CLR_BIC(BIC_GFX_c0, &bic_enabled); + CLR_BIC(BIC_CPUGFX, &bic_enabled); + CLR_BIC(BIC_PkgTmp, &bic_enabled); free_sys_msr_counters(); } @@ -2383,10 +2622,9 @@ void help(void) * for all the strings in comma separate name_list, * set the approprate bit in return value. */ -unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) +void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode) { unsigned int i; - unsigned long long retval = 0; while (name_list) { char *comma; @@ -2398,38 +2636,37 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) for (i = 0; i < MAX_BIC; ++i) { if (!strcmp(name_list, bic[i].name)) { - retval |= (1ULL << i); + SET_BIC(i, ret_set); break; } if (!strcmp(name_list, "all")) { - retval |= ~0; + bic_set_all(ret_set); break; } else if (!strcmp(name_list, "topology")) { - retval |= BIC_GROUP_TOPOLOGY; + CPU_OR(ret_set, ret_set, &bic_group_topology); break; } else if (!strcmp(name_list, "power")) { - retval |= BIC_GROUP_THERMAL_PWR; + CPU_OR(ret_set, ret_set, &bic_group_thermal_pwr); break; } else if (!strcmp(name_list, "idle")) { - retval |= BIC_GROUP_IDLE; + CPU_OR(ret_set, ret_set, &bic_group_idle); break; } else if (!strcmp(name_list, "swidle")) { - retval |= BIC_GROUP_SW_IDLE; + CPU_OR(ret_set, ret_set, 
&bic_group_sw_idle); break; } else if (!strcmp(name_list, "sysfs")) { /* legacy compatibility */ - retval |= BIC_GROUP_SW_IDLE; + CPU_OR(ret_set, ret_set, &bic_group_sw_idle); break; } else if (!strcmp(name_list, "hwidle")) { - retval |= BIC_GROUP_HW_IDLE; + CPU_OR(ret_set, ret_set, &bic_group_hw_idle); break; } else if (!strcmp(name_list, "frequency")) { - retval |= BIC_GROUP_FREQUENCY; + CPU_OR(ret_set, ret_set, &bic_group_frequency); break; } else if (!strcmp(name_list, "other")) { - retval |= BIC_OTHER; + CPU_OR(ret_set, ret_set, &bic_group_other); break; } - } if (i == MAX_BIC) { if (mode == SHOW_LIST) { @@ -2458,7 +2695,6 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) name_list++; } - return retval; } void print_header(char *delim) @@ -7345,21 +7581,28 @@ void rapl_probe_intel(void) unsigned long long msr; unsigned int time_unit; double tdp; - const unsigned long long bic_watt_bits = BIC_SysWatt | BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt; - const unsigned long long bic_joules_bits = BIC_Sys_J | BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J; - if (rapl_joules) - bic_enabled &= ~bic_watt_bits; - else - bic_enabled &= ~bic_joules_bits; + if (rapl_joules) { + CLR_BIC(BIC_SysWatt, &bic_enabled); + CLR_BIC(BIC_PkgWatt, &bic_enabled); + CLR_BIC(BIC_CorWatt, &bic_enabled); + CLR_BIC(BIC_RAMWatt, &bic_enabled); + CLR_BIC(BIC_GFXWatt, &bic_enabled); + } else { + CLR_BIC(BIC_Sys_J, &bic_enabled); + CLR_BIC(BIC_Pkg_J, &bic_enabled); + CLR_BIC(BIC_Cor_J, &bic_enabled); + CLR_BIC(BIC_RAM_J, &bic_enabled); + CLR_BIC(BIC_GFX_J, &bic_enabled); + } if (!platform->rapl_msrs || no_msr) return; if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS)) - bic_enabled &= ~BIC_PKG__; + CLR_BIC(BIC_PKG__, &bic_enabled); if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)) - bic_enabled &= ~BIC_RAM__; + CLR_BIC(BIC_RAM__, &bic_enabled); /* units on package 0, verify later other packages match */ if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) @@ 
-7398,13 +7641,14 @@ void rapl_probe_amd(void) { unsigned long long msr; double tdp; - const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt; - const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J; - if (rapl_joules) - bic_enabled &= ~bic_watt_bits; - else - bic_enabled &= ~bic_joules_bits; + if (rapl_joules) { + CLR_BIC(BIC_SysWatt, &bic_enabled); + CLR_BIC(BIC_CorWatt, &bic_enabled); + } else { + CLR_BIC(BIC_Pkg_J, &bic_enabled); + CLR_BIC(BIC_Cor_J, &bic_enabled); + } if (!platform->rapl_msrs || no_msr) return; @@ -8151,7 +8395,7 @@ void rapl_perf_init(void) enum rapl_unit unit; unsigned int next_domain; - if (!BIC_IS_ENABLED(cai->bic)) + if (!BIC_IS_ENABLED(cai->bic_number)) continue; memset(domain_visited, 0, num_domains * sizeof(*domain_visited)); @@ -8215,7 +8459,7 @@ void rapl_perf_init(void) /* If any CPU has access to the counter, make it present */ if (has_counter) - BIC_PRESENT(cai->bic); + BIC_PRESENT(cai->bic_number); } free(domain_visited); @@ -8436,7 +8680,7 @@ void cstate_perf_init_(bool soft_c1) if (!per_core && pkg_visited[pkg_id]) continue; - const bool counter_needed = BIC_IS_ENABLED(cai->bic) || + const bool counter_needed = BIC_IS_ENABLED(cai->bic_number) || (soft_c1 && (cai->flags & CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY)); const bool counter_supported = (platform->supported_cstates & cai->feature_mask); @@ -8463,7 +8707,7 @@ void cstate_perf_init_(bool soft_c1) /* If any CPU has access to the counter, make it present */ if (has_counter) - BIC_PRESENT(cai->bic); + BIC_PRESENT(cai->bic_number); } free(cores_visited); @@ -9199,7 +9443,7 @@ void check_msr_access(void) void check_perf_access(void) { if (no_perf || !BIC_IS_ENABLED(BIC_IPC) || !has_instr_count_access()) - bic_enabled &= ~BIC_IPC; + CLR_BIC(BIC_IPC, &bic_enabled); } bool perf_has_hybrid_devices(void) @@ -9768,8 +10012,8 @@ void turbostat_init() * disable more BICs, since it can't be reported accurately. 
*/ if (platform->enable_tsc_tweak && !has_base_hz) { - bic_enabled &= ~BIC_Busy; - bic_enabled &= ~BIC_Bzy_MHz; + CLR_BIC(BIC_Busy, &bic_enabled); + CLR_BIC(BIC_Bzy_MHz, &bic_enabled); } } @@ -10785,22 +11029,29 @@ void cmdline(int argc, char **argv) no_perf = 1; break; case 'e': - /* --enable specified counter */ - bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST); + /* --enable specified counter, without clearning existing list */ + bic_lookup(&bic_enabled, optarg, SHOW_LIST); break; case 'f': force_load++; break; case 'd': debug++; - ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); + bic_set_all(&bic_enabled); break; case 'H': /* * --hide: do not show those specified * multiple invocations simply clear more bits in enabled mask */ - bic_enabled &= ~bic_lookup(optarg, HIDE_LIST); + { + cpu_set_t bic_group_hide; + + BIC_INIT(&bic_group_hide); + + bic_lookup(&bic_group_hide, optarg, HIDE_LIST); + bic_clear_bits(&bic_enabled, &bic_group_hide); + } break; case 'h': default: @@ -10824,7 +11075,7 @@ void cmdline(int argc, char **argv) rapl_joules++; break; case 'l': - ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); + bic_set_all(&bic_enabled); list_header_only++; quiet++; break; @@ -10861,9 +11112,8 @@ void cmdline(int argc, char **argv) * subsequent invocations can add to it. 
*/ if (shown == 0) - bic_enabled = bic_lookup(optarg, SHOW_LIST); - else - bic_enabled |= bic_lookup(optarg, SHOW_LIST); + BIC_INIT(&bic_enabled); + bic_lookup(&bic_enabled, optarg, SHOW_LIST); shown = 1; break; case 'S': @@ -10900,6 +11150,8 @@ int main(int argc, char **argv) { int fd, ret; + bic_groups_init(); + fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY); if (fd < 0) goto skip_cgroup_setting; From 5f961fb2a7d8f4d89d64a9e2cd584738de5f9c58 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Mon, 14 Jul 2025 23:33:55 -0400 Subject: [PATCH 1751/2411] tools/power turbostat: probe and display L3 cache topology Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 34 ++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ee948e671741..54f270226746 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -195,6 +195,7 @@ struct msr_counter bic[] = { { 0x0, "APIC", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "X2APIC", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "Die", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "L3", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "GFXAMHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "IPC", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CoreThr", NULL, 0, 0, 0, NULL, 0 }, @@ -263,6 +264,7 @@ enum bic_names { BIC_APIC, BIC_X2APIC, BIC_Die, + BIC_L3, BIC_GFXACTMHz, BIC_IPC, BIC_CORE_THROT_CNT, @@ -292,7 +294,7 @@ void print_bic_set(char *s, cpu_set_t *set) if (CPU_ISSET(i, set)) { assert(i < MAX_BIC); printf(" %s", bic[i].name); - } + } } putchar('\n'); } @@ -357,6 +359,7 @@ static void bic_groups_init(void) SET_BIC(BIC_Core, &bic_group_topology); SET_BIC(BIC_CPU, &bic_group_topology); SET_BIC(BIC_Die, &bic_group_topology); + SET_BIC(BIC_L3, &bic_group_topology); BIC_INIT(&bic_group_thermal_pwr); SET_BIC(BIC_CoreTmp, &bic_group_thermal_pwr); @@ -2273,6 +2276,7 @@ struct platform_counters { struct cpu_topology { int 
physical_package_id; int die_id; + int l3_id; int logical_cpu_id; int physical_node_id; int logical_node_id; /* 0-based count within the package */ @@ -2294,6 +2298,7 @@ struct topo_params { int max_core_id; int max_package_id; int max_die_id; + int max_l3_id; int max_node_num; int nodes_per_pkg; int cores_per_node; @@ -2712,6 +2717,8 @@ void print_header(char *delim) outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); if (DO_BIC(BIC_Die)) outp += sprintf(outp, "%sDie", (printed++ ? delim : "")); + if (DO_BIC(BIC_L3)) + outp += sprintf(outp, "%sL3", (printed++ ? delim : "")); if (DO_BIC(BIC_Node)) outp += sprintf(outp, "%sNode", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) @@ -3183,6 +3190,8 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Die)) outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + if (DO_BIC(BIC_L3)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Node)) outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) @@ -3206,6 +3215,12 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data else outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } + if (DO_BIC(BIC_L3)) { + if (c) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].l3_id); + else + outp += sprintf(outp, "%s-", (printed++ ? 
delim : "")); + } if (DO_BIC(BIC_Node)) { if (t) outp += sprintf(outp, "%s%d", @@ -5911,6 +5926,11 @@ int get_die_id(int cpu) return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu); } +int get_l3_id(int cpu) +{ + return parse_int_file("/sys/devices/system/cpu/cpu%d/cache/index3/id", cpu); +} + int get_core_id(int cpu) { return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); @@ -9203,6 +9223,11 @@ void topology_probe(bool startup) if (cpus[i].die_id > topo.max_die_id) topo.max_die_id = cpus[i].die_id; + /* get l3 information */ + cpus[i].l3_id = get_l3_id(i); + if (cpus[i].l3_id > topo.max_l3_id) + topo.max_l3_id = cpus[i].l3_id; + /* get numa node information */ cpus[i].physical_node_id = get_physical_node_id(&cpus[i]); if (cpus[i].physical_node_id > topo.max_node_num) @@ -9235,6 +9260,9 @@ void topology_probe(bool startup) if (!summary_only && topo.num_die > 1) BIC_PRESENT(BIC_Die); + if (!summary_only && topo.max_l3_id > 0) + BIC_PRESENT(BIC_L3); + topo.num_packages = max_package_id + 1; if (debug > 1) fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); @@ -9258,8 +9286,8 @@ void topology_probe(bool startup) if (cpu_is_not_present(i)) continue; fprintf(outf, - "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n", - i, cpus[i].physical_package_id, cpus[i].die_id, + "cpu %d pkg %d die %d l3 %d node %d lnode %d core %d thread %d\n", + i, cpus[i].physical_package_id, cpus[i].die_id, cpus[i].l3_id, cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id); } From a5015d945de6003cf813af2bb11189982f5b3d54 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 22 Jul 2025 00:17:04 -0400 Subject: [PATCH 1752/2411] tools/power turbostat: delete GET_PKG() pkg_base[pkg_id] is a simple array of structure pointers, let the compiler treat it that way. 
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 54f270226746..dd1160144625 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2080,8 +2080,6 @@ struct pkg_data { ((node_no) * topo.cores_per_node) + \ (core_no)) -#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) - /* * The accumulated sum of MSR is defined as a monotonic * increasing MSR, it will be accumulated periodically, @@ -2345,16 +2343,15 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { struct thread_data *t; struct core_data *c; - struct pkg_data *p; + t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); if (cpu_is_not_allowed(t->cpu_id)) continue; c = GET_CORE(core_base, core_no, node_no, pkg_no); - p = GET_PKG(pkg_base, pkg_no); - retval |= func(t, c, p); + retval |= func(t, c, &pkg_base[pkg_no]); } } } @@ -6119,7 +6116,6 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { struct thread_data *t, *t2; struct core_data *c, *c2; - struct pkg_data *p, *p2; t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); @@ -6131,10 +6127,7 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, c = GET_CORE(core_base, core_no, node_no, pkg_no); c2 = GET_CORE(core_base2, core_no, node_no, pkg_no); - p = GET_PKG(pkg_base, pkg_no); - p2 = GET_PKG(pkg_base2, pkg_no); - - retval |= func(t, c, p, t2, c2, p2); + retval |= func(t, c, &pkg_base[pkg_no], t2, c2, &pkg_base2[pkg_no]); } } } @@ -9342,7 +9335,6 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base, int thread_id = cpus[cpu_id].thread_id; struct thread_data *t; 
struct core_data *c; - struct pkg_data *p; /* Workaround for systems where physical_node_id==-1 * and logical_node_id==(-1 - topo.num_cpus) @@ -9352,18 +9344,17 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base, t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id); c = GET_CORE(core_base, core_id, node_id, pkg_id); - p = GET_PKG(pkg_base, pkg_id); t->cpu_id = cpu_id; if (!cpu_is_not_allowed(cpu_id)) { if (c->base_cpu < 0) c->base_cpu = t->cpu_id; - if (p->base_cpu < 0) - p->base_cpu = t->cpu_id; + if (pkg_base[pkg_id].base_cpu < 0) + pkg_base[pkg_id].base_cpu = t->cpu_id; } c->core_id = core_id; - p->package_id = pkg_id; + pkg_base[pkg_id].package_id = pkg_id; } int initialize_counters(int cpu_id) From dcd1c379b0f179763956e8596ad99912165a95ec Mon Sep 17 00:00:00 2001 From: Michael Hebenstreit Date: Fri, 8 Aug 2025 15:57:53 -0400 Subject: [PATCH 1753/2411] tools/power turbostat: add format "average" for external attributes External attributes with format "raw" are not printed in summary lines for nodes/packages (or with option -S). The new format "average" behaves like "raw" but also adds the summary data. Signed-off-by: Michael Hebenstreit Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 3 ++- tools/power/x86/turbostat/turbostat.c | 30 ++++++++++++++++++--------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index db3888b8af12..3340def58d01 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -47,10 +47,11 @@ name as necessary to disambiguate it from others is necessary. Note that option MSRs are read as 64-bits, u32 truncates the displayed value to 32-bits. default: u64 - format: {\fBraw\fP | \fBdelta\fP | \fBpercent\fP} + format: {\fBraw\fP | \fBdelta\fP | \fBpercent\fP | \fBaverage\fP} 'raw' shows the MSR contents in hex.
'delta' shows the difference in values during the measurement interval. 'percent' shows the delta as a percentage of the cycles elapsed. + 'average' similar to raw, but also averaged for node/package summaries (or when using -S). default: delta name: "name_string" diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index dd1160144625..9ad3b1aa79ef 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2756,7 +2756,7 @@ void print_header(char *delim) for (mp = sys.tp; mp; mp = mp->next) { - if (mp->format == FORMAT_RAW) { + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { if (mp->width == 64) outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name); else @@ -2831,7 +2831,7 @@ void print_header(char *delim) } for (mp = sys.cp; mp; mp = mp->next) { - if (mp->format == FORMAT_RAW) { + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { if (mp->width == 64) outp += sprintf(outp, "%s%18.18s", delim, mp->name); else @@ -2961,7 +2961,7 @@ void print_header(char *delim) outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : "")); for (mp = sys.pp; mp; mp = mp->next) { - if (mp->format == FORMAT_RAW) { + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { if (mp->width == 64) outp += sprintf(outp, "%s%18.18s", delim, mp->name); else if (mp->width == 32) @@ -3282,7 +3282,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data /* Added counters */ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) { + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { if (mp->width == 32) outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]); @@ -3379,7 +3379,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s%lld", (printed++ ? 
delim : ""), c->core_throt_cnt); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) { + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { if (mp->width == 32) outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]); @@ -3578,7 +3578,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz); for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) { + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { if (mp->width == 32) outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]); @@ -3755,7 +3755,7 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value; for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) old->counter[i] = new->counter[i]; else if (mp->format == FORMAT_AVERAGE) old->counter[i] = new->counter[i]; @@ -3799,7 +3799,7 @@ void delta_core(struct core_data *new, struct core_data *old) DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) old->counter[i] = new->counter[i]; else old->counter[i] = new->counter[i] - old->counter[i]; @@ -3913,7 +3913,7 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d old->smi_count = new->smi_count - old->smi_count; for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) old->counter[i] = new->counter[i]; else old->counter[i] = new->counter[i] - old->counter[i]; @@ -10419,6 +10419,10 @@ void parse_add_command_msr(char 
*add_command) format = FORMAT_RAW; goto next; } + if (!strncmp(add_command, "average", strlen("average"))) { + format = FORMAT_AVERAGE; + goto next; + } if (!strncmp(add_command, "delta", strlen("delta"))) { format = FORMAT_DELTA; goto next; @@ -10691,13 +10695,19 @@ void parse_add_command_pmt(char *add_command) has_format = true; } + if (strcmp("average", format_name) == 0) { + format = FORMAT_AVERAGE; + has_format = true; + } + if (strcmp("delta", format_name) == 0) { format = FORMAT_DELTA; has_format = true; } if (!has_format) { - fprintf(stderr, "%s: Invalid format %s. Expected raw or delta\n", __func__, format_name); + fprintf(stderr, "%s: Invalid format %s. Expected raw, average or delta\n", + __func__, format_name); exit(1); } } From 3a088b07c4f10bf577f4a2392111704195a794ba Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 11 Jun 2025 14:50:26 +0800 Subject: [PATCH 1754/2411] tools/power turbostat: Fix DMR support Together with the RAPL MSRs, there are more MSRs gone on DMR, including PLR (Perf Limit Reasons), and IRTL (Package cstate Interrupt Response Time Limit) MSRs. The configurable TDP info should also be retrieved from TPMI based Intel Speed Select Technology feature. Remove the access of these MSRs for DMR. Improve the DMR platform feature table to make it more readable at the same time. 
Fixes: 83075bd59de2 ("tools/power turbostat: Add initial support for DMR") Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 29 ++++++++++++++------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9ad3b1aa79ef..e540bb0bb093 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -989,20 +989,21 @@ static const struct platform_features spr_features = { }; static const struct platform_features dmr_features = { - .has_msr_misc_feature_control = spr_features.has_msr_misc_feature_control, - .has_msr_misc_pwr_mgmt = spr_features.has_msr_misc_pwr_mgmt, - .has_nhm_msrs = spr_features.has_nhm_msrs, - .has_config_tdp = spr_features.has_config_tdp, - .bclk_freq = spr_features.bclk_freq, - .supported_cstates = spr_features.supported_cstates, - .cst_limit = spr_features.cst_limit, - .has_msr_core_c1_res = spr_features.has_msr_core_c1_res, - .has_msr_module_c6_res_ms = 1, /* DMR has Dual Core Module and MC6 MSR */ - .has_irtl_msrs = spr_features.has_irtl_msrs, - .has_cst_prewake_bit = spr_features.has_cst_prewake_bit, - .has_fixed_rapl_psys_unit = spr_features.has_fixed_rapl_psys_unit, - .trl_msrs = spr_features.trl_msrs, - .rapl_msrs = 0, /* DMR does not have RAPL MSRs */ + .has_msr_misc_feature_control = spr_features.has_msr_misc_feature_control, + .has_msr_misc_pwr_mgmt = spr_features.has_msr_misc_pwr_mgmt, + .has_nhm_msrs = spr_features.has_nhm_msrs, + .bclk_freq = spr_features.bclk_freq, + .supported_cstates = spr_features.supported_cstates, + .cst_limit = spr_features.cst_limit, + .has_msr_core_c1_res = spr_features.has_msr_core_c1_res, + .has_cst_prewake_bit = spr_features.has_cst_prewake_bit, + .has_fixed_rapl_psys_unit = spr_features.has_fixed_rapl_psys_unit, + .trl_msrs = spr_features.trl_msrs, + .has_msr_module_c6_res_ms = 1, /* DMR has Dual-Core-Module and MC6 MSR */ + 
.rapl_msrs = 0, /* DMR does not have RAPL MSRs */ + .plr_msrs = 0, /* DMR does not have PLR MSRs */ + .has_irtl_msrs = 0, /* DMR does not have IRTL MSRs */ + .has_config_tdp = 0, /* DMR does not have CTDP MSRs */ }; static const struct platform_features srf_features = { From 378e901160256d2ab66e45ffb97afaca51e65706 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 8 Aug 2025 19:30:07 -0400 Subject: [PATCH 1755/2411] tools/power turbostat: standardize PER_THREAD_PARAMS use a macro for PER_THREAD_PARAMS to make adding one later more clear. no functional change Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 42 ++++++++++++++------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index e540bb0bb093..d65a504a6c5f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2331,6 +2331,8 @@ int cpu_is_not_allowed(int cpu) * skip non-present cpus */ +#define PER_THREAD_PARAMS struct thread_data *t, struct core_data *c, struct pkg_data *p + int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *), struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) { @@ -2360,21 +2362,21 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk return retval; } -int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int is_cpu_first_thread_in_core(PER_THREAD_PARAMS) { UNUSED(p); return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0); } -int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int is_cpu_first_core_in_package(PER_THREAD_PARAMS) { UNUSED(c); return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0); } -int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int 
is_cpu_first_thread_in_package(PER_THREAD_PARAMS) { return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p); } @@ -3020,7 +3022,7 @@ void print_header(char *delim) outp += sprintf(outp, "\n"); } -int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int dump_counters(PER_THREAD_PARAMS) { int i; struct msr_counter *mp; @@ -3135,7 +3137,7 @@ double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desir /* * column formatting convention & formats */ -int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int format_counters(PER_THREAD_PARAMS) { static int count; @@ -3677,7 +3679,7 @@ void flush_output_stderr(void) outp = output_buffer; } -void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +void format_all_counters(PER_THREAD_PARAMS) { static int count; @@ -3968,7 +3970,7 @@ void rapl_counter_clear(struct rapl_counter *c) c->unit = RAPL_UNIT_INVALID; } -void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +void clear_counters(PER_THREAD_PARAMS) { int i; struct msr_counter *mp; @@ -4065,7 +4067,7 @@ void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter dst->raw_value += src->raw_value; } -int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int sum_counters(PER_THREAD_PARAMS) { int i; struct msr_counter *mp; @@ -4213,7 +4215,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) * sum the counters for all cpus in the system * compute the weighted average */ -void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p) +void compute_average(PER_THREAD_PARAMS) { int i; struct msr_counter *mp; @@ -4796,7 +4798,7 @@ char *find_sysfs_path_by_id(struct sysfs_path *sp, int id) return NULL; } -int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_data *c, struct pkg_data 
*p) +int get_cstate_counters(unsigned int cpu, PER_THREAD_PARAMS) { /* * Overcommit memory a little bit here, @@ -5096,7 +5098,7 @@ static inline int get_rapl_domain_id(int cpu) * migrate to cpu * acquire and record local counters for that cpu */ -int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int get_counters(PER_THREAD_PARAMS) { int cpu = t->cpu_id; unsigned long long msr; @@ -6586,7 +6588,7 @@ int get_msr_sum(int cpu, off_t offset, unsigned long long *msr) timer_t timerid; /* Timer callback, update the sum of MSRs periodically. */ -static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p) +static int update_msr_sum(PER_THREAD_PARAMS) { int i, ret; int cpu = t->cpu_id; @@ -7332,7 +7334,7 @@ static void dump_sysfs_pstate_config(void) * print_epb() * Decode the ENERGY_PERF_BIAS MSR */ -int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int print_epb(PER_THREAD_PARAMS) { char *epb_string; int cpu, epb; @@ -7381,7 +7383,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) * print_hwp() * Decode the MSR_HWP_CAPABILITIES */ -int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int print_hwp(PER_THREAD_PARAMS) { unsigned long long msr; int cpu; @@ -7470,7 +7472,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) /* * print_perf_limit() */ -int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int print_perf_limit(PER_THREAD_PARAMS) { unsigned long long msr; int cpu; @@ -7845,7 +7847,7 @@ static int print_rapl_sysfs(void) return 0; } -int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int print_rapl(PER_THREAD_PARAMS) { unsigned long long msr; const char *msr_name; @@ -7999,7 +8001,7 @@ void probe_rapl(void) * below this value, including the Digital Thermal Sensor (DTS), * Package Thermal Management Sensor (PTM), and thermal event 
thresholds. */ -int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int set_temperature_target(PER_THREAD_PARAMS) { unsigned long long msr; unsigned int tcc_default, tcc_offset; @@ -8067,7 +8069,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk return 0; } -int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int print_thermal(PER_THREAD_PARAMS) { unsigned long long msr; unsigned int dts, dts2; @@ -8147,7 +8149,7 @@ void probe_thermal(void) for_all_cpus(print_thermal, ODD_COUNTERS); } -int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int get_cpu_type(PER_THREAD_PARAMS) { unsigned int eax, ebx, ecx, edx; @@ -9395,7 +9397,7 @@ void allocate_irq_buffers(void) err(-1, "calloc %d NMI", topo.max_cpu_num + 1); } -int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int update_topo(PER_THREAD_PARAMS) { topo.allowed_cpus++; if ((int)t->cpu_id == c->base_cpu) From e60a13bcef206795d3ddf82f130fe8f570176d06 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 9 Aug 2025 16:31:31 -0400 Subject: [PATCH 1756/2411] tools/power turbostat: Handle non-root legacy-uncore sysfs permissions /sys/devices/system/cpu/intel_uncore_frequency/package_X_die_Y/ may be readable by all, but /sys/devices/system/cpu/intel_uncore_frequency/package_X_die_Y/current_freq_khz may be readable only by root. Non-root turbostat users see complaints in this scenario. Fail probe of the interface if we can't read current_freq_khz. 
Reported-by: Artem Bityutskiy Original-patch-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d65a504a6c5f..76f4093959d5 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -7002,7 +7002,8 @@ static void probe_intel_uncore_frequency_legacy(void) sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i, j); - if (access(path_base, R_OK)) + sprintf(path, "%s/current_freq_khz", path_base); + if (access(path, R_OK)) continue; BIC_PRESENT(BIC_UNCORE_MHZ); From 5e98a5e73edcc4114c5ad10596db87e24f50ee4d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 9 Aug 2025 21:08:26 -0400 Subject: [PATCH 1757/2411] tools/power turbostat: version 2025.09.09 Probe and display L3 Cache topology Add ability to average an added counter (useful for pre-integrated "counters", such as Watts) Break the limit of 64 built-in counters. 
Assorted bug fixes and minor feature tweaks Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 76f4093959d5..72a280e7a9d5 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -10126,7 +10126,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2025.06.08 - Len Brown \n"); + fprintf(outf, "turbostat version 2025.09.09 - Len Brown \n"); } #define COMMAND_LINE_SIZE 2048 From 8f5ae30d69d7543eee0d70083daf4de8fe15d585 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 10 Aug 2025 19:41:16 +0300 Subject: [PATCH 1758/2411] Linux 6.17-rc1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 37e37565515e..6bfe776bf3c5 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 -PATCHLEVEL = 16 +PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Baby Opossum Posse # *DOCUMENTATION* From eb5ca9094a18fb98777bf4814ea84c93bf7c271d Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Sun, 27 Jul 2025 12:59:06 +0200 Subject: [PATCH 1759/2411] mm/vmscan: fix inverted polarity in lru_gen_seq_show() Commit a7694ff11aa9 ("vmscan: don't bother with debugfs_real_fops()") started using debugfs_get_aux_num() to distinguish between the RW "lru_gen" and the RO "lru_gen_full" file [1]. Willy reported the inverted polarity [2] and Al fixed it up in [3]. However, the patch in [1] was applied. Hence, fix this up accordingly. 
Cc: Alexander Viro Cc: Matthew Wilcox Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/all/20250704040720.GP1880847@ZenIV/ [1] Link: https://lore.kernel.org/all/aGZu3Z730FQtqxsE@casper.infradead.org/ [2] Link: https://lore.kernel.org/all/20250704040720.GP1880847@ZenIV/ [3] Fixes: a7694ff11aa9 ("vmscan: don't bother with debugfs_real_fops()") Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250727105937.7480-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- mm/vmscan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 7de11524a936..a48aec8bfd92 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -5772,9 +5772,9 @@ static int __init init_lru_gen(void) if (sysfs_create_group(mm_kobj, &lru_gen_attr_group)) pr_err("lru_gen: failed to create sysfs group\n"); - debugfs_create_file_aux_num("lru_gen", 0644, NULL, NULL, 1, + debugfs_create_file_aux_num("lru_gen", 0644, NULL, NULL, false, &lru_gen_rw_fops); - debugfs_create_file_aux_num("lru_gen_full", 0444, NULL, NULL, 0, + debugfs_create_file_aux_num("lru_gen_full", 0444, NULL, NULL, true, &lru_gen_ro_fops); return 0; From 0af1561b2d60bab2a2b00720a5c7b292ecc549ec Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 8 Aug 2025 12:20:17 -0300 Subject: [PATCH 1760/2411] smb: client: fix race with concurrent opens in unlink(2) According to some logs reported by customers, CIFS client might end up reporting unlinked files as existing in stat(2) due to concurrent opens racing with unlink(2). Besides sending the removal request to the server, the unlink process could involve closing any deferred close as well as marking all existing open handles as deleted to prevent them from deferring closes, which increases the race window for potential concurrent opens. Fix this by unhashing the dentry in cifs_unlink() to prevent any subsequent opens. Any open attempts, while we're still unlinking, will block on parent's i_rwsem. 
Reported-by: Jay Shin Signed-off-by: Paulo Alcantara (Red Hat) Reviewed-by: David Howells Cc: Al Viro Cc: linux-cifs@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/inode.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 75be4b46bc6f..cf9060f0fc08 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1943,15 +1943,24 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct tcon_link *tlink; struct cifs_tcon *tcon; + __u32 dosattr = 0, origattr = 0; struct TCP_Server_Info *server; struct iattr *attrs = NULL; - __u32 dosattr = 0, origattr = 0; + bool rehash = false; cifs_dbg(FYI, "cifs_unlink, dir=0x%p, dentry=0x%p\n", dir, dentry); if (unlikely(cifs_forced_shutdown(cifs_sb))) return -EIO; + /* Unhash dentry in advance to prevent any concurrent opens */ + spin_lock(&dentry->d_lock); + if (!d_unhashed(dentry)) { + __d_drop(dentry); + rehash = true; + } + spin_unlock(&dentry->d_lock); + tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return PTR_ERR(tlink); @@ -2003,7 +2012,8 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) cifs_drop_nlink(inode); } } else if (rc == -ENOENT) { - d_drop(dentry); + if (simple_positive(dentry)) + d_delete(dentry); } else if (rc == -EBUSY) { if (server->ops->rename_pending_delete) { rc = server->ops->rename_pending_delete(full_path, @@ -2056,6 +2066,8 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) kfree(attrs); free_xid(xid); cifs_put_tlink(tlink); + if (rehash) + d_rehash(dentry); return rc; } From d84291fc7453df7881a970716f8256273aca5747 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 8 Aug 2025 11:43:29 -0300 Subject: [PATCH 1761/2411] smb: client: fix race with concurrent opens in rename(2) Besides sending the rename request to the server, the rename process also involves closing any deferred close, waiting for outstanding I/O to 
complete as well as marking all existing open handles as deleted to prevent them from deferring closes, which increases the race window for potential concurrent opens on the target file. Fix this by unhashing the dentry in advance to prevent any concurrent opens on the target. Signed-off-by: Paulo Alcantara (Red Hat) Reviewed-by: David Howells Cc: Al Viro Cc: linux-cifs@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/inode.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index cf9060f0fc08..fe453a4b3dc8 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -2474,6 +2474,7 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir, struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; struct cifs_tcon *tcon; + bool rehash = false; unsigned int xid; int rc, tmprc; int retry_count = 0; @@ -2489,6 +2490,17 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir, if (unlikely(cifs_forced_shutdown(cifs_sb))) return -EIO; + /* + * Prevent any concurrent opens on the target by unhashing the dentry. + * VFS already unhashes the target when renaming directories. + */ + if (d_is_positive(target_dentry) && !d_is_dir(target_dentry)) { + if (!d_unhashed(target_dentry)) { + d_drop(target_dentry); + rehash = true; + } + } + tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return PTR_ERR(tlink); @@ -2530,6 +2542,8 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir, } } + if (!rc) + rehash = false; /* * No-replace is the natural behavior for CIFS, so skip unlink hacks. 
*/ @@ -2588,12 +2602,16 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir, goto cifs_rename_exit; rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, to_name); + if (!rc) + rehash = false; } /* force revalidate to go get info when needed */ CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0; cifs_rename_exit: + if (rehash) + d_rehash(target_dentry); kfree(info_buf_source); free_dentry_path(page2); free_dentry_path(page1); From 0e270f32975fd21874185ba53653630dd40bf560 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 7 Aug 2025 10:03:18 +0800 Subject: [PATCH 1762/2411] ASoC: fsl_sai: replace regmap_write with regmap_update_bits Using regmap_write() for the software reset in fsl_sai_config_disable() would cause the FSL_SAI_CSR_BCE bit to be cleared. Refer to commit 197c53c8ecb34 ("ASoC: fsl_sai: Don't disable bitclock for i.MX8MP"): FSL_SAI_CSR_BCE should not be cleared. So regmap_update_bits() must be used instead of regmap_write() for these bit operations. Fixes: dc78f7e59169d ("ASoC: fsl_sai: Force a software reset when starting in consumer mode") Signed-off-by: Shengjiu Wang Link: https://patch.msgid.link/20250807020318.2143219-1-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_sai.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index c313b654236c..d0367b21f775 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -809,9 +809,9 @@ static void fsl_sai_config_disable(struct fsl_sai *sai, int dir) * are running concurrently.
*/ /* Software Reset */ - regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), FSL_SAI_CSR_SR); + regmap_update_bits(sai->regmap, FSL_SAI_xCSR(tx, ofs), FSL_SAI_CSR_SR, FSL_SAI_CSR_SR); /* Clear SR bit to finish the reset */ - regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), 0); + regmap_update_bits(sai->regmap, FSL_SAI_xCSR(tx, ofs), FSL_SAI_CSR_SR, 0); } static int fsl_sai_trigger(struct snd_pcm_substream *substream, int cmd, @@ -930,11 +930,11 @@ static int fsl_sai_dai_probe(struct snd_soc_dai *cpu_dai) unsigned int ofs = sai->soc_data->reg_offset; /* Software Reset for both Tx and Rx */ - regmap_write(sai->regmap, FSL_SAI_TCSR(ofs), FSL_SAI_CSR_SR); - regmap_write(sai->regmap, FSL_SAI_RCSR(ofs), FSL_SAI_CSR_SR); + regmap_update_bits(sai->regmap, FSL_SAI_TCSR(ofs), FSL_SAI_CSR_SR, FSL_SAI_CSR_SR); + regmap_update_bits(sai->regmap, FSL_SAI_RCSR(ofs), FSL_SAI_CSR_SR, FSL_SAI_CSR_SR); /* Clear SR bit to finish the reset */ - regmap_write(sai->regmap, FSL_SAI_TCSR(ofs), 0); - regmap_write(sai->regmap, FSL_SAI_RCSR(ofs), 0); + regmap_update_bits(sai->regmap, FSL_SAI_TCSR(ofs), FSL_SAI_CSR_SR, 0); + regmap_update_bits(sai->regmap, FSL_SAI_RCSR(ofs), FSL_SAI_CSR_SR, 0); regmap_update_bits(sai->regmap, FSL_SAI_TCR1(ofs), FSL_SAI_CR1_RFW_MASK(sai->soc_data->fifo_depth), @@ -1824,11 +1824,11 @@ static int fsl_sai_runtime_resume(struct device *dev) regcache_cache_only(sai->regmap, false); regcache_mark_dirty(sai->regmap); - regmap_write(sai->regmap, FSL_SAI_TCSR(ofs), FSL_SAI_CSR_SR); - regmap_write(sai->regmap, FSL_SAI_RCSR(ofs), FSL_SAI_CSR_SR); + regmap_update_bits(sai->regmap, FSL_SAI_TCSR(ofs), FSL_SAI_CSR_SR, FSL_SAI_CSR_SR); + regmap_update_bits(sai->regmap, FSL_SAI_RCSR(ofs), FSL_SAI_CSR_SR, FSL_SAI_CSR_SR); usleep_range(1000, 2000); - regmap_write(sai->regmap, FSL_SAI_TCSR(ofs), 0); - regmap_write(sai->regmap, FSL_SAI_RCSR(ofs), 0); + regmap_update_bits(sai->regmap, FSL_SAI_TCSR(ofs), FSL_SAI_CSR_SR, 0); + regmap_update_bits(sai->regmap, FSL_SAI_RCSR(ofs), 
FSL_SAI_CSR_SR, 0); ret = regcache_sync(sai->regmap); if (ret) From 43e0da37d5cfb23eec6aeee9422f84d86621ce2b Mon Sep 17 00:00:00 2001 From: Alexey Klimov Date: Wed, 6 Aug 2025 15:00:30 +0100 Subject: [PATCH 1763/2411] ASoC: codecs: tx-macro: correct tx_macro_component_drv name We already have a component driver named "RX-MACRO", which is lpass-rx-macro.c. The tx macro component driver's name should be "TX-MACRO" accordingly. Fix it. Cc: Srinivas Kandagatla Signed-off-by: Alexey Klimov Reviewed-by: Neil Armstrong Link: https://patch.msgid.link/20250806140030.691477-1-alexey.klimov@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-tx-macro.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c index 40d79bee4584..1da34cb3505f 100644 --- a/sound/soc/codecs/lpass-tx-macro.c +++ b/sound/soc/codecs/lpass-tx-macro.c @@ -2229,7 +2229,7 @@ static int tx_macro_register_mclk_output(struct tx_macro *tx) } static const struct snd_soc_component_driver tx_macro_component_drv = { - .name = "RX-MACRO", + .name = "TX-MACRO", .probe = tx_macro_component_probe, .controls = tx_macro_snd_controls, .num_controls = ARRAY_SIZE(tx_macro_snd_controls), From 7cdadac0d2b3614d04651be7104a89a1998efec0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 8 Aug 2025 11:53:24 +0100 Subject: [PATCH 1764/2411] ASoC: codec: sma1307: replace spelling mistake with new error message There is a spelling mistake in a failure message, replace the message with something a little more meaningful. 
Signed-off-by: Colin Ian King Link: https://patch.msgid.link/20250808105324.829883-1-colin.i.king@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/sma1307.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/sma1307.c b/sound/soc/codecs/sma1307.c index b3d401ada176..6a601e7134ea 100644 --- a/sound/soc/codecs/sma1307.c +++ b/sound/soc/codecs/sma1307.c @@ -1749,7 +1749,7 @@ static void sma1307_setting_loaded(struct sma1307_priv *sma1307, const char *fil sma1307->set.header_size * sizeof(int)); if ((sma1307->set.checksum >> 8) != SMA1307_SETTING_CHECKSUM) { - dev_err(sma1307->dev, "%s: failed by dismatch \"%s\"\n", + dev_err(sma1307->dev, "%s: checksum failed \"%s\"\n", __func__, setting_file); sma1307->set.status = false; return; From f13ab498726bb6c636d6c5cd8c7df911444316dc Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 6 Aug 2025 23:34:52 +0000 Subject: [PATCH 1765/2411] ASoC: generic: tidyup standardized ASoC menu for generic commit acc84d15e45393fb ("ASoC: generic: Standardize ASoC menu") standardized ASoC generic menu. Then, it moved generic menu position under SoC group. It should be kept generic position. Tidyup it. 
Suggested-by: Geert Uytterhoeven Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87v7n0c9d0.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/Kconfig b/sound/soc/Kconfig index bf362bfca456..ce74818bd715 100644 --- a/sound/soc/Kconfig +++ b/sound/soc/Kconfig @@ -111,7 +111,6 @@ source "sound/soc/bcm/Kconfig" source "sound/soc/cirrus/Kconfig" source "sound/soc/dwc/Kconfig" source "sound/soc/fsl/Kconfig" -source "sound/soc/generic/Kconfig" source "sound/soc/google/Kconfig" source "sound/soc/hisilicon/Kconfig" source "sound/soc/jz4740/Kconfig" @@ -149,5 +148,8 @@ source "sound/soc/codecs/Kconfig" source "sound/soc/sdw_utils/Kconfig" +# generic frame-work +source "sound/soc/generic/Kconfig" + endif # SND_SOC From 633e391d45bda3fc848d26bee6bbe57ef2935713 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Fri, 8 Aug 2025 13:57:06 +0800 Subject: [PATCH 1766/2411] ASoC: rt721: fix FU33 Boost Volume control not working This patch fixed FU33 Boost Volume control not working. 
Signed-off-by: Shuming Fan Link: https://patch.msgid.link/20250808055706.1110766-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt721-sdca.c | 2 ++ sound/soc/codecs/rt721-sdca.h | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/sound/soc/codecs/rt721-sdca.c b/sound/soc/codecs/rt721-sdca.c index f6f7c2ffde1c..a4bd29d7220b 100644 --- a/sound/soc/codecs/rt721-sdca.c +++ b/sound/soc/codecs/rt721-sdca.c @@ -278,6 +278,8 @@ static void rt721_sdca_jack_preset(struct rt721_sdca_priv *rt721) RT721_ENT_FLOAT_CTL1, 0x4040); rt_sdca_index_write(rt721->mbq_regmap, RT721_HDA_SDCA_FLOAT, RT721_ENT_FLOAT_CTL4, 0x1201); + rt_sdca_index_write(rt721->mbq_regmap, RT721_BOOST_CTRL, + RT721_BST_4CH_TOP_GATING_CTRL1, 0x002a); regmap_write(rt721->regmap, 0x2f58, 0x07); } diff --git a/sound/soc/codecs/rt721-sdca.h b/sound/soc/codecs/rt721-sdca.h index 0a82c107b19a..71fac9cd8739 100644 --- a/sound/soc/codecs/rt721-sdca.h +++ b/sound/soc/codecs/rt721-sdca.h @@ -56,6 +56,7 @@ struct rt721_sdca_dmic_kctrl_priv { #define RT721_CBJ_CTRL 0x0a #define RT721_CAP_PORT_CTRL 0x0c #define RT721_CLASD_AMP_CTRL 0x0d +#define RT721_BOOST_CTRL 0x0f #define RT721_VENDOR_REG 0x20 #define RT721_RC_CALIB_CTRL 0x40 #define RT721_VENDOR_EQ_L 0x53 @@ -93,6 +94,9 @@ struct rt721_sdca_dmic_kctrl_priv { /* Index (NID:0dh) */ #define RT721_CLASD_AMP_2CH_CAL 0x14 +/* Index (NID:0fh) */ +#define RT721_BST_4CH_TOP_GATING_CTRL1 0x05 + /* Index (NID:20h) */ #define RT721_JD_PRODUCT_NUM 0x00 #define RT721_ANALOG_BIAS_CTL3 0x04 From f48d7a1b0bf11d16d8c9f77a5b9c80a82272f625 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Thu, 7 Aug 2025 17:24:32 +0800 Subject: [PATCH 1767/2411] ASoC: rt1320: fix random cycle mute issue This patch fixed the random cycle mute issue that occurs during long-time playback. 
Signed-off-by: Shuming Fan Link: https://patch.msgid.link/20250807092432.997989-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt1320-sdw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt1320-sdw.c b/sound/soc/codecs/rt1320-sdw.c index b13d7a99bf63..dcddc28e8856 100644 --- a/sound/soc/codecs/rt1320-sdw.c +++ b/sound/soc/codecs/rt1320-sdw.c @@ -109,6 +109,7 @@ static const struct reg_sequence rt1320_blind_write[] = { { 0x0000d540, 0x01 }, { 0xd172, 0x2a }, { 0xc5d6, 0x01 }, + { 0xd478, 0xff }, }; static const struct reg_sequence rt1320_vc_blind_write[] = { @@ -159,7 +160,7 @@ static const struct reg_sequence rt1320_vc_blind_write[] = { { 0xd471, 0x3a }, { 0xd474, 0x11 }, { 0xd475, 0x32 }, - { 0xd478, 0x64 }, + { 0xd478, 0xff }, { 0xd479, 0x20 }, { 0xd47a, 0x10 }, { 0xd47c, 0xff }, From b11f2a9745401d9ccc51c91b5482044d2ea936e8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 8 Aug 2025 11:49:43 +0100 Subject: [PATCH 1768/2411] ASoC: tas2781: Fix spelling mistake "dismatch" -> "mismatch" There is a spelling mistake (or neologism of dis and match) in a dev_err message. Fix it. 
Signed-off-by: Colin Ian King Link: https://patch.msgid.link/20250808104943.829668-1-colin.i.king@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas2781-i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index 9f4d965a1335..8e7e45c046b8 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -1480,7 +1480,7 @@ static ssize_t acoustic_ctl_write(struct file *file, return PTR_ERR(src); if (src[0] > max_pkg_len && src[0] != count) { - dev_err(priv->dev, "pkg(%u), max(%u), count(%u) dismatch.\n", + dev_err(priv->dev, "pkg(%u), max(%u), count(%u) mismatch.\n", src[0], max_pkg_len, (unsigned int)count); ret = 0; goto exit; From c6993c4cb91803fceb82d6b5e0ec5e0aec2d0ad6 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Mon, 4 Aug 2025 16:20:31 +0800 Subject: [PATCH 1769/2411] erofs: Fallback to normal access if DAX is not supported on extra device If using multiple devices, we should check if the extra device support DAX instead of checking the primary device when deciding if to use DAX to access a file. If an extra device does not support DAX we should fallback to normal access otherwise the data on that device will be inaccessible. 
Signed-off-by: Yuezhang Mo Reviewed-by: Friendy Su Reviewed-by: Jacky Cao Reviewed-by: Daniel Palmer Reviewed-by: Gao Xiang Reviewed-by: Hongbo Li Link: https://lore.kernel.org/r/20250804082030.3667257-2-Yuezhang.Mo@sony.com Signed-off-by: Gao Xiang --- fs/erofs/super.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index e1020aa60771..8c7a5985b4ee 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -174,6 +174,11 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, if (!erofs_is_fileio_mode(sbi)) { dif->dax_dev = fs_dax_get_by_bdev(file_bdev(file), &dif->dax_part_off, NULL, NULL); + if (!dif->dax_dev && test_opt(&sbi->opt, DAX_ALWAYS)) { + erofs_info(sb, "DAX unsupported by %s. Turning off DAX.", + dif->path); + clear_opt(&sbi->opt, DAX_ALWAYS); + } } else if (!S_ISREG(file_inode(file)->i_mode)) { fput(file); return -EINVAL; @@ -210,8 +215,13 @@ static int erofs_scan_devices(struct super_block *sb, ondisk_extradevs, sbi->devs->extra_devices); return -EINVAL; } - if (!ondisk_extradevs) + if (!ondisk_extradevs) { + if (test_opt(&sbi->opt, DAX_ALWAYS) && !sbi->dif0.dax_dev) { + erofs_info(sb, "DAX unsupported by block device. Turning off DAX."); + clear_opt(&sbi->opt, DAX_ALWAYS); + } return 0; + } if (!sbi->devs->extra_devices && !erofs_is_fscache_mode(sb)) sbi->devs->flatdev = true; @@ -338,7 +348,6 @@ static int erofs_read_superblock(struct super_block *sb) if (ret < 0) goto out; - /* handle multiple devices */ ret = erofs_scan_devices(sb, dsb); if (erofs_sb_has_48bit(sbi)) @@ -671,14 +680,9 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) return invalfc(fc, "cannot use fsoffset in fscache mode"); } - if (test_opt(&sbi->opt, DAX_ALWAYS)) { - if (!sbi->dif0.dax_dev) { - errorfc(fc, "DAX unsupported by block device. 
Turning off DAX."); - clear_opt(&sbi->opt, DAX_ALWAYS); - } else if (sbi->blkszbits != PAGE_SHIFT) { - errorfc(fc, "unsupported blocksize for DAX"); - clear_opt(&sbi->opt, DAX_ALWAYS); - } + if (test_opt(&sbi->opt, DAX_ALWAYS) && sbi->blkszbits != PAGE_SHIFT) { + erofs_info(sb, "unsupported blocksize for DAX"); + clear_opt(&sbi->opt, DAX_ALWAYS); } sb->s_time_gran = 1; From 74da24f0ac9b8aabfb8d7feeba6c32ddff3065e0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 30 Jul 2025 14:44:49 +0200 Subject: [PATCH 1770/2411] erofs: Do not select tristate symbols from bool symbols The EROFS filesystem has many configurable options, controlled through boolean Kconfig symbols. When enabled, these options may need to enable additional library functionality elsewhere. Currently this is done by selecting the symbol for the additional functionality. However, if EROFS_FS itself is modular, and the target symbol is a tristate symbol, the additional functionality is always forced built-in. Selecting tristate symbols from a tristate symbol does keep modular transitivity. Hence fix this by moving selects of tristate symbols to the main EROFS_FS symbol. 
Signed-off-by: Geert Uytterhoeven Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/da1b899e511145dd43fd2d398f64b2e03c6a39e7.1753879351.git.geert+renesas@glider.be Signed-off-by: Gao Xiang --- fs/erofs/Kconfig | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig index 7b26efc271ee..d81f3318417d 100644 --- a/fs/erofs/Kconfig +++ b/fs/erofs/Kconfig @@ -3,8 +3,18 @@ config EROFS_FS tristate "EROFS filesystem support" depends on BLOCK + select CACHEFILES if EROFS_FS_ONDEMAND select CRC32 + select CRYPTO if EROFS_FS_ZIP_ACCEL + select CRYPTO_DEFLATE if EROFS_FS_ZIP_ACCEL select FS_IOMAP + select LZ4_DECOMPRESS if EROFS_FS_ZIP + select NETFS_SUPPORT if EROFS_FS_ONDEMAND + select XXHASH if EROFS_FS_XATTR + select XZ_DEC if EROFS_FS_ZIP_LZMA + select XZ_DEC_MICROLZMA if EROFS_FS_ZIP_LZMA + select ZLIB_INFLATE if EROFS_FS_ZIP_DEFLATE + select ZSTD_DECOMPRESS if EROFS_FS_ZIP_ZSTD help EROFS (Enhanced Read-Only File System) is a lightweight read-only file system with modern designs (e.g. no buffer heads, inline @@ -38,7 +48,6 @@ config EROFS_FS_DEBUG config EROFS_FS_XATTR bool "EROFS extended attributes" depends on EROFS_FS - select XXHASH default y help Extended attributes are name:value pairs associated with inodes by @@ -94,7 +103,6 @@ config EROFS_FS_BACKED_BY_FILE config EROFS_FS_ZIP bool "EROFS Data Compression Support" depends on EROFS_FS - select LZ4_DECOMPRESS default y help Enable transparent compression support for EROFS file systems. @@ -104,8 +112,6 @@ config EROFS_FS_ZIP config EROFS_FS_ZIP_LZMA bool "EROFS LZMA compressed data support" depends on EROFS_FS_ZIP - select XZ_DEC - select XZ_DEC_MICROLZMA help Saying Y here includes support for reading EROFS file systems containing LZMA compressed data, specifically called microLZMA. 
It @@ -117,7 +123,6 @@ config EROFS_FS_ZIP_LZMA config EROFS_FS_ZIP_DEFLATE bool "EROFS DEFLATE compressed data support" depends on EROFS_FS_ZIP - select ZLIB_INFLATE help Saying Y here includes support for reading EROFS file systems containing DEFLATE compressed data. It gives better compression @@ -132,7 +137,6 @@ config EROFS_FS_ZIP_DEFLATE config EROFS_FS_ZIP_ZSTD bool "EROFS Zstandard compressed data support" depends on EROFS_FS_ZIP - select ZSTD_DECOMPRESS help Saying Y here includes support for reading EROFS file systems containing Zstandard compressed data. It gives better compression @@ -147,8 +151,6 @@ config EROFS_FS_ZIP_ZSTD config EROFS_FS_ZIP_ACCEL bool "EROFS hardware decompression support" depends on EROFS_FS_ZIP - select CRYPTO - select CRYPTO_DEFLATE help Saying Y here includes hardware accelerator support for reading EROFS file systems containing compressed data. It gives better @@ -163,9 +165,7 @@ config EROFS_FS_ZIP_ACCEL config EROFS_FS_ONDEMAND bool "EROFS fscache-based on-demand read support (deprecated)" depends on EROFS_FS - select NETFS_SUPPORT select FSCACHE - select CACHEFILES select CACHEFILES_ONDEMAND help This permits EROFS to use fscache-backed data blobs with on-demand From c99fab6e80b76422741d34aafc2f930a482afbdd Mon Sep 17 00:00:00 2001 From: Junli Liu Date: Tue, 5 Aug 2025 09:19:58 +0800 Subject: [PATCH 1771/2411] erofs: fix atomic context detection when !CONFIG_DEBUG_LOCK_ALLOC Since EROFS handles decompression in non-atomic contexts due to uncontrollable decompression latencies and vmap() usage, it tries to detect atomic contexts and only kicks off a kworker on demand in order to reduce unnecessary scheduling overhead. However, the current approach is insufficient and can lead to sleeping function calls in invalid contexts, causing kernel warnings and potential system instability. See the stacktrace [1] and previous discussion [2]. 
The current implementation only checks rcu_read_lock_any_held(), which behaves inconsistently across different kernel configurations: - When CONFIG_DEBUG_LOCK_ALLOC is enabled: correctly detects RCU critical sections by checking rcu_lock_map - When CONFIG_DEBUG_LOCK_ALLOC is disabled: compiles to "!preemptible()", which only checks preempt_count and misses RCU critical sections This patch introduces z_erofs_in_atomic() to provide comprehensive atomic context detection: 1. Check RCU preemption depth when CONFIG_PREEMPTION is enabled, as RCU critical sections may not affect preempt_count but still require atomic handling 2. Always use async processing when CONFIG_PREEMPT_COUNT is disabled, as preemption state cannot be reliably determined 3. Fall back to standard preemptible() check for remaining cases The function replaces the previous complex condition check and ensures that z_erofs always uses (kthread_)work in atomic contexts to minimize scheduling overhead and prevent sleeping in invalid contexts. 
[1] Problem stacktrace [ 61.266692] BUG: sleeping function called from invalid context at kernel/locking/rtmutex_api.c:510 [ 61.266702] in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 107, name: irq/54-ufshcd [ 61.266704] preempt_count: 0, expected: 0 [ 61.266705] RCU nest depth: 2, expected: 0 [ 61.266710] CPU: 0 UID: 0 PID: 107 Comm: irq/54-ufshcd Tainted: G W O 6.12.17 #1 [ 61.266714] Tainted: [W]=WARN, [O]=OOT_MODULE [ 61.266715] Hardware name: schumacher (DT) [ 61.266717] Call trace: [ 61.266718] dump_backtrace+0x9c/0x100 [ 61.266727] show_stack+0x20/0x38 [ 61.266728] dump_stack_lvl+0x78/0x90 [ 61.266734] dump_stack+0x18/0x28 [ 61.266736] __might_resched+0x11c/0x180 [ 61.266743] __might_sleep+0x64/0xc8 [ 61.266745] mutex_lock+0x2c/0xc0 [ 61.266748] z_erofs_decompress_queue+0xe8/0x978 [ 61.266753] z_erofs_decompress_kickoff+0xa8/0x190 [ 61.266756] z_erofs_endio+0x168/0x288 [ 61.266758] bio_endio+0x160/0x218 [ 61.266762] blk_update_request+0x244/0x458 [ 61.266766] scsi_end_request+0x38/0x278 [ 61.266770] scsi_io_completion+0x4c/0x600 [ 61.266772] scsi_finish_command+0xc8/0xe8 [ 61.266775] scsi_complete+0x88/0x148 [ 61.266777] blk_mq_complete_request+0x3c/0x58 [ 61.266780] scsi_done_internal+0xcc/0x158 [ 61.266782] scsi_done+0x1c/0x30 [ 61.266783] ufshcd_compl_one_cqe+0x12c/0x438 [ 61.266786] __ufshcd_transfer_req_compl+0x2c/0x78 [ 61.266788] ufshcd_poll+0xf4/0x210 [ 61.266789] ufshcd_transfer_req_compl+0x50/0x88 [ 61.266791] ufshcd_intr+0x21c/0x7c8 [ 61.266792] irq_forced_thread_fn+0x44/0xd8 [ 61.266796] irq_thread+0x1a4/0x358 [ 61.266799] kthread+0x12c/0x138 [ 61.266802] ret_from_fork+0x10/0x20 [2] https://lore.kernel.org/r/58b661d0-0ebb-4b45-a10d-c5927fb791cd@paulmck-laptop Signed-off-by: Junli Liu Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20250805011957.911186-1-liujunli@lixiang.com [ Gao Xiang: Use the original trace in v1. 
] Signed-off-by: Gao Xiang --- fs/erofs/zdata.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 792f20888a8f..2d73297003d2 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1432,6 +1432,16 @@ static void z_erofs_decompressqueue_kthread_work(struct kthread_work *work) } #endif +/* Use (kthread_)work in atomic contexts to minimize scheduling overhead */ +static inline bool z_erofs_in_atomic(void) +{ + if (IS_ENABLED(CONFIG_PREEMPTION) && rcu_preempt_depth()) + return true; + if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) + return true; + return !preemptible(); +} + static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, int bios) { @@ -1446,8 +1456,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, if (atomic_add_return(bios, &io->pending_bios)) return; - /* Use (kthread_)work and sync decompression for atomic contexts only */ - if (!in_task() || irqs_disabled() || rcu_read_lock_any_held()) { + if (z_erofs_in_atomic()) { #ifdef CONFIG_EROFS_FS_PCPU_KTHREAD struct kthread_worker *worker; From 0b96d9bed324a1c1b7d02bfb9596351ef178428d Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Thu, 7 Aug 2025 16:20:19 +0800 Subject: [PATCH 1772/2411] erofs: fix block count report when 48-bit layout is on Fix incorrect shift order when combining the 48-bit block count. 
Fixes: 2e1473d5195f ("erofs: implement 48-bit block addressing for unencoded inodes") Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20250807082019.3093539-1-hsiangkao@linux.alibaba.com --- fs/erofs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 8c7a5985b4ee..1b529ace4db0 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -323,8 +323,8 @@ static int erofs_read_superblock(struct super_block *sb) sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact)); if (erofs_sb_has_48bit(sbi) && dsb->rootnid_8b) { sbi->root_nid = le64_to_cpu(dsb->rootnid_8b); - sbi->dif0.blocks = (sbi->dif0.blocks << 32) | - le16_to_cpu(dsb->rb.blocks_hi); + sbi->dif0.blocks = sbi->dif0.blocks | + ((u64)le16_to_cpu(dsb->rb.blocks_hi) << 32); } else { sbi->root_nid = le16_to_cpu(dsb->rb.rootnid_2b); } From 61399e0c5410567ef60cb1cda34cca42903842e3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 8 Aug 2025 19:03:22 +0200 Subject: [PATCH 1773/2411] rcu: Fix racy re-initialization of irq_work causing hangs RCU re-initializes the deferred QS irq work every time before attempting to queue it. However there are situations where the irq work is attempted to be queued even though it is already queued. In that case re-initializing messes up the irq work queue that is about to be handled. The chances for that to happen are higher when the architecture doesn't support self-IPIs and irq works are then all lazy, such as with the following sequence: 1) rcu_read_unlock() is called when IRQs are disabled and there is a grace period involving blocked tasks on the node. The irq work is then initialized and queued. 2) The related tasks are unblocked and the CPU quiescent state is reported. rdp->defer_qs_iw_pending is reset to DEFER_QS_IDLE, allowing the irq work to be requeued in the future (note the previous one hasn't fired yet). 3) A new grace period starts and the node has blocked tasks.
4) rcu_read_unlock() is called when IRQs are disabled again. The irq work is re-initialized (but it's queued! and its node is cleared) and requeued. Which means it's requeued to itself. 5) The irq work finally fires with the tick. But since it was requeued to itself, it loops and hangs. Fix this with initializing the irq work only once before the CPU boots. Fixes: b41642c87716 ("rcu: Fix rcu_read_unlock() deadloop due to IRQ work") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202508071303.c1134cce-lkp@intel.com Signed-off-by: Frederic Weisbecker Reviewed-by: Joel Fernandes Signed-off-by: Neeraj Upadhyay (AMD) --- kernel/rcu/tree.c | 2 ++ kernel/rcu/tree.h | 1 + kernel/rcu/tree_plugin.h | 8 ++++++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 174ee243b349..8eff357b0436 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4262,6 +4262,8 @@ int rcutree_prepare_cpu(unsigned int cpu) rdp->rcu_iw_gp_seq = rdp->gp_seq - 1; trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl")); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + + rcu_preempt_deferred_qs_init(rdp); rcu_spawn_rnp_kthreads(rnp); rcu_spawn_cpu_nocb_kthread(cpu); ASSERT_EXCLUSIVE_WRITER(rcu_state.n_online_cpus); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index de6ca13a7b5f..b8bbe7960cda 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -488,6 +488,7 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp); static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); static void rcu_flavor_sched_clock_irq(int user); static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck); +static void rcu_preempt_deferred_qs_init(struct rcu_data *rdp); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); static bool rcu_is_callbacks_kthread(struct rcu_data *rdp); diff --git a/kernel/rcu/tree_plugin.h 
b/kernel/rcu/tree_plugin.h index fc14adf15cbb..4cd170b2d655 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -763,8 +763,6 @@ static void rcu_read_unlock_special(struct task_struct *t) cpu_online(rdp->cpu)) { // Get scheduler to re-evaluate and call hooks. // If !IRQ_WORK, FQS scan will eventually IPI. - rdp->defer_qs_iw = - IRQ_WORK_INIT_HARD(rcu_preempt_deferred_qs_handler); rdp->defer_qs_iw_pending = DEFER_QS_PENDING; irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu); } @@ -904,6 +902,10 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck) } } +static void rcu_preempt_deferred_qs_init(struct rcu_data *rdp) +{ + rdp->defer_qs_iw = IRQ_WORK_INIT_HARD(rcu_preempt_deferred_qs_handler); +} #else /* #ifdef CONFIG_PREEMPT_RCU */ /* @@ -1103,6 +1105,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck) WARN_ON_ONCE(!list_empty(&rnp->blkd_tasks)); } +static void rcu_preempt_deferred_qs_init(struct rcu_data *rdp) { } + #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ /* From 0db77eccd964b11ab2b757031d1354fcc5a025ea Mon Sep 17 00:00:00 2001 From: Christopher Eby Date: Sat, 9 Aug 2025 20:00:06 -0700 Subject: [PATCH 1774/2411] ALSA: hda/realtek: Add Framework Laptop 13 (AMD Ryzen AI 300) to quirks Framework Laptop 13 (AMD Ryzen AI 300) requires the same quirk for headset detection as other Framework 13 models. 
Signed-off-by: Christopher Eby Cc: Link: https://patch.msgid.link/20250810030006.9060-1-kreed@kreed.org Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index e27a36e4e92a..337e33a59de8 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7158,6 +7158,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x0006, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x0009, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0xf111, 0x000b, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x000c, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), #if 0 From b6bcbce3359619d05bf387d4f5cc3af63668dbaa Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Thu, 31 Jul 2025 13:18:32 +0100 Subject: [PATCH 1775/2411] soc/tegra: pmc: Ensure power-domains are in a known state After commit 13a4b7fb6260 ("pmdomain: core: Leave powered-on genpds on until late_initcall_sync") was applied, the Tegra210 Jetson TX1 board failed to boot. Looking into this issue, before this commit was applied, if any of the Tegra power-domains were in 'on' state when the kernel booted, they were being turned off by the genpd core before any driver had chance to request them. This was purely by luck and a consequence of the power-domains being turned off earlier during boot. After this commit was applied, any power-domains in the 'on' state are kept on for longer during boot and therefore, may never transitioned to the off state before they are requested/used. 
The hang on the Tegra210 Jetson TX1 is caused because devices in some power-domains are accessed without the power-domain being turned off and on, indicating that the power-domain is not in a completely on state. From reviewing the Tegra PMC driver code, if a power-domain is in the 'on' state there is no guarantee that all the necessary clocks associated with the power-domain are on and even if they are they would not have been requested via the clock framework and so could be turned off later. Some power-domains also have a 'clamping' register that needs to be configured as well. In short, if a power-domain is already 'on' it is difficult to know if it has been configured correctly. Given that the power-domains happened to be switched off during boot previously, to ensure that they are in a good known state on boot, fix this by switching off any power-domains that are on initially when registering the power-domains with the genpd framework. Note that commit 05cfb988a4d0 ("soc/tegra: pmc: Initialise resets associated with a power partition") updated the tegra_powergate_of_get_resets() function to pass the 'off' argument to ensure that the resets for the power-domain are in the correct state on boot. However, now that we may power off a domain on boot, if it is on, it is better to move this logic into the tegra_powergate_add() function so that there is a single place where we are handling the initial state of the power-domain.
Fixes: a38045121bf4 ("soc/tegra: pmc: Add generic PM domain support") Signed-off-by: Jon Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250731121832.213671-1-jonathanh@nvidia.com Signed-off-by: Ulf Hansson --- drivers/soc/tegra/pmc.c | 51 +++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index 2a5f24ee858c..034a2a535a1e 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -1232,7 +1232,7 @@ static int tegra_powergate_of_get_clks(struct tegra_powergate *pg, } static int tegra_powergate_of_get_resets(struct tegra_powergate *pg, - struct device_node *np, bool off) + struct device_node *np) { struct device *dev = pg->pmc->dev; int err; @@ -1247,22 +1247,6 @@ static int tegra_powergate_of_get_resets(struct tegra_powergate *pg, err = reset_control_acquire(pg->reset); if (err < 0) { pr_err("failed to acquire resets: %d\n", err); - goto out; - } - - if (off) { - err = reset_control_assert(pg->reset); - } else { - err = reset_control_deassert(pg->reset); - if (err < 0) - goto out; - - reset_control_release(pg->reset); - } - -out: - if (err) { - reset_control_release(pg->reset); reset_control_put(pg->reset); } @@ -1308,20 +1292,43 @@ static int tegra_powergate_add(struct tegra_pmc *pmc, struct device_node *np) goto set_available; } - err = tegra_powergate_of_get_resets(pg, np, off); + err = tegra_powergate_of_get_resets(pg, np); if (err < 0) { dev_err(dev, "failed to get resets for %pOFn: %d\n", np, err); goto remove_clks; } - if (!IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS)) { - if (off) - WARN_ON(tegra_powergate_power_up(pg, true)); + /* + * If the power-domain is off, then ensure the resets are asserted. + * If the power-domain is on, then power down to ensure that when is + * it turned on the power-domain, clocks and resets are all in the + * expected state. 
+ */ + if (off) { + err = reset_control_assert(pg->reset); + if (err) { + pr_err("failed to assert resets: %d\n", err); + goto remove_resets; + } + } else { + err = tegra_powergate_power_down(pg); + if (err) { + dev_err(dev, "failed to turn off PM domain %s: %d\n", + pg->genpd.name, err); + goto remove_resets; + } + } + /* + * If PM_GENERIC_DOMAINS is not enabled, power-on + * the domain and skip the genpd registration. + */ + if (!IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS)) { + WARN_ON(tegra_powergate_power_up(pg, true)); goto remove_resets; } - err = pm_genpd_init(&pg->genpd, NULL, off); + err = pm_genpd_init(&pg->genpd, NULL, true); if (err < 0) { dev_err(dev, "failed to initialise PM domain %pOFn: %d\n", np, err); From 2c223f7239f376a90d71903ec474ba887cf21d94 Mon Sep 17 00:00:00 2001 From: Oreoluwa Babatunde Date: Wed, 6 Aug 2025 10:24:21 -0700 Subject: [PATCH 1776/2411] of: reserved_mem: Restructure call site for dma_contiguous_early_fixup() Restructure the call site for dma_contiguous_early_fixup() to where the reserved_mem nodes are being parsed from the DT so that dma_mmu_remap[] is populated before dma_contiguous_remap() is called. 
Fixes: 8a6e02d0c00e ("of: reserved_mem: Restructure how the reserved memory regions are processed") Signed-off-by: Oreoluwa Babatunde Tested-by: William Zhang Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20250806172421.2748302-1-oreoluwa.babatunde@oss.qualcomm.com --- drivers/of/of_reserved_mem.c | 16 ++++++++++++---- include/linux/dma-map-ops.h | 3 +++ kernel/dma/contiguous.c | 2 -- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 77016c0cc296..7350b23cb734 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "of_private.h" @@ -175,13 +176,17 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, base = dt_mem_next_cell(dt_root_addr_cells, &prop); size = dt_mem_next_cell(dt_root_size_cells, &prop); - if (size && - early_init_dt_reserve_memory(base, size, nomap) == 0) + if (size && early_init_dt_reserve_memory(base, size, nomap) == 0) { + /* Architecture specific contiguous memory fixup. */ + if (of_flat_dt_is_compatible(node, "shared-dma-pool") && + of_get_flat_dt_prop(node, "reusable", NULL)) + dma_contiguous_early_fixup(base, size); pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n", uname, &base, (unsigned long)(size / SZ_1M)); - else + } else { pr_err("Reserved memory: failed to reserve memory for node '%s': base %pa, size %lu MiB\n", uname, &base, (unsigned long)(size / SZ_1M)); + } len -= t_len; } @@ -472,7 +477,10 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam uname, (unsigned long)(size / SZ_1M)); return -ENOMEM; } - + /* Architecture specific contiguous memory fixup. 
*/ + if (of_flat_dt_is_compatible(node, "shared-dma-pool") && + of_get_flat_dt_prop(node, "reusable", NULL)) + dma_contiguous_early_fixup(base, size); /* Save region in the reserved_mem array */ fdt_reserved_mem_save_node(node, uname, base, size); return 0; diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index f48e5fb88bd5..332b80c42b6f 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -153,6 +153,9 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page, { __free_pages(page, get_order(size)); } +static inline void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) +{ +} #endif /* CONFIG_DMA_CMA*/ #ifdef CONFIG_DMA_DECLARE_COHERENT diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 67af8a55185d..d9b9dcba6ff7 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -483,8 +483,6 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem) pr_err("Reserved memory: unable to setup CMA region\n"); return err; } - /* Architecture specific contiguous memory fixup. */ - dma_contiguous_early_fixup(rmem->base, rmem->size); if (default_cma) dma_contiguous_default_area = cma; From 647b3d59c768d7638dd17c78c8044178364383ca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 31 Jul 2025 07:19:41 -0700 Subject: [PATCH 1777/2411] xfs: fix frozen file system assert in xfs_trans_alloc Commit 83a80e95e797 ("xfs: decouple xfs_trans_alloc_empty from xfs_trans_alloc") moved the place of the assert for a frozen file system after the sb_start_intwrite call that ensures it doesn't run on frozen file systems, and thus allows it to trigger incorrectly. Fix that by moving it back to where it belongs. Fixes: 83a80e95e797 ("xfs: decouple xfs_trans_alloc_empty from xfs_trans_alloc") Reported-by: Dave Chinner Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J.
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_trans.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index ece374d622b3..575e7028f423 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -253,8 +253,8 @@ xfs_trans_alloc( * by doing GFP_KERNEL allocations inside sb_start_intwrite(). */ retry: - WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); tp = __xfs_trans_alloc(mp, flags); + WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); error = xfs_trans_reserve(tp, resp, blocks, rtextents); if (error == -ENOSPC && want_retry) { xfs_trans_cancel(tp); From d2845519b0723c5d5a0266cbf410495f9b8fd65c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jul 2025 14:19:44 +0200 Subject: [PATCH 1778/2411] xfs: fully decouple XFS_IBULK* flags from XFS_IWALK* flags Fix up xfs_inumbers to not pass the XFS_IBULK* flags into the flags argument to xfs_inobt_walk, which expects the XFS_IWALK* flags. Currently passing the wrong flags works for non-debug builds because the only XFS_IWALK* flag has the same encoding as the corresponding XFS_IBULK* flag, but in debug builds it can trigger an assert that no incorrect flag is passed. Instead just extract the relevant flag. Fixes: 5b35d922c52798 ("xfs: Decouple XFS_IBULK flags from XFS_IWALK flags") Cc: # v5.19 Reported-by: cen zhang Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J.
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_itable.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index c8c9b8d8309f..5116842420b2 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -447,17 +447,21 @@ xfs_inumbers( .breq = breq, }; struct xfs_trans *tp; + unsigned int iwalk_flags = 0; int error = 0; if (xfs_bulkstat_already_done(breq->mp, breq->startino)) return 0; + if (breq->flags & XFS_IBULK_SAME_AG) + iwalk_flags |= XFS_IWALK_SAME_AG; + /* * Grab an empty transaction so that we can use its recursive buffer * locking abilities to detect cycles in the inobt without deadlocking. */ tp = xfs_trans_alloc_empty(breq->mp); - error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags, + error = xfs_inobt_walk(breq->mp, tp, breq->startino, iwalk_flags, xfs_inumbers_walk, breq->icount, &ic); xfs_trans_cancel(tp); From 82efde9cf2e4ce25eac96a20e36eae7c338df1e0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jul 2025 14:19:45 +0200 Subject: [PATCH 1779/2411] xfs: remove XFS_IBULK_SAME_AG Add a new field to struct xfs_ibulk to directly pass XFS_IWALK* flags, and thus remove the need to indirect the SAME_AG flag through XFS_IBULK*. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_ioctl.c | 2 +- fs/xfs/xfs_itable.c | 12 ++---------- fs/xfs/xfs_itable.h | 10 ++++------ 3 files changed, 7 insertions(+), 17 deletions(-) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index fe1f74a3b6a3..e1051a530a50 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -219,7 +219,7 @@ xfs_bulk_ireq_setup( else if (XFS_INO_TO_AGNO(mp, breq->startino) < hdr->agno) return -EINVAL; - breq->flags |= XFS_IBULK_SAME_AG; + breq->iwalk_flags |= XFS_IWALK_SAME_AG; /* Asking for an inode past the end of the AG? We're done! 
*/ if (XFS_INO_TO_AGNO(mp, breq->startino) > hdr->agno) diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 5116842420b2..2aa37a4d2706 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -307,7 +307,6 @@ xfs_bulkstat( .breq = breq, }; struct xfs_trans *tp; - unsigned int iwalk_flags = 0; int error; if (breq->idmap != &nop_mnt_idmap) { @@ -328,10 +327,7 @@ xfs_bulkstat( * locking abilities to detect cycles in the inobt without deadlocking. */ tp = xfs_trans_alloc_empty(breq->mp); - if (breq->flags & XFS_IBULK_SAME_AG) - iwalk_flags |= XFS_IWALK_SAME_AG; - - error = xfs_iwalk(breq->mp, tp, breq->startino, iwalk_flags, + error = xfs_iwalk(breq->mp, tp, breq->startino, breq->iwalk_flags, xfs_bulkstat_iwalk, breq->icount, &bc); xfs_trans_cancel(tp); kfree(bc.buf); @@ -447,21 +443,17 @@ xfs_inumbers( .breq = breq, }; struct xfs_trans *tp; - unsigned int iwalk_flags = 0; int error = 0; if (xfs_bulkstat_already_done(breq->mp, breq->startino)) return 0; - if (breq->flags & XFS_IBULK_SAME_AG) - iwalk_flags |= XFS_IWALK_SAME_AG; - /* * Grab an empty transaction so that we can use its recursive buffer * locking abilities to detect cycles in the inobt without deadlocking. 
*/ tp = xfs_trans_alloc_empty(breq->mp); - error = xfs_inobt_walk(breq->mp, tp, breq->startino, iwalk_flags, + error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->iwalk_flags, xfs_inumbers_walk, breq->icount, &ic); xfs_trans_cancel(tp); diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index f10e8f8f2335..2d0612f14d6e 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -13,17 +13,15 @@ struct xfs_ibulk { xfs_ino_t startino; /* start with this inode */ unsigned int icount; /* number of elements in ubuffer */ unsigned int ocount; /* number of records returned */ - unsigned int flags; /* see XFS_IBULK_FLAG_* */ + unsigned int flags; /* XFS_IBULK_FLAG_* */ + unsigned int iwalk_flags; /* XFS_IWALK_FLAG_* */ }; -/* Only iterate within the same AG as startino */ -#define XFS_IBULK_SAME_AG (1U << 0) - /* Fill out the bs_extents64 field if set. */ -#define XFS_IBULK_NREXT64 (1U << 1) +#define XFS_IBULK_NREXT64 (1U << 0) /* Signal that we can return metadata directories. */ -#define XFS_IBULK_METADIR (1U << 2) +#define XFS_IBULK_METADIR (1U << 1) /* * Advance the user buffer pointer by one record of the given size. If the From e7fb9b71326f43bab25fb8f18c6bfebd7a628696 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 24 Jul 2025 08:12:13 +0000 Subject: [PATCH 1780/2411] fs/dax: Reject IOCB_ATOMIC in dax_iomap_rw() The DAX write path does not support IOCB_ATOMIC, so reject it when set. Suggested-by: Darrick J. Wong Signed-off-by: John Garry Reviewed-by: Darrick J. 
Wong Signed-off-by: Carlos Maiolino --- fs/dax.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index 4229513806be..20ecf652c129 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1743,6 +1743,9 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, loff_t done = 0; int ret; + if (WARN_ON_ONCE(iocb->ki_flags & IOCB_ATOMIC)) + return -EIO; + if (!iomi.len) return 0; From 68456d05eb57a5d16b4be2d3caf421bdcf2de72e Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 24 Jul 2025 08:12:14 +0000 Subject: [PATCH 1781/2411] xfs: disallow atomic writes on DAX Atomic writes are not currently supported for DAX, but two problems exist: - we may go down DAX write path for IOCB_ATOMIC, which does not handle IOCB_ATOMIC properly - we report non-zero atomic write limits in statx (for DAX inodes) We may want atomic writes support on DAX in future, but just disallow for now. For this, ensure when IOCB_ATOMIC is set that we check the write size versus the atomic write min and max before branching off to the DAX write path. This is not strictly required for DAX, as we should not get this far in the write path as FMODE_CAN_ATOMIC_WRITE should not be set. In addition, due to reflink being supported for DAX, we automatically get CoW-based atomic writes support being advertised. Remedy this by disallowing atomic writes for a DAX inode for both sw and hw modes. Reported-by: Darrick J. Wong Fixes: 9dffc58f2384 ("xfs: update atomic write limits") Reviewed-by: Darrick J. 
Wong Signed-off-by: John Garry Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_file.c | 6 +++--- fs/xfs/xfs_inode.h | 11 +++++++++++ fs/xfs/xfs_iops.c | 5 +++-- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 55a304cb3aef..f96fbf5c54c9 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1101,9 +1101,6 @@ xfs_file_write_iter( if (xfs_is_shutdown(ip->i_mount)) return -EIO; - if (IS_DAX(inode)) - return xfs_file_dax_write(iocb, from); - if (iocb->ki_flags & IOCB_ATOMIC) { if (ocount < xfs_get_atomic_write_min(ip)) return -EINVAL; @@ -1116,6 +1113,9 @@ xfs_file_write_iter( return ret; } + if (IS_DAX(inode)) + return xfs_file_dax_write(iocb, from); + if (iocb->ki_flags & IOCB_DIRECT) { /* * Allow a directio write to fall back to a buffered diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 07fbdcc4cbf5..bd6d33557194 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -358,9 +358,20 @@ static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip) static inline bool xfs_inode_can_hw_atomic_write(const struct xfs_inode *ip) { + if (IS_DAX(VFS_IC(ip))) + return false; + return xfs_inode_buftarg(ip)->bt_awu_max > 0; } +static inline bool xfs_inode_can_sw_atomic_write(const struct xfs_inode *ip) +{ + if (IS_DAX(VFS_IC(ip))) + return false; + + return xfs_can_sw_atomic_write(ip->i_mount); +} + /* * In-core inode flags. */ diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 149b5460fbfd..603effabe1ee 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -616,7 +616,8 @@ xfs_get_atomic_write_min( * write of exactly one single fsblock if the bdev will make that * guarantee for us. 
*/ - if (xfs_inode_can_hw_atomic_write(ip) || xfs_can_sw_atomic_write(mp)) + if (xfs_inode_can_hw_atomic_write(ip) || + xfs_inode_can_sw_atomic_write(ip)) return mp->m_sb.sb_blocksize; return 0; @@ -633,7 +634,7 @@ xfs_get_atomic_write_max( * write of exactly one single fsblock if the bdev will make that * guarantee for us. */ - if (!xfs_can_sw_atomic_write(mp)) { + if (!xfs_inode_can_sw_atomic_write(ip)) { if (xfs_inode_can_hw_atomic_write(ip)) return mp->m_sb.sb_blocksize; return 0; From 8dc5e9b037138317c1d3151a7dabe41fa171cee1 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 24 Jul 2025 08:12:15 +0000 Subject: [PATCH 1782/2411] xfs: reject max_atomic_write mount option for no reflink If the FS has no reflink, then atomic writes greater than 1x block are not supported. As such, for no reflink it is pointless to accept setting max_atomic_write when it cannot be supported, so reject max_atomic_write mount option in this case. It could be still possible to accept max_atomic_write option of size 1x block if HW atomics are supported, so check for this specifically. Fixes: 4528b9052731 ("xfs: allow sysadmins to specify a maximum atomic write limit at mount time") Signed-off-by: John Garry Reviewed-by: Darrick J. 
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_mount.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 2133fbaf1766..dc32c5e34d81 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -779,6 +779,25 @@ xfs_set_max_atomic_write_opt( return -EINVAL; } + if (xfs_has_reflink(mp)) + goto set_limit; + + if (new_max_fsbs == 1) { + if (mp->m_ddev_targp->bt_awu_max || + (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_awu_max)) { + } else { + xfs_warn(mp, + "cannot support atomic writes of size %lluk with no reflink or HW support", + new_max_bytes >> 10); + return -EINVAL; + } + } else { + xfs_warn(mp, + "cannot support atomic writes of size %lluk with no reflink support", + new_max_bytes >> 10); + return -EINVAL; + } + set_limit: error = xfs_calc_atomic_write_reservation(mp, new_max_fsbs); if (error) { From 5d94b19f066480addfcdcb5efde66152ad5a7c0e Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Thu, 31 Jul 2025 19:07:22 +0200 Subject: [PATCH 1783/2411] xfs: fix scrub trace with null pointer in quotacheck The quotacheck doesn't initialize sc->ip. Cc: stable@vger.kernel.org # v6.8 Fixes: 21d7500929c8a0 ("xfs: improve dquot iteration for scrub") Reviewed-by: Darrick J. 
Wong Signed-off-by: Andrey Albershteyn Signed-off-by: Carlos Maiolino --- fs/xfs/scrub/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 1e6e9c10cea2..a8187281eb96 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -479,7 +479,7 @@ DECLARE_EVENT_CLASS(xchk_dqiter_class, __field(xfs_exntst_t, state) ), TP_fast_assign( - __entry->dev = cursor->sc->ip->i_mount->m_super->s_dev; + __entry->dev = cursor->sc->mp->m_super->s_dev; __entry->dqtype = cursor->dqtype; __entry->ino = cursor->quota_ip->i_ino; __entry->cur_id = cursor->id; From f76823e3b284aae30797fded988a807eab2da246 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jul 2025 07:35:44 +0200 Subject: [PATCH 1784/2411] xfs: split xfs_zone_record_blocks xfs_zone_record_blocks not only records successfully written blocks that now back file data, but is also used for blocks speculatively written by garbage collection that were never linked to an inode and instantly become invalid. Split the latter functionality out to be easier to understand. This also makes it clear that we don't need to attach the rmap inode to a transaction for the skipped blocks case as we never dirty any persistent data structure. Also make the argument order to xfs_zone_record_blocks a bit more natural. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J.
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_trace.h | 1 + fs/xfs/xfs_zone_alloc.c | 42 ++++++++++++++++++++++++++++------------- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index e1794e3e3156..ac344e42846c 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -455,6 +455,7 @@ DEFINE_EVENT(xfs_zone_alloc_class, name, \ xfs_extlen_t len), \ TP_ARGS(oz, rgbno, len)) DEFINE_ZONE_ALLOC_EVENT(xfs_zone_record_blocks); +DEFINE_ZONE_ALLOC_EVENT(xfs_zone_skip_blocks); DEFINE_ZONE_ALLOC_EVENT(xfs_zone_alloc_blocks); TRACE_EVENT(xfs_zone_gc_select_victim, diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 33f7eee521a8..f8bd6d741755 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -166,10 +166,9 @@ xfs_open_zone_mark_full( static void xfs_zone_record_blocks( struct xfs_trans *tp, - xfs_fsblock_t fsbno, - xfs_filblks_t len, struct xfs_open_zone *oz, - bool used) + xfs_fsblock_t fsbno, + xfs_filblks_t len) { struct xfs_mount *mp = tp->t_mountp; struct xfs_rtgroup *rtg = oz->oz_rtg; @@ -179,18 +178,37 @@ xfs_zone_record_blocks( xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP); - if (used) { - rmapip->i_used_blocks += len; - ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg)); - } else { - xfs_add_frextents(mp, len); - } + rmapip->i_used_blocks += len; + ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg)); oz->oz_written += len; if (oz->oz_written == rtg_blocks(rtg)) xfs_open_zone_mark_full(oz); xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE); } +/* + * Called for blocks that have been written to disk, but not actually linked to + * an inode, which can happen when garbage collection races with user data + * writes to a file. 
+ */ +static void +xfs_zone_skip_blocks( + struct xfs_open_zone *oz, + xfs_filblks_t len) +{ + struct xfs_rtgroup *rtg = oz->oz_rtg; + + trace_xfs_zone_skip_blocks(oz, 0, len); + + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); + oz->oz_written += len; + if (oz->oz_written == rtg_blocks(rtg)) + xfs_open_zone_mark_full(oz); + xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); + + xfs_add_frextents(rtg_mount(rtg), len); +} + static int xfs_zoned_map_extent( struct xfs_trans *tp, @@ -250,8 +268,7 @@ xfs_zoned_map_extent( } } - xfs_zone_record_blocks(tp, new->br_startblock, new->br_blockcount, oz, - true); + xfs_zone_record_blocks(tp, oz, new->br_startblock, new->br_blockcount); /* Map the new blocks into the data fork. */ xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, new); @@ -259,8 +276,7 @@ xfs_zoned_map_extent( skip: trace_xfs_reflink_cow_remap_skip(ip, new); - xfs_zone_record_blocks(tp, new->br_startblock, new->br_blockcount, oz, - false); + xfs_zone_skip_blocks(oz, new->br_blockcount); return 0; } From d02d2c98d25793902f65803ab853b592c7a96b29 Mon Sep 17 00:00:00 2001 From: Jiufei Xue Date: Mon, 28 Jul 2025 18:07:15 +0800 Subject: [PATCH 1785/2411] fs: writeback: fix use-after-free in __mark_inode_dirty() A use-after-free issue occurred when __mark_inode_dirty() got a bdi_writeback that was in the process of switching. CPU: 1 PID: 562 Comm: systemd-random- Not tainted 6.6.56-gb4403bd46a8e #1 ...... pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : __mark_inode_dirty+0x124/0x418 lr : __mark_inode_dirty+0x118/0x418 sp : ffffffc08c9dbbc0 ........
Call trace: __mark_inode_dirty+0x124/0x418 generic_update_time+0x4c/0x60 file_modified+0xcc/0xd0 ext4_buffered_write_iter+0x58/0x124 ext4_file_write_iter+0x54/0x704 vfs_write+0x1c0/0x308 ksys_write+0x74/0x10c __arm64_sys_write+0x1c/0x28 invoke_syscall+0x48/0x114 el0_svc_common.constprop.0+0xc0/0xe0 do_el0_svc+0x1c/0x28 el0_svc+0x40/0xe4 el0t_64_sync_handler+0x120/0x12c el0t_64_sync+0x194/0x198 Root cause is: systemd-random-seed kworker ---------------------------------------------------------------------- ___mark_inode_dirty inode_switch_wbs_work_fn spin_lock(&inode->i_lock); inode_attach_wb locked_inode_to_wb_and_lock_list get inode->i_wb spin_unlock(&inode->i_lock); spin_lock(&wb->list_lock) spin_lock(&inode->i_lock) inode_io_list_move_locked spin_unlock(&wb->list_lock) spin_unlock(&inode->i_lock) spin_lock(&old_wb->list_lock) inode_do_switch_wbs spin_lock(&inode->i_lock) inode->i_wb = new_wb spin_unlock(&inode->i_lock) spin_unlock(&old_wb->list_lock) wb_put_many(old_wb, nr_switched) cgwb_release old wb released wb_wakeup_delayed() accesses wb, then trigger the use-after-free issue Fix this race condition by holding inode spinlock until wb_wakeup_delayed() finished. 
Signed-off-by: Jiufei Xue Link: https://lore.kernel.org/20250728100715.3863241-1-jiufei.xue@samsung.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/fs-writeback.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index cc57367fb641..a07b8cf73ae2 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2608,10 +2608,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) wakeup_bdi = inode_io_list_move_locked(inode, wb, dirty_list); - spin_unlock(&wb->list_lock); - spin_unlock(&inode->i_lock); - trace_writeback_dirty_inode_enqueue(inode); - /* * If this is the first dirty inode for this bdi, * we have to wake-up the corresponding bdi thread @@ -2621,6 +2617,11 @@ void __mark_inode_dirty(struct inode *inode, int flags) if (wakeup_bdi && (wb->bdi->capabilities & BDI_CAP_WRITEBACK)) wb_wakeup_delayed(wb); + + spin_unlock(&wb->list_lock); + spin_unlock(&inode->i_lock); + trace_writeback_dirty_inode_enqueue(inode); + return; } } From 9308366f062129d52e0ee3f7a019f7dd41db33df Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Fri, 8 Aug 2025 03:55:05 +1000 Subject: [PATCH 1786/2411] open_tree_attr: do not allow id-mapping changes without OPEN_TREE_CLONE As described in commit 7a54947e727b ('Merge patch series "fs: allow changing idmappings"'), open_tree_attr(2) was necessary in order to allow for a detached mount to be created and have its idmappings changed without the risk of any racing threads operating on it. For this reason, mount_setattr(2) still does not allow for id-mappings to be changed. However, there was a bug in commit 2462651ffa76 ("fs: allow changing idmappings") which allowed users to bypass this restriction by calling open_tree_attr(2) *without* OPEN_TREE_CLONE. 
can_idmap_mount() prevented this bug from allowing an attached mountpoint's id-mapping to be modified (thanks to an is_anon_ns() check), but this still allows for detached (but visible) mounts to have their id-mapping changed. This risks the same UAF and locking issues as described in the merge commit, and was likely unintentional. Fixes: 2462651ffa76 ("fs: allow changing idmappings") Cc: stable@vger.kernel.org # v6.15+ Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/20250808-open_tree_attr-bugfix-idmap-v1-1-0ec7bc05646c@cyphar.com Signed-off-by: Christian Brauner --- fs/namespace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index ddfd4457d338..ceb6b57e6a57 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -5176,7 +5176,8 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename, int ret; struct mount_kattr kattr = {}; - kattr.kflags = MOUNT_KATTR_IDMAP_REPLACE; + if (flags & OPEN_TREE_CLONE) + kattr.kflags = MOUNT_KATTR_IDMAP_REPLACE; if (flags & AT_RECURSIVE) kattr.kflags |= MOUNT_KATTR_RECURSE; From 81e4b9cf365df4cde30157a85cc9f3d673946118 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Fri, 8 Aug 2025 03:55:06 +1000 Subject: [PATCH 1787/2411] selftests/mount_setattr: add smoke tests for open_tree_attr(2) bug There appear to be no other open_tree_attr(2) tests at the moment, but as a minimal solution just add some additional checks in the existing MOUNT_ATTR_IDMAP tests to make sure that open_tree_attr(2) cannot be used to bypass the tested restrictions that apply to mount_setattr(2).
Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/20250808-open_tree_attr-bugfix-idmap-v1-2-0ec7bc05646c@cyphar.com Signed-off-by: Christian Brauner --- .../mount_setattr/mount_setattr_test.c | 77 +++++++++++++++---- 1 file changed, 64 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index b1e4618399be..a688871a98eb 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -107,6 +107,26 @@ #endif #endif +#ifndef __NR_open_tree_attr + #if defined __alpha__ + #define __NR_open_tree_attr 577 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_open_tree_attr (467 + 4000) + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_open_tree_attr (467 + 6000) + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_open_tree_attr (467 + 5000) + #endif + #elif defined __ia64__ + #define __NR_open_tree_attr (467 + 1024) + #else + #define __NR_open_tree_attr 467 + #endif +#endif + #ifndef MOUNT_ATTR_IDMAP #define MOUNT_ATTR_IDMAP 0x00100000 #endif @@ -121,6 +141,12 @@ static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flag return syscall(__NR_mount_setattr, dfd, path, flags, attr, size); } +static inline int sys_open_tree_attr(int dfd, const char *path, unsigned int flags, + struct mount_attr *attr, size_t size) +{ + return syscall(__NR_open_tree_attr, dfd, path, flags, attr, size); +} + static ssize_t write_nointr(int fd, const void *buf, size_t count) { ssize_t ret; @@ -1222,6 +1248,12 @@ TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace) attr.userns_fd = get_userns_fd(0, 10000, 10000); ASSERT_GE(attr.userns_fd, 0); ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + /* + * Make sure that open_tree_attr() without 
OPEN_TREE_CLONE is not a way + * to bypass this mount_setattr() restriction. + */ + ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); ASSERT_EQ(close(open_tree_fd), 0); } @@ -1255,6 +1287,12 @@ TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace) ASSERT_GE(attr.userns_fd, 0); ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + /* + * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way + * to bypass this mount_setattr() restriction. + */ + ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); ASSERT_EQ(close(open_tree_fd), 0); } @@ -1321,6 +1359,19 @@ TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace) ASSERT_EQ(close(open_tree_fd), 0); } +static bool expected_uid_gid(int dfd, const char *path, int flags, + uid_t expected_uid, gid_t expected_gid) +{ + int ret; + struct stat st; + + ret = fstatat(dfd, path, &st, flags); + if (ret < 0) + return false; + + return st.st_uid == expected_uid && st.st_gid == expected_gid; +} + /** * Validate that currently changing the idmapping of an idmapped mount fails. */ @@ -1331,6 +1382,8 @@ TEST_F(mount_setattr_idmapped, change_idmapping) .attr_set = MOUNT_ATTR_IDMAP, }; + ASSERT_TRUE(expected_uid_gid(-EBADF, "/mnt/D", 0, 0, 0)); + if (!mount_setattr_supported()) SKIP(return, "mount_setattr syscall not supported"); @@ -1348,27 +1401,25 @@ TEST_F(mount_setattr_idmapped, change_idmapping) AT_EMPTY_PATH, &attr, sizeof(attr)), 0); ASSERT_EQ(close(attr.userns_fd), 0); + EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 0, 0)); + EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000)); + /* Change idmapping on a detached mount that is already idmapped. 
*/ attr.userns_fd = get_userns_fd(0, 20000, 10000); ASSERT_GE(attr.userns_fd, 0); ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + /* + * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way + * to bypass this mount_setattr() restriction. + */ + EXPECT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 20000, 20000)); + EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000)); + ASSERT_EQ(close(attr.userns_fd), 0); ASSERT_EQ(close(open_tree_fd), 0); } -static bool expected_uid_gid(int dfd, const char *path, int flags, - uid_t expected_uid, gid_t expected_gid) -{ - int ret; - struct stat st; - - ret = fstatat(dfd, path, &st, flags); - if (ret < 0) - return false; - - return st.st_uid == expected_uid && st.st_gid == expected_gid; -} - TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid) { int open_tree_fd = -EBADF; From 6b65028e2b51c023a816eabffea88980fdd5564e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 30 Jul 2025 12:28:41 +0200 Subject: [PATCH 1788/2411] iomap: Fix broken data integrity guarantees for O_SYNC writes Commit d279c80e0bac ("iomap: inline iomap_dio_bio_opflags()") has broken the logic in iomap_dio_bio_iter() in a way that when the device does support FUA (or has no writeback cache) and the direct IO happens to freshly allocated or unwritten extents, we will *not* issue fsync after completing direct IO O_SYNC / O_DSYNC write because the IOMAP_DIO_WRITE_THROUGH flag stays mistakenly set. Fix the problem by clearing IOMAP_DIO_WRITE_THROUGH whenever we do not perform FUA write as it was originally intended. 
CC: John Garry CC: Ritesh Harjani (IBM) Fixes: d279c80e0bac ("iomap: inline iomap_dio_bio_opflags()") CC: stable@vger.kernel.org Signed-off-by: Jan Kara Link: https://lore.kernel.org/20250730102840.20470-2-jack@suse.cz Reviewed-by: Ritesh Harjani (IBM) Reviewed-by: John Garry Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/iomap/direct-io.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 6f25d4cfea9f..b84f6af2eb4c 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -363,14 +363,14 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio) if (iomap->flags & IOMAP_F_SHARED) dio->flags |= IOMAP_DIO_COW; - if (iomap->flags & IOMAP_F_NEW) { + if (iomap->flags & IOMAP_F_NEW) need_zeroout = true; - } else if (iomap->type == IOMAP_MAPPED) { - if (iomap_dio_can_use_fua(iomap, dio)) - bio_opf |= REQ_FUA; - else - dio->flags &= ~IOMAP_DIO_WRITE_THROUGH; - } + else if (iomap->type == IOMAP_MAPPED && + iomap_dio_can_use_fua(iomap, dio)) + bio_opf |= REQ_FUA; + + if (!(bio_opf & REQ_FUA)) + dio->flags &= ~IOMAP_DIO_WRITE_THROUGH; /* * We can only do deferred completion for pure overwrites that From 542ede096e48436dbd70869640c0d88180565933 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Thu, 7 Aug 2025 10:50:15 -0700 Subject: [PATCH 1789/2411] fuse: keep inode->i_blkbits constant With fuse now using iomap for writeback handling, inode blkbits changes are problematic because iomap relies on inode->i_blkbits for its internal bitmap logic. Currently we change inode->i_blkbits in fuse to match the attr->blksize value passed in by the server. This commit keeps inode->i_blkbits constant in fuse. Any attr->blksize values passed in by the server will not update inode->i_blkbits. The client-side behavior for stat is unaffected, stat will still reflect the blocksize passed in by the server. 
Signed-off-by: Joanne Koong Link: https://lore.kernel.org/20250807175015.515192-1-joannelkoong@gmail.com Fixes: ef7e7cbb32 ("fuse: use iomap for writeback") Signed-off-by: Christian Brauner --- fs/fuse/inode.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index ecb869e895ab..67c2318bfc42 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -289,11 +289,6 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, } } - if (attr->blksize != 0) - inode->i_blkbits = ilog2(attr->blksize); - else - inode->i_blkbits = inode->i_sb->s_blocksize_bits; - /* * Don't set the sticky bit in i_mode, unless we want the VFS * to check permissions. This prevents failures due to the From 2319f9d0aa644eb9666c7be903078f50ecc2eb5b Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Mon, 11 Aug 2025 09:49:57 +0200 Subject: [PATCH 1790/2411] selftests/coredump: Remove the read() that fails the test Resolve a conflict between commit 6a68d28066b6 ("selftests/coredump: Fix "socket_detect_userspace_client" test failure") and commit 994dc26302ed ("selftests/coredump: fix build") The first commit adds a read() to wait for write() from another thread to finish. But the second commit removes the write(). Now that the two commits are in the same tree, the read() now gets EOF and the test fails. Remove this read() so that the test passes. 
Signed-off-by: Nam Cao Link: https://lore.kernel.org/20250811074957.4079616-1-namcao@linutronix.de Signed-off-by: Christian Brauner --- tools/testing/selftests/coredump/stackdump_test.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 5a5a7a5f7e1d..a4ac80bb1003 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -446,9 +446,6 @@ TEST_F(coredump, socket_detect_userspace_client) if (info.coredump_mask & PIDFD_COREDUMPED) goto out; - if (read(fd_coredump, &c, 1) < 1) - goto out; - exit_code = EXIT_SUCCESS; out: if (fd_peer_pidfd >= 0) From d5dd409812eca084e68208926bb629c8f708651f Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 5 Jun 2025 12:38:52 +0200 Subject: [PATCH 1791/2411] drbd: Remove the open-coded page pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the network stack keeps a reference for too long, DRBD keeps references on a higher number of pages as a consequence. Fix all that by no longer relying on page reference counts dropping to an expected value. Instead, DRBD gives up its reference and lets the system handle everything else. While at it, remove the open-coded custom page pool mechanism and use the page_pool included in the kernel. 
Signed-off-by: Philipp Reisner Signed-off-by: Christoph Böhmwalder Tested-by: Eric Hagberg Link: https://lore.kernel.org/r/20250605103852.23029-1-christoph.boehmwalder@linbit.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 39 +---- drivers/block/drbd/drbd_main.c | 59 ++----- drivers/block/drbd/drbd_receiver.c | 262 ++++------------------------- drivers/block/drbd/drbd_worker.c | 56 ++---- 4 files changed, 70 insertions(+), 346 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e21492981f7d..f6d6276974ee 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -380,6 +380,9 @@ enum { /* this is/was a write request */ __EE_WRITE, + /* hand back using mempool_free(e, drbd_buffer_page_pool) */ + __EE_RELEASE_TO_MEMPOOL, + /* this is/was a write same request */ __EE_WRITE_SAME, @@ -402,6 +405,7 @@ enum { #define EE_IN_INTERVAL_TREE (1<<__EE_IN_INTERVAL_TREE) #define EE_SUBMITTED (1<<__EE_SUBMITTED) #define EE_WRITE (1<<__EE_WRITE) +#define EE_RELEASE_TO_MEMPOOL (1<<__EE_RELEASE_TO_MEMPOOL) #define EE_WRITE_SAME (1<<__EE_WRITE_SAME) #define EE_APPLICATION (1<<__EE_APPLICATION) #define EE_RS_THIN_REQ (1<<__EE_RS_THIN_REQ) @@ -858,7 +862,6 @@ struct drbd_device { struct list_head sync_ee; /* IO in progress (P_RS_DATA_REPLY gets written to disk) */ struct list_head done_ee; /* need to send P_WRITE_ACK */ struct list_head read_ee; /* [RS]P_DATA_REQUEST being read */ - struct list_head net_ee; /* zero-copy network send in progress */ struct list_head resync_reads; atomic_t pp_in_use; /* allocated from page pool */ @@ -1329,24 +1332,6 @@ extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ extern mempool_t drbd_request_mempool; extern mempool_t drbd_ee_mempool; -/* drbd's page pool, used to buffer data received from the peer, - * or data requested by the peer. - * - * This does not have an emergency reserve. 
- * - * When allocating from this pool, it first takes pages from the pool. - * Only if the pool is depleted will try to allocate from the system. - * - * The assumption is that pages taken from this pool will be processed, - * and given back, "quickly", and then can be recycled, so we can avoid - * frequent calls to alloc_page(), and still will be able to make progress even - * under memory pressure. - */ -extern struct page *drbd_pp_pool; -extern spinlock_t drbd_pp_lock; -extern int drbd_pp_vacant; -extern wait_queue_head_t drbd_pp_wait; - /* We also need a standard (emergency-reserve backed) page pool * for meta data IO (activity log, bitmap). * We can keep it global, as long as it is used as "N pages at a time". @@ -1354,6 +1339,7 @@ extern wait_queue_head_t drbd_pp_wait; */ #define DRBD_MIN_POOL_PAGES 128 extern mempool_t drbd_md_io_page_pool; +extern mempool_t drbd_buffer_page_pool; /* We also need to make sure we get a bio * when we need it for housekeeping purposes */ @@ -1488,10 +1474,7 @@ extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, sector_t, unsigned int, unsigned int, gfp_t) __must_hold(local); -extern void __drbd_free_peer_req(struct drbd_device *, struct drbd_peer_request *, - int); -#define drbd_free_peer_req(m,e) __drbd_free_peer_req(m, e, 0) -#define drbd_free_net_peer_req(m,e) __drbd_free_peer_req(m, e, 1) +extern void drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *req); extern struct page *drbd_alloc_pages(struct drbd_peer_device *, unsigned int, bool); extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed); extern int drbd_connected(struct drbd_peer_device *); @@ -1610,16 +1593,6 @@ static inline struct page *page_chain_next(struct page *page) for (; page && ({ n = page_chain_next(page); 1; }); page = n) -static inline int drbd_peer_req_has_active_page(struct drbd_peer_request *peer_req) -{ - struct page *page = peer_req->pages; - 
page_chain_for_each(page) { - if (page_count(page) > 1) - return 1; - } - return 0; -} - static inline union drbd_state drbd_read_state(struct drbd_device *device) { struct drbd_resource *resource = device->resource; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 52724b79be30..c73376886e7a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -114,20 +114,10 @@ struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ mempool_t drbd_request_mempool; mempool_t drbd_ee_mempool; mempool_t drbd_md_io_page_pool; +mempool_t drbd_buffer_page_pool; struct bio_set drbd_md_io_bio_set; struct bio_set drbd_io_bio_set; -/* I do not use a standard mempool, because: - 1) I want to hand out the pre-allocated objects first. - 2) I want to be able to interrupt sleeping allocation with a signal. - Note: This is a single linked list, the next pointer is the private - member of struct page. - */ -struct page *drbd_pp_pool; -DEFINE_SPINLOCK(drbd_pp_lock); -int drbd_pp_vacant; -wait_queue_head_t drbd_pp_wait; - DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5); static const struct block_device_operations drbd_ops = { @@ -1611,6 +1601,7 @@ static int _drbd_send_zc_bio(struct drbd_peer_device *peer_device, struct bio *b static int _drbd_send_zc_ee(struct drbd_peer_device *peer_device, struct drbd_peer_request *peer_req) { + bool use_sendpage = !(peer_req->flags & EE_RELEASE_TO_MEMPOOL); struct page *page = peer_req->pages; unsigned len = peer_req->i.size; int err; @@ -1619,8 +1610,13 @@ static int _drbd_send_zc_ee(struct drbd_peer_device *peer_device, page_chain_for_each(page) { unsigned l = min_t(unsigned, len, PAGE_SIZE); - err = _drbd_send_page(peer_device, page, 0, l, - page_chain_next(page) ? MSG_MORE : 0); + if (likely(use_sendpage)) + err = _drbd_send_page(peer_device, page, 0, l, + page_chain_next(page) ? MSG_MORE : 0); + else + err = _drbd_no_send_page(peer_device, page, 0, l, + page_chain_next(page) ? 
MSG_MORE : 0); + if (err) return err; len -= l; @@ -1962,7 +1958,6 @@ void drbd_init_set_defaults(struct drbd_device *device) INIT_LIST_HEAD(&device->sync_ee); INIT_LIST_HEAD(&device->done_ee); INIT_LIST_HEAD(&device->read_ee); - INIT_LIST_HEAD(&device->net_ee); INIT_LIST_HEAD(&device->resync_reads); INIT_LIST_HEAD(&device->resync_work.list); INIT_LIST_HEAD(&device->unplug_work.list); @@ -2043,7 +2038,6 @@ void drbd_device_cleanup(struct drbd_device *device) D_ASSERT(device, list_empty(&device->sync_ee)); D_ASSERT(device, list_empty(&device->done_ee)); D_ASSERT(device, list_empty(&device->read_ee)); - D_ASSERT(device, list_empty(&device->net_ee)); D_ASSERT(device, list_empty(&device->resync_reads)); D_ASSERT(device, list_empty(&first_peer_device(device)->connection->sender_work.q)); D_ASSERT(device, list_empty(&device->resync_work.list)); @@ -2055,19 +2049,11 @@ void drbd_device_cleanup(struct drbd_device *device) static void drbd_destroy_mempools(void) { - struct page *page; - - while (drbd_pp_pool) { - page = drbd_pp_pool; - drbd_pp_pool = (struct page *)page_private(page); - __free_page(page); - drbd_pp_vacant--; - } - /* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */ bioset_exit(&drbd_io_bio_set); bioset_exit(&drbd_md_io_bio_set); + mempool_exit(&drbd_buffer_page_pool); mempool_exit(&drbd_md_io_page_pool); mempool_exit(&drbd_ee_mempool); mempool_exit(&drbd_request_mempool); @@ -2086,9 +2072,8 @@ static void drbd_destroy_mempools(void) static int drbd_create_mempools(void) { - struct page *page; const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count; - int i, ret; + int ret; /* caches */ drbd_request_cache = kmem_cache_create( @@ -2125,6 +2110,10 @@ static int drbd_create_mempools(void) if (ret) goto Enomem; + ret = mempool_init_page_pool(&drbd_buffer_page_pool, number, 0); + if (ret) + goto Enomem; + ret = mempool_init_slab_pool(&drbd_request_mempool, number, drbd_request_cache); if (ret) @@ -2134,15 +2123,6 @@ static int 
drbd_create_mempools(void) if (ret) goto Enomem; - for (i = 0; i < number; i++) { - page = alloc_page(GFP_HIGHUSER); - if (!page) - goto Enomem; - set_page_private(page, (unsigned long)drbd_pp_pool); - drbd_pp_pool = page; - } - drbd_pp_vacant = number; - return 0; Enomem: @@ -2169,10 +2149,6 @@ static void drbd_release_all_peer_reqs(struct drbd_device *device) rr = drbd_free_peer_reqs(device, &device->done_ee); if (rr) drbd_err(device, "%d EEs in done list found!\n", rr); - - rr = drbd_free_peer_reqs(device, &device->net_ee); - if (rr) - drbd_err(device, "%d EEs in net list found!\n", rr); } /* caution. no locking. */ @@ -2863,11 +2839,6 @@ static int __init drbd_init(void) return err; } - /* - * allocate all necessary structs - */ - init_waitqueue_head(&drbd_pp_wait); - drbd_proc = NULL; /* play safe for drbd_cleanup */ idr_init(&drbd_devices); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 975024cf03c5..caaf2781136d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "drbd_int.h" #include "drbd_protocol.h" #include "drbd_req.h" @@ -63,182 +64,31 @@ static int e_end_block(struct drbd_work *, int); #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) -/* - * some helper functions to deal with single linked page lists, - * page->private being our "next" pointer. - */ - -/* If at least n pages are linked at head, get n pages off. - * Otherwise, don't modify head, and return NULL. - * Locking is the responsibility of the caller. - */ -static struct page *page_chain_del(struct page **head, int n) -{ - struct page *page; - struct page *tmp; - - BUG_ON(!n); - BUG_ON(!head); - - page = *head; - - if (!page) - return NULL; - - while (page) { - tmp = page_chain_next(page); - if (--n == 0) - break; /* found sufficient pages */ - if (tmp == NULL) - /* insufficient pages, don't use any of them. 
*/ - return NULL; - page = tmp; - } - - /* add end of list marker for the returned list */ - set_page_private(page, 0); - /* actual return value, and adjustment of head */ - page = *head; - *head = tmp; - return page; -} - -/* may be used outside of locks to find the tail of a (usually short) - * "private" page chain, before adding it back to a global chain head - * with page_chain_add() under a spinlock. */ -static struct page *page_chain_tail(struct page *page, int *len) -{ - struct page *tmp; - int i = 1; - while ((tmp = page_chain_next(page))) { - ++i; - page = tmp; - } - if (len) - *len = i; - return page; -} - -static int page_chain_free(struct page *page) -{ - struct page *tmp; - int i = 0; - page_chain_for_each_safe(page, tmp) { - put_page(page); - ++i; - } - return i; -} - -static void page_chain_add(struct page **head, - struct page *chain_first, struct page *chain_last) -{ -#if 1 - struct page *tmp; - tmp = page_chain_tail(chain_first, NULL); - BUG_ON(tmp != chain_last); -#endif - - /* add chain to head */ - set_page_private(chain_last, (unsigned long)*head); - *head = chain_first; -} - -static struct page *__drbd_alloc_pages(struct drbd_device *device, - unsigned int number) +static struct page *__drbd_alloc_pages(unsigned int number) { struct page *page = NULL; struct page *tmp = NULL; unsigned int i = 0; - /* Yes, testing drbd_pp_vacant outside the lock is racy. - * So what. It saves a spin_lock. */ - if (drbd_pp_vacant >= number) { - spin_lock(&drbd_pp_lock); - page = page_chain_del(&drbd_pp_pool, number); - if (page) - drbd_pp_vacant -= number; - spin_unlock(&drbd_pp_lock); - if (page) - return page; - } - /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD * "criss-cross" setup, that might cause write-out on some other DRBD, * which in turn might block on the other node at this very place. 
*/ for (i = 0; i < number; i++) { - tmp = alloc_page(GFP_TRY); + tmp = mempool_alloc(&drbd_buffer_page_pool, GFP_TRY); if (!tmp) - break; + goto fail; set_page_private(tmp, (unsigned long)page); page = tmp; } - - if (i == number) - return page; - - /* Not enough pages immediately available this time. - * No need to jump around here, drbd_alloc_pages will retry this - * function "soon". */ - if (page) { - tmp = page_chain_tail(page, NULL); - spin_lock(&drbd_pp_lock); - page_chain_add(&drbd_pp_pool, page, tmp); - drbd_pp_vacant += i; - spin_unlock(&drbd_pp_lock); + return page; +fail: + page_chain_for_each_safe(page, tmp) { + set_page_private(page, 0); + mempool_free(page, &drbd_buffer_page_pool); } return NULL; } -static void reclaim_finished_net_peer_reqs(struct drbd_device *device, - struct list_head *to_be_freed) -{ - struct drbd_peer_request *peer_req, *tmp; - - /* The EEs are always appended to the end of the list. Since - they are sent in order over the wire, they have to finish - in order. As soon as we see the first not finished we can - stop to examine the list... 
*/ - - list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) { - if (drbd_peer_req_has_active_page(peer_req)) - break; - list_move(&peer_req->w.list, to_be_freed); - } -} - -static void drbd_reclaim_net_peer_reqs(struct drbd_device *device) -{ - LIST_HEAD(reclaimed); - struct drbd_peer_request *peer_req, *t; - - spin_lock_irq(&device->resource->req_lock); - reclaim_finished_net_peer_reqs(device, &reclaimed); - spin_unlock_irq(&device->resource->req_lock); - list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) - drbd_free_net_peer_req(device, peer_req); -} - -static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection) -{ - struct drbd_peer_device *peer_device; - int vnr; - - rcu_read_lock(); - idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { - struct drbd_device *device = peer_device->device; - if (!atomic_read(&device->pp_in_use_by_net)) - continue; - - kref_get(&device->kref); - rcu_read_unlock(); - drbd_reclaim_net_peer_reqs(device); - kref_put(&device->kref, drbd_destroy_device); - rcu_read_lock(); - } - rcu_read_unlock(); -} - /** * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled) * @peer_device: DRBD device. @@ -263,9 +113,8 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int bool retry) { struct drbd_device *device = peer_device->device; - struct page *page = NULL; + struct page *page; struct net_conf *nc; - DEFINE_WAIT(wait); unsigned int mxb; rcu_read_lock(); @@ -273,37 +122,9 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int mxb = nc ? nc->max_buffers : 1000000; rcu_read_unlock(); - if (atomic_read(&device->pp_in_use) < mxb) - page = __drbd_alloc_pages(device, number); - - /* Try to keep the fast path fast, but occasionally we need - * to reclaim the pages we lended to the network stack. 
*/ - if (page && atomic_read(&device->pp_in_use_by_net) > 512) - drbd_reclaim_net_peer_reqs(device); - - while (page == NULL) { - prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE); - - drbd_reclaim_net_peer_reqs(device); - - if (atomic_read(&device->pp_in_use) < mxb) { - page = __drbd_alloc_pages(device, number); - if (page) - break; - } - - if (!retry) - break; - - if (signal_pending(current)) { - drbd_warn(device, "drbd_alloc_pages interrupted!\n"); - break; - } - - if (schedule_timeout(HZ/10) == 0) - mxb = UINT_MAX; - } - finish_wait(&drbd_pp_wait, &wait); + if (atomic_read(&device->pp_in_use) >= mxb) + schedule_timeout_interruptible(HZ / 10); + page = __drbd_alloc_pages(number); if (page) atomic_add(number, &device->pp_in_use); @@ -314,29 +135,25 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int * Is also used from inside an other spin_lock_irq(&resource->req_lock); * Either links the page chain back to the global pool, * or returns all pages to the system. */ -static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net) +static void drbd_free_pages(struct drbd_device *device, struct page *page) { - atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use; - int i; + struct page *tmp; + int i = 0; if (page == NULL) return; - if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count) - i = page_chain_free(page); - else { - struct page *tmp; - tmp = page_chain_tail(page, &i); - spin_lock(&drbd_pp_lock); - page_chain_add(&drbd_pp_pool, page, tmp); - drbd_pp_vacant += i; - spin_unlock(&drbd_pp_lock); + page_chain_for_each_safe(page, tmp) { + set_page_private(page, 0); + if (page_count(page) == 1) + mempool_free(page, &drbd_buffer_page_pool); + else + put_page(page); + i++; } - i = atomic_sub_return(i, a); + i = atomic_sub_return(i, &device->pp_in_use); if (i < 0) - drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n", - is_net ? 
"pp_in_use_by_net" : "pp_in_use", i); - wake_up(&drbd_pp_wait); + drbd_warn(device, "ASSERTION FAILED: pp_in_use: %d < 0\n", i); } /* @@ -380,6 +197,8 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto gfpflags_allow_blocking(gfp_mask)); if (!page) goto fail; + if (!mempool_is_saturated(&drbd_buffer_page_pool)) + peer_req->flags |= EE_RELEASE_TO_MEMPOOL; } memset(peer_req, 0, sizeof(*peer_req)); @@ -403,13 +222,12 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto return NULL; } -void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req, - int is_net) +void drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req) { might_sleep(); if (peer_req->flags & EE_HAS_DIGEST) kfree(peer_req->digest); - drbd_free_pages(device, peer_req->pages, is_net); + drbd_free_pages(device, peer_req->pages); D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0); D_ASSERT(device, drbd_interval_empty(&peer_req->i)); if (!expect(device, !(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) { @@ -424,14 +242,13 @@ int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list) LIST_HEAD(work_list); struct drbd_peer_request *peer_req, *t; int count = 0; - int is_net = list == &device->net_ee; spin_lock_irq(&device->resource->req_lock); list_splice_init(list, &work_list); spin_unlock_irq(&device->resource->req_lock); list_for_each_entry_safe(peer_req, t, &work_list, w.list) { - __drbd_free_peer_req(device, peer_req, is_net); + drbd_free_peer_req(device, peer_req); count++; } return count; @@ -443,18 +260,13 @@ int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list) static int drbd_finish_peer_reqs(struct drbd_device *device) { LIST_HEAD(work_list); - LIST_HEAD(reclaimed); struct drbd_peer_request *peer_req, *t; int err = 0; spin_lock_irq(&device->resource->req_lock); - reclaim_finished_net_peer_reqs(device, &reclaimed); 
list_splice_init(&device->done_ee, &work_list); spin_unlock_irq(&device->resource->req_lock); - list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) - drbd_free_net_peer_req(device, peer_req); - /* possible callbacks here: * e_end_block, and e_end_resync_block, e_send_superseded. * all ignore the last argument. @@ -1975,7 +1787,7 @@ static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size) data_size -= len; } kunmap(page); - drbd_free_pages(peer_device->device, page, 0); + drbd_free_pages(peer_device->device, page); return err; } @@ -5224,16 +5036,6 @@ static int drbd_disconnected(struct drbd_peer_device *peer_device) put_ldev(device); } - /* tcp_close and release of sendpage pages can be deferred. I don't - * want to use SO_LINGER, because apparently it can be deferred for - * more than 20 seconds (longest time I checked). - * - * Actually we don't care for exactly when the network stack does its - * put_page(), but release our reference on these pages right here. 
- */ - i = drbd_free_peer_reqs(device, &device->net_ee); - if (i) - drbd_info(device, "net_ee not empty, killed %u entries\n", i); i = atomic_read(&device->pp_in_use_by_net); if (i) drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i); @@ -5980,8 +5782,6 @@ int drbd_ack_receiver(struct drbd_thread *thi) while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); - conn_reclaim_net_peer_reqs(connection); - if (test_and_clear_bit(SEND_PING, &connection->flags)) { if (drbd_send_ping(connection)) { drbd_err(connection, "drbd_send_ping has failed\n"); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index a6ea737b3b71..dea3e79d044f 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1030,22 +1030,6 @@ int drbd_resync_finished(struct drbd_peer_device *peer_device) return 1; } -/* helper */ -static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req) -{ - if (drbd_peer_req_has_active_page(peer_req)) { - /* This might happen if sendpage() has not finished */ - int i = PFN_UP(peer_req->i.size); - atomic_add(i, &device->pp_in_use_by_net); - atomic_sub(i, &device->pp_in_use); - spin_lock_irq(&device->resource->req_lock); - list_add_tail(&peer_req->w.list, &device->net_ee); - spin_unlock_irq(&device->resource->req_lock); - wake_up(&drbd_pp_wait); - } else - drbd_free_peer_req(device, peer_req); -} - /** * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST * @w: work object. 
@@ -1059,9 +1043,8 @@ int w_e_end_data_req(struct drbd_work *w, int cancel) int err; if (unlikely(cancel)) { - drbd_free_peer_req(device, peer_req); - dec_unacked(device); - return 0; + err = 0; + goto out; } if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { @@ -1074,12 +1057,12 @@ int w_e_end_data_req(struct drbd_work *w, int cancel) err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req); } - dec_unacked(device); - - move_to_net_ee_or_free(device, peer_req); - if (unlikely(err)) drbd_err(device, "drbd_send_block() failed\n"); +out: + dec_unacked(device); + drbd_free_peer_req(device, peer_req); + return err; } @@ -1120,9 +1103,8 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) int err; if (unlikely(cancel)) { - drbd_free_peer_req(device, peer_req); - dec_unacked(device); - return 0; + err = 0; + goto out; } if (get_ldev_if_state(device, D_FAILED)) { @@ -1155,13 +1137,12 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) /* update resync data with failure */ drbd_rs_failed_io(peer_device, peer_req->i.sector, peer_req->i.size); } - - dec_unacked(device); - - move_to_net_ee_or_free(device, peer_req); - if (unlikely(err)) drbd_err(device, "drbd_send_block() failed\n"); +out: + dec_unacked(device); + drbd_free_peer_req(device, peer_req); + return err; } @@ -1176,9 +1157,8 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) int err, eq = 0; if (unlikely(cancel)) { - drbd_free_peer_req(device, peer_req); - dec_unacked(device); - return 0; + err = 0; + goto out; } if (get_ldev(device)) { @@ -1220,12 +1200,12 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) if (drbd_ratelimit()) drbd_err(device, "Sending NegDReply. 
I guess it gets messy.\n"); } - - dec_unacked(device); - move_to_net_ee_or_free(device, peer_req); - if (unlikely(err)) drbd_err(device, "drbd_send_block/ack() failed\n"); +out: + dec_unacked(device); + drbd_free_peer_req(device, peer_req); + return err; } From 212c928d01e9ea1d1c46a114650b551da8ca823e Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Fri, 8 Aug 2025 15:44:43 -0600 Subject: [PATCH 1792/2411] ublk: don't quiesce in ublk_ch_release ublk_ch_release currently quiesces the device's request_queue while setting force_abort/fail_io. This avoids data races by preventing concurrent reads from the I/O path, but is not strictly needed - at this point, canceling is already set and guaranteed to be observed by any concurrently executing I/Os, so they will be handled properly even if the changes to force_abort/fail_io propagate to the I/O path later. Remove the quiesce/unquiesce calls from ublk_ch_release. This makes the writes to force_abort/fail_io concurrent with the reads in the I/O path, so make the accesses atomic. Before this change, the call to blk_mq_quiesce_queue was responsible for most (90%) of the runtime of ublk_ch_release. With that call eliminated, ublk_ch_release runs much faster. 
Here is a comparison of the total time spent in calls to ublk_ch_release when a server handling 128 devices exits, before and after this change: before: 1.11s after: 0.09s Signed-off-by: Uday Shankar Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250808-ublk_quiesce2-v1-1-f87ade33fa3d@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 6561d2a561fa..6b95cf48ae77 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1389,7 +1389,7 @@ static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq, { blk_status_t res; - if (unlikely(ubq->fail_io)) + if (unlikely(READ_ONCE(ubq->fail_io))) return BLK_STS_TARGET; /* With recovery feature enabled, force_abort is set in @@ -1401,7 +1401,8 @@ static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq, * Note: force_abort is guaranteed to be seen because it is set * before request queue is unqiuesced. */ - if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort)) + if (ublk_nosrv_should_queue_io(ubq) && + unlikely(READ_ONCE(ubq->force_abort))) return BLK_STS_IOERR; if (check_cancel && unlikely(ubq->canceling)) @@ -1644,7 +1645,6 @@ static int ublk_ch_release(struct inode *inode, struct file *filp) * Transition the device to the nosrv state. 
What exactly this * means depends on the recovery flags */ - blk_mq_quiesce_queue(disk->queue); if (ublk_nosrv_should_stop_dev(ub)) { /* * Allow any pending/future I/O to pass through quickly @@ -1652,8 +1652,7 @@ static int ublk_ch_release(struct inode *inode, struct file *filp) * waits for all pending I/O to complete */ for (i = 0; i < ub->dev_info.nr_hw_queues; i++) - ublk_get_queue(ub, i)->force_abort = true; - blk_mq_unquiesce_queue(disk->queue); + WRITE_ONCE(ublk_get_queue(ub, i)->force_abort, true); ublk_stop_dev_unlocked(ub); } else { @@ -1663,9 +1662,8 @@ static int ublk_ch_release(struct inode *inode, struct file *filp) } else { ub->dev_info.state = UBLK_S_DEV_FAIL_IO; for (i = 0; i < ub->dev_info.nr_hw_queues; i++) - ublk_get_queue(ub, i)->fail_io = true; + WRITE_ONCE(ublk_get_queue(ub, i)->fail_io, true); } - blk_mq_unquiesce_queue(disk->queue); } unlock: mutex_unlock(&ub->mutex); From 5058a62875e1916e5133a1639f0207ea2148c0bc Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 8 Aug 2025 09:52:15 -0600 Subject: [PATCH 1793/2411] ublk: check for unprivileged daemon on each I/O fetch Commit ab03a61c6614 ("ublk: have a per-io daemon instead of a per-queue daemon") allowed each ublk I/O to have an independent daemon task. However, nr_privileged_daemon is only computed based on whether the last I/O fetched in each ublk queue has an unprivileged daemon task. Fix this by checking whether every fetched I/O's daemon is privileged. Change nr_privileged_daemon from a count of queues to a boolean indicating whether any I/Os have an unprivileged daemon. 
Signed-off-by: Caleb Sander Mateos Fixes: ab03a61c6614 ("ublk: have a per-io daemon instead of a per-queue daemon") Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250808155216.296170-1-csander@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 6b95cf48ae77..99abd67b708b 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -235,7 +235,7 @@ struct ublk_device { struct completion completion; unsigned int nr_queues_ready; - unsigned int nr_privileged_daemon; + bool unprivileged_daemons; struct mutex cancel_mutex; bool canceling; pid_t ublksrv_tgid; @@ -1551,7 +1551,7 @@ static void ublk_reset_ch_dev(struct ublk_device *ub) /* set to NULL, otherwise new tasks cannot mmap io_cmd_buf */ ub->mm = NULL; ub->nr_queues_ready = 0; - ub->nr_privileged_daemon = 0; + ub->unprivileged_daemons = false; ub->ublksrv_tgid = -1; } @@ -1978,12 +1978,10 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) __must_hold(&ub->mutex) { ubq->nr_io_ready++; - if (ublk_queue_ready(ubq)) { + if (ublk_queue_ready(ubq)) ub->nr_queues_ready++; - - if (capable(CAP_SYS_ADMIN)) - ub->nr_privileged_daemon++; - } + if (!ub->unprivileged_daemons && !capable(CAP_SYS_ADMIN)) + ub->unprivileged_daemons = true; if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) { /* now we are ready for handling ublk io request */ @@ -2878,8 +2876,8 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, ublk_apply_params(ub); - /* don't probe partitions if any one ubq daemon is un-trusted */ - if (ub->nr_privileged_daemon != ub->nr_queues_ready) + /* don't probe partitions if any daemon task is un-trusted */ + if (ub->unprivileged_daemons) set_bit(GD_SUPPRESS_PART_SCAN, &disk->state); ublk_get_device(ub); From 8f3e4e87b0945aeea8b5a5aa43c419f4a1b4ca6a Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Mon, 
11 Aug 2025 16:11:35 +0800 Subject: [PATCH 1794/2411] block, bfq: remove redundant __GFP_NOWARN Commit 16f5dfbc851b ("gfp: include __GFP_NOWARN in GFP_NOWAIT") made GFP_NOWAIT implicitly include __GFP_NOWARN. Therefore, explicit __GFP_NOWARN combined with GFP_NOWAIT (e.g., `GFP_NOWAIT | __GFP_NOWARN`) is now redundant. Let's clean up these redundant flags across subsystems. Reviewed-by: Yu Kuai Signed-off-by: Qianfeng Rong Link: https://lore.kernel.org/r/20250811081135.374315-1-rongqianfeng@vivo.com Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 3bf76902f07f..50e51047e1fe 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -5847,8 +5847,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, goto out; } - bfqq = kmem_cache_alloc_node(bfq_pool, - GFP_NOWAIT | __GFP_ZERO | __GFP_NOWARN, + bfqq = kmem_cache_alloc_node(bfq_pool, GFP_NOWAIT | __GFP_ZERO, bfqd->queue->node); if (bfqq) { From 196447c712dd486f4315356c572a1d13dd743f08 Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Sat, 9 Aug 2025 22:13:58 +0800 Subject: [PATCH 1795/2411] blk-cgroup: remove redundant __GFP_NOWARN Commit 16f5dfbc851b ("gfp: include __GFP_NOWARN in GFP_NOWAIT") made GFP_NOWAIT implicitly include __GFP_NOWARN. Therefore, explicit __GFP_NOWARN combined with GFP_NOWAIT (e.g., `GFP_NOWAIT | __GFP_NOWARN`) is now redundant. Let's clean up these redundant flags across subsystems. 
Signed-off-by: Qianfeng Rong Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20250809141358.168781-1-rongqianfeng@vivo.com Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 5936db7f8475..fe9ebd6a2e14 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -394,7 +394,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk, /* allocate */ if (!new_blkg) { - new_blkg = blkg_alloc(blkcg, disk, GFP_NOWAIT | __GFP_NOWARN); + new_blkg = blkg_alloc(blkcg, disk, GFP_NOWAIT); if (unlikely(!new_blkg)) { ret = -ENOMEM; goto err_put_css; @@ -1467,7 +1467,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) spin_lock_init(&blkcg->lock); refcount_set(&blkcg->online_pin, 1); - INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT | __GFP_NOWARN); + INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT); INIT_HLIST_HEAD(&blkcg->blkg_list); #ifdef CONFIG_CGROUP_WRITEBACK INIT_LIST_HEAD(&blkcg->cgwb_list); @@ -1630,7 +1630,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol) pd_prealloc = NULL; } else { pd = pol->pd_alloc_fn(disk, blkg->blkcg, - GFP_NOWAIT | __GFP_NOWARN); + GFP_NOWAIT); } if (!pd) { From 343dc5423bfe876c12bb80c56f5e44286e442a07 Mon Sep 17 00:00:00 2001 From: Zheng Qixing Date: Fri, 8 Aug 2025 13:36:09 +0800 Subject: [PATCH 1796/2411] block: fix kobject double initialization in add_disk Device-mapper can call add_disk() multiple times for the same gendisk due to its two-phase creation process (dm create + dm load). This leads to kobject double initialization errors when the underlying iSCSI devices become temporarily unavailable and then reappear. However, if the first add_disk() call fails and is retried, the queue_kobj gets initialized twice, causing: kobject: kobject (ffff88810c27bb90): tried to init an initialized object, something is seriously wrong. 
Call Trace: dump_stack_lvl+0x5b/0x80 kobject_init.cold+0x43/0x51 blk_register_queue+0x46/0x280 add_disk_fwnode+0xb5/0x280 dm_setup_md_queue+0x194/0x1c0 table_load+0x297/0x2d0 ctl_ioctl+0x2a2/0x480 dm_ctl_ioctl+0xe/0x20 __x64_sys_ioctl+0xc7/0x110 do_syscall_64+0x72/0x390 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fix this by separating kobject initialization from sysfs registration: - Initialize queue_kobj early during gendisk allocation - add_disk() only adds the already-initialized kobject to sysfs - del_gendisk() removes from sysfs but doesn't destroy the kobject - Final cleanup happens when the disk is released Fixes: 2bd85221a625 ("block: untangle request_queue refcounting from sysfs") Reported-by: Li Lingfeng Closes: https://lore.kernel.org/all/83591d0b-2467-433c-bce0-5581298eb161@huawei.com/ Signed-off-by: Zheng Qixing Reviewed-by: Ming Lei Reviewed-by: Yu Kuai Reviewed-by: Nilay Shroff Link: https://lore.kernel.org/r/20250808053609.3237836-1-zhengqixing@huaweicloud.com Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 12 +++++------- block/blk.h | 1 + block/genhd.c | 2 ++ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 396cded255ea..c5cf79a20842 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -847,7 +847,7 @@ static void blk_queue_release(struct kobject *kobj) /* nothing to do here, all data is associated with the parent gendisk */ } -static const struct kobj_type blk_queue_ktype = { +const struct kobj_type blk_queue_ktype = { .default_groups = blk_queue_attr_groups, .sysfs_ops = &queue_sysfs_ops, .release = blk_queue_release, @@ -875,15 +875,14 @@ int blk_register_queue(struct gendisk *disk) struct request_queue *q = disk->queue; int ret; - kobject_init(&disk->queue_kobj, &blk_queue_ktype); ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue"); if (ret < 0) - goto out_put_queue_kobj; + return ret; if (queue_is_mq(q)) { ret = blk_mq_sysfs_register(disk); if (ret) - goto 
out_put_queue_kobj; + goto out_del_queue_kobj; } mutex_lock(&q->sysfs_lock); @@ -934,8 +933,8 @@ int blk_register_queue(struct gendisk *disk) mutex_unlock(&q->sysfs_lock); if (queue_is_mq(q)) blk_mq_sysfs_unregister(disk); -out_put_queue_kobj: - kobject_put(&disk->queue_kobj); +out_del_queue_kobj: + kobject_del(&disk->queue_kobj); return ret; } @@ -986,5 +985,4 @@ void blk_unregister_queue(struct gendisk *disk) elevator_set_none(q); blk_debugfs_remove(disk); - kobject_put(&disk->queue_kobj); } diff --git a/block/blk.h b/block/blk.h index 0a2eccf28ca4..46f566f9b126 100644 --- a/block/blk.h +++ b/block/blk.h @@ -29,6 +29,7 @@ struct elevator_tags; /* Max future timer expiry for timeouts */ #define BLK_MAX_TIMEOUT (5 * HZ) +extern const struct kobj_type blk_queue_ktype; extern struct dentry *blk_debugfs_root; struct blk_flush_queue { diff --git a/block/genhd.c b/block/genhd.c index c26733f6324b..9bbc38d12792 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1303,6 +1303,7 @@ static void disk_release(struct device *dev) disk_free_zone_resources(disk); xa_destroy(&disk->part_tbl); + kobject_put(&disk->queue_kobj); disk->queue->disk = NULL; blk_put_queue(disk->queue); @@ -1486,6 +1487,7 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, INIT_LIST_HEAD(&disk->slave_bdevs); #endif mutex_init(&disk->rqos_state_mutex); + kobject_init(&disk->queue_kobj, &blk_queue_ktype); return disk; out_erase_part0: From 593d9e4c3d634c370f226f55453c376bf43b3684 Mon Sep 17 00:00:00 2001 From: Yuntao Wang Date: Mon, 11 Aug 2025 13:24:26 +0800 Subject: [PATCH 1797/2411] fs: fix incorrect lflags value in the move_mount syscall The lflags value used to look up from_path was overwritten by the one used to look up to_path. In other words, from_path was looked up with the wrong lflags value. Fix it. 
Fixes: f9fde814de37 ("fs: support getname_maybe_null() in move_mount()") Signed-off-by: Yuntao Wang Link: https://lore.kernel.org/20250811052426.129188-1-yuntao.wang@linux.dev [Christian Brauner: massage patch] Signed-off-by: Christian Brauner --- fs/namespace.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index ceb6b57e6a57..43f32ee9f95c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4551,20 +4551,10 @@ SYSCALL_DEFINE5(move_mount, if (flags & MOVE_MOUNT_SET_GROUP) mflags |= MNT_TREE_PROPAGATION; if (flags & MOVE_MOUNT_BENEATH) mflags |= MNT_TREE_BENEATH; - lflags = 0; - if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW; - if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT; uflags = 0; - if (flags & MOVE_MOUNT_F_EMPTY_PATH) uflags = AT_EMPTY_PATH; - from_name = getname_maybe_null(from_pathname, uflags); - if (IS_ERR(from_name)) - return PTR_ERR(from_name); + if (flags & MOVE_MOUNT_T_EMPTY_PATH) + uflags = AT_EMPTY_PATH; - lflags = 0; - if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW; - if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT; - uflags = 0; - if (flags & MOVE_MOUNT_T_EMPTY_PATH) uflags = AT_EMPTY_PATH; to_name = getname_maybe_null(to_pathname, uflags); if (IS_ERR(to_name)) return PTR_ERR(to_name); @@ -4577,11 +4567,24 @@ SYSCALL_DEFINE5(move_mount, to_path = fd_file(f_to)->f_path; path_get(&to_path); } else { + lflags = 0; + if (flags & MOVE_MOUNT_T_SYMLINKS) + lflags |= LOOKUP_FOLLOW; + if (flags & MOVE_MOUNT_T_AUTOMOUNTS) + lflags |= LOOKUP_AUTOMOUNT; ret = filename_lookup(to_dfd, to_name, lflags, &to_path, NULL); if (ret) return ret; } + uflags = 0; + if (flags & MOVE_MOUNT_F_EMPTY_PATH) + uflags = AT_EMPTY_PATH; + + from_name = getname_maybe_null(from_pathname, uflags); + if (IS_ERR(from_name)) + return PTR_ERR(from_name); + if (!from_name && from_dfd >= 0) { CLASS(fd_raw, f_from)(from_dfd); if (fd_empty(f_from)) 
@@ -4590,6 +4593,11 @@ SYSCALL_DEFINE5(move_mount, return vfs_move_mount(&fd_file(f_from)->f_path, &to_path, mflags); } + lflags = 0; + if (flags & MOVE_MOUNT_F_SYMLINKS) + lflags |= LOOKUP_FOLLOW; + if (flags & MOVE_MOUNT_F_AUTOMOUNTS) + lflags |= LOOKUP_AUTOMOUNT; ret = filename_lookup(from_dfd, from_name, lflags, &from_path, NULL); if (ret) return ret; From 6d3c3ca4c77e93660cce5819bf707f75df03e0c8 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 8 Aug 2025 15:28:47 +0200 Subject: [PATCH 1798/2411] module: Rename EXPORT_SYMBOL_GPL_FOR_MODULES to EXPORT_SYMBOL_FOR_MODULES Christoph suggested that the explicit _GPL_ can be dropped from the module namespace export macro, as it's intended for in-tree modules only. It would be possible to restrict it technically, but it was pointed out [2] that some cases of using an out-of-tree build of an in-tree module with the same name are legitimate. But in that case those also have to be GPL anyway so it's unnecessary to spell it out in the macro name. 
Link: https://lore.kernel.org/all/aFleJN_fE-RbSoFD@infradead.org/ [1] Link: https://lore.kernel.org/all/CAK7LNATRkZHwJGpojCnvdiaoDnP%2BaeUXgdey5sb_8muzdWTMkA@mail.gmail.com/ [2] Suggested-by: Christoph Hellwig Reviewed-by: Shivank Garg Acked-by: David Hildenbrand Acked-by: Nicolas Schier Reviewed-by: Daniel Gomez Reviewed-by: Christian Brauner Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/20250808-export_modules-v4-1-426945bcc5e1@suse.cz Signed-off-by: Christian Brauner --- Documentation/core-api/symbol-namespaces.rst | 11 ++++++----- drivers/tty/serial/8250/8250_rsa.c | 8 ++++---- fs/anon_inodes.c | 2 +- include/linux/export.h | 2 +- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/Documentation/core-api/symbol-namespaces.rst b/Documentation/core-api/symbol-namespaces.rst index 32fc73dc5529..034898e81ba2 100644 --- a/Documentation/core-api/symbol-namespaces.rst +++ b/Documentation/core-api/symbol-namespaces.rst @@ -76,20 +76,21 @@ unit as preprocessor statement. The above example would then read:: within the corresponding compilation unit before the #include for . Typically it's placed before the first #include statement. -Using the EXPORT_SYMBOL_GPL_FOR_MODULES() macro ------------------------------------------------ +Using the EXPORT_SYMBOL_FOR_MODULES() macro +------------------------------------------- Symbols exported using this macro are put into a module namespace. This -namespace cannot be imported. +namespace cannot be imported. These exports are GPL-only as they are only +intended for in-tree modules. The macro takes a comma separated list of module names, allowing only those modules to access this symbol. Simple tail-globs are supported. 
For example:: - EXPORT_SYMBOL_GPL_FOR_MODULES(preempt_notifier_inc, "kvm,kvm-*") + EXPORT_SYMBOL_FOR_MODULES(preempt_notifier_inc, "kvm,kvm-*") -will limit usage of this symbol to modules whoes name matches the given +will limit usage of this symbol to modules whose name matches the given patterns. How to use Symbols exported in Namespaces diff --git a/drivers/tty/serial/8250/8250_rsa.c b/drivers/tty/serial/8250/8250_rsa.c index d34093cc03ad..12a65b79583c 100644 --- a/drivers/tty/serial/8250/8250_rsa.c +++ b/drivers/tty/serial/8250/8250_rsa.c @@ -147,7 +147,7 @@ void rsa_enable(struct uart_8250_port *up) if (up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) serial_out(up, UART_RSA_FRR, 0); } -EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_enable, "8250_base"); +EXPORT_SYMBOL_FOR_MODULES(rsa_enable, "8250_base"); /* * Attempts to turn off the RSA FIFO and resets the RSA board back to 115kbps compat mode. It is @@ -179,7 +179,7 @@ void rsa_disable(struct uart_8250_port *up) up->port.uartclk = SERIAL_RSA_BAUD_BASE_LO * 16; uart_port_unlock_irq(&up->port); } -EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_disable, "8250_base"); +EXPORT_SYMBOL_FOR_MODULES(rsa_disable, "8250_base"); void rsa_autoconfig(struct uart_8250_port *up) { @@ -192,7 +192,7 @@ void rsa_autoconfig(struct uart_8250_port *up) if (__rsa_enable(up)) up->port.type = PORT_RSA; } -EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_autoconfig, "8250_base"); +EXPORT_SYMBOL_FOR_MODULES(rsa_autoconfig, "8250_base"); void rsa_reset(struct uart_8250_port *up) { @@ -201,7 +201,7 @@ void rsa_reset(struct uart_8250_port *up) serial_out(up, UART_RSA_FRR, 0); } -EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_reset, "8250_base"); +EXPORT_SYMBOL_FOR_MODULES(rsa_reset, "8250_base"); #ifdef CONFIG_SERIAL_8250_DEPRECATED_OPTIONS #ifndef MODULE diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 1d847a939f29..180a458fc4f7 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -129,7 +129,7 @@ struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *n } 
return inode; } -EXPORT_SYMBOL_GPL_FOR_MODULES(anon_inode_make_secure_inode, "kvm"); +EXPORT_SYMBOL_FOR_MODULES(anon_inode_make_secure_inode, "kvm"); static struct file *__anon_inode_getfile(const char *name, const struct file_operations *fops, diff --git a/include/linux/export.h b/include/linux/export.h index f35d03b4113b..a686fd0ba406 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -91,6 +91,6 @@ #define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", ns) #define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "GPL", ns) -#define EXPORT_SYMBOL_GPL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods) +#define EXPORT_SYMBOL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods) #endif /* _LINUX_EXPORT_H */ From b26e2afb3834d4a61ce54c8484ff6014bef0b4b7 Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Mon, 11 Aug 2025 16:27:16 +0300 Subject: [PATCH 1799/2411] ALSA: hda/realtek: Fix headset mic on HONOR BRB-X Add a PCI quirk to enable microphone input on the headphone jack on the HONOR BRB-X M1010 laptop. 
Signed-off-by: Vasiliy Kovalev Cc: Link: https://patch.msgid.link/20250811132716.45076-1-kovalev@altlinux.org Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 337e33a59de8..e90c4047ea62 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7140,6 +7140,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), + SND_PCI_QUIRK(0x1ee7, 0x2078, "HONOR BRB-X M1010", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1f66, 0x0105, "Ayaneo Portable Game Player", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x2014, 0x800a, "Positivo ARN50", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x2782, 0x0214, "VAIO VJFE-CL", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From d28b9d2925b4f773adb21b1fc20260ddc370fb13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Mon, 30 Jun 2025 11:00:53 +0200 Subject: [PATCH 1800/2411] drm/tests: Fix endian warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling with sparse enabled, this warning is thrown: warning: incorrect type in argument 2 (different base types) expected restricted __le32 const [usertype] *buf got unsigned int [usertype] *[assigned] buf Add a cast to fix it. 
Fixes: 453114319699 ("drm/format-helper: Add KUnit tests for drm_fb_xrgb8888_to_xrgb2101010()") Signed-off-by: José Expósito Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250630090054.353246-1-jose.exposito89@gmail.com --- drivers/gpu/drm/tests/drm_format_helper_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tests/drm_format_helper_test.c b/drivers/gpu/drm/tests/drm_format_helper_test.c index 7299fa8971ce..86829e1cb7f0 100644 --- a/drivers/gpu/drm/tests/drm_format_helper_test.c +++ b/drivers/gpu/drm/tests/drm_format_helper_test.c @@ -1033,7 +1033,7 @@ static void drm_test_fb_xrgb8888_to_xrgb2101010(struct kunit *test) NULL : &result->dst_pitch; drm_fb_xrgb8888_to_xrgb2101010(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state); - buf = le32buf_to_cpu(test, buf, dst_size / sizeof(u32)); + buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); buf = dst.vaddr; /* restore original value of buf */ From 05663d88fd0b8ee1c54ab2d5fb36f9b6a3ed37f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Mon, 30 Jun 2025 11:00:54 +0200 Subject: [PATCH 1801/2411] drm/tests: Fix drm_test_fb_xrgb8888_to_xrgb2101010() on big-endian MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix failures on big-endian architectures on test cases single_pixel_source_buffer, single_pixel_clip_rectangle, well_known_colors and destination_pitch.
Fixes: 15bda1f8de5d ("drm/tests: Add calls to drm_fb_blit() on supported format conversion tests") Signed-off-by: José Expósito Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250630090054.353246-2-jose.exposito89@gmail.com --- drivers/gpu/drm/tests/drm_format_helper_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/tests/drm_format_helper_test.c b/drivers/gpu/drm/tests/drm_format_helper_test.c index 86829e1cb7f0..981dada8f3a8 100644 --- a/drivers/gpu/drm/tests/drm_format_helper_test.c +++ b/drivers/gpu/drm/tests/drm_format_helper_test.c @@ -1040,6 +1040,7 @@ static void drm_test_fb_xrgb8888_to_xrgb2101010(struct kunit *test) memset(buf, 0, dst_size); drm_fb_xrgb8888_to_xrgb2101010(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state); + buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); } From 5634c8cb298a7146b4e38873473e280b50e27a2c Mon Sep 17 00:00:00 2001 From: Nitin Gote Date: Fri, 18 Jul 2025 16:20:51 +0530 Subject: [PATCH 1802/2411] iosys-map: Fix undefined behavior in iosys_map_clear() The current iosys_map_clear() implementation reads the potentially uninitialized 'is_iomem' boolean field to decide which union member to clear. This causes undefined behavior when called on uninitialized structures, as 'is_iomem' may contain garbage values like 0xFF. UBSAN detects this as: UBSAN: invalid-load in include/linux/iosys-map.h:267 load of value 255 is not a valid value for type '_Bool' Fix by unconditionally clearing the entire structure with memset(), eliminating the need to read uninitialized data and ensuring all fields are set to known good values.
Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14639 Fixes: 01fd30da0474 ("dma-buf: Add struct dma-buf-map for storing struct dma_buf.vaddr_ptr") Signed-off-by: Nitin Gote Reviewed-by: Andi Shyti Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250718105051.2709487-1-nitin.r.gote@intel.com --- include/linux/iosys-map.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index 4696abfd311c..3e85afe794c0 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -264,12 +264,7 @@ static inline bool iosys_map_is_set(const struct iosys_map *map) */ static inline void iosys_map_clear(struct iosys_map *map) { - if (map->is_iomem) { - map->vaddr_iomem = NULL; - map->is_iomem = false; - } else { - map->vaddr = NULL; - } + memset(map, 0, sizeof(*map)); } /** From 4fa7d880aeb8cdbdaa4fb72be3e53ac1d6bcc088 Mon Sep 17 00:00:00 2001 From: David Kaplan Date: Mon, 21 Jul 2025 11:03:10 -0500 Subject: [PATCH 1803/2411] x86/bugs: Select best SRSO mitigation The SRSO bug can theoretically be used to conduct user->user or guest->guest attacks and requires a mitigation (namely IBPB instead of SBPB on context switch) for these. So mark SRSO as being applicable to the user->user and guest->guest attack vectors. Additionally, SRSO supports multiple mitigations which mitigate different potential attack vectors. Some CPUs are also immune to SRSO from certain attack vectors (like user->kernel). Use the specific attack vectors requiring mitigation to select the best SRSO mitigation to avoid unnecessary performance hits. 
Signed-off-by: David Kaplan Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20250721160310.1804203-1-david.kaplan@amd.com --- .../admin-guide/hw-vuln/attack_vector_controls.rst | 2 +- arch/x86/kernel/cpu/bugs.c | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst b/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst index b4de16f5ec44..6dd0800146f6 100644 --- a/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst +++ b/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst @@ -214,7 +214,7 @@ Spectre_v1 X Spectre_v2 X X Spectre_v2_user X X * (Note 1) SRBDS X X X X -SRSO X X +SRSO X X X X SSB (Note 4) TAA X X X X * (Note 2) TSA X X X X diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b74bf937cd9f..2186a771b9fc 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -386,7 +386,6 @@ static bool __init should_mitigate_vuln(unsigned int bug) case X86_BUG_SPECTRE_V2: case X86_BUG_RETBLEED: - case X86_BUG_SRSO: case X86_BUG_L1TF: case X86_BUG_ITS: return cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL) || @@ -3184,8 +3183,18 @@ static void __init srso_select_mitigation(void) } if (srso_mitigation == SRSO_MITIGATION_AUTO) { - if (should_mitigate_vuln(X86_BUG_SRSO)) { + /* + * Use safe-RET if user->kernel or guest->host protection is + * required. Otherwise the 'microcode' mitigation is sufficient + * to protect the user->user and guest->guest vectors. 
+ */ + if (cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_HOST) || + (cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL) && + !boot_cpu_has(X86_FEATURE_SRSO_USER_KERNEL_NO))) { srso_mitigation = SRSO_MITIGATION_SAFE_RET; + } else if (cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER) || + cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_GUEST)) { + srso_mitigation = SRSO_MITIGATION_MICROCODE; } else { srso_mitigation = SRSO_MITIGATION_NONE; return; From f87d597e8393f7038de046ed7f13bb176a4ead55 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 8 Aug 2025 11:20:49 +0200 Subject: [PATCH 1804/2411] MAINTAINERS: entry for DRM GPUVM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GPUVM deserves a bit more coordination, also given the upcoming Rust work for GPUVM, hence add a dedicated maintainers entry for DRM GPUVM. Cc: Boris Brezillon Cc: Matthew Brost Cc: Thomas Hellström Cc: Alice Ryhl Acked-by: Thomas Hellström Acked-by: Matthew Brost Acked-by: Alice Ryhl Link: https://lore.kernel.org/r/20250808092432.461250-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- MAINTAINERS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index fe168477caa4..20ffd334e0a7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8426,6 +8426,17 @@ T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: drivers/gpu/drm/scheduler/ F: include/drm/gpu_scheduler.h +DRM GPUVM +M: Danilo Krummrich +R: Matthew Brost +R: Thomas Hellström +R: Alice Ryhl +L: dri-devel@lists.freedesktop.org +S: Supported +T: git https://gitlab.freedesktop.org/drm/misc/kernel.git +F: drivers/gpu/drm/drm_gpuvm.c +F: include/drm/drm_gpuvm.h + DRM LOG M: Jocelyn Falempe M: Javier Martinez Canillas From dfb36e4a8db0cd56f92d4cb445f54e85a9b40897 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 11 Aug 2025 10:11:47 -0400 Subject: [PATCH 1805/2411] futex: Use user_write_access_begin/_end() in futex_put_value() Commit cec199c5e39b 
("futex: Implement FUTEX2_NUMA") introduced the futex_put_value() helper to write a value to the given user address. However, it uses user_read_access_begin() before the write. For architectures that differentiate between read and write accesses, like PowerPC, futex_put_value() fails with -EFAULT. Fix that by using the user_write_access_begin/user_write_access_end() pair instead. Fixes: cec199c5e39b ("futex: Implement FUTEX2_NUMA") Signed-off-by: Waiman Long Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20250811141147.322261-1-longman@redhat.com --- kernel/futex/futex.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index c74eac572acd..2cd57096c38e 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -319,13 +319,13 @@ static __always_inline int futex_put_value(u32 val, u32 __user *to) { if (can_do_masked_user_access()) to = masked_user_access_begin(to); - else if (!user_read_access_begin(to, sizeof(*to))) + else if (!user_write_access_begin(to, sizeof(*to))) return -EFAULT; unsafe_put_user(val, to, Efault); - user_read_access_end(); + user_write_access_end(); return 0; Efault: - user_read_access_end(); + user_write_access_end(); return -EFAULT; } From 4b0ad968717eb1862b74c1f549e4225bf95c286f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 11 Aug 2025 16:15:27 +0200 Subject: [PATCH 1806/2411] regulator: dt-bindings: infineon,ir38060: Add Guenter as maintainer from IBM The infineon,ir38060 binding never got maintainer and fake "Not Me" entry have been causing dt_binding_check warnings for 1.5 years now: regulator/infineon,ir38060.yaml: maintainers:0: 'Not Me.' does not match '@' Guenter agreed to keep an eye for this hardware and binding. 
Cc: Guenter Roeck Cc: Conor Dooley Cc: Andrew Jeffery Cc: Ninad Palsule Signed-off-by: Krzysztof Kozlowski Acked-by: Guenter Roeck Link: https://patch.msgid.link/20250811141526.168752-2-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- .../devicetree/bindings/regulator/infineon,ir38060.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/regulator/infineon,ir38060.yaml b/Documentation/devicetree/bindings/regulator/infineon,ir38060.yaml index e6ffbc2a2298..57ff6bf1e188 100644 --- a/Documentation/devicetree/bindings/regulator/infineon,ir38060.yaml +++ b/Documentation/devicetree/bindings/regulator/infineon,ir38060.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Infineon Buck Regulators with PMBUS interfaces maintainers: - - Not Me. + - Guenter Roeck allOf: - $ref: regulator.yaml# From d8b96a79622e03813c221450498ca9742704ebf2 Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Mon, 28 Jul 2025 01:39:57 +0800 Subject: [PATCH 1807/2411] blk-wbt: Optimize wbt_done() for non-throttled writes In the current implementation, the sync_cookie and last_cookie members of struct rq_wb are used only by read requests and not by non-throttled write requests. Based on this, we can optimize wbt_done() by removing one if condition check for non-throttled write requests. 
Signed-off-by: Tang Yizhou Reviewed-by: Jan Kara Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20250727173959.160835-2-yizhou.tang@shopee.com Signed-off-by: Jens Axboe --- block/blk-wbt.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index a50d4cd55f41..30886d44f6cd 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -248,13 +248,14 @@ static void wbt_done(struct rq_qos *rqos, struct request *rq) struct rq_wb *rwb = RQWB(rqos); if (!wbt_is_tracked(rq)) { - if (rwb->sync_cookie == rq) { - rwb->sync_issue = 0; - rwb->sync_cookie = NULL; - } + if (wbt_is_read(rq)) { + if (rwb->sync_cookie == rq) { + rwb->sync_issue = 0; + rwb->sync_cookie = NULL; + } - if (wbt_is_read(rq)) wb_timestamp(rwb, &rwb->last_comp); + } } else { WARN_ON_ONCE(rq == rwb->sync_cookie); __wbt_done(rqos, wbt_flags(rq)); From bccdfcd56d4b5b78d0d76f46d0e89a51330dfd75 Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Mon, 28 Jul 2025 01:39:58 +0800 Subject: [PATCH 1808/2411] blk-wbt: Eliminate ambiguity in the comments of struct rq_wb In the current implementation, the last_issue and last_comp members of struct rq_wb are used only by read requests and not by non-throttled write requests. Therefore, eliminate the ambiguity here. 
Signed-off-by: Tang Yizhou Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20250727173959.160835-3-yizhou.tang@shopee.com Signed-off-by: Jens Axboe --- block/blk-wbt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 30886d44f6cd..eb8037bae0bd 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -85,8 +85,8 @@ struct rq_wb { u64 sync_issue; void *sync_cookie; - unsigned long last_issue; /* last non-throttled issue */ - unsigned long last_comp; /* last non-throttled comp */ + unsigned long last_issue; /* issue time of last read rq */ + unsigned long last_comp; /* completion time of last read rq */ unsigned long min_lat_nsec; struct rq_qos rqos; struct rq_wait rq_wait[WBT_NUM_RWQ]; From 0452f08395f8e7d04fe3744443dad396b3330d0c Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Mon, 28 Jul 2025 01:39:59 +0800 Subject: [PATCH 1809/2411] blk-wbt: doc: Update the doc of the wbt_lat_usec interface The symbol wb_window_usec cannot be found. Update the doc to reflect the latest implementation, in other words, the debugfs interface 'curr_win_nsec'. Signed-off-by: Tang Yizhou Reviewed-by: Jan Kara Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20250727173959.160835-4-yizhou.tang@shopee.com Signed-off-by: Jens Axboe --- Documentation/ABI/stable/sysfs-block | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block index 803f578dc023..0ddffc9133d0 100644 --- a/Documentation/ABI/stable/sysfs-block +++ b/Documentation/ABI/stable/sysfs-block @@ -731,7 +731,7 @@ Contact: linux-block@vger.kernel.org Description: [RW] If the device is registered for writeback throttling, then this file shows the target minimum read latency. If this latency - is exceeded in a given window of time (see wb_window_usec), then + is exceeded in a given window of time (see curr_win_nsec), then the writeback throttling will start scaling back writes. 
Writing a value of '0' to this file disables the feature. Writing a value of '-1' to this file resets the value to the default From ddf7233fcab6c247379d0928d46cc316ee122229 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Tue, 5 Aug 2025 10:59:11 +0200 Subject: [PATCH 1810/2411] sched/ext: Fix invalid task state transitions on class switch When enabling a sched_ext scheduler, we may trigger invalid task state transitions, resulting in warnings like the following (which can be easily reproduced by running the hotplug selftest in a loop): sched_ext: Invalid task state transition 0 -> 3 for fish[770] WARNING: CPU: 18 PID: 787 at kernel/sched/ext.c:3862 scx_set_task_state+0x7c/0xc0 ... RIP: 0010:scx_set_task_state+0x7c/0xc0 ... Call Trace: scx_enable_task+0x11f/0x2e0 switching_to_scx+0x24/0x110 scx_enable.isra.0+0xd14/0x13d0 bpf_struct_ops_link_create+0x136/0x1a0 __sys_bpf+0x1edd/0x2c30 __x64_sys_bpf+0x21/0x30 do_syscall_64+0xbb/0x370 entry_SYSCALL_64_after_hwframe+0x77/0x7f This happens because we skip initialization for tasks that are already dead (with their usage counter set to zero), but we don't exclude them during the scheduling class transition phase. Fix this by also skipping dead tasks during class switching, preventing invalid task state transitions.
Fixes: a8532fac7b5d2 ("sched_ext: TASK_DEAD tasks must be switched into SCX on ops_enable") Cc: stable@vger.kernel.org # v6.12+ Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 7dedc9a16281..4ae32ef179dd 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5749,6 +5749,9 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) __setscheduler_class(p->policy, p->prio); struct sched_enq_and_set_ctx ctx; + if (!tryget_task_struct(p)) + continue; + if (old_class != new_class && p->se.sched_delayed) dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED); @@ -5761,6 +5764,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) sched_enq_and_set_task(&ctx); check_class_changed(task_rq(p), p, old_class, p->prio); + put_task_struct(p); } scx_task_iter_stop(&sti); percpu_up_write(&scx_fork_rwsem); From e69980bd16f264581c3f606bae987e54f0ba8c4a Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 25 Jul 2025 17:04:12 +0800 Subject: [PATCH 1811/2411] selftests/sched_ext: Remove duplicate sched.h header ./tools/testing/selftests/sched_ext/hotplug.c: sched.h is included more than once. 
Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=22941 Signed-off-by: Jiapeng Chong Acked-by: Andrea Righi Signed-off-by: Tejun Heo --- tools/testing/selftests/sched_ext/hotplug.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/sched_ext/hotplug.c b/tools/testing/selftests/sched_ext/hotplug.c index 1c9ceb661c43..0cfbb111a2d0 100644 --- a/tools/testing/selftests/sched_ext/hotplug.c +++ b/tools/testing/selftests/sched_ext/hotplug.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include From e91a158b694d7f4bd937763dde79ed0afa472d8a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 8 Aug 2025 15:37:14 -0400 Subject: [PATCH 1812/2411] intel_idle: Allow loading ACPI tables for any family There is no reason to limit intel_idle's loading of ACPI tables to family 6. Upcoming Intel processors are not in family 6. Below "Fixes" really means "applies cleanly until". That syntax commit didn't change the previous logic, but shows this patch applies back 5-years. Fixes: 4a9f45a0533f ("intel_idle: Convert to new X86 CPU match macros") Signed-off-by: Len Brown Link: https://patch.msgid.link/06101aa4fe784e5b0be1cb2c0bdd9afcf16bd9d4.1754681697.git.len.brown@intel.com Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 73747d20df85..91a7b7e7c0c8 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1679,7 +1679,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { }; static const struct x86_cpu_id intel_mwait_ids[] __initconst = { - X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL), + X86_MATCH_VENDOR_FAM_FEATURE(INTEL, X86_FAMILY_ANY, X86_FEATURE_MWAIT, NULL), {} }; From fa3fa55de0d6177fdcaf6fc254f13cc8f33c3eed Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Mon, 11 Aug 2025 17:03:11 +0200 Subject: [PATCH 1813/2411] cpuidle: governors: menu: Avoid using invalid recent intervals data Marc has reported that commit 85975daeaa4d ("cpuidle: menu: Avoid discarding useful information") caused the number of wakeup interrupts to increase on an idle system [1], which was not expected to happen after merely allowing shallower idle states to be selected by the governor in some cases. However, on the system in question, all of the idle states deeper than WFI are rejected by the driver due to a firmware issue [2]. This causes the governor to only consider the recent interval duration data corresponding to attempts to enter WFI that are successful and the recent intervals table is filled with values lower than the scheduler tick period. Consequently, the governor predicts an idle duration below the scheduler tick period length and avoids stopping the tick more often which leads to the observed symptom. Address it by modifying the governor to update the recent intervals table also when entering the previously selected idle state fails, so it knows that the short idle intervals might have been the minority had the selected idle states been actually entered every time. Fixes: 85975daeaa4d ("cpuidle: menu: Avoid discarding useful information") Link: https://lore.kernel.org/linux-pm/86o6sv6n94.wl-maz@kernel.org/ [1] Link: https://lore.kernel.org/linux-pm/7ffcb716-9a1b-48c2-aaa4-469d0df7c792@arm.com/ [2] Signed-off-by: Rafael J.
Wysocki Tested-by: Christian Loehle Tested-by: Marc Zyngier Reviewed-by: Christian Loehle Link: https://patch.msgid.link/2793874.mvXUDI8C0e@rafael.j.wysocki --- drivers/cpuidle/governors/menu.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 52d5d26fc7c6..81306612a5c6 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -97,6 +97,14 @@ static inline int which_bucket(u64 duration_ns) static DEFINE_PER_CPU(struct menu_device, menu_devices); +static void menu_update_intervals(struct menu_device *data, unsigned int interval_us) +{ + /* Update the repeating-pattern data. */ + data->intervals[data->interval_ptr++] = interval_us; + if (data->interval_ptr >= INTERVALS) + data->interval_ptr = 0; +} + static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev); /* @@ -222,6 +230,14 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, if (data->needs_update) { menu_update(drv, dev); data->needs_update = 0; + } else if (!dev->last_residency_ns) { + /* + * This happens when the driver rejects the previously selected + * idle state and returns an error, so update the recent + * intervals table to prevent invalid information from being + * used going forward. + */ + menu_update_intervals(data, UINT_MAX); } /* Find the shortest expected idle interval. 
*/ @@ -482,10 +498,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->correction_factor[data->bucket] = new_factor; - /* update the repeating-pattern data */ - data->intervals[data->interval_ptr++] = ktime_to_us(measured_ns); - if (data->interval_ptr >= INTERVALS) - data->interval_ptr = 0; + menu_update_intervals(data, ktime_to_us(measured_ns)); } /** From 3ead77989c20cb2d774a3b6045d7a928b6fb53ed Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 8 Aug 2025 07:51:22 -0700 Subject: [PATCH 1814/2411] cpufreq: intel_pstate: Support Clearwater Forest OOB mode Prevent intel_pstate from loading when OOB (Out Of Band) P-states mode is enabled. Signed-off-by: Srinivas Pandruvada Link: https://patch.msgid.link/20250808145122.4057208-1-srinivas.pandruvada@linux.intel.com Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 06a1c7dd081f..f366d35c5840 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2793,6 +2793,7 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { X86_MATCH(INTEL_GRANITERAPIDS_X, core_funcs), X86_MATCH(INTEL_ATOM_CRESTMONT, core_funcs), X86_MATCH(INTEL_ATOM_CRESTMONT_X, core_funcs), + X86_MATCH(INTEL_ATOM_DARKMONT_X, core_funcs), {} }; #endif From 31cd31c9e17ece125aad27259501a2af69ccb020 Mon Sep 17 00:00:00 2001 From: Fushuai Wang Date: Mon, 11 Aug 2025 11:50:44 -0700 Subject: [PATCH 1815/2411] x86/fpu: Fix NULL dereference in avx512_status() Problem ------- With CONFIG_X86_DEBUG_FPU enabled, reading /proc/[kthread]/arch_status causes a warning and a NULL pointer dereference. This is because the AVX-512 timestamp code uses x86_task_fpu() but doesn't check it for NULL. CONFIG_X86_DEBUG_FPU addles that function for kernel threads (PF_KTHREAD specifically), making it return NULL. 
The point of the warning was to ensure that kernel threads only access task->fpu after going through kernel_fpu_begin()/_end(). Note: all kernel tasks exposed in /proc have a valid task->fpu. Solution -------- One option is to silence the warning and check for NULL from x86_task_fpu(). However, that warning is fairly fresh and seems like a defense against misuse of the FPU state in kernel threads. Instead, stop outputting AVX-512_elapsed_ms for kernel threads altogether. The data was garbage anyway because avx512_timestamp is only updated for user threads, not kernel threads. If anyone ever wants to track kernel thread AVX-512 use, they can come back later and do it properly, separate from this bug fix. [ dhansen: mostly rewrite changelog ] Fixes: 22aafe3bcb67 ("x86/fpu: Remove init_task FPU state dependencies, add debugging warning for PF_KTHREAD tasks") Co-developed-by: Sohil Mehta Signed-off-by: Sohil Mehta Signed-off-by: Fushuai Wang Signed-off-by: Dave Hansen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20250811185044.2227268-1-sohil.mehta%40intel.com --- arch/x86/kernel/fpu/xstate.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 12ed75c1b567..28e4fd65c9da 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1881,19 +1881,20 @@ long fpu_xstate_prctl(int option, unsigned long arg2) #ifdef CONFIG_PROC_PID_ARCH_STATUS /* * Report the amount of time elapsed in millisecond since last AVX512 - * use in the task. + * use in the task. Report -1 if no AVX-512 usage. */ static void avx512_status(struct seq_file *m, struct task_struct *task) { - unsigned long timestamp = READ_ONCE(x86_task_fpu(task)->avx512_timestamp); - long delta; + unsigned long timestamp; + long delta = -1; - if (!timestamp) { - /* - * Report -1 if no AVX512 usage - */ - delta = -1; - } else { + /* AVX-512 usage is not tracked for kernel threads. 
Don't report anything. */ + if (task->flags & (PF_KTHREAD | PF_USER_WORKER)) + return; + + timestamp = READ_ONCE(x86_task_fpu(task)->avx512_timestamp); + + if (timestamp) { delta = (long)(jiffies - timestamp); /* * Cap to LONG_MAX if time difference > LONG_MAX From 4e5b705cc6147f0b9173c6219079f41416bdd3c0 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Wed, 23 Jul 2025 10:29:12 -0500 Subject: [PATCH 1816/2411] iio: accel: sca3300: fix uninitialized iio scan data Fix potential leak of uninitialized stack data to userspace by ensuring that the `channels` array is zeroed before use. Fixes: edeb67fbbf4b ("iio: accel: sca3300: use IIO_DECLARE_BUFFER_WITH_TS") Signed-off-by: David Lechner Link: https://patch.msgid.link/20250723-iio-accel-sca3300-fix-uninitialized-iio-scan-data-v1-1-12dbfb3307b7@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/sca3300.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/sca3300.c b/drivers/iio/accel/sca3300.c index bda370c0f660..8380b237831c 100644 --- a/drivers/iio/accel/sca3300.c +++ b/drivers/iio/accel/sca3300.c @@ -477,7 +477,7 @@ static irqreturn_t sca3300_trigger_handler(int irq, void *p) struct iio_dev *indio_dev = pf->indio_dev; struct sca3300_data *data = iio_priv(indio_dev); int bit, ret, val, i = 0; - IIO_DECLARE_BUFFER_WITH_TS(s16, channels, SCA3300_SCAN_MAX); + IIO_DECLARE_BUFFER_WITH_TS(s16, channels, SCA3300_SCAN_MAX) = { }; iio_for_each_active_channel(indio_dev, bit) { ret = sca3300_read_reg(data, indio_dev->channels[bit].address, &val); From de18e978d0cda23e4c102e18092b63a5b0b3a800 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 22 Jul 2025 15:54:21 -0500 Subject: [PATCH 1817/2411] iio: proximity: isl29501: fix buffered read on big-endian systems Fix passing a u32 value as a u16 buffer scan item. This works on little-endian systems, but not on big-endian systems.
A new local variable is introduced for getting the register value and the array is changed to a struct to make the data layout more explicit rather than just changing the type and having to recalculate the proper length needed for the timestamp. Fixes: 1c28799257bc ("iio: light: isl29501: Add support for the ISL29501 ToF sensor.") Signed-off-by: David Lechner Link: https://patch.msgid.link/20250722-iio-use-more-iio_declare_buffer_with_ts-7-v2-1-d3ebeb001ed3@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/isl29501.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/iio/proximity/isl29501.c b/drivers/iio/proximity/isl29501.c index d1510fe24050..f69db6f2f380 100644 --- a/drivers/iio/proximity/isl29501.c +++ b/drivers/iio/proximity/isl29501.c @@ -938,12 +938,18 @@ static irqreturn_t isl29501_trigger_handler(int irq, void *p) struct iio_dev *indio_dev = pf->indio_dev; struct isl29501_private *isl29501 = iio_priv(indio_dev); const unsigned long *active_mask = indio_dev->active_scan_mask; - u32 buffer[4] __aligned(8) = {}; /* 1x16-bit + naturally aligned ts */ + u32 value; + struct { + u16 data; + aligned_s64 ts; + } scan = { }; - if (test_bit(ISL29501_DISTANCE_SCAN_INDEX, active_mask)) - isl29501_register_read(isl29501, REG_DISTANCE, buffer); + if (test_bit(ISL29501_DISTANCE_SCAN_INDEX, active_mask)) { + isl29501_register_read(isl29501, REG_DISTANCE, &value); + scan.data = value; + } - iio_push_to_buffers_with_timestamp(indio_dev, buffer, pf->timestamp); + iio_push_to_buffers_with_timestamp(indio_dev, &scan, pf->timestamp); iio_trigger_notify_done(indio_dev->trig); return IRQ_HANDLED; From 1cfb22c277c7274f54babaa5b416dfbc00181e16 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 22 Jul 2025 14:20:07 -0500 Subject: [PATCH 1818/2411] iio: adc: ad7173: prevent scan if too many setups requested Add a check to ad7173_update_scan_mode() to ensure that we didn't exceed the maximum number of unique channel 
configurations. In the AD7173 family of chips, there are some chips that have 16 CHANNELx registers but only 8 setups (combination of CONFIGx, FILTERx, GAINx and OFFSETx registers). Since commit 92c247216918 ("iio: adc: ad7173: fix num_slots"), it is possible to have more than 8 channels enabled in a scan at the same time, so it is possible to get a bad configuration when more than 8 channels are using unique configurations. This happens because the algorithm to allocate the setup slots only takes into account which slot has been least recently used and doesn't know about the maximum number of slots available. Since the algorithm to allocate the setup slots is quite complex, it is simpler to check after the fact if the current state is valid or not. So this patch adds a check in ad7173_update_scan_mode() after setting up all of the configurations to make sure that the actual setup still matches the requested setup for each enabled channel. If not, we prevent the scan from being enabled and return an error. The setup comparison in ad7173_setup_equal() is refactored to a separate function since we need to call it in two places now. Fixes: 92c247216918 ("iio: adc: ad7173: fix num_slots") Signed-off-by: David Lechner Link: https://patch.msgid.link/20250722-iio-adc-ad7173-fix-setup-use-limits-v2-1-8e96bdb72a9c@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7173.c | 87 ++++++++++++++++++++++++++++++++++------ 1 file changed, 75 insertions(+), 12 deletions(-) diff --git a/drivers/iio/adc/ad7173.c b/drivers/iio/adc/ad7173.c index 4413207be28f..683146e83ab2 100644 --- a/drivers/iio/adc/ad7173.c +++ b/drivers/iio/adc/ad7173.c @@ -200,7 +200,7 @@ struct ad7173_channel_config { /* * Following fields are used to compare equality. If you * make adaptations in it, you most likely also have to adapt - * ad7173_find_live_config(), too. + * ad7173_is_setup_equal(), too. 
*/ struct_group(config_props, bool bipolar; @@ -561,12 +561,19 @@ static void ad7173_reset_usage_cnts(struct ad7173_state *st) st->config_usage_counter = 0; } -static struct ad7173_channel_config * -ad7173_find_live_config(struct ad7173_state *st, struct ad7173_channel_config *cfg) +/** + * ad7173_is_setup_equal - Compare two channel setups + * @cfg1: First channel configuration + * @cfg2: Second channel configuration + * + * Compares all configuration options that affect the registers connected to + * SETUP_SEL, namely CONFIGx, FILTERx, GAINx and OFFSETx. + * + * Returns: true if the setups are identical, false otherwise + */ +static bool ad7173_is_setup_equal(const struct ad7173_channel_config *cfg1, + const struct ad7173_channel_config *cfg2) { - struct ad7173_channel_config *cfg_aux; - int i; - /* * This is just to make sure that the comparison is adapted after * struct ad7173_channel_config was changed. @@ -579,14 +586,22 @@ ad7173_find_live_config(struct ad7173_state *st, struct ad7173_channel_config *c u8 ref_sel; })); + return cfg1->bipolar == cfg2->bipolar && + cfg1->input_buf == cfg2->input_buf && + cfg1->odr == cfg2->odr && + cfg1->ref_sel == cfg2->ref_sel; +} + +static struct ad7173_channel_config * +ad7173_find_live_config(struct ad7173_state *st, struct ad7173_channel_config *cfg) +{ + struct ad7173_channel_config *cfg_aux; + int i; + for (i = 0; i < st->num_channels; i++) { cfg_aux = &st->channels[i].cfg; - if (cfg_aux->live && - cfg->bipolar == cfg_aux->bipolar && - cfg->input_buf == cfg_aux->input_buf && - cfg->odr == cfg_aux->odr && - cfg->ref_sel == cfg_aux->ref_sel) + if (cfg_aux->live && ad7173_is_setup_equal(cfg, cfg_aux)) return cfg_aux; } return NULL; @@ -1228,7 +1243,7 @@ static int ad7173_update_scan_mode(struct iio_dev *indio_dev, const unsigned long *scan_mask) { struct ad7173_state *st = iio_priv(indio_dev); - int i, ret; + int i, j, k, ret; for (i = 0; i < indio_dev->num_channels; i++) { if (test_bit(i, scan_mask)) @@ -1239,6 +1254,54 
@@ static int ad7173_update_scan_mode(struct iio_dev *indio_dev, return ret; } + /* + * On some chips, there are more channels than setups, so if there were + * more unique setups requested than the number of available slots, + * ad7173_set_channel() will have written over some of the slots. We + * can detect this by making sure each assigned cfg_slot matches the + * requested configuration. If it doesn't, we know that the slot was + * overwritten by a different channel. + */ + for_each_set_bit(i, scan_mask, indio_dev->num_channels) { + const struct ad7173_channel_config *cfg1, *cfg2; + + cfg1 = &st->channels[i].cfg; + + for_each_set_bit(j, scan_mask, indio_dev->num_channels) { + cfg2 = &st->channels[j].cfg; + + /* + * Only compare configs that are assigned to the same + * SETUP_SEL slot and don't compare channel to itself. + */ + if (i == j || cfg1->cfg_slot != cfg2->cfg_slot) + continue; + + /* + * If we find two different configs trying to use the + * same SETUP_SEL slot, then we know that we + * have too many unique configurations requested for + * the available slots and at least one was overwritten. + */ + if (!ad7173_is_setup_equal(cfg1, cfg2)) { + /* + * At this point, there isn't a way to tell + * which setups are actually programmed in the + * ADC anymore, so we could read them back to + * see, but it is simpler to just turn off all + * of the live flags so that everything gets + * reprogrammed on the next attempt to read a sample.
+ */ + for (k = 0; k < st->num_channels; k++) + st->channels[k].cfg.live = false; + + dev_err(&st->sd.spi->dev, + "Too many unique channel configurations requested for scan\n"); + return -EINVAL; + } + } + } + return 0; } From ae5bc07ec9f73a41734270ef3f800c5c8a7e0ad3 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Mon, 21 Jul 2025 18:04:04 -0500 Subject: [PATCH 1819/2411] iio: temperature: maxim_thermocouple: use DMA-safe buffer for spi_read() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace using stack-allocated buffers with a DMA-safe buffer for use with spi_read(). This allows the driver to be safely used with DMA-enabled SPI controllers. The buffer array is also converted to a struct with a union to make the usage of the memory in the buffer more clear and ensure proper alignment. Fixes: 1f25ca11d84a ("iio: temperature: add support for Maxim thermocouple chips") Signed-off-by: David Lechner Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250721-iio-use-more-iio_declare_buffer_with_ts-3-v2-1-0c68d41ccf6c@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/maxim_thermocouple.c | 26 ++++++++++++-------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/iio/temperature/maxim_thermocouple.c b/drivers/iio/temperature/maxim_thermocouple.c index cae8e84821d7..205939680fd4 100644 --- a/drivers/iio/temperature/maxim_thermocouple.c +++ b/drivers/iio/temperature/maxim_thermocouple.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -121,8 +122,15 @@ struct maxim_thermocouple_data { struct spi_device *spi; const struct maxim_thermocouple_chip *chip; char tc_type; - - u8 buffer[16] __aligned(IIO_DMA_MINALIGN); + /* Buffer for reading up to 2 hardware channels. 
*/ + struct { + union { + __be16 raw16; + __be32 raw32; + __be16 raw[2]; + }; + aligned_s64 timestamp; + } buffer __aligned(IIO_DMA_MINALIGN); }; static int maxim_thermocouple_read(struct maxim_thermocouple_data *data, @@ -130,18 +138,16 @@ static int maxim_thermocouple_read(struct maxim_thermocouple_data *data, { unsigned int storage_bytes = data->chip->read_size; unsigned int shift = chan->scan_type.shift + (chan->address * 8); - __be16 buf16; - __be32 buf32; int ret; switch (storage_bytes) { case 2: - ret = spi_read(data->spi, (void *)&buf16, storage_bytes); - *val = be16_to_cpu(buf16); + ret = spi_read(data->spi, &data->buffer.raw16, storage_bytes); + *val = be16_to_cpu(data->buffer.raw16); break; case 4: - ret = spi_read(data->spi, (void *)&buf32, storage_bytes); - *val = be32_to_cpu(buf32); + ret = spi_read(data->spi, &data->buffer.raw32, storage_bytes); + *val = be32_to_cpu(data->buffer.raw32); break; default: ret = -EINVAL; @@ -166,9 +172,9 @@ static irqreturn_t maxim_thermocouple_trigger_handler(int irq, void *private) struct maxim_thermocouple_data *data = iio_priv(indio_dev); int ret; - ret = spi_read(data->spi, data->buffer, data->chip->read_size); + ret = spi_read(data->spi, data->buffer.raw, data->chip->read_size); if (!ret) { - iio_push_to_buffers_with_ts(indio_dev, data->buffer, + iio_push_to_buffers_with_ts(indio_dev, &data->buffer, sizeof(data->buffer), iio_get_time_ns(indio_dev)); } From 197e299aae42ffa19028eaea92b2f30dd9fb8445 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Sat, 26 Jul 2025 11:28:48 -0500 Subject: [PATCH 1820/2411] iio: adc: ad7124: fix channel lookup in syscalib functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix possible incorrect channel lookup in the syscalib functions by using the correct channel address instead of the channel number. In the ad7124 driver, the channel field of struct iio_chan_spec is the input pin number of the positive input of the channel. 
This can be, but is not always the same as the index in the channels array. The correct index in the channels array is stored in the address field (and also scan_index). We use the address field to perform the correct lookup. Fixes: 47036a03a303 ("iio: adc: ad7124: Implement internal calibration at probe time") Signed-off-by: David Lechner Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250726-iio-adc-ad7124-fix-channel-lookup-in-syscalib-v1-1-b9d14bb684af@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7124.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index 9808df2e9242..4d8c6bafd1c3 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -849,7 +849,7 @@ enum { static int ad7124_syscalib_locked(struct ad7124_state *st, const struct iio_chan_spec *chan) { struct device *dev = &st->sd.spi->dev; - struct ad7124_channel *ch = &st->channels[chan->channel]; + struct ad7124_channel *ch = &st->channels[chan->address]; int ret; if (ch->syscalib_mode == AD7124_SYSCALIB_ZERO_SCALE) { @@ -865,8 +865,8 @@ static int ad7124_syscalib_locked(struct ad7124_state *st, const struct iio_chan if (ret < 0) return ret; - dev_dbg(dev, "offset for channel %d after zero-scale calibration: 0x%x\n", - chan->channel, ch->cfg.calibration_offset); + dev_dbg(dev, "offset for channel %lu after zero-scale calibration: 0x%x\n", + chan->address, ch->cfg.calibration_offset); } else { ch->cfg.calibration_gain = st->gain_default; @@ -880,8 +880,8 @@ static int ad7124_syscalib_locked(struct ad7124_state *st, const struct iio_chan if (ret < 0) return ret; - dev_dbg(dev, "gain for channel %d after full-scale calibration: 0x%x\n", - chan->channel, ch->cfg.calibration_gain); + dev_dbg(dev, "gain for channel %lu after full-scale calibration: 0x%x\n", + chan->address, ch->cfg.calibration_gain); } return 0; @@ -924,7 +924,7 @@ static int ad7124_set_syscalib_mode(struct iio_dev 
*indio_dev, { struct ad7124_state *st = iio_priv(indio_dev); - st->channels[chan->channel].syscalib_mode = mode; + st->channels[chan->address].syscalib_mode = mode; return 0; } @@ -934,7 +934,7 @@ static int ad7124_get_syscalib_mode(struct iio_dev *indio_dev, { struct ad7124_state *st = iio_priv(indio_dev); - return st->channels[chan->channel].syscalib_mode; + return st->channels[chan->address].syscalib_mode; } static const struct iio_enum ad7124_syscalib_mode_enum = { From dfdc31e7ccf3ac1d5ec01d5120c71e14745e3dd8 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Maneyrol Date: Fri, 8 Aug 2025 09:40:10 +0200 Subject: [PATCH 1821/2411] iio: imu: inv_icm42600: change invalid data error to -EBUSY Temperature sensor returns the temperature of the mechanical parts of the chip. If both accel and gyro are off, the temperature sensor is also automatically turned off and returns invalid data. In this case, returning -EBUSY error code is better than -EINVAL and indicates to userspace that it needs to retry reading temperature in another context. Fixes: bc3eb0207fb5 ("iio: imu: inv_icm42600: add temperature sensor support") Signed-off-by: Jean-Baptiste Maneyrol Cc: stable@vger.kernel.org Reviewed-by: Andy Shevchenko Reviewed-by: Sean Nyekjaer Link: https://patch.msgid.link/20250808-inv-icm42600-change-temperature-error-code-v1-1-986fbf63b77d@tdk.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/inv_icm42600/inv_icm42600_temp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_temp.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_temp.c index 8b15afca498c..271a4788604a 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_temp.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_temp.c @@ -32,8 +32,12 @@ static int inv_icm42600_temp_read(struct inv_icm42600_state *st, s16 *temp) goto exit; *temp = (s16)be16_to_cpup(raw); + /* + * Temperature data is invalid if both accel and gyro are off. + * Return -EBUSY in this case.
+ */ if (*temp == INV_ICM42600_DATA_INVALID) - ret = -EINVAL; + ret = -EBUSY; exit: mutex_unlock(&st->lock); From fde578c86281f27b182680c7642836a0dbbd0be7 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 31 Jul 2025 17:48:06 +0200 Subject: [PATCH 1822/2411] rust: alloc: replace aligned_size() with Kmalloc::aligned_layout() aligned_size() dates back to when Rust did support kmalloc() only, but is now used in ReallocFunc::call() and hence for all allocators. However, the additional padding applied by aligned_size() is only required by the kmalloc() allocator backend. Hence, replace aligned_size() with Kmalloc::aligned_layout() and use it for the affected allocators, i.e. kmalloc() and kvmalloc(), only. While at it, make Kmalloc::aligned_layout() public, such that Rust abstractions, which have to call subsystem specific kmalloc() based allocation primitives directly, can make use of it. Fixes: 8a799831fc63 ("rust: alloc: implement `ReallocFunc`") Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250731154919.4132-2-dakr@kernel.org [ Remove `const` from Kmalloc::aligned_layout(). - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/alloc/allocator.rs | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/rust/kernel/alloc/allocator.rs b/rust/kernel/alloc/allocator.rs index aa2dfa9dca4c..2692cf90c948 100644 --- a/rust/kernel/alloc/allocator.rs +++ b/rust/kernel/alloc/allocator.rs @@ -43,17 +43,6 @@ /// For more details see [self]. pub struct KVmalloc; -/// Returns a proper size to alloc a new object aligned to `new_layout`'s alignment. -fn aligned_size(new_layout: Layout) -> usize { - // Customized layouts from `Layout::from_size_align()` can have size < align, so pad first. 
- let layout = new_layout.pad_to_align(); - - // Note that `layout.size()` (after padding) is guaranteed to be a multiple of `layout.align()` - // which together with the slab guarantees means the `krealloc` will return a properly aligned - // object (see comments in `kmalloc()` for more information). - layout.size() -} - /// # Invariants /// /// One of the following: `krealloc`, `vrealloc`, `kvrealloc`. @@ -88,7 +77,7 @@ unsafe fn call( old_layout: Layout, flags: Flags, ) -> Result, AllocError> { - let size = aligned_size(layout); + let size = layout.size(); let ptr = match ptr { Some(ptr) => { if old_layout.size() == 0 { @@ -123,6 +112,17 @@ unsafe fn call( } } +impl Kmalloc { + /// Returns a [`Layout`] that makes [`Kmalloc`] fulfill the requested size and alignment of + /// `layout`. + pub fn aligned_layout(layout: Layout) -> Layout { + // Note that `layout.size()` (after padding) is guaranteed to be a multiple of + // `layout.align()` which together with the slab guarantees means that `Kmalloc` will return + // a properly aligned object (see comments in `kmalloc()` for more information). + layout.pad_to_align() + } +} + // SAFETY: `realloc` delegates to `ReallocFunc::call`, which guarantees that // - memory remains valid until it is explicitly freed, // - passing a pointer to a valid memory allocation is OK, @@ -135,6 +135,8 @@ unsafe fn realloc( old_layout: Layout, flags: Flags, ) -> Result, AllocError> { + let layout = Kmalloc::aligned_layout(layout); + // SAFETY: `ReallocFunc::call` has the same safety requirements as `Allocator::realloc`. unsafe { ReallocFunc::KREALLOC.call(ptr, layout, old_layout, flags) } } @@ -176,6 +178,10 @@ unsafe fn realloc( old_layout: Layout, flags: Flags, ) -> Result, AllocError> { + // `KVmalloc` may use the `Kmalloc` backend, hence we have to enforce a `Kmalloc` + // compatible layout. + let layout = Kmalloc::aligned_layout(layout); + // TODO: Support alignments larger than PAGE_SIZE. 
if layout.align() > bindings::PAGE_SIZE { pr_warn!("KVmalloc does not support alignments larger than PAGE_SIZE yet.\n"); From 22ab0641b939967f630d108e33a3582841ad6846 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 31 Jul 2025 17:48:07 +0200 Subject: [PATCH 1823/2411] rust: drm: ensure kmalloc() compatible Layout drm::Device is allocated through __drm_dev_alloc() (which uses kmalloc()) and the driver private data, ::Data, is initialized in-place. Due to the order of fields in drm::Device pub struct Device { dev: Opaque, data: T::Data, } even with an arbitrary large alignment requirement of T::Data it can't happen that the size of Device is smaller than its alignment requirement. However, let's not rely on this subtle circumstance and create a proper kmalloc() compatible Layout. Fixes: 1e4b8896c0f3 ("rust: drm: add device abstraction") Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250731154919.4132-3-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/drm/device.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index 3bb7c83966cf..d19410deaf6c 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -5,6 +5,7 @@ //! C header: [`include/linux/drm/drm_device.h`](srctree/include/linux/drm/drm_device.h) use crate::{ + alloc::allocator::Kmalloc, bindings, device, drm, drm::driver::AllocImpl, error::from_err_ptr, @@ -12,7 +13,7 @@ prelude::*, types::{ARef, AlwaysRefCounted, Opaque}, }; -use core::{mem, ops::Deref, ptr, ptr::NonNull}; +use core::{alloc::Layout, mem, ops::Deref, ptr, ptr::NonNull}; #[cfg(CONFIG_DRM_LEGACY)] macro_rules! drm_legacy_fields { @@ -96,6 +97,10 @@ impl Device { /// Create a new `drm::Device` for a `drm::Driver`. pub fn new(dev: &device::Device, data: impl PinInit) -> Result> { + // `__drm_dev_alloc` uses `kmalloc()` to allocate memory, hence ensure a `kmalloc()` + // compatible `Layout`. 
+ let layout = Kmalloc::aligned_layout(Layout::new::()); + // SAFETY: // - `VTABLE`, as a `const` is pinned to the read-only section of the compilation, // - `dev` is valid by its type invarants, @@ -103,7 +108,7 @@ pub fn new(dev: &device::Device, data: impl PinInit) -> Result(), + layout.size(), mem::offset_of!(Self, dev), ) } From 0c04a81c1d0214d5b2025f805ccec1ac37c96b08 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 31 Jul 2025 17:48:08 +0200 Subject: [PATCH 1824/2411] rust: drm: remove pin annotations from drm::Device The #[pin_data] and #[pin] annotations are not necessary for drm::Device, since we don't use any pin-init macros, but only __pinned_init() on the impl PinInit argument of drm::Device::new(). Fixes: 1e4b8896c0f3 ("rust: drm: add device abstraction") Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250731154919.4132-4-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/drm/device.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index d19410deaf6c..d0a9528121f1 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -54,10 +54,8 @@ macro_rules! drm_legacy_fields { /// /// `self.dev` is a valid instance of a `struct device`. #[repr(C)] -#[pin_data] pub struct Device { dev: Opaque, - #[pin] data: T::Data, } From 360077278ba62e81310080f075a1a3028e778ef9 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 31 Jul 2025 17:48:09 +0200 Subject: [PATCH 1825/2411] rust: drm: don't pass the address of drm::Device to drm_dev_put() In drm_dev_put() call in AlwaysRefCounted::dec_ref() we rely on struct drm_device to be the first field in drm::Device, whereas everywhere else we correctly obtain the address of the actual struct drm_device. Analogous to the from_drm_device() helper, provide the into_drm_device() helper in order to address this. 
Fixes: 1e4b8896c0f3 ("rust: drm: add device abstraction") Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250731154919.4132-5-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/drm/device.rs | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index d0a9528121f1..d29c477e89a8 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -120,9 +120,13 @@ pub fn new(dev: &device::Device, data: impl PinInit) -> Result *mut Self { unsafe { crate::container_of!(Opaque::cast_from(ptr), Self, dev) }.cast_mut() } + /// # Safety + /// + /// `ptr` must be a valid pointer to `Self`. + unsafe fn into_drm_device(ptr: NonNull) -> *mut bindings::drm_device { + // SAFETY: By the safety requirements of this function, `ptr` is a valid pointer to `Self`. + unsafe { &raw mut (*ptr.as_ptr()).dev }.cast() + } + /// Not intended to be called externally, except via declare_drm_ioctls!() /// /// # Safety @@ -192,8 +204,11 @@ fn inc_ref(&self) { } unsafe fn dec_ref(obj: NonNull) { + // SAFETY: `obj` is a valid pointer to `Self`. + let drm_dev = unsafe { Self::into_drm_device(obj) }; + // SAFETY: The safety requirements guarantee that the refcount is non-zero. - unsafe { bindings::drm_dev_put(obj.cast().as_ptr()) }; + unsafe { bindings::drm_dev_put(drm_dev) }; } } From 7573980c7049450a0af22acd4f8e96f37ea30c48 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 8 Aug 2025 10:45:05 -0700 Subject: [PATCH 1826/2411] MAINTAINERS: Mark Intel WWAN IOSM driver as orphaned This maintainer's email no longer works. Remove it from MAINTAINERS. I've been unable to locate a new maintainer for this at Intel. Mark the driver as Orphaned. 
Signed-off-by: Dave Hansen Cc: Loic Poulain Cc: Johannes Berg Cc: Andrew Lunn Acked-by: Sergey Ryazanov Link: https://patch.msgid.link/20250808174505.C9FF434F@davehans-spike.ostc.intel.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index eed16e53b9b2..ea82fc952c17 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12724,9 +12724,8 @@ S: Maintained F: drivers/platform/x86/intel/wmi/thunderbolt.c INTEL WWAN IOSM DRIVER -M: M Chetan Kumar L: netdev@vger.kernel.org -S: Maintained +S: Orphan F: drivers/net/wwan/iosm/ INTEL(R) FLEXIBLE RETURN AND EVENT DELIVERY From b56e9fb1c9669ce460dc899cecb09a54a6d71cf4 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 8 Aug 2025 10:53:24 -0700 Subject: [PATCH 1827/2411] MAINTAINERS: Mark Intel PTP DFL ToD as orphaned This maintainer's email no longer works. Remove it from MAINTAINERS. Also mark the code as an Orphan. Signed-off-by: Dave Hansen Cc: Richard Cochran Cc: Tianfei Zhang Cc: Andrew Lunn Link: https://patch.msgid.link/20250808175324.8C4B7354@davehans-spike.ostc.intel.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ea82fc952c17..3a46ef3f8a77 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12583,10 +12583,9 @@ S: Supported F: drivers/cpufreq/intel_pstate.c INTEL PTP DFL ToD DRIVER -M: Tianfei Zhang L: linux-fpga@vger.kernel.org L: netdev@vger.kernel.org -S: Maintained +S: Orphan F: drivers/ptp/ptp_dfl_tod.c INTEL QUADRATURE ENCODER PERIPHERAL DRIVER From b132a3b0c228125dfda03ba7c7903ef133e1ee21 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 8 Aug 2025 10:39:25 -0700 Subject: [PATCH 1828/2411] MAINTAINERS: Remove bouncing T7XX reviewer This reviewer's email no longer works. Remove it from MAINTAINERS. 
Signed-off-by: Dave Hansen Cc: Chandrashekar Devegowda Cc: Liu Haijun Cc: Ricardo Martinez Link: https://patch.msgid.link/20250808173925.FECE3782@davehans-spike.ostc.intel.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3a46ef3f8a77..6c7bc7b27eca 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15672,7 +15672,6 @@ MEDIATEK T7XX 5G WWAN MODEM DRIVER M: Chandrashekar Devegowda R: Chiranjeevi Rapolu R: Liu Haijun -R: M Chetan Kumar R: Ricardo Martinez L: netdev@vger.kernel.org S: Supported From 61aaca8b89fb98be58b8df19f01181bb983cccff Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Fri, 8 Aug 2025 15:31:08 +0200 Subject: [PATCH 1829/2411] net: usb: qmi_wwan: add Telit Cinterion FN990A w/audio composition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the following Telit Cinterion FN990A w/audio composition: 0x1077: tty (diag) + adb + rmnet + audio + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (AT) T: Bus=01 Lev=01 Prnt=01 Port=09 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1077 Rev=05.04 S: Manufacturer=Telit Wireless Solutions S: Product=FN990 S: SerialNumber=67e04c35 C: #Ifs=10 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) 
MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 0 Cls=01(audio) Sub=01 Prot=20 Driver=snd-usb-audio I: If#= 4 Alt= 1 #EPs= 1 Cls=01(audio) Sub=02 Prot=20 Driver=snd-usb-audio E: Ad=03(O) Atr=0d(Isoc) MxPS= 68 Ivl=1ms I: If#= 5 Alt= 1 #EPs= 1 Cls=01(audio) Sub=02 Prot=20 Driver=snd-usb-audio E: Ad=84(I) Atr=0d(Isoc) MxPS= 68 Ivl=1ms I: If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 7 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 8 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 9 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8c(I) Atr=03(Int.) 
MxPS= 10 Ivl=32ms Cc: stable@vger.kernel.org Signed-off-by: Fabio Porcedda Acked-by: Bjørn Mork Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index f5647ee0adde..e56901bb6ebc 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1361,6 +1361,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x1057, 2)}, /* Telit FN980 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990A */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1077, 2)}, /* Telit FN990A w/audio */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1080, 2)}, /* Telit FE990A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a0, 0)}, /* Telit FN920C04 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a4, 0)}, /* Telit FN920C04 */ From 52966bf71de98fef4ca7b3be1349adc7459d6d53 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 8 Aug 2025 07:45:23 -0400 Subject: [PATCH 1830/2411] ref_tracker: use %p instead of %px in debugfs dentry name As Kees points out, this is a kernel address leak, and debugging is not a sufficiently good reason to expose the real kernel address. 
Fixes: 65b584f53611 ("ref_tracker: automatically register a file in debugfs for a ref_tracker_dir") Reported-by: Kees Cook Closes: https://lore.kernel.org/netdev/202507301603.62E553F93@keescook/ Signed-off-by: Jeff Layton Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- lib/ref_tracker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c index a9e6ffcff04b..cce12287708e 100644 --- a/lib/ref_tracker.c +++ b/lib/ref_tracker.c @@ -434,7 +434,7 @@ void ref_tracker_dir_debugfs(struct ref_tracker_dir *dir) if (dentry && !xa_is_err(dentry)) return; - ret = snprintf(name, sizeof(name), "%s@%px", dir->class, dir); + ret = snprintf(name, sizeof(name), "%s@%p", dir->class, dir); name[sizeof(name) - 1] = '\0'; if (ret < sizeof(name)) { From de1e963ad064caf73ee2c7485b925f381a3aefbf Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 8 Aug 2025 13:16:34 +0100 Subject: [PATCH 1831/2411] net: stmmac: rk: put the PHY clock on remove The PHY clock (bsp_priv->clk_phy) is obtained using of_clk_get(), which doesn't take part in the devm release. Therefore, when a device is unbound, this clock needs to be explicitly put. Fix this. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Fixes: fecd4d7eef8b ("net: stmmac: dwmac-rk: Add integrated PHY support") Link: https://patch.msgid.link/E1ukM1S-0086qo-PC@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 79b92130a03f..f6687c2f30f6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1765,11 +1765,15 @@ static int rk_gmac_probe(struct platform_device *pdev) static void rk_gmac_remove(struct platform_device *pdev) { - struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(&pdev->dev); + struct stmmac_priv *priv = netdev_priv(platform_get_drvdata(pdev)); + struct rk_priv_data *bsp_priv = priv->plat->bsp_priv; stmmac_dvr_remove(&pdev->dev); rk_gmac_powerdown(bsp_priv); + + if (priv->plat->phy_node && bsp_priv->integrated_phy) + clk_put(bsp_priv->clk_phy); } #ifdef CONFIG_PM_SLEEP From 89886abd073489e26614e4d80fb8eb70d3938a0b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 8 Aug 2025 13:16:39 +0100 Subject: [PATCH 1832/2411] net: stmmac: dwc-qos: fix clk prepare/enable leak on probe failure dwc_eth_dwmac_probe() gets bulk clocks, and then prepares and enables them. Unfortunately, if dwc_eth_dwmac_config_dt() or stmmac_dvr_probe() fail, we leave the clocks prepared and enabled. Fix this by using devm_clk_bulk_get_all_enabled() to combine the steps and provide devm based release of the prepare and enable state. This also fixes a similar leak in dwc_eth_dwmac_remove() which wasn't correctly retrieving the struct plat_stmmacenet_data. This becomes unnecessary. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Fixes: a045e40645df ("net: stmmac: refactor clock management in EQoS driver") Link: https://patch.msgid.link/E1ukM1X-0086qu-Td@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index 09ae16e026eb..6c363f9b0ce2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -330,15 +330,11 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); - ret = devm_clk_bulk_get_all(&pdev->dev, &plat_dat->clks); + ret = devm_clk_bulk_get_all_enabled(&pdev->dev, &plat_dat->clks); if (ret < 0) - return dev_err_probe(&pdev->dev, ret, "Failed to retrieve all required clocks\n"); + return dev_err_probe(&pdev->dev, ret, "Failed to retrieve and enable all required clocks\n"); plat_dat->num_clks = ret; - ret = clk_bulk_prepare_enable(plat_dat->num_clks, plat_dat->clks); - if (ret) - return dev_err_probe(&pdev->dev, ret, "Failed to enable clocks\n"); - plat_dat->stmmac_clk = stmmac_pltfr_find_clk(plat_dat, data->stmmac_clk_name); @@ -346,7 +342,6 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) ret = data->probe(pdev, plat_dat, &stmmac_res); if (ret < 0) { dev_err_probe(&pdev->dev, ret, "failed to probe subdriver\n"); - clk_bulk_disable_unprepare(plat_dat->num_clks, plat_dat->clks); return ret; } @@ -370,15 +365,11 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) static void dwc_eth_dwmac_remove(struct platform_device *pdev) { const struct dwc_eth_dwmac_data *data = device_get_match_data(&pdev->dev); - struct plat_stmmacenet_data *plat_dat = dev_get_platdata(&pdev->dev); stmmac_dvr_remove(&pdev->dev); if (data->remove) 
data->remove(pdev); - - if (plat_dat) - clk_bulk_disable_unprepare(plat_dat->num_clks, plat_dat->clks); } static const struct of_device_id dwc_eth_dwmac_match[] = { From b63335fb3d32579c5ff0b7038b9cc23688fff528 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 11 Aug 2025 08:34:04 +0100 Subject: [PATCH 1833/2411] cifs: Fix collect_sample() to handle any iterator type collect_sample() is used to gather samples of the data in a Write op for analysis to try and determine if the compression algorithm is likely to achieve anything more quickly than actually running the compression algorithm. However, collect_sample() assumes that the data it is going to be sampling is stored in an ITER_XARRAY-type iterator (which it now should never be) and doesn't actually check that it is before accessing the underlying xarray directly. Fix this by replacing the code with a loop that just uses the standard iterator functions to sample every other 2KiB block, skipping the intervening ones. It's not quite the same as the previous algorithm as it doesn't necessarily align to the pages within an ordinary write from the pagecache. Note that the btrfs code from which this was derived samples the inode's pagecache directly rather than the iterator - but that doesn't necessarily work for network filesystems if O_DIRECT is in operation. 
Fixes: 94ae8c3fee94 ("smb: client: compress: LZ77 code improvements cleanup") Signed-off-by: David Howells Acked-by: Paulo Alcantara (Red Hat) cc: Enzo Matsumiya cc: Shyam Prasad N cc: Tom Talpey cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/compress.c | 61 +++++++++++----------------------------- 1 file changed, 16 insertions(+), 45 deletions(-) diff --git a/fs/smb/client/compress.c b/fs/smb/client/compress.c index 766b4de13da7..db709f5cd2e1 100644 --- a/fs/smb/client/compress.c +++ b/fs/smb/client/compress.c @@ -155,58 +155,29 @@ static int cmp_bkt(const void *_a, const void *_b) } /* - * TODO: - * Support other iter types, if required. - * Only ITER_XARRAY is supported for now. + * Collect some 2K samples with 2K gaps between. */ -static int collect_sample(const struct iov_iter *iter, ssize_t max, u8 *sample) +static int collect_sample(const struct iov_iter *source, ssize_t max, u8 *sample) { - struct folio *folios[16], *folio; - unsigned int nr, i, j, npages; - loff_t start = iter->xarray_start + iter->iov_offset; - pgoff_t last, index = start / PAGE_SIZE; - size_t len, off, foff; - void *p; - int s = 0; + struct iov_iter iter = *source; + size_t s = 0; - last = (start + max - 1) / PAGE_SIZE; - do { - nr = xa_extract(iter->xarray, (void **)folios, index, last, ARRAY_SIZE(folios), - XA_PRESENT); - if (nr == 0) - return -EIO; + while (iov_iter_count(&iter) >= SZ_2K) { + size_t part = umin(umin(iov_iter_count(&iter), SZ_2K), max); + size_t n; - for (i = 0; i < nr; i++) { - folio = folios[i]; - npages = folio_nr_pages(folio); - foff = start - folio_pos(folio); - off = foff % PAGE_SIZE; + n = copy_from_iter(sample + s, part, &iter); + if (n != part) + return -EFAULT; - for (j = foff / PAGE_SIZE; j < npages; j++) { - size_t len2; + s += n; + max -= n; - len = min_t(size_t, max, PAGE_SIZE - off); - len2 = min_t(size_t, len, SZ_2K); + if (iov_iter_count(&iter) < PAGE_SIZE - SZ_2K) + break; - p = 
kmap_local_page(folio_page(folio, j)); - memcpy(&sample[s], p, len2); - kunmap_local(p); - - s += len2; - - if (len2 < SZ_2K || s >= max - SZ_2K) - return s; - - max -= len; - if (max <= 0) - return s; - - start += len; - off = 0; - index++; - } - } - } while (nr == ARRAY_SIZE(folios)); + iov_iter_advance(&iter, SZ_2K); + } return s; } From d7f1affc556e07208453224a025bd67583671ae2 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Fri, 8 Aug 2025 17:52:21 +0300 Subject: [PATCH 1834/2411] cifs: avoid extra calls to strlen() in cifs_get_spnego_key() Since 'snprintf()' returns the number of characters emitted, an output position may be advanced with this return value rather than using an explicit calls to 'strlen()'. Compile tested only. Signed-off-by: Dmitry Antipov Signed-off-by: Steve French --- fs/smb/client/cifs_spnego.c | 47 ++++++++++++++----------------------- 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c index bc1c1e9b288a..43b86fa4d695 100644 --- a/fs/smb/client/cifs_spnego.c +++ b/fs/smb/client/cifs_spnego.c @@ -124,55 +124,44 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo, dp = description; /* start with version and hostname portion of UNC string */ spnego_key = ERR_PTR(-EINVAL); - sprintf(dp, "ver=0x%x;host=%s;", CIFS_SPNEGO_UPCALL_VERSION, - hostname); - dp = description + strlen(description); + dp += sprintf(dp, "ver=0x%x;host=%s;", CIFS_SPNEGO_UPCALL_VERSION, + hostname); /* add the server address */ if (server->dstaddr.ss_family == AF_INET) - sprintf(dp, "ip4=%pI4", &sa->sin_addr); + dp += sprintf(dp, "ip4=%pI4", &sa->sin_addr); else if (server->dstaddr.ss_family == AF_INET6) - sprintf(dp, "ip6=%pI6", &sa6->sin6_addr); + dp += sprintf(dp, "ip6=%pI6", &sa6->sin6_addr); else goto out; - dp = description + strlen(description); - /* for now, only sec=krb5 and sec=mskrb5 and iakerb are valid */ if (server->sec_kerberos) - sprintf(dp, ";sec=krb5"); + dp += sprintf(dp, ";sec=krb5"); 
else if (server->sec_mskerberos) - sprintf(dp, ";sec=mskrb5"); + dp += sprintf(dp, ";sec=mskrb5"); else if (server->sec_iakerb) - sprintf(dp, ";sec=iakerb"); + dp += sprintf(dp, ";sec=iakerb"); else { cifs_dbg(VFS, "unknown or missing server auth type, use krb5\n"); - sprintf(dp, ";sec=krb5"); + dp += sprintf(dp, ";sec=krb5"); } - dp = description + strlen(description); - sprintf(dp, ";uid=0x%x", - from_kuid_munged(&init_user_ns, sesInfo->linux_uid)); + dp += sprintf(dp, ";uid=0x%x", + from_kuid_munged(&init_user_ns, sesInfo->linux_uid)); - dp = description + strlen(description); - sprintf(dp, ";creduid=0x%x", + dp += sprintf(dp, ";creduid=0x%x", from_kuid_munged(&init_user_ns, sesInfo->cred_uid)); - if (sesInfo->user_name) { - dp = description + strlen(description); - sprintf(dp, ";user=%s", sesInfo->user_name); - } + if (sesInfo->user_name) + dp += sprintf(dp, ";user=%s", sesInfo->user_name); - dp = description + strlen(description); - sprintf(dp, ";pid=0x%x", current->pid); + dp += sprintf(dp, ";pid=0x%x", current->pid); - if (sesInfo->upcall_target == UPTARGET_MOUNT) { - dp = description + strlen(description); - sprintf(dp, ";upcall_target=mount"); - } else { - dp = description + strlen(description); - sprintf(dp, ";upcall_target=app"); - } + if (sesInfo->upcall_target == UPTARGET_MOUNT) + dp += sprintf(dp, ";upcall_target=mount"); + else + dp += sprintf(dp, ";upcall_target=app"); cifs_dbg(FYI, "key description = %s\n", description); saved_cred = override_creds(spnego_cred); From ab5ac789efa985e42bdb5b5e8a7a4ad84935d44e Mon Sep 17 00:00:00 2001 From: Sukrut Heroorkar Date: Tue, 5 Aug 2025 00:56:14 +0200 Subject: [PATCH 1835/2411] selftests/proc: fix string literal warning in proc-maps-race.c This change resolves non literal string format warning invoked for proc-maps-race.c while compiling. 
proc-maps-race.c:205:17: warning: format not a string literal and no format arguments [-Wformat-security] 205 | printf(text); | ^~~~~~ proc-maps-race.c:209:17: warning: format not a string literal and no format arguments [-Wformat-security] 209 | printf(text); | ^~~~~~ proc-maps-race.c: In function `print_last_lines': proc-maps-race.c:224:9: warning: format not a string literal and no format arguments [-Wformat-security] 224 | printf(start); | ^~~~~~ Add string format specifier %s for the printf calls in both print_first_lines() and print_last_lines() thus resolving the warnings. The test executes fine after this change thus causing no effect to the functional behavior of the test. Link: https://lkml.kernel.org/r/20250804225633.841777-1-hsukrut3@gmail.com Fixes: aadc099c480f ("selftests/proc: add verbose mode for /proc/pid/maps tearing tests") Signed-off-by: Sukrut Heroorkar Acked-by: Suren Baghdasaryan Cc: David Hunter Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/proc/proc-maps-race.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/proc/proc-maps-race.c b/tools/testing/selftests/proc/proc-maps-race.c index 66773685a047..94bba4553130 100644 --- a/tools/testing/selftests/proc/proc-maps-race.c +++ b/tools/testing/selftests/proc/proc-maps-race.c @@ -202,11 +202,11 @@ static void print_first_lines(char *text, int nr) int offs = end - text; text[offs] = '\0'; - printf(text); + printf("%s", text); text[offs] = '\n'; printf("\n"); } else { - printf(text); + printf("%s", text); } } @@ -221,7 +221,7 @@ static void print_last_lines(char *text, int nr) nr--; start--; } - printf(start); + printf("%s", start); } static void print_boundaries(const char *title, FIXTURE_DATA(proc_maps_race) *self) From cf1b80dc31a1137b8b4568c138b453bf7453204a Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Wed, 6 Aug 2025 20:26:11 +0530 Subject: [PATCH 1836/2411] mm: pass page directly instead of using folio_page In 
commit_anon_folio_batch(), we iterate over all pages pointed to by the PTE batch. Therefore we need to know the first page of the batch; currently we derive that via folio_page(folio, 0), but, that takes us to the first (head) page of the folio instead - our PTE batch may lie in the middle of the folio, leading to incorrectness. Bite the bullet and throw away the micro-optimization of reusing the folio in favour of code simplicity. Derive the page and the folio in change_pte_range, and pass the page too to commit_anon_folio_batch to fix the aforementioned issue. Link: https://lkml.kernel.org/r/20250806145611.3962-1-dev.jain@arm.com Fixes: cac1db8c3aad ("mm: optimize mprotect() by PTE batching") Reported-by: syzbot+57bcc752f0df8bb1365c@syzkaller.appspotmail.com Signed-off-by: Dev Jain Reviewed-by: Lorenzo Stoakes Debugged-by: David Hildenbrand Acked-by: David Hildenbrand Cc: Anshuman Khandual Cc: Barry Song Cc: Catalin Marinas Cc: Christophe Leroy Cc: Hugh Dickins Cc: Jann Horn Cc: Joey Gouly Cc: Kevin Brodsky Cc: Lance Yang Cc: Liam Howlett Cc: Matthew Wilcox (Oracle) Cc: Peter Xu Cc: Ryan Roberts Cc: Vlastimil Babka Cc: Will Deacon Cc: Yang Shi Cc: Yicong Yang Cc: Zhenhua Huang Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/mprotect.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/mm/mprotect.c b/mm/mprotect.c index 78bded7acf79..113b48985834 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -120,9 +120,8 @@ static int mprotect_folio_pte_batch(struct folio *folio, pte_t *ptep, static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr, pte_t oldpte, pte_t *pte, int target_node, - struct folio **foliop) + struct folio *folio) { - struct folio *folio = NULL; bool ret = true; bool toptier; int nid; @@ -131,7 +130,6 @@ static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr, if (pte_protnone(oldpte)) goto skip; - folio = vm_normal_folio(vma, addr, oldpte); if (!folio) goto skip; @@ -173,7 
+171,6 @@ static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr, folio_xchg_access_time(folio, jiffies_to_msecs(jiffies)); skip: - *foliop = folio; return ret; } @@ -231,10 +228,9 @@ static int page_anon_exclusive_sub_batch(int start_idx, int max_len, * retrieve sub-batches. */ static void commit_anon_folio_batch(struct vm_area_struct *vma, - struct folio *folio, unsigned long addr, pte_t *ptep, + struct folio *folio, struct page *first_page, unsigned long addr, pte_t *ptep, pte_t oldpte, pte_t ptent, int nr_ptes, struct mmu_gather *tlb) { - struct page *first_page = folio_page(folio, 0); bool expected_anon_exclusive; int sub_batch_idx = 0; int len; @@ -251,7 +247,7 @@ static void commit_anon_folio_batch(struct vm_area_struct *vma, } static void set_write_prot_commit_flush_ptes(struct vm_area_struct *vma, - struct folio *folio, unsigned long addr, pte_t *ptep, + struct folio *folio, struct page *page, unsigned long addr, pte_t *ptep, pte_t oldpte, pte_t ptent, int nr_ptes, struct mmu_gather *tlb) { bool set_write; @@ -270,7 +266,7 @@ static void set_write_prot_commit_flush_ptes(struct vm_area_struct *vma, /* idx = */ 0, set_write, tlb); return; } - commit_anon_folio_batch(vma, folio, addr, ptep, oldpte, ptent, nr_ptes, tlb); + commit_anon_folio_batch(vma, folio, page, addr, ptep, oldpte, ptent, nr_ptes, tlb); } static long change_pte_range(struct mmu_gather *tlb, @@ -305,15 +301,19 @@ static long change_pte_range(struct mmu_gather *tlb, const fpb_t flags = FPB_RESPECT_SOFT_DIRTY | FPB_RESPECT_WRITE; int max_nr_ptes = (end - addr) >> PAGE_SHIFT; struct folio *folio = NULL; + struct page *page; pte_t ptent; + page = vm_normal_page(vma, addr, oldpte); + if (page) + folio = page_folio(page); /* * Avoid trapping faults against the zero or KSM * pages. See similar comment in change_huge_pmd. 
*/ if (prot_numa) { int ret = prot_numa_skip(vma, addr, oldpte, pte, - target_node, &folio); + target_node, folio); if (ret) { /* determine batch to skip */ @@ -323,9 +323,6 @@ static long change_pte_range(struct mmu_gather *tlb, } } - if (!folio) - folio = vm_normal_folio(vma, addr, oldpte); - nr_ptes = mprotect_folio_pte_batch(folio, pte, oldpte, max_nr_ptes, flags); oldpte = modify_prot_start_ptes(vma, addr, pte, nr_ptes); @@ -351,7 +348,7 @@ static long change_pte_range(struct mmu_gather *tlb, */ if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pte_write(ptent)) - set_write_prot_commit_flush_ptes(vma, folio, + set_write_prot_commit_flush_ptes(vma, folio, page, addr, pte, oldpte, ptent, nr_ptes, tlb); else prot_commit_flush_ptes(vma, addr, pte, oldpte, ptent, From aba6faec0103ed8f169be8dce2ead41fcb689446 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 6 Aug 2025 15:00:22 -0700 Subject: [PATCH 1837/2411] userfaultfd: fix a crash in UFFDIO_MOVE when PMD is a migration entry When UFFDIO_MOVE encounters a migration PMD entry, it proceeds with obtaining a folio and accessing it even though the entry is swp_entry_t. Add the missing check and let split_huge_pmd() handle migration entries. While at it also remove unnecessary folio check. 
[surenb@google.com: remove extra folio check, per David] Link: https://lkml.kernel.org/r/20250807200418.1963585-1-surenb@google.com Link: https://lkml.kernel.org/r/20250806220022.926763-1-surenb@google.com Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") Signed-off-by: Suren Baghdasaryan Reported-by: syzbot+b446dbe27035ef6bd6c2@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68794b5c.a70a0220.693ce.0050.GAE@google.com/ Reviewed-by: Peter Xu Acked-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Lokesh Gidra Cc: Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index cbed91b09640..45e6290e2e8b 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1821,13 +1821,16 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start, /* Check if we can move the pmd without splitting it. */ if (move_splits_huge_pmd(dst_addr, src_addr, src_start + len) || !pmd_none(dst_pmdval)) { - struct folio *folio = pmd_folio(*src_pmd); + /* Can be a migration entry */ + if (pmd_present(*src_pmd)) { + struct folio *folio = pmd_folio(*src_pmd); - if (!folio || (!is_huge_zero_folio(folio) && - !PageAnonExclusive(&folio->page))) { - spin_unlock(ptl); - err = -EBUSY; - break; + if (!is_huge_zero_folio(folio) && + !PageAnonExclusive(&folio->page)) { + spin_unlock(ptl); + err = -EBUSY; + break; + } } spin_unlock(ptl); From 0b5be138ce00f421bd7cc5a226061bd62c4ab850 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 7 Aug 2025 19:58:19 +0100 Subject: [PATCH 1838/2411] mm/mremap: avoid expensive folio lookup on mremap folio pte batch It was discovered in the attached report that commit f822a9a81a31 ("mm: optimize mremap() by PTE batching") introduced a significant performance regression on a number of metrics on x86-64, most notably stress-ng.bigheap.realloc_calls_per_sec - indicating a 37.3% regression in number of mremap() calls per second. 
I was able to reproduce this locally on an intel x86-64 raptor lake system, noting an average of 143,857 realloc calls/sec (with a stddev of 4,531 or 3.1%) prior to this patch being applied, and 81,503 afterwards (stddev of 2,131 or 2.6%) - a 43.3% regression. During testing I was able to determine that there was no meaningful difference in efforts to optimise the folio_pte_batch() operation, nor checking folio_test_large(). This is within expectation, as a regression this large is likely to indicate we are accessing memory that is not yet in a cache line (and perhaps may even cause a main memory fetch). The expectation by those discussing this from the start was that vm_normal_folio() (invoked by mremap_folio_pte_batch()) would likely be the culprit due to having to retrieve memory from the vmemmap (which mremap() page table moves does not otherwise do, meaning this is inevitably cold memory). I was able to definitively determine that this theory is indeed correct and the cause of the issue. The solution is to restore part of an approach previously discarded on review, that is to invoke pte_batch_hint() which explicitly determines, through reference to the PTE alone (thus no vmemmap lookup), what the PTE batch size may be. On platforms other than arm64 this is currently hardcoded to return 1, so this naturally resolves the issue for x86-64, and for arm64 introduces little to no overhead as the pte cache line will be hot. With this patch applied, we move from 81,503 realloc calls/sec to 138,701 (stddev of 496.1 or 0.4%), which is a -3.6% regression, however accounting for the variance in the original result, this is broadly restoring performance to its prior state. 
Link: https://lkml.kernel.org/r/20250807185819.199865-1-lorenzo.stoakes@oracle.com Fixes: f822a9a81a31 ("mm: optimize mremap() by PTE batching") Signed-off-by: Lorenzo Stoakes Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202508071609.4e743d7c-lkp@intel.com Acked-by: David Hildenbrand Acked-by: Pedro Falcato Reviewed-by: Barry Song Acked-by: Vlastimil Babka Reviewed-by: Dev Jain Cc: Ryan Roberts Cc: Barry Song Cc: Jann Horn Cc: Liam Howlett Signed-off-by: Andrew Morton --- mm/mremap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/mremap.c b/mm/mremap.c index 677a4d744df9..9afa8cd524f5 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -179,6 +179,10 @@ static int mremap_folio_pte_batch(struct vm_area_struct *vma, unsigned long addr if (max_nr == 1) return 1; + /* Avoid expensive folio lookup if we stand no chance of benefit. */ + if (pte_batch_hint(ptep, pte) == 1) + return 1; + folio = vm_normal_folio(vma, addr, pte); if (!folio || !folio_test_large(folio)) return 1; From c0e1b774f68bdbea1618e356e30672c7f1e32509 Mon Sep 17 00:00:00 2001 From: Jialin Wang Date: Fri, 8 Aug 2025 00:54:55 +0800 Subject: [PATCH 1839/2411] proc: proc_maps_open allow proc_mem_open to return NULL The commit 65c66047259f ("proc: fix the issue of proc_mem_open returning NULL") caused proc_maps_open() to return -ESRCH when proc_mem_open() returns NULL. This breaks legitimate /proc/<pid>/maps access for kernel threads since kernel threads have NULL mm_struct. The regression causes perf to fail and exit when profiling a kernel thread: # perf record -v -g -p $(pgrep kswapd0) ... couldn't open /proc/65/task/65/maps This patch partially reverts the commit to fix it. 
Link: https://lkml.kernel.org/r/20250807165455.73656-1-wjl.linux@gmail.com Fixes: 65c66047259f ("proc: fix the issue of proc_mem_open returning NULL") Signed-off-by: Jialin Wang Cc: Penglei Jiang Cc: Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ee1e4ccd33bd..29cca0e6d0ff 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -340,8 +340,8 @@ static int proc_maps_open(struct inode *inode, struct file *file, priv->inode = inode; priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); - if (IS_ERR_OR_NULL(priv->mm)) { - int err = priv->mm ? PTR_ERR(priv->mm) : -ESRCH; + if (IS_ERR(priv->mm)) { + int err = PTR_ERR(priv->mm); seq_release_private(inode, file); return err; From 54d4f445517fe8350d735624d7f4225e7511d9eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Larumbe?= Date: Fri, 8 Aug 2025 02:02:34 +0100 Subject: [PATCH 1840/2411] drm/panfrost: Print RSS for tiler heap BO's in debugfs GEMS file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise it would display the virtual allocation size, which is often much bigger than the RSS. Signed-off-by: Adrián Larumbe Fixes: e48ade5e23ba ("drm/panfrost: show device-wide list of DRM GEM objects over DebugFS") Tested-by: Christopher Healy Reviewed-by: Daniel Stone Signed-off-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250808010235.2831853-1-adrian.larumbe@collabora.com --- drivers/gpu/drm/panfrost/panfrost_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index bb73f2a68a12..85d6289a6eda 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -432,7 +432,7 @@ static void panfrost_gem_debugfs_bo_print(struct panfrost_gem_object *bo, if (!refcount) return; - resident_size = bo->base.pages ? 
bo->base.base.size : 0; + resident_size = panfrost_gem_rss(&bo->base.base); snprintf(creator_info, sizeof(creator_info), "%s/%d", bo->debugfs.creator.process_name, bo->debugfs.creator.tgid); From 9af8f2b469c0438620832f3729a3c5c03853b56b Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Fri, 27 Jun 2025 14:38:19 +0200 Subject: [PATCH 1841/2411] drm/panic: Add a u64 divide by 10 for arm32 On 32bits ARM, u64 divided by a constant is not optimized to a multiply by inverse by the compiler [1]. So do the multiply by inverse explicitly for this architecture. Link: https://github.com/llvm/llvm-project/issues/37280 [1] Reported-by: Andrei Lalaev Closes: https://lore.kernel.org/dri-devel/c0a2771c-f3f5-4d4c-aa82-d673b3c5cb46@gmail.com/ Fixes: 675008f196ca ("drm/panic: Use a decimal fifo to avoid u64 by u64 divide") Reviewed-by: Alice Ryhl Signed-off-by: Jocelyn Falempe --- drivers/gpu/drm/drm_panic_qr.rs | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index 09a9b452e8b7..50c286c5cee8 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -381,6 +381,26 @@ struct DecFifo { len: usize, } +// On arm32 architecture, dividing an `u64` by a constant will generate a call +// to `__aeabi_uldivmod` which is not present in the kernel. +// So use the multiply by inverse method for this architecture. 
+fn div10(val: u64) -> u64 { + if cfg!(target_arch = "arm") { + let val_h = val >> 32; + let val_l = val & 0xFFFFFFFF; + let b_h: u64 = 0x66666666; + let b_l: u64 = 0x66666667; + + let tmp1 = val_h * b_l + ((val_l * b_l) >> 32); + let tmp2 = val_l * b_h + (tmp1 & 0xffffffff); + let tmp3 = val_h * b_h + (tmp1 >> 32) + (tmp2 >> 32); + + tmp3 >> 2 + } else { + val / 10 + } +} + impl DecFifo { fn push(&mut self, data: u64, len: usize) { let mut chunk = data; @@ -389,7 +409,7 @@ fn push(&mut self, data: u64, len: usize) { } for i in 0..len { self.decimals[i] = (chunk % 10) as u8; - chunk /= 10; + chunk = div10(chunk); } self.len += len; } From fd56b9c9507f32b16159f9a922e1af5628254567 Mon Sep 17 00:00:00 2001 From: Vinod Govindapillai Date: Tue, 29 Jul 2025 15:46:48 +0300 Subject: [PATCH 1842/2411] drm/i915/fbc: fix the implementation of wa_18038517565 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As per the wa_18038517565, we need to disable FBC compressor clock gating before enabling FBC and enable after disabling FBC. Placing the enabling of clock gating in the fbc deactivate function can make the above wa logic go wrong in case of frontbuffer rendering FBC mechanism. FBC deactivate can get called during fb invalidate and then the corresponding FBC activate can get called without properly disabling the clock gating and can result in compression stalled. So move the enable clock gating at the end of one FBC session after FBC is completely disabled for a pipe. 
Bspec: 74212, 72197, 69741, 65555 Fixes: 010363c46189 ("drm/i915/display: implement wa_18038517565") Signed-off-by: Vinod Govindapillai Reviewed-by: Jouni Högander Link: https://lore.kernel.org/r/20250729124648.288497-1-vinod.govindapillai@intel.com (cherry picked from commit 82dde0407ab126f8413fd6c51429e5057ced5ba2) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_fbc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 6e26cb4c5724..685ac98bd001 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -552,10 +552,6 @@ static void ilk_fbc_deactivate(struct intel_fbc *fbc) if (dpfc_ctl & DPFC_CTL_EN) { dpfc_ctl &= ~DPFC_CTL_EN; intel_de_write(display, ILK_DPFC_CONTROL(fbc->id), dpfc_ctl); - - /* wa_18038517565 Enable DPFC clock gating after FBC disable */ - if (display->platform.dg2 || DISPLAY_VER(display) >= 14) - fbc_compressor_clkgate_disable_wa(fbc, false); } } @@ -1710,6 +1706,10 @@ static void __intel_fbc_disable(struct intel_fbc *fbc) __intel_fbc_cleanup_cfb(fbc); + /* wa_18038517565 Enable DPFC clock gating after FBC disable */ + if (display->platform.dg2 || DISPLAY_VER(display) >= 14) + fbc_compressor_clkgate_disable_wa(fbc, false); + fbc->state.plane = NULL; fbc->flip_pending = false; fbc->busy_bits = 0; From 184889dfe0568528fd6d14bba864dd57ed45bbf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Fri, 1 Aug 2025 09:29:05 +0300 Subject: [PATCH 1843/2411] drm/i915/psr: Do not trigger Frame Change events from frontbuffer flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to get rid of triggering "Frame Change" events from frontbuffer flush calls. We are about to move using TRANS_PUSH register for this on LunarLake and onwards. 
Touching TRANS_PUSH register from frontbuffer flush would be problematic as it's written by DSB as well. Fix this by using intel_psr_exit when flush or invalidate is done on LunarLake and onwards. This is not possible on AlderLake and MeteorLake due to HW bug in PSR2 disable. This patch is also fixing problems with cursor plane where cursor is disappearing or duplicate cursor is seen on the screen. v2: Commit message updated Bspec: 68927, 68934, 66624 Reported-by: Janna Martl Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5522 Fixes: 411ad63877bb ("drm/i915/psr: Use SFF_CTL on invalidate/flush for LunarLake onwards") Tested-by: Janna Martl Signed-off-by: Jouni Högander Reviewed-by: Suraj Kandpal Link: https://lore.kernel.org/r/20250801062905.564453-1-jouni.hogander@intel.com (cherry picked from commit 46fb38cb20c0d185a6391ab524b23e0e0219c41f) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_psr.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index ae9053919211..41988e193a41 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -3275,7 +3275,9 @@ static void intel_psr_configure_full_frame_update(struct intel_dp *intel_dp) static void _psr_invalidate_handle(struct intel_dp *intel_dp) { - if (intel_dp->psr.psr2_sel_fetch_enabled) { + struct intel_display *display = to_intel_display(intel_dp); + + if (DISPLAY_VER(display) < 20 && intel_dp->psr.psr2_sel_fetch_enabled) { if (!intel_dp->psr.psr2_sel_fetch_cff_enabled) { intel_dp->psr.psr2_sel_fetch_cff_enabled = true; intel_psr_configure_full_frame_update(intel_dp); @@ -3361,7 +3363,7 @@ static void _psr_flush_handle(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - if (intel_dp->psr.psr2_sel_fetch_enabled) { + if (DISPLAY_VER(display) < 20 && intel_dp->psr.psr2_sel_fetch_enabled) { 
if (intel_dp->psr.psr2_sel_fetch_cff_enabled) { /* can we turn CFF off? */ if (intel_dp->psr.busy_frontbuffer_bits == 0) @@ -3378,11 +3380,13 @@ static void _psr_flush_handle(struct intel_dp *intel_dp) * existing SU configuration */ intel_psr_configure_full_frame_update(intel_dp); + + intel_psr_force_update(intel_dp); + } else { + intel_psr_exit(intel_dp); } - intel_psr_force_update(intel_dp); - - if (!intel_dp->psr.psr2_sel_fetch_enabled && !intel_dp->psr.active && + if ((!intel_dp->psr.psr2_sel_fetch_enabled || DISPLAY_VER(display) >= 20) && !intel_dp->psr.busy_frontbuffer_bits) queue_work(display->wq.unordered, &intel_dp->psr.work); } From 8ee90742cf29427683294a6a80f1e2b7f4af1cff Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Thu, 7 Aug 2025 12:08:32 +0800 Subject: [PATCH 1844/2411] net: phy: nxp-c45-tja11xx: fix the PHY ID mismatch issue when using C45 TJA1103/04/20/21 support both C22 and C45 accessing methods. The TJA11xx driver has implemented the match_phy_device() API. However, it does not handle the C45 ID. If C45 was used to access TJA11xx, match_phy_device() would always return false due to phydev->phy_id only used by C22 being empty, resulting in the generic phy driver being used for TJA11xx PHYs. Therefore, check phydev->c45_ids.device_ids[MDIO_MMD_PMAPMD] when using C45. 
Fixes: 1b76b2497aba ("net: phy: nxp-c45-tja11xx: simplify .match_phy_device OP") Signed-off-by: Clark Wang Link: https://patch.msgid.link/20250807040832.2455306-1-xiaoning.wang@nxp.com Signed-off-by: Paolo Abeni --- drivers/net/phy/nxp-c45-tja11xx.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 4c6d905f0a9f..87adb6508017 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -1965,24 +1965,27 @@ static int nxp_c45_macsec_ability(struct phy_device *phydev) return macsec_ability; } +static bool tja11xx_phy_id_compare(struct phy_device *phydev, + const struct phy_driver *phydrv) +{ + u32 id = phydev->is_c45 ? phydev->c45_ids.device_ids[MDIO_MMD_PMAPMD] : + phydev->phy_id; + + return phy_id_compare(id, phydrv->phy_id, phydrv->phy_id_mask); +} + static int tja11xx_no_macsec_match_phy_device(struct phy_device *phydev, const struct phy_driver *phydrv) { - if (!phy_id_compare(phydev->phy_id, phydrv->phy_id, - phydrv->phy_id_mask)) - return 0; - - return !nxp_c45_macsec_ability(phydev); + return tja11xx_phy_id_compare(phydev, phydrv) && + !nxp_c45_macsec_ability(phydev); } static int tja11xx_macsec_match_phy_device(struct phy_device *phydev, const struct phy_driver *phydrv) { - if (!phy_id_compare(phydev->phy_id, phydrv->phy_id, - phydrv->phy_id_mask)) - return 0; - - return nxp_c45_macsec_ability(phydev); + return tja11xx_phy_id_compare(phydev, phydrv) && + nxp_c45_macsec_ability(phydev); } static const struct nxp_c45_regmap tja1120_regmap = { From 8ea25274ebaf2f6be8be374633b2ed8348ec0e70 Mon Sep 17 00:00:00 2001 From: Buday Csaba Date: Thu, 7 Aug 2025 15:54:49 +0200 Subject: [PATCH 1845/2411] net: mdiobus: release reset_gpio in mdiobus_unregister_device() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit reset_gpio is claimed in mdiobus_register_device(), but it is not released 
in mdiobus_unregister_device(). It is instead only released when the whole MDIO bus is unregistered. When a device uses the reset_gpio property, it becomes impossible to unregister it and register it again, because the GPIO remains claimed. This patch resolves that issue. Fixes: bafbdd527d56 ("phylib: Add device reset GPIO support") # see notes Reviewed-by: Andrew Lunn Cc: Csókás Bence [ csokas.bence: Resolve rebase conflict and clarify msg ] Signed-off-by: Buday Csaba Link: https://patch.msgid.link/20250807135449.254254-2-csokas.bence@prolan.hu Signed-off-by: Paolo Abeni --- drivers/net/phy/mdio_bus.c | 1 + drivers/net/phy/mdio_bus_provider.c | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index fda2e27c1810..cad6ed3aa10b 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -91,6 +91,7 @@ int mdiobus_unregister_device(struct mdio_device *mdiodev) if (mdiodev->bus->mdio_map[mdiodev->addr] != mdiodev) return -EINVAL; + gpiod_put(mdiodev->reset_gpio); reset_control_put(mdiodev->reset_ctrl); mdiodev->bus->mdio_map[mdiodev->addr] = NULL; diff --git a/drivers/net/phy/mdio_bus_provider.c b/drivers/net/phy/mdio_bus_provider.c index 48dc4bf85125..f43973e73ea3 100644 --- a/drivers/net/phy/mdio_bus_provider.c +++ b/drivers/net/phy/mdio_bus_provider.c @@ -443,9 +443,6 @@ void mdiobus_unregister(struct mii_bus *bus) if (!mdiodev) continue; - if (mdiodev->reset_gpio) - gpiod_put(mdiodev->reset_gpio); - mdiodev->device_remove(mdiodev); mdiodev->device_free(mdiodev); } From 5eb1bcdb6a8c088514019c3a9bda5d565beed1af Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Tue, 22 Jul 2025 13:18:53 +0530 Subject: [PATCH 1846/2411] x86/sev: Improve handling of writes to intercepted TSC MSRs Currently, when a Secure TSC enabled SNP guest attempts to write to the intercepted GUEST_TSC_FREQ MSR (a read-only MSR), the guest kernel response incorrectly implies a VMM configuration error, when 
in fact it is the usual VMM configuration to intercept writes to read-only MSRs, unless explicitly documented. Modify the intercepted TSC MSR #VC handling: * Write to GUEST_TSC_FREQ will generate a #GP instead of terminating the guest * Write to MSR_IA32_TSC will generate a #GP instead of silently ignoring it However, continue to terminate the guest when reading from intercepted GUEST_TSC_FREQ MSR with Secure TSC enabled, as intercepted reads indicate an improper VMM configuration for Secure TSC enabled SNP guests. [ bp: simplify comment. ] Fixes: 38cc6495cdec ("x86/sev: Prevent GUEST_TSC_FREQ MSR interception for Secure TSC enabled guests") Suggested-by: Sean Christopherson Signed-off-by: Nikunj A Dadhania Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/20250722074853.22253-1-nikunj@amd.com --- arch/x86/coco/sev/vc-handle.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/arch/x86/coco/sev/vc-handle.c b/arch/x86/coco/sev/vc-handle.c index faf1fce89ed4..c3b4acbde0d8 100644 --- a/arch/x86/coco/sev/vc-handle.c +++ b/arch/x86/coco/sev/vc-handle.c @@ -371,29 +371,30 @@ static enum es_result __vc_handle_msr_caa(struct pt_regs *regs, bool write) * executing with Secure TSC enabled, so special handling is required for * accesses of MSR_IA32_TSC and MSR_AMD64_GUEST_TSC_FREQ. */ -static enum es_result __vc_handle_secure_tsc_msrs(struct pt_regs *regs, bool write) +static enum es_result __vc_handle_secure_tsc_msrs(struct es_em_ctxt *ctxt, bool write) { + struct pt_regs *regs = ctxt->regs; u64 tsc; /* - * GUEST_TSC_FREQ should not be intercepted when Secure TSC is enabled. - * Terminate the SNP guest when the interception is enabled. + * Writing to MSR_IA32_TSC can cause subsequent reads of the TSC to + * return undefined values, and GUEST_TSC_FREQ is read-only. Generate + * a #GP on all writes. 
+ */ + if (write) { + ctxt->fi.vector = X86_TRAP_GP; + ctxt->fi.error_code = 0; + return ES_EXCEPTION; + } + + /* + * GUEST_TSC_FREQ read should not be intercepted when Secure TSC is + * enabled. Terminate the guest if a read is attempted. */ if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ) return ES_VMM_ERROR; - /* - * Writes: Writing to MSR_IA32_TSC can cause subsequent reads of the TSC - * to return undefined values, so ignore all writes. - * - * Reads: Reads of MSR_IA32_TSC should return the current TSC value, use - * the value returned by rdtsc_ordered(). - */ - if (write) { - WARN_ONCE(1, "TSC MSR writes are verboten!\n"); - return ES_OK; - } - + /* Reads of MSR_IA32_TSC should return the current TSC value. */ tsc = rdtsc_ordered(); regs->ax = lower_32_bits(tsc); regs->dx = upper_32_bits(tsc); @@ -416,7 +417,7 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) case MSR_IA32_TSC: case MSR_AMD64_GUEST_TSC_FREQ: if (sev_status & MSR_AMD64_SNP_SECURE_TSC) - return __vc_handle_secure_tsc_msrs(regs, write); + return __vc_handle_secure_tsc_msrs(ctxt, write); break; default: break; From c8a9a619c072e9a45e9a9b4b035269427dc00aa8 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Fri, 8 Aug 2025 09:36:54 +0000 Subject: [PATCH 1847/2411] dt-bindings: net: thead,th1520-gmac: Describe APB interface clock Besides ones for GMAC core and peripheral registers, the TH1520 GMAC requires one more clock for configuring APB glue registers. Describe it in the binding. 
Fixes: f920ce04c399 ("dt-bindings: net: Add T-HEAD dwmac support") Signed-off-by: Yao Zi Acked-by: Krzysztof Kozlowski Reviewed-by: Drew Fustini Link: https://patch.msgid.link/20250808093655.48074-3-ziyao@disroot.org Signed-off-by: Paolo Abeni --- .../devicetree/bindings/net/thead,th1520-gmac.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml b/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml index 6d9de3303762..b3492a9aa4ef 100644 --- a/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml +++ b/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml @@ -62,11 +62,13 @@ properties: items: - description: GMAC main clock - description: Peripheral registers interface clock + - description: APB glue registers interface clock clock-names: items: - const: stmmaceth - const: pclk + - const: apb interrupts: items: @@ -88,8 +90,8 @@ examples: compatible = "thead,th1520-gmac", "snps,dwmac-3.70a"; reg = <0xe7070000 0x2000>, <0xec003000 0x1000>; reg-names = "dwmac", "apb"; - clocks = <&clk 1>, <&clk 2>; - clock-names = "stmmaceth", "pclk"; + clocks = <&clk 1>, <&clk 2>, <&clk 3>; + clock-names = "stmmaceth", "pclk", "apb"; interrupts = <66>; interrupt-names = "macirq"; phy-mode = "rgmii-id"; From 4cc339ce482ba78589a2d5cbe1c84b735d263383 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Fri, 8 Aug 2025 09:36:55 +0000 Subject: [PATCH 1848/2411] net: stmmac: thead: Get and enable APB clock on initialization It's necessary to adjust the MAC TX clock when the linkspeed changes, but it's noted such adjustment always fails on TH1520 SoC, and reading back from APB glue registers that control clock generation results in garbage, causing broken link. With some testing, it's found a clock must be ungated for access to APB glue registers. Without any consumer, the clock is automatically disabled during late kernel startup. Let's get and enable it if it's described in devicetree. 
For backward compatibility with older devicetrees, probing won't fail if the APB clock isn't found. In this case, we emit a warning since the link will break if the speed changes. Fixes: 33a1a01e3afa ("net: stmmac: Add glue layer for T-HEAD TH1520 SoC") Signed-off-by: Yao Zi Tested-by: Drew Fustini Reviewed-by: Drew Fustini Link: https://patch.msgid.link/20250808093655.48074-4-ziyao@disroot.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c index c72ee759aae5..f2946bea0bc2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c @@ -211,6 +211,7 @@ static int thead_dwmac_probe(struct platform_device *pdev) struct stmmac_resources stmmac_res; struct plat_stmmacenet_data *plat; struct thead_dwmac *dwmac; + struct clk *apb_clk; void __iomem *apb; int ret; @@ -224,6 +225,19 @@ static int thead_dwmac_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, PTR_ERR(plat), "dt configuration failed\n"); + /* + * The APB clock is essential for accessing glue registers. However, + * old devicetrees don't describe it correctly. We continue to probe + * and emit a warning if it isn't present. 
+ */ + apb_clk = devm_clk_get_enabled(&pdev->dev, "apb"); + if (PTR_ERR(apb_clk) == -ENOENT) + dev_warn(&pdev->dev, + "cannot get apb clock, link may break after speed changes\n"); + else if (IS_ERR(apb_clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(apb_clk), + "failed to get apb clock\n"); + dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); if (!dwmac) return -ENOMEM; From a7f75e2883c4bd57b12c3be61bb926929adad9c0 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Fri, 8 Aug 2025 09:36:56 +0000 Subject: [PATCH 1849/2411] riscv: dts: thead: Add APB clocks for TH1520 GMACs Describe perisys-apb4-hclk as the APB clock for TH1520 SoC, which is essential for accessing GMAC glue registers. Fixes: 7e756671a664 ("riscv: dts: thead: Add TH1520 ethernet nodes") Signed-off-by: Yao Zi Reviewed-by: Drew Fustini Tested-by: Drew Fustini Link: https://patch.msgid.link/20250808093655.48074-5-ziyao@disroot.org Signed-off-by: Paolo Abeni --- arch/riscv/boot/dts/thead/th1520.dtsi | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/riscv/boot/dts/thead/th1520.dtsi b/arch/riscv/boot/dts/thead/th1520.dtsi index 42724bf7e90e..03f1d7319049 100644 --- a/arch/riscv/boot/dts/thead/th1520.dtsi +++ b/arch/riscv/boot/dts/thead/th1520.dtsi @@ -297,8 +297,9 @@ gmac1: ethernet@ffe7060000 { reg-names = "dwmac", "apb"; interrupts = <67 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq"; - clocks = <&clk CLK_GMAC_AXI>, <&clk CLK_GMAC1>; - clock-names = "stmmaceth", "pclk"; + clocks = <&clk CLK_GMAC_AXI>, <&clk CLK_GMAC1>, + <&clk CLK_PERISYS_APB4_HCLK>; + clock-names = "stmmaceth", "pclk", "apb"; snps,pbl = <32>; snps,fixed-burst; snps,multicast-filter-bins = <64>; @@ -319,8 +320,9 @@ gmac0: ethernet@ffe7070000 { reg-names = "dwmac", "apb"; interrupts = <66 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq"; - clocks = <&clk CLK_GMAC_AXI>, <&clk CLK_GMAC0>; - clock-names = "stmmaceth", "pclk"; + clocks = <&clk CLK_GMAC_AXI>, <&clk CLK_GMAC0>, + <&clk 
CLK_PERISYS_APB4_HCLK>; + clock-names = "stmmaceth", "pclk", "apb"; snps,pbl = <32>; snps,fixed-burst; snps,multicast-filter-bins = <64>; From e93f7af148222303c4632318536c0f649b4ee5b1 Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Fri, 8 Aug 2025 11:57:56 -0700 Subject: [PATCH 1850/2411] docs: Fix name for net.ipv4.udp_child_hash_entries udp_child_ehash_entries -> udp_child_hash_entries Fixes: 9804985bf27f ("udp: Introduce optional per-netns hash table.") Signed-off-by: Jordan Rife Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250808185800.1189042-1-jordan@jrife.io Signed-off-by: Paolo Abeni --- Documentation/networking/ip-sysctl.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index bb620f554598..9756d16e3df1 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1420,7 +1420,7 @@ udp_hash_entries - INTEGER A negative value means the networking namespace does not own its hash buckets and shares the initial networking namespace's one. -udp_child_ehash_entries - INTEGER +udp_child_hash_entries - INTEGER Control the number of hash buckets for UDP sockets in the child networking namespace, which must be set before clone() or unshare(). From dcb82900b12f5809e66835918d4043284ce1d39c Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sun, 10 Aug 2025 23:41:05 +0200 Subject: [PATCH 1851/2411] ASoC: codecs: Call strscpy() with correct size argument In aw8xxxx_profile_info(), strscpy() is called with the length of the source string "null" rather than the size of the destination buffer. This is fine as long as the destination buffer is larger than the source string, but we should still use the destination buffer size instead to call strscpy() as intended. 
And since 'name' points to the fixed-size buffer 'uinfo->value.enumerated.name', we can safely omit the size argument and let strscpy() infer it using sizeof() and remove 'name'. Signed-off-by: Thorsten Blum Link: https://patch.msgid.link/20250810214144.1985-2-thorsten.blum@linux.dev Signed-off-by: Mark Brown --- sound/soc/codecs/aw87390.c | 8 +++----- sound/soc/codecs/aw88081.c | 5 ++--- sound/soc/codecs/aw88166.c | 8 +++----- sound/soc/codecs/aw88261.c | 8 +++----- sound/soc/codecs/aw88395/aw88395.c | 8 +++----- sound/soc/codecs/aw88399.c | 8 +++----- 6 files changed, 17 insertions(+), 28 deletions(-) diff --git a/sound/soc/codecs/aw87390.c b/sound/soc/codecs/aw87390.c index 110009616966..ef6f64856988 100644 --- a/sound/soc/codecs/aw87390.c +++ b/sound/soc/codecs/aw87390.c @@ -177,7 +177,7 @@ static int aw87390_profile_info(struct snd_kcontrol *kcontrol, { struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); struct aw87390 *aw87390 = snd_soc_component_get_drvdata(codec); - char *prof_name, *name; + char *prof_name; int count, ret; uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; @@ -194,17 +194,15 @@ static int aw87390_profile_info(struct snd_kcontrol *kcontrol, if (uinfo->value.enumerated.item >= count) uinfo->value.enumerated.item = count - 1; - name = uinfo->value.enumerated.name; count = uinfo->value.enumerated.item; ret = aw87390_dev_get_prof_name(aw87390->aw_pa, count, &prof_name); if (ret) { - strscpy(uinfo->value.enumerated.name, "null", - strlen("null") + 1); + strscpy(uinfo->value.enumerated.name, "null"); return 0; } - strscpy(name, prof_name, sizeof(uinfo->value.enumerated.name)); + strscpy(uinfo->value.enumerated.name, prof_name); return 0; } diff --git a/sound/soc/codecs/aw88081.c b/sound/soc/codecs/aw88081.c index 3dd8428f08cc..d61a7b8c5470 100644 --- a/sound/soc/codecs/aw88081.c +++ b/sound/soc/codecs/aw88081.c @@ -914,12 +914,11 @@ static int aw88081_profile_info(struct snd_kcontrol *kcontrol, ret = 
aw88081_dev_get_prof_name(aw88081->aw_pa, count, &prof_name); if (ret) { - strscpy(uinfo->value.enumerated.name, "null", - sizeof(uinfo->value.enumerated.name)); + strscpy(uinfo->value.enumerated.name, "null"); return 0; } - strscpy(uinfo->value.enumerated.name, prof_name, sizeof(uinfo->value.enumerated.name)); + strscpy(uinfo->value.enumerated.name, prof_name); return 0; } diff --git a/sound/soc/codecs/aw88166.c b/sound/soc/codecs/aw88166.c index 4f76ebe11cc7..28f62b991ef2 100644 --- a/sound/soc/codecs/aw88166.c +++ b/sound/soc/codecs/aw88166.c @@ -1478,7 +1478,7 @@ static int aw88166_profile_info(struct snd_kcontrol *kcontrol, { struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); struct aw88166 *aw88166 = snd_soc_component_get_drvdata(codec); - char *prof_name, *name; + char *prof_name; int count, ret; uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; @@ -1495,17 +1495,15 @@ static int aw88166_profile_info(struct snd_kcontrol *kcontrol, if (uinfo->value.enumerated.item >= count) uinfo->value.enumerated.item = count - 1; - name = uinfo->value.enumerated.name; count = uinfo->value.enumerated.item; ret = aw88166_dev_get_prof_name(aw88166->aw_pa, count, &prof_name); if (ret) { - strscpy(uinfo->value.enumerated.name, "null", - strlen("null") + 1); + strscpy(uinfo->value.enumerated.name, "null"); return 0; } - strscpy(name, prof_name, sizeof(uinfo->value.enumerated.name)); + strscpy(uinfo->value.enumerated.name, prof_name); return 0; } diff --git a/sound/soc/codecs/aw88261.c b/sound/soc/codecs/aw88261.c index fb99871578c5..de11ae8dd9d9 100644 --- a/sound/soc/codecs/aw88261.c +++ b/sound/soc/codecs/aw88261.c @@ -819,7 +819,7 @@ static int aw88261_profile_info(struct snd_kcontrol *kcontrol, { struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); struct aw88261 *aw88261 = snd_soc_component_get_drvdata(codec); - char *prof_name, *name; + char *prof_name; int count, ret; uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; @@ -836,17 +836,15 @@ 
static int aw88261_profile_info(struct snd_kcontrol *kcontrol, if (uinfo->value.enumerated.item >= count) uinfo->value.enumerated.item = count - 1; - name = uinfo->value.enumerated.name; count = uinfo->value.enumerated.item; ret = aw88261_dev_get_prof_name(aw88261->aw_pa, count, &prof_name); if (ret) { - strscpy(uinfo->value.enumerated.name, "null", - strlen("null") + 1); + strscpy(uinfo->value.enumerated.name, "null"); return 0; } - strscpy(name, prof_name, sizeof(uinfo->value.enumerated.name)); + strscpy(uinfo->value.enumerated.name, prof_name); return 0; } diff --git a/sound/soc/codecs/aw88395/aw88395.c b/sound/soc/codecs/aw88395/aw88395.c index aea44a199b98..fb563b4c6971 100644 --- a/sound/soc/codecs/aw88395/aw88395.c +++ b/sound/soc/codecs/aw88395/aw88395.c @@ -175,7 +175,7 @@ static int aw88395_profile_info(struct snd_kcontrol *kcontrol, { struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); struct aw88395 *aw88395 = snd_soc_component_get_drvdata(codec); - char *prof_name, *name; + char *prof_name; int count, ret; uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; @@ -192,17 +192,15 @@ static int aw88395_profile_info(struct snd_kcontrol *kcontrol, if (uinfo->value.enumerated.item >= count) uinfo->value.enumerated.item = count - 1; - name = uinfo->value.enumerated.name; count = uinfo->value.enumerated.item; ret = aw88395_dev_get_prof_name(aw88395->aw_pa, count, &prof_name); if (ret) { - strscpy(uinfo->value.enumerated.name, "null", - strlen("null") + 1); + strscpy(uinfo->value.enumerated.name, "null"); return 0; } - strscpy(name, prof_name, sizeof(uinfo->value.enumerated.name)); + strscpy(uinfo->value.enumerated.name, prof_name); return 0; } diff --git a/sound/soc/codecs/aw88399.c b/sound/soc/codecs/aw88399.c index c23e70d64d0c..58846feb013d 100644 --- a/sound/soc/codecs/aw88399.c +++ b/sound/soc/codecs/aw88399.c @@ -1831,7 +1831,7 @@ static int aw88399_profile_info(struct snd_kcontrol *kcontrol, { struct snd_soc_component *codec = 
snd_soc_kcontrol_component(kcontrol); struct aw88399 *aw88399 = snd_soc_component_get_drvdata(codec); - char *prof_name, *name; + char *prof_name; int count, ret; uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; @@ -1848,17 +1848,15 @@ static int aw88399_profile_info(struct snd_kcontrol *kcontrol, if (uinfo->value.enumerated.item >= count) uinfo->value.enumerated.item = count - 1; - name = uinfo->value.enumerated.name; count = uinfo->value.enumerated.item; ret = aw88399_dev_get_prof_name(aw88399->aw_pa, count, &prof_name); if (ret) { - strscpy(uinfo->value.enumerated.name, "null", - strlen("null") + 1); + strscpy(uinfo->value.enumerated.name, "null"); return 0; } - strscpy(name, prof_name, sizeof(uinfo->value.enumerated.name)); + strscpy(uinfo->value.enumerated.name, prof_name); return 0; } From d26a9f4f0a7745f0d5127344379a62007df68dcd Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Tue, 22 Jul 2025 20:38:41 +0200 Subject: [PATCH 1852/2411] platform/x86: dell-smbios-wmi: Stop touching WMI device ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Dell SMBIOS driver uses the "id" field inside struct device for prioritizing the WMI backend over the SMM backend. Because of this the WMI backend modifies the "id" field of the underlying WMI device. However the WMI core itself uses wdev->dev.id internally to track device IDs, so modifying this value will result in a resource leak. Fix this by not using the "id" field inside struct device for SMBIOS prioritization. Instead extend struct smbios_device with a separate "priority" field. Tested on a Dell Inspiron 3505. 
Fixes: 73f0f2b52c5e ("platform/x86: wmi: Fix WMI device naming issue") Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20250722183841.9552-1-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/dell-smbios-base.c | 19 +++++++++---------- drivers/platform/x86/dell/dell-smbios-smm.c | 3 +-- drivers/platform/x86/dell/dell-smbios-wmi.c | 4 +--- drivers/platform/x86/dell/dell-smbios.h | 2 +- 4 files changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/platform/x86/dell/dell-smbios-base.c b/drivers/platform/x86/dell/dell-smbios-base.c index 01c72b91a50d..444786102f02 100644 --- a/drivers/platform/x86/dell/dell-smbios-base.c +++ b/drivers/platform/x86/dell/dell-smbios-base.c @@ -39,6 +39,7 @@ struct token_sysfs_data { struct smbios_device { struct list_head list; struct device *device; + int priority; int (*call_fn)(struct calling_interface_buffer *arg); }; @@ -145,7 +146,7 @@ int dell_smbios_error(int value) } EXPORT_SYMBOL_GPL(dell_smbios_error); -int dell_smbios_register_device(struct device *d, void *call_fn) +int dell_smbios_register_device(struct device *d, int priority, void *call_fn) { struct smbios_device *priv; @@ -154,6 +155,7 @@ int dell_smbios_register_device(struct device *d, void *call_fn) return -ENOMEM; get_device(d); priv->device = d; + priv->priority = priority; priv->call_fn = call_fn; mutex_lock(&smbios_mutex); list_add_tail(&priv->list, &smbios_device_list); @@ -292,28 +294,25 @@ EXPORT_SYMBOL_GPL(dell_smbios_call_filter); int dell_smbios_call(struct calling_interface_buffer *buffer) { - int (*call_fn)(struct calling_interface_buffer *) = NULL; - struct device *selected_dev = NULL; + struct smbios_device *selected = NULL; struct smbios_device *priv; int ret; mutex_lock(&smbios_mutex); list_for_each_entry(priv, &smbios_device_list, list) { - if (!selected_dev || priv->device->id >= selected_dev->id) { - dev_dbg(priv->device, "Trying device ID: %d\n", - priv->device->id); - call_fn = 
priv->call_fn; - selected_dev = priv->device; + if (!selected || priv->priority >= selected->priority) { + dev_dbg(priv->device, "Trying device ID: %d\n", priv->priority); + selected = priv; } } - if (!selected_dev) { + if (!selected) { ret = -ENODEV; pr_err("No dell-smbios drivers are loaded\n"); goto out_smbios_call; } - ret = call_fn(buffer); + ret = selected->call_fn(buffer); out_smbios_call: mutex_unlock(&smbios_mutex); diff --git a/drivers/platform/x86/dell/dell-smbios-smm.c b/drivers/platform/x86/dell/dell-smbios-smm.c index 4d375985c85f..7055e2c40f34 100644 --- a/drivers/platform/x86/dell/dell-smbios-smm.c +++ b/drivers/platform/x86/dell/dell-smbios-smm.c @@ -125,8 +125,7 @@ int init_dell_smbios_smm(void) if (ret) goto fail_platform_device_add; - ret = dell_smbios_register_device(&platform_device->dev, - &dell_smbios_smm_call); + ret = dell_smbios_register_device(&platform_device->dev, 0, &dell_smbios_smm_call); if (ret) goto fail_register; diff --git a/drivers/platform/x86/dell/dell-smbios-wmi.c b/drivers/platform/x86/dell/dell-smbios-wmi.c index ae9012549560..a7dca8c59d60 100644 --- a/drivers/platform/x86/dell/dell-smbios-wmi.c +++ b/drivers/platform/x86/dell/dell-smbios-wmi.c @@ -264,9 +264,7 @@ static int dell_smbios_wmi_probe(struct wmi_device *wdev, const void *context) if (ret) return ret; - /* ID is used by dell-smbios to set priority of drivers */ - wdev->dev.id = 1; - ret = dell_smbios_register_device(&wdev->dev, &dell_smbios_wmi_call); + ret = dell_smbios_register_device(&wdev->dev, 1, &dell_smbios_wmi_call); if (ret) return ret; diff --git a/drivers/platform/x86/dell/dell-smbios.h b/drivers/platform/x86/dell/dell-smbios.h index 77baa15eb523..f421b8533a9e 100644 --- a/drivers/platform/x86/dell/dell-smbios.h +++ b/drivers/platform/x86/dell/dell-smbios.h @@ -64,7 +64,7 @@ struct calling_interface_structure { struct calling_interface_token tokens[]; } __packed; -int dell_smbios_register_device(struct device *d, void *call_fn); +int 
dell_smbios_register_device(struct device *d, int priority, void *call_fn); void dell_smbios_unregister_device(struct device *d); int dell_smbios_error(int value); From 5b9e07551faa7bb2f26cb039cc6e8d00bc4d0831 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 24 Jul 2025 13:51:08 -0500 Subject: [PATCH 1853/2411] platform/x86/amd: pmc: Drop SMU F/W match for Cezanne MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Chris reported that even on a BIOS that has a new enough SMU F/W version there is still a spurious IRQ1. Although the solution was added to SMU F/W 64.66.0 it turns out there needs to be a matching SBIOS change to activate it. Thus Linux shouldn't be avoiding the IRQ1 workaround on newer SMU F/W because there is no indication the BIOS change is in place. Drop the match for 64.66.0+ and instead match all RN/CZN/BRC (they all share same SMU F/W). Adjust the quirk infrastructure to allow quirking the workaround on or off and also adjust existing quirks to match properly. Unfortunately this may cause some systems that did have the SBIOS change in place to regress in keyboard wakeup but we don't have a way to know. If a user reports a keyboard wakeup regression they can run with amd_pmc.disable_workarounds=1 to deactivate the workaround and share DMI data so that their system can be quirked not to use the workaround in the upstream kernel. 
Reported-by: Chris Bainbridge Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4449 Tested-by: Chris Bainbridge Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20250724185156.1827592-1-superm1@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmc/pmc-quirks.c | 54 ++++++++++++++--------- drivers/platform/x86/amd/pmc/pmc.c | 13 ------ 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c index ded4c84f5ed1..7ffc659b2794 100644 --- a/drivers/platform/x86/amd/pmc/pmc-quirks.c +++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c @@ -28,10 +28,15 @@ static struct quirk_entry quirk_spurious_8042 = { .spurious_8042 = true, }; +static struct quirk_entry quirk_s2idle_spurious_8042 = { + .s2idle_bug_mmio = FCH_PM_BASE + FCH_PM_SCRATCH, + .spurious_8042 = true, +}; + static const struct dmi_system_id fwbug_list[] = { { .ident = "L14 Gen2 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "20X5"), @@ -39,7 +44,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "T14s Gen2 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "20XF"), @@ -47,7 +52,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "X13 Gen2 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "20XH"), @@ -55,7 +60,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "T14 Gen2 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "20XK"), @@ -63,7 
+68,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "T14 Gen1 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "20UD"), @@ -71,7 +76,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "T14 Gen1 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "20UE"), @@ -79,7 +84,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "T14s Gen1 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "20UH"), @@ -87,7 +92,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "T14s Gen1 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "20UJ"), @@ -95,7 +100,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "P14s Gen1 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "20Y1"), @@ -103,7 +108,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "P14s Gen2 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21A0"), @@ -111,7 +116,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "P14s Gen2 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21A1"), @@ -152,7 +157,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "IdeaPad 1 14AMN7", - 
.driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "82VF"), @@ -160,7 +165,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "IdeaPad 1 15AMN7", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "82VG"), @@ -168,7 +173,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "IdeaPad 1 15AMN7", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "82X5"), @@ -176,7 +181,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "IdeaPad Slim 3 14AMN8", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "82XN"), @@ -184,7 +189,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "IdeaPad Slim 3 15AMN8", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "82XQ"), @@ -193,7 +198,7 @@ static const struct dmi_system_id fwbug_list[] = { /* https://gitlab.freedesktop.org/drm/amd/-/issues/4434 */ { .ident = "Lenovo Yoga 6 13ALC6", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "82ND"), @@ -202,7 +207,7 @@ static const struct dmi_system_id fwbug_list[] = { /* https://gitlab.freedesktop.org/drm/amd/-/issues/2684 */ { .ident = "HP Laptop 15s-eq2xxx", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "HP"), DMI_MATCH(DMI_PRODUCT_NAME, "HP Laptop 15s-eq2xxx"), @@ -285,6 +290,16 @@ void 
amd_pmc_quirks_init(struct amd_pmc_dev *dev) { const struct dmi_system_id *dmi_id; + /* + * IRQ1 may cause an interrupt during resume even without a keyboard + * press. + * + * Affects Renoir, Cezanne and Barcelo SoCs + * + * A solution is available in PMFW 64.66.0, but it must be activated by + * SBIOS. If SBIOS is known to have the fix a quirk can be added for + * a given system to avoid workaround. + */ if (dev->cpu_id == AMD_CPU_ID_CZN) dev->disable_8042_wakeup = true; @@ -295,6 +310,5 @@ void amd_pmc_quirks_init(struct amd_pmc_dev *dev) if (dev->quirks->s2idle_bug_mmio) pr_info("Using s2idle quirk to avoid %s platform firmware bug\n", dmi_id->ident); - if (dev->quirks->spurious_8042) - dev->disable_8042_wakeup = true; + dev->disable_8042_wakeup = dev->quirks->spurious_8042; } diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index 0b9b23eb7c2c..bd318fd02ccf 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -530,19 +530,6 @@ static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev) static int amd_pmc_wa_irq1(struct amd_pmc_dev *pdev) { struct device *d; - int rc; - - /* cezanne platform firmware has a fix in 64.66.0 */ - if (pdev->cpu_id == AMD_CPU_ID_CZN) { - if (!pdev->major) { - rc = amd_pmc_get_smu_version(pdev); - if (rc) - return rc; - } - - if (pdev->major > 64 || (pdev->major == 64 && pdev->minor > 65)) - return 0; - } d = bus_find_device_by_name(&serio_bus, NULL, "serio0"); if (!d) From dff6f36878799a5ffabd15336ce993dc737374dc Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Sun, 27 Jul 2025 14:05:13 -0700 Subject: [PATCH 1854/2411] platform/x86/intel-uncore-freq: Check write blocked for ELC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the missing write_blocked check for updating sysfs related to uncore efficiency latency control (ELC). If write operation is blocked return error. 
Fixes: bb516dc79c4a ("platform/x86/intel-uncore-freq: Add support for efficiency latency control") Signed-off-by: Srinivas Pandruvada Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250727210513.2898630-1-srinivas.pandruvada@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- .../x86/intel/uncore-frequency/uncore-frequency-tpmi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c index 6df55c8e16b7..bfcf92aa4d69 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c @@ -192,9 +192,14 @@ static int uncore_read_control_freq(struct uncore_data *data, unsigned int *valu static int write_eff_lat_ctrl(struct uncore_data *data, unsigned int val, enum uncore_index index) { struct tpmi_uncore_cluster_info *cluster_info; + struct tpmi_uncore_struct *uncore_root; u64 control; cluster_info = container_of(data, struct tpmi_uncore_cluster_info, uncore_data); + uncore_root = cluster_info->uncore_root; + + if (uncore_root->write_blocked) + return -EPERM; if (cluster_info->root_domain) return -ENODATA; From 2c78fb287e1f430b929f2e49786518350d15605c Mon Sep 17 00:00:00 2001 From: Suma Hegde Date: Thu, 7 Aug 2025 10:06:37 +0000 Subject: [PATCH 1855/2411] platform/x86/amd/hsmp: Ensure sock->metric_tbl_addr is non-NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If metric table address is not allocated, accessing metrics_bin will result in a NULL pointer dereference, so add a check. 
Fixes: 5150542b8ec5 ("platform/x86/amd/hsmp: add support for metrics tbl") Signed-off-by: Suma Hegde Link: https://lore.kernel.org/r/20250807100637.952729-1-suma.hegde@amd.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/hsmp/hsmp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/platform/x86/amd/hsmp/hsmp.c b/drivers/platform/x86/amd/hsmp/hsmp.c index 885e2f8136fd..19f82c1d3090 100644 --- a/drivers/platform/x86/amd/hsmp/hsmp.c +++ b/drivers/platform/x86/amd/hsmp/hsmp.c @@ -356,6 +356,11 @@ ssize_t hsmp_metric_tbl_read(struct hsmp_socket *sock, char *buf, size_t size) if (!sock || !buf) return -EINVAL; + if (!sock->metric_tbl_addr) { + dev_err(sock->dev, "Metrics table address not available\n"); + return -ENOMEM; + } + /* Do not support lseek(), also don't allow more than the size of metric table */ if (size != sizeof(struct hsmp_metric_table)) { dev_err(sock->dev, "Wrong buffer size\n"); From de5cec220e4d45d7129e76f7d985c7b01f10f8d9 Mon Sep 17 00:00:00 2001 From: Suma Hegde Date: Mon, 4 Aug 2025 10:15:51 +0000 Subject: [PATCH 1856/2411] platform/x86/amd/hsmp: Ensure success even if hwmon registration fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even if hwmon registration fails, HSMP remains accessible through the device file, so the operation should return success. 
Signed-off-by: Suma Hegde Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20250804101551.89866-1-suma.hegde@amd.com Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/hsmp/acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd/hsmp/acpi.c b/drivers/platform/x86/amd/hsmp/acpi.c index 54986a752f7d..a94009203e01 100644 --- a/drivers/platform/x86/amd/hsmp/acpi.c +++ b/drivers/platform/x86/amd/hsmp/acpi.c @@ -504,7 +504,7 @@ static int init_acpi(struct device *dev) dev_set_drvdata(dev, &hsmp_pdev->sock[sock_ind]); - return ret; + return 0; } static const struct bin_attribute hsmp_metric_tbl_attr = { From 748f897511446c7578ca5f6d2ff099916bad6e28 Mon Sep 17 00:00:00 2001 From: Edip Hazuri Date: Mon, 28 Jul 2025 14:58:06 +0300 Subject: [PATCH 1857/2411] platform/x86: hp-wmi: mark Victus 16-r1xxx for victus_s fan and thermal profile support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds Victus 16-r1xxx laptop DMI board name into existing list. Tested on 16-r1077nt and works without any problem. 
Signed-off-by: Edip Hazuri Link: https://lore.kernel.org/r/20250728115805.20954-2-edip@medip.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/hp/hp-wmi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c index db5fdee2109c..60c8ac8d902c 100644 --- a/drivers/platform/x86/hp/hp-wmi.c +++ b/drivers/platform/x86/hp/hp-wmi.c @@ -92,9 +92,9 @@ static const char * const victus_thermal_profile_boards[] = { "8A25" }; -/* DMI Board names of Victus 16-s1000 laptops */ +/* DMI Board names of Victus 16-r1000 and Victus 16-s1000 laptops */ static const char * const victus_s_thermal_profile_boards[] = { - "8C9C" + "8C99", "8C9C" }; enum hp_wmi_radio { From bda053d6445717f8a4cd76f88caea2e39299fe07 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Aug 2025 17:12:03 -0700 Subject: [PATCH 1858/2411] selftests: drv-net: don't assume device has only 2 queues The test is implicitly assuming the device only has 2 queues. A real device will likely have more. The exact problem is that because NAPIs get added to the list from the head, the netlink dump reports them in reverse order. So the naive napis[0] will actually likely give us the _last_ NAPI, not the first one. Re-enable all the NAPIs instead of hard-coding 2 in the test. This way the NAPIs we operated on will always reappear, doesn't matter where they were in the registration order. 
Fixes: e6d76268813d ("net: Update threaded state in napi config in netif_set_threaded") Signed-off-by: Jakub Kicinski Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250809001205.1147153-2-kuba@kernel.org Signed-off-by: Paolo Abeni --- tools/testing/selftests/drivers/net/napi_threaded.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py index b2698db39817..9699a100a87d 100755 --- a/tools/testing/selftests/drivers/net/napi_threaded.py +++ b/tools/testing/selftests/drivers/net/napi_threaded.py @@ -35,6 +35,8 @@ def _setup_deferred_cleanup(cfg) -> None: threaded = cmd(f"cat /sys/class/net/{cfg.ifname}/threaded").stdout defer(_set_threaded_state, cfg, threaded) + return combined + def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: """ @@ -49,7 +51,7 @@ def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: napi0_id = napis[0]['id'] napi1_id = napis[1]['id'] - _setup_deferred_cleanup(cfg) + qcnt = _setup_deferred_cleanup(cfg) # set threaded _set_threaded_state(cfg, 1) @@ -62,7 +64,7 @@ def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: nl.napi_set({'id': napi1_id, 'threaded': 'disabled'}) cmd(f"ethtool -L {cfg.ifname} combined 1") - cmd(f"ethtool -L {cfg.ifname} combined 2") + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") _assert_napi_threaded_enabled(nl, napi0_id) _assert_napi_threaded_disabled(nl, napi1_id) @@ -80,7 +82,7 @@ def change_num_queues(cfg, nl) -> None: napi0_id = napis[0]['id'] napi1_id = napis[1]['id'] - _setup_deferred_cleanup(cfg) + qcnt = _setup_deferred_cleanup(cfg) # set threaded _set_threaded_state(cfg, 1) @@ -90,7 +92,7 @@ def change_num_queues(cfg, nl) -> None: _assert_napi_threaded_enabled(nl, napi1_id) cmd(f"ethtool -L {cfg.ifname} combined 1") - cmd(f"ethtool -L {cfg.ifname} combined 2") + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") # check napi threaded is set 
for both napis _assert_napi_threaded_enabled(nl, napi0_id) From ccba9f6baa900e31ad1a4c36e6f3c176694f9eac Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Aug 2025 17:12:04 -0700 Subject: [PATCH 1859/2411] net: update NAPI threaded config even for disabled NAPIs We have to make sure that all future NAPIs will have the right threaded state when the state is configured on the device level. We chose not to have an "unset" state for threaded, and not to wipe the NAPI config clean when channels are explicitly disabled. This means the persistent config structs "exist" even when their NAPIs are not instantiated. Differently put - the NAPI persistent state lives in the net_device (ncfg == struct napi_config): ,--- [napi 0] - [napi 1] [dev] | | `--- [ncfg 0] - [ncfg 1] so say we have a device with 2 queues but only 1 enabled: ,--- [napi 0] [dev] | `--- [ncfg 0] - [ncfg 1] now we set the device to threaded=1: ,---------- [napi 0 (thr:1)] [dev(thr:1)] | `---------- [ncfg 0 (thr:1)] - [ncfg 1 (thr:?)] Since [ncfg 1] was not attached to a NAPI during configuration we skipped it. If we create a NAPI for it later it will have the old setting (presumably disabled). One could argue if this is right or not "in principle", but it's definitely not how things worked before per-NAPI config. Fixes: 2677010e7793 ("Add support to set NAPI threaded for individual NAPI") Signed-off-by: Jakub Kicinski Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250809001205.1147153-3-kuba@kernel.org Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 5 ++++- net/core/dev.c | 7 ++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5e5de4b0a433..f3a3b761abfb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2071,6 +2071,8 @@ enum netdev_reg_state { * @max_pacing_offload_horizon: max EDT offload horizon in nsec.
* @napi_config: An array of napi_config structures containing per-NAPI * settings. + * @num_napi_configs: number of allocated NAPI config structs, + * always >= max(num_rx_queues, num_tx_queues). * @gro_flush_timeout: timeout for GRO layer in NAPI * @napi_defer_hard_irqs: If not zero, provides a counter that would * allow to avoid NIC hard IRQ, on busy queues. @@ -2482,8 +2484,9 @@ struct net_device { u64 max_pacing_offload_horizon; struct napi_config *napi_config; - unsigned long gro_flush_timeout; + u32 num_napi_configs; u32 napi_defer_hard_irqs; + unsigned long gro_flush_timeout; /** * @up: copy of @state's IFF_UP, but safe to read with just @lock. diff --git a/net/core/dev.c b/net/core/dev.c index 68dc47d7e700..f180746382a1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6999,7 +6999,7 @@ int netif_set_threaded(struct net_device *dev, enum netdev_napi_threaded threaded) { struct napi_struct *napi; - int err = 0; + int i, err = 0; netdev_assert_locked_or_invisible(dev); @@ -7021,6 +7021,10 @@ int netif_set_threaded(struct net_device *dev, list_for_each_entry(napi, &dev->napi_list, dev_list) WARN_ON_ONCE(napi_set_threaded(napi, threaded)); + /* Override the config for all NAPIs even if currently not listed */ + for (i = 0; i < dev->num_napi_configs; i++) + dev->napi_config[i].threaded = threaded; + return err; } @@ -11873,6 +11877,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, goto free_all; dev->cfg_pending = dev->cfg; + dev->num_napi_configs = maxqs; napi_config_sz = array_size(maxqs, sizeof(*dev->napi_config)); dev->napi_config = kvzalloc(napi_config_sz, GFP_KERNEL_ACCOUNT); if (!dev->napi_config) From b3fc08ab9a565efb42fe08be046a0d203b82cdb8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Aug 2025 17:12:05 -0700 Subject: [PATCH 1860/2411] net: prevent deadlocks when enabling NAPIs with mixed kthread config The following order of calls currently deadlocks if: - device has threaded=1; and - NAPI has persistent config 
with threaded=0. netif_napi_add_weight_config() dev->threaded == 1 napi_kthread_create() napi_enable() napi_restore_config() napi_set_threaded(0) napi_stop_kthread() while (NAPIF_STATE_SCHED) msleep(20) We deadlock because disabled NAPI has STATE_SCHED set. Creating a thread in netif_napi_add() just to destroy it in napi_disable() is fairly ugly in the first place. Let's read both the device config and the NAPI config in netif_napi_add(). Fixes: e6d76268813d ("net: Update threaded state in napi config in netif_set_threaded") Signed-off-by: Jakub Kicinski Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250809001205.1147153-4-kuba@kernel.org Signed-off-by: Paolo Abeni --- net/core/dev.c | 5 +++-- net/core/dev.h | 8 ++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index f180746382a1..5a3c0f40a93f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7357,8 +7357,9 @@ void netif_napi_add_weight_locked(struct net_device *dev, * Clear dev->threaded if kthread creation failed so that * threaded mode will not be enabled in napi_enable(). 
*/ - if (dev->threaded && napi_kthread_create(napi)) - dev->threaded = NETDEV_NAPI_THREADED_DISABLED; + if (napi_get_threaded_config(dev, napi)) + if (napi_kthread_create(napi)) + dev->threaded = NETDEV_NAPI_THREADED_DISABLED; netif_napi_set_irq_locked(napi, -1); } EXPORT_SYMBOL(netif_napi_add_weight_locked); diff --git a/net/core/dev.h b/net/core/dev.h index ab69edc0c3e3..d6b08d435479 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -323,6 +323,14 @@ static inline enum netdev_napi_threaded napi_get_threaded(struct napi_struct *n) return NETDEV_NAPI_THREADED_DISABLED; } +static inline enum netdev_napi_threaded +napi_get_threaded_config(struct net_device *dev, struct napi_struct *n) +{ + if (n->config) + return n->config->threaded; + return dev->threaded; +} + int napi_set_threaded(struct napi_struct *n, enum netdev_napi_threaded threaded); From 82b3644d3deab496cc09f29f3449ede6824b3e8e Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 22 Jul 2025 16:59:59 +0200 Subject: [PATCH 1861/2411] device: rust: expand documentation for DeviceContext Expand the documentation around DeviceContext states and types, in order to provide detailed information about their purpose and relationship with each other. Reviewed-by: Greg Kroah-Hartman Reviewed-by: Alexandre Courbot Reviewed-by: Alice Ryhl Reviewed-by: Daniel Almeida Link: https://lore.kernel.org/r/20250722150110.23565-2-dakr@kernel.org [ Fix two minor typos. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/device.rs | 69 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 58 insertions(+), 11 deletions(-) diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index b8613289de8e..fe095a8eccb1 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -311,28 +311,75 @@ unsafe impl Send for Device {} // synchronization in `struct device`. unsafe impl Sync for Device {} -/// Marker trait for the context of a bus specific device. 
+/// Marker trait for the context or scope of a bus specific device. /// -/// Some functions of a bus specific device should only be called from a certain context, i.e. bus -/// callbacks, such as `probe()`. +/// [`DeviceContext`] is a marker trait for types representing the context of a bus specific +/// [`Device`]. /// -/// This is the marker trait for structures representing the context of a bus specific device. +/// The specific device context types are: [`CoreInternal`], [`Core`], [`Bound`] and [`Normal`]. +/// +/// [`DeviceContext`] types are hierarchical, which means that there is a strict hierarchy that +/// defines which [`DeviceContext`] type can be derived from another. For instance, any +/// [`Device`] can dereference to a [`Device`]. +/// +/// The following enumeration illustrates the dereference hierarchy of [`DeviceContext`] types. +/// +/// - [`CoreInternal`] => [`Core`] => [`Bound`] => [`Normal`] +/// +/// Bus devices can automatically implement the dereference hierarchy by using +/// [`impl_device_context_deref`]. +/// +/// Note that the guarantee for a [`Device`] reference to have a certain [`DeviceContext`] comes +/// from the specific scope the [`Device`] reference is valid in. +/// +/// [`impl_device_context_deref`]: kernel::impl_device_context_deref pub trait DeviceContext: private::Sealed {} -/// The [`Normal`] context is the context of a bus specific device when it is not an argument of -/// any bus callback. +/// The [`Normal`] context is the default [`DeviceContext`] of any [`Device`]. +/// +/// The normal context does not indicate any specific context. Any `Device` is also a valid +/// [`Device`]. It is the only [`DeviceContext`] for which it is valid to implement +/// [`AlwaysRefCounted`] for. +/// +/// [`AlwaysRefCounted`]: kernel::types::AlwaysRefCounted pub struct Normal; -/// The [`Core`] context is the context of a bus specific device when it is supplied as argument of -/// any of the bus callbacks, such as `probe()`. 
+/// The [`Core`] context is the context of a bus specific device when it appears as argument of +/// any bus specific callback, such as `probe()`. +/// +/// The core context indicates that the [`Device`] reference's scope is limited to the bus +/// callback it appears in. It is intended to be used for synchronization purposes. Bus device +/// implementations can implement methods for [`Device`], such that they can only be called +/// from bus callbacks. pub struct Core; -/// Semantically the same as [`Core`] but reserved for internal usage of the corresponding bus +/// Semantically the same as [`Core`], but reserved for internal usage of the corresponding bus /// abstraction. +/// +/// The internal core context is intended to be used in exactly the same way as the [`Core`] +/// context, with the difference that this [`DeviceContext`] is internal to the corresponding bus +/// abstraction. +/// +/// This context mainly exists to share generic [`Device`] infrastructure that should only be called +/// from bus callbacks with bus abstractions, but without making them accessible for drivers. pub struct CoreInternal; -/// The [`Bound`] context is the context of a bus specific device reference when it is guaranteed to -/// be bound for the duration of its lifetime. +/// The [`Bound`] context is the [`DeviceContext`] of a bus specific device when it is guaranteed to +/// be bound to a driver. +/// +/// The bound context indicates that for the entire duration of the lifetime of a [`Device`] +/// reference, the [`Device`] is guaranteed to be bound to a driver. +/// +/// Some APIs, such as [`dma::CoherentAllocation`] or [`Devres`] rely on the [`Device`] to be bound, +/// which can be proven with the [`Bound`] device context. +/// +/// Any abstraction that can guarantee a scope where the corresponding bus device is bound, should +/// provide a [`Device`] reference to its users for this scope. 
This allows users to benefit +/// from optimizations for accessing device resources, see also [`Devres::access`]. +/// +/// [`Devres`]: kernel::devres::Devres +/// [`Devres::access`]: kernel::devres::Devres::access +/// [`dma::CoherentAllocation`]: kernel::dma::CoherentAllocation pub struct Bound; mod private { From d6e26c1ae4a602d8b7eeb39e23514f6f98d91eb5 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 22 Jul 2025 17:00:00 +0200 Subject: [PATCH 1862/2411] device: rust: expand documentation for Device The documentation for the generic Device type is outdated and deserves much more detail. Hence, expand the documentation and cover topics such as device types, device contexts, as well as information on how to use the generic device infrastructure to implement bus and class specific device types. Reviewed-by: Daniel Almeida Reviewed-by: Greg Kroah-Hartman Reviewed-by: Alexandre Courbot Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250722150110.23565-3-dakr@kernel.org [ Add empty line after code blocks, "in" -> "within", remove unnecessary pin annotations in class device example. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/device.rs | 139 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 126 insertions(+), 13 deletions(-) diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index fe095a8eccb1..5902b3714a16 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -15,23 +15,130 @@ pub mod property; -/// A reference-counted device. +/// The core representation of a device in the kernel's driver model. /// -/// This structure represents the Rust abstraction for a C `struct device`. This implementation -/// abstracts the usage of an already existing C `struct device` within Rust code that we get -/// passed from the C side. +/// This structure represents the Rust abstraction for a C `struct device`. 
A [`Device`] can either +/// exist as temporary reference (see also [`Device::from_raw`]), which is only valid within a +/// certain scope or as [`ARef`], owning a dedicated reference count. /// -/// An instance of this abstraction can be obtained temporarily or permanent. +/// # Device Types /// -/// A temporary one is bound to the lifetime of the C `struct device` pointer used for creation. -/// A permanent instance is always reference-counted and hence not restricted by any lifetime -/// boundaries. +/// A [`Device`] can represent either a bus device or a class device. /// -/// For subsystems it is recommended to create a permanent instance to wrap into a subsystem -/// specific device structure (e.g. `pci::Device`). This is useful for passing it to drivers in -/// `T::probe()`, such that a driver can store the `ARef` (equivalent to storing a -/// `struct device` pointer in a C driver) for arbitrary purposes, e.g. allocating DMA coherent -/// memory. +/// ## Bus Devices +/// +/// A bus device is a [`Device`] that is associated with a physical or virtual bus. Examples of +/// buses include PCI, USB, I2C, and SPI. Devices attached to a bus are registered with a specific +/// bus type, which facilitates matching devices with appropriate drivers based on IDs or other +/// identifying information. Bus devices are visible in sysfs under `/sys/bus//devices/`. +/// +/// ## Class Devices +/// +/// A class device is a [`Device`] that is associated with a logical category of functionality +/// rather than a physical bus. Examples of classes include block devices, network interfaces, sound +/// cards, and input devices. Class devices are grouped under a common class and exposed to +/// userspace via entries in `/sys/class//`. +/// +/// # Device Context +/// +/// [`Device`] references are generic over a [`DeviceContext`], which represents the type state of +/// a [`Device`]. 
+/// +/// As the name indicates, this type state represents the context of the scope the [`Device`] +/// reference is valid in. For instance, the [`Bound`] context guarantees that the [`Device`] is +/// bound to a driver for the entire duration of the existence of a [`Device`] reference. +/// +/// Other [`DeviceContext`] types besides [`Bound`] are [`Normal`], [`Core`] and [`CoreInternal`]. +/// +/// Unless selected otherwise [`Device`] defaults to the [`Normal`] [`DeviceContext`], which by +/// itself has no additional requirements. +/// +/// It is always up to the caller of [`Device::from_raw`] to select the correct [`DeviceContext`] +/// type for the corresponding scope the [`Device`] reference is created in. +/// +/// All [`DeviceContext`] types other than [`Normal`] are intended to be used with +/// [bus devices](#bus-devices) only. +/// +/// # Implementing Bus Devices +/// +/// This section provides a guideline to implement bus specific devices, such as [`pci::Device`] or +/// [`platform::Device`]. +/// +/// A bus specific device should be defined as follows. +/// +/// ```ignore +/// #[repr(transparent)] +/// pub struct Device( +/// Opaque, +/// PhantomData, +/// ); +/// ``` +/// +/// Since devices are reference counted, [`AlwaysRefCounted`] should be implemented for `Device` +/// (i.e. `Device`). Note that [`AlwaysRefCounted`] must not be implemented for any other +/// [`DeviceContext`], since all other device context types are only valid within a certain scope. +/// +/// In order to be able to implement the [`DeviceContext`] dereference hierarchy, bus device +/// implementations should call the [`impl_device_context_deref`] macro as shown below. +/// +/// ```ignore +/// // SAFETY: `Device` is a transparent wrapper of a type that doesn't depend on `Device`'s +/// // generic argument. 
+/// kernel::impl_device_context_deref!(unsafe { Device }); +/// ``` +/// +/// In order to convert from any [`Device`] to [`ARef`], bus devices can implement +/// the following macro call. +/// +/// ```ignore +/// kernel::impl_device_context_into_aref!(Device); +/// ``` +/// +/// Bus devices should also implement the following [`AsRef`] implementation, such that users can +/// easily derive a generic [`Device`] reference. +/// +/// ```ignore +/// impl AsRef> for Device { +/// fn as_ref(&self) -> &device::Device { +/// ... +/// } +/// } +/// ``` +/// +/// # Implementing Class Devices +/// +/// Class device implementations require less infrastructure and depend slightly more on the +/// specific subsystem. +/// +/// An example implementation for a class device could look like this. +/// +/// ```ignore +/// #[repr(C)] +/// pub struct Device { +/// dev: Opaque, +/// data: T::Data, +/// } +/// ``` +/// +/// This class device uses the sub-classing pattern to embed the driver's private data within the +/// allocation of the class device. For this to be possible the class device is generic over the +/// class specific `Driver` trait implementation. +/// +/// Just like any device, class devices are reference counted and should hence implement +/// [`AlwaysRefCounted`] for `Device`. +/// +/// Class devices should also implement the following [`AsRef`] implementation, such that users can +/// easily derive a generic [`Device`] reference. +/// +/// ```ignore +/// impl AsRef for Device { +/// fn as_ref(&self) -> &device::Device { +/// ... +/// } +/// } +/// ``` +/// +/// An example for a class device implementation is [`drm::Device`]. /// /// # Invariants /// @@ -42,6 +149,12 @@ /// /// `bindings::device::release` is valid to be called from any thread, hence `ARef` can be /// dropped from any thread.
+/// +/// [`AlwaysRefCounted`]: kernel::types::AlwaysRefCounted +/// [`drm::Device`]: kernel::drm::Device +/// [`impl_device_context_deref`]: kernel::impl_device_context_deref +/// [`pci::Device`]: kernel::pci::Device +/// [`platform::Device`]: kernel::platform::Device #[repr(transparent)] pub struct Device(Opaque, PhantomData); From 970a7c68788e3fec237713eef22ace46507bcf9c Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 22 Jul 2025 17:00:01 +0200 Subject: [PATCH 1863/2411] driver: rust: expand documentation for driver infrastructure Add documentation about generic driver infrastructure, representing a guideline on how the generic driver infrastructure is intended to be used to implement bus specific driver APIs. This covers aspects such as the bus specific driver trait, adapter implementation, driver registration and custom device ID types. Reviewed-by: Daniel Almeida Reviewed-by: Greg Kroah-Hartman Reviewed-by: Alexandre Courbot Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250722150110.23565-4-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/driver.rs | 89 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 2 deletions(-) diff --git a/rust/kernel/driver.rs b/rust/kernel/driver.rs index a8f2675ba7a7..279e3af20682 100644 --- a/rust/kernel/driver.rs +++ b/rust/kernel/driver.rs @@ -2,8 +2,93 @@ //! Generic support for drivers of different buses (e.g., PCI, Platform, Amba, etc.). //! -//! Each bus / subsystem is expected to implement [`RegistrationOps`], which allows drivers to -//! register using the [`Registration`] class. +//! This documentation describes how to implement a bus specific driver API and how to align it with +//! the design of (bus specific) devices. +//! +//! Note: Readers are expected to know the content of the documentation of [`Device`] and +//! [`DeviceContext`]. +//! +//! # Driver Trait +//! +//! The main driver interface is defined by a bus specific driver trait. 
For instance: +//! +//! ```ignore +//! pub trait Driver: Send { +//! /// The type holding information about each device ID supported by the driver. +//! type IdInfo: 'static; +//! +//! /// The table of OF device ids supported by the driver. +//! const OF_ID_TABLE: Option> = None; +//! +//! /// The table of ACPI device ids supported by the driver. +//! const ACPI_ID_TABLE: Option> = None; +//! +//! /// Driver probe. +//! fn probe(dev: &Device, id_info: &Self::IdInfo) -> Result>>; +//! +//! /// Driver unbind (optional). +//! fn unbind(dev: &Device, this: Pin<&Self>) { +//! let _ = (dev, this); +//! } +//! } +//! ``` +//! +//! For specific examples see [`auxiliary::Driver`], [`pci::Driver`] and [`platform::Driver`]. +//! +//! The `probe()` callback should return a `Result>>`, i.e. the driver's private +//! data. The bus abstraction should store the pointer in the corresponding bus device. The generic +//! [`Device`] infrastructure provides common helpers for this purpose on its +//! [`Device`] implementation. +//! +//! All driver callbacks should provide a reference to the driver's private data. Once the driver +//! is unbound from the device, the bus abstraction should take back the ownership of the driver's +//! private data from the corresponding [`Device`] and [`drop`] it. +//! +//! All driver callbacks should provide a [`Device`] reference (see also [`device::Core`]). +//! +//! # Adapter +//! +//! The adapter implementation of a bus represents the abstraction layer between the C bus +//! callbacks and the Rust bus callbacks. It therefore has to be generic over an implementation of +//! the [driver trait](#driver-trait). +//! +//! ```ignore +//! pub struct Adapter; +//! ``` +//! +//! There's a common [`Adapter`] trait that can be implemented to inherit common driver +//! infrastructure, such as finding the ID info from an [`of::IdTable`] or [`acpi::IdTable`]. +//! +//! # Driver Registration +//! +//! 
In order to register C driver types (such as `struct platform_driver`) the [adapter](#adapter) +//! should implement the [`RegistrationOps`] trait. +//! +//! This trait implementation can be used to create the actual registration with the common +//! [`Registration`] type. +//! +//! Typically, bus abstractions want to provide a bus specific `module_bus_driver!` macro, which +//! creates a kernel module with exactly one [`Registration`] for the bus specific adapter. +//! +//! The generic driver infrastructure provides a helper for this with the [`module_driver`] macro. +//! +//! # Device IDs +//! +//! Besides the common device ID types, such as [`of::DeviceId`] and [`acpi::DeviceId`], most buses +//! may need to implement their own device ID types. +//! +//! For this purpose the generic infrastructure in [`device_id`] should be used. +//! +//! [`auxiliary::Driver`]: kernel::auxiliary::Driver +//! [`Core`]: device::Core +//! [`Device`]: device::Device +//! [`Device`]: device::Device +//! [`Device`]: device::Device +//! [`DeviceContext`]: device::DeviceContext +//! [`device_id`]: kernel::device_id +//! [`module_driver`]: kernel::module_driver +//! [`pci::Driver`]: kernel::pci::Driver +//! [`platform::Driver`]: kernel::platform::Driver use crate::error::{Error, Result}; use crate::{acpi, device, of, str::CStr, try_pin_init, types::Opaque, ThisModule}; From d405ec23df13e6df599f5bd965a55d13420366b8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 Aug 2025 14:57:06 +0200 Subject: [PATCH 1864/2411] ACPI: processor: perflib: Move problematic pr->performance check Commit d33bd88ac0eb ("ACPI: processor: perflib: Fix initial _PPC limit application") added a pr->performance check that prevents the frequency QoS request from being added when the given processor has no performance object. 
Unfortunately, this causes a WARN() in freq_qos_remove_request() to trigger on an attempt to take the given CPU offline later because the frequency QoS object has not been added for it due to the missing performance object. Address this by moving the pr->performance check before calling acpi_processor_get_platform_limit() so it only prevents a limit from being set for the CPU if the performance object is not present. This way, the frequency QoS request is added as it was before the above commit and it is present all the time along with the CPU's cpufreq policy regardless of whether or not the CPU is online. Fixes: d33bd88ac0eb ("ACPI: processor: perflib: Fix initial _PPC limit application") Tested-by: Rafael J. Wysocki Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/2801421.mvXUDI8C0e@rafael.j.wysocki --- drivers/acpi/processor_perflib.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 755003bf3a45..8972446b7162 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -180,7 +180,7 @@ void acpi_processor_ppc_init(struct cpufreq_policy *policy) struct acpi_processor *pr = per_cpu(processors, cpu); int ret; - if (!pr || !pr->performance) + if (!pr) continue; /* @@ -197,6 +197,9 @@ void acpi_processor_ppc_init(struct cpufreq_policy *policy) pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu, ret); + if (!pr->performance) + continue; + ret = acpi_processor_get_platform_limit(pr); if (ret) pr_err("Failed to update freq constraint for CPU%d (%d)\n", From 56bdf7270ff4f870e2d4bfacdc00161e766dba2d Mon Sep 17 00:00:00 2001 From: David Thompson Date: Mon, 11 Aug 2025 13:50:44 -0400 Subject: [PATCH 1865/2411] Revert "gpio: mlxbf3: only get IRQ for device instance 0" This reverts commit 10af0273a35ab4513ca1546644b8c853044da134. While this change was merged, it is not the preferred solution. 
During review of a similar change to the gpio-mlxbf2 driver, the use of "platform_get_irq_optional" was identified as the preferred solution, so let's use it for gpio-mlxbf3 driver as well. Cc: stable@vger.kernel.org Fixes: 10af0273a35a ("gpio: mlxbf3: only get IRQ for device instance 0") Signed-off-by: David Thompson Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/8d2b630c71b3742f2c74242cf7d602706a6108e6.1754928650.git.davthompson@nvidia.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mlxbf3.c | 52 +++++++++++++------------------------- 1 file changed, 18 insertions(+), 34 deletions(-) diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c index 9875e34bde72..10ea71273c89 100644 --- a/drivers/gpio/gpio-mlxbf3.c +++ b/drivers/gpio/gpio-mlxbf3.c @@ -190,9 +190,7 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev) struct mlxbf3_gpio_context *gs; struct gpio_irq_chip *girq; struct gpio_chip *gc; - char *colon_ptr; int ret, irq; - long num; gs = devm_kzalloc(dev, sizeof(*gs), GFP_KERNEL); if (!gs) @@ -229,39 +227,25 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev) gc->owner = THIS_MODULE; gc->add_pin_ranges = mlxbf3_gpio_add_pin_ranges; - colon_ptr = strchr(dev_name(dev), ':'); - if (!colon_ptr) { - dev_err(dev, "invalid device name format\n"); - return -EINVAL; - } + irq = platform_get_irq(pdev, 0); + if (irq >= 0) { + girq = &gs->gc.irq; + gpio_irq_chip_set_chip(girq, &gpio_mlxbf3_irqchip); + girq->default_type = IRQ_TYPE_NONE; + /* This will let us handle the parent IRQ in the driver */ + girq->num_parents = 0; + girq->parents = NULL; + girq->parent_handler = NULL; + girq->handler = handle_bad_irq; - ret = kstrtol(++colon_ptr, 16, &num); - if (ret) { - dev_err(dev, "invalid device instance\n"); - return ret; - } - - if (!num) { - irq = platform_get_irq(pdev, 0); - if (irq >= 0) { - girq = &gs->gc.irq; - gpio_irq_chip_set_chip(girq, &gpio_mlxbf3_irqchip); - girq->default_type = IRQ_TYPE_NONE; - /* This 
will let us handle the parent IRQ in the driver */ - girq->num_parents = 0; - girq->parents = NULL; - girq->parent_handler = NULL; - girq->handler = handle_bad_irq; - - /* - * Directly request the irq here instead of passing - * a flow-handler because the irq is shared. - */ - ret = devm_request_irq(dev, irq, mlxbf3_gpio_irq_handler, - IRQF_SHARED, dev_name(dev), gs); - if (ret) - return dev_err_probe(dev, ret, "failed to request IRQ"); - } + /* + * Directly request the irq here instead of passing + * a flow-handler because the irq is shared. + */ + ret = devm_request_irq(dev, irq, mlxbf3_gpio_irq_handler, + IRQF_SHARED, dev_name(dev), gs); + if (ret) + return dev_err_probe(dev, ret, "failed to request IRQ"); } platform_set_drvdata(pdev, gs); From 810bd9066fb1871b8a9528f31f2fdbf2a8b73bf2 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Mon, 11 Aug 2025 13:50:45 -0400 Subject: [PATCH 1866/2411] gpio: mlxbf3: use platform_get_irq_optional() The gpio-mlxbf3 driver interfaces with two GPIO controllers, device instance 0 and 1. There is a single IRQ resource shared between the two controllers, and it is found in the ACPI table for device instance 0. 
The driver should not use platform_get_irq(), otherwise this error is logged when probing instance 1: mlxbf3_gpio MLNXBF33:01: error -ENXIO: IRQ index 0 not found Cc: stable@vger.kernel.org Fixes: cd33f216d241 ("gpio: mlxbf3: Add gpio driver support") Signed-off-by: David Thompson Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/ce70b98a201ce82b9df9aa80ac7a5eeaa2268e52.1754928650.git.davthompson@nvidia.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mlxbf3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c index 10ea71273c89..ed29b07d16c1 100644 --- a/drivers/gpio/gpio-mlxbf3.c +++ b/drivers/gpio/gpio-mlxbf3.c @@ -227,7 +227,7 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev) gc->owner = THIS_MODULE; gc->add_pin_ranges = mlxbf3_gpio_add_pin_ranges; - irq = platform_get_irq(pdev, 0); + irq = platform_get_irq_optional(pdev, 0); if (irq >= 0) { girq = &gs->gc.irq; gpio_irq_chip_set_chip(girq, &gpio_mlxbf3_irqchip); From 3f69f2e78799bf76e5dfe74f2eda4d67812d4edc Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Mon, 11 Aug 2025 07:41:44 +0200 Subject: [PATCH 1867/2411] PCI: xilinx: Fix NULL pointer dereference in xilinx_pcie_intr_handler() f29861aa301c5 ("PCI: xilinx: Switch to msi_create_parent_irq_domain()") changed xilinx_pcie::msi_domain from child devices' interrupt domain to Xilinx AXI bridge's interrupt domain. However, xilinx_pcie_intr_handler() wasn't changed and still reads Xilinx AXI bridge's interrupt domain from xilinx_pcie::msi_domain->parent. This pointer is NULL now. Update xilinx_pcie_intr_handler() to read the correct interrupt domain pointer. 
Fixes: f29861aa301c5 ("PCI: xilinx: Switch to msi_create_parent_irq_domain()") Signed-off-by: Nam Cao Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250811054144.4049448-1-namcao@linutronix.de --- drivers/pci/controller/pcie-xilinx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-xilinx.c b/drivers/pci/controller/pcie-xilinx.c index f121836c3cf4..937ea6ae1ac4 100644 --- a/drivers/pci/controller/pcie-xilinx.c +++ b/drivers/pci/controller/pcie-xilinx.c @@ -400,7 +400,7 @@ static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data) if (val & XILINX_PCIE_RPIFR1_MSI_INTR) { val = pcie_read(pcie, XILINX_PCIE_REG_RPIFR2) & XILINX_PCIE_RPIFR2_MSG_DATA; - domain = pcie->msi_domain->parent; + domain = pcie->msi_domain; } else { val = (val & XILINX_PCIE_RPIFR1_INTR_MASK) >> XILINX_PCIE_RPIFR1_INTR_SHIFT; From 9d7a1cbebbb691891671def57407ba2f8ee914e8 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 31 Jul 2025 10:38:09 +0100 Subject: [PATCH 1868/2411] drm/xe/migrate: prevent infinite recursion If the buf + offset is not aligned to XE_CACHELINE_BYTES we fall back to using a bounce buffer. However the bounce buffer here is allocated on the stack, and the only alignment requirement here is that it's naturally aligned to u8, and not XE_CACHELINE_BYTES. If the bounce buffer is also misaligned we then recurse back into the function again, however the new bounce buffer might also not be aligned, and might never be until we eventually blow through the stack, as we keep recursing. Instead of using the stack use kmalloc, which should respect the power-of-two alignment request here. Fixes a kernel panic when triggering this path through eudebug.
v2 (Stuart): - Add build bug check for power-of-two restriction - s/EINVAL/ENOMEM/ Fixes: 270172f64b11 ("drm/xe: Update xe_ttm_access_memory to use GPU for non-visible access") Signed-off-by: Matthew Auld Cc: Maciej Patelczyk Cc: Stuart Summers Cc: Matthew Brost Reviewed-by: Stuart Summers Link: https://lore.kernel.org/r/20250731093807.207572-6-matthew.auld@intel.com (cherry picked from commit 38b34e928a08ba594c4bbf7118aa3aadacd62fff) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index ba1cff2e4cda..6193e2ca3741 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1820,15 +1820,19 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, if (!IS_ALIGNED(len, XE_CACHELINE_BYTES) || !IS_ALIGNED((unsigned long)buf + offset, XE_CACHELINE_BYTES)) { int buf_offset = 0; + void *bounce; + int err; + + BUILD_BUG_ON(!is_power_of_2(XE_CACHELINE_BYTES)); + bounce = kmalloc(XE_CACHELINE_BYTES, GFP_KERNEL); + if (!bounce) + return -ENOMEM; /* * Less than ideal for large unaligned access but this should be * fairly rare, can fixup if this becomes common. 
*/ do { - u8 bounce[XE_CACHELINE_BYTES]; - void *ptr = (void *)bounce; - int err; int copy_bytes = min_t(int, bytes_left, XE_CACHELINE_BYTES - (offset & XE_CACHELINE_MASK)); @@ -1837,22 +1841,22 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, err = xe_migrate_access_memory(m, bo, offset & ~XE_CACHELINE_MASK, - (void *)ptr, - sizeof(bounce), 0); + bounce, + XE_CACHELINE_BYTES, 0); if (err) - return err; + break; if (write) { - memcpy(ptr + ptr_offset, buf + buf_offset, copy_bytes); + memcpy(bounce + ptr_offset, buf + buf_offset, copy_bytes); err = xe_migrate_access_memory(m, bo, offset & ~XE_CACHELINE_MASK, - (void *)ptr, - sizeof(bounce), write); + bounce, + XE_CACHELINE_BYTES, write); if (err) - return err; + break; } else { - memcpy(buf + buf_offset, ptr + ptr_offset, + memcpy(buf + buf_offset, bounce + ptr_offset, copy_bytes); } @@ -1861,7 +1865,8 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, offset += copy_bytes; } while (bytes_left); - return 0; + kfree(bounce); + return err; } dma_addr = xe_migrate_dma_map(xe, buf, len + page_offset, write); From 4126cb327a2e3273c81fcef1c594c5b7b645c44c Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 31 Jul 2025 10:38:10 +0100 Subject: [PATCH 1869/2411] drm/xe/migrate: don't overflow max copy size With non-page aligned copy, we need to use 4 byte aligned pitch, however the size itself might still be close to our maximum of ~8M, and so the dimensions of the copy can easily exceed the S16_MAX limit of the copy command leading to the following assert: xe 0000:03:00.0: [drm] Assertion `size / pitch <= ((s16)(((u16)~0U) >> 1))` failed! platform: BATTLEMAGE subplatform: 1 graphics: Xe2_HPG 20.01 step A0 media: Xe2_HPM 13.01 step A1 tile: 0 VRAM 10.0 GiB GT: 0 type 1 WARNING: CPU: 23 PID: 10605 at drivers/gpu/drm/xe/xe_migrate.c:673 emit_copy+0x4b5/0x4e0 [xe] To fix this account for the pitch when calculating the number of current bytes to copy. 
Fixes: 270172f64b11 ("drm/xe: Update xe_ttm_access_memory to use GPU for non-visible access") Signed-off-by: Matthew Auld Cc: Maciej Patelczyk Cc: Matthew Brost Reviewed-by: Stuart Summers Link: https://lore.kernel.org/r/20250731093807.207572-7-matthew.auld@intel.com (cherry picked from commit 8c2d61e0e916e077fda7e7b8e67f25ffe0f361fc) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 6193e2ca3741..95bcaa427d26 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1887,6 +1887,12 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, else current_bytes = min_t(int, bytes_left, cursor.size); + if (current_bytes & ~PAGE_MASK) { + int pitch = 4; + + current_bytes = min_t(int, current_bytes, S16_MAX * pitch); + } + if (fence) dma_fence_put(fence); From 145832fbdd17b1d77ffd6cdd1642259e101d1b7e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 31 Jul 2025 10:38:11 +0100 Subject: [PATCH 1870/2411] drm/xe/migrate: prevent potential UAF If we hit the error path, the previous fence (if there is one) has already been put() prior to this, so doing a fence_wait could lead to UAF. Tweak the flow to defer the put() until after we do the wait.
Fixes: 270172f64b11 ("drm/xe: Update xe_ttm_access_memory to use GPU for non-visible access") Signed-off-by: Matthew Auld Cc: Maciej Patelczyk Cc: Matthew Brost Reviewed-by: Stuart Summers Link: https://lore.kernel.org/r/20250731093807.207572-8-matthew.auld@intel.com (cherry picked from commit 9b7ca35ed28fe5fad86e9d9c24ebd1271e4c9c3e) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 95bcaa427d26..7d20ac4bb633 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1893,9 +1893,6 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, current_bytes = min_t(int, current_bytes, S16_MAX * pitch); } - if (fence) - dma_fence_put(fence); - __fence = xe_migrate_vram(m, current_bytes, (unsigned long)buf & ~PAGE_MASK, dma_addr + current_page, @@ -1903,11 +1900,15 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, XE_MIGRATE_COPY_TO_VRAM : XE_MIGRATE_COPY_TO_SRAM); if (IS_ERR(__fence)) { - if (fence) + if (fence) { dma_fence_wait(fence, false); + dma_fence_put(fence); + } fence = __fence; goto out_err; } + + dma_fence_put(fence); fence = __fence; buf += current_bytes; From 2dd7a47669ae6c1da18c55f8e89c4a44418c7006 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 5 Aug 2025 09:48:42 +0200 Subject: [PATCH 1871/2411] drm/xe: Defer buffer object shrinker write-backs and GPU waits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the xe buffer-object shrinker allows GPU waits and write-back, (typically from kswapd), perform multiple passes, skipping subsequent passes if the shrinker number of scanned objects target is reached. 
1) Without GPU waits and write-back 2) Without write-back 3) With both GPU-waits and write-back This is to avoid stalls and costly write- and readbacks unless they are really necessary. v2: - Don't test for scan completion twice. (Stuart Summers) - Update tags. Reported-by: melvyn Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5557 Cc: Summers Stuart Fixes: 00c8efc3180f ("drm/xe: Add a shrinker for xe bos") Cc: # v6.15+ Signed-off-by: Thomas Hellström Reviewed-by: Stuart Summers Link: https://lore.kernel.org/r/20250805074842.11359-1-thomas.hellstrom@linux.intel.com (cherry picked from commit 80944d334182ce5eb27d00e2bf20a88bfc32dea1) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_shrinker.c | 51 +++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c index 1c3c04d52f55..90244fe59b59 100644 --- a/drivers/gpu/drm/xe/xe_shrinker.c +++ b/drivers/gpu/drm/xe/xe_shrinker.c @@ -54,10 +54,10 @@ xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgea write_unlock(&shrinker->lock); } -static s64 xe_shrinker_walk(struct xe_device *xe, - struct ttm_operation_ctx *ctx, - const struct xe_bo_shrink_flags flags, - unsigned long to_scan, unsigned long *scanned) +static s64 __xe_shrinker_walk(struct xe_device *xe, + struct ttm_operation_ctx *ctx, + const struct xe_bo_shrink_flags flags, + unsigned long to_scan, unsigned long *scanned) { unsigned int mem_type; s64 freed = 0, lret; @@ -93,6 +93,48 @@ static s64 xe_shrinker_walk(struct xe_device *xe, return freed; } +/* + * Try shrinking idle objects without writeback first, then if not sufficient, + * try also non-idle objects and finally if that's not sufficient either, + * add writeback. This avoids stalls and explicit writebacks with light or + * moderate memory pressure. 
+ */ +static s64 xe_shrinker_walk(struct xe_device *xe, + struct ttm_operation_ctx *ctx, + const struct xe_bo_shrink_flags flags, + unsigned long to_scan, unsigned long *scanned) +{ + bool no_wait_gpu = true; + struct xe_bo_shrink_flags save_flags = flags; + s64 lret, freed; + + swap(no_wait_gpu, ctx->no_wait_gpu); + save_flags.writeback = false; + lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned); + swap(no_wait_gpu, ctx->no_wait_gpu); + if (lret < 0 || *scanned >= to_scan) + return lret; + + freed = lret; + if (!ctx->no_wait_gpu) { + lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned); + if (lret < 0) + return lret; + freed += lret; + if (*scanned >= to_scan) + return freed; + } + + if (flags.writeback) { + lret = __xe_shrinker_walk(xe, ctx, flags, to_scan, scanned); + if (lret < 0) + return lret; + freed += lret; + } + + return freed; +} + static unsigned long xe_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { @@ -199,6 +241,7 @@ static unsigned long xe_shrinker_scan(struct shrinker *shrink, struct shrink_con runtime_pm = xe_shrinker_runtime_pm_get(shrinker, true, 0, can_backup); shrink_flags.purge = false; + lret = xe_shrinker_walk(shrinker->xe, &ctx, shrink_flags, nr_to_scan, &nr_scanned); if (lret >= 0) From 55d49f06162e45686399df4ae6292167f0deab7c Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Sat, 9 Aug 2025 00:23:10 +0530 Subject: [PATCH 1872/2411] drm/xe/hwmon: Add SW clamp for power limits writes Clamp writes to power limits powerX_crit/currX_crit, powerX_cap, powerX_max, to the maximum supported by the pcode mailbox when sysfs-provided values exceed this limit. Although the pcode already performs clamping, values beyond the pcode mailbox's supported range get truncated, leading to incorrect critical power settings. This patch ensures proper clamping to prevent such truncation. v2: - Address below review comments. (Riana) - Split comments into multiple sentences. - Use local variables for readability. 
- Add a debug log. - Use u64 instead of unsigned long. v3: - Change drm_dbg logs to drm_info. (Badal) v4: - Rephrase the drm_info log. (Rodrigo, Riana) - Rename variable max_mbx_power_limit to max_supp_power_limit, as limit is same for platforms with and without mailbox power limit support. Signed-off-by: Karthik Poosa Fixes: 92d44a422d0d ("drm/xe/hwmon: Expose card reactive critical power") Fixes: fb1b70607f73 ("drm/xe/hwmon: Expose power attributes") Reviewed-by: Riana Tauro Link: https://lore.kernel.org/r/20250808185310.3466529-1-karthik.poosa@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit d301eb950da59f962bafe874cf5eb6d61a85b2c2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hwmon.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index f08fc4377d25..c17ed1ae8649 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -332,6 +332,7 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe int ret = 0; u32 reg_val, max; struct xe_reg rapl_limit; + u64 max_supp_power_limit = 0; mutex_lock(&hwmon->hwmon_lock); @@ -356,6 +357,20 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe goto unlock; } + /* + * If the sysfs value exceeds the maximum pcode supported power limit value, clamp it to + * the supported maximum (U12.3 format). + * This is to avoid truncation during reg_val calculation below and ensure the valid + * power limit is sent for pcode which would clamp it to card-supported value. + */ + max_supp_power_limit = ((PWR_LIM_VAL) >> hwmon->scl_shift_power) * SF_POWER; + if (value > max_supp_power_limit) { + value = max_supp_power_limit; + drm_info(&hwmon->xe->drm, + "Power limit clamped as selected %s exceeds channel %d limit\n", + PWR_ATTR_TO_STR(attr), channel); + } + /* Computation in 64-bits to avoid overflow. Round to nearest. 
*/ reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); @@ -739,9 +754,23 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel, { int ret; u32 uval; + u64 max_crit_power_curr = 0; mutex_lock(&hwmon->hwmon_lock); + /* + * If the sysfs value exceeds the pcode mailbox cmd POWER_SETUP_SUBCOMMAND_WRITE_I1 + * max supported value, clamp it to the command's max (U10.6 format). + * This is to avoid truncation during uval calculation below and ensure the valid power + * limit is sent for pcode which would clamp it to card-supported value. + */ + max_crit_power_curr = (POWER_SETUP_I1_DATA_MASK >> POWER_SETUP_I1_SHIFT) * scale_factor; + if (value > max_crit_power_curr) { + value = max_crit_power_curr; + drm_info(&hwmon->xe->drm, + "Power limit clamped as selected exceeds channel %d limit\n", + channel); + } uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor); ret = xe_hwmon_pcode_write_i1(hwmon, uval); From 963e22c084c2b6097e1e635d29c6336881f67708 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Tue, 29 Jul 2025 08:20:38 +0200 Subject: [PATCH 1873/2411] ACPI: EC: Relax sanity check of the ECDT ID string It turns out that the ECDT table inside the ThinkBook 14 G7 IML contains a valid EC description but an invalid ID string ("_SB.PC00.LPCB.EC0"). Ignoring this ECDT based on the invalid ID string prevents the kernel from detecting the built-in touchpad, so relax the sanity check of the ID string and only reject ECDTs with empty ID strings. Reported-by: Ilya K Fixes: 7a0d59f6a913 ("ACPI: EC: Ignore ECDT tables with an invalid ID string") Signed-off-by: Armin Wolf Tested-by: Ilya K Link: https://patch.msgid.link/20250729062038.303734-1-W_Armin@gmx.de Cc: 6.16+ # 6.16+ Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/ec.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 75c7db8b156a..7855bbf752b1 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -2033,7 +2033,7 @@ void __init acpi_ec_ecdt_probe(void) goto out; } - if (!strstarts(ecdt_ptr->id, "\\")) { + if (!strlen(ecdt_ptr->id)) { /* * The ECDT table on some MSI notebooks contains invalid data, together * with an empty ID string (""). @@ -2042,9 +2042,13 @@ void __init acpi_ec_ecdt_probe(void) * a "fully qualified reference to the (...) embedded controller device", * so this string always has to start with a backslash. * - * By verifying this we can avoid such faulty ECDT tables in a safe way. + * However some ThinkBook machines have a ECDT table with a valid EC + * description but an invalid ID string ("_SB.PC00.LPCB.EC0"). + * + * Because of this we only check if the ID string is empty in order to + * avoid the obvious cases. */ - pr_err(FW_BUG "Ignoring ECDT due to invalid ID string \"%s\"\n", ecdt_ptr->id); + pr_err(FW_BUG "Ignoring ECDT due to empty ID string\n"); goto out; } From 5149bbb56bdcf5c5f72904025fbb502217580b63 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Mon, 11 Aug 2025 07:39:35 +0200 Subject: [PATCH 1874/2411] PCI: vmd: Remove MSI-X check on child devices d7d8ab87e3e7 ("PCI: vmd: Switch to msi_create_parent_irq_domain()") added a WARN_ON sanity check that child devices support MSI-X, because VMD document says [1]: Intel VMD only supports MSIx Interrupts from child devices and therefore the BIOS must enable PCIe Hot Plug and MSIx interrups [sic]. However, the VMD device can't even tell the difference between a child device using MSI and one using MSI-X. Per 185a383ada2e ("x86/PCI: Add driver for Intel Volume Management Device (VMD)"), VMD does not support INTx interrupts, but does support child devices using either MSI or MSI-X. 
Remove the sanity check to avoid the WARN_ON and allow child devices to use MSI, reported by Ammar. Fixes: d7d8ab87e3e7 ("PCI: vmd: Switch to msi_create_parent_irq_domain()") Link: https://cdrdv2-public.intel.com/776857/VMD_White_Paper.pdf [1] Reported-by: Ammar Faizi Closes: https://lore.kernel.org/linux-pci/aJXYhfc%2F6DfcqfqF@linux.gnuweeb.org/ Signed-off-by: Nam Cao [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Tested-by: Ammar Faizi Link: https://patch.msgid.link/20250811053935.4049211-1-namcao@linutronix.de --- drivers/pci/controller/vmd.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index b679c7f28f51..1bd5bf4a6097 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -306,9 +306,6 @@ static bool vmd_init_dev_msi_info(struct device *dev, struct irq_domain *domain, struct irq_domain *real_parent, struct msi_domain_info *info) { - if (WARN_ON_ONCE(info->bus_token != DOMAIN_BUS_PCI_DEVICE_MSIX)) - return false; - if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info)) return false; From abbf9a44944171ca99c150adad9361a2f517d3b6 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 27 Jul 2025 11:23:17 +0200 Subject: [PATCH 1875/2411] rust: workaround `rustdoc` target modifiers bug Starting with Rust 1.88.0 (released 2025-06-26), `rustdoc` complains about a target modifier mismatch in configurations where `-Zfixed-x18` is passed: error: mixing `-Zfixed-x18` will cause an ABI mismatch in crate `rust_out` | = help: the `-Zfixed-x18` flag modifies the ABI so Rust crates compiled with different values of this flag cannot be used together safely = note: unset `-Zfixed-x18` in this crate is incompatible with `-Zfixed-x18=` in dependency `core` = help: set `-Zfixed-x18=` in this crate or unset `-Zfixed-x18` in `core` = help: if you are sure this will not cause problems, you may use `-Cunsafe-allow-abi-mismatch=fixed-x18` to silence this error The reason is that 
`rustdoc` was not passing the target modifiers when configuring the session options, and thus it would report a mismatch that did not exist as soon as a target modifier is used in a dependency. We did not notice it in the kernel until now because `-Zfixed-x18` has been a target modifier only since 1.88.0 (and it is the only one we use so far). The issue has been reported upstream [1] and a fix has been submitted [2], including a test similar to the kernel case. [ This is now fixed upstream (thanks Guillaume for the quick review), so it will be fixed in Rust 1.90.0 (expected 2025-09-18). - Miguel ] Meanwhile, conditionally pass `-Cunsafe-allow-abi-mismatch=fixed-x18` to workaround the issue on our side. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Reported-by: Konrad Dybcio Closes: https://lore.kernel.org/rust-for-linux/36cdc798-524f-4910-8b77-d7b9fac08d77@oss.qualcomm.com/ Link: https://github.com/rust-lang/rust/issues/144521 [1] Link: https://github.com/rust-lang/rust/pull/144523 [2] Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250727092317.2930617-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rust/Makefile b/rust/Makefile index 4263462b8470..d47f82588d78 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -65,6 +65,10 @@ core-cfgs = \ core-edition := $(if $(call rustc-min-version,108700),2024,2021) +# `rustdoc` did not save the target modifiers, thus workaround for +# the time being (https://github.com/rust-lang/rust/issues/144521). +rustdoc_modifiers_workaround := $(if $(call rustc-min-version,108800),-Cunsafe-allow-abi-mismatch=fixed-x18) + # `rustc` recognizes `--remap-path-prefix` since 1.26.0, but `rustdoc` only # since Rust 1.81.0. Moreover, `rustdoc` ICEs on out-of-tree builds since Rust # 1.82.0 (https://github.com/rust-lang/rust/issues/138520). 
Thus workaround both @@ -77,6 +81,7 @@ quiet_cmd_rustdoc = RUSTDOC $(if $(rustdoc_host),H, ) $< -Zunstable-options --generate-link-to-definition \ --output $(rustdoc_output) \ --crate-name $(subst rustdoc-,,$@) \ + $(rustdoc_modifiers_workaround) \ $(if $(rustdoc_host),,--sysroot=/dev/null) \ @$(objtree)/include/generated/rustc_cfg $< @@ -215,6 +220,7 @@ quiet_cmd_rustdoc_test_kernel = RUSTDOC TK $< --extern bindings --extern uapi \ --no-run --crate-name kernel -Zunstable-options \ --sysroot=/dev/null \ + $(rustdoc_modifiers_workaround) \ --test-builder $(objtree)/scripts/rustdoc_test_builder \ $< $(rustdoc_test_kernel_quiet); \ $(objtree)/scripts/rustdoc_test_gen From 252fea131e15aba2cd487119d1a8f546471199e2 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 26 Jul 2025 15:34:35 +0200 Subject: [PATCH 1876/2411] rust: kbuild: clean output before running `rustdoc` `rustdoc` can get confused when generating documentation into a folder that contains generated files from other `rustdoc` versions. For instance, running something like: rustup default 1.78.0 make LLVM=1 rustdoc rustup default 1.88.0 make LLVM=1 rustdoc may generate errors like: error: couldn't generate documentation: invalid template: last line expected to start with a comment | = note: failed to create or modify "./Documentation/output/rust/rustdoc/src-files.js" Thus just always clean the output folder before generating the documentation -- we are anyway regenerating it every time the `rustdoc` target gets called, at least for the time being. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). 
Reported-by: Daniel Almeida Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/288089/topic/x/near/527201113 Reviewed-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250726133435.2460085-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/Makefile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/rust/Makefile b/rust/Makefile index d47f82588d78..bfa915b0e588 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -111,14 +111,14 @@ rustdoc: rustdoc-core rustdoc-macros rustdoc-compiler_builtins \ rustdoc-macros: private rustdoc_host = yes rustdoc-macros: private rustc_target_flags = --crate-type proc-macro \ --extern proc_macro -rustdoc-macros: $(src)/macros/lib.rs FORCE +rustdoc-macros: $(src)/macros/lib.rs rustdoc-clean FORCE +$(call if_changed,rustdoc) # Starting with Rust 1.82.0, skipping `-Wrustdoc::unescaped_backticks` should # not be needed -- see https://github.com/rust-lang/rust/pull/128307. rustdoc-core: private skip_flags = --edition=2021 -Wrustdoc::unescaped_backticks rustdoc-core: private rustc_target_flags = --edition=$(core-edition) $(core-cfgs) -rustdoc-core: $(RUST_LIB_SRC)/core/src/lib.rs FORCE +rustdoc-core: $(RUST_LIB_SRC)/core/src/lib.rs rustdoc-clean FORCE +$(call if_changed,rustdoc) rustdoc-compiler_builtins: $(src)/compiler_builtins.rs rustdoc-core FORCE @@ -130,7 +130,8 @@ rustdoc-ffi: $(src)/ffi.rs rustdoc-core FORCE rustdoc-pin_init_internal: private rustdoc_host = yes rustdoc-pin_init_internal: private rustc_target_flags = --cfg kernel \ --extern proc_macro --crate-type proc-macro -rustdoc-pin_init_internal: $(src)/pin-init/internal/src/lib.rs FORCE +rustdoc-pin_init_internal: $(src)/pin-init/internal/src/lib.rs \ + rustdoc-clean FORCE +$(call if_changed,rustdoc) rustdoc-pin_init: private rustdoc_host = yes @@ -148,6 +149,9 @@ rustdoc-kernel: $(src)/kernel/lib.rs rustdoc-core rustdoc-ffi rustdoc-macros \ $(obj)/bindings.o FORCE +$(call if_changed,rustdoc) +rustdoc-clean: FORCE + $(Q)rm -rf 
$(rustdoc_output) + quiet_cmd_rustc_test_library = $(RUSTC_OR_CLIPPY_QUIET) TL $< cmd_rustc_test_library = \ OBJTREE=$(abspath $(objtree)) \ From 41b70df5b38bc80967d2e0ed55cc3c3896bba781 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Aug 2025 08:30:11 -0600 Subject: [PATCH 1877/2411] io_uring/net: commit partial buffers on retry Ring provided buffers are potentially only valid within the single execution context in which they were acquired. io_uring deals with this and invalidates them on retry. But on the networking side, if MSG_WAITALL is set, or if the socket is of the streaming type and too little was processed, then it will hang on to the buffer rather than recycle or commit it. This is problematic for two reasons: 1) If someone unregisters the provided buffer ring before a later retry, then the req->buf_list will no longer be valid. 2) If multiple sockets are using the same buffer group, then multiple receives can consume the same memory. This can cause data corruption in the application, as either receive could land in the same userspace buffer. Fix this by disallowing partial retries from pinning a provided buffer across multiple executions, if ring provided buffers are used. 
Cc: stable@vger.kernel.org Reported-by: pt x Fixes: c56e022c0a27 ("io_uring: add support for user mapped provided buffer ring") Signed-off-by: Jens Axboe --- io_uring/net.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index dd96e355982f..d69f2afa4f7a 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -494,6 +494,15 @@ static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret) return nbufs; } +static int io_net_kbuf_recyle(struct io_kiocb *req, + struct io_async_msghdr *kmsg, int len) +{ + req->flags |= REQ_F_BL_NO_RECYCLE; + if (req->flags & REQ_F_BUFFERS_COMMIT) + io_kbuf_commit(req, req->buf_list, len, io_bundle_nbufs(kmsg, len)); + return IOU_RETRY; +} + static inline bool io_send_finish(struct io_kiocb *req, int *ret, struct io_async_msghdr *kmsg, unsigned issue_flags) @@ -562,8 +571,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) kmsg->msg.msg_controllen = 0; kmsg->msg.msg_control = NULL; sr->done_io += ret; - req->flags |= REQ_F_BL_NO_RECYCLE; - return -EAGAIN; + return io_net_kbuf_recyle(req, kmsg, ret); } if (ret == -ERESTARTSYS) ret = -EINTR; @@ -674,8 +682,7 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags) sr->len -= ret; sr->buf += ret; sr->done_io += ret; - req->flags |= REQ_F_BL_NO_RECYCLE; - return -EAGAIN; + return io_net_kbuf_recyle(req, kmsg, ret); } if (ret == -ERESTARTSYS) ret = -EINTR; @@ -1071,8 +1078,7 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) } if (ret > 0 && io_net_retry(sock, flags)) { sr->done_io += ret; - req->flags |= REQ_F_BL_NO_RECYCLE; - return IOU_RETRY; + return io_net_kbuf_recyle(req, kmsg, ret); } if (ret == -ERESTARTSYS) ret = -EINTR; @@ -1218,8 +1224,7 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags) sr->len -= ret; sr->buf += ret; sr->done_io += ret; - req->flags |= REQ_F_BL_NO_RECYCLE; - return -EAGAIN; + return io_net_kbuf_recyle(req, kmsg, ret); } if (ret == 
-ERESTARTSYS) ret = -EINTR; @@ -1500,8 +1505,7 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) zc->len -= ret; zc->buf += ret; zc->done_io += ret; - req->flags |= REQ_F_BL_NO_RECYCLE; - return -EAGAIN; + return io_net_kbuf_recyle(req, kmsg, ret); } if (ret == -ERESTARTSYS) ret = -EINTR; @@ -1571,8 +1575,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) if (ret > 0 && io_net_retry(sock, flags)) { sr->done_io += ret; - req->flags |= REQ_F_BL_NO_RECYCLE; - return -EAGAIN; + return io_net_kbuf_recyle(req, kmsg, ret); } if (ret == -ERESTARTSYS) ret = -EINTR; From e67b8afcb6d86f1bc6a69e4e52cf9dcfe636f995 Mon Sep 17 00:00:00 2001 From: Frank Min Date: Tue, 5 Aug 2025 22:30:54 +0800 Subject: [PATCH 1878/2411] drm/amdgpu: Add PSP fw version check for fw reserve GFX command The fw reserved GFX command is only supported starting from PSP fw version 0x3a0e14 and 0x3b0e0d. Older versions do not support this command. Add a version guard to ensure the command is only used when the running PSP fw meets the minimum version requirement. This ensures backward compatibility and safe operation across fw revisions. 
Fixes: a3b7f9c306e1 ("drm/amdgpu: reclaim psp fw reservation memory region") Signed-off-by: Frank Min Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit 065e23170a1e09bc9104b761183e59562a029619) --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 0bd51a04be79..23484317a5fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1039,15 +1039,28 @@ int psp_update_fw_reservation(struct psp_context *psp) { int ret; uint64_t reserv_addr, reserv_addr_ext; - uint32_t reserv_size, reserv_size_ext; + uint32_t reserv_size, reserv_size_ext, mp0_ip_ver; struct amdgpu_device *adev = psp->adev; + mp0_ip_ver = amdgpu_ip_version(adev, MP0_HWIP, 0); + if (amdgpu_sriov_vf(psp->adev)) return 0; - if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(14, 0, 2)) && - (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(14, 0, 3))) + switch (mp0_ip_ver) { + case IP_VERSION(14, 0, 2): + if (adev->psp.sos.fw_version < 0x3b0e0d) + return 0; + break; + + case IP_VERSION(14, 0, 3): + if (adev->psp.sos.fw_version < 0x3a0e14) + return 0; + break; + + default: return 0; + } ret = psp_get_fw_reservation_info(psp, GFX_CMD_ID_FB_FW_RESERV_ADDR, &reserv_addr, &reserv_size); if (ret) From 10ef476aad1c848449934e7bec2ab2374333c7b6 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Tue, 12 Aug 2025 09:17:58 +0800 Subject: [PATCH 1879/2411] drm/amdgpu: fix vram reservation issue The vram block allocation flag must be cleared before making vram reservation, otherwise reserving addresses within the currently freed memory range will always fail. 
Fixes: c9cad937c0c5 ("drm/amdgpu: add drm buddy support to amdgpu") Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit d38eaf27de1b8584f42d6fb3f717b7ec44b3a7a1) --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 07c936e90d8e..78f9e86ccc09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -648,9 +648,8 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, list_for_each_entry(block, &vres->blocks, link) vis_usage += amdgpu_vram_mgr_vis_size(adev, block); - amdgpu_vram_mgr_do_reserve(man); - drm_buddy_free_list(mm, &vres->blocks, vres->flags); + amdgpu_vram_mgr_do_reserve(man); mutex_unlock(&mgr->lock); atomic64_sub(vis_usage, &mgr->vis_usage); From 040bc6d0e0e9c814c9c663f6f1544ebaff6824a8 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Mon, 11 Aug 2025 15:20:55 +0800 Subject: [PATCH 1880/2411] drm/amdgpu: fix incorrect vm flags to map bo It should use vm flags instead of pte flags to specify bo vm attributes. 
Fixes: 7946340fa389 ("drm/amdgpu: Move csa related code to separate file") Signed-off-by: Jack Xiao Reviewed-by: Likun Gao Signed-off-by: Alex Deucher (cherry picked from commit b08425fa77ad2f305fe57a33dceb456be03b653f) --- drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 02138aa55793..dfb6cfd83760 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -88,8 +88,8 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, } r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size, - AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | - AMDGPU_PTE_EXECUTABLE); + AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | + AMDGPU_VM_PAGE_EXECUTABLE); if (r) { DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); From 0ebbab41fba1bae6ccd96c0eec17026700ac6534 Mon Sep 17 00:00:00 2001 From: Sergio Perez Gonzalez Date: Mon, 28 Jul 2025 20:00:49 -0600 Subject: [PATCH 1881/2411] ASoC: stm: stm32_i2s: Fix calc_clk_div() error handling in determine_rate() calc_clk_div() will only return a non-zero value (-EINVAL) in case of error. On the other hand, req->rate is an unsigned long. It seems quite odd that req->rate would be assigned a negative value, which is clearly not a rate, and success would be returned. Reinstate previous logic, which would just return error. 
Fixes: afd529d74002 ("ASoC: stm: stm32_i2s: convert from round_rate() to determine_rate()") Link: https://scan7.scan.coverity.com/#/project-view/53936/11354?selectedIssue=1647702 Signed-off-by: Sergio Perez Gonzalez Link: https://patch.msgid.link/20250729020052.404617-1-sperezglz@gmail.com Signed-off-by: Mark Brown --- sound/soc/stm/stm32_i2s.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sound/soc/stm/stm32_i2s.c b/sound/soc/stm/stm32_i2s.c index 0e489097d9c1..6ca21780f21d 100644 --- a/sound/soc/stm/stm32_i2s.c +++ b/sound/soc/stm/stm32_i2s.c @@ -469,11 +469,8 @@ static int stm32_i2smclk_determine_rate(struct clk_hw *hw, int ret; ret = stm32_i2s_calc_clk_div(i2s, req->best_parent_rate, req->rate); - if (ret) { - req->rate = ret; - - return 0; - } + if (ret) + return ret; mclk->freq = req->best_parent_rate / i2s->divider; From aa5fc4362fac9351557eb27c745579159a2e4520 Mon Sep 17 00:00:00 2001 From: Liu01 Tong Date: Mon, 11 Aug 2025 14:52:37 +0800 Subject: [PATCH 1882/2411] drm/amdgpu: fix task hang from failed job submission during process kill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During process kill, drm_sched_entity_flush() will kill the vm entities. The following job submissions of this process will fail, and the resources of these jobs have not been released, nor have the fences been signalled, causing tasks to hang and timeout. Fix by check entity status in amdgpu_vm_ready() and avoid submit jobs to stopped entity. v2: add amdgpu_vm_ready() check before amdgpu_vm_clear_freed() in function amdgpu_cs_vm_handling(). 
Fixes: 1f02f2044bda ("drm/amdgpu: Avoid extra evict-restore process.") Signed-off-by: Liu01 Tong Signed-off-by: Lin.Cao Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit f101c13a8720c73e67f8f9d511fbbeda95bcedb1) --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 +++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index a2adaacf6adb..d3f220be2ef9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1139,6 +1139,9 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) } } + if (!amdgpu_vm_ready(vm)) + return -EINVAL; + r = amdgpu_vm_clear_freed(adev, vm, NULL); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5cacf5717016..0b87798daebd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -654,11 +654,10 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, * Check if all VM PDs/PTs are ready for updates * * Returns: - * True if VM is not evicting. 
+ * True if VM is not evicting and all VM entities are not stopped */ bool amdgpu_vm_ready(struct amdgpu_vm *vm) { - bool empty; bool ret; amdgpu_vm_eviction_lock(vm); @@ -666,10 +665,18 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) amdgpu_vm_eviction_unlock(vm); spin_lock(&vm->status_lock); - empty = list_empty(&vm->evicted); + ret &= list_empty(&vm->evicted); spin_unlock(&vm->status_lock); - return ret && empty; + spin_lock(&vm->immediate.lock); + ret &= !vm->immediate.stopped; + spin_unlock(&vm->immediate.lock); + + spin_lock(&vm->delayed.lock); + ret &= !vm->delayed.stopped; + spin_unlock(&vm->delayed.lock); + + return ret; } /** From c5ec7f49b480db0dfc83f395755b1c2a7c979920 Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Fri, 4 Jul 2025 13:17:47 +0200 Subject: [PATCH 1883/2411] devlink: let driver opt out of automatic phys_port_name generation Currently when adding devlink port, phys_port_name is automatically generated within devlink port initialization flow. As a result adding devlink port support to driver may result in forced changes of interface names, which breaks already existing network configs. This is an expected behavior but in some scenarios it would not be preferable to provide such limitation for legacy driver not being able to keep 'pre-devlink' interface name. Add flag no_phys_port_name to devlink_port_attrs struct which indicates if devlink should not alter name of interface. 
Suggested-by: Jiri Pirko Link: https://lore.kernel.org/all/nbwrfnjhvrcduqzjl4a2jafnvvud6qsbxlvxaxilnryglf4j7r@btuqrimnfuly/ Signed-off-by: Jedrzej Jagielski Signed-off-by: Tony Nguyen --- include/net/devlink.h | 6 +++++- net/devlink/port.c | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index 93640a29427c..b32c9ceeb81d 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -78,6 +78,9 @@ struct devlink_port_pci_sf_attrs { * @flavour: flavour of the port * @split: indicates if this is split port * @splittable: indicates if the port can be split. + * @no_phys_port_name: skip automatic phys_port_name generation; for + * compatibility only, newly added driver/port instance + * should never set this. * @lanes: maximum number of lanes the port supports. 0 value is not passed to netlink. * @switch_id: if the port is part of switch, this is buffer with ID, otherwise this is NULL * @phys: physical port attributes @@ -87,7 +90,8 @@ struct devlink_port_pci_sf_attrs { */ struct devlink_port_attrs { u8 split:1, - splittable:1; + splittable:1, + no_phys_port_name:1; u32 lanes; enum devlink_port_flavour flavour; struct netdev_phys_item_id switch_id; diff --git a/net/devlink/port.c b/net/devlink/port.c index 939081a0e615..cb8d4df61619 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -1519,7 +1519,7 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, struct devlink_port_attrs *attrs = &devlink_port->attrs; int n = 0; - if (!devlink_port->attrs_set) + if (!devlink_port->attrs_set || devlink_port->attrs.no_phys_port_name) return -EOPNOTSUPP; switch (attrs->flavour) { From e67a0bc3ed4fd8ee1697cb6d937e2b294ec13b5e Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Thu, 3 Jul 2025 12:41:15 +0200 Subject: [PATCH 1884/2411] ixgbe: prevent from unwanted interface name changes Users of the ixgbe driver report that after adding devlink support by the commit a0285236ab93 
("ixgbe: add initial devlink support") their configs got broken due to unwanted changes of interface names. It's caused by automatic phys_port_name generation during devlink port initialization flow. To prevent from that set no_phys_port_name flag for ixgbe devlink ports. Reported-by: David Howells Closes: https://lore.kernel.org/netdev/3452224.1745518016@warthog.procyon.org.uk/ Reported-by: David Kaplan Closes: https://lore.kernel.org/netdev/LV3PR12MB92658474624CCF60220157199470A@LV3PR12MB9265.namprd12.prod.outlook.com/ Fixes: a0285236ab93 ("ixgbe: add initial devlink support") Signed-off-by: Jedrzej Jagielski Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/devlink/devlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c b/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c index 54f1b83dfe42..d227f4d2a2d1 100644 --- a/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c @@ -543,6 +543,7 @@ int ixgbe_devlink_register_port(struct ixgbe_adapter *adapter) attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; attrs.phys.port_number = adapter->hw.bus.func; + attrs.no_phys_port_name = 1; ixgbe_devlink_set_switch_id(adapter, &attrs.switch_id); devlink_port_attrs_set(devlink_port, &attrs); From 6563623e604e3e235b2cee71190a4972be8f986b Mon Sep 17 00:00:00 2001 From: Soham Metha Date: Wed, 13 Aug 2025 02:19:46 +0530 Subject: [PATCH 1885/2411] docs: cgroup: fixed spelling mistakes in documentation found/fixed the following typo - Availablity -> Availability in `Documentation/admin-guide/cgroup-v2.rst` Signed-off-by: Soham Metha Signed-off-by: Tejun Heo --- Documentation/admin-guide/cgroup-v2.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index d9d3cc7df348..51c0bc4c2dc5 100644 --- 
a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -435,8 +435,8 @@ both cgroups. Controlling Controllers ----------------------- -Availablity -~~~~~~~~~~~ +Availability +~~~~~~~~~~~~ A controller is available in a cgroup when it is supported by the kernel (i.e., compiled in, not disabled and not attached to a v1 hierarchy) and listed in the From 2efe41234dbd0a83fdb7cd38226c2f70039a2cd3 Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Mon, 28 Jul 2025 15:26:49 +0900 Subject: [PATCH 1886/2411] ptp: prevent possible ABBA deadlock in ptp_clock_freerun() syzbot reported the following ABBA deadlock: CPU0 CPU1 ---- ---- n_vclocks_store() lock(&ptp->n_vclocks_mux) [1] (physical clock) pc_clock_adjtime() lock(&clk->rwsem) [2] (physical clock) ... ptp_clock_freerun() ptp_vclock_in_use() lock(&ptp->n_vclocks_mux) [3] (physical clock) ptp_clock_unregister() posix_clock_unregister() lock(&clk->rwsem) [4] (virtual clock) Since ptp virtual clock is registered only under ptp physical clock, both ptp_clock and posix_clock must be physical clocks for ptp_vclock_in_use() to lock &ptp->n_vclocks_mux and check ptp->n_vclocks. However, when unregistering vclocks in n_vclocks_store(), the locking ptp->n_vclocks_mux is a physical clock lock, but clk->rwsem of ptp_clock_unregister() called through device_for_each_child_reverse() is a virtual clock lock. Therefore, clk->rwsem used in CPU0 and clk->rwsem used in CPU1 are different locks, but in lockdep, a false positive occurs because the possibility of deadlock is determined through lock-class. To solve this, lock subclass annotation must be added to the posix_clock rwsem of the vclock. 
Reported-by: syzbot+7cfb66a237c4a5fb22ad@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7cfb66a237c4a5fb22ad Fixes: 73f37068d540 ("ptp: support ptp physical/virtual clocks conversion") Signed-off-by: Jeongjun Park Acked-by: Richard Cochran Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250728062649.469882-1-aha310510@gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_private.h | 5 +++++ drivers/ptp/ptp_vclock.c | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index a6aad743c282..b352df4cd3f9 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -24,6 +24,11 @@ #define PTP_DEFAULT_MAX_VCLOCKS 20 #define PTP_MAX_CHANNELS 2048 +enum { + PTP_LOCK_PHYSICAL = 0, + PTP_LOCK_VIRTUAL, +}; + struct timestamp_event_queue { struct ptp_extts_event buf[PTP_MAX_TIMESTAMPS]; int head; diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c index 2fdeedd60e21..64c950456517 100644 --- a/drivers/ptp/ptp_vclock.c +++ b/drivers/ptp/ptp_vclock.c @@ -154,6 +154,11 @@ static long ptp_vclock_refresh(struct ptp_clock_info *ptp) return PTP_VCLOCK_REFRESH_INTERVAL; } +static void ptp_vclock_set_subclass(struct ptp_clock *ptp) +{ + lockdep_set_subclass(&ptp->clock.rwsem, PTP_LOCK_VIRTUAL); +} + static const struct ptp_clock_info ptp_vclock_info = { .owner = THIS_MODULE, .name = "ptp virtual clock", @@ -213,6 +218,8 @@ struct ptp_vclock *ptp_vclock_register(struct ptp_clock *pclock) return NULL; } + ptp_vclock_set_subclass(vclock->clock); + timecounter_init(&vclock->tc, &vclock->cc, 0); ptp_schedule_worker(vclock->clock, PTP_VCLOCK_REFRESH_INTERVAL); From 6a912e8aa2b2fba2519e93a2eac197d16f137c9a Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 3 Jul 2025 15:39:02 +0800 Subject: [PATCH 1887/2411] ext4: show the default enabled i_version option Display `i_version` in `/proc/fs/ext4/sdx/options`, even though it's default enabled. 
This aids users managing multi-version scenarios and simplifies debugging. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://patch.msgid.link/20250703073903.6952-1-libaokun@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c7d39da7e733..9203518786e4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2975,6 +2975,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); + if (nodefs && sb->s_flags & SB_I_VERSION) + SEQ_OPTS_PUTS("i_version"); if (nodefs || sbi->s_stripe) SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); if (nodefs || EXT4_MOUNT_DATA_FLAGS & From 6db015fc4b5d5f63a64a193f65d98da3a7fc811d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 7 Aug 2025 16:29:06 -0700 Subject: [PATCH 1888/2411] tls: handle data disappearing from under the TLS ULP TLS expects that it owns the receive queue of the TCP socket. This cannot be guaranteed in case the reader of the TCP socket entered before the TLS ULP was installed, or uses some non-standard read API (eg. zerocopy ones). Replace the WARN_ON() and a buggy early exit (which leaves anchor pointing to a freed skb) with real error handling. Wipe the parsing state and tell the reader to retry. We already reload the anchor every time we (re)acquire the socket lock, so the only condition we need to avoid is an out of bounds read (not having enough bytes in the socket for previously parsed record len). If some data was read from under TLS but there's enough in the queue we'll reload and decrypt what is most likely not a valid TLS record. Leading to some undefined behavior from TLS perspective (corrupting a stream? missing an alert? missing an attack?) but no kernel crash should take place. 
Reported-by: William Liu Reported-by: Savino Dicanosa Link: https://lore.kernel.org/tFjq_kf7sWIG3A7CrCg_egb8CVsT_gsmHAK0_wxDPJXfIzxFAMxqmLwp3MlU5EHiet0AwwJldaaFdgyHpeIUCS-3m3llsmRzp9xIOBR4lAI=@syst3mfailure.io Fixes: 84c61fe1a75b ("tls: rx: do not use the standard strparser") Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250807232907.600366-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/tls/tls.h | 2 +- net/tls/tls_strp.c | 11 ++++++++--- net/tls/tls_sw.c | 3 ++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/net/tls/tls.h b/net/tls/tls.h index 774859b63f0d..4e077068e6d9 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -196,7 +196,7 @@ void tls_strp_msg_done(struct tls_strparser *strp); int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb); void tls_rx_msg_ready(struct tls_strparser *strp); -void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh); +bool tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh); int tls_strp_msg_cow(struct tls_sw_context_rx *ctx); struct sk_buff *tls_strp_msg_detach(struct tls_sw_context_rx *ctx); int tls_strp_msg_hold(struct tls_strparser *strp, struct sk_buff_head *dst); diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index 095cf31bae0b..d71643b494a1 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -475,7 +475,7 @@ static void tls_strp_load_anchor_with_queue(struct tls_strparser *strp, int len) strp->stm.offset = offset; } -void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) +bool tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) { struct strp_msg *rxm; struct tls_msg *tlm; @@ -484,8 +484,11 @@ void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) DEBUG_NET_WARN_ON_ONCE(!strp->stm.full_len); if (!strp->copy_mode && force_refresh) { - if (WARN_ON(tcp_inq(strp->sk) < strp->stm.full_len)) - return; + if (unlikely(tcp_inq(strp->sk) < strp->stm.full_len)) { + WRITE_ONCE(strp->msg_ready, 0); + 
memset(&strp->stm, 0, sizeof(strp->stm)); + return false; + } tls_strp_load_anchor_with_queue(strp, strp->stm.full_len); } @@ -495,6 +498,8 @@ void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) rxm->offset = strp->stm.offset; tlm = tls_msg(strp->anchor); tlm->control = strp->mark; + + return true; } /* Called with lock held on lower socket */ diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 549d1ea01a72..51c98a007dda 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1384,7 +1384,8 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, return sock_intr_errno(timeo); } - tls_strp_msg_load(&ctx->strp, released); + if (unlikely(!tls_strp_msg_load(&ctx->strp, released))) + return tls_rx_rec_wait(sk, psock, nonblock, false); return 1; } From d7e82594a45c5cb270940ac469846e8026c7db0f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 7 Aug 2025 16:29:07 -0700 Subject: [PATCH 1889/2411] selftests: tls: test TCP stealing data from under the TLS socket Check a race where data disappears from the TCP socket after TLS signaled that it's ready to receive. ok 6 global.data_steal # RUN tls_basic.base_base ... 
# OK tls_basic.base_base Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250807232907.600366-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 63 +++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 5ded3b3a7538..d8cfcf9bb825 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -2708,6 +2708,69 @@ TEST(prequeue) { close(cfd); } +TEST(data_steal) { + struct tls_crypto_info_keys tls; + char buf[20000], buf2[20000]; + struct sockaddr_in addr; + int sfd, cfd, ret, fd; + int pid, status; + socklen_t len; + + len = sizeof(addr); + memrnd(buf, sizeof(buf)); + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls, 0); + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = 0; + + fd = socket(AF_INET, SOCK_STREAM, 0); + sfd = socket(AF_INET, SOCK_STREAM, 0); + + ASSERT_EQ(bind(sfd, &addr, sizeof(addr)), 0); + ASSERT_EQ(listen(sfd, 10), 0); + ASSERT_EQ(getsockname(sfd, &addr, &len), 0); + ASSERT_EQ(connect(fd, &addr, sizeof(addr)), 0); + ASSERT_GE(cfd = accept(sfd, &addr, &len), 0); + close(sfd); + + ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + if (ret) { + ASSERT_EQ(errno, ENOENT); + SKIP(return, "no TLS support"); + } + ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0); + + /* Spawn a child and get it into the read wait path of the underlying + * TCP socket. 
+ */ + pid = fork(); + ASSERT_GE(pid, 0); + if (!pid) { + EXPECT_EQ(recv(cfd, buf, sizeof(buf), MSG_WAITALL), + sizeof(buf)); + exit(!__test_passed(_metadata)); + } + + usleep(2000); + ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0); + ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0); + + EXPECT_EQ(send(fd, buf, sizeof(buf), 0), sizeof(buf)); + usleep(2000); + EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT), -1); + /* Don't check errno, the error will be different depending + * on what random bytes TLS interpreted as the record length. + */ + + close(fd); + close(cfd); + + EXPECT_EQ(wait(&status), pid); + EXPECT_EQ(status, 0); +} + static void __attribute__((constructor)) fips_check(void) { int res; FILE *f; From f2326fd14a224e4cccbab89e14c52279ff79b7ec Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 3 Jul 2025 15:39:03 +0800 Subject: [PATCH 1890/2411] ext4: preserve SB_I_VERSION on remount IMA testing revealed that after an ext4 remount, file accesses triggered full measurements even without modifications, instead of skipping as expected when i_version is unchanged. Debugging showed `SB_I_VERSION` was cleared in reconfigure_super() during remount due to commit 1ff20307393e ("ext4: unconditionally enable the i_version counter") removing the fix from commit 960e0ab63b2e ("ext4: fix i_version handling on remount"). To rectify this, `SB_I_VERSION` is always set for `fc->sb_flags` in ext4_init_fs_context(), instead of `sb->s_flags` in __ext4_fill_super(), ensuring it persists across all mounts. 
Cc: stable@kernel.org Fixes: 1ff20307393e ("ext4: unconditionally enable the i_version counter") Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://patch.msgid.link/20250703073903.6952-2-libaokun@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9203518786e4..ed1b36bd51c8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1998,6 +1998,9 @@ int ext4_init_fs_context(struct fs_context *fc) fc->fs_private = ctx; fc->ops = &ext4_context_ops; + /* i_version is always enabled now */ + fc->sb_flags |= SB_I_VERSION; + return 0; } @@ -5316,9 +5319,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); - /* i_version is always enabled now */ - sb->s_flags |= SB_I_VERSION; - /* HSM events are allowed by default. */ sb->s_iflags |= SB_I_ALLOW_HSM; From b4cc4a4077268522e3d0d34de4b2dc144e2330fa Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Wed, 16 Jul 2025 19:36:42 -0600 Subject: [PATCH 1891/2411] ext4: check fast symlink for ea_inode correctly The check for a fast symlink in the presence of only an external xattr inode is incorrect. If a fast symlink does not have an xattr block (i_file_acl == 0), but does have an external xattr inode that increases inode i_blocks, then the check for a fast symlink will incorrectly fail and __ext4_iget()->ext4_ind_check_inode() will report the inode is corrupt when it "validates" i_data[] on the next read: # ln -s foo /mnt/tmp/bar # setfattr -h -n trusted.test \ -v "$(yes | head -n 4000)" /mnt/tmp/bar # umount /mnt/tmp # mount /mnt/tmp # ls -l /mnt/tmp ls: cannot access '/mnt/tmp/bar': Structure needs cleaning total 4 ? l?????????? ? ? ? ? ? 
bar # dmesg | tail -1 EXT4-fs error (device dm-8): __ext4_iget:5098: inode #24578: block 7303014: comm ls: invalid block (note that "block 7303014" = 0x6f6f66 = "foo" in LE order). ext4_inode_is_fast_symlink() should check the superblock EXT4_FEATURE_INCOMPAT_EA_INODE feature flag, not the inode EXT4_EA_INODE_FL, since the latter is only set on the xattr inode itself, and not on the inode that uses this xattr. Cc: stable@vger.kernel.org Fixes: fc82228a5e38 ("ext4: support fast symlinks from ext3 file systems") Signed-off-by: Andreas Dilger Reviewed-by: Li Dongyang Reviewed-by: Alex Zhuravlev Reviewed-by: Oleg Drokin Reviewed-on: https://review.whamcloud.com/59879 Lustre-bug-id: https://jira.whamcloud.com/browse/LU-19121 Link: https://patch.msgid.link/20250717063709.757077-1-adilger@dilger.ca Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index eeccb6f588f9..731a800d9c14 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -146,7 +146,7 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, */ int ext4_inode_is_fast_symlink(struct inode *inode) { - if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) { + if (!ext4_has_feature_ea_inode(inode->i_sb)) { int ea_blocks = EXT4_I(inode)->i_file_acl ? EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0; From f3fbaa74d999c16b5caeca779e6d7e6e6e7feed8 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 21 Jul 2025 22:09:02 +0200 Subject: [PATCH 1892/2411] ext4: remove useless if check This if branch is only jumping to 'out' which is defined just after the branch itself. Hence this if-check is a no-op and can be removed. 
Address-Coverity-ID: 1647981 ("Incorrect expression (IDENTICAL_BRANCHES)") Signed-off-by: Antonio Quartulli Link: https://patch.msgid.link/20250721200902.1071-1-antonio@mandelbit.com Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index d83f91b62317..01f379f5fd04 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2984,8 +2984,6 @@ int ext4_init_new_dir(handle_t *handle, struct inode *dir, return PTR_ERR(dir_block); de = (struct ext4_dir_entry_2 *)dir_block->b_data; err = ext4_init_dirblock(handle, inode, dir_block, dir->i_ino, NULL, 0); - if (err) - goto out; out: brelse(dir_block); return err; From 59d8731c887bf2f5bb8406ace26cbb6f6b36d6cc Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 31 Jul 2025 16:00:07 -0400 Subject: [PATCH 1893/2411] ext4: fix unused variable warning in ext4_init_new_dir Cc: stable@kernel.org Fixes: 90f097b1403f ("ext4: refactor the inline directory conversion and...") Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 01f379f5fd04..2cd36f59c9e3 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2965,7 +2965,6 @@ int ext4_init_new_dir(handle_t *handle, struct inode *dir, struct inode *inode) { struct buffer_head *dir_block = NULL; - struct ext4_dir_entry_2 *de; ext4_lblk_t block = 0; int err; @@ -2982,7 +2981,6 @@ int ext4_init_new_dir(handle_t *handle, struct inode *dir, dir_block = ext4_append(handle, inode, &block); if (IS_ERR(dir_block)) return PTR_ERR(dir_block); - de = (struct ext4_dir_entry_2 *)dir_block->b_data; err = ext4_init_dirblock(handle, inode, dir_block, dir->i_ino, NULL, 0); out: brelse(dir_block); From 4ba97589ed19210ff808929052696f5636139823 Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Sun, 3 Aug 2025 18:22:41 +0800 Subject: [PATCH 1894/2411] ext4: remove redundant __GFP_NOWARN GFP_NOWAIT already includes __GFP_NOWARN, so let's 
remove the redundant __GFP_NOWARN. Signed-off-by: Qianfeng Rong Link: https://patch.msgid.link/20250803102243.623705-4-rongqianfeng@vivo.com Signed-off-by: Theodore Ts'o --- fs/ext4/page-io.c | 2 +- fs/ext4/super.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 3d8b0f6d2dea..39abfeec5f36 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -547,7 +547,7 @@ int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *folio, * first page of the bio. Otherwise it can deadlock. */ if (io->io_bio) - gfp_flags = GFP_NOWAIT | __GFP_NOWARN; + gfp_flags = GFP_NOWAIT; retry_encrypt: bounce_page = fscrypt_encrypt_pagecache_blocks(folio, enc_bytes, 0, gfp_flags); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ed1b36bd51c8..b16ffa507b84 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -268,7 +268,7 @@ struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb, void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block) { struct buffer_head *bh = bdev_getblk(sb->s_bdev, block, - sb->s_blocksize, GFP_NOWAIT | __GFP_NOWARN); + sb->s_blocksize, GFP_NOWAIT); if (likely(bh)) { if (trylock_buffer(bh)) From bae76c035bf0852844151e68098c9b7cd63ef238 Mon Sep 17 00:00:00 2001 From: Ojaswin Mujoo Date: Tue, 5 Aug 2025 14:00:30 +0530 Subject: [PATCH 1895/2411] ext4: fix fsmap end of range reporting with bigalloc With bigalloc enabled, the logic to report last extent has a bug since we try to use cluster units instead of block units. This can cause an issue where extra incorrect entries might be returned back to the user. This was flagged by generic/365 with 64k bs and -O bigalloc. ** Details of issue ** The issue was noticed on 5G 64k blocksize FS with -O bigalloc which has only 1 bg. 
$ xfs_io -c "fsmap -d" /mnt/scratch 0: 253:48 [0..127]: static fs metadata 128 /* sb */ 1: 253:48 [128..255]: special 102:1 128 /* gdt */ 3: 253:48 [256..383]: special 102:3 128 /* block bitmap */ 4: 253:48 [384..2303]: unknown 1920 /* flex bg empty space */ 5: 253:48 [2304..2431]: special 102:4 128 /* inode bitmap */ 6: 253:48 [2432..4351]: unknown 1920 /* flex bg empty space */ 7: 253:48 [4352..6911]: inodes 2560 8: 253:48 [6912..538623]: unknown 531712 9: 253:48 [538624..10485759]: free space 9947136 The issue can be seen with: $ xfs_io -c "fsmap -d 0 3" /mnt/scratch 0: 253:48 [0..127]: static fs metadata 128 1: 253:48 [384..2047]: unknown 1664 Only the first entry was expected to be returned but we get 2. This is because: ext4_getfsmap_datadev() first_cluster, last_cluster = 0 ... info->gfi_last = true; ext4_getfsmap_datadev_helper(sb, end_ag, last_cluster + 1, 0, info); fsb = C2B(1) = 16 fslen = 0 ... /* Merge in any relevant extents from the meta_list */ list_for_each_entry_safe(p, tmp, &info->gfi_meta_list, fmr_list) { ... // since fsb = 16, considers all metadata which starts before 16 blockno iter 1: error = ext4_getfsmap_helper(sb, info, p); // p = sb (0,1), nop info->gfi_next_fsblk = 1 iter 2: error = ext4_getfsmap_helper(sb, info, p); // p = gdt (1,2), nop info->gfi_next_fsblk = 2 iter 3: error = ext4_getfsmap_helper(sb, info, p); // p = blk bitmap (2,3), nop info->gfi_next_fsblk = 3 iter 4: error = ext4_getfsmap_helper(sb, info, p); // p = ino bitmap (18,19) if (rec_blk > info->gfi_next_fsblk) { // (18 > 3) // emits an extra entry ** BUG ** } } Fix this by directly calling ext4_getfsmap_datadev() with a dummy record that has fmr_physical set to (end_fsb + 1) instead of last_cluster + 1. By using the block instead of cluster we get the correct behavior. 
Replacing ext4_getfsmap_datadev_helper() with ext4_getfsmap_helper() is okay since the gfi_lastfree and metadata checks in ext4_getfsmap_datadev_helper() are anyways redundant when we only want to emit the last allocated block of the range, as we have already taken care of emitting metadata and any last free blocks. Cc: stable@kernel.org Reported-by: Disha Goel Fixes: 4a622e4d477b ("ext4: fix FS_IOC_GETFSMAP handling") Signed-off-by: Ojaswin Mujoo Reviewed-by: Darrick J. Wong Link: https://patch.msgid.link/e7472c8535c9c5ec10f425f495366864ea12c9da.1754377641.git.ojaswin@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/fsmap.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c index 383c6edea6dd..9d63c39f6077 100644 --- a/fs/ext4/fsmap.c +++ b/fs/ext4/fsmap.c @@ -526,6 +526,7 @@ static int ext4_getfsmap_datadev(struct super_block *sb, ext4_group_t end_ag; ext4_grpblk_t first_cluster; ext4_grpblk_t last_cluster; + struct ext4_fsmap irec; int error = 0; bofs = le32_to_cpu(sbi->s_es->s_first_data_block); @@ -609,10 +610,18 @@ static int ext4_getfsmap_datadev(struct super_block *sb, goto err; } - /* Report any gaps at the end of the bg */ + /* + * The dummy record below will cause ext4_getfsmap_helper() to report + * any allocated blocks at the end of the range. 
+ */ + irec.fmr_device = 0; + irec.fmr_physical = end_fsb + 1; + irec.fmr_length = 0; + irec.fmr_owner = EXT4_FMR_OWN_FREE; + irec.fmr_flags = 0; + info->gfi_last = true; - error = ext4_getfsmap_datadev_helper(sb, end_ag, last_cluster + 1, - 0, info); + error = ext4_getfsmap_helper(sb, info, &irec); if (error) goto err; From 3ffbdd1f1165f1b2d6a94d1b1aabef57120deaf7 Mon Sep 17 00:00:00 2001 From: Ojaswin Mujoo Date: Tue, 5 Aug 2025 14:00:31 +0530 Subject: [PATCH 1896/2411] ext4: fix reserved gdt blocks handling in fsmap In some cases like small FSes with no meta_bg and where the resize doesn't need extra gdt blocks as it can fit in the current one, s_reserved_gdt_blocks is set as 0, which causes fsmap to emit a 0 length entry, which is incorrect. $ mkfs.ext4 -b 65536 -O bigalloc /dev/sda 5G $ mount /dev/sda /mnt/scratch $ xfs_io -c "fsmap -d" /mnt/scratch 0: 253:48 [0..127]: static fs metadata 128 1: 253:48 [128..255]: special 102:1 128 2: 253:48 [256..255]: special 102:2 0 <---- 0 len entry 3: 253:48 [256..383]: special 102:3 128 Fix this by adding a check for this case. Cc: stable@kernel.org Fixes: 0c9ec4beecac ("ext4: support GETFSMAP ioctls") Signed-off-by: Ojaswin Mujoo Reviewed-by: Darrick J. Wong Link: https://patch.msgid.link/08781b796453a5770112aa96ad14c864fbf31935.1754377641.git.ojaswin@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/fsmap.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c index 9d63c39f6077..91185c40f755 100644 --- a/fs/ext4/fsmap.c +++ b/fs/ext4/fsmap.c @@ -393,6 +393,14 @@ static unsigned int ext4_getfsmap_find_sb(struct super_block *sb, /* Reserved GDT blocks */ if (!ext4_has_feature_meta_bg(sb) || metagroup < first_meta_bg) { len = le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); + + /* + * mkfs.ext4 can set s_reserved_gdt_blocks as 0 in some cases, + * check for that.
+ */ + if (!len) + return 0; + error = ext4_getfsmap_fill(meta_list, fsb, len, EXT4_FMR_OWN_RESV_GDT); if (error) From c5e104a91e7b6fa12c1dc2d8bf84abb7ef9b89ad Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 7 Aug 2025 09:35:20 -0400 Subject: [PATCH 1897/2411] ext4: don't try to clear the orphan_present feature block device is r/o When the file system is frozen in preparation for taking an LVM snapshot, the journal is checkpointed and if the orphan_file feature is enabled, and the orphan file is empty, we clear the orphan_present feature flag. But if there are pending inodes that need to be removed the orphan_present feature flag can't be cleared. The problem comes if the block device is read-only. In that case, we can't process the orphan inode list, so it is skipped in ext4_orphan_cleanup(). But then in ext4_mark_recovery_complete(), this results in the ext4 error "Orphan file not empty on read-only fs" firing and the file system mount is aborted. Fix this by clearing the needs_recovery flag if the block device is read-only. We do this after the call to ext4_load_and_init_journal() since there are some error checks that need to be done in case the journal needs to be replayed and the block device is read-only, or if the block device containing the external journal is read-only, etc.
Cc: stable@kernel.org Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1108271 Cc: stable@vger.kernel.org Fixes: 02f310fcf47f ("ext4: Speedup ext4 orphan inode handling") Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b16ffa507b84..699c15db28a8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5416,6 +5416,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) err = ext4_load_and_init_journal(sb, es, ctx); if (err) goto failed_mount3a; + if (bdev_read_only(sb->s_bdev)) + needs_recovery = 0; } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) && ext4_has_feature_journal_needs_recovery(sb)) { ext4_msg(sb, KERN_ERR, "required journal recovery " From 02c7f7219ac0e2277b3379a3a0e9841ef464b6d4 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Mon, 11 Aug 2025 14:45:32 +0800 Subject: [PATCH 1898/2411] ext4: fix hole length calculation overflow in non-extent inodes In a filesystem with a block size larger than 4KB, the hole length calculation for a non-extent inode in ext4_ind_map_blocks() can easily exceed INT_MAX. Then it could return a zero length hole and trigger the following warning and infinite loop in the iomap infrastructure.
------------[ cut here ]------------ WARNING: CPU: 3 PID: 434101 at fs/iomap/iter.c:34 iomap_iter_done+0x148/0x190 CPU: 3 UID: 0 PID: 434101 Comm: fsstress Not tainted 6.16.0-rc7+ #128 PREEMPT(voluntary) Hardware name: QEMU KVM Virtual Machine, BIOS unknown 2/2/2022 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : iomap_iter_done+0x148/0x190 lr : iomap_iter+0x174/0x230 sp : ffff8000880af740 x29: ffff8000880af740 x28: ffff0000db8e6840 x27: 0000000000000000 x26: 0000000000000000 x25: ffff8000880af830 x24: 0000004000000000 x23: 0000000000000002 x22: 000001bfdbfa8000 x21: ffffa6a41c002e48 x20: 0000000000000001 x19: ffff8000880af808 x18: 0000000000000000 x17: 0000000000000000 x16: ffffa6a495ee6cd0 x15: 0000000000000000 x14: 00000000000003d4 x13: 00000000fa83b2da x12: 0000b236fc95f18c x11: ffffa6a4978b9c08 x10: 0000000000001da0 x9 : ffffa6a41c1a2a44 x8 : ffff8000880af5c8 x7 : 0000000001000000 x6 : 0000000000000000 x5 : 0000000000000004 x4 : 000001bfdbfa8000 x3 : 0000000000000000 x2 : 0000000000000000 x1 : 0000004004030000 x0 : 0000000000000000 Call trace: iomap_iter_done+0x148/0x190 (P) iomap_iter+0x174/0x230 iomap_fiemap+0x154/0x1d8 ext4_fiemap+0x110/0x140 [ext4] do_vfs_ioctl+0x4b8/0xbc0 __arm64_sys_ioctl+0x8c/0x120 invoke_syscall+0x6c/0x100 el0_svc_common.constprop.0+0x48/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x38/0x120 el0t_64_sync_handler+0x10c/0x138 el0t_64_sync+0x198/0x1a0 ---[ end trace 0000000000000000 ]--- Cc: stable@kernel.org Fixes: facab4d9711e ("ext4: return hole from ext4_map_blocks()") Reported-by: Qu Wenruo Closes: https://lore.kernel.org/linux-ext4/9b650a52-9672-4604-a765-bb6be55d1e4a@gmx.com/ Tested-by: Qu Wenruo Signed-off-by: Zhang Yi Link: https://patch.msgid.link/20250811064532.1788289-1-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/indirect.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 7de327fa7b1c..d45124318200 100644 --- 
a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -539,7 +539,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, int indirect_blks; int blocks_to_boundary = 0; int depth; - int count = 0; + u64 count = 0; ext4_fsblk_t first_block = 0; trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); @@ -588,7 +588,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, count++; /* Fill in size of a hole we found */ map->m_pblk = 0; - map->m_len = min_t(unsigned int, map->m_len, count); + map->m_len = umin(map->m_len, count); goto cleanup; } From 76dba1fe277f6befd6ef650e1946f626c547387a Mon Sep 17 00:00:00 2001 From: Liao Yuanhong Date: Mon, 11 Aug 2025 20:58:16 +0800 Subject: [PATCH 1899/2411] ext4: use kmalloc_array() for array space allocation Replace kmalloc(size * sizeof) with kmalloc_array() for safer memory allocation and overflow prevention. Cc: stable@kernel.org Signed-off-by: Liao Yuanhong Link: https://patch.msgid.link/20250811125816.570142-1-liaoyuanhong@vivo.com Signed-off-by: Theodore Ts'o --- fs/ext4/orphan.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c index 7c7f792ad6ab..524d4658fa40 100644 --- a/fs/ext4/orphan.c +++ b/fs/ext4/orphan.c @@ -589,8 +589,9 @@ int ext4_init_orphan_info(struct super_block *sb) } oi->of_blocks = inode->i_size >> sb->s_blocksize_bits; oi->of_csum_seed = EXT4_I(inode)->i_csum_seed; - oi->of_binfo = kmalloc(oi->of_blocks*sizeof(struct ext4_orphan_block), - GFP_KERNEL); + oi->of_binfo = kmalloc_array(oi->of_blocks, + sizeof(struct ext4_orphan_block), + GFP_KERNEL); if (!oi->of_binfo) { ret = -ENOMEM; goto out_put; From 30c1d25b9870d551be42535067d5481668b5e6f3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Aug 2025 12:26:10 +0200 Subject: [PATCH 1900/2411] netfilter: nft_set_pipapo: fix null deref for empty set Blamed commit broke the check for a null scratch map: - if (unlikely(!m || !*raw_cpu_ptr(m->scratch))) + if 
(unlikely(!raw_cpu_ptr(m->scratch))) This should have been "if (!*raw_ ...)". Use the pattern of the avx2 version which is more readable. This can only be reproduced if avx2 support isn't available. Fixes: d8d871a35ca9 ("netfilter: nft_set_pipapo: merge pipapo_get/lookup") Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 1a19649c2851..9a10251228fd 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -426,10 +426,9 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, local_bh_disable(); - if (unlikely(!raw_cpu_ptr(m->scratch))) - goto out; - scratch = *raw_cpu_ptr(m->scratch); + if (unlikely(!scratch)) + goto out; map_index = scratch->map_index; From c0a23bbc98e93704a1f4fb5e7e7bb2d7c0fb6eb3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 29 Jul 2025 14:26:11 +0200 Subject: [PATCH 1901/2411] ipvs: Fix estimator kthreads preferred affinity The estimator kthreads' affinity are defined by sysctl overwritten preferences and applied through a plain call to the scheduler's affinity API. However since the introduction of managed kthreads preferred affinity, such a practice shortcuts the kthreads core code which eventually overwrites the target to the default unbound affinity. Fix this with using the appropriate kthread's API. 
Fixes: d1a89197589c ("kthread: Default affine kthread to its preferred NUMA node") Signed-off-by: Frederic Weisbecker Acked-by: Julian Anastasov Signed-off-by: Florian Westphal --- include/net/ip_vs.h | 13 +++++++++++++ kernel/kthread.c | 1 + net/netfilter/ipvs/ip_vs_est.c | 3 ++- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index ff406ef4fd4a..29a36709e7f3 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1163,6 +1163,14 @@ static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs) return housekeeping_cpumask(HK_TYPE_KTHREAD); } +static inline const struct cpumask *sysctl_est_preferred_cpulist(struct netns_ipvs *ipvs) +{ + if (ipvs->est_cpulist_valid) + return ipvs->sysctl_est_cpulist; + else + return NULL; +} + static inline int sysctl_est_nice(struct netns_ipvs *ipvs) { return ipvs->sysctl_est_nice; @@ -1270,6 +1278,11 @@ static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs) return housekeeping_cpumask(HK_TYPE_KTHREAD); } +static inline const struct cpumask *sysctl_est_preferred_cpulist(struct netns_ipvs *ipvs) +{ + return NULL; +} + static inline int sysctl_est_nice(struct netns_ipvs *ipvs) { return IPVS_EST_NICE; diff --git a/kernel/kthread.c b/kernel/kthread.c index 0e98b228a8ef..31b072e8d427 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -893,6 +893,7 @@ int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask) return ret; } +EXPORT_SYMBOL_GPL(kthread_affine_preferred); /* * Re-affine kthreads according to their preferences diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index f821ad2e19b3..15049b826732 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -265,7 +265,8 @@ int ip_vs_est_kthread_start(struct netns_ipvs *ipvs, } set_user_nice(kd->task, sysctl_est_nice(ipvs)); - set_cpus_allowed_ptr(kd->task, sysctl_est_cpulist(ipvs)); + if 
(sysctl_est_preferred_cpulist(ipvs)) + kthread_affine_preferred(kd->task, sysctl_est_preferred_cpulist(ipvs)); pr_info("starting estimator thread %d...\n", kd->id); wake_up_process(kd->task); From cf5fb87fcdaaaafec55dcc0dc5a9e15ead343973 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 13 Aug 2025 02:38:50 +0200 Subject: [PATCH 1902/2411] netfilter: nf_tables: reject duplicate device on updates A chain/flowtable update with duplicated devices in the same batch is possible. Unfortunately, netdev event path only removes the first device that is found, leaving unregistered the hook of the duplicated device. Check if a duplicated device exists in the transaction batch, bail out with EEXIST in such case. WARNING is hit when unregistering the hook: [49042.221275] WARNING: CPU: 4 PID: 8425 at net/netfilter/core.c:340 nf_hook_entry_head+0xaa/0x150 [49042.221375] CPU: 4 UID: 0 PID: 8425 Comm: nft Tainted: G S 6.16.0+ #170 PREEMPT(full) [...] [49042.221382] RIP: 0010:nf_hook_entry_head+0xaa/0x150 Fixes: 78d9f48f7f44 ("netfilter: nf_tables: add devices to existing flowtable") Fixes: b9703ed44ffb ("netfilter: nf_tables: support for adding new devices to an existing netdev chain") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 13d0ed9d1895..58c5425d61c2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2803,6 +2803,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, struct nft_chain *chain = ctx->chain; struct nft_chain_hook hook = {}; struct nft_stats __percpu *stats = NULL; + struct nftables_pernet *nft_net; struct nft_hook *h, *next; struct nf_hook_ops *ops; struct nft_trans *trans; @@ -2845,6 +2846,20 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, if 
(nft_hook_list_find(&basechain->hook_list, h)) { list_del(&h->list); nft_netdev_hook_free(h); + continue; + } + + nft_net = nft_pernet(ctx->net); + list_for_each_entry(trans, &nft_net->commit_list, list) { + if (trans->msg_type != NFT_MSG_NEWCHAIN || + trans->table != ctx->table || + !nft_trans_chain_update(trans)) + continue; + + if (nft_hook_list_find(&nft_trans_chain_hooks(trans), h)) { + nft_chain_release_hook(&hook); + return -EEXIST; + } } } } else { @@ -9060,6 +9075,7 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, { const struct nlattr * const *nla = ctx->nla; struct nft_flowtable_hook flowtable_hook; + struct nftables_pernet *nft_net; struct nft_hook *hook, *next; struct nf_hook_ops *ops; struct nft_trans *trans; @@ -9076,6 +9092,20 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, if (nft_hook_list_find(&flowtable->hook_list, hook)) { list_del(&hook->list); nft_netdev_hook_free(hook); + continue; + } + + nft_net = nft_pernet(ctx->net); + list_for_each_entry(trans, &nft_net->commit_list, list) { + if (trans->msg_type != NFT_MSG_NEWFLOWTABLE || + trans->table != ctx->table || + !nft_trans_flowtable_update(trans)) + continue; + + if (nft_hook_list_find(&nft_trans_flowtable_hooks(trans), hook)) { + err = -EEXIST; + goto err_flowtable_update_hook; + } } } From d045c3154080a04beb07726fa311b89d21608981 Mon Sep 17 00:00:00 2001 From: Pratyush Brahma Date: Wed, 13 Aug 2025 12:51:02 +0530 Subject: [PATCH 1903/2411] mm/numa_memblks: Use pr_debug instead of printk(KERN_DEBUG) Replace the direct usage of printk(KERN_DEBUG ...) with pr_debug(...) to align with the consistent `pr_*` API usage within the file. 
Reviewed-by: Joshua Hahn Signed-off-by: Pratyush Brahma Link: https://lore.kernel.org/r/20250813-numa-dbg-v3-1-1dcd1234fcc5@oss.qualcomm.com Signed-off-by: Mike Rapoport (Microsoft) --- mm/numa_memblks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/numa_memblks.c b/mm/numa_memblks.c index 541a99c4071a..de626525a87c 100644 --- a/mm/numa_memblks.c +++ b/mm/numa_memblks.c @@ -76,7 +76,7 @@ static int __init numa_alloc_distance(void) for (j = 0; j < cnt; j++) numa_distance[i * cnt + j] = i == j ? LOCAL_DISTANCE : REMOTE_DISTANCE; - printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt); + pr_debug("NUMA: Initialized distance table, cnt=%d\n", cnt); return 0; } From 21924af67d69d7c9fdaf845be69043cfe75196a1 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 5 Aug 2025 00:10:02 +0000 Subject: [PATCH 1904/2411] locking: Fix __clear_task_blocked_on() warning from __ww_mutex_wound() path The __clear_task_blocked_on() helper added a number of sanity checks ensuring we hold the mutex wait lock and that the task we are clearing blocked_on pointer (if set) matches the mutex. However, there is an edge case in the __ww_mutex_wound() logic where we need to clear the blocked_on pointer for the task that owns the mutex, not the task that is waiting on the mutex. For this case the sanity checks aren't valid, so handle this by allowing a NULL lock to skip the additional checks. K Prateek Nayak and Maarten Lankhorst also pointed out that in this case where we don't hold the owner's mutex wait_lock, we need to be a bit more careful using READ_ONCE/WRITE_ONCE in both the __clear_task_blocked_on() and __set_task_blocked_on() implementations to avoid accidentally tripping WARN_ONs if two instances race. So do that here as well. This issue was easier to miss, I realized, as the test-ww_mutex driver only exercises the wait-die class of ww_mutexes. I've sent a patch[1] to address this so the logic will be easier to test.
[1]: https://lore.kernel.org/lkml/20250801023358.562525-2-jstultz@google.com/ Fixes: a4f0b6fef4b0 ("locking/mutex: Add p->blocked_on wrappers for correctness checks") Closes: https://lore.kernel.org/lkml/68894443.a00a0220.26d0e1.0015.GAE@google.com/ Reported-by: syzbot+602c4720aed62576cd79@syzkaller.appspotmail.com Reported-by: Maarten Lankhorst Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: K Prateek Nayak Acked-by: Maarten Lankhorst Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20250805001026.2247040-1-jstultz@google.com --- include/linux/sched.h | 29 +++++++++++++++++------------ kernel/locking/ww_mutex.h | 6 +++++- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 40d2fa90df42..62103dd6a48e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2144,6 +2144,8 @@ static inline struct mutex *__get_task_blocked_on(struct task_struct *p) static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m) { + struct mutex *blocked_on = READ_ONCE(p->blocked_on); + WARN_ON_ONCE(!m); /* The task should only be setting itself as blocked */ WARN_ON_ONCE(p != current); @@ -2154,8 +2156,8 @@ static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m) * with a different mutex. Note, setting it to the same * lock repeatedly is ok. 
*/ - WARN_ON_ONCE(p->blocked_on && p->blocked_on != m); - p->blocked_on = m; + WARN_ON_ONCE(blocked_on && blocked_on != m); + WRITE_ONCE(p->blocked_on, m); } static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m) @@ -2166,16 +2168,19 @@ static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m) static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m) { - WARN_ON_ONCE(!m); - /* Currently we serialize blocked_on under the mutex::wait_lock */ - lockdep_assert_held_once(&m->wait_lock); - /* - * There may be cases where we re-clear already cleared - * blocked_on relationships, but make sure we are not - * clearing the relationship with a different lock. - */ - WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m); - p->blocked_on = NULL; + if (m) { + struct mutex *blocked_on = READ_ONCE(p->blocked_on); + + /* Currently we serialize blocked_on under the mutex::wait_lock */ + lockdep_assert_held_once(&m->wait_lock); + /* + * There may be cases where we re-clear already cleared + * blocked_on relationships, but make sure we are not + * clearing the relationship with a different lock. + */ + WARN_ON_ONCE(blocked_on && blocked_on != m); + } + WRITE_ONCE(p->blocked_on, NULL); } static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m) diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h index 086fd5487ca7..31a785afee6c 100644 --- a/kernel/locking/ww_mutex.h +++ b/kernel/locking/ww_mutex.h @@ -342,8 +342,12 @@ static bool __ww_mutex_wound(struct MUTEX *lock, * When waking up the task to wound, be sure to clear the * blocked_on pointer. Otherwise we can see circular * blocked_on relationships that can't resolve. + * + * NOTE: We pass NULL here instead of lock, because we + * are waking the mutex owner, who may be currently + * blocked on a different mutex. 
*/ - __clear_task_blocked_on(owner, lock); + __clear_task_blocked_on(owner, NULL); wake_q_add(wake_q, owner); } return true; From 89a2d212bdb4bc29bed8e7077abe054b801137ea Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Mon, 11 Aug 2025 13:17:59 -0500 Subject: [PATCH 1905/2411] dma/pool: Ensure DMA_DIRECT_REMAP allocations are decrypted When CONFIG_DMA_DIRECT_REMAP is enabled, atomic pool pages are remapped via dma_common_contiguous_remap() using the supplied pgprot. Currently, the mapping uses pgprot_dmacoherent(PAGE_KERNEL), which leaves the memory encrypted on systems with memory encryption enabled (e.g., ARM CCA Realms). This can cause the DMA layer to fail or crash when accessing the memory, as the underlying physical pages are not configured as expected. Fix this by requesting a decrypted mapping in the vmap() call: pgprot_decrypted(pgprot_dmacoherent(PAGE_KERNEL)) This ensures that atomic pool memory is consistently mapped unencrypted. Cc: stable@vger.kernel.org Signed-off-by: Shanker Donthineni Reviewed-by: Catalin Marinas Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20250811181759.998805-1-sdonthineni@nvidia.com --- kernel/dma/pool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 7b04f7575796..ee45dee33d49 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -102,8 +102,8 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, #ifdef CONFIG_DMA_DIRECT_REMAP addr = dma_common_contiguous_remap(page, pool_size, - pgprot_dmacoherent(PAGE_KERNEL), - __builtin_return_address(0)); + pgprot_decrypted(pgprot_dmacoherent(PAGE_KERNEL)), + __builtin_return_address(0)); if (!addr) goto free_page; #else From 3eb61d7cb74cea2ea697363669fa256937164758 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 15 Jul 2025 10:26:22 +0200 Subject: [PATCH 1906/2411] Revert "drm/amdgpu: Use dma_buf from GEM object instance" This reverts commit 
515986100d176663d0a03219a3056e4252f729e6. The dma_buf field in struct drm_gem_object is not stable over the object instance's lifetime. The field becomes NULL when user space releases the final GEM handle on the buffer object. This resulted in a NULL-pointer deref. Workarounds in commit 5307dce878d4 ("drm/gem: Acquire references on GEM handles for framebuffers") and commit f6bfc9afc751 ("drm/framebuffer: Acquire internal references on GEM handles") only solved the problem partially. They especially don't work for buffer objects without a DRM framebuffer associated. Hence, this revert to going back to using .import_attach->dmabuf. Signed-off-by: Thomas Zimmermann Reviewed-by: Simona Vetter Acked-by: Alex Deucher Link: https://lore.kernel.org/r/20250715082635.34974-1-tzimmermann@suse.de --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index ff98c87b2e0b..5743ebb2f1b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -514,7 +514,7 @@ bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev, return false; if (drm_gem_is_imported(obj)) { - struct dma_buf *dma_buf = obj->dma_buf; + struct dma_buf *dma_buf = obj->import_attach->dmabuf; if (dma_buf->ops != &amdgpu_dmabuf_ops) /* No XGMI with non AMD GPUs */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 6626a6e64ff5..d1ccbfcf21fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -317,7 +317,8 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, */ if (!vm->is_compute_context || !vm->process_info) return 0; - if (!drm_gem_is_imported(obj) || !dma_buf_is_dynamic(obj->dma_buf)) + if 
(!drm_gem_is_imported(obj) || + !dma_buf_is_dynamic(obj->import_attach->dmabuf)) return 0; mutex_lock_nested(&vm->process_info->lock, 1); if (!WARN_ON(!vm->process_info->eviction_fence)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5cacf5717016..8777ed8facd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1276,7 +1276,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, struct drm_gem_object *obj = &bo->tbo.base; if (drm_gem_is_imported(obj) && bo_va->is_xgmi) { - struct dma_buf *dma_buf = obj->dma_buf; + struct dma_buf *dma_buf = obj->import_attach->dmabuf; struct drm_gem_object *gobj = dma_buf->priv; struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); From 3c3e9a9f2972b364e8c2cfbfdeb23c6d6be4f87f Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Fri, 25 Jul 2025 18:31:04 -0700 Subject: [PATCH 1907/2411] RDMA/rxe: Flush delayed SKBs while releasing RXE resources When skb packets are sent out, these skb packets still depends on the rxe resources, for example, QP, sk, when these packets are destroyed. If these rxe resources are released when the skb packets are destroyed, the call traces will appear. To avoid skb packets hang too long time in some network devices, a timestamp is added when these skb packets are created. If these skb packets hang too long time in network devices, these network devices can free these skb packets to release rxe resources. 
Reported-by: syzbot+8425ccfb599521edb153@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=8425ccfb599521edb153 Tested-by: syzbot+8425ccfb599521edb153@syzkaller.appspotmail.com Fixes: 1a633bdc8fd9 ("RDMA/rxe: Let destroy qp succeed with stuck packet") Signed-off-by: Zhu Yanjun Link: https://patch.msgid.link/20250726013104.463570-1-yanjun.zhu@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_net.c | 29 ++++++++--------------------- drivers/infiniband/sw/rxe/rxe_qp.c | 2 +- 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 132a87e52d5c..ac0183a2ff7a 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -345,33 +345,15 @@ int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt, static void rxe_skb_tx_dtor(struct sk_buff *skb) { - struct net_device *ndev = skb->dev; - struct rxe_dev *rxe; - unsigned int qp_index; - struct rxe_qp *qp; + struct rxe_qp *qp = skb->sk->sk_user_data; int skb_out; - rxe = rxe_get_dev_from_net(ndev); - if (!rxe && is_vlan_dev(ndev)) - rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev)); - if (WARN_ON(!rxe)) - return; - - qp_index = (int)(uintptr_t)skb->sk->sk_user_data; - if (!qp_index) - return; - - qp = rxe_pool_get_index(&rxe->qp_pool, qp_index); - if (!qp) - goto put_dev; - skb_out = atomic_dec_return(&qp->skb_out); - if (qp->need_req_skb && skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW) + if (unlikely(qp->need_req_skb && + skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)) rxe_sched_task(&qp->send_task); rxe_put(qp); -put_dev: - ib_device_put(&rxe->ib_dev); sock_put(skb->sk); } @@ -383,6 +365,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) sock_hold(sk); skb->sk = sk; skb->destructor = rxe_skb_tx_dtor; + rxe_get(pkt->qp); atomic_inc(&pkt->qp->skb_out); if (skb->protocol == htons(ETH_P_IP)) @@ -405,6 +388,7 @@ static int rxe_loopback(struct sk_buff 
*skb, struct rxe_pkt_info *pkt) sock_hold(sk); skb->sk = sk; skb->destructor = rxe_skb_tx_dtor; + rxe_get(pkt->qp); atomic_inc(&pkt->qp->skb_out); if (skb->protocol == htons(ETH_P_IP)) @@ -497,6 +481,9 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, goto out; } + /* Add time stamp to skb. */ + skb->tstamp = ktime_get(); + skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(ndev)); /* FIXME: hold reference to this netdev until life of this skb. */ diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index f2af3e0aef35..95f1c1c2949d 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -244,7 +244,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk); if (err < 0) return err; - qp->sk->sk->sk_user_data = (void *)(uintptr_t)qp->elem.index; + qp->sk->sk->sk_user_data = qp; /* pick a source UDP port number for this QP based on * the source QPN. this spreads traffic for different QPs From d5c74713f0117d07f91eb48b10bc2ad44e23c9b9 Mon Sep 17 00:00:00 2001 From: Boshi Yu Date: Fri, 25 Jul 2025 13:53:55 +0800 Subject: [PATCH 1908/2411] RDMA/erdma: Fix ignored return value of init_kernel_qp The init_kernel_qp interface may fail. Check its return value and free related resources properly when it does. 
Fixes: 155055771704 ("RDMA/erdma: Add verbs implementation") Reviewed-by: Cheng Xu Signed-off-by: Boshi Yu Link: https://patch.msgid.link/20250725055410.67520-3-boshiyu@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_verbs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 94c211df09d8..2e01520ca385 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -1031,7 +1031,9 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, if (ret) goto err_out_cmd; } else { - init_kernel_qp(dev, qp, attrs); + ret = init_kernel_qp(dev, qp, attrs); + if (ret) + goto err_out_xa; } qp->attrs.max_send_sge = attrs->cap.max_send_sge; From d4ac86b47563c7895dae28658abd1879d266b2b4 Mon Sep 17 00:00:00 2001 From: Boshi Yu Date: Fri, 25 Jul 2025 13:53:56 +0800 Subject: [PATCH 1909/2411] RDMA/erdma: Fix unset QPN of GSI QP The QPN of the GSI QP was not set, which may cause issues. Set the QPN to 1 when creating the GSI QP. 
Fixes: 999a0a2e9b87 ("RDMA/erdma: Support UD QPs and UD WRs") Reviewed-by: Cheng Xu Signed-off-by: Boshi Yu Link: https://patch.msgid.link/20250725055410.67520-4-boshiyu@linux.alibaba.com Reviewed-by: Zhu Yanjun Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 2e01520ca385..fdeec33c71da 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -994,6 +994,8 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, old_entry = xa_store(&dev->qp_xa, 1, qp, GFP_KERNEL); if (xa_is_err(old_entry)) ret = xa_err(old_entry); + else + qp->ibqp.qp_num = 1; } else { ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp, XA_LIMIT(1, dev->attrs.max_qp - 1), From f0ba0e7172a222ea6043b61ecd86723c46d7bcf2 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 23 Jul 2025 15:38:10 +0200 Subject: [PATCH 1910/2411] btrfs: zoned: skip ZONE FINISH of conventional zones Don't call ZONE FINISH for conventional zones as this will result in I/O errors. Instead check if the zone that needs finishing is a conventional zone and if yes skip it. Also factor out the actual handling of finishing a single zone into a helper function, as do_zone_finish() is growing ever bigger and the indentations levels are getting higher. 
Reviewed-by: Naohiro Aota Reviewed-by: Anand Jain Signed-off-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 55 ++++++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index db11b5b5f0e6..36de6d0d595f 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -2245,6 +2245,40 @@ static void wait_eb_writebacks(struct btrfs_block_group *block_group) rcu_read_unlock(); } +static int call_zone_finish(struct btrfs_block_group *block_group, + struct btrfs_io_stripe *stripe) +{ + struct btrfs_device *device = stripe->dev; + const u64 physical = stripe->physical; + struct btrfs_zoned_device_info *zinfo = device->zone_info; + int ret; + + if (!device->bdev) + return 0; + + if (zinfo->max_active_zones == 0) + return 0; + + if (btrfs_dev_is_sequential(device, physical)) { + unsigned int nofs_flags; + + nofs_flags = memalloc_nofs_save(); + ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, + physical >> SECTOR_SHIFT, + zinfo->zone_size >> SECTOR_SHIFT); + memalloc_nofs_restore(nofs_flags); + + if (ret) + return ret; + } + + if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA)) + zinfo->reserved_active_zones++; + btrfs_dev_clear_active_zone(device, physical); + + return 0; +} + static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written) { struct btrfs_fs_info *fs_info = block_group->fs_info; @@ -2329,31 +2363,12 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ down_read(&dev_replace->rwsem); map = block_group->physical_map; for (i = 0; i < map->num_stripes; i++) { - struct btrfs_device *device = map->stripes[i].dev; - const u64 physical = map->stripes[i].physical; - struct btrfs_zoned_device_info *zinfo = device->zone_info; - unsigned int nofs_flags; - - if (!device->bdev) - continue; - - if (zinfo->max_active_zones == 0) - continue; - - nofs_flags = memalloc_nofs_save(); - ret = 
blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, - physical >> SECTOR_SHIFT, - zinfo->zone_size >> SECTOR_SHIFT); - memalloc_nofs_restore(nofs_flags); + ret = call_zone_finish(block_group, &map->stripes[i]); if (ret) { up_read(&dev_replace->rwsem); return ret; } - - if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA)) - zinfo->reserved_active_zones++; - btrfs_dev_clear_active_zone(device, physical); } up_read(&dev_replace->rwsem); From daa0fde322350b467bc62bc1b141bf62df6123f8 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 16 Jul 2025 16:59:53 +0900 Subject: [PATCH 1911/2411] btrfs: zoned: fix data relocation block group reservation btrfs_zoned_reserve_data_reloc_bg() is called on mount and at that point, all data block groups belong to the primary data space_info. So, we don't find anything in the data relocation space_info. Also, the condition "bg->used > 0" can select a block group with full of zone_unusable bytes for the candidate. As we cannot allocate from the block group, it is useless to reserve it as the data relocation block group. Furthermore, because of the space_info separation, we need to migrate the selected block group to the data relocation space_info. If not, the extent allocator cannot use the block group to do the allocation. This commit fixes these three issues. 
Fixes: e606ff985ec7 ("btrfs: zoned: reserve data_reloc block group on mount") Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 55 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 36de6d0d595f..7a3351b1b0c6 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -17,6 +17,7 @@ #include "accessors.h" #include "bio.h" #include "transaction.h" +#include "sysfs.h" /* Maximum number of zones to report per blkdev_report_zones() call */ #define BTRFS_REPORT_NR_ZONES 4096 @@ -2519,12 +2520,12 @@ void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info) { struct btrfs_space_info *data_sinfo = fs_info->data_sinfo; - struct btrfs_space_info *space_info = data_sinfo->sub_group[0]; + struct btrfs_space_info *space_info = data_sinfo; struct btrfs_trans_handle *trans; struct btrfs_block_group *bg; struct list_head *bg_list; u64 alloc_flags; - bool initial = false; + bool first = true; bool did_chunk_alloc = false; int index; int ret; @@ -2538,21 +2539,52 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info) if (sb_rdonly(fs_info->sb)) return; - ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC); alloc_flags = btrfs_get_alloc_profile(fs_info, space_info->flags); index = btrfs_bg_flags_to_raid_index(alloc_flags); - bg_list = &data_sinfo->block_groups[index]; + /* Scan the data space_info to find empty block groups. Take the second one. */ again: + bg_list = &space_info->block_groups[index]; list_for_each_entry(bg, bg_list, list) { - if (bg->used > 0) + if (bg->alloc_offset != 0) continue; - if (!initial) { - initial = true; + if (first) { + first = false; continue; } + if (space_info == data_sinfo) { + /* Migrate the block group to the data relocation space_info. 
*/ + struct btrfs_space_info *reloc_sinfo = data_sinfo->sub_group[0]; + int factor; + + ASSERT(reloc_sinfo->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC); + factor = btrfs_bg_type_to_factor(bg->flags); + + down_write(&space_info->groups_sem); + list_del_init(&bg->list); + /* We can assume this as we choose the second empty one. */ + ASSERT(!list_empty(&space_info->block_groups[index])); + up_write(&space_info->groups_sem); + + spin_lock(&space_info->lock); + space_info->total_bytes -= bg->length; + space_info->disk_total -= bg->length * factor; + /* There is no allocation ever happened. */ + ASSERT(bg->used == 0); + ASSERT(bg->zone_unusable == 0); + /* No super block in a block group on the zoned setup. */ + ASSERT(bg->bytes_super == 0); + spin_unlock(&space_info->lock); + + bg->space_info = reloc_sinfo; + if (reloc_sinfo->block_group_kobjs[index] == NULL) + btrfs_sysfs_add_block_group_type(bg); + + btrfs_add_bg_to_space_info(fs_info, bg); + } + fs_info->data_reloc_bg = bg->start; set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &bg->runtime_flags); btrfs_zone_activate(bg); @@ -2567,11 +2599,18 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info) if (IS_ERR(trans)) return; + /* Allocate new BG in the data relocation space_info. */ + space_info = data_sinfo->sub_group[0]; + ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC); ret = btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE); btrfs_end_transaction(trans); if (ret == 1) { + /* + * We allocated a new block group in the data relocation space_info. We + * can take that one. 
+ */ + first = false; did_chunk_alloc = true; - bg_list = &space_info->block_groups[index]; goto again; } } From 5c4b93f4c8e5c53574c1a48d66a27a2c68b414af Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 16 Jul 2025 16:59:54 +0900 Subject: [PATCH 1912/2411] btrfs: zoned: fix write time activation failure for metadata block group Since commit 13bb483d32ab ("btrfs: zoned: activate metadata block group on write time"), we activate a metadata block group at the write time. If the zone capacity is small enough, we can allocate the entire region before the first write. Then, we hit the btrfs_zoned_bg_is_full() in btrfs_zone_activate() and the activation fails. For a data block group, we activate it at the allocation time and we should check the fullness condition on the caller side. Add a WARN to check the fullness condition. For a metadata block group, we don't need the fullness check because we activate it at the write time. Instead, activating it once it is written should be invalid. Catch that with a WARN too. Fixes: 13bb483d32ab ("btrfs: zoned: activate metadata block group on write time") CC: stable@vger.kernel.org # 6.6+ Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 7a3351b1b0c6..ab6844dce8bc 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -2169,10 +2169,15 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) goto out_unlock; } - /* No space left */ - if (btrfs_zoned_bg_is_full(block_group)) { - ret = false; - goto out_unlock; + if (block_group->flags & BTRFS_BLOCK_GROUP_DATA) { + /* The caller should check if the block group is full. */ + if (WARN_ON_ONCE(btrfs_zoned_bg_is_full(block_group))) { + ret = false; + goto out_unlock; + } + } else { + /* Since it is already written, it should have been active.
*/ + WARN_ON_ONCE(block_group->meta_write_pointer != block_group->start); } for (i = 0; i < map->num_stripes; i++) { From 04147d8394e80acaaebf0365f112339e8b606c05 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 16 Jul 2025 16:59:55 +0900 Subject: [PATCH 1913/2411] btrfs: zoned: limit active zones to max_open_zones When there is no active zone limit, we can technically write into any number of zones at the same time. However, exceeding the max open zones can degrade performance. To prevent this, set the max_active_zones to bdev_max_open_zones() if there is no active zone limit. Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index ab6844dce8bc..e0ee3aeabd2c 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -43,6 +43,9 @@ /* Number of superblock log zones */ #define BTRFS_NR_SB_LOG_ZONES 2 +/* Default number of max active zones when the device has no limits. 
*/ +#define BTRFS_DEFAULT_MAX_ACTIVE_ZONES 128 + /* * Minimum of active zones we need: * @@ -417,7 +420,10 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) if (!IS_ALIGNED(nr_sectors, zone_sectors)) zone_info->nr_zones++; - max_active_zones = bdev_max_active_zones(bdev); + max_active_zones = min_not_zero(bdev_max_active_zones(bdev), + bdev_max_open_zones(bdev)); + if (!max_active_zones && zone_info->nr_zones > BTRFS_DEFAULT_MAX_ACTIVE_ZONES) + max_active_zones = BTRFS_DEFAULT_MAX_ACTIVE_ZONES; if (max_active_zones && max_active_zones < BTRFS_MIN_ACTIVE_ZONES) { btrfs_err(fs_info, "zoned: %s: max active zones %u is too small, need at least %u active zones", From 085a1b42e52750769a3fa29d4da6c05ab56f18f8 Mon Sep 17 00:00:00 2001 From: wenglianfa Date: Sat, 26 Jul 2025 15:53:45 +0800 Subject: [PATCH 1914/2411] RDMA/hns: Fix querying wrong SCC context for DIP algorithm When using DIP algorithm, all QPs establishing connections with the same destination IP share the same SCC, which is indexed by dip_idx, but dip_idx isn't necessarily equal to qpn. Therefore, dip_idx should be used to query SCC context instead of qpn. 
Fixes: 124a9fbe43aa ("RDMA/hns: Append SCC context to the raw dump of QPC") Signed-off-by: wenglianfa Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20250726075345.846957-1-huangjunxian6@hisilicon.com Reviewed-by: Zhu Yanjun Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_restrack.c | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 64bca08f3f1a..244a4780d3a6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5476,7 +5476,7 @@ static int hns_roce_v2_query_srqc(struct hns_roce_dev *hr_dev, u32 srqn, return ret; } -static int hns_roce_v2_query_sccc(struct hns_roce_dev *hr_dev, u32 qpn, +static int hns_roce_v2_query_sccc(struct hns_roce_dev *hr_dev, u32 sccn, void *buffer) { struct hns_roce_v2_scc_context *context; @@ -5488,7 +5488,7 @@ static int hns_roce_v2_query_sccc(struct hns_roce_dev *hr_dev, u32 qpn, return PTR_ERR(mailbox); ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_SCCC, - qpn); + sccn); if (ret) goto out; diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index f637b73b946e..230187dda6a0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -100,6 +100,7 @@ int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp) struct hns_roce_v2_qp_context qpc; struct hns_roce_v2_scc_context sccc; } context = {}; + u32 sccn = hr_qp->qpn; int ret; if (!hr_dev->hw->query_qpc) @@ -116,7 +117,13 @@ int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp) !hr_dev->hw->query_sccc) goto out; - ret = hr_dev->hw->query_sccc(hr_dev, hr_qp->qpn, &context.sccc); + if (hr_qp->cong_type == CONG_TYPE_DIP) { + if (!hr_qp->dip) + goto out; + sccn = 
hr_qp->dip->dip_idx; + } + + ret = hr_dev->hw->query_sccc(hr_dev, sccn, &context.sccc); if (ret) ibdev_warn_ratelimited(&hr_dev->ib_dev, "failed to query SCCC, ret = %d.\n", From 6296f9a5293ada28558f2867ac54c487e1e2b9f2 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Tue, 5 Aug 2025 15:39:57 +0530 Subject: [PATCH 1915/2411] RDMA/bnxt_re: Fix to do SRQ armena by default Whenever SRQ is created, make sure SRQ arm enable is always set. Driver is always ready to receive SRQ ASYNC event. Additional note - There is no need to do srq arm enable conditionally. See bnxt_qplib_armen_db in bnxt_qplib_create_cq(). Fixes: 37cb11acf1f7 ("RDMA/bnxt_re: Add SRQ support for Broadcom adapters") Signed-off-by: Kashyap Desai Signed-off-by: Saravanan Vajravel Link: https://patch.msgid.link/20250805101000.233310-2-kalesh-anakkur.purayil@broadcom.com Reviewed-by: Kalesh AP Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index dfe3177123e5..b2c1240775f4 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -705,8 +705,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, srq->dbinfo.db = srq->dpi->dbr; srq->dbinfo.max_slot = 1; srq->dbinfo.priv_db = res->dpi_tbl.priv_db; - if (srq->threshold) - bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA); + bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA); srq->arm_req = false; return 0; From 666bce0bd7e771127cb0cda125cc9d32d9f9f15d Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Tue, 5 Aug 2025 15:39:58 +0530 Subject: [PATCH 1916/2411] RDMA/bnxt_re: Fix to remove workload check in SRQ limit path There should not be any checks of current workload to set srq_limit value to SRQ hw context. Remove all such workload checks and make a direct call to set srq_limit via doorbell SRQ_ARM. 
Fixes: 37cb11acf1f7 ("RDMA/bnxt_re: Add SRQ support for Broadcom adapters") Signed-off-by: Kashyap Desai Signed-off-by: Saravanan Vajravel Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250805101000.233310-3-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 8 ++----- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 27 ------------------------ drivers/infiniband/hw/bnxt_re/qplib_fp.h | 2 -- 3 files changed, 2 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 37c2bc3bdba5..260dc67b8b87 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1921,7 +1921,6 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); struct bnxt_re_dev *rdev = srq->rdev; - int rc; switch (srq_attr_mask) { case IB_SRQ_MAX_WR: @@ -1933,11 +1932,8 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, return -EINVAL; srq->qplib_srq.threshold = srq_attr->srq_limit; - rc = bnxt_qplib_modify_srq(&rdev->qplib_res, &srq->qplib_srq); - if (rc) { - ibdev_err(&rdev->ibdev, "Modify HW SRQ failed!"); - return rc; - } + bnxt_qplib_srq_arm_db(&srq->qplib_srq.dbinfo, srq->qplib_srq.threshold); + /* On success, update the shadow */ srq->srq_limit = srq_attr->srq_limit; /* No need to Build and send response back to udata */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index b2c1240775f4..ee36b3d82cc0 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -706,7 +706,6 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, srq->dbinfo.max_slot = 1; srq->dbinfo.priv_db = res->dpi_tbl.priv_db; bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA); - srq->arm_req = false; return 0; fail: 
@@ -716,24 +715,6 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, return rc; } -int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res, - struct bnxt_qplib_srq *srq) -{ - struct bnxt_qplib_hwq *srq_hwq = &srq->hwq; - u32 count; - - count = __bnxt_qplib_get_avail(srq_hwq); - if (count > srq->threshold) { - srq->arm_req = false; - bnxt_qplib_srq_arm_db(&srq->dbinfo, srq->threshold); - } else { - /* Deferred arming */ - srq->arm_req = true; - } - - return 0; -} - int bnxt_qplib_query_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_srq *srq) { @@ -775,7 +756,6 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, struct bnxt_qplib_hwq *srq_hwq = &srq->hwq; struct rq_wqe *srqe; struct sq_sge *hw_sge; - u32 count = 0; int i, next; spin_lock(&srq_hwq->lock); @@ -807,15 +787,8 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, bnxt_qplib_hwq_incr_prod(&srq->dbinfo, srq_hwq, srq->dbinfo.max_slot); - spin_lock(&srq_hwq->lock); - count = __bnxt_qplib_get_avail(srq_hwq); - spin_unlock(&srq_hwq->lock); /* Ring DB */ bnxt_qplib_ring_prod_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ); - if (srq->arm_req == true && count > srq->threshold) { - srq->arm_req = false; - bnxt_qplib_srq_arm_db(&srq->dbinfo, srq->threshold); - } return 0; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index ab125f1d949e..4921a214c34c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -546,8 +546,6 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, srqn_handler_t srq_handler); int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_srq *srq); -int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res, - struct bnxt_qplib_srq *srq); int bnxt_qplib_query_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_srq *srq); void bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res, From ba60a1e8cbbd396c69ff9c8bc3242f5ab133e38a Mon Sep 17 00:00:00 2001 From: Kalesh AP 
Date: Tue, 5 Aug 2025 15:39:59 +0530 Subject: [PATCH 1917/2411] RDMA/bnxt_re: Fix a possible memory leak in the driver The GID context reuse logic requires the context memory to be not freed if and when DEL_GID firmware command fails. But, if there's no subsequent ADD_GID to reuse it, the context memory must be freed when the driver is unloaded. Otherwise it leads to a memory leak. Below is the kmemleak trace reported: unreferenced object 0xffff88817a4f34d0 (size 8): comm "insmod", pid 1072504, jiffies 4402561550 hex dump (first 8 bytes): 01 00 00 00 00 00 00 00 ........ backtrace (crc ccaa009e): __kmalloc_cache_noprof+0x33e/0x400 0xffffffffc2db9d48 add_modify_gid+0x5e0/0xb60 [ib_core] __ib_cache_gid_add+0x213/0x350 [ib_core] update_gid+0xf2/0x180 [ib_core] enum_netdev_ipv4_ips+0x3f3/0x690 [ib_core] enum_all_gids_of_dev_cb+0x125/0x1b0 [ib_core] ib_enum_roce_netdev+0x14b/0x250 [ib_core] ib_cache_setup_one+0x2e5/0x540 [ib_core] ib_register_device+0x82c/0xf10 [ib_core] 0xffffffffc2df5ad9 0xffffffffc2da8b07 0xffffffffc2db174d auxiliary_bus_probe+0xa5/0x120 really_probe+0x1e4/0x850 __driver_probe_device+0x18f/0x3d0 Fixes: 4a62c5e9e2e1 ("RDMA/bnxt_re: Do not free the ctx_tbl entry if delete GID fails") Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250805101000.233310-4-kalesh-anakkur.purayil@broadcom.com Reviewed-by: Sriharsha Basavapatna Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/main.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 293b0a96c8e3..df7cf8d68e27 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -2017,6 +2017,28 @@ static void bnxt_re_free_nqr_mem(struct bnxt_re_dev *rdev) rdev->nqr = NULL; } +/* When DEL_GID fails, driver is not freeing GID ctx memory. 
+ * To avoid the memory leak, free the memory during unload + */ +static void bnxt_re_free_gid_ctx(struct bnxt_re_dev *rdev) +{ + struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl; + struct bnxt_re_gid_ctx *ctx, **ctx_tbl; + int i; + + if (!sgid_tbl->active) + return; + + ctx_tbl = sgid_tbl->ctx; + for (i = 0; i < sgid_tbl->max; i++) { + if (sgid_tbl->hw_id[i] == 0xFFFF) + continue; + + ctx = ctx_tbl[i]; + kfree(ctx); + } +} + static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) { u8 type; @@ -2030,6 +2052,7 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags)) cancel_delayed_work_sync(&rdev->worker); + bnxt_re_free_gid_ctx(rdev); if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags)) bnxt_re_cleanup_res(rdev); From 806b9f494f62791ee6d68f515a8056c615a0e7b2 Mon Sep 17 00:00:00 2001 From: Anantha Prabhu Date: Tue, 5 Aug 2025 15:40:00 +0530 Subject: [PATCH 1918/2411] RDMA/bnxt_re: Fix to initialize the PBL array memset the PBL page pointer and page map arrays before populating the SGL addresses of the HWQ. 
Fixes: 0c4dcd602817 ("RDMA/bnxt_re: Refactor hardware queue memory allocation") Signed-off-by: Anantha Prabhu Reviewed-by: Saravanan Vajravel Reviewed-by: Selvin Xavier Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250805101000.233310-5-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/qplib_res.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 6cd05207ffed..cc5c82d96839 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -121,6 +121,7 @@ static int __alloc_pbl(struct bnxt_qplib_res *res, pbl->pg_arr = vmalloc_array(pages, sizeof(void *)); if (!pbl->pg_arr) return -ENOMEM; + memset(pbl->pg_arr, 0, pages * sizeof(void *)); pbl->pg_map_arr = vmalloc_array(pages, sizeof(dma_addr_t)); if (!pbl->pg_map_arr) { @@ -128,6 +129,7 @@ static int __alloc_pbl(struct bnxt_qplib_res *res, pbl->pg_arr = NULL; return -ENOMEM; } + memset(pbl->pg_map_arr, 0, pages * sizeof(dma_addr_t)); pbl->pg_count = 0; pbl->pg_size = sginfo->pgsize; From 2186e8c39eb156b3557a467ce4e5dc3f24826609 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 8 Aug 2025 10:56:01 -0700 Subject: [PATCH 1919/2411] MAINTAINERS: Remove bouncing irdma maintainer This maintainer's email no longer works. Remove it from MAINTAINERS. This still leaves one maintainer for the driver. 
Signed-off-by: Dave Hansen Cc: Tatyana Nikolova Cc: linux-rdma@vger.kernel.org Link: https://patch.msgid.link/20250808175601.EF0AF767@davehans-spike.ostc.intel.com Signed-off-by: Leon Romanovsky --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fe168477caa4..b31fd6e7539c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12280,7 +12280,6 @@ F: include/linux/avf/virtchnl.h F: include/linux/net/intel/*/ INTEL ETHERNET PROTOCOL DRIVER FOR RDMA -M: Mustafa Ismail M: Tatyana Nikolova L: linux-rdma@vger.kernel.org S: Supported From 111aea0464c20f3eb25a48d5ff6c036e6b416123 Mon Sep 17 00:00:00 2001 From: Akhilesh Patil Date: Sun, 10 Aug 2025 23:21:58 +0530 Subject: [PATCH 1920/2411] RDMA/core: Free pfn_list with appropriate kvfree call Ensure that pfn_list allocated by kvcalloc() is freed using corresponding kvfree() function. Match memory allocation and free routines kvcalloc -> kvfree. Fixes: 259e9bd07c57 ("RDMA/core: Avoid hmm_dma_map_alloc() for virtual DMA devices") Signed-off-by: Akhilesh Patil Link: https://patch.msgid.link/aJjcPjL1BVh8QrMN@bhairav-test.ee.iitb.ac.in Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/umem_odp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index b1c44ec1a3f3..572a91a62a7b 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -115,7 +115,7 @@ static int ib_init_umem_odp(struct ib_umem_odp *umem_odp, out_free_map: if (ib_uses_virt_dma(dev)) - kfree(map->pfn_list); + kvfree(map->pfn_list); else hmm_dma_map_free(dev->dma_device, map); return ret; @@ -287,7 +287,7 @@ static void ib_umem_odp_free(struct ib_umem_odp *umem_odp) mutex_unlock(&umem_odp->umem_mutex); mmu_interval_notifier_remove(&umem_odp->notifier); if (ib_uses_virt_dma(dev)) - kfree(umem_odp->map.pfn_list); + kvfree(umem_odp->map.pfn_list); else hmm_dma_map_free(dev->dma_device, 
&umem_odp->map); } From fa2e2d31ee3b7212079323b4b09201ef68af3a97 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Tue, 12 Aug 2025 20:26:02 +0800 Subject: [PATCH 1921/2411] RDMA/hns: Fix dip entries leak on devices newer than hip09 DIP algorithm is also supported on devices newer than hip09, so free dip entries too. Fixes: f91696f2f053 ("RDMA/hns: Support congestion control type selection according to the FW") Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20250812122602.3524602-1-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 244a4780d3a6..f82bdd46a917 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3043,7 +3043,7 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) if (!hr_dev->is_vf) hns_roce_free_link_table(hr_dev); - if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP09) + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) free_dip_entry(hr_dev); } From f7a2e1c08727384cde1c686dd57172f99b5f2e6e Mon Sep 17 00:00:00 2001 From: Erick Karanja Date: Wed, 13 Aug 2025 10:18:36 +0300 Subject: [PATCH 1922/2411] Docs: admin-guide: Correct spelling mistake Fix spelling mistake directoy to directory Reported-by: codespell Signed-off-by: Erick Karanja Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20250813071837.668613-1-karanja99erick@gmail.com Signed-off-by: Jens Axboe --- Documentation/admin-guide/blockdev/zoned_loop.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/blockdev/zoned_loop.rst b/Documentation/admin-guide/blockdev/zoned_loop.rst index 9c7aa3b482f3..64dcfde7450a 100644 --- a/Documentation/admin-guide/blockdev/zoned_loop.rst +++ b/Documentation/admin-guide/blockdev/zoned_loop.rst @@ -79,7 +79,7 @@ 
zone_capacity_mb Device zone capacity (must always be equal to or lower than the zone size. Default: zone size. conv_zones Total number of conventioanl zones starting from sector 0. Default: 8. -base_dir Path to the base directoy where to create the directory +base_dir Path to the base directory where to create the directory containing the zone files of the device. Default=/var/local/zloop. The device directory containing the zone files is always From 8f5845e0743bf3512b71b3cb8afe06c192d6acc4 Mon Sep 17 00:00:00 2001 From: Julian Sun Date: Tue, 12 Aug 2025 23:42:57 +0800 Subject: [PATCH 1923/2411] block: restore default wbt enablement The commit 245618f8e45f ("block: protect wbt_lat_usec using q->elevator_lock") protected wbt_enable_default() with q->elevator_lock; however, it also placed wbt_enable_default() before blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);, resulting in wbt failing to be enabled. Moreover, the protection of wbt_enable_default() by q->elevator_lock was removed in commit 78c271344b6f ("block: move wbt_enable_default() out of queue freezing from sched ->exit()"), so we can directly fix this issue by placing wbt_enable_default() after blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);. Additionally, this issue also causes the inability to read the wbt_lat_usec file, and the scenario is as follows: root@q:/sys/block/sda/queue# cat wbt_lat_usec cat: wbt_lat_usec: Invalid argument root@q:/data00/sjc/linux# ls /sys/kernel/debug/block/sda/rqos cannot access '/sys/kernel/debug/block/sda/rqos': No such file or directory root@q:/data00/sjc/linux# find /sys -name wbt /sys/kernel/debug/tracing/events/wbt After testing with this patch, wbt can be enabled normally. 
Signed-off-by: Julian Sun Cc: stable@vger.kernel.org Fixes: 245618f8e45f ("block: protect wbt_lat_usec using q->elevator_lock") Reviewed-by: Nilay Shroff Reviewed-by: Yu Kuai Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250812154257.57540-1-sunjunchao@bytedance.com Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index c5cf79a20842..4a7f1a349998 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -902,9 +902,9 @@ int blk_register_queue(struct gendisk *disk) if (queue_is_mq(q)) elevator_set_default(q); - wbt_enable_default(disk); blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q); + wbt_enable_default(disk); /* Now everything is ready and send out KOBJ_ADD uevent */ kobject_uevent(&disk->queue_kobj, KOBJ_ADD); From 25db5f284fb8f30222146ca15b3ab8265789da38 Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Wed, 13 Aug 2025 11:29:29 +0800 Subject: [PATCH 1924/2411] md: add legacy_async_del_gendisk mode commit 9e59d609763f ("md: call del_gendisk in control path") changes the async way to sync way of calling del_gendisk. But it breaks mdadm --assemble command. The assemble command runs like this: 1. create the array 2. stop the array 3. access the sysfs files after stopping The sync way calls del_gendisk in step 2, so all sysfs files are removed. Now to avoid breaking mdadm assemble command, this patch adds the parameter legacy_async_del_gendisk that can be used to choose which way. The default is async way. In future, we plan to change default to sync way in kernel 7.0. Then users need to upgrade to mdadm 4.5+ which removes step 2. 
Fixes: 9e59d609763f ("md: call del_gendisk in control path") Reported-by: Mikulas Patocka Closes: https://lore.kernel.org/linux-raid/CAMw=ZnQ=ET2St-+hnhsuq34rRPnebqcXqP1QqaHW5Bh4aaaZ4g@mail.gmail.com/T/#t Suggested-and-reviewed-by: Yu Kuai Signed-off-by: Xiao Ni Reviewed-by: Paul Menzel Link: https://lore.kernel.org/linux-raid/20250813032929.54978-1-xni@redhat.com Signed-off-by: Yu Kuai --- drivers/md/md.c | 56 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index ac85ec73a409..772cffe02ff5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -339,6 +339,7 @@ static int start_readonly; * so all the races disappear. */ static bool create_on_open = true; +static bool legacy_async_del_gendisk = true; /* * We have a system wide 'event count' that is incremented @@ -877,15 +878,18 @@ void mddev_unlock(struct mddev *mddev) export_rdev(rdev, mddev); } - /* Call del_gendisk after release reconfig_mutex to avoid - * deadlock (e.g. call del_gendisk under the lock and an - * access to sysfs files waits the lock) - * And MD_DELETED is only used for md raid which is set in - * do_md_stop. dm raid only uses md_stop to stop. So dm raid - * doesn't need to check MD_DELETED when getting reconfig lock - */ - if (test_bit(MD_DELETED, &mddev->flags)) - del_gendisk(mddev->gendisk); + if (!legacy_async_del_gendisk) { + /* + * Call del_gendisk after release reconfig_mutex to avoid + * deadlock (e.g. call del_gendisk under the lock and an + * access to sysfs files waits the lock) + * And MD_DELETED is only used for md raid which is set in + * do_md_stop. dm raid only uses md_stop to stop. 
So dm raid + * doesn't need to check MD_DELETED when getting reconfig lock + */ + if (test_bit(MD_DELETED, &mddev->flags)) + del_gendisk(mddev->gendisk); + } } EXPORT_SYMBOL_GPL(mddev_unlock); @@ -5818,6 +5822,13 @@ static void md_kobj_release(struct kobject *ko) { struct mddev *mddev = container_of(ko, struct mddev, kobj); + if (legacy_async_del_gendisk) { + if (mddev->sysfs_state) + sysfs_put(mddev->sysfs_state); + if (mddev->sysfs_level) + sysfs_put(mddev->sysfs_level); + del_gendisk(mddev->gendisk); + } put_disk(mddev->gendisk); } @@ -6021,6 +6032,9 @@ static int md_alloc_and_put(dev_t dev, char *name) { struct mddev *mddev = md_alloc(dev, name); + if (legacy_async_del_gendisk) + pr_warn("md: async del_gendisk mode will be removed in future, please upgrade to mdadm-4.5+\n"); + if (IS_ERR(mddev)) return PTR_ERR(mddev); mddev_put(mddev); @@ -6431,10 +6445,22 @@ static void md_clean(struct mddev *mddev) mddev->persistent = 0; mddev->level = LEVEL_NONE; mddev->clevel[0] = 0; - /* if UNTIL_STOP is set, it's cleared here */ - mddev->hold_active = 0; - /* Don't clear MD_CLOSING, or mddev can be opened again. */ - mddev->flags &= BIT_ULL_MASK(MD_CLOSING); + + /* + * For legacy_async_del_gendisk mode, it can stop the array in the + * middle of assembling it, then it still can access the array. So + * it needs to clear MD_CLOSING. If not legacy_async_del_gendisk, + * it can't open the array again after stopping it. So it doesn't + * clear MD_CLOSING. + */ + if (legacy_async_del_gendisk && mddev->hold_active) { + clear_bit(MD_CLOSING, &mddev->flags); + } else { + /* if UNTIL_STOP is set, it's cleared here */ + mddev->hold_active = 0; + /* Don't clear MD_CLOSING, or mddev can be opened again. 
*/ + mddev->flags &= BIT_ULL_MASK(MD_CLOSING); + } mddev->sb_flags = 0; mddev->ro = MD_RDWR; mddev->metadata_type[0] = 0; @@ -6658,7 +6684,8 @@ static int do_md_stop(struct mddev *mddev, int mode) export_array(mddev); md_clean(mddev); - set_bit(MD_DELETED, &mddev->flags); + if (!legacy_async_del_gendisk) + set_bit(MD_DELETED, &mddev->flags); } md_new_event(); sysfs_notify_dirent_safe(mddev->sysfs_state); @@ -10392,6 +10419,7 @@ module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR); module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR); module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR); module_param(create_on_open, bool, S_IRUSR|S_IWUSR); +module_param(legacy_async_del_gendisk, bool, 0600); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("MD RAID framework"); From 4bcd3061e8154606af7f721cb75ca04ffe191a12 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 29 Jul 2025 19:01:45 +0930 Subject: [PATCH 1925/2411] btrfs: clear block dirty if submit_one_sector() failed [BUG] If submit_one_sector() failed, the block will be kept dirty, but with their corresponding range finished in the ordered extent. This means if a writeback happens later again, we can hit the following problems: - ASSERT(block_start != EXTENT_MAP_HOLE) in submit_one_sector() If the original extent map is a hole, then we can hit this case, as the new ordered extent failed, we will drop the new extent map and re-read one from the disk. - DEBUG_WARN() in btrfs_writepage_cow_fixup() This is because we no longer have an ordered extent for those dirty blocks. The original for them is already finished with error. [CAUSE] The function submit_one_sector() is not following the regular error handling of writeback. The common practice is to clear the folio dirty, start and finish the writeback for the block. This is normally done by extent_clear_unlock_delalloc() with PAGE_START_WRITEBACK | PAGE_END_WRITEBACK flags during run_delalloc_range(). 
So if we keep those failed blocks dirty, they will stay in the page cache and wait for the next writeback. And since the original ordered extent is already finished and removed, depending on the original extent map, we either hit the ASSERT() inside submit_one_sector(), or hit the DEBUG_WARN() in btrfs_writepage_cow_fixup(). [FIX] Follow the regular error handling to clear the dirty flag for the block, start and finish writeback for that block instead. Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f23d75986947..741c20480099 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1512,7 +1512,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, /* * Return 0 if we have submitted or queued the sector for submission. - * Return <0 for critical errors. + * Return <0 for critical errors, and the sector will have its dirty flag cleared. * * Caller should make sure filepos < i_size and handle filepos >= i_size case. */ @@ -1535,8 +1535,17 @@ static int submit_one_sector(struct btrfs_inode *inode, ASSERT(filepos < i_size); em = btrfs_get_extent(inode, NULL, filepos, sectorsize); - if (IS_ERR(em)) + if (IS_ERR(em)) { + /* + * When submission failed, we should still clear the folio dirty. + * Or the folio will be written back again but without any + * ordered extent. + */ + btrfs_folio_clear_dirty(fs_info, folio, filepos, sectorsize); + btrfs_folio_set_writeback(fs_info, folio, filepos, sectorsize); + btrfs_folio_clear_writeback(fs_info, folio, filepos, sectorsize); return PTR_ERR(em); + } extent_offset = filepos - em->start; em_end = btrfs_extent_map_end(em); @@ -1666,8 +1675,8 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode, * Here we set writeback and clear for the range. 
If the full folio * is no longer dirty then we clear the PAGECACHE_TAG_DIRTY tag. * - * If we hit any error, the corresponding sector will still be dirty - * thus no need to clear PAGECACHE_TAG_DIRTY. + * If we hit any error, the corresponding sector will have its dirty + * flag cleared and writeback finished, thus no need to handle the error case. */ if (!submitted_io && !error) { btrfs_folio_set_writeback(fs_info, folio, start, len); From 05b372862600e551bbf86e7f24a1caeed5e06150 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 29 Jul 2025 19:01:46 +0930 Subject: [PATCH 1926/2411] btrfs: clear block dirty if btrfs_writepage_cow_fixup() failed [BUG] If btrfs_writepage_cow_fixup() failed (returning value -EUCLEAN), the block will be kept dirty, but with its corresponding range finished in the ordered extent. Currently that error pattern is only possible for experimental builds, which places extra check to ensure we shouldn't hit a dirty block without a corresponding ordered extent. This means if later a writeback happens again, we can hit the following problems: - ASSERT(block_start != EXTENT_MAP_HOLE) in submit_one_sector() If the original extent map is a hole, then we can hit this case, as the new ordered extent failed, we will drop the new extent map and re-read one from the disk. - DEBUG_WARN() in btrfs_writepage_cow_fixup() This is because we no longer have an ordered extent for those dirty blocks. The original for them is already finished with error. [CAUSE] The function btrfs_writepage_cow_fixup() is not following the regular error handling of writeback. The common practice is to clear the folio dirty, start and finish the writeback for the block. This is normally done by extent_clear_unlock_delalloc() with PAGE_START_WRITEBACK | PAGE_END_WRITEBACK flags during run_delalloc_range(). So if we keep those failed blocks dirty, they will stay in the page cache and wait for the next writeback. 
And since the original ordered extent is already finished and removed, depending on the original extent map, we either hit the ASSERT() inside submit_one_sector(), or hit the DEBUG_WARN() in btrfs_writepage_cow_fixup() again (and very ironic). [FIX] Follow the regular error handling to clear the dirty flag for the block range, start and finish writeback for that block range instead. Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 741c20480099..be9c9c804952 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1618,8 +1618,12 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode, folio_unlock(folio); return 1; } - if (ret < 0) + if (ret < 0) { + btrfs_folio_clear_dirty(fs_info, folio, start, len); + btrfs_folio_set_writeback(fs_info, folio, start, len); + btrfs_folio_clear_writeback(fs_info, folio, start, len); return ret; + } for (cur = start; cur < start + len; cur += fs_info->sectorsize) set_bit((cur - folio_start) >> fs_info->sectorsize_bits, &range_bitmap); From f022499f24e520706b9a8238746e1cacc37eb4e0 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 1 Aug 2025 16:39:49 +0100 Subject: [PATCH 1927/2411] btrfs: do not set mtime/ctime to current time when unlinking for log replay If we are doing an unlink for log replay, we are updating the directory's mtime and ctime to the current time, and this is incorrect since it should stay with the mtime and ctime that were set when the directory was logged. This is the same as when adding a link to an inode during log replay (with btrfs_add_link()), where we want the mtime and ctime to be the values that were in place when the inode was logged. 
This was found with generic/547 using LOAD_FACTOR=20 and TIME_FACTOR=20, where due to large log trees we have longer log replay times and fssum could detect a mismatch of the mtime and ctime of a directory. Fix this by skipping the mtime and ctime update at __btrfs_unlink_inode() if we are in log replay context (just like btrfs_add_link()). Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d740910e071a..9e4aec7330cb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4189,6 +4189,23 @@ int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, return ret; } +static void update_time_after_link_or_unlink(struct btrfs_inode *dir) +{ + struct timespec64 now; + + /* + * If we are replaying a log tree, we do not want to update the mtime + * and ctime of the parent directory with the current time, since the + * log replay procedure is responsible for setting them to their correct + * values (the ones it had when the fsync was done). + */ + if (test_bit(BTRFS_FS_LOG_RECOVERING, &dir->root->fs_info->flags)) + return; + + now = inode_set_ctime_current(&dir->vfs_inode); + inode_set_mtime_to_ts(&dir->vfs_inode, now); +} + /* * unlink helper that gets used here in inode.c and in the tree logging * recovery code. 
It remove a link in a directory with a given name, and @@ -4289,7 +4306,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, inode_inc_iversion(&inode->vfs_inode); inode_set_ctime_current(&inode->vfs_inode); inode_inc_iversion(&dir->vfs_inode); - inode_set_mtime_to_ts(&dir->vfs_inode, inode_set_ctime_current(&dir->vfs_inode)); + update_time_after_link_or_unlink(dir); return btrfs_update_inode(trans, dir); } @@ -6683,15 +6700,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size + name->len * 2); inode_inc_iversion(&parent_inode->vfs_inode); - /* - * If we are replaying a log tree, we do not want to update the mtime - * and ctime of the parent directory with the current time, since the - * log replay procedure is responsible for setting them to their correct - * values (the ones it had when the fsync was done). - */ - if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) - inode_set_mtime_to_ts(&parent_inode->vfs_inode, - inode_set_ctime_current(&parent_inode->vfs_inode)); + update_time_after_link_or_unlink(parent_inode); ret = btrfs_update_inode(trans, parent_inode); if (ret) From 1f3d56db694cce6dfbffba0f398a06a222204487 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 31 Jul 2025 08:20:01 +0930 Subject: [PATCH 1928/2411] btrfs: clear TAG_TOWRITE from buffer tree when submitting a tree block [POSSIBLE BUG] After commit 5e121ae687b8 ("btrfs: use buffer xarray for extent buffer writeback operations"), we have a dedicated xarray for extent buffers, and a lot of tags are migrated to that buffer tree, like PAGECACHE_TAG_TOWRITE/DIRTY/WRITEBACK. This frees us from the limits of page flags, but there is a new asymmetric behavior, we call buffer_tree_tag_for_writeback() to set PAGECACHE_TAG_TOWRITE for the involved ranges, but there is no one to clear that tag. Before that rework, we relied on the page cache tag which was cleared when folio_start_writeback() was called. 
Although this has its own problems (e.g. the first one calling folio_start_writeback() will clear the tag for the whole page), it at least cleared the tag. But now our real tags are stored in the buffer tree, no one is really clearing the PAGECACHE_TAG_TOWRITE tag now. [FIX] Thankfully this is not going to cause any real bug, but just some inefficiency iterating the extent buffers. As if we hit an extent buffer which is not dirty but still has the PAGECACHE_TAG_TOWRITE tag, lock_extent_buffer_for_io() will skip it so we won't writeback the extent buffer again. To properly fix the inefficiency, just clear the PAGECACHE_TAG_TOWRITE inside lock_extent_buffer_for_io(). There is no error path between lock_extent_buffer_for_io() and write_one_eb(), so we're safe to clear the tag there. Reviewed-by: Naohiro Aota Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index be9c9c804952..c953297aa89a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1826,6 +1826,7 @@ static noinline_for_stack bool lock_extent_buffer_for_io(struct extent_buffer *e xas_load(&xas); xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK); xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY); + xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); xas_unlock_irqrestore(&xas, flags); btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); From b1511360c8ac882b0c52caa263620538e8d73220 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 31 Jul 2025 12:46:56 +0900 Subject: [PATCH 1929/2411] btrfs: subpage: keep TOWRITE tag until folio is cleaned btrfs_subpage_set_writeback() calls folio_start_writeback() the first time a folio is written back, and it also clears the PAGECACHE_TAG_TOWRITE tag even if there are still dirty blocks in the folio. This can break ordering guarantees, such as those required by btrfs_wait_ordered_extents(). That ordering breakage leads to a real failure. 
For example, running generic/464 on a zoned setup will hit the following ASSERT. This happens because the broken ordering fails to flush existing dirty pages before the file size is truncated. assertion failed: !list_empty(&ordered->list) :: 0, in fs/btrfs/zoned.c:1899 ------------[ cut here ]------------ kernel BUG at fs/btrfs/zoned.c:1899! Oops: invalid opcode: 0000 [#1] SMP NOPTI CPU: 2 UID: 0 PID: 1906169 Comm: kworker/u130:2 Kdump: loaded Not tainted 6.16.0-rc6-BTRFS-ZNS+ #554 PREEMPT(voluntary) Hardware name: Supermicro Super Server/H12SSL-NT, BIOS 2.0 02/22/2021 Workqueue: btrfs-endio-write btrfs_work_helper [btrfs] RIP: 0010:btrfs_finish_ordered_zoned.cold+0x50/0x52 [btrfs] RSP: 0018:ffffc9002efdbd60 EFLAGS: 00010246 RAX: 000000000000004c RBX: ffff88811923c4e0 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff827e38b1 RDI: 00000000ffffffff RBP: ffff88810005d000 R08: 00000000ffffdfff R09: ffffffff831051c8 R10: ffffffff83055220 R11: 0000000000000000 R12: ffff8881c2458c00 R13: ffff88811923c540 R14: ffff88811923c5e8 R15: ffff8881c1bd9680 FS: 0000000000000000(0000) GS:ffff88a04acd0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f907c7a918c CR3: 0000000004024000 CR4: 0000000000350ef0 Call Trace: ? srso_return_thunk+0x5/0x5f btrfs_finish_ordered_io+0x4a/0x60 [btrfs] btrfs_work_helper+0xf9/0x490 [btrfs] process_one_work+0x204/0x590 ? srso_return_thunk+0x5/0x5f worker_thread+0x1d6/0x3d0 ? __pfx_worker_thread+0x10/0x10 kthread+0x118/0x230 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x205/0x260 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Consider process A calling writepages() with WB_SYNC_NONE. In zoned mode or for compressed writes, it locks several folios for delalloc and starts writing them out. Let's call the last locked folio folio X. Suppose the write range only partially covers folio X, leaving some pages dirty. Process A calls btrfs_subpage_set_writeback() when building a bio. 
This function call clears the TOWRITE tag of folio X, whose size = 8K and the block size = 4K. It is following state. 0 4K 8K |/////|/////| (flag: DIRTY, tag: DIRTY) <-----> Process A will write this range. Now suppose process B concurrently calls writepages() with WB_SYNC_ALL. It calls tag_pages_for_writeback() to tag dirty folios with PAGECACHE_TAG_TOWRITE. Since folio X is still dirty, it gets tagged. Then, B collects tagged folios using filemap_get_folios_tag() and must wait for folio X to be written before returning from writepages(). 0 4K 8K |/////|/////| (flag: DIRTY, tag: DIRTY|TOWRITE) However, between tagging and collecting, process A may call btrfs_subpage_set_writeback() and clear folio X's TOWRITE tag. 0 4K 8K | |/////| (flag: DIRTY|WRITEBACK, tag: DIRTY) As a result, process B won't see folio X in its batch, and returns without waiting for it. This breaks the WB_SYNC_ALL ordering requirement. Fix this by using btrfs_subpage_set_writeback_keepwrite(), which retains the TOWRITE tag. We now manually clear the tag only after the folio becomes clean, via the xas operation. Fixes: 3470da3b7d87 ("btrfs: subpage: introduce helpers for writeback status") CC: stable@vger.kernel.org # 6.12+ Reviewed-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/subpage.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index c9b3821957f7..cb4f97833dc3 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -448,8 +448,25 @@ void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info, spin_lock_irqsave(&bfs->lock, flags); bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + + /* + * Don't clear the TOWRITE tag when starting writeback on a still-dirty + * folio. 
Doing so can cause WB_SYNC_ALL writepages() to overlook it, + * assume writeback is complete, and exit too early — violating sync + * ordering guarantees. + */ if (!folio_test_writeback(folio)) - folio_start_writeback(folio); + __folio_start_writeback(folio, true); + if (!folio_test_dirty(folio)) { + struct address_space *mapping = folio_mapping(folio); + XA_STATE(xas, &mapping->i_pages, folio->index); + unsigned long flags; + + xas_lock_irqsave(&xas, flags); + xas_load(&xas); + xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); + xas_unlock_irqrestore(&xas, flags); + } spin_unlock_irqrestore(&bfs->lock, flags); } From dc61d97b0ba064fb21b01fbfa7436873948277bd Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 12 Aug 2025 01:32:58 +0900 Subject: [PATCH 1930/2411] btrfs: fix buffer index in wait_eb_writebacks() The commit f2cb97ee964a ("btrfs: index buffer_tree using node size") changed the index of buffer_tree from "start >> sectorsize_bits" to "start >> nodesize_bits". However, the change is not applied for wait_eb_writebacks() and caused IO failures by writing in a full zone. Use the index properly. 
Fixes: f2cb97ee964a ("btrfs: index buffer_tree using node size") Reviewed-by: Qu Wenruo Reviewed-by: Boris Burkov Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index e0ee3aeabd2c..ea662036f441 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -2242,7 +2242,7 @@ static void wait_eb_writebacks(struct btrfs_block_group *block_group) struct btrfs_fs_info *fs_info = block_group->fs_info; const u64 end = block_group->start + block_group->length; struct extent_buffer *eb; - unsigned long index, start = (block_group->start >> fs_info->sectorsize_bits); + unsigned long index, start = (block_group->start >> fs_info->nodesize_bits); rcu_read_lock(); xa_for_each_start(&fs_info->buffer_tree, index, eb, start) { From edf842abe4368ce3c423343cf4b23b210fcf1622 Mon Sep 17 00:00:00 2001 From: Kyoji Ogasawara Date: Wed, 23 Jul 2025 00:38:37 +0900 Subject: [PATCH 1931/2411] btrfs: fix incorrect log message for nobarrier mount option Fix a wrong log message that appears when the "nobarrier" mount option is unset. When "nobarrier" is unset, barrier is actually enabled. However, the log incorrectly stated "turning off barriers". 
Fixes: eddb1a433f26 ("btrfs: add reconfigure callback for fs_context") CC: stable@vger.kernel.org # 6.12+ Reviewed-by: Qu Wenruo Signed-off-by: Kyoji Ogasawara Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 466d0450269c..768a2532fa4a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1462,7 +1462,7 @@ static void btrfs_emit_options(struct btrfs_fs_info *info, btrfs_info_if_unset(info, old, NODATACOW, "setting datacow"); btrfs_info_if_unset(info, old, SSD, "not using ssd optimizations"); btrfs_info_if_unset(info, old, SSD_SPREAD, "not using spread ssd allocation scheme"); - btrfs_info_if_unset(info, old, NOBARRIER, "turning off barriers"); + btrfs_info_if_unset(info, old, NOBARRIER, "turning on barriers"); btrfs_info_if_unset(info, old, NOTREELOG, "enabling tree log"); btrfs_info_if_unset(info, old, SPACE_CACHE, "disabling disk space caching"); btrfs_info_if_unset(info, old, FREE_SPACE_TREE, "disabling free space tree"); From b435ab556bea875c088485f271ef2709ca1d75f5 Mon Sep 17 00:00:00 2001 From: Kyoji Ogasawara Date: Wed, 13 Aug 2025 03:00:06 +0900 Subject: [PATCH 1932/2411] btrfs: restore mount option info messages during mount After the fsconfig migration in 6.8, mount option info messages are no longer displayed during mount operations because btrfs_emit_options() is only called during remount, not during initial mount. Fix this by calling btrfs_emit_options() in btrfs_fill_super() after open_ctree() succeeds. Additionally, prevent log duplication by ensuring btrfs_check_options() handles validation with warn-level and err-level messages, while btrfs_emit_options() provides info-level messages. 
Fixes: eddb1a433f26 ("btrfs: add reconfigure callback for fs_context") CC: stable@vger.kernel.org # 6.8+ Reviewed-by: Qu Wenruo Signed-off-by: Kyoji Ogasawara Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 768a2532fa4a..8469f36ef011 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -88,6 +88,9 @@ struct btrfs_fs_context { refcount_t refs; }; +static void btrfs_emit_options(struct btrfs_fs_info *info, + struct btrfs_fs_context *old); + enum { Opt_acl, Opt_clear_cache, @@ -698,12 +701,9 @@ bool btrfs_check_options(const struct btrfs_fs_info *info, if (!test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state)) { if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) { - btrfs_info(info, "disk space caching is enabled"); btrfs_warn(info, "space cache v1 is being deprecated and will be removed in a future release, please use -o space_cache=v2"); } - if (btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE)) - btrfs_info(info, "using free-space-tree"); } return ret; @@ -980,6 +980,8 @@ static int btrfs_fill_super(struct super_block *sb, return ret; } + btrfs_emit_options(fs_info, NULL); + inode = btrfs_iget(BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root); if (IS_ERR(inode)) { ret = PTR_ERR(inode); From 74857fdc5dd2cdcdeb6e99bdf26976fd9299d2bb Mon Sep 17 00:00:00 2001 From: Kyoji Ogasawara Date: Wed, 13 Aug 2025 03:00:07 +0900 Subject: [PATCH 1933/2411] btrfs: fix printing of mount info messages for NODATACOW/NODATASUM The NODATASUM message was printed twice by mistake and the NODATACOW was missing from the 'unset' part. Fix the duplication and make the output look the same. 
Fixes: eddb1a433f26 ("btrfs: add reconfigure callback for fs_context") CC: stable@vger.kernel.org # 6.8+ Reviewed-by: Qu Wenruo Signed-off-by: Kyoji Ogasawara Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8469f36ef011..7f31f8bd63ba 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1439,7 +1439,7 @@ static void btrfs_emit_options(struct btrfs_fs_info *info, { btrfs_info_if_set(info, old, NODATASUM, "setting nodatasum"); btrfs_info_if_set(info, old, DEGRADED, "allowing degraded mounts"); - btrfs_info_if_set(info, old, NODATASUM, "setting nodatasum"); + btrfs_info_if_set(info, old, NODATACOW, "setting nodatacow"); btrfs_info_if_set(info, old, SSD, "enabling ssd optimizations"); btrfs_info_if_set(info, old, SSD_SPREAD, "using spread ssd allocation scheme"); btrfs_info_if_set(info, old, NOBARRIER, "turning off barriers"); @@ -1461,6 +1461,7 @@ static void btrfs_emit_options(struct btrfs_fs_info *info, btrfs_info_if_set(info, old, IGNOREMETACSUMS, "ignoring meta csums"); btrfs_info_if_set(info, old, IGNORESUPERFLAGS, "ignoring unknown super block flags"); + btrfs_info_if_unset(info, old, NODATASUM, "setting datasum"); btrfs_info_if_unset(info, old, NODATACOW, "setting datacow"); btrfs_info_if_unset(info, old, SSD, "not using ssd optimizations"); btrfs_info_if_unset(info, old, SSD_SPREAD, "not using spread ssd allocation scheme"); From 9d83e1f05c98bab5de350bef89177e2be8b34db0 Mon Sep 17 00:00:00 2001 From: Fengnan Chang Date: Wed, 13 Aug 2025 20:02:14 +0800 Subject: [PATCH 1934/2411] io_uring/io-wq: add check free worker before create new worker After commit 0b2b066f8a85 ("io_uring/io-wq: only create a new worker if it can make progress"), in our production environment, we still observe that some io_worker threads keep being created and destroyed.
After analysis, it was confirmed that this was due to a more complex scenario involving a large number of fsync operations, which can be abstracted as frequent write + fsync operations on multiple files in a single uring instance. Since write is a hash operation while fsync is not, and fsync is likely to be suspended during execution, the action of checking the hash value in io_wq_dec_running cannot handle such scenarios. Similarly, if hash-based work and non-hash-based work are sent at the same time, similar issues are likely to occur. Returning to the starting point of the issue, when a new work item arrives, io_wq_enqueue may wake up free worker A, while io_wq_dec_running may create worker B. Ultimately, only one of A and B can obtain and process the task, leaving the other in an idle state. In the end, the issue is caused by inconsistent logic in the checks performed by io_wq_enqueue and io_wq_dec_running. Therefore, the problem can be resolved by checking for available workers in io_wq_dec_running.
Signed-off-by: Fengnan Chang Reviewed-by: Diangang Li Link: https://lore.kernel.org/r/20250813120214.18729-1-changfengnan@bytedance.com Signed-off-by: Jens Axboe --- io_uring/io-wq.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index be91edf34f01..17dfaa0395c4 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -357,6 +357,13 @@ static void create_worker_cb(struct callback_head *cb) worker = container_of(cb, struct io_worker, create_work); wq = worker->wq; acct = worker->acct; + + rcu_read_lock(); + do_create = !io_acct_activate_free_worker(acct); + rcu_read_unlock(); + if (!do_create) + goto no_need_create; + raw_spin_lock(&acct->workers_lock); if (acct->nr_workers < acct->max_workers) { @@ -367,6 +374,7 @@ static void create_worker_cb(struct callback_head *cb) if (do_create) { create_io_worker(wq, acct); } else { +no_need_create: atomic_dec(&acct->nr_running); io_worker_ref_put(wq); } From 47ed64db8c17eb16541098add865178fb7e68744 Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Wed, 13 Aug 2025 18:07:08 +0800 Subject: [PATCH 1935/2411] ASoC: tas2781: Normalize the volume kcontrol name Change the name of the kcontrol from "Gain" to "Volume". 
Signed-off-by: Baojun Xu Link: https://patch.msgid.link/20250813100708.12197-1-baojun.xu@ti.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas2781-i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index 8e7e45c046b8..676130f4cf3e 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -908,10 +908,10 @@ static const struct snd_kcontrol_new tasdevice_cali_controls[] = { }; static const struct snd_kcontrol_new tas2781_snd_controls[] = { - SOC_SINGLE_RANGE_EXT_TLV("Speaker Analog Gain", TAS2781_AMP_LEVEL, + SOC_SINGLE_RANGE_EXT_TLV("Speaker Analog Volume", TAS2781_AMP_LEVEL, 1, 0, 20, 0, tas2781_amp_getvol, tas2781_amp_putvol, amp_vol_tlv), - SOC_SINGLE_RANGE_EXT_TLV("Speaker Digital Gain", TAS2781_DVC_LVL, + SOC_SINGLE_RANGE_EXT_TLV("Speaker Digital Volume", TAS2781_DVC_LVL, 0, 0, 200, 1, tas2781_digital_getvol, tas2781_digital_putvol, dvc_tlv), }; From e664036cf36480414936cd91f4cfa2179a3d8367 Mon Sep 17 00:00:00 2001 From: Miao Li Date: Fri, 1 Aug 2025 16:27:28 +0800 Subject: [PATCH 1936/2411] usb: quirks: Add DELAY_INIT quirk for another SanDisk 3.2Gen1 Flash Drive Another SanDisk 3.2Gen1 Flash Drive also needs the DELAY_INIT quirk, or it will randomly work incorrectly on Huawei hisi platforms when doing a reboot test. Signed-off-by: Miao Li Cc: stable Link: https://lore.kernel.org/r/20250801082728.469406-1-limiao870622@163.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index ff0ff95d5cca..f5bc53875330 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -371,6 +371,7 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0781, 0x5591), .driver_info = USB_QUIRK_NO_LPM }, /* SanDisk Corp. 
SanDisk 3.2Gen1 */ + { USB_DEVICE(0x0781, 0x5596), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x0781, 0x55a3), .driver_info = USB_QUIRK_DELAY_INIT }, /* SanDisk Extreme 55AE */ From 202ad1aaca777dc7fd24f459f5f808f5abd2bfda Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 1 Aug 2025 18:40:41 +0100 Subject: [PATCH 1937/2411] usb: gadget: tegra-xudc: fix PM use count underflow Upon resume from system suspend, the PM runtime core issues the following warning: tegra-xudc 3550000.usb: Runtime PM usage count underflow! This is because tegra_xudc_resume() unconditionally calls schedule_work(&xudc->usb_role_sw_work) whether or not anything has changed, which causes tegra_xudc_device_mode_off() to be called even when we're already in that mode. Keep track of the current state of "device_mode", and only schedule this work if it has changed from the hardware state on resume. Signed-off-by: "Russell King (Oracle)" Link: https://lore.kernel.org/r/E1uhtkH-007KDZ-JT@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/tegra-xudc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c index 2957316fd3d0..1d3085cc9d22 100644 --- a/drivers/usb/gadget/udc/tegra-xudc.c +++ b/drivers/usb/gadget/udc/tegra-xudc.c @@ -502,6 +502,7 @@ struct tegra_xudc { struct clk_bulk_data *clks; bool device_mode; + bool current_device_mode; struct work_struct usb_role_sw_work; struct phy **usb3_phy; @@ -715,6 +716,8 @@ static void tegra_xudc_device_mode_on(struct tegra_xudc *xudc) phy_set_mode_ext(xudc->curr_utmi_phy, PHY_MODE_USB_OTG, USB_ROLE_DEVICE); + + xudc->current_device_mode = true; } static void tegra_xudc_device_mode_off(struct tegra_xudc *xudc) @@ -725,6 +728,8 @@ static void tegra_xudc_device_mode_off(struct tegra_xudc *xudc) dev_dbg(xudc->dev, "device mode off\n"); + xudc->current_device_mode = false; + connected = !!(xudc_readl(xudc, PORTSC) 
& PORTSC_CCS); reinit_completion(&xudc->disconnect_complete); @@ -4044,10 +4049,10 @@ static int __maybe_unused tegra_xudc_resume(struct device *dev) spin_lock_irqsave(&xudc->lock, flags); xudc->suspended = false; + if (xudc->device_mode != xudc->current_device_mode) + schedule_work(&xudc->usb_role_sw_work); spin_unlock_irqrestore(&xudc->lock, flags); - schedule_work(&xudc->usb_role_sw_work); - pm_runtime_enable(dev); return 0; From f9420f4757752f056144896024d5ea89e5a611f1 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 3 Aug 2025 00:55:20 +0200 Subject: [PATCH 1938/2411] usb: renesas-xhci: Fix External ROM access timeouts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Increase the External ROM access timeouts to prevent failures during programming of External SPI EEPROM chips. The current timeouts are too short for some SPI EEPROMs used with uPD720201 controllers. The current timeout for Chip Erase in renesas_rom_erase() is 100 ms , the current timeout for Sector Erase issued by the controller before Page Program in renesas_fw_download_image() is also 100 ms. Neither timeout is sufficient for e.g. the Macronix MX25L5121E or MX25V5126F. MX25L5121E reference manual [1] page 35 section "ERASE AND PROGRAMMING PERFORMANCE" and page 23 section "Table 8. AC CHARACTERISTICS (Temperature = 0°C to 70°C for Commercial grade, VCC = 2.7V ~ 3.6V)" row "tCE" indicate that the maximum time required for Chip Erase opcode to complete is 2 s, and for Sector Erase it is 300 ms . MX25V5126F reference manual [2] page 47 section "13. ERASE AND PROGRAMMING PERFORMANCE (2.3V - 3.6V)" and page 42 section "Table 8. AC CHARACTERISTICS (Temperature = -40°C to 85°C for Industrial grade, VCC = 2.3V - 3.6V)" row "tCE" indicate that the maximum time required for Chip Erase opcode to complete is 3.2 s, and for Sector Erase it is 400 ms . Update the timeouts such, that Chip Erase timeout is set to 5 seconds, and Sector Erase timeout is set to 500 ms. 
Such lengthy timeouts ought to be sufficient for majority of SPI EEPROM chips. [1] https://www.macronix.com/Lists/Datasheet/Attachments/8634/MX25L5121E,%203V,%20512Kb,%20v1.3.pdf [2] https://www.macronix.com/Lists/Datasheet/Attachments/8750/MX25V5126F,%202.5V,%20512Kb,%20v1.1.pdf Fixes: 2478be82de44 ("usb: renesas-xhci: Add ROM loader for uPD720201") Cc: stable Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20250802225526.25431-1-marek.vasut+renesas@mailbox.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci-renesas.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-pci-renesas.c b/drivers/usb/host/xhci-pci-renesas.c index 620f8f0febb8..86df80399c9f 100644 --- a/drivers/usb/host/xhci-pci-renesas.c +++ b/drivers/usb/host/xhci-pci-renesas.c @@ -47,8 +47,9 @@ #define RENESAS_ROM_ERASE_MAGIC 0x5A65726F #define RENESAS_ROM_WRITE_MAGIC 0x53524F4D -#define RENESAS_RETRY 10000 -#define RENESAS_DELAY 10 +#define RENESAS_RETRY 50000 /* 50000 * RENESAS_DELAY ~= 500ms */ +#define RENESAS_CHIP_ERASE_RETRY 500000 /* 500000 * RENESAS_DELAY ~= 5s */ +#define RENESAS_DELAY 10 #define RENESAS_FW_NAME "renesas_usb_fw.mem" @@ -407,7 +408,7 @@ static void renesas_rom_erase(struct pci_dev *pdev) /* sleep a bit while ROM is erased */ msleep(20); - for (i = 0; i < RENESAS_RETRY; i++) { + for (i = 0; i < RENESAS_CHIP_ERASE_RETRY; i++) { retval = pci_read_config_byte(pdev, RENESAS_ROM_STATUS, &status); status &= RENESAS_ROM_STATUS_ERASE; From 8fe06185e11ae753414aa6117f0e798aa77567ff Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Wed, 6 Aug 2025 16:39:55 +0800 Subject: [PATCH 1939/2411] usb: core: hcd: fix accessing unmapped memory in SINGLE_STEP_SET_FEATURE test The USB core will unmap urb->transfer_dma after SETUP stage completes. Then the USB controller will access unmapped memory when it received device descriptor. If iommu is equipped, the entire test can't be completed due to the memory accessing is blocked. 
Fix it by calling map_urb_for_dma() again for IN stage. To reduce redundant map for urb->transfer_buffer, this will also set URB_NO_TRANSFER_DMA_MAP flag before first map_urb_for_dma() to skip dma map for urb->transfer_buffer and clear URB_NO_TRANSFER_DMA_MAP flag before second map_urb_for_dma(). Fixes: 216e0e563d81 ("usb: core: hcd: use map_urb_for_dma for single step set feature urb") Cc: stable Reviewed-by: Jun Li Signed-off-by: Xu Yang Acked-by: Alan Stern Link: https://lore.kernel.org/r/20250806083955.3325299-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 03771bbc6c01..c4a1875b5d3d 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2166,7 +2166,7 @@ static struct urb *request_single_step_set_feature_urb( urb->complete = usb_ehset_completion; urb->status = -EINPROGRESS; urb->actual_length = 0; - urb->transfer_flags = URB_DIR_IN; + urb->transfer_flags = URB_DIR_IN | URB_NO_TRANSFER_DMA_MAP; usb_get_urb(urb); atomic_inc(&urb->use_count); atomic_inc(&urb->dev->urbnum); @@ -2230,9 +2230,15 @@ int ehset_single_step_set_feature(struct usb_hcd *hcd, int port) /* Complete remaining DATA and STATUS stages using the same URB */ urb->status = -EINPROGRESS; + urb->transfer_flags &= ~URB_NO_TRANSFER_DMA_MAP; usb_get_urb(urb); atomic_inc(&urb->use_count); atomic_inc(&urb->dev->urbnum); + if (map_urb_for_dma(hcd, urb, GFP_KERNEL)) { + usb_put_urb(urb); + goto out1; + } + retval = hcd->driver->submit_single_step_set_feature(hcd, urb, 0); if (!retval && !wait_for_completion_timeout(&done, msecs_to_jiffies(2000))) { From 6ca8af3c8fb584f3424a827f554ff74f898c27cd Mon Sep 17 00:00:00 2001 From: Mael GUERIN Date: Wed, 6 Aug 2025 18:44:03 +0200 Subject: [PATCH 1940/2411] USB: storage: Add unusual-devs entry for Novatek NTK96550-based camera Add the US_FL_BULK_IGNORE_TAG quirk for Novatek NTK96550-based 
camera to fix USB resets after sending SCSI vendor commands due to CBW and CSW tags difference, leading to undesired slowness while communicating with the device. Please find below the copy of /sys/kernel/debug/usb/devices with my device plugged in (listed as TechSys USB mass storage here, the underlying chipset being the Novatek NTK96550-based camera): T: Bus=03 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=0603 ProdID=8611 Rev= 0.01 S: Manufacturer=TechSys S: Product=USB Mass Storage S: SerialNumber=966110000000100 C:* #Ifs= 1 Cfg#= 1 Atr=c0 MxPwr=100mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Mael GUERIN Cc: stable Acked-by: Alan Stern Link: https://lore.kernel.org/r/20250806164406.43450-1-mael.guerin@murena.io Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 54f0b1c83317..bee9f1e8003d 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -934,6 +934,13 @@ UNUSUAL_DEV( 0x05e3, 0x0723, 0x9451, 0x9451, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_SANE_SENSE ), +/* Added by Maël GUERIN */ +UNUSUAL_DEV( 0x0603, 0x8611, 0x0000, 0xffff, + "Novatek", + "NTK96550-based camera", + USB_SC_SCSI, USB_PR_BULK, NULL, + US_FL_BULK_IGNORE_TAG ), + /* * Reported by Hanno Boeck * Taken from the Lycoris Kernel From 58577118cc7cec9eb7c1836bf88f865ff2c5e3a3 Mon Sep 17 00:00:00 2001 From: Kuen-Han Tsai Date: Thu, 7 Aug 2025 17:06:55 +0800 Subject: [PATCH 1941/2411] usb: dwc3: Ignore late xferNotReady event to prevent halt timeout During a device-initiated disconnect, the End Transfer command resets the event filter, allowing a new xferNotReady event to be generated before the controller 
is fully halted. Processing this late event incorrectly triggers a Start Transfer, which prevents the controller from halting and results in a DSTS.DEVCTLHLT bit polling timeout. Ignore the late xferNotReady event if the controller is already in a disconnected state. Fixes: 72246da40f37 ("usb: Introduce DesignWare USB3 DRD Driver") Cc: stable Signed-off-by: Kuen-Han Tsai Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250807090700.2397190-1-khtsai@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 25db36c63951..68fa2813e5f4 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3777,6 +3777,15 @@ static void dwc3_gadget_endpoint_transfer_complete(struct dwc3_ep *dep, static void dwc3_gadget_endpoint_transfer_not_ready(struct dwc3_ep *dep, const struct dwc3_event_depevt *event) { + /* + * During a device-initiated disconnect, a late xferNotReady event can + * be generated after the End Transfer command resets the event filter, + * but before the controller is halted. Ignore it to prevent a new + * transfer from starting. + */ + if (!dep->dwc->connected) + return; + dwc3_gadget_endpoint_frame_from_event(dep, event); /* From 86f390ba59cd8d5755bafe2b163c3e6b89d6bbd9 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 12 Aug 2025 16:11:00 +0300 Subject: [PATCH 1942/2411] usb: dwc3: pci: add support for the Intel Wildcat Lake This patch adds the necessary PCI ID for Intel Wildcat Lake devices. 
Signed-off-by: Heikki Krogerus Cc: stable Link: https://lore.kernel.org/r/20250812131101.2930199-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 54a4ee2b90b7..39c72cb52ce7 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -41,6 +41,7 @@ #define PCI_DEVICE_ID_INTEL_TGPLP 0xa0ee #define PCI_DEVICE_ID_INTEL_TGPH 0x43ee #define PCI_DEVICE_ID_INTEL_JSP 0x4dee +#define PCI_DEVICE_ID_INTEL_WCL 0x4d7e #define PCI_DEVICE_ID_INTEL_ADL 0x460e #define PCI_DEVICE_ID_INTEL_ADL_PCH 0x51ee #define PCI_DEVICE_ID_INTEL_ADLN 0x465e @@ -431,6 +432,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, TGPLP, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, TGPH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, JSP, &dwc3_pci_intel_swnode) }, + { PCI_DEVICE_DATA(INTEL, WCL, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, ADL, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, ADL_PCH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, ADLN, &dwc3_pci_intel_swnode) }, From 9528d32873b38281ae105f2f5799e79ae9d086c2 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 11 Aug 2025 10:27:45 +0200 Subject: [PATCH 1943/2411] kcov, usb: Don't disable interrupts in kcov_remote_start_usb_softirq() kcov_remote_start_usb_softirq() the begin of urb's completion callback. HCDs marked HCD_BH will invoke this function from the softirq and in_serving_softirq() will detect this properly. Root-HUB (RH) requests will not be delayed to softirq but complete immediately in IRQ context. This will confuse kcov because in_serving_softirq() will report true if the softirq is served after the hardirq and if the softirq got interrupted by the hardirq in which currently runs. 
This was addressed by simply disabling interrupts in kcov_remote_start_usb_softirq() which avoided the interruption by the RH while a regular completion callback was invoked. This not only changes the behaviour while kcov is enabled but also breaks PREEMPT_RT because now sleeping locks can no longer be acquired. Revert the previous fix. Address the issue by invoking kcov_remote_start_usb() only if the context is just "serving softirqs", which is identified by in_serving_softirq() being true and in_hardirq() being false. Fixes: f85d39dd7ed89 ("kcov, usb: disable interrupts in kcov_remote_start_usb_softirq") Cc: stable Reported-by: Yunseong Kim Closes: https://lore.kernel.org/all/20250725201400.1078395-2-ysk@kzalloc.com/ Tested-by: Yunseong Kim Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250811082745.ycJqBXMs@linutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 12 +++++------ include/linux/kcov.h | 47 ++++++++---------------------------------- 2 files changed, 14 insertions(+), 45 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index c4a1875b5d3d..6270fbb5c699 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1636,7 +1636,6 @@ static void __usb_hcd_giveback_urb(struct urb *urb) struct usb_hcd *hcd = bus_to_hcd(urb->dev->bus); struct usb_anchor *anchor = urb->anchor; int status = urb->unlinked; - unsigned long flags; urb->hcpriv = NULL; if (unlikely((urb->transfer_flags & URB_SHORT_NOT_OK) && @@ -1654,14 +1653,13 @@ static void __usb_hcd_giveback_urb(struct urb *urb) /* pass ownership to the completion handler */ urb->status = status; /* - * Only collect coverage in the softirq context and disable interrupts - * to avoid scenarios with nested remote coverage collection sections - * that KCOV does not support. - * See the comment next to kcov_remote_start_usb_softirq() for details. 
+ * This function can be called in task context inside another remote + * coverage collection section, but kcov doesn't support that kind of + * recursion yet. Only collect coverage in softirq context for now. */ - flags = kcov_remote_start_usb_softirq((u64)urb->dev->bus->busnum); + kcov_remote_start_usb_softirq((u64)urb->dev->bus->busnum); urb->complete(urb); - kcov_remote_stop_softirq(flags); + kcov_remote_stop_softirq(); usb_anchor_resume_wakeups(anchor); atomic_dec(&urb->use_count); diff --git a/include/linux/kcov.h b/include/linux/kcov.h index 75a2fb8b16c3..0143358874b0 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -57,47 +57,21 @@ static inline void kcov_remote_start_usb(u64 id) /* * The softirq flavor of kcov_remote_*() functions is introduced as a temporary - * workaround for KCOV's lack of nested remote coverage sections support. - * - * Adding support is tracked in https://bugzilla.kernel.org/show_bug.cgi?id=210337. - * - * kcov_remote_start_usb_softirq(): - * - * 1. Only collects coverage when called in the softirq context. This allows - * avoiding nested remote coverage collection sections in the task context. - * For example, USB/IP calls usb_hcd_giveback_urb() in the task context - * within an existing remote coverage collection section. Thus, KCOV should - * not attempt to start collecting coverage within the coverage collection - * section in __usb_hcd_giveback_urb() in this case. - * - * 2. Disables interrupts for the duration of the coverage collection section. - * This allows avoiding nested remote coverage collection sections in the - * softirq context (a softirq might occur during the execution of a work in - * the BH workqueue, which runs with in_serving_softirq() > 0). 
- * For example, usb_giveback_urb_bh() runs in the BH workqueue with - * interrupts enabled, so __usb_hcd_giveback_urb() might be interrupted in - * the middle of its remote coverage collection section, and the interrupt - * handler might invoke __usb_hcd_giveback_urb() again. + * work around for kcov's lack of nested remote coverage sections support in + * task context. Adding support for nested sections is tracked in: + * https://bugzilla.kernel.org/show_bug.cgi?id=210337 */ -static inline unsigned long kcov_remote_start_usb_softirq(u64 id) +static inline void kcov_remote_start_usb_softirq(u64 id) { - unsigned long flags = 0; - - if (in_serving_softirq()) { - local_irq_save(flags); + if (in_serving_softirq() && !in_hardirq()) kcov_remote_start_usb(id); - } - - return flags; } -static inline void kcov_remote_stop_softirq(unsigned long flags) +static inline void kcov_remote_stop_softirq(void) { - if (in_serving_softirq()) { + if (in_serving_softirq() && !in_hardirq()) kcov_remote_stop(); - local_irq_restore(flags); - } } #ifdef CONFIG_64BIT @@ -131,11 +105,8 @@ static inline u64 kcov_common_handle(void) } static inline void kcov_remote_start_common(u64 id) {} static inline void kcov_remote_start_usb(u64 id) {} -static inline unsigned long kcov_remote_start_usb_softirq(u64 id) -{ - return 0; -} -static inline void kcov_remote_stop_softirq(unsigned long flags) {} +static inline void kcov_remote_start_usb_softirq(u64 id) {} +static inline void kcov_remote_stop_softirq(void) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ From 421255afa2a58eee2109dda56c137a7b61c4b05f Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 11 Aug 2025 18:08:33 +0800 Subject: [PATCH 1944/2411] usb: chipidea: imx: improve usbmisc_imx7d_pullup() When add workaround for ERR051725, the usbmisc will put PHY to Non-driving mode (OPMODE = 01) after stopping the device controller and put PHY back to Normal mode (OPMODE = 00) after starting the device controller. 
However, this will bring issue for host controller. Because the PHY may stay in Non-driving mode after switching the role from device to host. Then the port will not work if USB device is attached. To fix this issue, improving the workaround by putting PHY to Non-driving mode for a certain period and back to Normal mode finally. To make host detect a disconnect signal, the period should be at least 125us (a micro-frame time) for high-speed link. And only working as high-speed mode will need workaround for ERR051725. So this will also filter the pullup event for high-speed. Fixes: 11992b410083 ("usb: chipidea: imx: implement workaround for ERR051725") Reviewed-by: Jun Li Signed-off-by: Xu Yang Acked-by: Peter Chen Link: https://lore.kernel.org/r/20250811100833.862876-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci_hdrc_imx.c | 3 ++- drivers/usb/chipidea/usbmisc_imx.c | 23 ++++++++++++++++------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index e1ec9b38f5b9..d7c2a1a3c271 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -338,7 +338,8 @@ static int ci_hdrc_imx_notify_event(struct ci_hdrc *ci, unsigned int event) schedule_work(&ci->usb_phy->chg_work); break; case CI_HDRC_CONTROLLER_PULLUP_EVENT: - if (ci->role == CI_ROLE_GADGET) + if (ci->role == CI_ROLE_GADGET && + ci->gadget.speed == USB_SPEED_HIGH) imx_usbmisc_pullup(data->usbmisc_data, ci->gadget.connected); break; diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c index 3d20c5e76c6a..b1418885707c 100644 --- a/drivers/usb/chipidea/usbmisc_imx.c +++ b/drivers/usb/chipidea/usbmisc_imx.c @@ -1068,15 +1068,24 @@ static void usbmisc_imx7d_pullup(struct imx_usbmisc_data *data, bool on) unsigned long flags; u32 val; + if (on) + return; + spin_lock_irqsave(&usbmisc->lock, flags); val = readl(usbmisc->base + 
MX7D_USBNC_USB_CTRL2); - if (!on) { - val &= ~MX7D_USBNC_USB_CTRL2_OPMODE_OVERRIDE_MASK; - val |= MX7D_USBNC_USB_CTRL2_OPMODE(1); - val |= MX7D_USBNC_USB_CTRL2_OPMODE_OVERRIDE_EN; - } else { - val &= ~MX7D_USBNC_USB_CTRL2_OPMODE_OVERRIDE_EN; - } + val &= ~MX7D_USBNC_USB_CTRL2_OPMODE_OVERRIDE_MASK; + val |= MX7D_USBNC_USB_CTRL2_OPMODE(1); + val |= MX7D_USBNC_USB_CTRL2_OPMODE_OVERRIDE_EN; + writel(val, usbmisc->base + MX7D_USBNC_USB_CTRL2); + spin_unlock_irqrestore(&usbmisc->lock, flags); + + /* Last for at least 1 micro-frame to let host see disconnect signal */ + usleep_range(125, 150); + + spin_lock_irqsave(&usbmisc->lock, flags); + val &= ~MX7D_USBNC_USB_CTRL2_OPMODE_OVERRIDE_MASK; + val |= MX7D_USBNC_USB_CTRL2_OPMODE(0); + val &= ~MX7D_USBNC_USB_CTRL2_OPMODE_OVERRIDE_EN; writel(val, usbmisc->base + MX7D_USBNC_USB_CTRL2); spin_unlock_irqrestore(&usbmisc->lock, flags); } From 98da66a70ad2396e5a508c4245367797ebc052ce Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 13 Aug 2025 16:52:49 +0200 Subject: [PATCH 1945/2411] usb: storage: realtek_cr: Use correct byte order for bcs->Residue Since 'bcs->Residue' has the data type '__le32', convert it to the correct byte order of the CPU using this driver when assigning it to the local variable 'residue'. 
Cc: stable Fixes: 50a6cb932d5c ("USB: usb_storage: add ums-realtek driver") Suggested-by: Alan Stern Acked-by: Alan Stern Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20250813145247.184717-3-thorsten.blum@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/realtek_cr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/storage/realtek_cr.c b/drivers/usb/storage/realtek_cr.c index 7dea28c2b8ee..cb5bbb19060e 100644 --- a/drivers/usb/storage/realtek_cr.c +++ b/drivers/usb/storage/realtek_cr.c @@ -252,7 +252,7 @@ static int rts51x_bulk_transport(struct us_data *us, u8 lun, return USB_STOR_TRANSPORT_ERROR; } - residue = bcs->Residue; + residue = le32_to_cpu(bcs->Residue); if (bcs->Tag != us->tag) return USB_STOR_TRANSPORT_ERROR; From a5ba9ad417254c49ecf06ac5ab36ec4b12ee133f Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 4 Aug 2025 19:13:11 +0200 Subject: [PATCH 1946/2411] rust: faux: fix C header link Starting with Rust 1.91.0 (expected 2025-10-30), `rustdoc` has improved some false negatives around intra-doc links [1], and it found a broken intra-doc link we currently have: error: unresolved link to `include/linux/device/faux.h` --> rust/kernel/faux.rs:7:17 | 7 | //! C header: [`include/linux/device/faux.h`] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ no item named `include/linux/device/faux.h` in scope | = help: to escape `[` and `]` characters, add '\' before them like `\[` or `\]` = note: `-D rustdoc::broken-intra-doc-links` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(rustdoc::broken_intra_doc_links)]` Our `srctree/` C header links are not intra-doc links, thus they need the link destination. Thus fix it. 
Cc: stable Link: https://github.com/rust-lang/rust/pull/132748 [1] Fixes: 78418f300d39 ("rust/kernel: Add faux device bindings") Signed-off-by: Miguel Ojeda Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250804171311.1186538-1-ojeda@kernel.org Signed-off-by: Greg Kroah-Hartman --- rust/kernel/faux.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/faux.rs b/rust/kernel/faux.rs index 7a906099993f..7fe2dd197e37 100644 --- a/rust/kernel/faux.rs +++ b/rust/kernel/faux.rs @@ -4,7 +4,7 @@ //! //! This module provides bindings for working with faux devices in kernel modules. //! -//! C header: [`include/linux/device/faux.h`] +//! C header: [`include/linux/device/faux.h`](srctree/include/linux/device/faux.h) use crate::{bindings, device, error::code::*, prelude::*}; use core::ptr::{addr_of_mut, null, null_mut, NonNull}; From 23cbfd6fed78715459a4395c034c4e76b8c85320 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 13 Aug 2025 17:36:27 +0200 Subject: [PATCH 1947/2411] ALSA: azt3328: Put __maybe_unused for inline functions for gameport Some inline functions are unused depending on kconfig, and the recent change for clang builds made those handled as errors with W=1. For avoiding pitfalls, mark those with __maybe_unused attributes. 
Link: https://patch.msgid.link/20250813153628.12303-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/azt3328.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/pci/azt3328.c b/sound/pci/azt3328.c index 4418b9ae33e6..b33344f65b8c 100644 --- a/sound/pci/azt3328.c +++ b/sound/pci/azt3328.c @@ -412,25 +412,25 @@ snd_azf3328_ctrl_outl(const struct snd_azf3328 *chip, unsigned reg, u32 value) outl(value, chip->ctrl_io + reg); } -static inline void +static inline void __maybe_unused snd_azf3328_game_outb(const struct snd_azf3328 *chip, unsigned reg, u8 value) { outb(value, chip->game_io + reg); } -static inline void +static inline void __maybe_unused snd_azf3328_game_outw(const struct snd_azf3328 *chip, unsigned reg, u16 value) { outw(value, chip->game_io + reg); } -static inline u8 +static inline u8 __maybe_unused snd_azf3328_game_inb(const struct snd_azf3328 *chip, unsigned reg) { return inb(chip->game_io + reg); } -static inline u16 +static inline u16 __maybe_unused snd_azf3328_game_inw(const struct snd_azf3328 *chip, unsigned reg) { return inw(chip->game_io + reg); From ee8f1613596ad44c7cff4805d65a8a705998db11 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Wed, 13 Aug 2025 09:03:08 -0500 Subject: [PATCH 1948/2411] Revert "ALSA: hda: Add ASRock X670E Taichi to denylist" On a motherboard with an AMD Granite Ridge CPU there is a report that 3.5mm microphone and headphones aren't working. In the log it's observed: snd_hda_intel 0000:02:00.6: Skipping the device on the denylist This was because of commit df42ee7e22f03 ("ALSA: hda: Add ASRock X670E Taichi to denylist"). Reverting this commit allows the microphone and headphones to work again. As at least some combinations of this motherboard do have applicable devices, revert so that they can be probed. 
Cc: Richard Gong Cc: Juan Martinez Signed-off-by: Mario Limonciello (AMD) Link: https://patch.msgid.link/20250813140427.1577172-1-superm1@kernel.org Signed-off-by: Takashi Iwai --- sound/hda/controllers/intel.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/hda/controllers/intel.c b/sound/hda/controllers/intel.c index fcf67e97a546..1bb3ff55b115 100644 --- a/sound/hda/controllers/intel.c +++ b/sound/hda/controllers/intel.c @@ -2077,7 +2077,6 @@ static const struct pci_device_id driver_denylist[] = { { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1043, 0x874f) }, /* ASUS ROG Zenith II / Strix */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb59) }, /* MSI TRX40 Creator */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb60) }, /* MSI TRX40 */ - { PCI_DEVICE_SUB(0x1022, 0x15e3, 0x1022, 0xd601) }, /* ASRock X670E Taichi */ {} }; From 7d34ec36abb84fdfb6632a0f2cbda90379ae21fc Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 11 Aug 2025 23:14:55 -0500 Subject: [PATCH 1949/2411] smb3: fix for slab out of bounds on mount to ksmbd With KASAN enabled, it is possible to get a slab out of bounds during mount to ksmbd due to missing check in parse_server_interfaces() (see below): BUG: KASAN: slab-out-of-bounds in parse_server_interfaces+0x14ee/0x1880 [cifs] Read of size 4 at addr ffff8881433dba98 by task mount/9827 CPU: 5 UID: 0 PID: 9827 Comm: mount Tainted: G OE 6.16.0-rc2-kasan #2 PREEMPT(voluntary) Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: Dell Inc. Precision Tower 3620/0MWYPT, BIOS 2.13.1 06/14/2019 Call Trace: dump_stack_lvl+0x9f/0xf0 print_report+0xd1/0x670 __virt_addr_valid+0x22c/0x430 ? parse_server_interfaces+0x14ee/0x1880 [cifs] ? kasan_complete_mode_report_info+0x2a/0x1f0 ? parse_server_interfaces+0x14ee/0x1880 [cifs] kasan_report+0xd6/0x110 parse_server_interfaces+0x14ee/0x1880 [cifs] __asan_report_load_n_noabort+0x13/0x20 parse_server_interfaces+0x14ee/0x1880 [cifs] ? __pfx_parse_server_interfaces+0x10/0x10 [cifs] ? 
trace_hardirqs_on+0x51/0x60 SMB3_request_interfaces+0x1ad/0x3f0 [cifs] ? __pfx_SMB3_request_interfaces+0x10/0x10 [cifs] ? SMB2_tcon+0x23c/0x15d0 [cifs] smb3_qfs_tcon+0x173/0x2b0 [cifs] ? __pfx_smb3_qfs_tcon+0x10/0x10 [cifs] ? cifs_get_tcon+0x105d/0x2120 [cifs] ? do_raw_spin_unlock+0x5d/0x200 ? cifs_get_tcon+0x105d/0x2120 [cifs] ? __pfx_smb3_qfs_tcon+0x10/0x10 [cifs] cifs_mount_get_tcon+0x369/0xb90 [cifs] ? dfs_cache_find+0xe7/0x150 [cifs] dfs_mount_share+0x985/0x2970 [cifs] ? check_path.constprop.0+0x28/0x50 ? save_trace+0x54/0x370 ? __pfx_dfs_mount_share+0x10/0x10 [cifs] ? __lock_acquire+0xb82/0x2ba0 ? __kasan_check_write+0x18/0x20 cifs_mount+0xbc/0x9e0 [cifs] ? __pfx_cifs_mount+0x10/0x10 [cifs] ? do_raw_spin_unlock+0x5d/0x200 ? cifs_setup_cifs_sb+0x29d/0x810 [cifs] cifs_smb3_do_mount+0x263/0x1990 [cifs] Reported-by: Namjae Jeon Tested-by: Namjae Jeon Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index ad8947434b71..218b6ce7ff3a 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -772,6 +772,13 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, bytes_left -= sizeof(*p); break; } + /* Validate that Next doesn't point beyond the buffer */ + if (next > bytes_left) { + cifs_dbg(VFS, "%s: invalid Next pointer %zu > %zd\n", + __func__, next, bytes_left); + rc = -EINVAL; + goto out; + } p = (struct network_interface_info_ioctl_rsp *)((u8 *)p+next); bytes_left -= next; } @@ -783,7 +790,9 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, } /* Azure rounds the buffer size up 8, to a 16 byte boundary */ - if ((bytes_left > 8) || p->Next) + if ((bytes_left > 8) || + (bytes_left >= offsetof(struct network_interface_info_ioctl_rsp, Next) + + sizeof(p->Next) && p->Next)) cifs_dbg(VFS, "%s: incomplete interface info\n", __func__); ses->iface_last_update = 
jiffies; From e3835731e169a48a2c73018d135b5c08c39ea61d Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Mon, 11 Aug 2025 22:07:37 +0800 Subject: [PATCH 1950/2411] smb: client: fix mid_q_entry memleak leak with per-mid locking This is step 4/4 of a patch series to fix mid_q_entry memory leaks caused by race conditions in callback execution. In compound_send_recv(), when wait_for_response() is interrupted by signals, the code attempts to cancel pending requests by changing their callbacks to cifs_cancelled_callback. However, there's a race condition between signal interruption and network response processing that causes both mid_q_entry and server buffer leaks: ``` User foreground process cifsd cifs_readdir open_cached_dir cifs_send_recv compound_send_recv smb2_setup_request smb2_mid_entry_alloc smb2_get_mid_entry smb2_mid_entry_alloc mempool_alloc // alloc mid kref_init(&temp->refcount); // refcount = 1 mid[0]->callback = cifs_compound_callback; mid[1]->callback = cifs_compound_last_callback; smb_send_rqst rc = wait_for_response wait_event_state TASK_KILLABLE cifs_demultiplex_thread allocate_buffers server->bigbuf = cifs_buf_get() standard_receive3 ->find_mid() smb2_find_mid __smb2_find_mid kref_get(&mid->refcount) // +1 cifs_handle_standard handle_mid /* bigbuf will also leak */ mid->resp_buf = server->bigbuf server->bigbuf = NULL; dequeue_mid /* in for loop */ mids[0]->callback cifs_compound_callback /* Signal interrupts wait: rc = -ERESTARTSYS */ /* if (... || midQ[i]->mid_state == MID_RESPONSE_RECEIVED) */ midQ[0]->callback = cifs_cancelled_callback; cancelled_mid[i] = true; /* The change comes too late */ mid->mid_state = MID_RESPONSE_READY release_mid // -1 /* cancelled_mid[i] == true causes mid won't be released in compound_send_recv cleanup */ /* cifs_cancelled_callback won't be executed to release mid */ ``` The root cause is that there's a race between callback assignment and execution.
Fix this by introducing per-mid locking: - Add spinlock_t mid_lock to struct mid_q_entry - Add mid_execute_callback() for atomic callback execution - Use mid_lock in cancellation paths to ensure atomicity This ensures that either the original callback or the cancellation callback executes atomically, preventing reference count leaks when requests are interrupted by signals. Link: https://bugzilla.kernel.org/show_bug.cgi?id=220404 Fixes: ee258d79159a ("CIFS: Move credit processing to mid callbacks for SMB3") Signed-off-by: Wang Zhaolong Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 21 +++++++++++++++++++++ fs/smb/client/cifstransport.c | 19 +++++++++---------- fs/smb/client/connect.c | 8 ++++---- fs/smb/client/smb2ops.c | 4 ++-- fs/smb/client/smb2transport.c | 1 + fs/smb/client/transport.c | 7 +++---- 6 files changed, 40 insertions(+), 20 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index e6830ab3a546..1e64a4fb6af0 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1732,6 +1732,7 @@ struct mid_q_entry { int mid_rc; /* rc for MID_RC */ __le16 command; /* smb command code */ unsigned int optype; /* operation type */ + spinlock_t mid_lock; bool wait_cancelled:1; /* Cancelled while waiting for response */ bool deleted_from_q:1; /* Whether Mid has been dequeued frem pending_mid_q */ bool large_buf:1; /* if valid response, is pointer to large buf */ @@ -2036,6 +2037,9 @@ require use of the stronger protocol */ * cifsFileInfo->file_info_lock cifsFileInfo->count cifs_new_fileinfo * ->invalidHandle initiate_cifs_search * ->oplock_break_cancelled + * mid_q_entry->mid_lock mid_q_entry->callback alloc_mid + * smb2_mid_entry_alloc + * (Any fields of mid_q_entry that will need protection) ****************************************************************************/ #ifdef DECLARE_GLOBALS_HERE @@ -2375,6 +2379,23 @@ static inline bool cifs_netbios_name(const char *name, size_t namelen) return ret; } +/* + * 
Execute mid callback atomically - ensures callback runs exactly once + * and prevents sleeping in atomic context. + */ +static inline void mid_execute_callback(struct mid_q_entry *mid) +{ + void (*callback)(struct mid_q_entry *mid); + + spin_lock(&mid->mid_lock); + callback = mid->callback; + mid->callback = NULL; /* Mark as executed, */ + spin_unlock(&mid->mid_lock); + + if (callback) + callback(mid); +} + #define CIFS_REPARSE_SUPPORT(tcon) \ ((tcon)->posix_extensions || \ (le32_to_cpu((tcon)->fsAttrInfo.Attributes) & \ diff --git a/fs/smb/client/cifstransport.c b/fs/smb/client/cifstransport.c index 352dafb888dd..e98b95eff8c9 100644 --- a/fs/smb/client/cifstransport.c +++ b/fs/smb/client/cifstransport.c @@ -46,6 +46,7 @@ alloc_mid(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); memset(temp, 0, sizeof(struct mid_q_entry)); kref_init(&temp->refcount); + spin_lock_init(&temp->mid_lock); temp->mid = get_mid(smb_buffer); temp->pid = current->pid; temp->command = cpu_to_le16(smb_buffer->Command); @@ -345,16 +346,15 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, rc = wait_for_response(server, midQ); if (rc != 0) { send_cancel(server, &rqst, midQ); - spin_lock(&server->mid_queue_lock); - if (midQ->mid_state == MID_REQUEST_SUBMITTED || - midQ->mid_state == MID_RESPONSE_RECEIVED) { + spin_lock(&midQ->mid_lock); + if (midQ->callback) { /* no longer considered to be "in-flight" */ midQ->callback = release_mid; - spin_unlock(&server->mid_queue_lock); + spin_unlock(&midQ->mid_lock); add_credits(server, &credits, 0); return rc; } - spin_unlock(&server->mid_queue_lock); + spin_unlock(&midQ->mid_lock); } rc = cifs_sync_mid_result(midQ, server); @@ -527,15 +527,14 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, rc = wait_for_response(server, midQ); if (rc) { send_cancel(server, &rqst, midQ); - spin_lock(&server->mid_queue_lock); - if (midQ->mid_state == MID_REQUEST_SUBMITTED 
|| - midQ->mid_state == MID_RESPONSE_RECEIVED) { + spin_lock(&midQ->mid_lock); + if (midQ->callback) { /* no longer considered to be "in-flight" */ midQ->callback = release_mid; - spin_unlock(&server->mid_queue_lock); + spin_unlock(&midQ->mid_lock); return rc; } - spin_unlock(&server->mid_queue_lock); + spin_unlock(&midQ->mid_lock); } /* We got the response - restart system call. */ diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 587845a2452d..281ccbeea719 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -335,7 +335,7 @@ cifs_abort_connection(struct TCP_Server_Info *server) cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__); list_for_each_entry_safe(mid, nmid, &retry_list, qhead) { list_del_init(&mid->qhead); - mid->callback(mid); + mid_execute_callback(mid); release_mid(mid); } @@ -919,7 +919,7 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type) list_del_init(&mid->qhead); mid->mid_rc = mid_rc; mid->mid_state = MID_RC; - mid->callback(mid); + mid_execute_callback(mid); release_mid(mid); } @@ -1117,7 +1117,7 @@ clean_demultiplex_info(struct TCP_Server_Info *server) mid_entry = list_entry(tmp, struct mid_q_entry, qhead); cifs_dbg(FYI, "Callback mid %llu\n", mid_entry->mid); list_del_init(&mid_entry->qhead); - mid_entry->callback(mid_entry); + mid_execute_callback(mid_entry); release_mid(mid_entry); } /* 1/8th of sec is more than enough time for them to exit */ @@ -1394,7 +1394,7 @@ cifs_demultiplex_thread(void *p) } if (!mids[i]->multiRsp || mids[i]->multiEnd) - mids[i]->callback(mids[i]); + mid_execute_callback(mids[i]); release_mid(mids[i]); } else if (server->ops->is_oplock_break && diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 218b6ce7ff3a..3b251de874ec 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -4814,7 +4814,7 @@ static void smb2_decrypt_offload(struct work_struct *work) dw->server->ops->is_network_name_deleted(dw->buf, dw->server); - 
mid->callback(mid); + mid_execute_callback(mid); } else { spin_lock(&dw->server->srv_lock); if (dw->server->tcpStatus == CifsNeedReconnect) { @@ -4822,7 +4822,7 @@ static void smb2_decrypt_offload(struct work_struct *work) mid->mid_state = MID_RETRY_NEEDED; spin_unlock(&dw->server->mid_queue_lock); spin_unlock(&dw->server->srv_lock); - mid->callback(mid); + mid_execute_callback(mid); } else { spin_lock(&dw->server->mid_queue_lock); mid->mid_state = MID_REQUEST_SUBMITTED; diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index ff9ef7fcd010..bc0e92eb2b64 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -771,6 +771,7 @@ smb2_mid_entry_alloc(const struct smb2_hdr *shdr, temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); memset(temp, 0, sizeof(struct mid_q_entry)); kref_init(&temp->refcount); + spin_lock_init(&temp->mid_lock); temp->mid = le64_to_cpu(shdr->MessageId); temp->credits = credits > 0 ? credits : 1; temp->pid = current->pid; diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 32d528b4dd83..a61ba7f3fb86 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1005,15 +1005,14 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, cifs_server_dbg(FYI, "Cancelling wait for mid %llu cmd: %d\n", midQ[i]->mid, le16_to_cpu(midQ[i]->command)); send_cancel(server, &rqst[i], midQ[i]); - spin_lock(&server->mid_queue_lock); + spin_lock(&midQ[i]->mid_lock); midQ[i]->wait_cancelled = true; - if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED || - midQ[i]->mid_state == MID_RESPONSE_RECEIVED) { + if (midQ[i]->callback) { midQ[i]->callback = cifs_cancelled_callback; cancelled_mid[i] = true; credits[i].value = 0; } - spin_unlock(&server->mid_queue_lock); + spin_unlock(&midQ[i]->mid_lock); } } From 8c48e1c7520321cc87ff651e96093e2f412785fb Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 12 Aug 2025 18:45:06 +0200 Subject: [PATCH 1951/2411] smb: client: don't wait 
for info->send_pending == 0 on error We already called ib_drain_qp() before and that makes sure send_done() was called with IB_WC_WR_FLUSH_ERR, but didn't call atomic_dec_and_test(&sc->send_io.pending.count). So we may never reach the info->send_pending == 0 condition. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: 5349ae5e05fa ("smb: client: let send_done() cleanup before calling smbd_disconnect_rdma_connection()") Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index c628e91c328b..02d6db431fd4 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1337,10 +1337,6 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "cancelling idle timer\n"); cancel_delayed_work_sync(&info->idle_timer_work); - log_rdma_event(INFO, "wait for all send posted to IB to finish\n"); - wait_event(info->wait_send_pending, - atomic_read(&info->send_pending) == 0); - /* It's not possible for upper layer to get to reassembly */ log_rdma_event(INFO, "drain the reassembly queue\n"); do { @@ -1986,7 +1982,11 @@ int smbd_send(struct TCP_Server_Info *server, */ wait_event(info->wait_send_pending, - atomic_read(&info->send_pending) == 0); + atomic_read(&info->send_pending) == 0 || + sc->status != SMBDIRECT_SOCKET_CONNECTED); + + if (sc->status != SMBDIRECT_SOCKET_CONNECTED && rc == 0) + rc = -EAGAIN; return rc; } From e3f776d30a56286aaf882b96c92e797b7587a6ab Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 9 Aug 2025 09:17:46 -0500 Subject: [PATCH 1952/2411] cifs: update internal version number to 2.56 Signed-off-by: Steve French --- fs/smb/client/cifsfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index 487f39cff77e..3ce7c614ccc0
100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -145,6 +145,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 55 -#define CIFS_VERSION "2.55" +#define SMB3_PRODUCT_BUILD 56 +#define CIFS_VERSION "2.56" #endif /* _CIFSFS_H */ From e19d8dd694d261ac26adb2a26121a37c107c81ad Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Fri, 1 Aug 2025 17:07:24 +0800 Subject: [PATCH 1953/2411] smb: client: remove redundant lstrp update in negotiate protocol Commit 34331d7beed7 ("smb: client: fix first command failure during re-negotiation") addressed a race condition by updating lstrp before entering negotiate state. However, this approach may have some unintended side effects. The lstrp field is documented as "when we got last response from this server", and updating it before actually receiving a server response could potentially affect other mechanisms that rely on this timestamp. For example, the SMB echo detection logic also uses lstrp as a reference point. In scenarios with frequent user operations during reconnect states, the repeated calls to cifs_negotiate_protocol() might continuously update lstrp, which could interfere with the echo detection timing. Additionally, commit 266b5d02e14f ("smb: client: fix race condition in negotiate timeout by using more precise timing") introduced a dedicated neg_start field specifically for tracking negotiate start time. This provides a more precise solution for the original race condition while preserving the intended semantics of lstrp. Since the race condition is now properly handled by the neg_start mechanism, the lstrp update in cifs_negotiate_protocol() is no longer necessary and can be safely removed. 
Fixes: 266b5d02e14f ("smb: client: fix race condition in negotiate timeout by using more precise timing") Cc: stable@vger.kernel.org Acked-by: Paulo Alcantara (Red Hat) Signed-off-by: Wang Zhaolong Signed-off-by: Steve French --- fs/smb/client/connect.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 281ccbeea719..dd12f3eb61dc 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -4205,7 +4205,6 @@ cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses, return 0; } - server->lstrp = jiffies; server->tcpStatus = CifsInNegotiate; server->neg_start = jiffies; spin_unlock(&server->srv_lock); From 0b3ccb76b95bd06cf80124d8adda647c82a6cc0f Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Wed, 9 Jul 2025 16:08:36 +0200 Subject: [PATCH 1954/2411] drm/msm/dsi: Fix 14nm DSI PHY PLL Lock issue To configure and enable the DSI PHY PLL clocks, the MDSS AHB clock must be active for MMIO operations. Typically, this AHB clock is enabled as part of the DSI PHY interface enabling (dsi_phy_enable_resource). However, since these PLL clocks are registered as clock entities, they can be enabled independently of the DSI PHY interface, leading to enabling failures and subsequent warnings: ``` msm_dsi_phy 5e94400.phy: [drm:dsi_pll_14nm_vco_prepare] *ERROR* DSI PLL lock failed ------------[ cut here ]------------ dsi0pllbyte already disabled WARNING: CPU: 3 PID: 1 at drivers/clk/clk.c:1194 clk_core_disable+0xa4/0xac CPU: 3 UID: 0 PID: 1 Comm: swapper/0 Tainted: Tainted: [W]=WARN Hardware name: Qualcomm Technologies, Inc. Robotics RB1 (DT) pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [...] ``` This issue is particularly prevalent at boot time during the disabling of unused clocks (clk_disable_unused()) which includes enabling the parent clock(s) when CLK_OPS_PARENT_ENABLE flag is set (this is the case for the 14nm DSI PHY PLL consumers). 
To resolve this issue, we move the AHB clock as a PM dependency of the DSI PHY device (via pm_clk). Since the DSI PHY device is the parent of the PLL clocks, this resolves the PLL/AHB dependency. Now the AHB clock is enabled prior to the PLL clk_prepare callback, as part of the runtime-resume chain. We also eliminate dsi_phy_[enable|disable]_resource functions, which are superseded by runtime PM. Note that it breaks compatibility with kernels before 6.0, as we no longer support the legacy `iface_clk` name. Signed-off-by: Loic Poulain Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/663239/ Link: https://lore.kernel.org/r/20250709140836.124143-1-loic.poulain@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 59 ++++++++------------------- drivers/gpu/drm/msm/dsi/phy/dsi_phy.h | 1 - 2 files changed, 18 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 221f12db5f8b..4ea681130dba 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -5,6 +5,8 @@ #include #include +#include +#include #include #include "dsi_phy.h" @@ -511,30 +513,6 @@ int msm_dsi_cphy_timing_calc_v4(struct msm_dsi_dphy_timing *timing, return 0; } -static int dsi_phy_enable_resource(struct msm_dsi_phy *phy) -{ - struct device *dev = &phy->pdev->dev; - int ret; - - ret = pm_runtime_resume_and_get(dev); - if (ret) - return ret; - - ret = clk_prepare_enable(phy->ahb_clk); - if (ret) { - DRM_DEV_ERROR(dev, "%s: can't enable ahb clk, %d\n", __func__, ret); - pm_runtime_put_sync(dev); - } - - return ret; -} - -static void dsi_phy_disable_resource(struct msm_dsi_phy *phy) -{ - clk_disable_unprepare(phy->ahb_clk); - pm_runtime_put(&phy->pdev->dev); -} - static const struct of_device_id dsi_phy_dt_match[] = { #ifdef CONFIG_DRM_MSM_DSI_28NM_PHY { .compatible = "qcom,dsi-phy-28nm-hpm", @@ -698,22 +676,20 @@ static int
dsi_phy_driver_probe(struct platform_device *pdev) if (ret) return ret; - phy->ahb_clk = msm_clk_get(pdev, "iface"); - if (IS_ERR(phy->ahb_clk)) - return dev_err_probe(dev, PTR_ERR(phy->ahb_clk), - "Unable to get ahb clk\n"); + platform_set_drvdata(pdev, phy); - ret = devm_pm_runtime_enable(&pdev->dev); + ret = devm_pm_runtime_enable(dev); if (ret) return ret; - /* PLL init will call into clk_register which requires - * register access, so we need to enable power and ahb clock. - */ - ret = dsi_phy_enable_resource(phy); + ret = devm_pm_clk_create(dev); if (ret) return ret; + ret = pm_clk_add(dev, "iface"); + if (ret < 0) + return dev_err_probe(dev, ret, "Unable to get iface clk\n"); + if (phy->cfg->ops.pll_init) { ret = phy->cfg->ops.pll_init(phy); if (ret) @@ -727,18 +703,19 @@ static int dsi_phy_driver_probe(struct platform_device *pdev) return dev_err_probe(dev, ret, "Failed to register clk provider\n"); - dsi_phy_disable_resource(phy); - - platform_set_drvdata(pdev, phy); - return 0; } +static const struct dev_pm_ops dsi_phy_pm_ops = { + SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL) +}; + static struct platform_driver dsi_phy_platform_driver = { .probe = dsi_phy_driver_probe, .driver = { .name = "msm_dsi_phy", .of_match_table = dsi_phy_dt_match, + .pm = &dsi_phy_pm_ops, }, }; @@ -764,9 +741,9 @@ int msm_dsi_phy_enable(struct msm_dsi_phy *phy, dev = &phy->pdev->dev; - ret = dsi_phy_enable_resource(phy); + ret = pm_runtime_resume_and_get(dev); if (ret) { - DRM_DEV_ERROR(dev, "%s: resource enable failed, %d\n", + DRM_DEV_ERROR(dev, "%s: resume failed, %d\n", __func__, ret); goto res_en_fail; } @@ -810,7 +787,7 @@ int msm_dsi_phy_enable(struct msm_dsi_phy *phy, phy_en_fail: regulator_bulk_disable(phy->cfg->num_regulators, phy->supplies); reg_en_fail: - dsi_phy_disable_resource(phy); + pm_runtime_put(dev); res_en_fail: return ret; } @@ -823,7 +800,7 @@ void msm_dsi_phy_disable(struct msm_dsi_phy *phy) phy->cfg->ops.disable(phy); 
regulator_bulk_disable(phy->cfg->num_regulators, phy->supplies); - dsi_phy_disable_resource(phy); + pm_runtime_put(&phy->pdev->dev); } void msm_dsi_phy_set_usecase(struct msm_dsi_phy *phy, diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index c558f8df1684..3cbf08231492 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -104,7 +104,6 @@ struct msm_dsi_phy { phys_addr_t lane_size; int id; - struct clk *ahb_clk; struct regulator_bulk_data *supplies; struct msm_dsi_dphy_timing timing; From 553666f839b86545300773954df7426a45c169c4 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 15 Jul 2025 18:50:37 +0300 Subject: [PATCH 1955/2411] drm/msm/kms: move snapshot init earlier in KMS init Various parts of the display driver can trigger the display snapshot (including the IOMMU fault handlers). Move the call to msm_disp_snapshot_init() before KMS initialization, otherwise it is possible to occasionally trigger a kernel fault during init: __lock_acquire+0x44/0x2798 (P) lock_acquire+0x114/0x25c _raw_spin_lock_irqsave+0x6c/0x90 kthread_queue_work+0x2c/0xac msm_disp_snapshot_state+0x2c/0x4c msm_kms_fault_handler+0x2c/0x74 msm_disp_fault_handler+0x30/0x48 report_iommu_fault+0x54/0x128 arm_smmu_context_fault+0x74/0x184 __handle_irq_event_percpu+0xa4/0x24c handle_irq_event_percpu+0x20/0x5c handle_irq_event+0x48/0x84 handle_fasteoi_irq+0xcc/0x170 generic_handle_domain_irq+0x48/0x70 gic_handle_irq+0x54/0x11c call_on_irq_stack+0x3c/0x50 do_interrupt_handler+0x54/0x78 el1_interrupt+0x3c/0x5c el1h_64_irq_handler+0x20/0x30 el1h_64_irq+0x6c/0x70 _raw_spin_unlock_irqrestore+0x44/0x68 (P) klist_next+0xc4/0x124 bus_for_each_drv+0x9c/0xe8 __device_attach+0xfc/0x190 device_initial_probe+0x1c/0x2c bus_probe_device+0x44/0xa0 device_add+0x204/0x3e4 platform_device_add+0x170/0x244 platform_device_register_full+0x130/0x138 drm_connector_hdmi_audio_init+0xc0/0x108
drm_bridge_connector_init+0x318/0x394 msm_dsi_manager_connector_init+0xac/0xdc msm_dsi_modeset_init+0x78/0xc0 _dpu_kms_drm_obj_init+0x198/0x75c dpu_kms_hw_init+0x2f8/0x494 msm_drm_kms_init+0xb0/0x230 msm_drm_init+0x218/0x250 msm_drm_bind+0x3c/0x4c try_to_bring_up_aggregate_device+0x208/0x2a4 __component_add+0xa8/0x188 component_add+0x1c/0x2c dsi_dev_attach+0x24/0x34 dsi_host_attach+0x68/0xa0 devm_mipi_dsi_attach+0x40/0xcc lt9611_attach_dsi+0x94/0x118 lt9611_probe+0x368/0x3c8 i2c_device_probe+0x2d0/0x3d8 really_probe+0x130/0x354 __driver_probe_device+0xac/0x110 driver_probe_device+0x44/0x110 __device_attach_driver+0xb0/0x138 bus_for_each_drv+0x90/0xe8 __device_attach+0xfc/0x190 device_initial_probe+0x1c/0x2c bus_probe_device+0x44/0xa0 deferred_probe_work_func+0xac/0x110 process_one_work+0x20c/0x51c process_scheduled_works+0x58/0x88 worker_thread+0x1ec/0x304 kthread+0x194/0x1d4 ret_from_fork+0x10/0x20 Reported-by: Konrad Dybcio Fixes: 98659487b845 ("drm/msm: add support to take dpu snapshot") Signed-off-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/664149/ Link: https://lore.kernel.org/r/20250715-msm-move-snapshot-init-v1-1-f39c396192ab@oss.qualcomm.com --- drivers/gpu/drm/msm/msm_kms.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_kms.c b/drivers/gpu/drm/msm/msm_kms.c index 6889f1c1e721..56828d218e88 100644 --- a/drivers/gpu/drm/msm/msm_kms.c +++ b/drivers/gpu/drm/msm/msm_kms.c @@ -275,6 +275,12 @@ int msm_drm_kms_init(struct device *dev, const struct drm_driver *drv) if (ret) return ret; + ret = msm_disp_snapshot_init(ddev); + if (ret) { + DRM_DEV_ERROR(dev, "msm_disp_snapshot_init failed ret = %d\n", ret); + return ret; + } + ret = priv->kms_init(ddev); if (ret) { DRM_DEV_ERROR(dev, "failed to load kms\n"); @@ -327,10 +333,6 @@ int msm_drm_kms_init(struct device *dev, const struct drm_driver *drv) goto err_msm_uninit; } - ret = msm_disp_snapshot_init(ddev); 
- if (ret) - DRM_DEV_ERROR(dev, "msm_disp_snapshot_init failed ret = %d\n", ret); - drm_mode_config_reset(ddev); return 0; From 1a76b255eceb9c570c6228f6393e1d63d97a22ba Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 15 Jul 2025 20:28:18 +0300 Subject: [PATCH 1956/2411] drm/msm/dpu: correct dpu_plane_virtual_atomic_check() Fix c&p error in dpu_plane_virtual_atomic_check(), compare CRTC width too, in addition to CRTC height. Fixes: 8c62a31607f6 ("drm/msm/dpu: allow using two SSPP blocks for a single plane") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507150432.U0cALR6W-lkp@intel.com/ Signed-off-by: Dmitry Baryshkov Reviewed-by: Jessica Zhang Reviewed-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/664170/ Link: https://lore.kernel.org/r/20250715-msm-fix-virt-atomic-check-v1-1-9bab02c9f952@oss.qualcomm.com --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 01171c535a27..c722f54e71b0 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -1162,7 +1162,7 @@ static int dpu_plane_virtual_atomic_check(struct drm_plane *plane, if (!old_plane_state || !old_plane_state->fb || old_plane_state->src_w != plane_state->src_w || old_plane_state->src_h != plane_state->src_h || - old_plane_state->src_w != plane_state->src_w || + old_plane_state->crtc_w != plane_state->crtc_w || old_plane_state->crtc_h != plane_state->crtc_h || msm_framebuffer_format(old_plane_state->fb) != msm_framebuffer_format(plane_state->fb)) From 494045c561e68945b1183ff416b8db8e37a122d6 Mon Sep 17 00:00:00 2001 From: Ayushi Makhija Date: Wed, 30 Jul 2025 18:09:38 +0530 Subject: [PATCH 1957/2411] drm/msm: update the high bitfield of certain DSI registers Currently, the high bitfield of certain DSI registers do not align with the configuration of 
the SWI registers description. This can lead to wrong programming of these DSI registers, for example for 4k resolution where H_TOTAL is taking 13 bits but software is programming only 12 bits because of the incorrect bitmask for H_TOTAL bitfield, this is causing DSI FIFO errors. To resolve this issue, increase the high bitfield of the DSI registers from 12 bits to 16 bits in dsi.xml to match the SWI register configuration. Signed-off-by: Ayushi Makhija Fixes: 4f52f5e63b62 ("drm/msm: import XML display registers database") Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/666229/ Link: https://lore.kernel.org/r/20250730123938.1038640-1-quic_amakhija@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/registers/display/dsi.xml | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/msm/registers/display/dsi.xml b/drivers/gpu/drm/msm/registers/display/dsi.xml index 501ffc585a9f..c7a7b633d747 100644 --- a/drivers/gpu/drm/msm/registers/display/dsi.xml +++ b/drivers/gpu/drm/msm/registers/display/dsi.xml @@ -159,28 +159,28 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> - - + + - - + + - - + + - - + + - - + + - - + + @@ -209,8 +209,8 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> - - + + From daab47925c06a04792ca720d8438abd37775e357 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 15 Jul 2025 16:27:35 -0700 Subject: [PATCH 1958/2411] drm/msm/dpu: Initialize crtc_state to NULL in dpu_plane_virtual_atomic_check() After a recent change in clang to expose uninitialized warnings from const variables and pointers [1], there is a warning around crtc_state in dpu_plane_virtual_atomic_check(): drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c:1145:6: error: variable 'crtc_state' is used uninitialized whenever 'if' condition is false [-Werror,-Wsometimes-uninitialized] 1145 | if (plane_state->crtc) | ^~~~~~~~~~~~~~~~~
drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c:1149:58: note: uninitialized use occurs here 1149 | ret = dpu_plane_atomic_check_nosspp(plane, plane_state, crtc_state); | ^~~~~~~~~~ drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c:1145:2: note: remove the 'if' if its condition is always true 1145 | if (plane_state->crtc) | ^~~~~~~~~~~~~~~~~~~~~~ 1146 | crtc_state = drm_atomic_get_new_crtc_state(state, drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c:1139:35: note: initialize the variable 'crtc_state' to silence this warning 1139 | struct drm_crtc_state *crtc_state; | ^ | = NULL Initialize crtc_state to NULL like other places in the driver do, so that it is consistently initialized. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/2106 Fixes: 774bcfb73176 ("drm/msm/dpu: add support for virtual planes") Link: https://github.com/llvm/llvm-project/commit/2464313eef01c5b1edf0eccf57a32cdee01472c7 [1] Signed-off-by: Nathan Chancellor Reviewed-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index c722f54e71b0..6859e8ef6b05 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -1129,7 +1129,7 @@ static int dpu_plane_virtual_atomic_check(struct drm_plane *plane, struct drm_plane_state *old_plane_state = drm_atomic_get_old_plane_state(state, plane); struct dpu_plane_state *pstate = to_dpu_plane_state(plane_state); - struct drm_crtc_state *crtc_state; + struct drm_crtc_state *crtc_state = NULL; int ret; if (IS_ERR(plane_state)) From 197713d0cf018e7d58a63a83cc43035b56678a50 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 23 Jul 2025 16:33:43 +0300 Subject: [PATCH 1959/2411] soc: qcom: ubwc: provide no-UBWC configuration After the commit 45a2974157d2 ("drm/msm: Use the central UBWC config database") the 
MDSS driver errors out if UBWC database didn't provide it with the UBWC configuration. Make UBWC database return zero data for MSM8916 / APQ8016, MSM8974 / APQ8074, MSM8226 and MSM8939. Fixes: 1924272b9ce1 ("soc: qcom: Add UBWC config provider") Acked-by: Bjorn Andersson Reviewed-by: Rob Clark Reviewed-by: Konrad Dybcio Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/665313/ Link: https://lore.kernel.org/r/20250723-ubwc-no-ubwc-v3-1-81bdb75685bf@oss.qualcomm.com --- drivers/soc/qcom/ubwc_config.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/soc/qcom/ubwc_config.c b/drivers/soc/qcom/ubwc_config.c index bd0a98aad9f3..9002fc9373ce 100644 --- a/drivers/soc/qcom/ubwc_config.c +++ b/drivers/soc/qcom/ubwc_config.c @@ -12,6 +12,10 @@ #include +static const struct qcom_ubwc_cfg_data no_ubwc_data = { + /* no UBWC, no HBB */ +}; + static const struct qcom_ubwc_cfg_data msm8937_data = { .ubwc_enc_version = UBWC_1_0, .ubwc_dec_version = UBWC_1_0, @@ -215,11 +219,18 @@ static const struct qcom_ubwc_cfg_data x1e80100_data = { }; static const struct of_device_id qcom_ubwc_configs[] __maybe_unused = { + { .compatible = "qcom,apq8016", .data = &no_ubwc_data }, + { .compatible = "qcom,apq8026", .data = &no_ubwc_data }, + { .compatible = "qcom,apq8074", .data = &no_ubwc_data }, { .compatible = "qcom,apq8096", .data = &msm8998_data }, + { .compatible = "qcom,msm8226", .data = &no_ubwc_data }, + { .compatible = "qcom,msm8916", .data = &no_ubwc_data }, { .compatible = "qcom,msm8917", .data = &msm8937_data }, { .compatible = "qcom,msm8937", .data = &msm8937_data }, + { .compatible = "qcom,msm8939", .data = &no_ubwc_data }, { .compatible = "qcom,msm8953", .data = &msm8937_data }, { .compatible = "qcom,msm8956", .data = &msm8937_data }, + { .compatible = "qcom,msm8974", .data = &no_ubwc_data }, { .compatible = "qcom,msm8976", .data = &msm8937_data }, { .compatible = "qcom,msm8996", .data = &msm8998_data }, { .compatible = 
"qcom,msm8998", .data = &msm8998_data }, From 0b6974bb4134ca6396752a0b122026b41300592f Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Wed, 23 Jul 2025 16:19:22 +0200 Subject: [PATCH 1960/2411] soc: qcom: ubwc: Add missing UBWC config for SM7225 SM7225 is a variation of SM6350, and also needs an entry in the table. Fixes: 1924272b9ce1 ("soc: qcom: Add UBWC config provider") Signed-off-by: Luca Weiss Reviewed-by: Dmitry Baryshkov Acked-by: Bjorn Andersson Signed-off-by: Dmitry Baryshkov --- drivers/soc/qcom/ubwc_config.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/qcom/ubwc_config.c b/drivers/soc/qcom/ubwc_config.c index 9002fc9373ce..1490a7f63767 100644 --- a/drivers/soc/qcom/ubwc_config.c +++ b/drivers/soc/qcom/ubwc_config.c @@ -257,6 +257,7 @@ static const struct of_device_id qcom_ubwc_configs[] __maybe_unused = { { .compatible = "qcom,sm6375", .data = &sm6350_data, }, { .compatible = "qcom,sm7125", .data = &sc7180_data }, { .compatible = "qcom,sm7150", .data = &sm7150_data, }, + { .compatible = "qcom,sm7225", .data = &sm6350_data, }, { .compatible = "qcom,sm8150", .data = &sm8150_data, }, { .compatible = "qcom,sm8250", .data = &sm8250_data, }, { .compatible = "qcom,sm8350", .data = &sm8350_data, }, From 757fc66da91b54d4fbc414bee5c440b52560d3b7 Mon Sep 17 00:00:00 2001 From: Baolin Liu Date: Tue, 12 Aug 2025 10:17:09 +0800 Subject: [PATCH 1961/2411] ext4: fix incorrect function name in comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 6b730a405037 “ext4: hoist ext4_block_write_begin and replace the __block_write_begin”, the comment should be updated accordingly from "__block_write_begin" to "ext4_block_write_begin". Fixes: 6b730a405037 (“ext4: hoist ext4_block_write_begin and replace...") Signed-off-by: Baolin Liu Reviewed-by: Darrick J. 
Wong Link: https://patch.msgid.link/20250812021709.1120716-1-liubaolin12138@163.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 731a800d9c14..238a0f12a5c0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3159,7 +3159,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, folio_unlock(folio); folio_put(folio); /* - * block_write_begin may have instantiated a few blocks + * ext4_block_write_begin may have instantiated a few blocks * outside i_size. Trim these off again. Don't need * i_size_read because we hold inode lock. */ From 9d98cf4632258720f18265a058e62fde120c0151 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Tue, 12 Aug 2025 14:37:52 +0800 Subject: [PATCH 1962/2411] jbd2: prevent softlockup in jbd2_log_do_checkpoint() Both jbd2_log_do_checkpoint() and jbd2_journal_shrink_checkpoint_list() periodically release j_list_lock after processing a batch of buffers to avoid long hold times on the j_list_lock. However, since both functions contend for j_list_lock, the combined time spent waiting and processing can be significant. jbd2_journal_shrink_checkpoint_list() explicitly calls cond_resched() when need_resched() is true to avoid softlockups during prolonged operations. But jbd2_log_do_checkpoint() only exits its loop when need_resched() is true, relying on potentially sleeping functions like __flush_batch() or wait_on_buffer() to trigger rescheduling. If those functions do not sleep, the kernel may hit a softlockup. watchdog: BUG: soft lockup - CPU#3 stuck for 156s! 
[kworker/u129:2:373] CPU: 3 PID: 373 Comm: kworker/u129:2 Kdump: loaded Not tainted 6.6.0+ #10 Hardware name: Huawei TaiShan 2280 /BC11SPCD, BIOS 1.27 06/13/2017 Workqueue: writeback wb_workfn (flush-7:2) pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : native_queued_spin_lock_slowpath+0x358/0x418 lr : jbd2_log_do_checkpoint+0x31c/0x438 [jbd2] Call trace: native_queued_spin_lock_slowpath+0x358/0x418 jbd2_log_do_checkpoint+0x31c/0x438 [jbd2] __jbd2_log_wait_for_space+0xfc/0x2f8 [jbd2] add_transaction_credits+0x3bc/0x418 [jbd2] start_this_handle+0xf8/0x560 [jbd2] jbd2__journal_start+0x118/0x228 [jbd2] __ext4_journal_start_sb+0x110/0x188 [ext4] ext4_do_writepages+0x3dc/0x740 [ext4] ext4_writepages+0xa4/0x190 [ext4] do_writepages+0x94/0x228 __writeback_single_inode+0x48/0x318 writeback_sb_inodes+0x204/0x590 __writeback_inodes_wb+0x54/0xf8 wb_writeback+0x2cc/0x3d8 wb_do_writeback+0x2e0/0x2f8 wb_workfn+0x80/0x2a8 process_one_work+0x178/0x3e8 worker_thread+0x234/0x3b8 kthread+0xf0/0x108 ret_from_fork+0x10/0x20 So explicitly call cond_resched() in jbd2_log_do_checkpoint() to avoid softlockup. 
Cc: stable@kernel.org Signed-off-by: Baokun Li Link: https://patch.msgid.link/20250812063752.912130-1-libaokun@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/jbd2/checkpoint.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index b3971e91e8eb..38861ca04899 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -285,6 +285,7 @@ int jbd2_log_do_checkpoint(journal_t *journal) retry: if (batch_count) __flush_batch(journal, &batch_count); + cond_resched(); spin_lock(&journal->j_list_lock); goto restart; } From 0c6b24d70da21201ed009a2aca740d2dfddc7ab5 Mon Sep 17 00:00:00 2001 From: Jason-JH Lin Date: Mon, 28 Jul 2025 10:48:50 +0800 Subject: [PATCH 1963/2411] drm/mediatek: Add error handling for old state CRTC in atomic_disable Introduce error handling to address an issue where, after a hotplug event, the cursor continues to update. This situation can lead to a kernel panic due to accessing the NULL `old_state->crtc`. E.g. Unable to handle kernel NULL pointer dereference at virtual address Call trace: mtk_crtc_plane_disable+0x24/0x140 mtk_plane_atomic_update+0x8c/0xa8 drm_atomic_helper_commit_planes+0x114/0x2c8 drm_atomic_helper_commit_tail_rpm+0x4c/0x158 commit_tail+0xa0/0x168 drm_atomic_helper_commit+0x110/0x120 drm_atomic_commit+0x8c/0xe0 drm_atomic_helper_update_plane+0xd4/0x128 __setplane_atomic+0xcc/0x110 drm_mode_cursor_common+0x250/0x440 drm_mode_cursor_ioctl+0x44/0x70 drm_ioctl+0x264/0x5d8 __arm64_sys_ioctl+0xd8/0x510 invoke_syscall+0x6c/0xe0 do_el0_svc+0x68/0xe8 el0_svc+0x34/0x60 el0t_64_sync_handler+0x1c/0xf8 el0t_64_sync+0x180/0x188 Add NULL pointer checks to ensure stability by preventing operations on an invalid CRTC state.
Fixes: d208261e9f7c ("drm/mediatek: Add wait_event_timeout when disabling plane") Signed-off-by: Jason-JH Lin Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/linux-mediatek/patch/20250728025036.24953-1-jason-jh.lin@mediatek.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_plane.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_plane.c b/drivers/gpu/drm/mediatek/mtk_plane.c index cbc4f37da8ba..02349bd44001 100644 --- a/drivers/gpu/drm/mediatek/mtk_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_plane.c @@ -292,7 +292,8 @@ static void mtk_plane_atomic_disable(struct drm_plane *plane, wmb(); /* Make sure the above parameter is set before update */ mtk_plane_state->pending.dirty = true; - mtk_crtc_plane_disable(old_state->crtc, plane); + if (old_state && old_state->crtc) + mtk_crtc_plane_disable(old_state->crtc, plane); } static void mtk_plane_atomic_update(struct drm_plane *plane, From 75a7b151e808355a1fdf972e85da137612b8f2ae Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 12 Aug 2025 15:09:06 +0200 Subject: [PATCH 1964/2411] rust: devres: fix leaking call to devm_add_action() When the data argument of Devres::new() is Err(), we leak the preceding call to devm_add_action(). In order to fix this, call devm_add_action() in a unit type initializer in try_pin_init!() after the initializers of all other fields. 
Fixes: f5d3ef25d238 ("rust: devres: get rid of Devres' inner Arc") Reviewed-by: Alice Ryhl Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250812130928.11075-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/devres.rs | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index da18091143a6..d04e3fcebafb 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -115,10 +115,11 @@ pub struct Devres { /// Contains all the fields shared with [`Self::callback`]. // TODO: Replace with `UnsafePinned`, once available. // - // Subsequently, the `drop_in_place()` in `Devres::drop` and the explicit `Send` and `Sync' - // impls can be removed. + // Subsequently, the `drop_in_place()` in `Devres::drop` and `Devres::new` as well as the + // explicit `Send` and `Sync' impls can be removed. #[pin] inner: Opaque>, + _add_action: (), } impl Devres { @@ -140,7 +141,15 @@ pub fn new<'a, E>( dev: dev.into(), callback, // INVARIANT: `inner` is properly initialized. - inner <- { + inner <- Opaque::pin_init(try_pin_init!(Inner { + devm <- Completion::new(), + revoke <- Completion::new(), + data <- Revocable::new(data), + })), + // TODO: Replace with "initializer code blocks" [1] once available. + // + // [1] https://github.com/Rust-for-Linux/pin-init/pull/69 + _add_action: { // SAFETY: `this` is a valid pointer to uninitialized memory. let inner = unsafe { &raw mut (*this.as_ptr()).inner }; @@ -152,13 +161,13 @@ pub fn new<'a, E>( // live at least as long as the returned `impl PinInit`. 
to_result(unsafe { bindings::devm_add_action(dev.as_raw(), Some(callback), inner.cast()) - })?; + }).inspect_err(|_| { + let inner = Opaque::cast_into(inner); - Opaque::pin_init(try_pin_init!(Inner { - devm <- Completion::new(), - revoke <- Completion::new(), - data <- Revocable::new(data), - })) + // SAFETY: `inner` is a valid pointer to an `Inner` and valid for both reads + // and writes. + unsafe { core::ptr::drop_in_place(inner) }; + })?; }, }) } From a58893aa173923fdc49c2d35d638d8133065e952 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Tue, 12 Aug 2025 13:08:58 +0800 Subject: [PATCH 1965/2411] net: mctp: Fix bad kfree_skb in bind lookup test The kunit test's skb_pkt is consumed by mctp_dst_input() so shouldn't be freed separately. Fixes: e6d8e7dbc5a3 ("net: mctp: Add bind lookup test") Reported-by: Alexandre Ghiti Closes: https://lore.kernel.org/all/734b02a3-1941-49df-a0da-ec14310d41e4@ghiti.fr/ Signed-off-by: Matt Johnston Tested-by: Alexandre Ghiti Link: https://patch.msgid.link/20250812-fix-mctp-bind-test-v1-1-5e2128664eb3@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- net/mctp/test/route-test.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index fb6b46a952cb..69a3ccfc6310 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -1586,7 +1586,6 @@ static void mctp_test_bind_lookup(struct kunit *test) cleanup: kfree_skb(skb_sock); - kfree_skb(skb_pkt); /* Drop all binds */ for (size_t i = 0; i < ARRAY_SIZE(lookup_binds); i++) From b2cafefaf0473bafb0c3502a8530167d35e06113 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 12 Aug 2025 16:21:26 +0000 Subject: [PATCH 1966/2411] netdevsim: Fix wild pointer access in nsim_queue_free(). syzbot reported the splat below. [0] When nsim_queue_uninit() is called from nsim_init_netdevsim(), register_netdevice() has not been called, thus dev->dstats has not been allocated. 
Let's not call dev_dstats_rx_dropped_add() in such a case. [0] BUG: unable to handle page fault for address: ffff88809782c020 PF: supervisor write access in kernel mode PF: error_code(0x0002) - not-present page PGD 1b401067 P4D 1b401067 PUD 0 Oops: Oops: 0002 [#1] SMP KASAN NOPTI CPU: 3 UID: 0 PID: 8476 Comm: syz.1.251 Not tainted 6.16.0-syzkaller-06699-ge8d780dcd957 #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:local_add arch/x86/include/asm/local.h:33 [inline] RIP: 0010:u64_stats_add include/linux/u64_stats_sync.h:89 [inline] RIP: 0010:dev_dstats_rx_dropped_add include/linux/netdevice.h:3027 [inline] RIP: 0010:nsim_queue_free+0xba/0x120 drivers/net/netdevsim/netdev.c:714 Code: 07 77 6c 4a 8d 3c ed 20 7e f1 8d 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 80 3c 02 00 75 46 4a 03 1c ed 20 7e f1 8d <4c> 01 63 20 be 00 02 00 00 48 8d 3d 00 00 00 00 e8 61 2f 58 fa 48 RSP: 0018:ffffc900044af150 EFLAGS: 00010286 RAX: dffffc0000000000 RBX: ffff88809782c000 RCX: 00000000000079c3 RDX: 1ffffffff1be2fc7 RSI: ffffffff8c15f380 RDI: ffffffff8df17e38 RBP: ffff88805f59d000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000000 R13: 0000000000000003 R14: ffff88806ceb3d00 R15: ffffed100dfd308e FS: 0000000000000000(0000) GS:ffff88809782c000(0063) knlGS:00000000f505db40 CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 CR2: ffff88809782c020 CR3: 000000006fc6a000 CR4: 0000000000352ef0 Call Trace: nsim_queue_uninit drivers/net/netdevsim/netdev.c:993 [inline] nsim_init_netdevsim drivers/net/netdevsim/netdev.c:1049 [inline] nsim_create+0xd0a/0x1260 drivers/net/netdevsim/netdev.c:1101 __nsim_dev_port_add+0x435/0x7d0 drivers/net/netdevsim/dev.c:1438 nsim_dev_port_add_all drivers/net/netdevsim/dev.c:1494 [inline] nsim_dev_reload_create drivers/net/netdevsim/dev.c:1546 [inline] nsim_dev_reload_up+0x5b8/0x860 drivers/net/netdevsim/dev.c:1003 
devlink_reload+0x322/0x7c0 net/devlink/dev.c:474 devlink_nl_reload_doit+0xe31/0x1410 net/devlink/dev.c:584 genl_family_rcv_msg_doit+0x206/0x2f0 net/netlink/genetlink.c:1115 genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline] genl_rcv_msg+0x55c/0x800 net/netlink/genetlink.c:1210 netlink_rcv_skb+0x155/0x420 net/netlink/af_netlink.c:2552 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219 netlink_unicast_kernel net/netlink/af_netlink.c:1320 [inline] netlink_unicast+0x5aa/0x870 net/netlink/af_netlink.c:1346 netlink_sendmsg+0x8d1/0xdd0 net/netlink/af_netlink.c:1896 sock_sendmsg_nosec net/socket.c:714 [inline] __sock_sendmsg net/socket.c:729 [inline] ____sys_sendmsg+0xa95/0xc70 net/socket.c:2614 ___sys_sendmsg+0x134/0x1d0 net/socket.c:2668 __sys_sendmsg+0x16d/0x220 net/socket.c:2700 do_syscall_32_irqs_on arch/x86/entry/syscall_32.c:83 [inline] __do_fast_syscall_32+0x7c/0x3a0 arch/x86/entry/syscall_32.c:306 do_fast_syscall_32+0x32/0x80 arch/x86/entry/syscall_32.c:331 entry_SYSENTER_compat_after_hwframe+0x84/0x8e RIP: 0023:0xf708e579 Code: b8 01 10 06 03 74 b4 01 10 07 03 74 b0 01 10 08 03 74 d8 01 00 00 00 00 00 00 00 00 00 00 00 00 00 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d b4 26 00 00 00 00 8d b4 26 00 00 00 00 RSP: 002b:00000000f505d55c EFLAGS: 00000296 ORIG_RAX: 0000000000000172 RAX: ffffffffffffffda RBX: 0000000000000007 RCX: 0000000080000080 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000296 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Modules linked in: CR2: ffff88809782c020 Fixes: 2a68a22304f9 ("netdevsim: account dropped packet length in stats on queue free") Reported-by: syzbot+8aa80c6232008f7b957d@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/688bb9ca.a00a0220.26d0e1.0050.GAE@google.com/ Suggested-by: Jakub Kicinski Signed-off-by: Kuniyuki Iwashima Link: 
https://patch.msgid.link/20250812162130.4129322-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/netdev.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 39fe28af48b9..0178219f0db5 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -710,9 +710,13 @@ static struct nsim_rq *nsim_queue_alloc(void) static void nsim_queue_free(struct net_device *dev, struct nsim_rq *rq) { hrtimer_cancel(&rq->napi_timer); - local_bh_disable(); - dev_dstats_rx_dropped_add(dev, rq->skb_queue.qlen); - local_bh_enable(); + + if (rq->skb_queue.qlen) { + local_bh_disable(); + dev_dstats_rx_dropped_add(dev, rq->skb_queue.qlen); + local_bh_enable(); + } + skb_queue_purge_reason(&rq->skb_queue, SKB_DROP_REASON_QUEUE_PURGE); kfree(rq); } From 39f8fcda2088382a4aa70b258d6f7225aa386f11 Mon Sep 17 00:00:00 2001 From: David Wei Date: Tue, 12 Aug 2025 11:29:07 -0700 Subject: [PATCH 1967/2411] bnxt: fill data page pool with frags if PAGE_SIZE > BNXT_RX_PAGE_SIZE The data page pool always fills the HW rx ring with pages. On arm64 with 64K pages, this will waste _at least_ 32K of memory per entry in the rx ring. Fix by fragmenting the pages if PAGE_SIZE > BNXT_RX_PAGE_SIZE. This makes the data page pool the same as the header pool. Tested with iperf3 with a small (64 entries) rx ring to encourage buffer circulation. 
Fixes: cd1fafe7da1f ("eth: bnxt: add support rx side device memory TCP") Reviewed-by: Michael Chan Signed-off-by: David Wei Link: https://patch.msgid.link/20250812182907.1540755-1-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 76a4c5ae8000..2800a90fba1f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -926,15 +926,21 @@ static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, static netmem_ref __bnxt_alloc_rx_netmem(struct bnxt *bp, dma_addr_t *mapping, struct bnxt_rx_ring_info *rxr, + unsigned int *offset, gfp_t gfp) { netmem_ref netmem; - netmem = page_pool_alloc_netmems(rxr->page_pool, gfp); + if (PAGE_SIZE > BNXT_RX_PAGE_SIZE) { + netmem = page_pool_alloc_frag_netmem(rxr->page_pool, offset, BNXT_RX_PAGE_SIZE, gfp); + } else { + netmem = page_pool_alloc_netmems(rxr->page_pool, gfp); + *offset = 0; + } if (!netmem) return 0; - *mapping = page_pool_get_dma_addr_netmem(netmem); + *mapping = page_pool_get_dma_addr_netmem(netmem) + *offset; return netmem; } @@ -1029,7 +1035,7 @@ static int bnxt_alloc_rx_netmem(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, dma_addr_t mapping; netmem_ref netmem; - netmem = __bnxt_alloc_rx_netmem(bp, &mapping, rxr, gfp); + netmem = __bnxt_alloc_rx_netmem(bp, &mapping, rxr, &offset, gfp); if (!netmem) return -ENOMEM; From 87c6efc5ce9c126ae4a781bc04504b83780e3650 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 12 Aug 2025 18:40:29 +0200 Subject: [PATCH 1968/2411] net/sched: ets: use old 'nbands' while purging unused classes Shuang reported sch_ets test-case [1] crashing in ets_class_qlen_notify() after recent changes from Lion [2]. 
The problem is: in ets_qdisc_change() we purge unused DWRR queues; the value of 'q->nbands' is the new one, and the cleanup should be done with the old one. The problem is here since my first attempts to fix ets_qdisc_change(), but it surfaced again after the recent qdisc len accounting fixes. Fix it purging idle DWRR queues before assigning a new value of 'q->nbands', so that all purge operations find a consistent configuration: - old 'q->nbands' because it's needed by ets_class_find() - old 'q->nstrict' because it's needed by ets_class_is_strict() BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] SMP NOPTI CPU: 62 UID: 0 PID: 39457 Comm: tc Kdump: loaded Not tainted 6.12.0-116.el10.x86_64 #1 PREEMPT(voluntary) Hardware name: Dell Inc. PowerEdge R640/06DKY5, BIOS 2.12.2 07/09/2021 RIP: 0010:__list_del_entry_valid_or_report+0x4/0x80 Code: ff 4c 39 c7 0f 84 39 19 8e ff b8 01 00 00 00 c3 cc cc cc cc 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa <48> 8b 17 48 8b 4f 08 48 85 d2 0f 84 56 19 8e ff 48 85 c9 0f 84 ab RSP: 0018:ffffba186009f400 EFLAGS: 00010202 RAX: 00000000000000d6 RBX: 0000000000000000 RCX: 0000000000000004 RDX: ffff9f0fa29b69c0 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffffffc12c2400 R08: 0000000000000008 R09: 0000000000000004 R10: ffffffffffffffff R11: 0000000000000004 R12: 0000000000000000 R13: ffff9f0f8cfe0000 R14: 0000000000100005 R15: 0000000000000000 FS: 00007f2154f37480(0000) GS:ffff9f269c1c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000001530be001 CR4: 00000000007726f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ets_class_qlen_notify+0x65/0x90 [sch_ets] qdisc_tree_reduce_backlog+0x74/0x110 
ets_qdisc_change+0x630/0xa40 [sch_ets] __tc_modify_qdisc.constprop.0+0x216/0x7f0 tc_modify_qdisc+0x7c/0x120 rtnetlink_rcv_msg+0x145/0x3f0 netlink_rcv_skb+0x53/0x100 netlink_unicast+0x245/0x390 netlink_sendmsg+0x21b/0x470 ____sys_sendmsg+0x39d/0x3d0 ___sys_sendmsg+0x9a/0xe0 __sys_sendmsg+0x7a/0xd0 do_syscall_64+0x7d/0x160 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f2155114084 Code: 89 02 b8 ff ff ff ff eb bb 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 80 3d 25 f0 0c 00 00 74 13 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 48 83 ec 28 89 54 24 1c 48 89 RSP: 002b:00007fff1fd7a988 EFLAGS: 00000202 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000560ec063e5e0 RCX: 00007f2155114084 RDX: 0000000000000000 RSI: 00007fff1fd7a9f0 RDI: 0000000000000003 RBP: 00007fff1fd7aa60 R08: 0000000000000010 R09: 000000000000003f R10: 0000560ee9b3a010 R11: 0000000000000202 R12: 00007fff1fd7aae0 R13: 000000006891ccde R14: 0000560ec063e5e0 R15: 00007fff1fd7aad0 [1] https://lore.kernel.org/netdev/e08c7f4a6882f260011909a868311c6e9b54f3e4.1639153474.git.dcaratti@redhat.com/ [2] https://lore.kernel.org/netdev/d912cbd7-193b-4269-9857-525bee8bbb6a@gmail.com/ Cc: stable@vger.kernel.org Fixes: 103406b38c60 ("net/sched: Always pass notifications when child class becomes empty") Fixes: c062f2a0b04d ("net/sched: sch_ets: don't remove idle classes from the round-robin list") Fixes: dcc68b4d8084 ("net: sch_ets: Add a new Qdisc") Reported-by: Li Shuang Closes: https://issues.redhat.com/browse/RHEL-108026 Reviewed-by: Petr Machata Co-developed-by: Ivan Vecera Signed-off-by: Ivan Vecera Signed-off-by: Davide Caratti Link: https://patch.msgid.link/7928ff6d17db47a2ae7cc205c44777b1f1950545.1755016081.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- net/sched/sch_ets.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 037f764822b9..82635dd2cfa5 100644 --- a/net/sched/sch_ets.c +++ 
b/net/sched/sch_ets.c @@ -651,6 +651,12 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); + for (i = nbands; i < oldbands; i++) { + if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) + list_del_init(&q->classes[i].alist); + qdisc_purge_queue(q->classes[i].qdisc); + } + WRITE_ONCE(q->nbands, nbands); for (i = nstrict; i < q->nstrict; i++) { if (q->classes[i].qdisc->q.qlen) { @@ -658,11 +664,6 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, q->classes[i].deficit = quanta[i]; } } - for (i = q->nbands; i < oldbands; i++) { - if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) - list_del_init(&q->classes[i].alist); - qdisc_purge_queue(q->classes[i].qdisc); - } WRITE_ONCE(q->nstrict, nstrict); memcpy(q->prio2band, priomap, sizeof(priomap)); From 774a2ae6617b30a4dcc7ebaf178ef05da05b2a47 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 12 Aug 2025 18:40:30 +0200 Subject: [PATCH 1969/2411] selftests: net/forwarding: test purge of active DWRR classes Extend sch_ets.sh to add a reproducer for problematic list deletions when active DWRR classes are purged by ets_qdisc_change() [1] [2].
[1] https://lore.kernel.org/netdev/e08c7f4a6882f260011909a868311c6e9b54f3e4.1639153474.git.dcaratti@redhat.com/ [2] https://lore.kernel.org/netdev/f3b9bacc73145f265c19ab80785933da5b7cbdec.1754581577.git.dcaratti@redhat.com/ Suggested-by: Victor Nogueira Signed-off-by: Davide Caratti Acked-by: Victor Nogueira Link: https://patch.msgid.link/489497cb781af7389011ca1591fb702a7391f5e7.1755016081.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/sch_ets.sh | 1 + tools/testing/selftests/net/forwarding/sch_ets_tests.sh | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh index 1f6f53e284b5..6269d5e23487 100755 --- a/tools/testing/selftests/net/forwarding/sch_ets.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets.sh @@ -11,6 +11,7 @@ ALL_TESTS=" ets_test_strict ets_test_mixed ets_test_dwrr + ets_test_plug classifier_mode ets_test_strict ets_test_mixed diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh index 08240d3e3c87..79d837a2868a 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh @@ -224,3 +224,11 @@ ets_test_dwrr() ets_set_dwrr_two_bands xfail_on_slow ets_dwrr_test_01 } + +ets_test_plug() +{ + ets_change_qdisc $put 2 "3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3" "1514 1514" + tc qdisc add dev $put handle 20: parent 10:4 plug + start_traffic_pktsize 100 $h1.10 192.0.2.1 192.0.2.2 00:c1:a0:c1:a0:00 "-c 1" + ets_qdisc_setup $put 2 +} From 52565a935213cd6a8662ddb8efe5b4219343a25d Mon Sep 17 00:00:00 2001 From: Sven Stegemann Date: Tue, 12 Aug 2025 21:18:03 +0200 Subject: [PATCH 1970/2411] net: kcm: Fix race condition in kcm_unattach() syzbot found a race condition when kcm_unattach(psock) and kcm_release(kcm) are executed at the same time. 
kcm_unattach() is missing a check of the flag kcm->tx_stopped before calling queue_work(). If the kcm has a reserved psock, kcm_unattach() might get executed between cancel_work_sync() and unreserve_psock() in kcm_release(), requeuing kcm->tx_work right before kcm gets freed in kcm_done(). Remove kcm->tx_stopped and replace it by the less error-prone disable_work_sync(). Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reported-by: syzbot+e62c9db591c30e174662@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e62c9db591c30e174662 Reported-by: syzbot+d199b52665b6c3069b94@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d199b52665b6c3069b94 Reported-by: syzbot+be6b1fdfeae512726b4e@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=be6b1fdfeae512726b4e Signed-off-by: Sven Stegemann Link: https://patch.msgid.link/20250812191810.27777-1-sven@stegemann.de Signed-off-by: Jakub Kicinski --- include/net/kcm.h | 1 - net/kcm/kcmsock.c | 10 ++-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/include/net/kcm.h b/include/net/kcm.h index 441e993be634..d9c35e71ecea 100644 --- a/include/net/kcm.h +++ b/include/net/kcm.h @@ -71,7 +71,6 @@ struct kcm_sock { struct list_head wait_psock_list; struct sk_buff *seq_skb; struct mutex tx_mutex; - u32 tx_stopped : 1; /* Don't use bit fields here, these are set under different locks */ bool tx_wait; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index a4971e6fa943..b4f01cb07561 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -430,7 +430,7 @@ static void psock_write_space(struct sock *sk) /* Check if the socket is reserved so someone is waiting for sending. */ kcm = psock->tx_kcm; - if (kcm && !unlikely(kcm->tx_stopped)) + if (kcm) queue_work(kcm_wq, &kcm->tx_work); spin_unlock_bh(&mux->lock); @@ -1693,12 +1693,6 @@ static int kcm_release(struct socket *sock) */ __skb_queue_purge(&sk->sk_write_queue); - /* Set tx_stopped. 
This is checked when psock is bound to a kcm and we - * get a writespace callback. This prevents further work being queued - * from the callback (unbinding the psock occurs after canceling work. - */ - kcm->tx_stopped = 1; - release_sock(sk); spin_lock_bh(&mux->lock); @@ -1714,7 +1708,7 @@ static int kcm_release(struct socket *sock) /* Cancel work. After this point there should be no outside references * to the kcm socket. */ - cancel_work_sync(&kcm->tx_work); + disable_work_sync(&kcm->tx_work); lock_sock(sk); psock = kcm->tx_psock; From e2f9ae91619add9884428d095c3c630b6b120a61 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 14 Aug 2025 11:38:58 +0900 Subject: [PATCH 1971/2411] MAINTAINERS: Remove bouncing kprobes maintainer The kprobes MAINTAINERS entry includes anil.s.keshavamurthy@intel.com. That address is bouncing. Remove it. This still leaves three other listed maintainers. Link: https://lore.kernel.org/all/20250808180124.7DDE2ECD@davehans-spike.ostc.intel.com/ Signed-off-by: Dave Hansen Cc: Naveen N Rao Cc: "David S. Miller" Cc: Masami Hiramatsu Cc: linux-trace-kernel@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fe168477caa4..a2bc2bb91970 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13686,7 +13686,6 @@ F: scripts/Makefile.kmsan KPROBES M: Naveen N Rao -M: Anil S Keshavamurthy M: "David S. Miller" M: Masami Hiramatsu L: linux-kernel@vger.kernel.org From 4faff70959d51078f9ee8372f8cff0d7045e4114 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 11 Aug 2025 17:29:31 +0800 Subject: [PATCH 1972/2411] net: usb: asix_devices: add phy_mask for ax88772 mdio bus Without setting phy_mask for ax88772 mdio bus, current driver may create at most 32 mdio phy devices with phy address range from 0x00 ~ 0x1f. DLink DUB-E100 H/W Ver B1 is such a device. However, only one main phy device will bind to net phy driver. 
This creates an issue during system suspend/resume, since phy_polling_mode() in phy_state_machine() will directly dereference a member of phydev->drv for non-main phy devices. A NULL pointer dereference will then occur. Since only the external phy or the internal phy is necessary, add phy_mask for ax88772 mdio bus to work around the issue. Closes: https://lore.kernel.org/netdev/20250806082931.3289134-1-xu.yang_2@nxp.com Fixes: e532a096be0e ("net: usb: asix: ax88772: add phylib support") Cc: stable@vger.kernel.org Signed-off-by: Xu Yang Tested-by: Oleksij Rempel Reviewed-by: Oleksij Rempel Link: https://patch.msgid.link/20250811092931.860333-1-xu.yang_2@nxp.com Signed-off-by: Paolo Abeni --- drivers/net/usb/asix_devices.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 9b0318fb50b5..d9f5942ccc44 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -676,6 +676,7 @@ static int ax88772_init_mdio(struct usbnet *dev) priv->mdio->read = &asix_mdio_bus_read; priv->mdio->write = &asix_mdio_bus_write; priv->mdio->name = "Asix MDIO Bus"; + priv->mdio->phy_mask = ~(BIT(priv->phy_addr) | BIT(AX_EMBD_PHY_ADDR)); /* mii bus name is usb-- */ snprintf(priv->mdio->id, MII_BUS_ID_SIZE, "usb-%03d:%03d", dev->udev->bus->busnum, dev->udev->devnum); From d832ccbc301fbd9e5a1d691bdcf461cdb514595f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 14 Aug 2025 10:12:42 +0200 Subject: [PATCH 1973/2411] ALSA: usb-audio: Validate UAC3 power domain descriptors, too UAC3 power domain descriptors need to be verified with its variable bLength for avoiding the unexpected OOB accesses by malicious firmware, too.
Fixes: 9a2fe9b801f5 ("ALSA: usb: initial USB Audio Device Class 3.0 support") Reported-and-tested-by: Youngjun Lee Cc: Link: https://patch.msgid.link/20250814081245.8902-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/validate.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/usb/validate.c b/sound/usb/validate.c index 6fe206f6e911..4f4e8e87a14c 100644 --- a/sound/usb/validate.c +++ b/sound/usb/validate.c @@ -221,6 +221,17 @@ static bool validate_uac3_feature_unit(const void *p, return d->bLength >= sizeof(*d) + 4 + 2; } +static bool validate_uac3_power_domain_unit(const void *p, + const struct usb_desc_validator *v) +{ + const struct uac3_power_domain_descriptor *d = p; + + if (d->bLength < sizeof(*d)) + return false; + /* baEntities[] + wPDomainDescrStr */ + return d->bLength >= sizeof(*d) + d->bNrEntities + 2; +} + static bool validate_midi_out_jack(const void *p, const struct usb_desc_validator *v) { @@ -285,6 +296,7 @@ static const struct usb_desc_validator audio_validators[] = { struct uac3_clock_multiplier_descriptor), /* UAC_VERSION_3, UAC3_SAMPLE_RATE_CONVERTER: not implemented yet */ /* UAC_VERSION_3, UAC3_CONNECTORS: not implemented yet */ + FUNC(UAC_VERSION_3, UAC3_POWER_DOMAIN, validate_uac3_power_domain_unit), { } /* terminator */ }; From ecfd41166b72b67d3bdeb88d224ff445f6163869 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 14 Aug 2025 10:12:43 +0200 Subject: [PATCH 1974/2411] ALSA: usb-audio: Validate UAC3 cluster segment descriptors UAC3 class segment descriptors need to be verified whether their sizes match with the declared lengths and whether they fit with the allocated buffer sizes, too. Otherwise malicious firmware may lead to the unexpected OOB accesses. 
Fixes: 11785ef53228 ("ALSA: usb-audio: Initial Power Domain support") Reported-and-tested-by: Youngjun Lee Cc: Link: https://patch.msgid.link/20250814081245.8902-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/stream.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/sound/usb/stream.c b/sound/usb/stream.c index ad6ced780634..acf3dc2d79e0 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -341,20 +341,28 @@ snd_pcm_chmap_elem *convert_chmap_v3(struct uac3_cluster_header_descriptor len = le16_to_cpu(cluster->wLength); c = 0; - p += sizeof(struct uac3_cluster_header_descriptor); + p += sizeof(*cluster); + len -= sizeof(*cluster); - while (((p - (void *)cluster) < len) && (c < channels)) { + while (len > 0 && (c < channels)) { struct uac3_cluster_segment_descriptor *cs_desc = p; u16 cs_len; u8 cs_type; + if (len < sizeof(*p)) + break; cs_len = le16_to_cpu(cs_desc->wLength); + if (len < cs_len) + break; cs_type = cs_desc->bSegmentType; if (cs_type == UAC3_CHANNEL_INFORMATION) { struct uac3_cluster_information_segment_descriptor *is = p; unsigned char map; + if (cs_len < sizeof(*is)) + break; + /* * TODO: this conversion is not complete, update it * after adding UAC3 values to asound.h @@ -456,6 +464,7 @@ snd_pcm_chmap_elem *convert_chmap_v3(struct uac3_cluster_header_descriptor chmap->map[c++] = map; } p += cs_len; + len -= cs_len; } if (channels < c) @@ -881,7 +890,7 @@ snd_usb_get_audioformat_uac3(struct snd_usb_audio *chip, u64 badd_formats = 0; unsigned int num_channels; struct audioformat *fp; - u16 cluster_id, wLength; + u16 cluster_id, wLength, cluster_wLength; int clock = 0; int err; @@ -1011,6 +1020,16 @@ snd_usb_get_audioformat_uac3(struct snd_usb_audio *chip, return ERR_PTR(-EIO); } + cluster_wLength = le16_to_cpu(cluster->wLength); + if (cluster_wLength < sizeof(*cluster) || + cluster_wLength > wLength) { + dev_err(&dev->dev, + "%u:%d : invalid Cluster Descriptor size\n", + iface_no, altno); + 
kfree(cluster); + return ERR_PTR(-EIO); + } + num_channels = cluster->bNrChannels; chmap = convert_chmap_v3(cluster); kfree(cluster); From c345102d1feed3de8aa9b9ec7d18b3fbba62deb7 Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Wed, 13 Aug 2025 18:08:42 +0800 Subject: [PATCH 1975/2411] ALSA: hda/tas2781: Normalize the volume kcontrol name Change the name of the kcontrol from "Gain" to "Volume". Signed-off-by: Baojun Xu Link: https://patch.msgid.link/20250813100842.12224-1-baojun.xu@ti.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/side-codecs/tas2781_hda_i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c index 45ac5e41bd4f..06c7bc2b9e9d 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c +++ b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c @@ -265,7 +265,7 @@ static const struct snd_kcontrol_new tas2770_snd_controls[] = { }; static const struct snd_kcontrol_new tas2781_snd_controls[] = { - ACARD_SINGLE_RANGE_EXT_TLV("Speaker Analog Gain", TAS2781_AMP_LEVEL, + ACARD_SINGLE_RANGE_EXT_TLV("Speaker Analog Volume", TAS2781_AMP_LEVEL, 1, 0, 20, 0, tas2781_amp_getvol, tas2781_amp_putvol, amp_vol_tlv), ACARD_SINGLE_BOOL_EXT("Speaker Force Firmware Load", 0, From 35f6bedccf4c4280f02d48e4f7d194e64e9a62d8 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 13 Aug 2025 18:08:53 +0900 Subject: [PATCH 1976/2411] ata: libata-eh: Fix link state check for IDE/PATA ports Commit 4371fe1ba400 ("ata: libata-eh: Avoid unnecessary resets when revalidating devices") replaced the call to ata_phys_link_offline() in ata_eh_revalidate_and_attach() with the new function ata_eh_link_established() which relaxes the checks on a device link state to account for low power mode transitions. However, this change assumed that the device port has a valid scr_read method to obtain the SStatus register for the port. 
This is not always the case, especially with older IDE/PATA adapters (e.g. PATA/IDE devices emulated with QEMU). For such adapters, ata_eh_link_established() will always return false, causing ata_eh_revalidate_and_attach() to go into its error path and ultimately to the device being disabled. Avoid this by restoring the previous behavior, which is to assume that the link is online if reading the port SStatus register fails. While at it, also fix the spelling of SStatus in the comment describing the function ata_eh_link_established(). Reported-by: Shin'ichiro Kawasaki Fixes: 4371fe1ba400 ("ata: libata-eh: Avoid unnecessary resets when revalidating devices") Signed-off-by: Damien Le Moal Tested-by: Shin'ichiro Kawasaki Reviewed-by: Niklas Cassel --- drivers/ata/libata-eh.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 2946ae6d4b2c..2586e77ebf45 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2075,7 +2075,7 @@ static void ata_eh_get_success_sense(struct ata_link *link) * Check if a link is established. This is a relaxed version of * ata_phys_link_online() which accounts for the fact that this is potentially * called after changing the link power management policy, which may not be - * reflected immediately in the SSTAUS register (e.g., we may still be seeing + * reflected immediately in the SStatus register (e.g., we may still be seeing * the PHY in partial, slumber or devsleep Partial power management state. * So check that: * - A device is still present, that is, DET is 1h (Device presence detected @@ -2089,8 +2089,13 @@ static bool ata_eh_link_established(struct ata_link *link) u32 sstatus; u8 det, ipm; + /* + * For old IDE/PATA adapters that do not have a valid scr_read method, + * or if reading the SStatus register fails, assume that the device is + * present. Device probe will determine if that is really the case.
+ */ if (sata_scr_read(link, SCR_STATUS, &sstatus)) - return false; + return true; det = sstatus & 0x0f; ipm = (sstatus >> 8) & 0x0f; From ea177a1b1efc6e42e73ee4a17581842cd254e006 Mon Sep 17 00:00:00 2001 From: Rudi Heitbaum Date: Tue, 12 Aug 2025 06:55:15 +0000 Subject: [PATCH 1977/2411] drm/rockchip: cdn-dp: select bridge for cdp-dp Select drm bridge connector when building cdp-dp. This was missed in previous commit causing build failure. Fixes: afbbca25d06e ("drm/rockchip: cdn-dp: Convert to drm bridge") Signed-off-by: Rudi Heitbaum Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/aJrlUzV1u4i65NRe@8eba3d7b3083 --- drivers/gpu/drm/rockchip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig index ab525668939a..faf50d872be3 100644 --- a/drivers/gpu/drm/rockchip/Kconfig +++ b/drivers/gpu/drm/rockchip/Kconfig @@ -53,6 +53,7 @@ config ROCKCHIP_CDN_DP bool "Rockchip cdn DP" depends on EXTCON=y || (EXTCON=m && DRM_ROCKCHIP=m) select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR select DRM_DISPLAY_DP_HELPER help This selects support for Rockchip SoC specific extensions From a52dffaa46c2c5ff0b311c4dc1288581f7b9109e Mon Sep 17 00:00:00 2001 From: Piotr Zalewski Date: Sun, 6 Jul 2025 08:36:58 +0000 Subject: [PATCH 1978/2411] drm/rockchip: vop2: make vp registers nonvolatile Make video port registers nonvolatile. As DSP_CTRL register is written to twice due to gamma LUT enable bit which is set outside of the main DSP_CTRL initialization within atomic_enable (for rk356x case it is also necessary to always disable gamma LUT before writing a new LUT) there is a chance that DSP_CTRL value read-out in gamma LUT init/update code is not the one which was written by the preceding DSP_CTRL initialization code within atomic_enable. This might result in misconfigured DSP_CTRL which leads to no visual output[1]. Since DSP_CTRL write takes effect after VSYNC[1] the issue is not always present. 
When tested on Pinetab2 with kernel 6.14 it happens only when DRM is compiled as a module[1]. In order to confirm that it is a timing issue I inserted an 18ms udelay before vop2_crtc_atomic_try_set_gamma in atomic enable and compiled DRM as a module - this has also fixed the issue. [1] https://lore.kernel.org/linux-rockchip/562b38e5.a496.1975f09f983.Coremail.andyshrk@163.com/ Reported-by: Diederik de Haas Closes: https://lore.kernel.org/linux-rockchip/DAEVDSTMWI1E.J454VZN0R9MA@cknow.org/ Suggested-by: Andy Yan Signed-off-by: Piotr Zalewski Tested-by: Diederik de Haas Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20250706083629.140332-2-pZ010001011111@proton.me --- drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index 186f6452a7d3..b50927a824b4 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -2579,12 +2579,13 @@ static int vop2_win_init(struct vop2 *vop2) } /* - * The window registers are only updated when config done is written. - * Until that they read back the old value. As we read-modify-write - * these registers mark them as non-volatile. This makes sure we read - * the new values from the regmap register cache. + * The window and video port registers are only updated when config + * done is written. Until that they read back the old value. As we + * read-modify-write these registers mark them as non-volatile. This + * makes sure we read the new values from the regmap register cache.
*/ static const struct regmap_range vop2_nonvolatile_range[] = { + regmap_reg_range(RK3568_VP0_CTRL_BASE, RK3588_VP3_CTRL_BASE + 255), regmap_reg_range(0x1000, 0x23ff), }; From 58768b0563916ddcb73d8ed26ede664915f8df31 Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Wed, 13 Aug 2025 19:22:56 -0700 Subject: [PATCH 1979/2411] ata: libata-scsi: Fix CDL control Delete extra checks for the ATA_DFLAG_CDL_ENABLED flag that prevent SET FEATURES command from being issued to a drive when NCQ commands are active. ata_mselect_control_ata_feature() sets / clears the ATA_DFLAG_CDL_ENABLED flag during the translation of MODE SELECT to SET FEATURES. If SET FEATURES gets deferred due to outstanding NCQ commands, the original MODE SELECT command will be re-queued. When the re-queued MODE SELECT goes through the ata_mselect_control_ata_feature() translation again, SET FEATURES will not be issued because ATA_DFLAG_CDL_ENABLED has been already set or cleared by the initial translation of MODE SELECT. The ATA_DFLAG_CDL_ENABLED checks in ata_mselect_control_ata_feature() are safe to remove because scsi_cdl_enable() implements a similar logic that avoids enabling CDL if it has been enabled already. 
Fixes: 17e897a45675 ("ata: libata-scsi: Improve CDL control") Cc: stable@vger.kernel.org Signed-off-by: Igor Pylypiv Reviewed-by: Niklas Cassel Signed-off-by: Damien Le Moal --- drivers/ata/libata-scsi.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 57f674f51b0c..2ded5e476d6e 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3904,21 +3904,16 @@ static int ata_mselect_control_ata_feature(struct ata_queued_cmd *qc, /* Check cdl_ctrl */ switch (buf[0] & 0x03) { case 0: - /* Disable CDL if it is enabled */ - if (!(dev->flags & ATA_DFLAG_CDL_ENABLED)) - return 0; + /* Disable CDL */ ata_dev_dbg(dev, "Disabling CDL\n"); cdl_action = 0; dev->flags &= ~ATA_DFLAG_CDL_ENABLED; break; case 0x02: /* - * Enable CDL if not already enabled. Since this is mutually - * exclusive with NCQ priority, allow this only if NCQ priority - * is disabled. + * Enable CDL. Since CDL is mutually exclusive with NCQ + * priority, allow this only if NCQ priority is disabled. */ - if (dev->flags & ATA_DFLAG_CDL_ENABLED) - return 0; if (dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLED) { ata_dev_err(dev, "NCQ priority must be disabled to enable CDL\n"); From f5b1819193667bf62c3c99d3921b9429997a14b2 Mon Sep 17 00:00:00 2001 From: Louis-Alexis Eyraud Date: Tue, 1 Jul 2025 09:30:40 +0200 Subject: [PATCH 1980/2411] drm/mediatek: dsi: Fix DSI host and panel bridge pre-enable order Since commit c9b1150a68d9 ("drm/atomic-helper: Re-order bridge chain pre-enable and post-disable"), the bridge pre_enable callbacks are now called before crtc enable, and the bridge post_disable callbacks after the crtc disable. 
In the mediatek-drm driver, this change leads to transfer errors on mtk_dsi_host_transfer callback processing during the panel bridge pre-enable sequence because the DSI host bridge pre_enable and CRTC enable sequences, that are enabling the required clocks and PHY using mtk_dsi_poweron function, are called after. So, in order to fix this call order issue, request the DSI host bridge be pre-enabled before panel bridge by setting pre_enable_prev_first flag on DSI device bridge in the mtk_dsi_host_attach function. Fixes: c9b1150a68d9 ("drm/atomic-helper: Re-order bridge chain pre-enable and post-disable") Signed-off-by: Louis-Alexis Eyraud Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20250701-mediatek-drm-fix-dsi-panel-init-v1-1-7af4adb9fdeb@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_dsi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index d7726091819c..0e2bcd5f67b7 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -1002,6 +1002,12 @@ static int mtk_dsi_host_attach(struct mipi_dsi_host *host, return PTR_ERR(dsi->next_bridge); } + /* + * set flag to request the DSI host bridge be pre-enabled before device bridge + * in the chain, so the DSI host is ready when the device bridge is pre-enabled + */ + dsi->next_bridge->pre_enable_prev_first = true; + drm_bridge_add(&dsi->bridge); ret = component_add(host->dev, &mtk_dsi_component_ops); From 94eae6ee4c2df2031bca586405e9ec36e0b9ccf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Tue, 27 May 2025 14:06:37 +0200 Subject: [PATCH 1981/2411] drm/xe/pf: Set VF LMEM BAR size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LMEM is partitioned between multiple VFs and we expect that the more VFs we have, the less LMEM is assigned to each 
VF. This means that we can achieve full LMEM BAR access without the need to attempt full VF LMEM BAR resize via pci_resize_resource(). Always try to set the largest possible BAR size that allows to fit the number of enabled VFs and inform the user in case the resize attempt is not successful. Signed-off-by: Michał Winiarski Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20250527120637.665506-7-michal.winiarski@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 32a4d1b98e6663101fd0abfaf151c48feea7abb1) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_bars.h | 1 + drivers/gpu/drm/xe/xe_pci_sriov.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_bars.h b/drivers/gpu/drm/xe/regs/xe_bars.h index ce05b6ae832f..880140d6ccdc 100644 --- a/drivers/gpu/drm/xe/regs/xe_bars.h +++ b/drivers/gpu/drm/xe/regs/xe_bars.h @@ -7,5 +7,6 @@ #define GTTMMADR_BAR 0 /* MMIO + GTT */ #define LMEM_BAR 2 /* VRAM */ +#define VF_LMEM_BAR 9 /* VF VRAM */ #endif diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index 447a7867eecb..af05db07162e 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -3,6 +3,10 @@ * Copyright © 2023-2024 Intel Corporation */ +#include +#include + +#include "regs/xe_bars.h" #include "xe_assert.h" #include "xe_device.h" #include "xe_gt_sriov_pf_config.h" @@ -128,6 +132,18 @@ static void pf_engine_activity_stats(struct xe_device *xe, unsigned int num_vfs, } } +static int resize_vf_vram_bar(struct xe_device *xe, int num_vfs) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + u32 sizes; + + sizes = pci_iov_vf_bar_get_sizes(pdev, VF_LMEM_BAR, num_vfs); + if (!sizes) + return 0; + + return pci_iov_vf_bar_set_size(pdev, VF_LMEM_BAR, __fls(sizes)); +} + static int pf_enable_vfs(struct xe_device *xe, int num_vfs) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -158,6 +174,12 @@ static int pf_enable_vfs(struct 
xe_device *xe, int num_vfs) if (err < 0) goto failed; + if (IS_DGFX(xe)) { + err = resize_vf_vram_bar(xe, num_vfs); + if (err) + xe_sriov_info(xe, "Failed to set VF LMEM BAR size: %d\n", err); + } + err = pci_enable_sriov(pdev, num_vfs); if (err < 0) goto failed; From 1548549e17e374a126e9a4e9edab8bb041fbd67e Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Wed, 13 Aug 2025 13:16:33 +0200 Subject: [PATCH 1982/2411] MAINTAINERS: update s390/net Remove Thorsten Winkler as maintainer and add Aswin Karuvally as reviewer. Thank you Thorsten for your support, welcome Aswin! Signed-off-by: Alexandra Winter Acked-by: Thorsten Winkler Acked-by: Aswin Karuvally Link: https://patch.msgid.link/20250813111633.241111-1-wintera@linux.ibm.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index daf520a13bdf..2720544cd91f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22174,7 +22174,7 @@ F: arch/s390/mm S390 NETWORK DRIVERS M: Alexandra Winter -M: Thorsten Winkler +R: Aswin Karuvally L: linux-s390@vger.kernel.org L: netdev@vger.kernel.org S: Supported From fd980bf6e9cdae885105685259421164f843ca55 Mon Sep 17 00:00:00 2001 From: Suraj Gupta Date: Wed, 13 Aug 2025 19:25:59 +0530 Subject: [PATCH 1983/2411] net: xilinx: axienet: Fix RX skb ring management in DMAengine mode Submit multiple descriptors in axienet_rx_cb() to fill Rx skb ring. This ensures the ring "catches up" on previously missed allocations. Increment Rx skb ring head pointer after BD is successfully allocated. Previously, head pointer was incremented before verifying if descriptor is successfully allocated and has valid entries, which could lead to ring state inconsistency if descriptor setup failed. These changes improve reliability by maintaining adequate descriptor availability and ensuring proper ring buffer state management. 
Fixes: 6a91b846af85 ("net: axienet: Introduce dmaengine support") Signed-off-by: Suraj Gupta Link: https://patch.msgid.link/20250813135559.1555652-1-suraj.gupta2@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 6011d7eae0c7..0d8a05fe541a 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1160,6 +1160,7 @@ static void axienet_dma_rx_cb(void *data, const struct dmaengine_result *result) struct axienet_local *lp = data; struct sk_buff *skb; u32 *app_metadata; + int i; skbuf_dma = axienet_get_rx_desc(lp, lp->rx_ring_tail++); skb = skbuf_dma->skb; @@ -1178,7 +1179,10 @@ static void axienet_dma_rx_cb(void *data, const struct dmaengine_result *result) u64_stats_add(&lp->rx_packets, 1); u64_stats_add(&lp->rx_bytes, rx_len); u64_stats_update_end(&lp->rx_stat_sync); - axienet_rx_submit_desc(lp->ndev); + + for (i = 0; i < CIRC_SPACE(lp->rx_ring_head, lp->rx_ring_tail, + RX_BUF_NUM_DEFAULT); i++) + axienet_rx_submit_desc(lp->ndev); dma_async_issue_pending(lp->rx_chan); } @@ -1457,7 +1461,6 @@ static void axienet_rx_submit_desc(struct net_device *ndev) if (!skbuf_dma) return; - lp->rx_ring_head++; skb = netdev_alloc_skb(ndev, lp->max_frm_size); if (!skb) return; @@ -1482,6 +1485,7 @@ static void axienet_rx_submit_desc(struct net_device *ndev) skbuf_dma->desc = dma_rx_desc; dma_rx_desc->callback_param = lp; dma_rx_desc->callback_result = axienet_dma_rx_cb; + lp->rx_ring_head++; dmaengine_submit(dma_rx_desc); return; From d1547bf460baec718b3398365f8de33d25c5f36f Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Wed, 13 Aug 2025 10:10:54 +0800 Subject: [PATCH 1984/2411] net: bridge: fix soft lockup in br_multicast_query_expired() When set multicast_query_interval to a large value, the local 
variable 'time' in br_multicast_send_query() may overflow. If the time is smaller than jiffies, the timer will expire immediately, and then call mod_timer() again, which creates a loop and may trigger the following soft lockup issue. watchdog: BUG: soft lockup - CPU#1 stuck for 221s! [rb_consumer:66] CPU: 1 UID: 0 PID: 66 Comm: rb_consumer Not tainted 6.16.0+ #259 PREEMPT(none) Call Trace: __netdev_alloc_skb+0x2e/0x3a0 br_ip6_multicast_alloc_query+0x212/0x1b70 __br_multicast_send_query+0x376/0xac0 br_multicast_send_query+0x299/0x510 br_multicast_query_expired.constprop.0+0x16d/0x1b0 call_timer_fn+0x3b/0x2a0 __run_timers+0x619/0x950 run_timer_softirq+0x11c/0x220 handle_softirqs+0x18e/0x560 __irq_exit_rcu+0x158/0x1a0 sysvec_apic_timer_interrupt+0x76/0x90 This issue can be reproduced with: ip link add br0 type bridge echo 1 > /sys/class/net/br0/bridge/multicast_querier echo 0xffffffffffffffff > /sys/class/net/br0/bridge/multicast_query_interval ip link set dev br0 up The multicast_startup_query_interval can also cause this issue. Similar to the commit 99b40610956a ("net: bridge: mcast: add and enforce query interval minimum"), add check for the query interval maximum to fix this issue. 
Link: https://lore.kernel.org/netdev/20250806094941.1285944-1-wangliang74@huawei.com/ Link: https://lore.kernel.org/netdev/20250812091818.542238-1-wangliang74@huawei.com/ Fixes: d902eee43f19 ("bridge: Add multicast count/interval sysfs entries") Suggested-by: Nikolay Aleksandrov Signed-off-by: Wang Liang Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250813021054.1643649-1-wangliang74@huawei.com Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 16 ++++++++++++++++ net/bridge/br_private.h | 2 ++ 2 files changed, 18 insertions(+) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 1377f31b719c..8ce145938b02 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -4818,6 +4818,14 @@ void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx, intvl_jiffies = BR_MULTICAST_QUERY_INTVL_MIN; } + if (intvl_jiffies > BR_MULTICAST_QUERY_INTVL_MAX) { + br_info(brmctx->br, + "trying to set multicast query interval above maximum, setting to %lu (%ums)\n", + jiffies_to_clock_t(BR_MULTICAST_QUERY_INTVL_MAX), + jiffies_to_msecs(BR_MULTICAST_QUERY_INTVL_MAX)); + intvl_jiffies = BR_MULTICAST_QUERY_INTVL_MAX; + } + brmctx->multicast_query_interval = intvl_jiffies; } @@ -4834,6 +4842,14 @@ void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx, intvl_jiffies = BR_MULTICAST_STARTUP_QUERY_INTVL_MIN; } + if (intvl_jiffies > BR_MULTICAST_STARTUP_QUERY_INTVL_MAX) { + br_info(brmctx->br, + "trying to set multicast startup query interval above maximum, setting to %lu (%ums)\n", + jiffies_to_clock_t(BR_MULTICAST_STARTUP_QUERY_INTVL_MAX), + jiffies_to_msecs(BR_MULTICAST_STARTUP_QUERY_INTVL_MAX)); + intvl_jiffies = BR_MULTICAST_STARTUP_QUERY_INTVL_MAX; + } + brmctx->multicast_startup_query_interval = intvl_jiffies; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index b159aae594c0..8de0904b9627 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h 
@@ -31,6 +31,8 @@ #define BR_MULTICAST_DEFAULT_HASH_MAX 4096 #define BR_MULTICAST_QUERY_INTVL_MIN msecs_to_jiffies(1000) #define BR_MULTICAST_STARTUP_QUERY_INTVL_MIN BR_MULTICAST_QUERY_INTVL_MIN +#define BR_MULTICAST_QUERY_INTVL_MAX msecs_to_jiffies(86400000) /* 24 hours */ +#define BR_MULTICAST_STARTUP_QUERY_INTVL_MAX BR_MULTICAST_QUERY_INTVL_MAX #define BR_HWDOM_MAX BITS_PER_LONG From 52bf272636bda69587952b35ae97690b8dc89941 Mon Sep 17 00:00:00 2001 From: William Liu Date: Tue, 12 Aug 2025 23:57:57 +0000 Subject: [PATCH 1985/2411] net/sched: Fix backlog accounting in qdisc_dequeue_internal This issue applies for the following qdiscs: hhf, fq, fq_codel, and fq_pie, and occurs in their change handlers when adjusting to the new limit. The problem is the following in the values passed to the subsequent qdisc_tree_reduce_backlog call given a tbf parent: When the tbf parent runs out of tokens, skbs of these qdiscs will be placed in gso_skb. Their peek handlers are qdisc_peek_dequeued, which accounts for both qlen and backlog. However, in the case of qdisc_dequeue_internal, ONLY qlen is accounted for when pulling from gso_skb. This means that these qdiscs are missing a qdisc_qstats_backlog_dec when dropping packets to satisfy the new limit in their change handlers. 
One can observe this issue with the following (with tc patched to support a limit of 0): export TARGET=fq tc qdisc del dev lo root tc qdisc add dev lo root handle 1: tbf rate 8bit burst 100b latency 1ms tc qdisc replace dev lo handle 3: parent 1:1 $TARGET limit 1000 echo ''; echo 'add child'; tc -s -d qdisc show dev lo ping -I lo -f -c2 -s32 -W0.001 127.0.0.1 2>&1 >/dev/null echo ''; echo 'after ping'; tc -s -d qdisc show dev lo tc qdisc change dev lo handle 3: parent 1:1 $TARGET limit 0 echo ''; echo 'after limit drop'; tc -s -d qdisc show dev lo tc qdisc replace dev lo handle 2: parent 1:1 sfq echo ''; echo 'post graft'; tc -s -d qdisc show dev lo The second to last show command shows 0 packets but a positive number (74) of backlog bytes. The problem becomes clearer in the last show command, where qdisc_purge_queue triggers qdisc_tree_reduce_backlog with the positive backlog and causes an underflow in the tbf parent's backlog (4096 Mb instead of 0). To fix this issue, the codepath for all clients of qdisc_dequeue_internal has been simplified: codel, pie, hhf, fq, fq_pie, and fq_codel. qdisc_dequeue_internal handles the backlog adjustments for all cases that do not directly use the dequeue handler. The old fq_codel_change limit adjustment loop accumulated the arguments to the subsequent qdisc_tree_reduce_backlog call through the cstats field. However, this is confusing and error prone as fq_codel_dequeue could also potentially mutate this field (which qdisc_dequeue_internal calls in the non gso_skb case), so we have unified the code here with other qdiscs. 
Fixes: 2d3cbfd6d54a ("net_sched: Flush gso_skb list too during ->change()") Fixes: 4b549a2ef4be ("fq_codel: Fair Queue Codel AQM") Fixes: 10239edf86f1 ("net-qdisc-hhf: Heavy-Hitter Filter (HHF) qdisc") Signed-off-by: William Liu Reviewed-by: Savino Dicanosa Link: https://patch.msgid.link/20250812235725.45243-1-will@willsroot.io Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 11 ++++++++--- net/sched/sch_codel.c | 12 +++++++----- net/sched/sch_fq.c | 12 +++++++----- net/sched/sch_fq_codel.c | 12 +++++++----- net/sched/sch_fq_pie.c | 12 +++++++----- net/sched/sch_hhf.c | 12 +++++++----- net/sched/sch_pie.c | 12 +++++++----- 7 files changed, 50 insertions(+), 33 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 638948be4c50..738cd5b13c62 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1038,12 +1038,17 @@ static inline struct sk_buff *qdisc_dequeue_internal(struct Qdisc *sch, bool dir skb = __skb_dequeue(&sch->gso_skb); if (skb) { sch->q.qlen--; + qdisc_qstats_backlog_dec(sch, skb); return skb; } - if (direct) - return __qdisc_dequeue_head(&sch->q); - else + if (direct) { + skb = __qdisc_dequeue_head(&sch->q); + if (skb) + qdisc_qstats_backlog_dec(sch, skb); + return skb; + } else { return sch->dequeue(sch); + } } static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index c93761040c6e..fa0314679e43 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -101,9 +101,9 @@ static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = { static int codel_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { + unsigned int dropped_pkts = 0, dropped_bytes = 0; struct codel_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_CODEL_MAX + 1]; - unsigned int qlen, dropped = 0; int err; err = nla_parse_nested_deprecated(tb, TCA_CODEL_MAX, opt, @@ -142,15 +142,17 @@ static int 
codel_change(struct Qdisc *sch, struct nlattr *opt, WRITE_ONCE(q->params.ecn, !!nla_get_u32(tb[TCA_CODEL_ECN])); - qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { struct sk_buff *skb = qdisc_dequeue_internal(sch, true); - dropped += qdisc_pkt_len(skb); - qdisc_qstats_backlog_dec(sch, skb); + if (!skb) + break; + + dropped_pkts++; + dropped_bytes += qdisc_pkt_len(skb); rtnl_qdisc_drop(skb, sch); } - qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); + qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 902ff5470607..fee922da2f99 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -1013,11 +1013,11 @@ static int fq_load_priomap(struct fq_sched_data *q, static int fq_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { + unsigned int dropped_pkts = 0, dropped_bytes = 0; struct fq_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_FQ_MAX + 1]; - int err, drop_count = 0; - unsigned drop_len = 0; u32 fq_log; + int err; err = nla_parse_nested_deprecated(tb, TCA_FQ_MAX, opt, fq_policy, NULL); @@ -1135,16 +1135,18 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, err = fq_resize(sch, fq_log); sch_tree_lock(sch); } + while (sch->q.qlen > sch->limit) { struct sk_buff *skb = qdisc_dequeue_internal(sch, false); if (!skb) break; - drop_len += qdisc_pkt_len(skb); + + dropped_pkts++; + dropped_bytes += qdisc_pkt_len(skb); rtnl_kfree_skbs(skb, skb); - drop_count++; } - qdisc_tree_reduce_backlog(sch, drop_count, drop_len); + qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); sch_tree_unlock(sch); return err; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 2a0f3a513bfa..a14142392939 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -366,6 +366,7 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = { static int fq_codel_change(struct Qdisc 
*sch, struct nlattr *opt, struct netlink_ext_ack *extack) { + unsigned int dropped_pkts = 0, dropped_bytes = 0; struct fq_codel_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_FQ_CODEL_MAX + 1]; u32 quantum = 0; @@ -443,13 +444,14 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, q->memory_usage > q->memory_limit) { struct sk_buff *skb = qdisc_dequeue_internal(sch, false); - q->cstats.drop_len += qdisc_pkt_len(skb); + if (!skb) + break; + + dropped_pkts++; + dropped_bytes += qdisc_pkt_len(skb); rtnl_kfree_skbs(skb, skb); - q->cstats.drop_count++; } - qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len); - q->cstats.drop_count = 0; - q->cstats.drop_len = 0; + qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index b0e34daf1f75..7b96bc3ff891 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -287,10 +287,9 @@ static struct sk_buff *fq_pie_qdisc_dequeue(struct Qdisc *sch) static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { + unsigned int dropped_pkts = 0, dropped_bytes = 0; struct fq_pie_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_FQ_PIE_MAX + 1]; - unsigned int len_dropped = 0; - unsigned int num_dropped = 0; int err; err = nla_parse_nested(tb, TCA_FQ_PIE_MAX, opt, fq_pie_policy, extack); @@ -368,11 +367,14 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt, while (sch->q.qlen > sch->limit) { struct sk_buff *skb = qdisc_dequeue_internal(sch, false); - len_dropped += qdisc_pkt_len(skb); - num_dropped += 1; + if (!skb) + break; + + dropped_pkts++; + dropped_bytes += qdisc_pkt_len(skb); rtnl_kfree_skbs(skb, skb); } - qdisc_tree_reduce_backlog(sch, num_dropped, len_dropped); + qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 
5aa434b46707..2d4855e28a28 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -508,9 +508,9 @@ static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = { static int hhf_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { + unsigned int dropped_pkts = 0, dropped_bytes = 0; struct hhf_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_HHF_MAX + 1]; - unsigned int qlen, prev_backlog; int err; u64 non_hh_quantum; u32 new_quantum = q->quantum; @@ -561,15 +561,17 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt, usecs_to_jiffies(us)); } - qlen = sch->q.qlen; - prev_backlog = sch->qstats.backlog; while (sch->q.qlen > sch->limit) { struct sk_buff *skb = qdisc_dequeue_internal(sch, false); + if (!skb) + break; + + dropped_pkts++; + dropped_bytes += qdisc_pkt_len(skb); rtnl_kfree_skbs(skb, skb); } - qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, - prev_backlog - sch->qstats.backlog); + qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index ad46ee3ed5a9..0a377313b6a9 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -141,9 +141,9 @@ static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = { static int pie_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { + unsigned int dropped_pkts = 0, dropped_bytes = 0; struct pie_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_PIE_MAX + 1]; - unsigned int qlen, dropped = 0; int err; err = nla_parse_nested_deprecated(tb, TCA_PIE_MAX, opt, pie_policy, @@ -193,15 +193,17 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt, nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR])); /* Drop excess packets if new limit is lower */ - qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { struct sk_buff *skb = qdisc_dequeue_internal(sch, true); - dropped += qdisc_pkt_len(skb); - qdisc_qstats_backlog_dec(sch, skb); + if (!skb) + break; + 
+ dropped_pkts++; + dropped_bytes += qdisc_pkt_len(skb); rtnl_qdisc_drop(skb, sch); } - qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); + qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); sch_tree_unlock(sch); return 0; From 8c06cbdcbaea34d7b96d76df4d6669275c1d291a Mon Sep 17 00:00:00 2001 From: William Liu Date: Tue, 12 Aug 2025 23:58:26 +0000 Subject: [PATCH 1986/2411] selftests/tc-testing: Check backlog stats in gso_skb case Add tests to ensure proper backlog accounting in hhf, codel, pie, fq, fq_pie, and fq_codel qdiscs. We check for the bug pattern originally found in fq, fq_pie, and fq_codel, which was an underflow in the tbf parent backlog stats upon child qdisc removal. Signed-off-by: William Liu Reviewed-by: Savino Dicanosa Link: https://patch.msgid.link/20250812235808.45281-1-will@willsroot.io Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 198 ++++++++++++++++++ 1 file changed, 198 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 23a61e5b99d0..998e5a2f4579 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -185,6 +185,204 @@ "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] }, + { + "id": "34c0", + "name": "Test TBF with HHF Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "hhf" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 hhf limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 hhf limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 
2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "fd68", + "name": "Test TBF with CODEL Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "codel" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 codel limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 codel limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "514e", + "name": "Test TBF with PIE Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "pie" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 pie limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 pie limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "6c97", + "name": "Test TBF with FQ Backlog Accounting in gso_skb case against 
underflow", + "category": [ + "qdisc", + "tbf", + "fq" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 fq limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 fq limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "5d0b", + "name": "Test TBF with FQ_CODEL Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "fq_codel" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 fq_codel limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 fq_codel limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "21c3", + "name": "Test TBF with FQ_PIE Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "fq_pie" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b 
latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 fq_pie limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 fq_pie limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, { "id": "a4bb", "name": "Test FQ_CODEL with HTB parent - force packet drop with empty queue", From 065c31f2c6915b38f45b1c817b31f41f62eaa774 Mon Sep 17 00:00:00 2001 From: Justin Lai Date: Wed, 13 Aug 2025 15:16:31 +0800 Subject: [PATCH 1987/2411] rtase: Fix Rx descriptor CRC error bit definition The CRC error bit is located at bit 17 in the Rx descriptor, but the driver was incorrectly using bit 16. Fix it. Fixes: a36e9f5cfe9e ("rtase: Add support for a pci table in this module") Signed-off-by: Justin Lai Link: https://patch.msgid.link/20250813071631.7566-1-justinlai0215@realtek.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/rtase/rtase.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/rtase/rtase.h b/drivers/net/ethernet/realtek/rtase/rtase.h index 20decdeb9fdb..b9209eb6ea73 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase.h +++ b/drivers/net/ethernet/realtek/rtase/rtase.h @@ -241,7 +241,7 @@ union rtase_rx_desc { #define RTASE_RX_RES BIT(20) #define RTASE_RX_RUNT BIT(19) #define RTASE_RX_RWT BIT(18) -#define RTASE_RX_CRC BIT(16) +#define RTASE_RX_CRC BIT(17) #define RTASE_RX_V6F BIT(31) #define RTASE_RX_V4F BIT(30) #define RTASE_RX_UDPT BIT(29) From d73915fdc0011d536c03856be7ec451f6a5fb4ff Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 31 Jul 2025 15:42:18 -0700 Subject: [PATCH 1988/2411] lib/crypto: sha: Update Kconfig help for SHA1 and SHA256 Update the help text for CRYPTO_LIB_SHA1 and CRYPTO_LIB_SHA256 to reflect the 
addition of HMAC support, and to be consistent with CRYPTO_LIB_SHA512. Link: https://lore.kernel.org/r/20250731224218.137947-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index c2b65b6a9bb6..1e6b008f8fca 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -140,8 +140,8 @@ config CRYPTO_LIB_CHACHA20POLY1305 config CRYPTO_LIB_SHA1 tristate help - The SHA-1 library functions. Select this if your module uses any of - the functions from . + The SHA-1 and HMAC-SHA1 library functions. Select this if your module + uses any of the functions from . config CRYPTO_LIB_SHA1_ARCH bool @@ -157,9 +157,9 @@ config CRYPTO_LIB_SHA1_ARCH config CRYPTO_LIB_SHA256 tristate help - Enable the SHA-256 library interface. This interface may be fulfilled - by either the generic implementation or an arch-specific one, if one - is available and enabled. + The SHA-224, SHA-256, HMAC-SHA224, and HMAC-SHA256 library functions. + Select this if your module uses any of these functions from + . config CRYPTO_LIB_SHA256_ARCH bool From fd7e5de4b2eddd34e3567cd419812d8869ef4f13 Mon Sep 17 00:00:00 2001 From: Tal Zussman Date: Thu, 14 Aug 2025 02:51:57 -0400 Subject: [PATCH 1989/2411] lib/crypto: ensure generated *.S files are removed on make clean make clean does not check the kernel config when removing files. As such, additions to clean-files under CONFIG_ARM or CONFIG_ARM64 are not evaluated. For example, when building on arm64, this means that lib/crypto/arm64/sha{256,512}-core.S are left over after make clean. Set clean-files unconditionally to ensure that make clean removes these files. 
Fixes: e96cb9507f2d ("lib/crypto: sha256: Consolidate into single module") Fixes: 24c91b62ac50 ("lib/crypto: arm/sha512: Migrate optimized SHA-512 code to library") Fixes: 60e3f1e9b7a5 ("lib/crypto: arm64/sha512: Migrate optimized SHA-512 code to library") Signed-off-by: Tal Zussman Link: https://lore.kernel.org/r/20250814-crypto_clean-v2-1-659a2dc86302@columbia.edu Signed-off-by: Eric Biggers --- lib/crypto/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index e4151be2ebd4..539d5d59a50e 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -100,7 +100,6 @@ ifeq ($(CONFIG_ARM),y) libsha256-y += arm/sha256-ce.o arm/sha256-core.o $(obj)/arm/sha256-core.S: $(src)/arm/sha256-armv4.pl $(call cmd,perlasm) -clean-files += arm/sha256-core.S AFLAGS_arm/sha256-core.o += $(aflags-thumb2-y) endif @@ -108,7 +107,6 @@ ifeq ($(CONFIG_ARM64),y) libsha256-y += arm64/sha256-core.o $(obj)/arm64/sha256-core.S: $(src)/arm64/sha2-armv8.pl $(call cmd,perlasm_with_args) -clean-files += arm64/sha256-core.S libsha256-$(CONFIG_KERNEL_MODE_NEON) += arm64/sha256-ce.o endif @@ -132,7 +130,6 @@ ifeq ($(CONFIG_ARM),y) libsha512-y += arm/sha512-core.o $(obj)/arm/sha512-core.S: $(src)/arm/sha512-armv4.pl $(call cmd,perlasm) -clean-files += arm/sha512-core.S AFLAGS_arm/sha512-core.o += $(aflags-thumb2-y) endif @@ -140,7 +137,6 @@ ifeq ($(CONFIG_ARM64),y) libsha512-y += arm64/sha512-core.o $(obj)/arm64/sha512-core.S: $(src)/arm64/sha2-armv8.pl $(call cmd,perlasm_with_args) -clean-files += arm64/sha512-core.S libsha512-$(CONFIG_KERNEL_MODE_NEON) += arm64/sha512-ce-core.o endif @@ -167,3 +163,7 @@ obj-$(CONFIG_PPC) += powerpc/ obj-$(CONFIG_RISCV) += riscv/ obj-$(CONFIG_S390) += s390/ obj-$(CONFIG_X86) += x86/ + +# clean-files must be defined unconditionally +clean-files += arm/sha256-core.S arm/sha512-core.S +clean-files += arm64/sha256-core.S arm64/sha512-core.S From fad2cf04e91fd3c4310731537bf98d1f8a0b4137 Mon Sep 17 
00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Aug 2025 10:28:05 +0200 Subject: [PATCH 1990/2411] scsi: fnic: Remove a useless struct mempool forward declaration struct mempool doesn't currently exist, and thus also isn't used in fnic.h, remove it. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250812082808.371119-1-hch@lst.de Reviewed-by: Karan Tilak Kumar Signed-off-by: Martin K. Petersen --- drivers/scsi/fnic/fnic.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h index c2fdc6553e62..1199d701c3f5 100644 --- a/drivers/scsi/fnic/fnic.h +++ b/drivers/scsi/fnic/fnic.h @@ -323,8 +323,6 @@ enum fnic_state { FNIC_IN_ETH_TRANS_FC_MODE, }; -struct mempool; - enum fnic_role_e { FNIC_ROLE_FCP_INITIATOR = 0, }; From 50a8c08b8b69399a09c2dbcad8ef3fef9d9349d2 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Wed, 6 Aug 2025 17:52:24 +0800 Subject: [PATCH 1991/2411] drm/bridge: analogix_dp: Fix bailout for devm_drm_bridge_alloc() devm_drm_bridge_alloc() returns ERR_PTR on failure instead of a NULL pointer, so use IS_ERR() to check the returned pointer. While at it, on failure, return ERR_CAST(dp) instead of ERR_PTR(-ENOMEM) in order not to depend on devm_drm_bridge_alloc() error code implementation. 
Fixes: 48f05c3b4b70 ("drm/bridge: analogix_dp: Use devm_drm_bridge_alloc() API") Signed-off-by: Liu Ying Reviewed-by: Luca Ceresoli Link: https://lore.kernel.org/r/20250806095224.527938-1-victor.liu@nxp.com --- drivers/gpu/drm/bridge/analogix/analogix_dp_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index ed35e567d117..efe534977d12 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1474,8 +1474,8 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) dp = devm_drm_bridge_alloc(dev, struct analogix_dp_device, bridge, &analogix_dp_bridge_funcs); - if (!dp) - return ERR_PTR(-ENOMEM); + if (IS_ERR(dp)) + return ERR_CAST(dp); dp->dev = &pdev->dev; dp->dpms_mode = DRM_MODE_DPMS_OFF; From 823f95575d85454ccad7d5b684aec42e57b962f6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 12 Aug 2025 16:02:59 +0300 Subject: [PATCH 1992/2411] scsi: ufs: ufs-pci: Add support for Intel Wildcat Lake Add PCI ID to support Intel Wildcat Lake, same as MTL. Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20250812130259.109645-1-adrian.hunter@intel.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen --- drivers/ufs/host/ufshcd-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ufs/host/ufshcd-pci.c b/drivers/ufs/host/ufshcd-pci.c index b39239f641f2..b87e03777395 100644 --- a/drivers/ufs/host/ufshcd-pci.c +++ b/drivers/ufs/host/ufshcd-pci.c @@ -630,6 +630,7 @@ static const struct pci_device_id ufshcd_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0xA847), (kernel_ulong_t)&ufs_intel_mtl_hba_vops }, { PCI_VDEVICE(INTEL, 0x7747), (kernel_ulong_t)&ufs_intel_mtl_hba_vops }, { PCI_VDEVICE(INTEL, 0xE447), (kernel_ulong_t)&ufs_intel_mtl_hba_vops }, + { PCI_VDEVICE(INTEL, 0x4D47), (kernel_ulong_t)&ufs_intel_mtl_hba_vops }, { } /* terminate list */ }; From 9dcf111dd3e7ed5fce82bb108e3a3fc001c07225 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Aug 2025 08:49:08 +0300 Subject: [PATCH 1993/2411] scsi: qla4xxx: Prevent a potential error pointer dereference The qla4xxx_get_ep_fwdb() function is supposed to return NULL on error, but qla4xxx_ep_connect() returns error pointers. Propagating the error pointers will lead to an Oops in the caller, so change the error pointers to NULL. Fixes: 13483730a13b ("[SCSI] qla4xxx: fix flash/ddb support") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/aJwnVKS9tHsw1tEu@stanley.mountain Reviewed-by: Chris Leech Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qla4xxx/ql4_os.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index a39f1da4ce47..a761c0aa5127 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -6606,6 +6606,8 @@ static struct iscsi_endpoint *qla4xxx_get_ep_fwdb(struct scsi_qla_host *ha, ep = qla4xxx_ep_connect(ha->host, (struct sockaddr *)dst_addr, 0); vfree(dst_addr); + if (IS_ERR(ep)) + return NULL; return ep; } From 8503d0fcb1086a7cfe26df67ca4bd9bd9e99bdec Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 4 Aug 2025 08:40:27 -0700 Subject: [PATCH 1994/2411] iommu/amd: Avoid stack buffer overflow from kernel cmdline While the kernel command line is considered trusted in most environments, avoid writing 1 byte past the end of "acpiid" if the "str" argument is maximum length. Reported-by: Simcha Kosman Closes: https://lore.kernel.org/all/AS8P193MB2271C4B24BCEDA31830F37AE84A52@AS8P193MB2271.EURP193.PROD.OUTLOOK.COM Fixes: b6b26d86c61c ("iommu/amd: Add a length limitation for the ivrs_acpihid command-line parameter") Signed-off-by: Kees Cook Reviewed-by: Ankit Soni Link: https://lore.kernel.org/r/20250804154023.work.970-kees@kernel.org Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 7b5af6176de9..8de689b2c5ed 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3638,7 +3638,7 @@ static int __init parse_ivrs_acpihid(char *str) { u32 seg = 0, bus, dev, fn; char *hid, *uid, *p, *addr; - char acpiid[ACPIID_LEN] = {0}; + char acpiid[ACPIID_LEN + 1] = { }; /* size with NULL terminator */ int i; addr = strchr(str, '@'); @@ -3664,7 +3664,7 @@ static int __init parse_ivrs_acpihid(char *str) /* We have the '@', make it the terminator to get just the acpiid */ *addr++ = 0; - if (strlen(str) > ACPIID_LEN + 1) + if (strlen(str) > ACPIID_LEN) goto 
not_found; if (sscanf(str, "=%s", acpiid) != 1) From 41f0200c718cf1826959a082a5374838c15bd242 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 14 Aug 2025 12:30:38 -0700 Subject: [PATCH 1995/2411] iommu/tegra241-cmdqv: Fix missing cpu_to_le64 at lvcmdq_err_map Sparse reported a warning: drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c:305:47: sparse: expected restricted __le64 sparse: got unsigned long long Add cpu_to_le64() to fix that. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202508142105.Jb5Smjsg-lkp@intel.com/ Suggested-by: Pranjal Shrivastava Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20250814193039.2265813-1-nicolinc@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index be1aaaf8cd17..378104cd395e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -301,9 +301,11 @@ static void tegra241_vintf_user_handle_error(struct tegra241_vintf *vintf) struct iommu_vevent_tegra241_cmdqv vevent_data; int i; - for (i = 0; i < LVCMDQ_ERR_MAP_NUM_64; i++) - vevent_data.lvcmdq_err_map[i] = - readq_relaxed(REG_VINTF(vintf, LVCMDQ_ERR_MAP_64(i))); + for (i = 0; i < LVCMDQ_ERR_MAP_NUM_64; i++) { + u64 err = readq_relaxed(REG_VINTF(vintf, LVCMDQ_ERR_MAP_64(i))); + + vevent_data.lvcmdq_err_map[i] = cpu_to_le64(err); + } iommufd_viommu_report_event(viommu, IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV, &vevent_data, sizeof(vevent_data)); From b64fdd422a85025b5e91ead794db9d3ef970e369 Mon Sep 17 00:00:00 2001 From: Yunseong Kim Date: Tue, 12 Aug 2025 18:10:47 +0000 Subject: [PATCH 1996/2411] perf: Avoid undefined behavior from stopping/starting inactive events Calling pmu->start()/stop() on perf events in PERF_EVENT_STATE_OFF can leave event->hw.idx at -1. 
When PMU drivers later attempt to use this negative index as a shift exponent in bitwise operations, it leads to UBSAN shift-out-of-bounds reports. The issue is a logical flaw in how event groups handle throttling when some members are intentionally disabled. Based on the analysis and the reproducer provided by Mark Rutland (the issue reproduces on both arm64 and x86-64), the scenario unfolds as follows: 1. A group leader event is configured with a very aggressive sampling period (e.g., sample_period = 1). This causes frequent interrupts and triggers the throttling mechanism. 2. A child event in the same group is created in a disabled state (.disabled = 1). This event remains in PERF_EVENT_STATE_OFF. Since it hasn't been scheduled onto the PMU, its event->hw.idx remains initialized at -1. 3. When throttling occurs, perf_event_throttle_group() and later perf_event_unthrottle_group() iterate through all siblings, including the disabled child event. 4. perf_event_throttle()/unthrottle() are called on this inactive child event, which then call event->pmu->start()/stop(). 5. The PMU driver receives the event with hw.idx == -1 and attempts to use it as a shift exponent, e.g., in macros like PMCNTENSET(idx), leading to the UBSAN report. The throttling mechanism attempts to start/stop events that are not actively scheduled on the hardware. Move the state check into perf_event_throttle()/perf_event_unthrottle() so that inactive events are skipped entirely. This ensures only active events with a valid hw.idx are processed, preventing undefined behavior and silencing UBSAN warnings. The corrected check ensures that event->state == PERF_EVENT_STATE_ACTIVE holds before proceeding with PMU operations.
The problem can be reproduced with the syzkaller reproducer: Fixes: 9734e25fbf5a ("perf: Fix the throttle logic for a group") Signed-off-by: Yunseong Kim Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20250812181046.292382-2-ysk@kzalloc.com --- kernel/events/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 8060c2857bb2..872122e074e5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2665,6 +2665,9 @@ static void perf_log_itrace_start(struct perf_event *event); static void perf_event_unthrottle(struct perf_event *event, bool start) { + if (event->state != PERF_EVENT_STATE_ACTIVE) + return; + event->hw.interrupts = 0; if (start) event->pmu->start(event, 0); @@ -2674,6 +2677,9 @@ static void perf_event_unthrottle(struct perf_event *event, bool start) static void perf_event_throttle(struct perf_event *event) { + if (event->state != PERF_EVENT_STATE_ACTIVE) + return; + event->hw.interrupts = MAX_INTERRUPTS; event->pmu->stop(event, 0); if (event == event->group_leader) From de5d7d3f27ddd4046736f558a40e252ddda82013 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Mon, 28 Jul 2025 17:08:44 +0800 Subject: [PATCH 1997/2411] Bluetooth: hci_sync: Avoid adding default advertising on startup list_empty(&hdev->adv_instances) is always true during startup, so an advertising instance is added by default. 
Call trace: dump_backtrace+0x94/0xec show_stack+0x18/0x24 dump_stack_lvl+0x48/0x60 dump_stack+0x18/0x24 hci_setup_ext_adv_instance_sync+0x17c/0x328 hci_powered_update_adv_sync+0xb4/0x12c hci_powered_update_sync+0x54/0x70 hci_power_on_sync+0xe4/0x278 hci_set_powered_sync+0x28/0x34 set_powered_sync+0x40/0x58 hci_cmd_sync_work+0x94/0x100 process_one_work+0x168/0x444 worker_thread+0x378/0x3f4 kthread+0x108/0x10c ret_from_fork+0x10/0x20 Link: https://github.com/bluez/bluez/issues/1442 Signed-off-by: Yang Li Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 2b4f21fbf9c1..7397b6b50ccb 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -3344,7 +3344,7 @@ static int hci_powered_update_adv_sync(struct hci_dev *hdev) * advertising data. This also applies to the case * where BR/EDR was toggled during the AUTO_OFF phase. */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + if (hci_dev_test_flag(hdev, HCI_ADVERTISING) && list_empty(&hdev->adv_instances)) { if (ext_adv_capable(hdev)) { err = hci_setup_ext_adv_instance_sync(hdev, 0x00); From ca88be1a2725a42f8dbad579181611d9dcca8e88 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 24 Jul 2025 16:43:18 -0400 Subject: [PATCH 1998/2411] Bluetooth: hci_sync: Fix scan state after PA Sync has been established Passive scanning is used to program the address of the peer to be synchronized, so once HCI_EV_LE_PA_SYNC_ESTABLISHED is received it needs to be updated after clearing HCI_PA_SYNC then call hci_update_passive_scan_sync to return it to its original state. 
Fixes: 6d0417e4e1cf ("Bluetooth: hci_conn: Fix not setting conn_timeout for Broadcast Receiver") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 7397b6b50ccb..387c128f2ba0 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6985,8 +6985,6 @@ static void create_pa_complete(struct hci_dev *hdev, void *data, int err) hci_dev_lock(hdev); - hci_dev_clear_flag(hdev, HCI_PA_SYNC); - if (!hci_conn_valid(hdev, conn)) clear_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); @@ -7080,6 +7078,11 @@ static int hci_le_pa_create_sync(struct hci_dev *hdev, void *data) __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC_CANCEL, 0, NULL, HCI_CMD_TIMEOUT); + hci_dev_clear_flag(hdev, HCI_PA_SYNC); + + /* Update passive scan since HCI_PA_SYNC flag has been cleared */ + hci_update_passive_scan_sync(hdev); + return err; } From aee29c18a38d479c2f058c9b6a39b0527cf81d10 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 24 Jul 2025 16:36:27 -0400 Subject: [PATCH 1999/2411] Bluetooth: ISO: Fix getname not returning broadcast fields getname shall return iso_bc fields for both BIS_LINK and PA_LINK since the likes of bluetoothd do use the getpeername to retrieve the SID both when enumerating the broadcasters and when synchronizing. 
Fixes: a7bcffc673de ("Bluetooth: Add PA_LINK to distinguish BIG sync and PA sync connections") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 7bd3aa0a6db9..eaffd25570e3 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1347,7 +1347,7 @@ static int iso_sock_getname(struct socket *sock, struct sockaddr *addr, bacpy(&sa->iso_bdaddr, &iso_pi(sk)->dst); sa->iso_bdaddr_type = iso_pi(sk)->dst_type; - if (hcon && hcon->type == BIS_LINK) { + if (hcon && (hcon->type == BIS_LINK || hcon->type == PA_LINK)) { sa->iso_bc->bc_sid = iso_pi(sk)->bc_sid; sa->iso_bc->bc_num_bis = iso_pi(sk)->bc_num_bis; memcpy(sa->iso_bc->bc_bis, iso_pi(sk)->bc_bis, From d36349ea73d805bb72cbc24ab90cb1da4ad5c379 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 28 Jul 2025 13:51:01 -0400 Subject: [PATCH 2000/2411] Bluetooth: hci_conn: Fix running bis_cleanup for hci_conn->type PA_LINK Connections with type of PA_LINK shall be considered temporary, just to track the lifetime of PA Sync setup. Once the BIG Sync is established and connections are created with BIS_LINK, the existing PA_LINK connection shall no longer use bis_cleanup, otherwise it terminates the PA Sync when that shall be left to the BIS_LINK connection to do. Fixes: a7bcffc673de ("Bluetooth: Add PA_LINK to distinguish BIG sync and PA sync connections") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 12 +++++++++++- net/bluetooth/hci_event.c | 7 ++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 7d1e79f69cd1..f8b20b609a03 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -830,7 +830,17 @@ static void bis_cleanup(struct hci_conn *conn) /* Check if ISO connection is a BIS and terminate advertising * set and BIG if there are no other connections using it.
*/ - bis = hci_conn_hash_lookup_big(hdev, conn->iso_qos.bcast.big); + bis = hci_conn_hash_lookup_big_state(hdev, + conn->iso_qos.bcast.big, + BT_CONNECTED, + HCI_ROLE_MASTER); + if (bis) + return; + + bis = hci_conn_hash_lookup_big_state(hdev, + conn->iso_qos.bcast.big, + BT_CONNECT, + HCI_ROLE_MASTER); if (bis) return; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 8aa5039b975a..4f0a6116291e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6957,9 +6957,14 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, continue; } - if (ev->status != 0x42) + if (ev->status != 0x42) { /* Mark PA sync as established */ set_bit(HCI_CONN_PA_SYNC, &bis->flags); + /* Reset cleanup callback of PA Sync so it doesn't + * terminate the sync when deleting the connection. + */ + conn->cleanup = NULL; + } bis->sync_handle = conn->sync_handle; bis->iso_qos.bcast.big = ev->handle; From 3ba486c5f3ce2c22ffd29c0103404cdbe21912b3 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 29 Jul 2025 12:11:09 -0400 Subject: [PATCH 2001/2411] Bluetooth: hci_conn: Fix not cleaning up Broadcaster/Broadcast Source This fixes Broadcaster/Broadcast Source not sending HCI_OP_LE_TERM_BIG because HCI_CONN_PER_ADV was not being set.
Fixes: a7bcffc673de ("Bluetooth: Add PA_LINK to distinguish BIG sync and PA sync connections") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index f8b20b609a03..ab6fe5b0cc0f 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2259,7 +2259,7 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, * the start periodic advertising and create BIG commands have * been queued */ - hci_conn_hash_list_state(hdev, bis_mark_per_adv, PA_LINK, + hci_conn_hash_list_state(hdev, bis_mark_per_adv, BIS_LINK, BT_BOUND, &data); /* Queue start periodic advertising and create BIG */ From 099799fa9b76c5c02b49e07005a85117a25b01ea Mon Sep 17 00:00:00 2001 From: Jiande Lu Date: Thu, 24 Jul 2025 16:51:17 +0800 Subject: [PATCH 2002/2411] Bluetooth: btmtk: Fix wait_on_bit_timeout interruption during shutdown During the shutdown process, an interrupt occurs that prematurely terminates the wait for the expected event. This change replaces TASK_INTERRUPTIBLE with TASK_UNINTERRUPTIBLE in the wait_on_bit_timeout call to ensure the shutdown process completes as intended without being interrupted by signals. Fixes: d019930b0049 ("Bluetooth: btmtk: move btusb_mtk_hci_wmt_sync to btmtk.c") Signed-off-by: Jiande Lu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtk.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index 4390fd571dbd..a8c520dc09e1 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -642,12 +642,7 @@ static int btmtk_usb_hci_wmt_sync(struct hci_dev *hdev, * WMT command. 
*/ err = wait_on_bit_timeout(&data->flags, BTMTK_TX_WAIT_VND_EVT, - TASK_INTERRUPTIBLE, HCI_INIT_TIMEOUT); - if (err == -EINTR) { - bt_dev_err(hdev, "Execution of wmt command interrupted"); - clear_bit(BTMTK_TX_WAIT_VND_EVT, &data->flags); - goto err_free_wc; - } + TASK_UNINTERRUPTIBLE, HCI_INIT_TIMEOUT); if (err) { bt_dev_err(hdev, "Execution of wmt command timed out"); From 709788b154caf042874d765628ffa860f0bb0d1e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 4 Aug 2025 09:54:05 -0400 Subject: [PATCH 2003/2411] Bluetooth: hci_core: Fix using {cis,bis}_capable for current settings {cis,bis}_capable only indicates the controller supports the feature since it doesn't check that LE is enabled so it shall not be used for current setting, instead this introduces {cis,bis}_enabled macros that can be used to indicate that these features are currently enabled. Fixes: 26afbd826ee3 ("Bluetooth: Add initial implementation of CIS connections") Fixes: eca0ae4aea66 ("Bluetooth: Add initial implementation of BIS connections") Fixes: ae7533613133 ("Bluetooth: Check for ISO support in controller") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/bluetooth.h | 4 ++-- include/net/bluetooth/hci_core.h | 13 ++++++++++++- net/bluetooth/hci_sync.c | 4 ++-- net/bluetooth/iso.c | 14 +++++++------- net/bluetooth/mgmt.c | 10 +++++----- 5 files changed, 28 insertions(+), 17 deletions(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index ada5b56a4413..e5751f3070b8 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -647,7 +647,7 @@ static inline void sco_exit(void) #if IS_ENABLED(CONFIG_BT_LE) int iso_init(void); int iso_exit(void); -bool iso_enabled(void); +bool iso_inited(void); #else static inline int iso_init(void) { @@ -659,7 +659,7 @@ static inline int iso_exit(void) return 0; } -static inline bool iso_enabled(void) +static inline bool iso_inited(void) { return false; } diff 
--git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 4dc11c66f7b8..bc29f2e2e16f 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1915,6 +1915,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn); !hci_dev_test_flag(dev, HCI_RPA_EXPIRED)) #define adv_rpa_valid(adv) (bacmp(&adv->random_addr, BDADDR_ANY) && \ !adv->rpa_expired) +#define le_enabled(dev) (lmp_le_capable(dev) && \ + hci_dev_test_flag(dev, HCI_LE_ENABLED)) #define scan_1m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_1M) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_1M)) @@ -1981,14 +1983,23 @@ void hci_conn_del_sysfs(struct hci_conn *conn); /* CIS Master/Slave and BIS support */ #define iso_capable(dev) (cis_capable(dev) || bis_capable(dev)) +#define iso_enabled(dev) (le_enabled(dev) && iso_capable(dev)) #define cis_capable(dev) \ (cis_central_capable(dev) || cis_peripheral_capable(dev)) +#define cis_enabled(dev) (le_enabled(dev) && cis_capable(dev)) #define cis_central_capable(dev) \ ((dev)->le_features[3] & HCI_LE_CIS_CENTRAL) +#define cis_central_enabled(dev) \ + (le_enabled(dev) && cis_central_capable(dev)) #define cis_peripheral_capable(dev) \ ((dev)->le_features[3] & HCI_LE_CIS_PERIPHERAL) +#define cis_peripheral_enabled(dev) \ + (le_enabled(dev) && cis_peripheral_capable(dev)) #define bis_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_BROADCASTER) -#define sync_recv_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_SYNC_RECEIVER) +#define bis_enabled(dev) (le_enabled(dev) && bis_capable(dev)) +#define sync_recv_capable(dev) \ + ((dev)->le_features[3] & HCI_LE_ISO_SYNC_RECEIVER) +#define sync_recv_enabled(dev) (le_enabled(dev) && sync_recv_capable(dev)) #define mws_transport_config_capable(dev) (((dev)->commands[30] & 0x08) && \ (!hci_test_quirk((dev), HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG))) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 387c128f2ba0..aa7d7a8ec3ee 100644 --- a/net/bluetooth/hci_sync.c +++ 
b/net/bluetooth/hci_sync.c @@ -4531,14 +4531,14 @@ static int hci_le_set_host_feature_sync(struct hci_dev *hdev) { struct hci_cp_le_set_host_feature cp; - if (!cis_capable(hdev)) + if (!iso_capable(hdev)) return 0; memset(&cp, 0, sizeof(cp)); /* Connected Isochronous Channels (Host Support) */ cp.bit_number = 32; - cp.bit_value = 1; + cp.bit_value = iso_enabled(hdev) ? 0x01 : 0x00; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_HOST_FEATURE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index eaffd25570e3..5ce823ca3aaf 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -2483,11 +2483,11 @@ static const struct net_proto_family iso_sock_family_ops = { .create = iso_sock_create, }; -static bool iso_inited; +static bool inited; -bool iso_enabled(void) +bool iso_inited(void) { - return iso_inited; + return inited; } int iso_init(void) @@ -2496,7 +2496,7 @@ int iso_init(void) BUILD_BUG_ON(sizeof(struct sockaddr_iso) > sizeof(struct sockaddr)); - if (iso_inited) + if (inited) return -EALREADY; err = proto_register(&iso_proto, 0); @@ -2524,7 +2524,7 @@ int iso_init(void) iso_debugfs = debugfs_create_file("iso", 0444, bt_debugfs, NULL, &iso_debugfs_fops); - iso_inited = true; + inited = true; return 0; @@ -2535,7 +2535,7 @@ int iso_init(void) int iso_exit(void) { - if (!iso_inited) + if (!inited) return -EALREADY; bt_procfs_cleanup(&init_net, "iso"); @@ -2549,7 +2549,7 @@ int iso_exit(void) proto_unregister(&iso_proto); - iso_inited = false; + inited = false; return 0; } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 1ce682038b51..c42dffe77daf 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -922,16 +922,16 @@ static u32 get_current_settings(struct hci_dev *hdev) if (hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED)) settings |= MGMT_SETTING_WIDEBAND_SPEECH; - if (cis_central_capable(hdev)) + if (cis_central_enabled(hdev)) settings |= MGMT_SETTING_CIS_CENTRAL; - if 
(cis_peripheral_capable(hdev)) + if (cis_peripheral_enabled(hdev)) settings |= MGMT_SETTING_CIS_PERIPHERAL; - if (bis_capable(hdev)) + if (bis_enabled(hdev)) settings |= MGMT_SETTING_ISO_BROADCASTER; - if (sync_recv_capable(hdev)) + if (sync_recv_enabled(hdev)) settings |= MGMT_SETTING_ISO_SYNC_RECEIVER; if (ll_privacy_capable(hdev)) @@ -4513,7 +4513,7 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, } if (IS_ENABLED(CONFIG_BT_LE)) { - flags = iso_enabled() ? BIT(0) : 0; + flags = iso_inited() ? BIT(0) : 0; memcpy(rp->features[idx].uuid, iso_socket_uuid, 16); rp->features[idx].flags = cpu_to_le32(flags); idx++; From 3dcf7175f2c04bd3a7d50db3fa42a0bd933b6e23 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 4 Aug 2025 14:05:03 -0400 Subject: [PATCH 2004/2411] Bluetooth: hci_core: Fix using ll_privacy_capable for current settings ll_privacy_capable only indicates that the controller supports the feature but it doesn't check that LE is enabled so it ends up being marked as active in the current settings when it shouldn't.
Fixes: ad383c2c65a5 ("Bluetooth: hci_sync: Enable advertising when LL privacy is enabled") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/mgmt.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index bc29f2e2e16f..bb30bde6f0e8 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1934,6 +1934,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn); ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED)) #define ll_privacy_capable(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY) +#define ll_privacy_enabled(dev) (le_enabled(dev) && ll_privacy_capable(dev)) #define privacy_mode_capable(dev) (ll_privacy_capable(dev) && \ ((dev)->commands[39] & 0x04)) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index c42dffe77daf..3166f5fb876b 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -934,7 +934,7 @@ static u32 get_current_settings(struct hci_dev *hdev) if (sync_recv_enabled(hdev)) settings |= MGMT_SETTING_ISO_SYNC_RECEIVER; - if (ll_privacy_capable(hdev)) + if (ll_privacy_enabled(hdev)) settings |= MGMT_SETTING_LL_PRIVACY; return settings; From 4d19cd228bbe8ff84a63fe7b11bc756b4b4370c7 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 7 Aug 2025 15:56:03 +0800 Subject: [PATCH 2005/2411] Bluetooth: hci_sync: Prevent unintended PA sync when SID is 0xFF After LE Extended Scan times out, conn->sid remains 0xFF, so the PA sync creation process should be aborted. Btmon snippet from PA sync with SID=0xFF: < HCI Command: LE Set Extended.. (0x08|0x0042) plen 6 #74726 [hci0] 863.107927 Extended scan: Enabled (0x01) Filter duplicates: Enabled (0x01) Duration: 0 msec (0x0000) Period: 0.00 sec (0x0000) > HCI Event: Command Complete (0x0e) plen 4 #74727 [hci0] 863.109389 LE Set Extended Scan Enable (0x08|0x0042) ncmd 1 Status: Success (0x00) < HCI Command: LE Periodic Ad.. 
(0x08|0x0044) plen 14 #74728 [hci0] 865.141168 Options: 0x0000 Use advertising SID, Advertiser Address Type and address Reporting initially enabled SID: 0xff Adv address type: Random (0x01) Adv address: 0D:D7:2C:E7:42:46 (Non-Resolvable) Skip: 0x0000 Sync timeout: 20000 msec (0x07d0) Sync CTE type: 0x0000 > HCI Event: Command Status (0x0f) plen 4 #74729 [hci0] 865.143223 LE Periodic Advertising Create Sync (0x08|0x0044) ncmd 1 Status: Success (0x00) Fixes: e2d471b7806b ("Bluetooth: ISO: Fix not using SID from adv report") Signed-off-by: Yang Li Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index aa7d7a8ec3ee..31d72b9683ef 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -7045,10 +7045,13 @@ static int hci_le_pa_create_sync(struct hci_dev *hdev, void *data) /* SID has not been set listen for HCI_EV_LE_EXT_ADV_REPORT to update * it. 
*/ - if (conn->sid == HCI_SID_INVALID) - __hci_cmd_sync_status_sk(hdev, HCI_OP_NOP, 0, NULL, - HCI_EV_LE_EXT_ADV_REPORT, - conn->conn_timeout, NULL); + if (conn->sid == HCI_SID_INVALID) { + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_NOP, 0, NULL, + HCI_EV_LE_EXT_ADV_REPORT, + conn->conn_timeout, NULL); + if (err == -ETIMEDOUT) + goto done; + } memset(&cp, 0, sizeof(cp)); cp.options = qos->bcast.options; @@ -7078,6 +7081,7 @@ static int hci_le_pa_create_sync(struct hci_dev *hdev, void *data) __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC_CANCEL, 0, NULL, HCI_CMD_TIMEOUT); +done: hci_dev_clear_flag(hdev, HCI_PA_SYNC); /* Update passive scan since HCI_PA_SYNC flag has been cleared */ From 0b3725dbf61b51e7c663834811b3691157ae17d6 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sat, 9 Aug 2025 11:36:20 +0300 Subject: [PATCH 2006/2411] Bluetooth: hci_event: fix MTU for BN == 0 in CIS Established BN == 0x00 in CIS Established means no isochronous data for the corresponding direction (Core v6.1 pp. 2394). In this case SDU MTU should be 0. However, the specification does not say the Max_PDU_C_To_P or P_To_C are then zero. Intel AX210 in Framed CIS mode sets nonzero Max_PDU for direction with zero BN. This causes failure later when we try to LE Setup ISO Data Path for disabled direction, which is disallowed (Core v6.1 pp. 2750). Fix by setting SDU MTU to 0 if BN == 0. 
Fixes: 2be22f1941d5f ("Bluetooth: hci_event: Fix parsing of CIS Established Event") Signed-off-by: Pauli Virtanen Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4f0a6116291e..fe7cdd67ad2a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6745,8 +6745,8 @@ static void hci_le_cis_established_evt(struct hci_dev *hdev, void *data, qos->ucast.out.latency = DIV_ROUND_CLOSEST(get_unaligned_le24(ev->p_latency), 1000); - qos->ucast.in.sdu = le16_to_cpu(ev->c_mtu); - qos->ucast.out.sdu = le16_to_cpu(ev->p_mtu); + qos->ucast.in.sdu = ev->c_bn ? le16_to_cpu(ev->c_mtu) : 0; + qos->ucast.out.sdu = ev->p_bn ? le16_to_cpu(ev->p_mtu) : 0; qos->ucast.in.phy = ev->c_phy; qos->ucast.out.phy = ev->p_phy; break; @@ -6760,8 +6760,8 @@ static void hci_le_cis_established_evt(struct hci_dev *hdev, void *data, qos->ucast.in.latency = DIV_ROUND_CLOSEST(get_unaligned_le24(ev->p_latency), 1000); - qos->ucast.out.sdu = le16_to_cpu(ev->c_mtu); - qos->ucast.in.sdu = le16_to_cpu(ev->p_mtu); + qos->ucast.out.sdu = ev->c_bn ? le16_to_cpu(ev->c_mtu) : 0; + qos->ucast.in.sdu = ev->p_bn ? le16_to_cpu(ev->p_mtu) : 0; qos->ucast.out.phy = ev->c_phy; qos->ucast.in.phy = ev->p_phy; break; From b5ca88927e353185b3d9ac4362d33e5aeb25771f Mon Sep 17 00:00:00 2001 From: Thomas Bertschinger Date: Thu, 14 Aug 2025 17:54:28 -0600 Subject: [PATCH 2007/2411] fhandle: do_handle_open() should get FD with user flags In f07c7cc4684a, do_handle_open() was switched to use the automatic cleanup method for getting a FD. In that change it was also switched to pass O_CLOEXEC unconditionally to get_unused_fd_flags() instead of passing the user-specified flags. I don't see anything in that commit description that indicates this was intentional, so I am assuming it was an oversight. 
With this fix, the FD will again be opened with, or without, O_CLOEXEC according to what the user requested. Fixes: f07c7cc4684a ("fhandle: simplify error handling") Signed-off-by: Thomas Bertschinger Link: https://lore.kernel.org/20250814235431.995876-4-tahbertschinger@gmail.com Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/fhandle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fhandle.c b/fs/fhandle.c index 7c236f64cdea..68a7d2861c58 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -402,7 +402,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, if (retval) return retval; - CLASS(get_unused_fd, fd)(O_CLOEXEC); + CLASS(get_unused_fd, fd)(open_flag); if (fd < 0) return fd; From a3de58b12ce074ec05b8741fa28d62ccb1070468 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 14 Aug 2025 22:45:50 +0100 Subject: [PATCH 2008/2411] netfs: Fix unbuffered write error handling If all the subrequests in an unbuffered write stream fail, the subrequest collector doesn't update the stream->transferred value and it retains its initial LONG_MAX value. Unfortunately, if all active streams fail, then we take the smallest value of { LONG_MAX, LONG_MAX, ... } as the value to set in wreq->transferred - which is then returned from ->write_iter(). LONG_MAX was chosen as the initial value so that all the streams can be quickly assessed by taking the smallest value of all stream->transferred - but this only works if we've set any of them. Fix this by adding a flag to indicate whether the value in stream->transferred is valid and checking that when we integrate the values. stream->transferred can then be initialised to zero. This was found by running the generic/750 xfstest against cifs with cache=none. It splices data to the target file. Once (if) it has used up all the available scratch space, the writes start failing with ENOSPC. This causes ->write_iter() to fail. However, it was returning wreq->transferred, i.e. 
LONG_MAX, rather than an error (because it thought the amount transferred was non-zero) and iter_file_splice_write() would then try to clean up that amount of pipe bufferage - leading to an oops when it overran. The kernel log showed: CIFS: VFS: Send error in write = -28 followed by: BUG: kernel NULL pointer dereference, address: 0000000000000008 with: RIP: 0010:iter_file_splice_write+0x3a4/0x520 do_splice+0x197/0x4e0 or: RIP: 0010:pipe_buf_release (include/linux/pipe_fs_i.h:282) iter_file_splice_write (fs/splice.c:755) Also put a warning check into splice to announce if ->write_iter() returned that it had written more than it was asked to. Fixes: 288ace2f57c9 ("netfs: New writeback implementation") Reported-by: Xiaoli Feng Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220445 Signed-off-by: David Howells Link: https://lore.kernel.org/915443.1755207950@warthog.procyon.org.uk cc: Paulo Alcantara cc: Steve French cc: Shyam Prasad N cc: netfs@lists.linux.dev cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: stable@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/read_collect.c | 4 +++- fs/netfs/write_collect.c | 10 ++++++++-- fs/netfs/write_issue.c | 4 ++-- fs/splice.c | 3 +++ include/linux/netfs.h | 1 + 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 3e804da1e1eb..a95e7aadafd0 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -281,8 +281,10 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq) } else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) { notes |= MADE_PROGRESS; } else { - if (!stream->failed) + if (!stream->failed) { stream->transferred += transferred; + stream->transferred_valid = true; + } if (front->transferred < front->len) set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags); notes |= MADE_PROGRESS; diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 0f3a36852a4d..cbf3d9194c7b 100644 
--- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -254,6 +254,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) if (front->start + front->transferred > stream->collected_to) { stream->collected_to = front->start + front->transferred; stream->transferred = stream->collected_to - wreq->start; + stream->transferred_valid = true; notes |= MADE_PROGRESS; } if (test_bit(NETFS_SREQ_FAILED, &front->flags)) { @@ -356,6 +357,7 @@ bool netfs_write_collection(struct netfs_io_request *wreq) { struct netfs_inode *ictx = netfs_inode(wreq->inode); size_t transferred; + bool transferred_valid = false; int s; _enter("R=%x", wreq->debug_id); @@ -376,12 +378,16 @@ bool netfs_write_collection(struct netfs_io_request *wreq) continue; if (!list_empty(&stream->subrequests)) return false; - if (stream->transferred < transferred) + if (stream->transferred_valid && + stream->transferred < transferred) { transferred = stream->transferred; + transferred_valid = true; + } } /* Okay, declare that all I/O is complete. 
*/ - wreq->transferred = transferred; + if (transferred_valid) + wreq->transferred = transferred; trace_netfs_rreq(wreq, netfs_rreq_trace_write_done); if (wreq->io_streams[1].active && diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 50bee2c4130d..0584cba1a043 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -118,12 +118,12 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, wreq->io_streams[0].prepare_write = ictx->ops->prepare_write; wreq->io_streams[0].issue_write = ictx->ops->issue_write; wreq->io_streams[0].collected_to = start; - wreq->io_streams[0].transferred = LONG_MAX; + wreq->io_streams[0].transferred = 0; wreq->io_streams[1].stream_nr = 1; wreq->io_streams[1].source = NETFS_WRITE_TO_CACHE; wreq->io_streams[1].collected_to = start; - wreq->io_streams[1].transferred = LONG_MAX; + wreq->io_streams[1].transferred = 0; if (fscache_resources_valid(&wreq->cache_resources)) { wreq->io_streams[1].avail = true; wreq->io_streams[1].active = true; diff --git a/fs/splice.c b/fs/splice.c index 4d6df083e0c0..f5094b6d00a0 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -739,6 +739,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, sd.pos = kiocb.ki_pos; if (ret <= 0) break; + WARN_ONCE(ret > sd.total_len - left, + "Splice Exceeded! 
ret=%zd tot=%zu left=%zu\n", + ret, sd.total_len, left); sd.num_spliced += ret; sd.total_len -= ret; diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 185bd8196503..98c96d649bf9 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -150,6 +150,7 @@ struct netfs_io_stream { bool active; /* T if stream is active */ bool need_retry; /* T if this stream needs retrying */ bool failed; /* T if this stream failed */ + bool transferred_valid; /* T is ->transferred is valid */ }; /* From 0b2d71a7c82628bb36fd43e80193bcc2693c239a Mon Sep 17 00:00:00 2001 From: "Adrian Huang (Lenovo)" Date: Thu, 14 Aug 2025 17:44:53 +0800 Subject: [PATCH 2009/2411] pidfs: Fix memory leak in pidfd_info() After running the program 'ioctl_pidfd03' of Linux Test Project (LTP) or the program 'pidfd_info_test' in 'tools/testing/selftests/pidfd' of the kernel source, kmemleak reports the following memory leaks: # cat /sys/kernel/debug/kmemleak unreferenced object 0xff110020e5988000 (size 8216): comm "ioctl_pidfd03", pid 10853, jiffies 4294800031 hex dump (first 32 bytes): 02 40 00 00 00 00 00 00 10 00 00 00 00 00 00 00 .@.............. 00 00 00 00 af 01 00 00 80 00 00 00 00 00 00 00 ................ backtrace (crc 69483047): kmem_cache_alloc_node_noprof+0x2fb/0x410 copy_process+0x178/0x1740 kernel_clone+0x99/0x3b0 __do_sys_clone3+0xbe/0x100 do_syscall_64+0x7b/0x2c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e ... unreferenced object 0xff11002097b70000 (size 8216): comm "pidfd_info_test", pid 11840, jiffies 4294889165 hex dump (first 32 bytes): 06 40 00 00 00 00 00 00 10 00 00 00 00 00 00 00 .@.............. 00 00 00 00 b5 00 00 00 80 00 00 00 00 00 00 00 ................ backtrace (crc a6286bb7): kmem_cache_alloc_node_noprof+0x2fb/0x410 copy_process+0x178/0x1740 kernel_clone+0x99/0x3b0 __do_sys_clone3+0xbe/0x100 do_syscall_64+0x7b/0x2c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e ... 
The leak occurs because pidfd_info() obtains a task_struct via get_pid_task() but never calls put_task_struct() to drop the reference, leaving task->usage unbalanced. Fix the issue by adding '__free(put_task) = NULL' to the local variable 'task', ensuring that put_task_struct() is automatically invoked when the variable goes out of scope. Fixes: 7477d7dce48a ("pidfs: allow to retrieve exit information") Signed-off-by: Adrian Huang (Lenovo) Link: https://lore.kernel.org/20250814094453.15232-1-adrianhuang0701@gmail.com Signed-off-by: Christian Brauner --- fs/pidfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/pidfs.c b/fs/pidfs.c index edc35522d75c..108e7527f837 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -296,12 +296,12 @@ static __u32 pidfs_coredump_mask(unsigned long mm_flags) static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) { struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg; + struct task_struct *task __free(put_task) = NULL; struct pid *pid = pidfd_pid(file); size_t usize = _IOC_SIZE(cmd); struct pidfd_info kinfo = {}; struct pidfs_exit_info *exit_info; struct user_namespace *user_ns; - struct task_struct *task; struct pidfs_attr *attr; const struct cred *c; __u64 mask; From 0eaf7c7e85da7495c0e03a99375707fc954f5e7b Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Tue, 5 Aug 2025 22:14:51 +0300 Subject: [PATCH 2010/2411] Bluetooth: hci_conn: do return error from hci_enhanced_setup_sync() The commit e07a06b4eb41 ("Bluetooth: Convert SCO configure_datapath to hci_sync") missed to update the *return* statement under the *case* of BT_CODEC_TRANSPARENT in hci_enhanced_setup_sync(), which led to returning success (0) instead of the negative error code (-EINVAL). However, the result of hci_enhanced_setup_sync() seems to be ignored anyway, since NULL gets passed to hci_cmd_sync_queue() as the last argument in that case and the only function interested in that result is specified by that argument. 
Fixes: e07a06b4eb41 ("Bluetooth: Convert SCO configure_datapath to hci_sync") Signed-off-by: Sergey Shtylyov Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index ab6fe5b0cc0f..7a879290dd28 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -339,7 +339,8 @@ static int hci_enhanced_setup_sync(struct hci_dev *hdev, void *data) case BT_CODEC_TRANSPARENT: if (!find_next_esco_param(conn, esco_param_msbc, ARRAY_SIZE(esco_param_msbc))) - return false; + return -EINVAL; + param = &esco_param_msbc[conn->attempt - 1]; cp.tx_coding_format.id = 0x03; cp.rx_coding_format.id = 0x03; From e489317d2fd9a51a81bdcbe15a73ddde8246e6d6 Mon Sep 17 00:00:00 2001 From: Neeraj Sanjay Kale Date: Mon, 4 Aug 2025 16:00:15 +0530 Subject: [PATCH 2011/2411] Bluetooth: btnxpuart: Uses threaded IRQ for host wakeup handling This replaces devm_request_irq() with devm_request_threaded_irq(). On iMX93 11x11 EVK platform, the BT chip's BT_WAKE_OUT pin is connected to an I2C GPIO expander instead of being directly connected to iMX GPIO. When I2C GPIO expander's (PCAL6524) host driver receives an interrupt on its INTR line, the driver's interrupt handler needs to query the interrupt source with PCAL6524 first, and then call the actual interrupt handler, in this case the IRQ handler in BTNXPUART. In order to handle interrupts when such I2C GPIO expanders are between the host and interrupt source, devm_request_threaded_irq() is needed. This commit also removes the IRQF_TRIGGER_FALLING flag, to allow setting the IRQ trigger type from the device tree setting instead of hardcoding in the driver.
Signed-off-by: Neeraj Sanjay Kale Reviewed-by: Sherry Sun Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btnxpuart.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index 73a4a325c867..76e7f857fb7d 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -543,10 +543,10 @@ static int ps_setup(struct hci_dev *hdev) } if (psdata->wakeup_source) { - ret = devm_request_irq(&serdev->dev, psdata->irq_handler, - ps_host_wakeup_irq_handler, - IRQF_ONESHOT | IRQF_TRIGGER_FALLING, - dev_name(&serdev->dev), nxpdev); + ret = devm_request_threaded_irq(&serdev->dev, psdata->irq_handler, + NULL, ps_host_wakeup_irq_handler, + IRQF_ONESHOT, + dev_name(&serdev->dev), nxpdev); if (ret) bt_dev_info(hdev, "error setting wakeup IRQ handler, ignoring\n"); disable_irq(psdata->irq_handler); From 9d4b01a0bf8d2163ae129c9c537cb0753ad5a2aa Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 14 Aug 2025 11:57:19 -0400 Subject: [PATCH 2012/2411] Bluetooth: hci_core: Fix not accounting for BIS/CIS/PA links separately This fixes the likes of hci_conn_num(CIS_LINK) returning the total of ISO connections, which includes BIS_LINK as well, so this splits the iso_num into each link type and introduces hci_iso_count that can be used in places where the total number of ISO connections still needs to be used.
Fixes: 23205562ffc8 ("Bluetooth: separate CIS_LINK and BIS_LINK link types") Fixes: a7bcffc673de ("Bluetooth: Add PA_LINK to distinguish BIG sync and PA sync connections") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index bb30bde6f0e8..6906af7a8f24 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -129,7 +129,9 @@ struct hci_conn_hash { struct list_head list; unsigned int acl_num; unsigned int sco_num; - unsigned int iso_num; + unsigned int cis_num; + unsigned int bis_num; + unsigned int pa_num; unsigned int le_num; unsigned int le_num_peripheral; }; @@ -1014,9 +1016,13 @@ static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c) h->sco_num++; break; case CIS_LINK: + h->cis_num++; + break; case BIS_LINK: + h->bis_num++; + break; case PA_LINK: - h->iso_num++; + h->pa_num++; break; } } @@ -1042,9 +1048,13 @@ static inline void hci_conn_hash_del(struct hci_dev *hdev, struct hci_conn *c) h->sco_num--; break; case CIS_LINK: + h->cis_num--; + break; case BIS_LINK: + h->bis_num--; + break; case PA_LINK: - h->iso_num--; + h->pa_num--; break; } } @@ -1061,9 +1071,11 @@ static inline unsigned int hci_conn_num(struct hci_dev *hdev, __u8 type) case ESCO_LINK: return h->sco_num; case CIS_LINK: + return h->cis_num; case BIS_LINK: + return h->bis_num; case PA_LINK: - return h->iso_num; + return h->pa_num; default: return 0; } @@ -1073,7 +1085,15 @@ static inline unsigned int hci_conn_count(struct hci_dev *hdev) { struct hci_conn_hash *c = &hdev->conn_hash; - return c->acl_num + c->sco_num + c->le_num + c->iso_num; + return c->acl_num + c->sco_num + c->le_num + c->cis_num + c->bis_num + + c->pa_num; +} + +static inline unsigned int hci_iso_count(struct hci_dev *hdev) +{ + struct hci_conn_hash *c = &hdev->conn_hash; + + return 
c->cis_num + c->bis_num; } static inline bool hci_conn_valid(struct hci_dev *hdev, struct hci_conn *conn) From c08ba63078dd6046c279df37795cb77e784e1ec9 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 16 Jul 2025 15:41:35 -0500 Subject: [PATCH 2013/2411] virt: sev-guest: Satisfy linear mapping requirement in get_derived_key() Commit 7ffeb2fc2670 ("x86/sev: Document requirement for linear mapping of guest request buffers") added a check that requires the guest request buffers to be in the linear mapping. The get_derived_key() function was passing a buffer that was allocated on the stack, resulting in the call to snp_send_guest_request() returning an error. Update the get_derived_key() function to use an allocated buffer instead of a stack buffer. Fixes: 7ffeb2fc2670 ("x86/sev: Document requirement for linear mapping of guest request buffers") Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/9b764ca9fc79199a091aac684c4926e2080ca7a8.1752698495.git.thomas.lendacky@amd.com --- drivers/virt/coco/sev-guest/sev-guest.c | 27 +++++++++++-------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index d2b3ae7113ab..b01ec99106cd 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -116,13 +116,11 @@ static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_io static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg) { + struct snp_derived_key_resp *derived_key_resp __free(kfree) = NULL; struct snp_derived_key_req *derived_key_req __free(kfree) = NULL; - struct snp_derived_key_resp derived_key_resp = {0}; struct snp_msg_desc *mdesc = snp_dev->msg_desc; struct snp_guest_req req = {}; int rc, resp_len; - /* Response data is 64 bytes and max authsize for GCM is 16 bytes. 
*/ - u8 buf[64 + 16]; if (!arg->req_data || !arg->resp_data) return -EINVAL; @@ -132,8 +130,9 @@ static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_reque * response payload. Make sure that it has enough space to cover the * authtag. */ - resp_len = sizeof(derived_key_resp.data) + mdesc->ctx->authsize; - if (sizeof(buf) < resp_len) + resp_len = sizeof(derived_key_resp->data) + mdesc->ctx->authsize; + derived_key_resp = kzalloc(resp_len, GFP_KERNEL_ACCOUNT); + if (!derived_key_resp) return -ENOMEM; derived_key_req = kzalloc(sizeof(*derived_key_req), GFP_KERNEL_ACCOUNT); @@ -149,23 +148,21 @@ static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_reque req.vmpck_id = mdesc->vmpck_id; req.req_buf = derived_key_req; req.req_sz = sizeof(*derived_key_req); - req.resp_buf = buf; + req.resp_buf = derived_key_resp; req.resp_sz = resp_len; req.exit_code = SVM_VMGEXIT_GUEST_REQUEST; rc = snp_send_guest_request(mdesc, &req); arg->exitinfo2 = req.exitinfo2; - if (rc) - return rc; - - memcpy(derived_key_resp.data, buf, sizeof(derived_key_resp.data)); - if (copy_to_user((void __user *)arg->resp_data, &derived_key_resp, - sizeof(derived_key_resp))) - rc = -EFAULT; + if (!rc) { + if (copy_to_user((void __user *)arg->resp_data, derived_key_resp, + sizeof(derived_key_resp->data))) + rc = -EFAULT; + } /* The response buffer contains the sensitive data, explicitly clear it. 
*/ - memzero_explicit(buf, sizeof(buf)); - memzero_explicit(&derived_key_resp, sizeof(derived_key_resp)); + memzero_explicit(derived_key_resp, sizeof(*derived_key_resp)); + return rc; } From 3ee9cebd0a5e7ea47eb35cec95eaa1a866af982d Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 13 Aug 2025 10:26:59 -0500 Subject: [PATCH 2014/2411] x86/sev: Ensure SVSM reserved fields in a page validation entry are initialized to zero In order to support future versions of the SVSM_CORE_PVALIDATE call, all reserved fields within a PVALIDATE entry must be set to zero as an SVSM should be ensuring all reserved fields are zero in order to support future usage of reserved areas based on the protocol version. Fixes: fcd042e86422 ("x86/sev: Perform PVALIDATE using the SVSM when not at VMPL0") Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Joerg Roedel Cc: Link: https://lore.kernel.org/7cde412f8b057ea13a646fb166b1ca023f6a5031.1755098819.git.thomas.lendacky@amd.com --- arch/x86/boot/startup/sev-shared.c | 1 + arch/x86/coco/sev/core.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/x86/boot/startup/sev-shared.c b/arch/x86/boot/startup/sev-shared.c index 7a706db87b93..4ab0dbd043c6 100644 --- a/arch/x86/boot/startup/sev-shared.c +++ b/arch/x86/boot/startup/sev-shared.c @@ -785,6 +785,7 @@ static void __head svsm_pval_4k_page(unsigned long paddr, bool validate) pc->entry[0].page_size = RMP_PG_SIZE_4K; pc->entry[0].action = validate; pc->entry[0].ignore_cf = 0; + pc->entry[0].rsvd = 0; pc->entry[0].pfn = paddr >> PAGE_SHIFT; /* Protocol 0, Call ID 1 */ diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index fc59ce78c477..43ecc6b9fb9c 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -227,6 +227,7 @@ static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action, pe->page_size = RMP_PG_SIZE_4K; pe->action = action; pe->ignore_cf = 0; + pe->rsvd = 0; pe->pfn = pfn; pe++; @@ -257,6 +258,7 @@ static int 
svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int d pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K; pe->action = e->operation == SNP_PAGE_STATE_PRIVATE; pe->ignore_cf = 0; + pe->rsvd = 0; pe->pfn = e->gfn; pe++; From ed6c4b657bca3b39f7b11cba1405931aeb490f3d Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 15 Aug 2025 09:01:54 +0200 Subject: [PATCH 2015/2411] x86/cpuid: Remove transitional <asm/cpuid.h> header All CPUID call sites were updated at commit: 968e30006807 ("x86/cpuid: Set <asm/cpuid/api.h> as the main CPUID header") to include <asm/cpuid/api.h> instead of <asm/cpuid.h>. The <asm/cpuid.h> header was still retained as a wrapper, just in case some new code in -next started using it. Now that everything is merged to Linus' tree, remove the header. Signed-off-by: Ahmed S. Darwish Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20250815070227.19981-2-darwi@linutronix.de --- arch/x86/include/asm/cpuid.h | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 arch/x86/include/asm/cpuid.h diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h deleted file mode 100644 index d5749b25fa10..000000000000 --- a/arch/x86/include/asm/cpuid.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef _ASM_X86_CPUID_H -#define _ASM_X86_CPUID_H - -#include <asm/cpuid/api.h> - -#endif /* _ASM_X86_CPUID_H */ From eafae0fdd115a71b3a200ef1a31f86da04bac77f Mon Sep 17 00:00:00 2001 From: Evgeniy Harchenko Date: Fri, 15 Aug 2025 12:58:14 +0300 Subject: [PATCH 2016/2411] ALSA: hda/realtek: Add support for HP EliteBook x360 830 G6 and EliteBook 830 G6 The HP EliteBook x360 830 G6 and HP EliteBook 830 G6 have Realtek HDA codec ALC215. It needs the ALC285_FIXUP_HP_GPIO_LED quirk to enable the mute LED.
Cc: Signed-off-by: Evgeniy Harchenko Link: https://patch.msgid.link/20250815095814.75845-1-evgeniyharchenko.dev@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index e90c4047ea62..db8e6352b942 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6368,6 +6368,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360), SND_PCI_QUIRK(0x103c, 0x8537, "HP ProBook 440 G6", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8548, "HP EliteBook x360 830 G6", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x854a, "HP EliteBook 830 G6", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x85c6, "HP Pavilion x360 Convertible 14-dy1xxx", ALC295_FIXUP_HP_MUTE_LED_COEFBIT11), SND_PCI_QUIRK(0x103c, 0x85de, "HP Envy x360 13-ar0xxx", ALC285_FIXUP_HP_ENVY_X360), SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), From b3eaf14f4c63fd6abc7b68c6d7a07c5680a6d8e5 Mon Sep 17 00:00:00 2001 From: Shinji Nomoto Date: Thu, 22 May 2025 15:10:58 +0900 Subject: [PATCH 2017/2411] cpupower: Fix a bug where the -t option of the set subcommand was not working. The set subcommand's -t option is documented as being available for boost configuration, but it was not actually functioning due to a bug in the option handling. 
Link: https://lore.kernel.org/r/20250522061122.2149188-2-fj5851bi@fujitsu.com Signed-off-by: Shinji Nomoto Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/cpupower-set.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index 0677b58374ab..59ace394cf3e 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c @@ -62,8 +62,8 @@ int cmd_set(int argc, char **argv) params.params = 0; /* parameter parsing */ - while ((ret = getopt_long(argc, argv, "b:e:m:", - set_opts, NULL)) != -1) { + while ((ret = getopt_long(argc, argv, "b:e:m:t:", + set_opts, NULL)) != -1) { switch (ret) { case 'b': if (params.perf_bias) From 357d1fc38aad2cf4ea6626138cbf68299d20170c Mon Sep 17 00:00:00 2001 From: Shinji Nomoto Date: Thu, 22 May 2025 15:10:59 +0900 Subject: [PATCH 2018/2411] cpupower: Allow control of boost feature on non-x86 based systems with boost support. The cpufreq subsystem has a generic sysfs interface for controlling boost (/sys/devices/system/cpu/cpufreq/boost). The sysfs interface can be used to enable boost control from the cpupower command on non-x86 platforms as well. So, allow boost controlling on non-x86 system if boost sysfs file exists. The set subcommand enables/disables the boost feature using the following syntax: cpupower set --boost 1 cpupower set --boost 0 The --boost option is an alias for --turbo-boost. We provided the neutral option name because the name "turbo boost" is specific to Intel technology. 
The frequency-info subcommand displays the enabled/disabled state of the boost feature as follows: boost state support: Active: yes (or no) Link: https://lore.kernel.org/r/20250522061122.2149188-3-fj5851bi@fujitsu.com Signed-off-by: Shinji Nomoto Signed-off-by: Shuah Khan --- tools/power/cpupower/man/cpupower-set.1 | 7 +- tools/power/cpupower/utils/cpufreq-info.c | 16 ++++- tools/power/cpupower/utils/cpupower-set.c | 1 + tools/power/cpupower/utils/helpers/helpers.h | 14 ++-- tools/power/cpupower/utils/helpers/misc.c | 76 ++++++++++++++------ 5 files changed, 81 insertions(+), 33 deletions(-) diff --git a/tools/power/cpupower/man/cpupower-set.1 b/tools/power/cpupower/man/cpupower-set.1 index 500653ef98c7..8ac82b6f9189 100644 --- a/tools/power/cpupower/man/cpupower-set.1 +++ b/tools/power/cpupower/man/cpupower-set.1 @@ -81,10 +81,11 @@ Refer to the AMD P-State kernel documentation for further information. .RE .PP -\-\-turbo\-boost, \-t +\-\-turbo\-boost, \-\-boost, \-t .RS 4 -This option is used to enable or disable the turbo boost feature on -supported Intel and AMD processors. +This option is used to enable or disable the boost feature on +supported Intel and AMD processors, and other boost supported systems. +(The --boost option is an alias for the --turbo-boost option) This option takes as parameter either \fB1\fP to enable, or \fB0\fP to disable the feature. 
diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index fc750e127404..7d3732f5f2f6 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -128,7 +128,7 @@ static int get_boost_mode_x86(unsigned int cpu) /* ToDo: Make this more global */ unsigned long pstates[MAX_HW_PSTATES] = {0,}; - ret = cpufreq_has_boost_support(cpu, &support, &active, &b_states); + ret = cpufreq_has_x86_boost_support(cpu, &support, &active, &b_states); if (ret) { printf(_("Error while evaluating Boost Capabilities" " on CPU %d -- are you root?\n"), cpu); @@ -204,6 +204,18 @@ static int get_boost_mode_x86(unsigned int cpu) return 0; } +static int get_boost_mode_generic(unsigned int cpu) +{ + bool active; + + if (!cpufreq_has_generic_boost_support(&active)) { + printf(_(" boost state support:\n")); + printf(_(" Active: %s\n"), active ? _("yes") : _("no")); + } + + return 0; +} + /* --boost / -b */ static int get_boost_mode(unsigned int cpu) @@ -214,6 +226,8 @@ static int get_boost_mode(unsigned int cpu) cpupower_cpu_info.vendor == X86_VENDOR_HYGON || cpupower_cpu_info.vendor == X86_VENDOR_INTEL) return get_boost_mode_x86(cpu); + else + get_boost_mode_generic(cpu); freqs = cpufreq_get_boost_frequencies(cpu); if (freqs) { diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index 59ace394cf3e..c2117e5650dd 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c @@ -21,6 +21,7 @@ static struct option set_opts[] = { {"epp", required_argument, NULL, 'e'}, {"amd-pstate-mode", required_argument, NULL, 'm'}, {"turbo-boost", required_argument, NULL, 't'}, + {"boost", required_argument, NULL, 't'}, { }, }; diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 95749b8ee475..82ea62bdf5a2 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ 
b/tools/power/cpupower/utils/helpers/helpers.h @@ -103,6 +103,9 @@ extern struct cpupower_cpu_info cpupower_cpu_info; /* cpuid and cpuinfo helpers **************************/ +int cpufreq_has_generic_boost_support(bool *active); +int cpupower_set_turbo_boost(int turbo_boost); + /* X86 ONLY ****************************************/ #if defined(__i386__) || defined(__x86_64__) @@ -118,7 +121,6 @@ extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu); extern int cpupower_set_epp(unsigned int cpu, char *epp); extern int cpupower_set_amd_pstate_mode(char *mode); -extern int cpupower_set_turbo_boost(int turbo_boost); /* Read/Write msr ****************************/ @@ -139,8 +141,8 @@ extern int decode_pstates(unsigned int cpu, int boost_states, /* AMD HW pstate decoding **************************/ -extern int cpufreq_has_boost_support(unsigned int cpu, int *support, - int *active, int * states); +int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, + int *active, int *states); /* AMD P-State stuff **************************/ bool cpupower_amd_pstate_enabled(void); @@ -181,13 +183,11 @@ static inline int cpupower_set_epp(unsigned int cpu, char *epp) { return -1; }; static inline int cpupower_set_amd_pstate_mode(char *mode) { return -1; }; -static inline int cpupower_set_turbo_boost(int turbo_boost) -{ return -1; }; /* Read/Write msr ****************************/ -static inline int cpufreq_has_boost_support(unsigned int cpu, int *support, - int *active, int * states) +static inline int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, + int *active, int *states) { return -1; } static inline bool cpupower_amd_pstate_enabled(void) diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index 76e461ff4f74..166dc1e470ea 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -8,15 +8,14 @@ #include "helpers/helpers.h" #include "helpers/sysfs.h" 
#include "cpufreq.h" +#include "cpupower_intern.h" #if defined(__i386__) || defined(__x86_64__) -#include "cpupower_intern.h" - #define MSR_AMD_HWCR 0xc0010015 -int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, - int *states) +int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, int *active, + int *states) { int ret; unsigned long long val; @@ -124,24 +123,6 @@ int cpupower_set_amd_pstate_mode(char *mode) return 0; } -int cpupower_set_turbo_boost(int turbo_boost) -{ - char path[SYSFS_PATH_MAX]; - char linebuf[2] = {}; - - snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost"); - - if (!is_valid_path(path)) - return -1; - - snprintf(linebuf, sizeof(linebuf), "%d", turbo_boost); - - if (cpupower_write_sysfs(path, linebuf, 2) <= 0) - return -1; - - return 0; -} - bool cpupower_amd_pstate_enabled(void) { char *driver = cpufreq_get_driver(0); @@ -160,6 +141,39 @@ bool cpupower_amd_pstate_enabled(void) #endif /* #if defined(__i386__) || defined(__x86_64__) */ +int cpufreq_has_generic_boost_support(bool *active) +{ + char path[SYSFS_PATH_MAX]; + char linebuf[2] = {}; + unsigned long val; + char *endp; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost"); + + if (!is_valid_path(path)) + return -EACCES; + + if (cpupower_read_sysfs(path, linebuf, 2) <= 0) + return -EINVAL; + + val = strtoul(linebuf, &endp, 0); + if (endp == linebuf || errno == ERANGE) + return -EINVAL; + + switch (val) { + case 0: + *active = false; + break; + case 1: + *active = true; + break; + default: + return -EINVAL; + } + + return 0; +} + /* get_cpustate * * Gather the information of all online CPUs into bitmask struct @@ -259,3 +273,21 @@ void print_speed(unsigned long speed, int no_rounding) } } } + +int cpupower_set_turbo_boost(int turbo_boost) +{ + char path[SYSFS_PATH_MAX]; + char linebuf[2] = {}; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost"); + + if (!is_valid_path(path)) + return -1; + + snprintf(linebuf, sizeof(linebuf), "%d", 
turbo_boost); + + if (cpupower_write_sysfs(path, linebuf, 2) <= 0) + return -1; + + return 0; +} From f604d3aaf64ff0d90cc875295474d3abf4155629 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 14 Aug 2025 15:06:40 +0200 Subject: [PATCH 2019/2411] mlxsw: spectrum: Forward packets with an IPv4 link-local source IP By default, the device does not forward IPv4 packets with a link-local source IP (i.e., 169.254.0.0/16). This behavior does not align with the kernel which does forward them. Fix by instructing the device to forward such packets instead of dropping them. Fixes: ca360db4b825 ("mlxsw: spectrum: Disable DIP_LINK_LOCAL check in hardware pipeline") Reported-by: Zoey Mertes Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Petr Machata Link: https://patch.msgid.link/6721e6b2c96feb80269e72ce8d0b426e2f32d99c.1755174341.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 ++ drivers/net/ethernet/mellanox/mlxsw/trap.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 618957d65663..9a2d64a0a858 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2375,6 +2375,8 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { ROUTER_EXP, false), MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_DIP_LINK_LOCAL, FORWARD, ROUTER_EXP, false), + MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_SIP_LINK_LOCAL, FORWARD, + ROUTER_EXP, false), /* Multicast Router Traps */ MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false), MLXSW_SP_RXL_L3_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false), diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 80ee5c4825dc..9962dc157901 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -94,6 +94,7 @@ enum { 
MLXSW_TRAP_ID_DISCARD_ING_ROUTER_IPV4_SIP_BC = 0x16A, MLXSW_TRAP_ID_DISCARD_ING_ROUTER_IPV4_DIP_LOCAL_NET = 0x16B, MLXSW_TRAP_ID_DISCARD_ING_ROUTER_DIP_LINK_LOCAL = 0x16C, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_LINK_LOCAL = 0x16D, MLXSW_TRAP_ID_DISCARD_ROUTER_IRIF_EN = 0x178, MLXSW_TRAP_ID_DISCARD_ROUTER_ERIF_EN = 0x179, MLXSW_TRAP_ID_DISCARD_ROUTER_LPM4 = 0x17B, From 5e0b2177bdba99c2487480e9864825f742b684ee Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 14 Aug 2025 15:06:41 +0200 Subject: [PATCH 2020/2411] selftest: forwarding: router: Add a test case for IPv4 link-local source IP Add a test case which checks that packets with an IPv4 link-local source IP are forwarded and not dropped. Signed-off-by: Ido Schimmel Signed-off-by: Petr Machata Link: https://patch.msgid.link/3c2e0b17d99530f57bef5ddff9af284fa0c9b667.1755174341.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/router.sh | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh index b98ea9449b8b..dfb6646cb97b 100755 --- a/tools/testing/selftests/net/forwarding/router.sh +++ b/tools/testing/selftests/net/forwarding/router.sh @@ -18,6 +18,8 @@ # | 2001:db8:1::1/64 2001:db8:2::1/64 | # | | # +-----------------------------------------------------------------+ +# +#shellcheck disable=SC2034 # SC doesn't see our uses of global variables ALL_TESTS=" ping_ipv4 @@ -27,6 +29,7 @@ ALL_TESTS=" ipv4_sip_equal_dip ipv6_sip_equal_dip ipv4_dip_link_local + ipv4_sip_link_local " NUM_NETIFS=4 @@ -330,6 +333,32 @@ ipv4_dip_link_local() tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower } +ipv4_sip_link_local() +{ + local sip=169.254.1.1 + + RET=0 + + # Disable rpfilter to prevent packets to be dropped because of it. 
+ sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf."$rp1".rp_filter 0 + + tc filter add dev "$rp2" egress protocol ip pref 1 handle 101 \ + flower src_ip "$sip" action pass + + $MZ "$h1" -t udp "sp=54321,dp=12345" -c 5 -d 1msec -b "$rp1mac" \ + -A "$sip" -B 198.51.100.2 -q + + tc_check_packets "dev $rp2 egress" 101 5 + check_err $? "Packets were dropped" + + log_test "IPv4 source IP is link-local" + + tc filter del dev "$rp2" egress protocol ip pref 1 handle 101 flower + sysctl_restore net.ipv4.conf."$rp1".rp_filter + sysctl_restore net.ipv4.conf.all.rp_filter +} + trap cleanup EXIT setup_prepare From 989fe6771266bdb82a815d78802c5aa7c918fdfd Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Wed, 13 Aug 2025 20:54:04 +0800 Subject: [PATCH 2021/2411] drm/nouveau/gsp: fix mismatched alloc/free for kvmalloc() Replace kfree() with kvfree() for memory allocated by kvmalloc(). Compile-tested only. Cc: stable@vger.kernel.org Fixes: 8a8b1ec5261f ("drm/nouveau/gsp: split rpc handling out on its own") Signed-off-by: Qianfeng Rong Reviewed-by: Timur Tabi Acked-by: Zhi Wang Link: https://lore.kernel.org/r/20250813125412.96178-1-rongqianfeng@vivo.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c index 9d06ff722fea..0dc4782df8c0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c @@ -325,7 +325,7 @@ r535_gsp_msgq_recv(struct nvkm_gsp *gsp, u32 gsp_rpc_len, int *retries) rpc = r535_gsp_msgq_peek(gsp, sizeof(*rpc), info.retries); if (IS_ERR_OR_NULL(rpc)) { - kfree(buf); + kvfree(buf); return rpc; } @@ -334,7 +334,7 @@ r535_gsp_msgq_recv(struct nvkm_gsp *gsp, u32 gsp_rpc_len, int *retries) rpc = r535_gsp_msgq_recv_one_elem(gsp, &info); if (IS_ERR_OR_NULL(rpc)) { - 
kfree(buf); + kvfree(buf); return rpc; } From 6cc44e9618f03f1deb9a092698c0b0ce20990221 Mon Sep 17 00:00:00 2001 From: Javier Garcia Date: Sun, 10 Aug 2025 17:07:06 +0200 Subject: [PATCH 2022/2411] drm: Add directive to format code in comment Add formatting directive line in function `drm_gpuvm_sm_map_exec_lock()` comment to clear the warning messages shown below that appear when generating documentation with `make htmldocs`. Warning: ./drivers/gpu/drm/drm_gpuvm.c:2444: Unexpected indentation. Warning: ./drivers/gpu/drm/drm_gpuvm.c:2446: Block quote ends without a blank line; unexpected unindent. Warning: ./drivers/gpu/drm/drm_gpuvm.c:2450: Definition list ends without a blank line; unexpected unindent. Warning: ./drivers/gpu/drm/drm_gpuvm.c:2451: Definition list ends without a blank line; unexpected unindent. Warning: ./drivers/gpu/drm/drm_gpuvm.c:2455: Unexpected indentation. Warning: ./drivers/gpu/drm/drm_gpuvm.c:2456: Definition list ends without a blank line; unexpected unindent. Warning: ./drivers/gpu/drm/drm_gpuvm.c:2457: Definition list ends without a blank line; unexpected unindent. Warning: ./drivers/gpu/drm/drm_gpuvm.c:2458: Definition list ends without a blank line; unexpected unindent. Fixes: 471920ce25d5 ("drm/gpuvm: Add locking helpers") Signed-off-by: Javier Garcia Reviewed-by: Bagas Sanjaya Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250810150706.305040-1-rampxxxx@gmail.com [ Fix typo in commit message. - Danilo ] Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/drm_gpuvm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index bbc7fecb6f4a..74d949995a72 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -2432,6 +2432,8 @@ static const struct drm_gpuvm_ops lock_ops = { * * The expected usage is: * + * ..
code-block:: c + * * vm_bind { * struct drm_exec exec; * From 12da2b92ad50e6602b4c5e9073d71f2368b70b63 Mon Sep 17 00:00:00 2001 From: Chandra Mohan Sundar Date: Thu, 14 Aug 2025 22:00:10 +0530 Subject: [PATCH 2023/2411] net: libwx: Fix the size in RSS hash key population While trying to fill a random RSS key, the size of the pointer is being used rather than the actual size of the RSS key. Fix by passing an appropriate value of the RSS key. This issue was reported by static coverity analyser. Fixes: eb4898fde1de8 ("net: libwx: add wangxun vf common api") Signed-off-by: Chandra Mohan Sundar Link: https://patch.msgid.link/20250814163014.613004-1-chandramohan.explore@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c index 5d48df7a849f..3023ea2732ef 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c @@ -192,7 +192,7 @@ void wx_setup_vfmrqc_vf(struct wx *wx) u8 i, j; /* Fill out hash function seeds */ - netdev_rss_key_fill(wx->rss_key, sizeof(wx->rss_key)); + netdev_rss_key_fill(wx->rss_key, WX_RSS_KEY_SIZE); for (i = 0; i < WX_RSS_KEY_SIZE / 4; i++) wr32(wx, WX_VXRSSRK(i), wx->rss_key[i]); From db2e7bcee11cd57f95fef3c6cbb562d0577eb84a Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Thu, 24 Jul 2025 18:54:41 +0200 Subject: [PATCH 2024/2411] drm: nova-drm: fix 32-bit arm build In 32-bit arm, the build fails with: error[E0308]: mismatched types --> drivers/gpu/drm/nova/file.rs:42:28 | 42 | getparam.set_value(value); | --------- ^^^^^ expected `u64`, found `u32` | | | arguments to this method are incorrect | note: method defined here --> drivers/gpu/drm/nova/uapi.rs:29:12 | 29 | pub fn set_value(&self, v: u64) { | ^^^^^^^^^ ------ help: you can convert a `u32` to a `u64` | 42 | getparam.set_value(value.into()); 
| +++++++ The reason is that `Getparam::set_value` takes a `u64` (from the UAPI), but `pci::Device::resource_len()` returns a `resource_size_t`, which is a `phys_addr_t`, which may be 32- or 64-bit. Thus add an `into()` call to support the 32-bit case, while allowing the Clippy lint that complains in the 64-bit case where the type is the same. Fixes: cdeaeb9dd762 ("drm: nova-drm: add initial driver skeleton") Signed-off-by: Miguel Ojeda Reviewed-by: Christian Schrefl Link: https://lore.kernel.org/r/20250724165441.2105632-1-ojeda@kernel.org Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nova/file.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nova/file.rs b/drivers/gpu/drm/nova/file.rs index 7e59a34b830d..4fe62cf98a23 100644 --- a/drivers/gpu/drm/nova/file.rs +++ b/drivers/gpu/drm/nova/file.rs @@ -39,7 +39,8 @@ pub(crate) fn get_param( _ => return Err(EINVAL), }; - getparam.set_value(value); + #[allow(clippy::useless_conversion)] + getparam.set_value(value.into()); Ok(0) } From 1f403699c40f0806a707a9a6eed3b8904224021a Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Tue, 12 Aug 2025 15:19:32 +0800 Subject: [PATCH 2025/2411] drm/mediatek: Fix device/node reference count leaks in mtk_drm_get_all_drm_priv Using device_find_child() and of_find_device_by_node() to locate devices could cause an imbalance in the device's reference count. device_find_child() and of_find_device_by_node() both call get_device() to increment the reference count of the found device before returning the pointer. In mtk_drm_get_all_drm_priv(), these references are never released through put_device(), resulting in permanent reference count increments. Additionally, the for_each_child_of_node() iterator fails to release node references in all code paths. This leaks device node references when loop termination occurs before reaching MAX_CRTC. 
These reference count leaks may prevent device/node resources from being properly released during driver unbind operations. As comment of device_find_child() says, 'NOTE: you will need to drop the reference with put_device() after use'. Cc: stable@vger.kernel.org Fixes: 1ef7ed48356c ("drm/mediatek: Modify mediatek-drm for mt8195 multi mmsys support") Signed-off-by: Ma Ke Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20250812071932.471730-1-make24@iscas.ac.cn/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index d5e6bab36414..f8a817689e16 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -387,19 +387,19 @@ static bool mtk_drm_get_all_drm_priv(struct device *dev) of_id = of_match_node(mtk_drm_of_ids, node); if (!of_id) - continue; + goto next_put_node; pdev = of_find_device_by_node(node); if (!pdev) - continue; + goto next_put_node; drm_dev = device_find_child(&pdev->dev, NULL, mtk_drm_match); if (!drm_dev) - continue; + goto next_put_device_pdev_dev; temp_drm_priv = dev_get_drvdata(drm_dev); if (!temp_drm_priv) - continue; + goto next_put_device_drm_dev; if (temp_drm_priv->data->main_len) all_drm_priv[CRTC_MAIN] = temp_drm_priv; @@ -411,10 +411,17 @@ static bool mtk_drm_get_all_drm_priv(struct device *dev) if (temp_drm_priv->mtk_drm_bound) cnt++; - if (cnt == MAX_CRTC) { - of_node_put(node); +next_put_device_drm_dev: + put_device(drm_dev); + +next_put_device_pdev_dev: + put_device(&pdev->dev); + +next_put_node: + of_node_put(node); + + if (cnt == MAX_CRTC) break; - } } if (drm_priv->data->mmsys_dev_num == cnt) { From c27973211ffcdf0a092eec265d5993e64b89adaf Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Fri, 15 Aug 2025 12:00:28 +0800 Subject: [PATCH 2026/2411] md: keep recovery_cp in 
mdp_superblock_s commit 907a99c314a5 ("md: rename recovery_cp to resync_offset") replaces recovery_cp with resync_offset in mdp_superblock_s which is in md_p.h. md_p.h is used in userspace too. So mdadm building fails because of this. This patch reverts that change. Fixes: 907a99c314a5 ("md: rename recovery_cp to resync_offset") Signed-off-by: Xiao Ni Link: https://lore.kernel.org/linux-raid/20250815040028.18085-1-xni@redhat.com Signed-off-by: Yu Kuai --- drivers/md/md.c | 6 +++--- include/uapi/linux/raid/md_p.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 772cffe02ff5..3836fc7eff67 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1423,7 +1423,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, stru else { if (sb->events_hi == sb->cp_events_hi && sb->events_lo == sb->cp_events_lo) { - mddev->resync_offset = sb->resync_offset; + mddev->resync_offset = sb->recovery_cp; } else mddev->resync_offset = 0; } @@ -1551,13 +1551,13 @@ static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev) mddev->minor_version = sb->minor_version; if (mddev->in_sync) { - sb->resync_offset = mddev->resync_offset; + sb->recovery_cp = mddev->resync_offset; sb->cp_events_hi = (mddev->events>>32); sb->cp_events_lo = (u32)mddev->events; if (mddev->resync_offset == MaxSector) sb->state = (1<< MD_SB_CLEAN); } else - sb->resync_offset = 0; + sb->recovery_cp = 0; sb->layout = mddev->layout; sb->chunk_size = mddev->chunk_sectors << 9; diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index b13946287277..ac74133a4768 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -173,7 +173,7 @@ typedef struct mdp_superblock_s { #else #error unspecified endianness #endif - __u32 resync_offset; /* 11 resync checkpoint sector count */ + __u32 recovery_cp; /* 11 resync checkpoint sector count */ /* There are only valid for minor_version > 90 */ __u64
reshape_position; /* 12,13 next address in array-space for reshape */ __u32 new_level; /* 14 new level we are reshaping to */ From cb0780ad4333040a98e10f014b593ef738a3f31e Mon Sep 17 00:00:00 2001 From: Zheng Qixing Date: Sat, 16 Aug 2025 08:25:33 +0800 Subject: [PATCH 2027/2411] md: add helper rdev_needs_recovery() Add a helper for checking if an rdev needs recovery. Signed-off-by: Zheng Qixing Link: https://lore.kernel.org/linux-raid/20250816002534.1754356-2-zhengqixing@huaweicloud.com Signed-off-by: Yu Kuai --- drivers/md/md.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 3836fc7eff67..abd327ade4bd 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4839,6 +4839,15 @@ metadata_store(struct mddev *mddev, const char *buf, size_t len) static struct md_sysfs_entry md_metadata = __ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store); +static bool rdev_needs_recovery(struct md_rdev *rdev, sector_t sectors) +{ + return rdev->raid_disk >= 0 && + !test_bit(Journal, &rdev->flags) && + !test_bit(Faulty, &rdev->flags) && + !test_bit(In_sync, &rdev->flags) && + rdev->recovery_offset < sectors; +} + enum sync_action md_sync_action(struct mddev *mddev) { unsigned long recovery = mddev->recovery; @@ -8995,11 +9004,7 @@ static sector_t md_sync_position(struct mddev *mddev, enum sync_action action) start = MaxSector; rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) - if (rdev->raid_disk >= 0 && - !test_bit(Journal, &rdev->flags) && - !test_bit(Faulty, &rdev->flags) && - !test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < start) + if (rdev_needs_recovery(rdev, start)) start = rdev->recovery_offset; rcu_read_unlock(); @@ -9358,12 +9363,8 @@ void md_do_sync(struct md_thread *thread) test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) { rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) - if (rdev->raid_disk >= 0 && - mddev->delta_disks >= 0 && - !test_bit(Journal, 
&rdev->flags) && - !test_bit(Faulty, &rdev->flags) && - !test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < mddev->curr_resync) + if (mddev->delta_disks >= 0 && + rdev_needs_recovery(rdev, mddev->curr_resync)) rdev->recovery_offset = mddev->curr_resync; rcu_read_unlock(); } From b7ee30f0efd12f42735ae233071015389407966c Mon Sep 17 00:00:00 2001 From: Zheng Qixing Date: Sat, 16 Aug 2025 08:25:34 +0800 Subject: [PATCH 2028/2411] md: fix sync_action incorrect display during resync During raid resync, if a disk becomes faulty, the operation is briefly interrupted. The MD_RECOVERY_RECOVER flag triggered by the disk failure causes sync_action to incorrectly show "recover" instead of "resync". The same issue affects reshape operations. Reproduction steps: mdadm -Cv /dev/md1 -l1 -n4 -e1.2 /dev/sd{a..d} // -> resync happened mdadm -f /dev/md1 /dev/sda // -> resync interrupted cat sync_action -> recover Add progress checks in md_sync_action() for resync/recover/reshape to ensure the interface correctly reports the actual operation type. 
Fixes: 4b10a3bc67c1 ("md: ensure resync is prioritized over recovery") Signed-off-by: Zheng Qixing Link: https://lore.kernel.org/linux-raid/20250816002534.1754356-3-zhengqixing@huaweicloud.com Signed-off-by: Yu Kuai --- drivers/md/md.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index abd327ade4bd..1baaf52c603c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4848,9 +4848,33 @@ static bool rdev_needs_recovery(struct md_rdev *rdev, sector_t sectors) rdev->recovery_offset < sectors; } +static enum sync_action md_get_active_sync_action(struct mddev *mddev) +{ + struct md_rdev *rdev; + bool is_recover = false; + + if (mddev->resync_offset < MaxSector) + return ACTION_RESYNC; + + if (mddev->reshape_position != MaxSector) + return ACTION_RESHAPE; + + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) { + if (rdev_needs_recovery(rdev, MaxSector)) { + is_recover = true; + break; + } + } + rcu_read_unlock(); + + return is_recover ? ACTION_RECOVER : ACTION_IDLE; +} + enum sync_action md_sync_action(struct mddev *mddev) { unsigned long recovery = mddev->recovery; + enum sync_action active_action; /* * frozen has the highest priority, means running sync_thread will be @@ -4874,8 +4898,17 @@ enum sync_action md_sync_action(struct mddev *mddev) !test_bit(MD_RECOVERY_NEEDED, &recovery)) return ACTION_IDLE; - if (test_bit(MD_RECOVERY_RESHAPE, &recovery) || - mddev->reshape_position != MaxSector) + /* + * Check if any sync operation (resync/recover/reshape) is + * currently active. This ensures that only one sync operation + * can run at a time. Returns the type of active operation, or + * ACTION_IDLE if none are active. 
+ */ + active_action = md_get_active_sync_action(mddev); + if (active_action != ACTION_IDLE) + return active_action; + + if (test_bit(MD_RECOVERY_RESHAPE, &recovery)) return ACTION_RESHAPE; if (test_bit(MD_RECOVERY_RECOVER, &recovery)) From 715c7a36d59f54162a26fac1d1ed8dc087a24cf1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 14 Aug 2025 12:43:23 -0700 Subject: [PATCH 2029/2411] selftests: tls: make the new data_steal test less flaky The CI has hit a couple of cases of: RUN global.data_steal ... tls.c:2762:data_steal:Expected recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT) (20000) == -1 (-1) data_steal: Test terminated by timeout FAIL global.data_steal Looks like the 2msec sleep is not long enough. Make the sleep longer, and then instead of second sleep wait for the thieving process to exit. That way we can be sure it called recv() before us. While at it also avoid trying to steal more than a record, this seems to be causing issues in manual testing as well. Fixes: d7e82594a45c ("selftests: tls: test TCP stealing data from under the TLS socket") Link: https://patch.msgid.link/20250814194323.2014650-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index d8cfcf9bb825..2b8387a83bc7 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -2748,17 +2748,18 @@ TEST(data_steal) { pid = fork(); ASSERT_GE(pid, 0); if (!pid) { - EXPECT_EQ(recv(cfd, buf, sizeof(buf), MSG_WAITALL), - sizeof(buf)); + EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2, MSG_WAITALL), + sizeof(buf) / 2); exit(!__test_passed(_metadata)); } - usleep(2000); + usleep(10000); ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0); ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0); EXPECT_EQ(send(fd, buf, sizeof(buf), 0), sizeof(buf)); - usleep(2000); + 
EXPECT_EQ(wait(&status), pid); + EXPECT_EQ(status, 0); EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT), -1); /* Don't check errno, the error will be different depending * on what random bytes TLS interpreted as the record length. @@ -2766,9 +2767,6 @@ TEST(data_steal) { close(fd); close(cfd); - - EXPECT_EQ(wait(&status), pid); - EXPECT_EQ(status, 0); } static void __attribute__((constructor)) fips_check(void) { From 39ca24675b7e351b8e681d924f417e455d4a7fc1 Mon Sep 17 00:00:00 2001 From: "Avizrat, Yaron" Date: Thu, 14 Aug 2025 10:44:07 +0000 Subject: [PATCH 2030/2411] MAINTAINERS: Change habanalabs maintainers I will be leaving Intel soon, Koby Elbaz & Konstantin Sinyuk will take the role of habanalabs driver maintainers. Signed-off-by: Yaron Avizrat Reviewed-by: Koby Elbaz Reviewed-by: Konstantin Sinyuk Link: https://patch.msgid.link/DM4PR11MB55491ACAA33DF29CEF3C67DAE935A@DM4PR11MB5549.namprd11.prod.outlook.com [lukas: wrap to 72 chars] Signed-off-by: Lukas Wunner --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 20ffd334e0a7..433fc4e5e68e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10666,7 +10666,8 @@ S: Maintained F: block/partitions/efi.* HABANALABS PCI DRIVER -M: Yaron Avizrat +M: Koby Elbaz +M: Konstantin Sinyuk L: dri-devel@lists.freedesktop.org S: Supported C: irc://irc.oftc.net/dri-devel From a44458dfd5bc0c79c6739c3f4c658361d3a5126b Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 21 Aug 2024 01:10:28 +0200 Subject: [PATCH 2031/2411] accel/habanalabs/gaudi2: Use kvfree() for memory allocated with kvcalloc() Use kvfree() to fix the following Coccinelle/coccicheck warning reported by kfree_mismatch.cocci: WARNING kvmalloc is used to allocate this memory at line 10398 Fixes: f728c17fc97a ("accel/habanalabs/gaudi2: move HMMU page tables to device memory") Reported-by: Qianfeng Rong Closes: https://patch.msgid.link/20250808085530.233737-1-rongqianfeng@vivo.com Signed-off-by: 
Thorsten Blum [lukas: acknowledge Qianfeng, adjust Thorsten's domain, add Fixes tag] Signed-off-by: Lukas Wunner Reviewed-by: Tomer Tayar Cc: stable@vger.kernel.org # v6.9+ Link: https://patch.msgid.link/20240820231028.136126-1-thorsten.blum@toblux.com --- drivers/accel/habanalabs/gaudi2/gaudi2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index a38b88baadf2..5722e4128d3c 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -10437,7 +10437,7 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz (u64 *)(lin_dma_pkts_arr), DEBUGFS_WRITE64); WREG32(sob_addr, 0); - kfree(lin_dma_pkts_arr); + kvfree(lin_dma_pkts_arr); return rc; } From 8a6ededaad2d2dcaac8e545bffee1073dca9db95 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Wed, 13 Aug 2025 12:16:06 +0300 Subject: [PATCH 2032/2411] iio: adc: bd79124: Add GPIOLIB dependency The bd79124 has ADC inputs which can be muxed to be GPIOs. The driver supports this by registering a GPIO-chip for channels which aren't used as ADC. The Kconfig entry does not handle the dependency to GPIOLIB, which causes errors: ERROR: modpost: "devm_gpiochip_add_data_with_key" [drivers/iio/adc/rohm-bd79124.ko] undefined! ERROR: modpost: "gpiochip_get_data" [drivers/iio/adc/rohm-bd79124.ko] undefined! at linking phase if GPIOLIB is not configured to be used. Fix this by adding dependency to the GPIOLIB. 
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202508131533.5sSkq80B-lkp@intel.com/ Fixes: 3f57a3b9ab74 ("iio: adc: Support ROHM BD79124 ADC") Signed-off-by: Matti Vaittinen Reviewed-by: Bartosz Golaszewski Link: https://patch.msgid.link/6837249bddf358924e67566293944506206d2d62.1755076369.git.mazziesaccount@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 6de2abad0197..24f2572c487e 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -1300,7 +1300,7 @@ config RN5T618_ADC config ROHM_BD79124 tristate "Rohm BD79124 ADC driver" - depends on I2C + depends on I2C && GPIOLIB select REGMAP_I2C select IIO_ADC_HELPER help From b04e4551893fb8a06106a175ed7055d41a9279c4 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Mon, 11 Aug 2025 13:32:31 -0500 Subject: [PATCH 2033/2411] iio: adc: ad7380: fix missing max_conversion_rate_hz on adaq4381-4 Add max_conversion_rate_hz to the chip info for "adaq4381-4". Without this, the driver fails to probe because it tries to set the initial sample rate to 0 Hz, which is not valid. 
Fixes: bbeaec81a03e ("iio: ad7380: add support for SPI offload") Signed-off-by: David Lechner Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20250811-iio-adc-ad7380-fix-missing-max_conversion_rate_hs-on-ad4381-4-v1-1-ffb728d7a71c@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7380.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/ad7380.c b/drivers/iio/adc/ad7380.c index 6f7034b6c266..fa251dc1aae6 100644 --- a/drivers/iio/adc/ad7380.c +++ b/drivers/iio/adc/ad7380.c @@ -873,6 +873,7 @@ static const struct ad7380_chip_info adaq4381_4_chip_info = { .has_hardware_gain = true, .available_scan_masks = ad7380_4_channel_scan_masks, .timing_specs = &ad7380_4_timing, + .max_conversion_rate_hz = 4 * MEGA, }; static const struct spi_offload_config ad7380_offload_config = { From a3c6eabe3bbd6b0e7124d68b2d3bc32fed17362e Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Sun, 10 Aug 2025 15:33:27 +0300 Subject: [PATCH 2034/2411] iio: adc: rzg2l: Cleanup suspend/resume path There is no need to manually track the runtime PM status in the driver. The pm_runtime_force_suspend() and pm_runtime_force_resume() functions already call pm_runtime_status_suspended() to check the runtime PM state. Additionally, avoid calling pm_runtime_put_autosuspend() during the suspend/resume path, as this would decrease the usage counter of a potential user that had the ADC open before the suspend/resume cycle. 
Fixes: 563cf94f9329 ("iio: adc: rzg2l_adc: Add suspend/resume support") Reviewed-by: Ulf Hansson Reviewed-by: Lad Prabhakar Signed-off-by: Claudiu Beznea Link: https://patch.msgid.link/20250810123328.800104-2-claudiu.beznea.uj@bp.renesas.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/rzg2l_adc.c | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/drivers/iio/adc/rzg2l_adc.c b/drivers/iio/adc/rzg2l_adc.c index 9674d48074c9..0cb5a67fd497 100644 --- a/drivers/iio/adc/rzg2l_adc.c +++ b/drivers/iio/adc/rzg2l_adc.c @@ -89,7 +89,6 @@ struct rzg2l_adc { struct completion completion; struct mutex lock; u16 last_val[RZG2L_ADC_MAX_CHANNELS]; - bool was_rpm_active; }; /** @@ -541,14 +540,9 @@ static int rzg2l_adc_suspend(struct device *dev) }; int ret; - if (pm_runtime_suspended(dev)) { - adc->was_rpm_active = false; - } else { - ret = pm_runtime_force_suspend(dev); - if (ret) - return ret; - adc->was_rpm_active = true; - } + ret = pm_runtime_force_suspend(dev); + if (ret) + return ret; ret = reset_control_bulk_assert(ARRAY_SIZE(resets), resets); if (ret) @@ -557,9 +551,7 @@ static int rzg2l_adc_suspend(struct device *dev) return 0; rpm_restore: - if (adc->was_rpm_active) - pm_runtime_force_resume(dev); - + pm_runtime_force_resume(dev); return ret; } @@ -577,11 +569,9 @@ static int rzg2l_adc_resume(struct device *dev) if (ret) return ret; - if (adc->was_rpm_active) { - ret = pm_runtime_force_resume(dev); - if (ret) - goto resets_restore; - } + ret = pm_runtime_force_resume(dev); + if (ret) + goto resets_restore; ret = rzg2l_adc_hw_init(dev, adc); if (ret) @@ -590,10 +580,7 @@ static int rzg2l_adc_resume(struct device *dev) return 0; rpm_restore: - if (adc->was_rpm_active) { - pm_runtime_mark_last_busy(dev); - pm_runtime_put_autosuspend(dev); - } + pm_runtime_force_suspend(dev); resets_restore: reset_control_bulk_assert(ARRAY_SIZE(resets), resets); return ret; From c69e13965f26b8058f538ea8bdbd2d7718cf1fbe Mon Sep 17 
00:00:00 2001 From: Claudiu Beznea Date: Sun, 10 Aug 2025 15:33:28 +0300 Subject: [PATCH 2035/2411] iio: adc: rzg2l_adc: Set driver data before enabling runtime PM When stress-testing the system by repeatedly unbinding and binding the ADC device in a loop, and the ADC is a supplier for another device (e.g., a thermal hardware block that reads temperature through the ADC), it may happen that the ADC device is runtime-resumed immediately after runtime PM is enabled, triggered by its consumer. At this point, since drvdata is not yet set and the driver's runtime PM callbacks rely on it, a crash can occur. To avoid this, set drvdata just after it was allocated. Fixes: 89ee8174e8c8 ("iio: adc: rzg2l_adc: Simplify the runtime PM code") Signed-off-by: Claudiu Beznea Link: https://patch.msgid.link/20250810123328.800104-3-claudiu.beznea.uj@bp.renesas.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/rzg2l_adc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/rzg2l_adc.c b/drivers/iio/adc/rzg2l_adc.c index 0cb5a67fd497..cadb0446bc29 100644 --- a/drivers/iio/adc/rzg2l_adc.c +++ b/drivers/iio/adc/rzg2l_adc.c @@ -427,6 +427,8 @@ static int rzg2l_adc_probe(struct platform_device *pdev) if (!indio_dev) return -ENOMEM; + platform_set_drvdata(pdev, indio_dev); + adc = iio_priv(indio_dev); adc->hw_params = device_get_match_data(dev); @@ -459,8 +461,6 @@ static int rzg2l_adc_probe(struct platform_device *pdev) if (ret) return ret; - platform_set_drvdata(pdev, indio_dev); - ret = rzg2l_adc_hw_init(dev, adc); if (ret) return dev_err_probe(&pdev->dev, ret, From 433b99e922943efdfd62b9a8e3ad1604838181f2 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sat, 2 Aug 2025 17:44:21 +0100 Subject: [PATCH 2036/2411] iio: light: as73211: Ensure buffer holes are zeroed Given that the buffer is copied to a kfifo that ultimately user space can read, ensure we zero it. 
Fixes: 403e5586b52e ("iio: light: as73211: New driver") Reviewed-by: Matti Vaittinen Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20250802164436.515988-2-jic23@kernel.org Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/as73211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/light/as73211.c b/drivers/iio/light/as73211.c index 68f60dc3c79d..32719f584c47 100644 --- a/drivers/iio/light/as73211.c +++ b/drivers/iio/light/as73211.c @@ -639,7 +639,7 @@ static irqreturn_t as73211_trigger_handler(int irq __always_unused, void *p) struct { __le16 chan[4]; aligned_s64 ts; - } scan; + } scan = { }; int data_result, ret; mutex_lock(&data->mutex); From 5eb4b9a4cdbb70d70377fe8fb2920b75910e5024 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 13 Aug 2025 15:21:59 +0200 Subject: [PATCH 2037/2411] params: Replace deprecated strcpy() with strscpy() and memcpy() strcpy() is deprecated; use strscpy() and memcpy() instead. In param_set_copystring(), we can safely use memcpy() because we already know the length of the source string 'val' and that it is guaranteed to be NUL-terminated within the first 'kps->maxlen' bytes. 
Link: https://github.com/KSPP/linux/issues/88 Signed-off-by: Thorsten Blum Reviewed-by: Daniel Gomez Reviewed-by: Petr Pavlu Link: https://lore.kernel.org/r/20250813132200.184064-2-thorsten.blum@linux.dev Signed-off-by: Daniel Gomez --- kernel/params.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/params.c b/kernel/params.c index b92d64161b75..b96cfd693c99 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -513,13 +513,14 @@ EXPORT_SYMBOL(param_array_ops); int param_set_copystring(const char *val, const struct kernel_param *kp) { const struct kparam_string *kps = kp->str; + const size_t len = strnlen(val, kps->maxlen); - if (strnlen(val, kps->maxlen) == kps->maxlen) { + if (len == kps->maxlen) { pr_err("%s: string doesn't fit in %u chars.\n", kp->name, kps->maxlen-1); return -ENOSPC; } - strcpy(kps->string, val); + memcpy(kps->string, val, len + 1); return 0; } EXPORT_SYMBOL(param_set_copystring); @@ -841,7 +842,7 @@ static void __init param_sysfs_builtin(void) dot = strchr(kp->name, '.'); if (!dot) { /* This happens for core_param() */ - strcpy(modname, "kernel"); + strscpy(modname, "kernel"); name_len = 0; } else { name_len = dot - kp->name + 1; From 0f580d5d3d9d9cd0953695cd32e43aac3a946338 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 16 Aug 2025 22:42:15 +0200 Subject: [PATCH 2038/2411] rust: alloc: fix `rusttest` by providing `Cmalloc::aligned_layout` too Commit fde578c86281 ("rust: alloc: replace aligned_size() with Kmalloc::aligned_layout()") provides a public `aligned_layout` function in `Kmalloc`, but not in `Cmalloc`, and thus uses of it will trigger an error in `rusttest`.
Such a user appeared in the following commit 22ab0641b939 ("rust: drm: ensure kmalloc() compatible Layout"): error[E0599]: no function or associated item named `aligned_layout` found for struct `alloc::allocator_test::Cmalloc` in the current scope --> rust/kernel/drm/device.rs:100:31 | 100 | let layout = Kmalloc::aligned_layout(Layout::new::<Self>()); | ^^^^^^^^^^^^^^ function or associated item not found in `Cmalloc` | ::: rust/kernel/alloc/allocator_test.rs:19:1 | 19 | pub struct Cmalloc; | ------------------ function or associated item `aligned_layout` not found for this struct Thus add an equivalent one for `Cmalloc`. Fixes: fde578c86281 ("rust: alloc: replace aligned_size() with Kmalloc::aligned_layout()") Signed-off-by: Miguel Ojeda Link: https://lore.kernel.org/r/20250816204215.2719559-1-ojeda@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/alloc/allocator_test.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/rust/kernel/alloc/allocator_test.rs b/rust/kernel/alloc/allocator_test.rs index a3074480bd8d..90dd987d40e4 100644 --- a/rust/kernel/alloc/allocator_test.rs +++ b/rust/kernel/alloc/allocator_test.rs @@ -22,6 +22,17 @@ pub type Vmalloc = Kmalloc; pub type KVmalloc = Kmalloc; +impl Cmalloc { + /// Returns a [`Layout`] that makes [`Kmalloc`] fulfill the requested size and alignment of + /// `layout`. + pub fn aligned_layout(layout: Layout) -> Layout { + // Note that `layout.size()` (after padding) is guaranteed to be a multiple of + // `layout.align()` which together with the slab guarantees means that `Kmalloc` will return + // a properly aligned object (see comments in `kmalloc()` for more information).
+ layout.pad_to_align() + } +} + extern "C" { #[link_name = "aligned_alloc"] fn libc_aligned_alloc(align: usize, size: usize) -> *mut crate::ffi::c_void; From a3dc32c635bae0ae569f489e00de0e8f015bfc25 Mon Sep 17 00:00:00 2001 From: Zenm Chen Date: Thu, 14 Aug 2025 00:24:15 +0800 Subject: [PATCH 2039/2411] USB: storage: Ignore driver CD mode for Realtek multi-mode Wi-Fi dongles Many Realtek USB Wi-Fi dongles released in recent years have two modes: one is driver CD mode which has Windows driver onboard, another one is Wi-Fi mode. Add the US_FL_IGNORE_DEVICE quirk for these multi-mode devices. Otherwise, usb_modeswitch may fail to switch them to Wi-Fi mode. Currently there are only two USB IDs known to be used by these multi-mode Wi-Fi dongles: 0bda:1a2b and 0bda:a192. Information about Mercury MW310UH in /sys/kernel/debug/usb/devices. T: Bus=02 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 12 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=0bda ProdID=a192 Rev= 2.00 S: Manufacturer=Realtek S: Product=DISK C:* #Ifs= 1 Cfg#= 1 Atr=80 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=(none) E: Ad=8a(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0b(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Information about D-Link AX9U rev. A1 in /sys/kernel/debug/usb/devices. T: Bus=03 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 55 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=0bda ProdID=1a2b Rev= 0.00 S: Manufacturer=Realtek S: Product=DISK C:* #Ifs= 1 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=08(stor.) 
Sub=06 Prot=50 Driver=(none) E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Cc: stable Signed-off-by: Zenm Chen Acked-by: Alan Stern Link: https://lore.kernel.org/r/20250813162415.2630-1-zenmchen@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index bee9f1e8003d..dfa5276a5a43 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1501,6 +1501,28 @@ UNUSUAL_DEV( 0x0bc2, 0x3332, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_WP_DETECT ), +/* + * Reported by Zenm Chen + * Ignore driver CD mode, otherwise usb_modeswitch may fail to switch + * the device into Wi-Fi mode. + */ +UNUSUAL_DEV( 0x0bda, 0x1a2b, 0x0000, 0xffff, + "Realtek", + "DISK", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_DEVICE ), + +/* + * Reported by Zenm Chen + * Ignore driver CD mode, otherwise usb_modeswitch may fail to switch + * the device into Wi-Fi mode. + */ +UNUSUAL_DEV( 0x0bda, 0xa192, 0x0000, 0xffff, + "Realtek", + "DISK", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_DEVICE ), + UNUSUAL_DEV( 0x0d49, 0x7310, 0x0000, 0x9999, "Maxtor", "USB to SATA", From 45eae113dccaf8e502090ecf5b3d9e9b805add6f Mon Sep 17 00:00:00 2001 From: Selvarasu Ganesan Date: Fri, 8 Aug 2025 18:23:05 +0530 Subject: [PATCH 2040/2411] usb: dwc3: Remove WARN_ON for device endpoint command timeouts This commit addresses a rarely observed endpoint command timeout which causes kernel panic due to warn when 'panic_on_warn' is enabled and unnecessary call trace prints when 'panic_on_warn' is disabled. It is seen during fast software-controlled connect/disconnect testcases. The following is one such endpoint command timeout that we observed: 1. 
Connect ======= ->dwc3_thread_interrupt ->dwc3_ep0_interrupt ->configfs_composite_setup ->composite_setup ->usb_ep_queue ->dwc3_gadget_ep0_queue ->__dwc3_gadget_ep0_queue ->__dwc3_ep0_do_control_data ->dwc3_send_gadget_ep_cmd 2. Disconnect ========== ->dwc3_thread_interrupt ->dwc3_gadget_disconnect_interrupt ->dwc3_ep0_reset_state ->dwc3_ep0_end_control_data ->dwc3_send_gadget_ep_cmd In the issue scenario, in Exynos platforms, we observed that control transfers for the previous connect have not yet been completed and end transfer command sent as a part of the disconnect sequence and processing of USB_ENDPOINT_HALT feature request from the host timeout. This may be an expected scenario since the controller is processing EP commands sent as a part of the previous connect. It may be better to remove WARN_ON in all places where device endpoint commands are sent to avoid unnecessary kernel panic due to warn. Cc: stable Co-developed-by: Akash M Signed-off-by: Akash M Signed-off-by: Selvarasu Ganesan Acked-by: Thinh Nguyen Reviewed-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250808125315.1607-1-selvarasu.g@samsung.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/ep0.c | 20 ++++++++++++++++---- drivers/usb/dwc3/gadget.c | 10 ++++++++-- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 666ac432f52d..b4229aa13f37 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -288,7 +288,9 @@ void dwc3_ep0_out_start(struct dwc3 *dwc) dwc3_ep0_prepare_one_trb(dep, dwc->ep0_trb_addr, 8, DWC3_TRBCTL_CONTROL_SETUP, false); ret = dwc3_ep0_start_trans(dep); - WARN_ON(ret < 0); + if (ret < 0) + dev_err(dwc->dev, "ep0 out start transfer failed: %d\n", ret); + for (i = 2; i < DWC3_ENDPOINTS_NUM; i++) { struct dwc3_ep *dwc3_ep; @@ -1061,7 +1063,9 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, ret = dwc3_ep0_start_trans(dep); } - WARN_ON(ret < 0); + if (ret < 0) +
dev_err(dwc->dev, + "ep0 data phase start transfer failed: %d\n", ret); } static int dwc3_ep0_start_control_status(struct dwc3_ep *dep) @@ -1078,7 +1082,12 @@ static int dwc3_ep0_start_control_status(struct dwc3_ep *dep) static void __dwc3_ep0_do_control_status(struct dwc3 *dwc, struct dwc3_ep *dep) { - WARN_ON(dwc3_ep0_start_control_status(dep)); + int ret; + + ret = dwc3_ep0_start_control_status(dep); + if (ret) + dev_err(dwc->dev, + "ep0 status phase start transfer failed: %d\n", ret); } static void dwc3_ep0_do_control_status(struct dwc3 *dwc, @@ -1121,7 +1130,10 @@ void dwc3_ep0_end_control_data(struct dwc3 *dwc, struct dwc3_ep *dep) cmd |= DWC3_DEPCMD_PARAM(dep->resource_index); memset(&params, 0, sizeof(params)); ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params); - WARN_ON_ONCE(ret); + if (ret) + dev_err_ratelimited(dwc->dev, + "ep0 data phase end transfer failed: %d\n", ret); + dep->resource_index = 0; } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 68fa2813e5f4..554f997eb8c4 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1772,7 +1772,11 @@ static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool int dep->flags |= DWC3_EP_DELAY_STOP; return 0; } - WARN_ON_ONCE(ret); + + if (ret) + dev_err_ratelimited(dep->dwc->dev, + "end transfer failed: %d\n", ret); + dep->resource_index = 0; if (!interrupt) @@ -4048,7 +4052,9 @@ static void dwc3_clear_stall_all_ep(struct dwc3 *dwc) dep->flags &= ~DWC3_EP_STALL; ret = dwc3_send_clear_stall_ep_cmd(dep); - WARN_ON_ONCE(ret); + if (ret) + dev_err_ratelimited(dwc->dev, + "failed to clear STALL on %s\n", dep->name); } } From cabb6c5f4d9e7f49bdf8c0a13c74bd93ee35f45a Mon Sep 17 00:00:00 2001 From: Amit Sunil Dhamne Date: Fri, 15 Aug 2025 11:31:51 -0700 Subject: [PATCH 2041/2411] usb: typec: maxim_contaminant: disable low power mode when reading comparator values Low power mode is enabled when reading CC resistance as part of `max_contaminant_read_resistance_kohm()`
and left in that state. However, it's supposed to work with 1uA current source. To read CC comparator values current source is changed to 80uA. This causes a storm of CC interrupts as it (falsely) detects a potential contaminant. To prevent this, disable low power mode current sourcing before reading comparator values. Fixes: 02b332a06397 ("usb: typec: maxim_contaminant: Implement check_contaminant callback") Cc: stable Signed-off-by: Amit Sunil Dhamne Reviewed-by: Badhri Jagan Sridharan Rule: add Link: https://lore.kernel.org/stable/20250814-fix-upstream-contaminant-v1-1-801ce8089031%40google.com Link: https://lore.kernel.org/r/20250815-fix-upstream-contaminant-v2-1-6c8d6c3adafb@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/maxim_contaminant.c | 5 +++++ drivers/usb/typec/tcpm/tcpci_maxim.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/usb/typec/tcpm/maxim_contaminant.c b/drivers/usb/typec/tcpm/maxim_contaminant.c index 0cdda06592fd..818cfe226ac7 100644 --- a/drivers/usb/typec/tcpm/maxim_contaminant.c +++ b/drivers/usb/typec/tcpm/maxim_contaminant.c @@ -188,6 +188,11 @@ static int max_contaminant_read_comparators(struct max_tcpci_chip *chip, u8 *ven if (ret < 0) return ret; + /* Disable low power mode */ + ret = regmap_update_bits(regmap, TCPC_VENDOR_CC_CTRL2, CCLPMODESEL, + FIELD_PREP(CCLPMODESEL, + LOW_POWER_MODE_DISABLE)); + /* Sleep to allow comparators settle */ usleep_range(5000, 6000); ret = regmap_update_bits(regmap, TCPC_TCPC_CTRL, TCPC_TCPC_CTRL_ORIENTATION, PLUG_ORNT_CC1); diff --git a/drivers/usb/typec/tcpm/tcpci_maxim.h b/drivers/usb/typec/tcpm/tcpci_maxim.h index 76270d5c2838..b33540a42a95 100644 --- a/drivers/usb/typec/tcpm/tcpci_maxim.h +++ b/drivers/usb/typec/tcpm/tcpci_maxim.h @@ -21,6 +21,7 @@ #define CCOVPDIS BIT(6) #define SBURPCTRL BIT(5) #define CCLPMODESEL GENMASK(4, 3) +#define LOW_POWER_MODE_DISABLE 0 #define ULTRA_LOW_POWER_MODE 1 #define CCRPCTRL GENMASK(2, 0) #define UA_1_SRC 1 From 
a381c6d6f646226924809d0ad01a9465786da463 Mon Sep 17 00:00:00 2001 From: Amit Sunil Dhamne Date: Fri, 15 Aug 2025 11:31:52 -0700 Subject: [PATCH 2042/2411] usb: typec: maxim_contaminant: re-enable cc toggle if cc is open and port is clean Presently in `max_contaminant_is_contaminant()` if there's no contaminant detected previously, CC is open & stopped toggling and no contaminant is currently present, TCPC.RC would be programmed to do DRP toggling. However, it didn't actively look for a connection. This would lead to Type-C not detect *any* new connections. Hence, in the above situation, re-enable toggling & program TCPC to look for a new connection. Also, return early if TCPC was looking for connection as this indicates TCPC has neither detected a potential connection nor a change in contaminant state. In addition, once dry detection is complete (port is dry), restart toggling. Fixes: 02b332a06397e ("usb: typec: maxim_contaminant: Implement check_contaminant callback") Cc: stable Signed-off-by: Amit Sunil Dhamne Reviewed-by: Badhri Jagan Sridharan Link: https://lore.kernel.org/r/20250815-fix-upstream-contaminant-v2-2-6c8d6c3adafb@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/maxim_contaminant.c | 53 ++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/drivers/usb/typec/tcpm/maxim_contaminant.c b/drivers/usb/typec/tcpm/maxim_contaminant.c index 818cfe226ac7..af8da6dc60ae 100644 --- a/drivers/usb/typec/tcpm/maxim_contaminant.c +++ b/drivers/usb/typec/tcpm/maxim_contaminant.c @@ -329,6 +329,39 @@ static int max_contaminant_enable_dry_detection(struct max_tcpci_chip *chip) return 0; } +static int max_contaminant_enable_toggling(struct max_tcpci_chip *chip) +{ + struct regmap *regmap = chip->data.regmap; + int ret; + + /* Disable dry detection if enabled. 
*/ + ret = regmap_update_bits(regmap, TCPC_VENDOR_CC_CTRL2, CCLPMODESEL, + FIELD_PREP(CCLPMODESEL, + LOW_POWER_MODE_DISABLE)); + if (ret) + return ret; + + ret = regmap_update_bits(regmap, TCPC_VENDOR_CC_CTRL1, CCCONNDRY, 0); + if (ret) + return ret; + + ret = max_tcpci_write8(chip, TCPC_ROLE_CTRL, TCPC_ROLE_CTRL_DRP | + FIELD_PREP(TCPC_ROLE_CTRL_CC1, + TCPC_ROLE_CTRL_CC_RD) | + FIELD_PREP(TCPC_ROLE_CTRL_CC2, + TCPC_ROLE_CTRL_CC_RD)); + if (ret) + return ret; + + ret = regmap_update_bits(regmap, TCPC_TCPC_CTRL, + TCPC_TCPC_CTRL_EN_LK4CONN_ALRT, + TCPC_TCPC_CTRL_EN_LK4CONN_ALRT); + if (ret) + return ret; + + return max_tcpci_write8(chip, TCPC_COMMAND, TCPC_CMD_LOOK4CONNECTION); +} + bool max_contaminant_is_contaminant(struct max_tcpci_chip *chip, bool disconnect_while_debounce, bool *cc_handled) { @@ -345,6 +378,12 @@ bool max_contaminant_is_contaminant(struct max_tcpci_chip *chip, bool disconnect if (ret < 0) return false; + if (cc_status & TCPC_CC_STATUS_TOGGLING) { + if (chip->contaminant_state == DETECTED) + return true; + return false; + } + if (chip->contaminant_state == NOT_DETECTED || chip->contaminant_state == SINK) { if (!disconnect_while_debounce) msleep(100); @@ -377,6 +416,12 @@ bool max_contaminant_is_contaminant(struct max_tcpci_chip *chip, bool disconnect max_contaminant_enable_dry_detection(chip); return true; } + + ret = max_contaminant_enable_toggling(chip); + if (ret) + dev_err(chip->dev, + "Failed to enable toggling, ret=%d", + ret); } } else if (chip->contaminant_state == DETECTED) { if (!(cc_status & TCPC_CC_STATUS_TOGGLING)) { @@ -384,6 +429,14 @@ bool max_contaminant_is_contaminant(struct max_tcpci_chip *chip, bool disconnect if (chip->contaminant_state == DETECTED) { max_contaminant_enable_dry_detection(chip); return true; + } else { + ret = max_contaminant_enable_toggling(chip); + if (ret) { + dev_err(chip->dev, + "Failed to enable toggling, ret=%d", + ret); + return true; + } } } } From ba6cc29351b1fa0cb9adce91b88b9f3c3cbe9c46 Mon Sep 17 
00:00:00 2001 From: Charalampos Mitrodimas Date: Sat, 16 Aug 2025 14:14:37 +0000 Subject: [PATCH 2043/2411] debugfs: fix mount options not being applied Mount options (uid, gid, mode) are silently ignored when debugfs is mounted. This is a regression introduced during the conversion to the new mount API. When the mount API conversion was done, the parsed options were never applied to the superblock when it was reused. As a result, the mount options were ignored when debugfs was mounted. Fix this by following the same pattern as the tracefs fix in commit e4d32142d1de ("tracing: Fix tracefs mount options"). Call debugfs_reconfigure() in debugfs_get_tree() to apply the mount options to the superblock after it has been created or reused. As an example, with the bug the "mode" mount option is ignored: $ mount -o mode=0666 -t debugfs debugfs /tmp/debugfs_test $ mount | grep debugfs_test debugfs on /tmp/debugfs_test type debugfs (rw,relatime) $ ls -ld /tmp/debugfs_test drwx------ 25 root root 0 Aug 4 14:16 /tmp/debugfs_test With the fix applied, it works as expected: $ mount -o mode=0666 -t debugfs debugfs /tmp/debugfs_test $ mount | grep debugfs_test debugfs on /tmp/debugfs_test type debugfs (rw,relatime,mode=666) $ ls -ld /tmp/debugfs_test drw-rw-rw- 37 root root 0 Aug 2 17:28 /tmp/debugfs_test Fixes: a20971c18752 ("vfs: Convert debugfs to use the new mount API") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220406 Cc: stable Reviewed-by: Eric Sandeen Signed-off-by: Charalampos Mitrodimas Link: https://lore.kernel.org/r/20250816-debugfs-mount-opts-v3-1-d271dad57b5b@posteo.net Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index a0357b0cf362..c12d649df6a5 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -183,6 +183,9 @@ static int debugfs_reconfigure(struct fs_context *fc) struct debugfs_fs_info *sb_opts = sb->s_fs_info; struct 
debugfs_fs_info *new_opts = fc->s_fs_info; + if (!new_opts) + return 0; + sync_filesystem(sb); /* structure copy of new mount options to sb */ @@ -282,10 +285,16 @@ static int debugfs_fill_super(struct super_block *sb, struct fs_context *fc) static int debugfs_get_tree(struct fs_context *fc) { + int err; + if (!(debugfs_allow & DEBUGFS_ALLOW_API)) return -EPERM; - return get_tree_single(fc, debugfs_fill_super); + err = get_tree_single(fc, debugfs_fill_super); + if (err) + return err; + + return debugfs_reconfigure(fc); } static void debugfs_free_fc(struct fs_context *fc) From d49172bbd7eb07e4ba5e52238eaa9caf692c1cea Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 14 Aug 2025 21:27:29 +0200 Subject: [PATCH 2044/2411] Documentation: clarify the expected collaboration with security bugs reporters Some bug reports sent to the security team sometimes lack any explanation, are only AI-generated without verification, or sometimes it can simply be difficult to have a conversation with an invisible reporter belonging to an opaque team. This fortunately remains rare but the trend has been steadily increasing over the last years and it seems important to clarify what developers expect from reporters to avoid frustration on any side and keep the process efficient. Signed-off-by: Willy Tarreau Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20250814192730.19252-1-w@1wt.eu Signed-off-by: Greg Kroah-Hartman --- Documentation/process/security-bugs.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/process/security-bugs.rst b/Documentation/process/security-bugs.rst index 56c560a00b37..7dcc034d3df8 100644 --- a/Documentation/process/security-bugs.rst +++ b/Documentation/process/security-bugs.rst @@ -19,6 +19,16 @@ that can speed up the process considerably. It is possible that the security team will bring in extra help from area maintainers to understand and fix the security vulnerability. 
+The security team and maintainers almost always require additional +information beyond what was initially provided in a report and rely on +active and efficient collaboration with the reporter to perform further +testing (e.g., verifying versions, configuration options, mitigations, or +patches). Before contacting the security team, the reporter must ensure +they are available to explain their findings, engage in discussions, and +run additional tests. Reports where the reporter does not respond promptly +or cannot effectively discuss their findings may be abandoned if the +communication does not quickly improve. + As it is with any bug, the more information provided the easier it will be to diagnose and fix. Please review the procedure outlined in 'Documentation/admin-guide/reporting-issues.rst' if you are unclear about what From 3a68841d1d9b6eb32b2652bbb83acd17d5eb9135 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 14 Aug 2025 21:27:30 +0200 Subject: [PATCH 2045/2411] Documentation: smooth the text flow in the security bug reporting process The text was presenting the team, then the e-mail address, then some of the expectations, then what form of e-mail is expected. By switching the e-mail paragraph two paragraphs later and dropping the "Contact" sub-section, we can have a more natural flow that presents the team, then its expectation, then how to best contribute, then where to send. And more importantly, it increases the chances that reporters have read the prerequisites before finding the e-mail address.
Signed-off-by: Willy Tarreau Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20250814192730.19252-2-w@1wt.eu Signed-off-by: Greg Kroah-Hartman --- Documentation/process/security-bugs.rst | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/Documentation/process/security-bugs.rst b/Documentation/process/security-bugs.rst index 7dcc034d3df8..84657e7d2e5b 100644 --- a/Documentation/process/security-bugs.rst +++ b/Documentation/process/security-bugs.rst @@ -8,17 +8,6 @@ like to know when a security bug is found so that it can be fixed and disclosed as quickly as possible. Please report security bugs to the Linux kernel security team. -Contact -------- - -The Linux kernel security team can be contacted by email at -<security@kernel.org>. This is a private list of security officers -who will help verify the bug report and develop and release a fix. -If you already have a fix, please include it with your report, as -that can speed up the process considerably. It is possible that the -security team will bring in extra help from area maintainers to -understand and fix the security vulnerability. - The security team and maintainers almost always require additional information beyond what was initially provided in a report and rely on active and efficient collaboration with the reporter to perform further @@ -36,6 +25,14 @@ information is helpful. Any exploit code is very helpful and will not be released without consent from the reporter unless it has already been made public. +The Linux kernel security team can be contacted by email at +<security@kernel.org>. This is a private list of security officers +who will help verify the bug report and develop and release a fix. +If you already have a fix, please include it with your report, as +that can speed up the process considerably. It is possible that the +security team will bring in extra help from area maintainers to +understand and fix the security vulnerability.
+ Please send plain text emails without attachments where possible. It is much harder to have a context-quoted discussion about a complex issue if all the details are hidden away in attachments. Think of it like a From e5f48bfa2ae0806d5f51fb8061afc619a73599a7 Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Wed, 13 Aug 2025 17:42:28 +0800 Subject: [PATCH 2046/2411] drm/hisilicon/hibmc: fix the i2c device resource leak when vdac init failed Currently the driver missed to clean the i2c adapter when vdac init failed. It may cause resource leak. Fixes: a0d078d06e516 ("drm/hisilicon: Features to support reading resolutions from EDID") Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250813094238.3722345-2-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h | 1 + drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c | 5 +++++ drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c | 11 ++++++++--- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h index 274feabe7df0..ca8502e2760c 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h @@ -69,6 +69,7 @@ int hibmc_de_init(struct hibmc_drm_private *priv); int hibmc_vdac_init(struct hibmc_drm_private *priv); int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_vdac *connector); +void hibmc_ddc_del(struct hibmc_vdac *vdac); int hibmc_dp_init(struct hibmc_drm_private *priv); diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c index 99b3b77b5445..44860011855e 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c @@ -95,3 +95,8 @@ int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_vdac *vdac) return 
i2c_bit_add_bus(&vdac->adapter); } + +void hibmc_ddc_del(struct hibmc_vdac *vdac) +{ + i2c_del_adapter(&vdac->adapter); +} diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c index e8a527ede854..841e81f47b68 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c @@ -53,7 +53,7 @@ static void hibmc_connector_destroy(struct drm_connector *connector) { struct hibmc_vdac *vdac = to_hibmc_vdac(connector); - i2c_del_adapter(&vdac->adapter); + hibmc_ddc_del(vdac); drm_connector_cleanup(connector); } @@ -110,7 +110,7 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv) ret = drmm_encoder_init(dev, encoder, NULL, DRM_MODE_ENCODER_DAC, NULL); if (ret) { drm_err(dev, "failed to init encoder: %d\n", ret); - return ret; + goto err; } drm_encoder_helper_add(encoder, &hibmc_encoder_helper_funcs); @@ -121,7 +121,7 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv) &vdac->adapter); if (ret) { drm_err(dev, "failed to init connector: %d\n", ret); - return ret; + goto err; } drm_connector_helper_add(connector, &hibmc_connector_helper_funcs); @@ -131,4 +131,9 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv) connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; return 0; + +err: + hibmc_ddc_del(vdac); + + return ret; } From 8bed4ec42a4e0dc8113172696ff076d1eb6d8bcb Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Wed, 13 Aug 2025 17:42:30 +0800 Subject: [PATCH 2047/2411] drm/hisilicon/hibmc: fix irq_request()'s irq name variable is local The local variable is passed in request_irq (), and there will be use after free problem, which will make request_irq failed. Using the global irq name instead of it to fix. 
Fixes: b11bc1ae4658 ("drm/hisilicon/hibmc: Add MSI irq getting and requesting for HPD") Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250813094238.3722345-4-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index 768b97f9e74a..4cdcc34070ee 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -32,7 +32,7 @@ DEFINE_DRM_GEM_FOPS(hibmc_fops); -static const char *g_irqs_names_map[HIBMC_MAX_VECTORS] = { "vblank", "hpd" }; +static const char *g_irqs_names_map[HIBMC_MAX_VECTORS] = { "hibmc-vblank", "hibmc-hpd" }; static irqreturn_t hibmc_interrupt(int irq, void *arg) { @@ -277,7 +277,6 @@ static void hibmc_unload(struct drm_device *dev) static int hibmc_msi_init(struct drm_device *dev) { struct pci_dev *pdev = to_pci_dev(dev->dev); - char name[32] = {0}; int valid_irq_num; int irq; int ret; @@ -292,9 +291,6 @@ static int hibmc_msi_init(struct drm_device *dev) valid_irq_num = ret; for (int i = 0; i < valid_irq_num; i++) { - snprintf(name, ARRAY_SIZE(name) - 1, "%s-%s-%s", - dev->driver->name, pci_name(pdev), g_irqs_names_map[i]); - irq = pci_irq_vector(pdev, i); if (i) @@ -302,10 +298,10 @@ static int hibmc_msi_init(struct drm_device *dev) ret = devm_request_threaded_irq(&pdev->dev, irq, hibmc_dp_interrupt, hibmc_dp_hpd_isr, - IRQF_SHARED, name, dev); + IRQF_SHARED, g_irqs_names_map[i], dev); else ret = devm_request_irq(&pdev->dev, irq, hibmc_interrupt, - IRQF_SHARED, name, dev); + IRQF_SHARED, g_irqs_names_map[i], dev); if (ret) { drm_err(dev, "install irq failed: %d\n", ret); return ret; From 93a08f856fcc5aaeeecad01f71bef3088588216a Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Wed, 13 Aug 2025 17:42:31 +0800 
Subject: [PATCH 2048/2411] drm/hisilicon/hibmc: fix the hibmc loaded failed bug When hibmc fails to load, the driver uses hibmc_unload to free the resources, but the mutexes in mode.config are not initialized, which results in a NULL pointer access. Just change the goto statements to return, because hibmc_hw_init() doesn't need to free anything. Fixes: b3df5e65cc03 ("drm/hibmc: Drop drm_vblank_cleanup") Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250813094238.3722345-5-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index 4cdcc34070ee..ac552c339671 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -319,13 +319,13 @@ static int hibmc_load(struct drm_device *dev) ret = hibmc_hw_init(priv); if (ret) - goto err; + return ret; ret = drmm_vram_helper_init(dev, pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); if (ret) { drm_err(dev, "Error initializing VRAM MM; %d\n", ret); - goto err; + return ret; } ret = hibmc_kms_init(priv); From 9f98b429ba67d430b873e06bcfb90afa22888978 Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Wed, 13 Aug 2025 17:42:32 +0800 Subject: [PATCH 2049/2411] drm/hisilicon/hibmc: fix rare monitors cannot display problem In some cases, the DP link training succeeds at 8.1Gbps, but the sink's maximum supported rate is less than 8.1G. So change the default 8.1Gbps link rate to the rate read from the device's capabilities.
Fixes: 54063d86e036 ("drm/hisilicon/hibmc: add dp link moduel in hibmc drivers") Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250813094238.3722345-6-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c index 74f7832ea53e..0726cb5b736e 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c @@ -325,6 +325,17 @@ static int hibmc_dp_link_downgrade_training_eq(struct hibmc_dp_dev *dp) return hibmc_dp_link_reduce_rate(dp); } +static void hibmc_dp_update_caps(struct hibmc_dp_dev *dp) +{ + dp->link.cap.link_rate = dp->dpcd[DP_MAX_LINK_RATE]; + if (dp->link.cap.link_rate > DP_LINK_BW_8_1 || !dp->link.cap.link_rate) + dp->link.cap.link_rate = DP_LINK_BW_8_1; + + dp->link.cap.lanes = dp->dpcd[DP_MAX_LANE_COUNT] & DP_MAX_LANE_COUNT_MASK; + if (dp->link.cap.lanes > HIBMC_DP_LANE_NUM_MAX) + dp->link.cap.lanes = HIBMC_DP_LANE_NUM_MAX; +} + int hibmc_dp_link_training(struct hibmc_dp_dev *dp) { struct hibmc_dp_link *link = &dp->link; @@ -334,8 +345,7 @@ int hibmc_dp_link_training(struct hibmc_dp_dev *dp) if (ret) drm_err(dp->dev, "dp aux read dpcd failed, ret: %d\n", ret); - dp->link.cap.link_rate = dp->dpcd[DP_MAX_LINK_RATE]; - dp->link.cap.lanes = 0x2; + hibmc_dp_update_caps(dp); ret = hibmc_dp_get_serdes_rate_cfg(dp); if (ret < 0) From 3271faf42d135bcf569c3ff6af55c21858eec212 Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Wed, 13 Aug 2025 17:42:34 +0800 Subject: [PATCH 2050/2411] drm/hisilicon/hibmc: fix dp and vga cannot show together If VGA and DP connected together, there will be only one can get crtc. Add encoder possible_clones to support two connectors enable. 
Fixes: 3c7623fb5bb6 ("drm/hisilicon/hibmc: Enable this hot plug detect of irq feature") Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250813094238.3722345-8-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index ac552c339671..289304500ab0 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -115,6 +115,8 @@ static const struct drm_mode_config_funcs hibmc_mode_funcs = { static int hibmc_kms_init(struct hibmc_drm_private *priv) { struct drm_device *dev = &priv->dev; + struct drm_encoder *encoder; + u32 clone_mask = 0; int ret; ret = drmm_mode_config_init(dev); @@ -154,6 +156,12 @@ static int hibmc_kms_init(struct hibmc_drm_private *priv) return ret; } + drm_for_each_encoder(encoder, dev) + clone_mask |= drm_encoder_mask(encoder); + + drm_for_each_encoder(encoder, dev) + encoder->possible_clones = clone_mask; + return 0; } From c17b750b3ad9f45f2b6f7e6f7f4679844244f0b9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 17 Aug 2025 15:22:10 -0700 Subject: [PATCH 2051/2411] Linux 6.17-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6bfe776bf3c5..d1adb78c3596 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION* From bac7b996d42e458a94578f4227795a0d4deef6fa Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 12 Aug 2025 18:45:46 +0200 Subject: [PATCH 2052/2411] smb: server: split ksmbd_rdma_stop_listening() out of ksmbd_rdma_destroy() We can't call destroy_workqueue(smb_direct_wq); before stop_sessions()! 
Otherwise already existing connections try to use smb_direct_wq as a NULL pointer. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.c | 3 ++- fs/smb/server/transport_rdma.c | 5 ++++- fs/smb/server/transport_rdma.h | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 3f04a2977ba8..67c4f73398df 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -504,7 +504,8 @@ void ksmbd_conn_transport_destroy(void) { mutex_lock(&init_lock); ksmbd_tcp_destroy(); - ksmbd_rdma_destroy(); + ksmbd_rdma_stop_listening(); stop_sessions(); + ksmbd_rdma_destroy(); mutex_unlock(&init_lock); } diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 8d366db5f605..5466aa8c39b1 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -2194,7 +2194,7 @@ int ksmbd_rdma_init(void) return 0; } -void ksmbd_rdma_destroy(void) +void ksmbd_rdma_stop_listening(void) { if (!smb_direct_listener.cm_id) return; @@ -2203,7 +2203,10 @@ void ksmbd_rdma_destroy(void) rdma_destroy_id(smb_direct_listener.cm_id); smb_direct_listener.cm_id = NULL; +} +void ksmbd_rdma_destroy(void) +{ if (smb_direct_wq) { destroy_workqueue(smb_direct_wq); smb_direct_wq = NULL; diff --git a/fs/smb/server/transport_rdma.h b/fs/smb/server/transport_rdma.h index 77aee4e5c9dc..a2291b77488a 100644 --- a/fs/smb/server/transport_rdma.h +++ b/fs/smb/server/transport_rdma.h @@ -54,13 +54,15 @@ struct smb_direct_data_transfer { #ifdef CONFIG_SMB_SERVER_SMBDIRECT int ksmbd_rdma_init(void); +void ksmbd_rdma_stop_listening(void); void ksmbd_rdma_destroy(void); bool ksmbd_rdma_capable_netdev(struct net_device *netdev); void 
init_smbd_max_io_size(unsigned int sz); unsigned int get_smbd_max_read_write_size(void); #else static inline int ksmbd_rdma_init(void) { return 0; } -static inline int ksmbd_rdma_destroy(void) { return 0; } +static inline void ksmbd_rdma_stop_listening(void) { } +static inline void ksmbd_rdma_destroy(void) { } static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; } static inline void init_smbd_max_io_size(unsigned int sz) { } static inline unsigned int get_smbd_max_read_write_size(void) { return 0; } From c0d41112f1a5828c194b59cca953114bc3776ef2 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 17 Aug 2025 09:48:40 +0900 Subject: [PATCH 2053/2411] ksmbd: extend the connection limiting mechanism to support IPv6 Update the connection tracking logic to handle both IPv4 and IPv6 address families. Cc: stable@vger.kernel.org Fixes: e6bb91939740 ("ksmbd: limit repeated connections from clients with the same IP") Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.h | 7 ++++++- fs/smb/server/transport_tcp.c | 26 +++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index 31dd1caac1e8..2aa8084bb593 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -46,7 +46,12 @@ struct ksmbd_conn { struct mutex srv_mutex; int status; unsigned int cli_cap; - __be32 inet_addr; + union { + __be32 inet_addr; +#if IS_ENABLED(CONFIG_IPV6) + u8 inet6_addr[16]; +#endif + }; char *request_buf; struct ksmbd_transport *transport; struct nls_table *local_nls; diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index b1df02e321b0..4337df97987d 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -85,7 +85,14 @@ static struct tcp_transport *alloc_transport(struct socket *client_sk) return NULL; } +#if IS_ENABLED(CONFIG_IPV6) + if (client_sk->sk->sk_family == AF_INET6) 
+ memcpy(&conn->inet6_addr, &client_sk->sk->sk_v6_daddr, 16); + else + conn->inet_addr = inet_sk(client_sk->sk)->inet_daddr; +#else conn->inet_addr = inet_sk(client_sk->sk)->inet_daddr; +#endif conn->transport = KSMBD_TRANS(t); KSMBD_TRANS(t)->conn = conn; KSMBD_TRANS(t)->ops = &ksmbd_tcp_transport_ops; @@ -229,7 +236,6 @@ static int ksmbd_kthread_fn(void *p) { struct socket *client_sk = NULL; struct interface *iface = (struct interface *)p; - struct inet_sock *csk_inet; struct ksmbd_conn *conn; int ret; @@ -252,13 +258,27 @@ static int ksmbd_kthread_fn(void *p) /* * Limits repeated connections from clients with the same IP. */ - csk_inet = inet_sk(client_sk->sk); down_read(&conn_list_lock); list_for_each_entry(conn, &conn_list, conns_list) - if (csk_inet->inet_daddr == conn->inet_addr) { +#if IS_ENABLED(CONFIG_IPV6) + if (client_sk->sk->sk_family == AF_INET6) { + if (memcmp(&client_sk->sk->sk_v6_daddr, + &conn->inet6_addr, 16) == 0) { + ret = -EAGAIN; + break; + } + } else if (inet_sk(client_sk->sk)->inet_daddr == + conn->inet_addr) { ret = -EAGAIN; break; } +#else + if (inet_sk(client_sk->sk)->inet_daddr == + conn->inet_addr) { + ret = -EAGAIN; + break; + } +#endif up_read(&conn_list_lock); if (ret == -EAGAIN) continue; From 89bb430f621124af39bb31763c4a8b504c9651e2 Mon Sep 17 00:00:00 2001 From: Ziyan Xu Date: Sat, 16 Aug 2025 10:20:05 +0900 Subject: [PATCH 2054/2411] ksmbd: fix refcount leak causing resource not released When ksmbd_conn_releasing(opinfo->conn) returns true, the refcount was not decremented properly, causing a refcount leak that prevents the count from reaching zero and the memory from being released.
Cc: stable@vger.kernel.org Signed-off-by: Ziyan Xu Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/oplock.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index d7a8a580d013..a04d5702820d 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1102,8 +1102,10 @@ void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, if (!atomic_inc_not_zero(&opinfo->refcount)) continue; - if (ksmbd_conn_releasing(opinfo->conn)) + if (ksmbd_conn_releasing(opinfo->conn)) { + opinfo_put(opinfo); continue; + } oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE, NULL); opinfo_put(opinfo); @@ -1139,8 +1141,11 @@ void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp) if (!atomic_inc_not_zero(&opinfo->refcount)) continue; - if (ksmbd_conn_releasing(opinfo->conn)) + if (ksmbd_conn_releasing(opinfo->conn)) { + opinfo_put(opinfo); continue; + } + oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE, NULL); opinfo_put(opinfo); } @@ -1343,8 +1348,10 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp, if (!atomic_inc_not_zero(&brk_op->refcount)) continue; - if (ksmbd_conn_releasing(brk_op->conn)) + if (ksmbd_conn_releasing(brk_op->conn)) { + opinfo_put(brk_op); continue; + } if (brk_op->is_lease && (brk_op->o_lease->state & (~(SMB2_LEASE_READ_CACHING_LE | From f52d6aa98379842fc255d93282655566f2114e0c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 11 Aug 2025 11:01:48 +0300 Subject: [PATCH 2055/2411] drm/i915/lnl+/tc: Fix handling of an enabled/disconnected dp-alt sink The TypeC PHY HW readout during driver loading and system resume determines which TypeC mode the PHY is in (legacy/DP-alt/TBT-alt) and whether the PHY is connected, based on the PHY's Owned and Ready flags. 
For the PHY to be in DP-alt or legacy mode and for the PHY to be in the connected state in these modes, both the Owned (set by the BIOS/driver) and the Ready (set by the HW) flags should be set. On ICL-MTL the HW kept the PHY's Ready flag set after the driver connected the PHY by acquiring the PHY ownership (by setting the Owned flag), until the driver disconnected the PHY by releasing the PHY ownership (by clearing the Owned flag). On LNL+ this has changed, in that the HW clears the Ready flag as soon as the sink gets disconnected, even if the PHY ownership was acquired already and hence the PHY is being used by the display. When inheriting the HW state from BIOS for a PHY connected in DP-alt mode on which the sink got disconnected - i.e. in a case where the sink was connected while BIOS/GOP was running and so the sink got enabled connecting the PHY, but the user disconnected the sink by the time the driver loaded - the PHY Owned but not Ready state must be accounted for on LNL+ according to the above. Do that by assuming on LNL+ that the PHY is connected in DP-alt mode whenever the PHY Owned flag is set, regardless of the PHY Ready flag. This fixes a problem on LNL+, where the PHY TypeC mode / connected state was detected incorrectly for a DP-alt sink, which got connected and then disconnected by the user in the above way. v2: Rename tc_phy_in_legacy_or_dp_alt_mode() to tc_phy_owned_by_display(). 
(Luca, Jani) Cc: Jani Nikula Cc: stable@vger.kernel.org # v6.8+ Reported-by: Charlton Lin Tested-by: Khaled Almahallawy Reviewed-by: Mika Kahola Reviewed-by: Luca Coelho [Imre: Add one-liner function documentation for tc_phy_owned_by_display()] Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250811080152.906216-2-imre.deak@intel.com (cherry picked from commit 89f4b196ee4b056e0e8c179b247b29d4a71a4e7e) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_tc.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 3bc57579fe53..8208539bfe66 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -1226,14 +1226,19 @@ static void tc_phy_get_hw_state(struct intel_tc_port *tc) tc->phy_ops->get_hw_state(tc); } -static bool tc_phy_is_ready_and_owned(struct intel_tc_port *tc, - bool phy_is_ready, bool phy_is_owned) +/* Is the PHY owned by display i.e. is it in legacy or DP-alt mode? 
*/ +static bool tc_phy_owned_by_display(struct intel_tc_port *tc, + bool phy_is_ready, bool phy_is_owned) { struct intel_display *display = to_intel_display(tc->dig_port); - drm_WARN_ON(display->drm, phy_is_owned && !phy_is_ready); + if (DISPLAY_VER(display) < 20) { + drm_WARN_ON(display->drm, phy_is_owned && !phy_is_ready); - return phy_is_ready && phy_is_owned; + return phy_is_ready && phy_is_owned; + } else { + return phy_is_owned; + } } static bool tc_phy_is_connected(struct intel_tc_port *tc, @@ -1244,7 +1249,7 @@ static bool tc_phy_is_connected(struct intel_tc_port *tc, bool phy_is_owned = tc_phy_is_owned(tc); bool is_connected; - if (tc_phy_is_ready_and_owned(tc, phy_is_ready, phy_is_owned)) + if (tc_phy_owned_by_display(tc, phy_is_ready, phy_is_owned)) is_connected = port_pll_type == ICL_PORT_DPLL_MG_PHY; else is_connected = port_pll_type == ICL_PORT_DPLL_DEFAULT; @@ -1352,7 +1357,7 @@ tc_phy_get_current_mode(struct intel_tc_port *tc) phy_is_ready = tc_phy_is_ready(tc); phy_is_owned = tc_phy_is_owned(tc); - if (!tc_phy_is_ready_and_owned(tc, phy_is_ready, phy_is_owned)) { + if (!tc_phy_owned_by_display(tc, phy_is_ready, phy_is_owned)) { mode = get_tc_mode_in_phy_not_owned_state(tc, live_mode); } else { drm_WARN_ON(display->drm, live_mode == TC_PORT_TBT_ALT); From 5fd35236546abe780eaadb7561e09953719d4fc3 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 11 Aug 2025 11:01:49 +0300 Subject: [PATCH 2056/2411] drm/i915/icl+/tc: Cache the max lane count value The PHY's pin assignment value in the TCSS_DDI_STATUS register - as set by the HW/FW based on the connected DP-alt sink's TypeC/PD pin assignment negotiation - gets cleared by the HW/FW on LNL+ as soon as the sink gets disconnected, even if the PHY ownership got acquired already by the driver (and hence the PHY itself is still connected and used by the display). This is similar to how the PHY Ready flag gets cleared on LNL+ in the same register. 
To be able to query the max lane count value on LNL+ - which is based on the above pin assignment - at all times even after the sink gets disconnected, the max lane count must be determined and cached during the PHY's HW readout and connect sequences. Do that here, leaving the actual use of the cached value to a follow-up change. v2: Don't read out the pin configuration if the PHY is disconnected. Cc: stable@vger.kernel.org # v6.8+ Reported-by: Charlton Lin Tested-by: Khaled Almahallawy Reviewed-by: Mika Kahola Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250811080152.906216-3-imre.deak@intel.com (cherry picked from commit 3e32438fc406761f81b1928d210b3d2a5e7501a0) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_tc.c | 57 +++++++++++++++++++++---- 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 8208539bfe66..34435c4fc280 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -66,6 +66,7 @@ struct intel_tc_port { enum tc_port_mode init_mode; enum phy_fia phy_fia; u8 phy_fia_idx; + u8 max_lane_count; }; static enum intel_display_power_domain @@ -365,12 +366,12 @@ static int intel_tc_port_get_max_lane_count(struct intel_digital_port *dig_port) } } -int intel_tc_port_max_lane_count(struct intel_digital_port *dig_port) +static int get_max_lane_count(struct intel_tc_port *tc) { - struct intel_display *display = to_intel_display(dig_port); - struct intel_tc_port *tc = to_tc_port(dig_port); + struct intel_display *display = to_intel_display(tc->dig_port); + struct intel_digital_port *dig_port = tc->dig_port; - if (!intel_encoder_is_tc(&dig_port->base) || tc->mode != TC_PORT_DP_ALT) + if (tc->mode != TC_PORT_DP_ALT) return 4; assert_tc_cold_blocked(tc); @@ -384,6 +385,21 @@ int intel_tc_port_max_lane_count(struct intel_digital_port *dig_port) return intel_tc_port_get_max_lane_count(dig_port); } 
+static void read_pin_configuration(struct intel_tc_port *tc) +{ + tc->max_lane_count = get_max_lane_count(tc); +} + +int intel_tc_port_max_lane_count(struct intel_digital_port *dig_port) +{ + struct intel_tc_port *tc = to_tc_port(dig_port); + + if (!intel_encoder_is_tc(&dig_port->base)) + return 4; + + return get_max_lane_count(tc); +} + void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port, int required_lanes) { @@ -596,9 +612,12 @@ static void icl_tc_phy_get_hw_state(struct intel_tc_port *tc) tc_cold_wref = __tc_cold_block(tc, &domain); tc->mode = tc_phy_get_current_mode(tc); - if (tc->mode != TC_PORT_DISCONNECTED) + if (tc->mode != TC_PORT_DISCONNECTED) { tc->lock_wakeref = tc_cold_block(tc); + read_pin_configuration(tc); + } + __tc_cold_unblock(tc, domain, tc_cold_wref); } @@ -656,8 +675,11 @@ static bool icl_tc_phy_connect(struct intel_tc_port *tc, tc->lock_wakeref = tc_cold_block(tc); - if (tc->mode == TC_PORT_TBT_ALT) + if (tc->mode == TC_PORT_TBT_ALT) { + read_pin_configuration(tc); + return true; + } if ((!tc_phy_is_ready(tc) || !icl_tc_phy_take_ownership(tc, true)) && @@ -668,6 +690,7 @@ static bool icl_tc_phy_connect(struct intel_tc_port *tc, goto out_unblock_tc_cold; } + read_pin_configuration(tc); if (!tc_phy_verify_legacy_or_dp_alt_mode(tc, required_lanes)) goto out_release_phy; @@ -858,9 +881,12 @@ static void adlp_tc_phy_get_hw_state(struct intel_tc_port *tc) port_wakeref = intel_display_power_get(display, port_power_domain); tc->mode = tc_phy_get_current_mode(tc); - if (tc->mode != TC_PORT_DISCONNECTED) + if (tc->mode != TC_PORT_DISCONNECTED) { tc->lock_wakeref = tc_cold_block(tc); + read_pin_configuration(tc); + } + intel_display_power_put(display, port_power_domain, port_wakeref); } @@ -873,6 +899,9 @@ static bool adlp_tc_phy_connect(struct intel_tc_port *tc, int required_lanes) if (tc->mode == TC_PORT_TBT_ALT) { tc->lock_wakeref = tc_cold_block(tc); + + read_pin_configuration(tc); + return true; } @@ -894,6 +923,8 @@ static 
bool adlp_tc_phy_connect(struct intel_tc_port *tc, int required_lanes) tc->lock_wakeref = tc_cold_block(tc); + read_pin_configuration(tc); + if (!tc_phy_verify_legacy_or_dp_alt_mode(tc, required_lanes)) goto out_unblock_tc_cold; @@ -1124,9 +1155,12 @@ static void xelpdp_tc_phy_get_hw_state(struct intel_tc_port *tc) tc_cold_wref = __tc_cold_block(tc, &domain); tc->mode = tc_phy_get_current_mode(tc); - if (tc->mode != TC_PORT_DISCONNECTED) + if (tc->mode != TC_PORT_DISCONNECTED) { tc->lock_wakeref = tc_cold_block(tc); + read_pin_configuration(tc); + } + drm_WARN_ON(display->drm, (tc->mode == TC_PORT_DP_ALT || tc->mode == TC_PORT_LEGACY) && !xelpdp_tc_phy_tcss_power_is_enabled(tc)); @@ -1138,14 +1172,19 @@ static bool xelpdp_tc_phy_connect(struct intel_tc_port *tc, int required_lanes) { tc->lock_wakeref = tc_cold_block(tc); - if (tc->mode == TC_PORT_TBT_ALT) + if (tc->mode == TC_PORT_TBT_ALT) { + read_pin_configuration(tc); + return true; + } if (!xelpdp_tc_phy_enable_tcss_power(tc, true)) goto out_unblock_tccold; xelpdp_tc_phy_take_ownership(tc, true); + read_pin_configuration(tc); + if (!tc_phy_verify_legacy_or_dp_alt_mode(tc, required_lanes)) goto out_release_phy; From c87514a0bb0a64507412a2d98264060dc0c1562a Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 11 Aug 2025 11:01:50 +0300 Subject: [PATCH 2057/2411] drm/i915/lnl+/tc: Fix max lane count HW readout On LNL+ for a disconnected sink the pin assignment value gets cleared by the HW/FW as soon as the sink gets disconnected, even if the PHY ownership got acquired already by the BIOS/driver (and hence the PHY itself is still connected and used by the display). During HW readout this can result in detecting the PHY's max lane count as 0 - matching the above cleared aka NONE pin assignment HW state. For a connected PHY the driver in general (outside of intel_tc.c) expects the max lane count value to be valid for the video mode enabled on the corresponding output (1, 2 or 4). 
Ensure this by setting the max lane count to 4 in this case. Note, that it doesn't matter if this lane count happened to be more than the max lane count with which the PHY got connected and enabled, since the only thing the driver can do with such an output - where the DP-alt sink is disconnected - is to disable the output. v2: Rebased on change reading out the pin configuration only if the PHY is connected. Cc: stable@vger.kernel.org # v6.8+ Reported-by: Charlton Lin Tested-by: Khaled Almahallawy Reviewed-by: Mika Kahola Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250811080152.906216-4-imre.deak@intel.com (cherry picked from commit 33cf70bc0fe760224f892bc1854a33665f27d482) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_tc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 34435c4fc280..3f9842040bb0 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -23,6 +23,7 @@ #include "intel_modeset_lock.h" #include "intel_tc.h" +#define DP_PIN_ASSIGNMENT_NONE 0x0 #define DP_PIN_ASSIGNMENT_C 0x3 #define DP_PIN_ASSIGNMENT_D 0x4 #define DP_PIN_ASSIGNMENT_E 0x5 @@ -308,6 +309,8 @@ static int lnl_tc_port_get_max_lane_count(struct intel_digital_port *dig_port) REG_FIELD_GET(TCSS_DDI_STATUS_PIN_ASSIGNMENT_MASK, val); switch (pin_assignment) { + case DP_PIN_ASSIGNMENT_NONE: + return 0; default: MISSING_CASE(pin_assignment); fallthrough; @@ -1159,6 +1162,12 @@ static void xelpdp_tc_phy_get_hw_state(struct intel_tc_port *tc) tc->lock_wakeref = tc_cold_block(tc); read_pin_configuration(tc); + /* + * Set a valid lane count value for a DP-alt sink which got + * disconnected. The driver can only disable the output on this PHY. 
+ */ + if (tc->max_lane_count == 0) + tc->max_lane_count = 4; } drm_WARN_ON(display->drm, From c5c2b4b3841666be3a45346d0ffa96b4b143504e Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 11 Aug 2025 11:01:51 +0300 Subject: [PATCH 2058/2411] drm/i915/lnl+/tc: Use the cached max lane count value Use the cached max lane count value on LNL+, to account for scenarios where this value is queried after the HW cleared the corresponding pin assignment value in the TCSS_DDI_STATUS register after the sink got disconnected. For consistency, follow-up changes will use the cached max lane count value on other platforms as well and will also cache the pin assignment value in a similar way. Cc: stable@vger.kernel.org # v6.8+ Reported-by: Charlton Lin Tested-by: Khaled Almahallawy Reviewed-by: Mika Kahola Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250811080152.906216-5-imre.deak@intel.com (cherry picked from commit afc4e84388079f4d5ba05271632b7a4d8d85165c) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_tc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 3f9842040bb0..6a2442a0649e 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -395,12 +395,16 @@ static void read_pin_configuration(struct intel_tc_port *tc) int intel_tc_port_max_lane_count(struct intel_digital_port *dig_port) { + struct intel_display *display = to_intel_display(dig_port); struct intel_tc_port *tc = to_tc_port(dig_port); if (!intel_encoder_is_tc(&dig_port->base)) return 4; - return get_max_lane_count(tc); + if (DISPLAY_VER(display) < 20) + return get_max_lane_count(tc); + + return tc->max_lane_count; } void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port, From d7fa5754e83cd36c4327eb2d806064e598a72ff6 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 11 Aug 2025 11:01:52 +0300 Subject: [PATCH 
2059/2411] drm/i915/icl+/tc: Convert AUX powered WARN to a debug message The BIOS can leave the AUX power well enabled on an output, even if this isn't required (on platforms where the AUX power is only needed for an AUX access). This was observed at least on PTL. To avoid the WARN which would be triggered by this during the HW readout, convert the WARN to a debug message. Cc: stable@vger.kernel.org # v6.8+ Reported-by: Charlton Lin Tested-by: Khaled Almahallawy Reviewed-by: Mika Kahola Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250811080152.906216-6-imre.deak@intel.com (cherry picked from commit 6cb52cba474b2bec1a3018d3dbf75292059a29a1) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_tc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 6a2442a0649e..668ef139391b 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -1498,11 +1498,11 @@ static void intel_tc_port_reset_mode(struct intel_tc_port *tc, intel_display_power_flush_work(display); if (!intel_tc_cold_requires_aux_pw(dig_port)) { enum intel_display_power_domain aux_domain; - bool aux_powered; aux_domain = intel_aux_power_domain(dig_port); - aux_powered = intel_display_power_is_enabled(display, aux_domain); - drm_WARN_ON(display->drm, aux_powered); + if (intel_display_power_is_enabled(display, aux_domain)) + drm_dbg_kms(display->drm, "Port %s: AUX unexpectedly powered\n", + tc->port_name); } tc_phy_disconnect(tc); From 6347dc7fb967521a77f9ff0774d25ef0cca4c6cd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 11 Aug 2025 10:18:07 -0400 Subject: [PATCH 2060/2411] media: rkvdec: Fix a NULL vs IS_ERR() bug in probe() The iommu_paging_domain_alloc() function doesn't return NULL on error it returns error pointers. 
Update the check and then set ->empty_domain to NULL because the rest of the driver assumes it can be NULL. Fixes: ff8c5622f9f7 ("media: rkvdec: Restore iommu addresses on errors") Signed-off-by: Dan Carpenter Tested-by: Detlev Casanova Signed-off-by: Nicolas Dufresne Signed-off-by: Hans Verkuil --- drivers/media/platform/rockchip/rkvdec/rkvdec.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/rockchip/rkvdec/rkvdec.c b/drivers/media/platform/rockchip/rkvdec/rkvdec.c index d707088ec0dc..1b7f27e4d961 100644 --- a/drivers/media/platform/rockchip/rkvdec/rkvdec.c +++ b/drivers/media/platform/rockchip/rkvdec/rkvdec.c @@ -1162,8 +1162,10 @@ static int rkvdec_probe(struct platform_device *pdev) if (iommu_get_domain_for_dev(&pdev->dev)) { rkvdec->empty_domain = iommu_paging_domain_alloc(rkvdec->dev); - if (!rkvdec->empty_domain) + if (IS_ERR(rkvdec->empty_domain)) { + rkvdec->empty_domain = NULL; dev_warn(rkvdec->dev, "cannot alloc new empty domain\n"); + } } vb2_dma_contig_set_max_seg_size(&pdev->dev, DMA_BIT_MASK(32)); From 0d58a72b66ec4c24128395e408348c2c84221605 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 30 Jul 2025 20:24:44 +0200 Subject: [PATCH 2061/2411] media: rkvdec: Fix an error handling path in rkvdec_probe() If an error occurs after a successful iommu_paging_domain_alloc() call, it should be undone by a corresponding iommu_domain_free() call, as already done in the remove function. In order to fix the issue, move the corresponding call at the end of the function, because it is safe to allocate 'empty_domain' later. 
Fixes: ff8c5622f9f7 ("media: rkvdec: Restore iommu addresses on errors") Signed-off-by: Christophe JAILLET Reviewed-by: Nicolas Dufresne Signed-off-by: Nicolas Dufresne Signed-off-by: Hans Verkuil --- .../media/platform/rockchip/rkvdec/rkvdec.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/media/platform/rockchip/rkvdec/rkvdec.c b/drivers/media/platform/rockchip/rkvdec/rkvdec.c index 1b7f27e4d961..35265e321203 100644 --- a/drivers/media/platform/rockchip/rkvdec/rkvdec.c +++ b/drivers/media/platform/rockchip/rkvdec/rkvdec.c @@ -1159,15 +1159,6 @@ static int rkvdec_probe(struct platform_device *pdev) return ret; } - if (iommu_get_domain_for_dev(&pdev->dev)) { - rkvdec->empty_domain = iommu_paging_domain_alloc(rkvdec->dev); - - if (IS_ERR(rkvdec->empty_domain)) { - rkvdec->empty_domain = NULL; - dev_warn(rkvdec->dev, "cannot alloc new empty domain\n"); - } - } - vb2_dma_contig_set_max_seg_size(&pdev->dev, DMA_BIT_MASK(32)); irq = platform_get_irq(pdev, 0); @@ -1190,6 +1181,15 @@ static int rkvdec_probe(struct platform_device *pdev) if (ret) goto err_disable_runtime_pm; + if (iommu_get_domain_for_dev(&pdev->dev)) { + rkvdec->empty_domain = iommu_paging_domain_alloc(rkvdec->dev); + + if (IS_ERR(rkvdec->empty_domain)) { + rkvdec->empty_domain = NULL; + dev_warn(rkvdec->dev, "cannot alloc new empty domain\n"); + } + } + return 0; err_disable_runtime_pm: From da3fa08a89dc1cb33ed57d097239b9c7cd9e7a60 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 11 Aug 2025 11:00:19 -0400 Subject: [PATCH 2062/2411] media: rkvdec: Remove redundant pm_runtime_mark_last_busy() calls pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-redundant explicit call to pm_runtime_mark_last_busy().
Signed-off-by: Sakari Ailus Reviewed-by: Nicolas Dufresne Reviewed-by: Heiko Stuebner Signed-off-by: Nicolas Dufresne Signed-off-by: Hans Verkuil --- drivers/media/platform/rockchip/rkvdec/rkvdec.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/media/platform/rockchip/rkvdec/rkvdec.c b/drivers/media/platform/rockchip/rkvdec/rkvdec.c index 35265e321203..d3b31f461194 100644 --- a/drivers/media/platform/rockchip/rkvdec/rkvdec.c +++ b/drivers/media/platform/rockchip/rkvdec/rkvdec.c @@ -765,7 +765,6 @@ static void rkvdec_job_finish(struct rkvdec_ctx *ctx, { struct rkvdec_dev *rkvdec = ctx->dev; - pm_runtime_mark_last_busy(rkvdec->dev); pm_runtime_put_autosuspend(rkvdec->dev); rkvdec_job_finish_no_pm(ctx, result); } From a032fe30cf09b6723ab61a05aee057311b00f9e1 Mon Sep 17 00:00:00 2001 From: Dongcheng Yan Date: Fri, 25 Apr 2025 18:43:30 +0800 Subject: [PATCH 2063/2411] platform/x86: int3472: add hpd pin support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Typically HDMI to MIPI CSI-2 bridges have a pin to signal image data is being received. On the host side this is wired to a GPIO for polling or interrupts. This includes the Lontium HDMI to MIPI CSI-2 bridges lt6911uxe and lt6911uxc. The GPIO "hpd" is used already by other HDMI to CSI-2 bridges, use it here as well. 
Signed-off-by: Dongcheng Yan Reviewed-by: Sakari Ailus Acked-by: Ilpo Järvinen Reviewed-by: Hans de Goede Reviewed-by: Andy Shevchenko Fixes: 20244cbafbd6 ("media: i2c: change lt6911uxe irq_gpio name to "hpd"") Cc: stable@vger.kernel.org Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/platform/x86/intel/int3472/discrete.c | 6 ++++++ include/linux/platform_data/x86/int3472.h | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c index 4c0aed6e626f..bdfb8a800c54 100644 --- a/drivers/platform/x86/intel/int3472/discrete.c +++ b/drivers/platform/x86/intel/int3472/discrete.c @@ -193,6 +193,10 @@ static void int3472_get_con_id_and_polarity(struct int3472_discrete_device *int3 *con_id = "privacy-led"; *gpio_flags = GPIO_ACTIVE_HIGH; break; + case INT3472_GPIO_TYPE_HOTPLUG_DETECT: + *con_id = "hpd"; + *gpio_flags = GPIO_ACTIVE_HIGH; + break; case INT3472_GPIO_TYPE_POWER_ENABLE: *con_id = "avdd"; *gpio_flags = GPIO_ACTIVE_HIGH; @@ -223,6 +227,7 @@ static void int3472_get_con_id_and_polarity(struct int3472_discrete_device *int3 * 0x0b Power enable * 0x0c Clock enable * 0x0d Privacy LED + * 0x13 Hotplug detect * * There are some known platform specific quirks where that does not quite * hold up; for example where a pin with type 0x01 (Power down) is mapped to @@ -292,6 +297,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares, switch (type) { case INT3472_GPIO_TYPE_RESET: case INT3472_GPIO_TYPE_POWERDOWN: + case INT3472_GPIO_TYPE_HOTPLUG_DETECT: ret = skl_int3472_map_gpio_to_sensor(int3472, agpio, con_id, gpio_flags); if (ret) err_msg = "Failed to map GPIO pin to sensor\n"; diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h index 78276a11c48d..1571e9157fa5 100644 --- a/include/linux/platform_data/x86/int3472.h +++ b/include/linux/platform_data/x86/int3472.h @@ -27,6 +27,7 @@ #define 
INT3472_GPIO_TYPE_CLK_ENABLE 0x0c #define INT3472_GPIO_TYPE_PRIVACY_LED 0x0d #define INT3472_GPIO_TYPE_HANDSHAKE 0x12 +#define INT3472_GPIO_TYPE_HOTPLUG_DETECT 0x13 #define INT3472_PDEV_MAX_NAME_LEN 23 #define INT3472_MAX_SENSOR_GPIOS 3 From 6f6fbd9a0c5a75eee0618c1499cf73cc770b3f52 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 9 Jul 2025 22:53:48 +0300 Subject: [PATCH 2064/2411] media: Remove redundant pm_runtime_mark_last_busy() calls pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-redundant explicit call to pm_runtime_mark_last_busy(). Reviewed-by: Laurent Pinchart Acked-by: Thierry Reding (tegra-vde/h264.c) Acked-by: Tommaso Merciai (alvium-csi2.c) Reviewed-by: Dikshita Agarwal (iris_hfi_queue.c) Reviewed-by: Sean Young Acked-by: Dave Stevenson (imx219.c) Acked-by: Benjamin Mugnier Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/i2c/alvium-csi2.c | 1 - drivers/media/i2c/ccs/ccs-core.c | 7 +------ drivers/media/i2c/dw9768.c | 1 - drivers/media/i2c/gc0308.c | 3 --- drivers/media/i2c/gc2145.c | 3 --- drivers/media/i2c/imx219.c | 2 -- drivers/media/i2c/imx283.c | 3 --- drivers/media/i2c/imx290.c | 3 --- drivers/media/i2c/imx296.c | 1 - drivers/media/i2c/imx415.c | 1 - drivers/media/i2c/mt9m114.c | 6 ------ drivers/media/i2c/ov4689.c | 3 --- drivers/media/i2c/ov5640.c | 4 ---- drivers/media/i2c/ov5645.c | 3 --- drivers/media/i2c/ov64a40.c | 7 +------ drivers/media/i2c/ov8858.c | 2 -- drivers/media/i2c/st-mipid02.c | 2 -- drivers/media/i2c/tc358746.c | 5 ----- drivers/media/i2c/thp7312.c | 4 ---- drivers/media/i2c/vd55g1.c | 4 ---- drivers/media/i2c/vd56g3.c | 4 ---- drivers/media/i2c/video-i2c.c | 4 ---- drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c | 4 ---- drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c | 5 ----- drivers/media/platform/nvidia/tegra-vde/h264.c | 2 --
drivers/media/platform/qcom/iris/iris_hfi_queue.c | 1 - drivers/media/platform/raspberrypi/pisp_be/pisp_be.c | 2 -- drivers/media/platform/verisilicon/hantro_drv.c | 1 - drivers/media/rc/gpio-ir-recv.c | 4 +--- 29 files changed, 3 insertions(+), 89 deletions(-) diff --git a/drivers/media/i2c/alvium-csi2.c b/drivers/media/i2c/alvium-csi2.c index 05b708bd0a64..1f088acecf36 100644 --- a/drivers/media/i2c/alvium-csi2.c +++ b/drivers/media/i2c/alvium-csi2.c @@ -1841,7 +1841,6 @@ static int alvium_s_stream(struct v4l2_subdev *sd, int enable) } else { alvium_set_stream_mipi(alvium, enable); - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); } diff --git a/drivers/media/i2c/ccs/ccs-core.c b/drivers/media/i2c/ccs/ccs-core.c index 487bcabb4a19..1c889c878abd 100644 --- a/drivers/media/i2c/ccs/ccs-core.c +++ b/drivers/media/i2c/ccs/ccs-core.c @@ -787,10 +787,8 @@ static int ccs_set_ctrl(struct v4l2_ctrl *ctrl) rval = -EINVAL; } - if (pm_status > 0) { - pm_runtime_mark_last_busy(&client->dev); + if (pm_status > 0) pm_runtime_put_autosuspend(&client->dev); - } return rval; } @@ -1914,7 +1912,6 @@ static int ccs_set_stream(struct v4l2_subdev *subdev, int enable) if (!enable) { ccs_stop_streaming(sensor); sensor->streaming = false; - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); return 0; @@ -1929,7 +1926,6 @@ static int ccs_set_stream(struct v4l2_subdev *subdev, int enable) rval = ccs_start_streaming(sensor); if (rval < 0) { sensor->streaming = false; - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); } @@ -2677,7 +2673,6 @@ nvm_show(struct device *dev, struct device_attribute *attr, char *buf) return -ENODEV; } - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); /* diff --git a/drivers/media/i2c/dw9768.c b/drivers/media/i2c/dw9768.c index 3a4d100b9199..d434721ba8ed 100644 --- a/drivers/media/i2c/dw9768.c +++ b/drivers/media/i2c/dw9768.c @@ 
-374,7 +374,6 @@ static int dw9768_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh) static int dw9768_close(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh) { - pm_runtime_mark_last_busy(sd->dev); pm_runtime_put_autosuspend(sd->dev); return 0; diff --git a/drivers/media/i2c/gc0308.c b/drivers/media/i2c/gc0308.c index 069f42785b3c..cbcda0e18ff1 100644 --- a/drivers/media/i2c/gc0308.c +++ b/drivers/media/i2c/gc0308.c @@ -974,7 +974,6 @@ static int gc0308_s_ctrl(struct v4l2_ctrl *ctrl) if (ret) dev_err(gc0308->dev, "failed to set control: %d\n", ret); - pm_runtime_mark_last_busy(gc0308->dev); pm_runtime_put_autosuspend(gc0308->dev); return ret; @@ -1157,14 +1156,12 @@ static int gc0308_start_stream(struct gc0308 *gc0308) return 0; disable_pm: - pm_runtime_mark_last_busy(gc0308->dev); pm_runtime_put_autosuspend(gc0308->dev); return ret; } static int gc0308_stop_stream(struct gc0308 *gc0308) { - pm_runtime_mark_last_busy(gc0308->dev); pm_runtime_put_autosuspend(gc0308->dev); return 0; } diff --git a/drivers/media/i2c/gc2145.c b/drivers/media/i2c/gc2145.c index ba02161d46e7..559a851669aa 100644 --- a/drivers/media/i2c/gc2145.c +++ b/drivers/media/i2c/gc2145.c @@ -963,7 +963,6 @@ static int gc2145_enable_streams(struct v4l2_subdev *sd, return 0; err_rpm_put: - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); return ret; } @@ -985,7 +984,6 @@ static int gc2145_disable_streams(struct v4l2_subdev *sd, if (ret) dev_err(&client->dev, "%s failed to write regs\n", __func__); - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); return ret; @@ -1193,7 +1191,6 @@ static int gc2145_s_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); return ret; diff --git a/drivers/media/i2c/imx219.c b/drivers/media/i2c/imx219.c index 3b4f68543342..3faf48f34af4 100644 --- a/drivers/media/i2c/imx219.c +++ b/drivers/media/i2c/imx219.c @@ -771,7 +771,6 @@ 
static int imx219_enable_streams(struct v4l2_subdev *sd, return 0; err_rpm_put: - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); return ret; } @@ -793,7 +792,6 @@ static int imx219_disable_streams(struct v4l2_subdev *sd, __v4l2_ctrl_grab(imx219->vflip, false); __v4l2_ctrl_grab(imx219->hflip, false); - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); return ret; diff --git a/drivers/media/i2c/imx283.c b/drivers/media/i2c/imx283.c index da618c8cbadc..67e8bb432d10 100644 --- a/drivers/media/i2c/imx283.c +++ b/drivers/media/i2c/imx283.c @@ -1143,7 +1143,6 @@ static int imx283_enable_streams(struct v4l2_subdev *sd, return 0; err_rpm_put: - pm_runtime_mark_last_busy(imx283->dev); pm_runtime_put_autosuspend(imx283->dev); return ret; @@ -1163,7 +1162,6 @@ static int imx283_disable_streams(struct v4l2_subdev *sd, if (ret) dev_err(imx283->dev, "Failed to stop stream\n"); - pm_runtime_mark_last_busy(imx283->dev); pm_runtime_put_autosuspend(imx283->dev); return ret; @@ -1558,7 +1556,6 @@ static int imx283_probe(struct i2c_client *client) * Decrease the PM usage count. The device will get suspended after the * autosuspend delay, turning the power off. 
*/ - pm_runtime_mark_last_busy(imx283->dev); pm_runtime_put_autosuspend(imx283->dev); return 0; diff --git a/drivers/media/i2c/imx290.c b/drivers/media/i2c/imx290.c index 4f3f386c5353..ec172556612e 100644 --- a/drivers/media/i2c/imx290.c +++ b/drivers/media/i2c/imx290.c @@ -869,7 +869,6 @@ static int imx290_set_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(imx290->dev); pm_runtime_put_autosuspend(imx290->dev); return ret; @@ -1099,7 +1098,6 @@ static int imx290_set_stream(struct v4l2_subdev *sd, int enable) } } else { imx290_stop_streaming(imx290); - pm_runtime_mark_last_busy(imx290->dev); pm_runtime_put_autosuspend(imx290->dev); } @@ -1294,7 +1292,6 @@ static int imx290_subdev_init(struct imx290 *imx290) * will already be prevented even before the delay. */ v4l2_i2c_subdev_init(&imx290->sd, client, &imx290_subdev_ops); - pm_runtime_mark_last_busy(imx290->dev); pm_runtime_put_autosuspend(imx290->dev); imx290->sd.internal_ops = &imx290_internal_ops; diff --git a/drivers/media/i2c/imx296.c b/drivers/media/i2c/imx296.c index f3bec16b527c..61116f4e3f76 100644 --- a/drivers/media/i2c/imx296.c +++ b/drivers/media/i2c/imx296.c @@ -604,7 +604,6 @@ static int imx296_s_stream(struct v4l2_subdev *sd, int enable) if (!enable) { ret = imx296_stream_off(sensor); - pm_runtime_mark_last_busy(sensor->dev); pm_runtime_put_autosuspend(sensor->dev); goto unlock; diff --git a/drivers/media/i2c/imx415.c b/drivers/media/i2c/imx415.c index 278e743646ea..276bf4d6f39d 100644 --- a/drivers/media/i2c/imx415.c +++ b/drivers/media/i2c/imx415.c @@ -952,7 +952,6 @@ static int imx415_s_stream(struct v4l2_subdev *sd, int enable) if (!enable) { ret = imx415_stream_off(sensor); - pm_runtime_mark_last_busy(sensor->dev); pm_runtime_put_autosuspend(sensor->dev); goto unlock; diff --git a/drivers/media/i2c/mt9m114.c b/drivers/media/i2c/mt9m114.c index 3f540ca40f3c..aa3fd6c6c76c 100644 --- a/drivers/media/i2c/mt9m114.c +++ b/drivers/media/i2c/mt9m114.c @@ -974,7 +974,6 @@ static int 
mt9m114_start_streaming(struct mt9m114 *sensor, return 0; error: - pm_runtime_mark_last_busy(&sensor->client->dev); pm_runtime_put_autosuspend(&sensor->client->dev); return ret; @@ -988,7 +987,6 @@ static int mt9m114_stop_streaming(struct mt9m114 *sensor) ret = mt9m114_set_state(sensor, MT9M114_SYS_STATE_ENTER_SUSPEND); - pm_runtime_mark_last_busy(&sensor->client->dev); pm_runtime_put_autosuspend(&sensor->client->dev); return ret; @@ -1046,7 +1044,6 @@ static int mt9m114_pa_g_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(&sensor->client->dev); pm_runtime_put_autosuspend(&sensor->client->dev); return ret; @@ -1113,7 +1110,6 @@ static int mt9m114_pa_s_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(&sensor->client->dev); pm_runtime_put_autosuspend(&sensor->client->dev); return ret; @@ -1565,7 +1561,6 @@ static int mt9m114_ifp_s_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(&sensor->client->dev); pm_runtime_put_autosuspend(&sensor->client->dev); return ret; @@ -2472,7 +2467,6 @@ static int mt9m114_probe(struct i2c_client *client) * Decrease the PM usage count. The device will get suspended after the * autosuspend delay, turning the power off. 
*/ - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/media/i2c/ov4689.c b/drivers/media/i2c/ov4689.c index 1c3a449f9354..7d740ad3926f 100644 --- a/drivers/media/i2c/ov4689.c +++ b/drivers/media/i2c/ov4689.c @@ -497,7 +497,6 @@ static int ov4689_s_stream(struct v4l2_subdev *sd, int on) } else { cci_write(ov4689->regmap, OV4689_REG_CTRL_MODE, OV4689_MODE_SW_STANDBY, NULL); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); } @@ -702,7 +701,6 @@ static int ov4689_set_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return ret; @@ -999,7 +997,6 @@ static int ov4689_probe(struct i2c_client *client) goto err_clean_subdev_pm; } - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/media/i2c/ov5640.c b/drivers/media/i2c/ov5640.c index 0dae0438aa80..84198613381d 100644 --- a/drivers/media/i2c/ov5640.c +++ b/drivers/media/i2c/ov5640.c @@ -3341,7 +3341,6 @@ static int ov5640_g_volatile_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(&sensor->i2c_client->dev); pm_runtime_put_autosuspend(&sensor->i2c_client->dev); return 0; @@ -3417,7 +3416,6 @@ static int ov5640_s_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(&sensor->i2c_client->dev); pm_runtime_put_autosuspend(&sensor->i2c_client->dev); return ret; @@ -3754,7 +3752,6 @@ static int ov5640_s_stream(struct v4l2_subdev *sd, int enable) mutex_unlock(&sensor->lock); if (!enable || ret) { - pm_runtime_mark_last_busy(&sensor->i2c_client->dev); pm_runtime_put_autosuspend(&sensor->i2c_client->dev); } @@ -3965,7 +3962,6 @@ static int ov5640_probe(struct i2c_client *client) pm_runtime_set_autosuspend_delay(dev, 1000); pm_runtime_use_autosuspend(dev); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/media/i2c/ov5645.c b/drivers/media/i2c/ov5645.c index 004d0ee5c3f5..58c846a44376 100644 --- 
a/drivers/media/i2c/ov5645.c +++ b/drivers/media/i2c/ov5645.c @@ -808,7 +808,6 @@ static int ov5645_s_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(ov5645->dev); pm_runtime_put_autosuspend(ov5645->dev); return ret; @@ -979,7 +978,6 @@ static int ov5645_disable_streams(struct v4l2_subdev *sd, OV5645_SYSTEM_CTRL0_STOP); rpm_put: - pm_runtime_mark_last_busy(ov5645->dev); pm_runtime_put_autosuspend(ov5645->dev); return ret; @@ -1196,7 +1194,6 @@ static int ov5645_probe(struct i2c_client *client) pm_runtime_set_autosuspend_delay(dev, 1000); pm_runtime_use_autosuspend(dev); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/media/i2c/ov64a40.c b/drivers/media/i2c/ov64a40.c index a5da4fe47e0b..2031cbd05c26 100644 --- a/drivers/media/i2c/ov64a40.c +++ b/drivers/media/i2c/ov64a40.c @@ -2990,7 +2990,6 @@ static int ov64a40_start_streaming(struct ov64a40 *ov64a40, return 0; error_power_off: - pm_runtime_mark_last_busy(ov64a40->dev); pm_runtime_put_autosuspend(ov64a40->dev); return ret; @@ -3000,7 +2999,6 @@ static int ov64a40_stop_streaming(struct ov64a40 *ov64a40, struct v4l2_subdev_state *state) { cci_update_bits(ov64a40->cci, OV64A40_REG_SMIA, BIT(0), 0, NULL); - pm_runtime_mark_last_busy(ov64a40->dev); pm_runtime_put_autosuspend(ov64a40->dev); __v4l2_ctrl_grab(ov64a40->link_freq, false); @@ -3329,10 +3327,8 @@ static int ov64a40_set_ctrl(struct v4l2_ctrl *ctrl) break; } - if (pm_status > 0) { - pm_runtime_mark_last_busy(ov64a40->dev); + if (pm_status > 0) pm_runtime_put_autosuspend(ov64a40->dev); - } return ret; } @@ -3622,7 +3618,6 @@ static int ov64a40_probe(struct i2c_client *client) goto error_subdev_cleanup; } - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); return 0; diff --git a/drivers/media/i2c/ov8858.c b/drivers/media/i2c/ov8858.c index 95f9ae794846..6b7193eaea1f 100644 --- a/drivers/media/i2c/ov8858.c +++ b/drivers/media/i2c/ov8858.c @@ -1391,7 +1391,6 @@ 
static int ov8858_s_stream(struct v4l2_subdev *sd, int on) } } else { ov8858_stop_stream(ov8858); - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); } @@ -1945,7 +1944,6 @@ static int ov8858_probe(struct i2c_client *client) goto err_power_off; } - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/media/i2c/st-mipid02.c b/drivers/media/i2c/st-mipid02.c index f4568e87f018..41ae25b0911f 100644 --- a/drivers/media/i2c/st-mipid02.c +++ b/drivers/media/i2c/st-mipid02.c @@ -465,7 +465,6 @@ static int mipid02_disable_streams(struct v4l2_subdev *sd, if (ret) goto error; - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); error: @@ -542,7 +541,6 @@ static int mipid02_enable_streams(struct v4l2_subdev *sd, cci_write(bridge->regmap, MIPID02_DATA_LANE0_REG1, 0, &ret); cci_write(bridge->regmap, MIPID02_DATA_LANE1_REG1, 0, &ret); - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); return ret; } diff --git a/drivers/media/i2c/tc358746.c b/drivers/media/i2c/tc358746.c index 143aa1359aba..bcfc274cf891 100644 --- a/drivers/media/i2c/tc358746.c +++ b/drivers/media/i2c/tc358746.c @@ -816,7 +816,6 @@ static int tc358746_s_stream(struct v4l2_subdev *sd, int enable) return 0; err_out: - pm_runtime_mark_last_busy(sd->dev); pm_runtime_put_sync_autosuspend(sd->dev); return err; @@ -838,7 +837,6 @@ static int tc358746_s_stream(struct v4l2_subdev *sd, int enable) if (err) return err; - pm_runtime_mark_last_busy(sd->dev); pm_runtime_put_sync_autosuspend(sd->dev); return v4l2_subdev_call(src, video, s_stream, 0); @@ -1016,7 +1014,6 @@ tc358746_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) err = tc358746_read(tc358746, reg->reg, &val); reg->val = val; - pm_runtime_mark_last_busy(sd->dev); pm_runtime_put_sync_autosuspend(sd->dev); return err; @@ -1032,7 +1029,6 @@ tc358746_s_register(struct v4l2_subdev *sd, const struct 
v4l2_dbg_register *reg) tc358746_write(tc358746, (u32)reg->reg, (u32)reg->val); - pm_runtime_mark_last_busy(sd->dev); pm_runtime_put_sync_autosuspend(sd->dev); return 0; @@ -1395,7 +1391,6 @@ static int tc358746_init_hw(struct tc358746 *tc358746) } err = tc358746_read(tc358746, CHIPID_REG, &val); - pm_runtime_mark_last_busy(dev); pm_runtime_put_sync_autosuspend(dev); if (err) return -ENODEV; diff --git a/drivers/media/i2c/thp7312.c b/drivers/media/i2c/thp7312.c index 8852c56431fe..775cfba188d8 100644 --- a/drivers/media/i2c/thp7312.c +++ b/drivers/media/i2c/thp7312.c @@ -808,7 +808,6 @@ static int thp7312_s_stream(struct v4l2_subdev *sd, int enable) if (!enable) { thp7312_stream_enable(thp7312, false); - pm_runtime_mark_last_busy(thp7312->dev); pm_runtime_put_autosuspend(thp7312->dev); v4l2_subdev_unlock_state(sd_state); @@ -839,7 +838,6 @@ static int thp7312_s_stream(struct v4l2_subdev *sd, int enable) goto finish_unlock; finish_pm: - pm_runtime_mark_last_busy(thp7312->dev); pm_runtime_put_autosuspend(thp7312->dev); finish_unlock: v4l2_subdev_unlock_state(sd_state); @@ -1147,7 +1145,6 @@ static int thp7312_s_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(thp7312->dev); pm_runtime_put_autosuspend(thp7312->dev); return ret; @@ -2183,7 +2180,6 @@ static int thp7312_probe(struct i2c_client *client) * Decrease the PM usage count. The device will get suspended after the * autosuspend delay, turning the power off. 
*/ - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); dev_info(dev, "THP7312 firmware version %02u.%02u\n", diff --git a/drivers/media/i2c/vd55g1.c b/drivers/media/i2c/vd55g1.c index c0754fd03b1d..7c39183dd44b 100644 --- a/drivers/media/i2c/vd55g1.c +++ b/drivers/media/i2c/vd55g1.c @@ -1104,7 +1104,6 @@ static int vd55g1_disable_streams(struct v4l2_subdev *sd, vd55g1_grab_ctrls(sensor, false); - pm_runtime_mark_last_busy(sensor->dev); pm_runtime_put_autosuspend(sensor->dev); return ret; @@ -1338,7 +1337,6 @@ static int vd55g1_g_volatile_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(sensor->dev); pm_runtime_put_autosuspend(sensor->dev); return ret; @@ -1433,7 +1431,6 @@ static int vd55g1_s_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(sensor->dev); pm_runtime_put_autosuspend(sensor->dev); return ret; @@ -1895,7 +1892,6 @@ static int vd55g1_probe(struct i2c_client *client) pm_runtime_enable(dev); pm_runtime_set_autosuspend_delay(dev, 4000); pm_runtime_use_autosuspend(dev); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); ret = vd55g1_subdev_init(sensor); diff --git a/drivers/media/i2c/vd56g3.c b/drivers/media/i2c/vd56g3.c index 5d951ad0b478..d66e21ba4498 100644 --- a/drivers/media/i2c/vd56g3.c +++ b/drivers/media/i2c/vd56g3.c @@ -493,7 +493,6 @@ static int vd56g3_g_volatile_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(sensor->dev); pm_runtime_put_autosuspend(sensor->dev); return ret; @@ -577,7 +576,6 @@ static int vd56g3_s_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(sensor->dev); pm_runtime_put_autosuspend(sensor->dev); return ret; @@ -1021,7 +1019,6 @@ static int vd56g3_disable_streams(struct v4l2_subdev *sd, __v4l2_ctrl_grab(sensor->vflip_ctrl, false); __v4l2_ctrl_grab(sensor->patgen_ctrl, false); - pm_runtime_mark_last_busy(sensor->dev); pm_runtime_put_autosuspend(sensor->dev); return ret; @@ -1527,7 +1524,6 @@ static int vd56g3_probe(struct 
i2c_client *client) } /* Sensor could now be powered off (after the autosuspend delay) */ - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); dev_dbg(dev, "Successfully probe %s sensor\n", diff --git a/drivers/media/i2c/video-i2c.c b/drivers/media/i2c/video-i2c.c index 0dd991d70d53..1eee2d4f5b40 100644 --- a/drivers/media/i2c/video-i2c.c +++ b/drivers/media/i2c/video-i2c.c @@ -288,7 +288,6 @@ static int amg88xx_read(struct device *dev, enum hwmon_sensor_types type, return tmp; tmp = regmap_bulk_read(data->regmap, AMG88XX_REG_TTHL, &buf, 2); - pm_runtime_mark_last_busy(regmap_get_device(data->regmap)); pm_runtime_put_autosuspend(regmap_get_device(data->regmap)); if (tmp) return tmp; @@ -527,7 +526,6 @@ static int start_streaming(struct vb2_queue *vq, unsigned int count) return 0; error_rpm_put: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); error_del_list: video_i2c_del_list(vq, VB2_BUF_STATE_QUEUED); @@ -544,7 +542,6 @@ static void stop_streaming(struct vb2_queue *vq) kthread_stop(data->kthread_vid_cap); data->kthread_vid_cap = NULL; - pm_runtime_mark_last_busy(regmap_get_device(data->regmap)); pm_runtime_put_autosuspend(regmap_get_device(data->regmap)); video_i2c_del_list(vq, VB2_BUF_STATE_ERROR); @@ -853,7 +850,6 @@ static int video_i2c_probe(struct i2c_client *client) if (ret < 0) goto error_pm_disable; - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); return 0; diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c b/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c index fd71f0c43ac3..a9ce032cc5a2 100644 --- a/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c +++ b/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c @@ -451,7 +451,6 @@ static void wave5_vpu_dec_finish_decode(struct vpu_instance *inst) if (q_status.report_queue_count == 0 && (q_status.instance_queue_count == 0 || dec_info.sequence_changed)) { dev_dbg(inst->dev->dev, "%s: finishing job.\n", 
__func__); - pm_runtime_mark_last_busy(inst->dev->dev); pm_runtime_put_autosuspend(inst->dev->dev); v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx); } @@ -1364,7 +1363,6 @@ static int wave5_vpu_dec_start_streaming(struct vb2_queue *q, unsigned int count } } - pm_runtime_mark_last_busy(inst->dev->dev); pm_runtime_put_autosuspend(inst->dev->dev); return ret; @@ -1498,7 +1496,6 @@ static void wave5_vpu_dec_stop_streaming(struct vb2_queue *q) else streamoff_capture(q); - pm_runtime_mark_last_busy(inst->dev->dev); pm_runtime_put_autosuspend(inst->dev->dev); } @@ -1662,7 +1659,6 @@ static void wave5_vpu_dec_device_run(void *priv) finish_job_and_return: dev_dbg(inst->dev->dev, "%s: leave and finish job", __func__); - pm_runtime_mark_last_busy(inst->dev->dev); pm_runtime_put_autosuspend(inst->dev->dev); v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx); } diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c b/drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c index 1e5fc5f8b856..35913a7de834 100644 --- a/drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c +++ b/drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c @@ -1391,12 +1391,10 @@ static int wave5_vpu_enc_start_streaming(struct vb2_queue *q, unsigned int count if (ret) goto return_buffers; - pm_runtime_mark_last_busy(inst->dev->dev); pm_runtime_put_autosuspend(inst->dev->dev); return 0; return_buffers: wave5_return_bufs(q, VB2_BUF_STATE_QUEUED); - pm_runtime_mark_last_busy(inst->dev->dev); pm_runtime_put_autosuspend(inst->dev->dev); return ret; } @@ -1465,7 +1463,6 @@ static void wave5_vpu_enc_stop_streaming(struct vb2_queue *q) else streamoff_capture(inst, q); - pm_runtime_mark_last_busy(inst->dev->dev); pm_runtime_put_autosuspend(inst->dev->dev); } @@ -1520,7 +1517,6 @@ static void wave5_vpu_enc_device_run(void *priv) break; } dev_dbg(inst->dev->dev, "%s: leave with active job", __func__); - pm_runtime_mark_last_busy(inst->dev->dev); pm_runtime_put_autosuspend(inst->dev->dev); 
return; default: @@ -1529,7 +1525,6 @@ static void wave5_vpu_enc_device_run(void *priv) break; } dev_dbg(inst->dev->dev, "%s: leave and finish job", __func__); - pm_runtime_mark_last_busy(inst->dev->dev); pm_runtime_put_autosuspend(inst->dev->dev); v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx); } diff --git a/drivers/media/platform/nvidia/tegra-vde/h264.c b/drivers/media/platform/nvidia/tegra-vde/h264.c index 0e56a4331b0d..45f8f6904867 100644 --- a/drivers/media/platform/nvidia/tegra-vde/h264.c +++ b/drivers/media/platform/nvidia/tegra-vde/h264.c @@ -585,7 +585,6 @@ static int tegra_vde_decode_begin(struct tegra_vde *vde, return 0; put_runtime_pm: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); unlock: @@ -612,7 +611,6 @@ static void tegra_vde_decode_abort(struct tegra_vde *vde) if (err) dev_err(dev, "DEC end: Failed to assert HW reset: %d\n", err); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); mutex_unlock(&vde->lock); diff --git a/drivers/media/platform/qcom/iris/iris_hfi_queue.c b/drivers/media/platform/qcom/iris/iris_hfi_queue.c index 221dcd09e1e1..b3ed06297953 100644 --- a/drivers/media/platform/qcom/iris/iris_hfi_queue.c +++ b/drivers/media/platform/qcom/iris/iris_hfi_queue.c @@ -142,7 +142,6 @@ int iris_hfi_queue_cmd_write(struct iris_core *core, void *pkt, u32 pkt_size) } mutex_unlock(&core->lock); - pm_runtime_mark_last_busy(core->dev); pm_runtime_put_autosuspend(core->dev); return 0; diff --git a/drivers/media/platform/raspberrypi/pisp_be/pisp_be.c b/drivers/media/platform/raspberrypi/pisp_be/pisp_be.c index b30891718d8d..d60d92d2ffa1 100644 --- a/drivers/media/platform/raspberrypi/pisp_be/pisp_be.c +++ b/drivers/media/platform/raspberrypi/pisp_be/pisp_be.c @@ -950,7 +950,6 @@ static void pispbe_node_stop_streaming(struct vb2_queue *q) kfree(job); } - pm_runtime_mark_last_busy(pispbe->dev); pm_runtime_put_autosuspend(pispbe->dev); dev_dbg(pispbe->dev, "Nodes streaming now 0x%x\n", @@ -1742,7 +1741,6 @@ static int 
pispbe_probe(struct platform_device *pdev) if (ret) goto disable_devs_err; - pm_runtime_mark_last_busy(pispbe->dev); pm_runtime_put_autosuspend(pispbe->dev); return 0; diff --git a/drivers/media/platform/verisilicon/hantro_drv.c b/drivers/media/platform/verisilicon/hantro_drv.c index 8542238e0fb1..fa972effd4a2 100644 --- a/drivers/media/platform/verisilicon/hantro_drv.c +++ b/drivers/media/platform/verisilicon/hantro_drv.c @@ -89,7 +89,6 @@ static void hantro_job_finish(struct hantro_dev *vpu, struct hantro_ctx *ctx, enum vb2_buffer_state result) { - pm_runtime_mark_last_busy(vpu->dev); pm_runtime_put_autosuspend(vpu->dev); clk_bulk_disable(vpu->variant->num_clocks, vpu->clocks); diff --git a/drivers/media/rc/gpio-ir-recv.c b/drivers/media/rc/gpio-ir-recv.c index bf6d8fa983bf..a6418ef782bc 100644 --- a/drivers/media/rc/gpio-ir-recv.c +++ b/drivers/media/rc/gpio-ir-recv.c @@ -48,10 +48,8 @@ static irqreturn_t gpio_ir_recv_irq(int irq, void *dev_id) if (val >= 0) ir_raw_event_store_edge(gpio_dev->rcdev, val == 1); - if (pmdev) { - pm_runtime_mark_last_busy(pmdev); + if (pmdev) pm_runtime_put_autosuspend(pmdev); - } return IRQ_HANDLED; } From bd7c2312128e31d056d30d34d60503de056e15f0 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Thu, 14 Aug 2025 15:07:13 +0200 Subject: [PATCH 2065/2411] pinctrl: meson: Fix typo in device table macro The typo when using the MODULE_DEVICE_TABLE macro was not noticeable because the macro was defined only if the module was built as a separate module. 
Cc: Xianwei Zhao Cc: Linus Walleij Cc: Neil Armstrong Cc: Kevin Hilman Cc: linux-amlogic@lists.infradead.org Cc: linux-gpio@vger.kernel.org Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507220009.8HKbNP16-lkp@intel.com/ Signed-off-by: Alexey Gladkov Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/e548b7761302defec15aa2098172eabb1ce1ad4a.1755170493.git.legion@kernel.org Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-amlogic-a4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c index e34e984c2b38..6132710aff68 100644 --- a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c +++ b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c @@ -1093,7 +1093,7 @@ static const struct of_device_id aml_pctl_of_match[] = { { .compatible = "amlogic,pinctrl-s6", .data = &s6_priv_data, }, { /* sentinel */ } }; -MODULE_DEVICE_TABLE(of, aml_pctl_dt_match); +MODULE_DEVICE_TABLE(of, aml_pctl_of_match); static struct platform_driver aml_pctl_driver = { .driver = { From c0ed3c2edc7692c6b8af7578b41012694dc8c671 Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Sat, 16 Aug 2025 12:27:41 +0800 Subject: [PATCH 2066/2411] ALSA: hda/tas2781: Add name prefix tas2781 for tas2781's dvc_tlv and amp_vol_tlv With some new devices being added to the driver, dvc_tlv and amp_vol_tlv will cause confusion for customers about which devices they support.
Fixes: 5be27f1e3ec9 ("ALSA: hda/tas2781: Add tas2781 HDA driver") Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20250816042741.1659-1-shenghao-ding@ti.com Signed-off-by: Takashi Iwai --- include/sound/tas2781-tlv.h | 6 +++--- sound/hda/codecs/side-codecs/tas2781_hda_i2c.c | 2 +- sound/hda/codecs/side-codecs/tas2781_hda_spi.c | 6 ++++-- sound/soc/codecs/tas2781-i2c.c | 4 ++-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/sound/tas2781-tlv.h b/include/sound/tas2781-tlv.h index ef9b9f19d212..273224df9282 100644 --- a/include/sound/tas2781-tlv.h +++ b/include/sound/tas2781-tlv.h @@ -2,7 +2,7 @@ // // ALSA SoC Texas Instruments TAS2781 Audio Smart Amplifier // -// Copyright (C) 2022 - 2024 Texas Instruments Incorporated +// Copyright (C) 2022 - 2025 Texas Instruments Incorporated // https://www.ti.com // // The TAS2781 driver implements a flexible and configurable @@ -15,7 +15,7 @@ #ifndef __TAS2781_TLV_H__ #define __TAS2781_TLV_H__ -static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 50, 0); -static const __maybe_unused DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0); +static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2781_dvc_tlv, -10000, 50, 0); +static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2781_amp_tlv, 1100, 50, 0); #endif diff --git a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c index 06c7bc2b9e9d..b91fff3fde97 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c +++ b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c @@ -267,7 +267,7 @@ static const struct snd_kcontrol_new tas2770_snd_controls[] = { static const struct snd_kcontrol_new tas2781_snd_controls[] = { ACARD_SINGLE_RANGE_EXT_TLV("Speaker Analog Volume", TAS2781_AMP_LEVEL, 1, 0, 20, 0, tas2781_amp_getvol, - tas2781_amp_putvol, amp_vol_tlv), + tas2781_amp_putvol, tas2781_amp_tlv), ACARD_SINGLE_BOOL_EXT("Speaker Force Firmware Load", 0, tas2781_force_fwload_get, tas2781_force_fwload_put), }; 
diff --git a/sound/hda/codecs/side-codecs/tas2781_hda_spi.c b/sound/hda/codecs/side-codecs/tas2781_hda_spi.c index 09a5d0f131b2..b9a55672bf15 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda_spi.c +++ b/sound/hda/codecs/side-codecs/tas2781_hda_spi.c @@ -494,9 +494,11 @@ static int tas2781_force_fwload_put(struct snd_kcontrol *kcontrol, static struct snd_kcontrol_new tas2781_snd_ctls[] = { ACARD_SINGLE_RANGE_EXT_TLV(NULL, TAS2781_AMP_LEVEL, 1, 0, 20, 0, - tas2781_amp_getvol, tas2781_amp_putvol, amp_vol_tlv), + tas2781_amp_getvol, tas2781_amp_putvol, + tas2781_amp_tlv), ACARD_SINGLE_RANGE_EXT_TLV(NULL, TAS2781_DVC_LVL, 0, 0, 200, 1, - tas2781_digital_getvol, tas2781_digital_putvol, dvc_tlv), + tas2781_digital_getvol, tas2781_digital_putvol, + tas2781_dvc_tlv), ACARD_SINGLE_BOOL_EXT(NULL, 0, tas2781_force_fwload_get, tas2781_force_fwload_put), }; diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index 676130f4cf3e..0e09d794516f 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -910,10 +910,10 @@ static const struct snd_kcontrol_new tasdevice_cali_controls[] = { static const struct snd_kcontrol_new tas2781_snd_controls[] = { SOC_SINGLE_RANGE_EXT_TLV("Speaker Analog Volume", TAS2781_AMP_LEVEL, 1, 0, 20, 0, tas2781_amp_getvol, - tas2781_amp_putvol, amp_vol_tlv), + tas2781_amp_putvol, tas2781_amp_tlv), SOC_SINGLE_RANGE_EXT_TLV("Speaker Digital Volume", TAS2781_DVC_LVL, 0, 0, 200, 1, tas2781_digital_getvol, - tas2781_digital_putvol, dvc_tlv), + tas2781_digital_putvol, tas2781_dvc_tlv), }; static const struct snd_kcontrol_new tas2781_cali_controls[] = { From dec8b38be4b35cae5f7fa086daf2631e2cfa09c1 Mon Sep 17 00:00:00 2001 From: Victor Shih Date: Thu, 31 Jul 2025 14:57:50 +0800 Subject: [PATCH 2067/2411] mmc: sdhci-pci-gli: Add a new function to simplify the code In preparation to fix replay timer timeout, add sdhci_gli_mask_replay_timer_timeout() function to simplify some of the code, allowing it to be 
re-used. Signed-off-by: Victor Shih Fixes: 1ae1d2d6e555 ("mmc: sdhci-pci-gli: Add Genesys Logic GL9763E support") Cc: stable@vger.kernel.org Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20250731065752.450231-2-victorshihgli@gmail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-gli.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index 4c2ae71770f7..f678c91f8d3e 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -287,6 +287,20 @@ #define GLI_MAX_TUNING_LOOP 40 /* Genesys Logic chipset */ +static void sdhci_gli_mask_replay_timer_timeout(struct pci_dev *pdev) +{ + int aer; + u32 value; + + /* mask the replay timer timeout of AER */ + aer = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); + if (aer) { + pci_read_config_dword(pdev, aer + PCI_ERR_COR_MASK, &value); + value |= PCI_ERR_COR_REP_TIMER; + pci_write_config_dword(pdev, aer + PCI_ERR_COR_MASK, value); + } +} + static inline void gl9750_wt_on(struct sdhci_host *host) { u32 wt_value; @@ -607,7 +621,6 @@ static void gl9750_hw_setting(struct sdhci_host *host) { struct sdhci_pci_slot *slot = sdhci_priv(host); struct pci_dev *pdev; - int aer; u32 value; pdev = slot->chip->pdev; @@ -626,12 +639,7 @@ static void gl9750_hw_setting(struct sdhci_host *host) pci_set_power_state(pdev, PCI_D0); /* mask the replay timer timeout of AER */ - aer = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); - if (aer) { - pci_read_config_dword(pdev, aer + PCI_ERR_COR_MASK, &value); - value |= PCI_ERR_COR_REP_TIMER; - pci_write_config_dword(pdev, aer + PCI_ERR_COR_MASK, value); - } + sdhci_gli_mask_replay_timer_timeout(pdev); gl9750_wt_off(host); } @@ -806,7 +814,6 @@ static void sdhci_gl9755_set_clock(struct sdhci_host *host, unsigned int clock) static void gl9755_hw_setting(struct sdhci_pci_slot *slot) { struct pci_dev *pdev = slot->chip->pdev; - int aer; 
u32 value; gl9755_wt_on(pdev); @@ -841,12 +848,7 @@ static void gl9755_hw_setting(struct sdhci_pci_slot *slot) pci_set_power_state(pdev, PCI_D0); /* mask the replay timer timeout of AER */ - aer = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); - if (aer) { - pci_read_config_dword(pdev, aer + PCI_ERR_COR_MASK, &value); - value |= PCI_ERR_COR_REP_TIMER; - pci_write_config_dword(pdev, aer + PCI_ERR_COR_MASK, value); - } + sdhci_gli_mask_replay_timer_timeout(pdev); gl9755_wt_off(pdev); } From 293ed0f5f34e1e9df888456af4b0a021f57b5f54 Mon Sep 17 00:00:00 2001 From: Victor Shih Date: Thu, 31 Jul 2025 14:57:51 +0800 Subject: [PATCH 2068/2411] mmc: sdhci-pci-gli: GL9763e: Rename the gli_set_gl9763e() for consistency In preparation to fix replay timer timeout, rename the gli_set_gl9763e() to gl9763e_hw_setting() for consistency. Signed-off-by: Victor Shih Fixes: 1ae1d2d6e555 ("mmc: sdhci-pci-gli: Add Genesys Logic GL9763E support") Cc: stable@vger.kernel.org Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20250731065752.450231-3-victorshihgli@gmail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-gli.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index f678c91f8d3e..436f0460222f 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -1753,7 +1753,7 @@ static int gl9763e_add_host(struct sdhci_pci_slot *slot) return ret; } -static void gli_set_gl9763e(struct sdhci_pci_slot *slot) +static void gl9763e_hw_setting(struct sdhci_pci_slot *slot) { struct pci_dev *pdev = slot->chip->pdev; u32 value; @@ -1925,7 +1925,7 @@ static int gli_probe_slot_gl9763e(struct sdhci_pci_slot *slot) gli_pcie_enable_msi(slot); host->mmc_host_ops.hs400_enhanced_strobe = gl9763e_hs400_enhanced_strobe; - gli_set_gl9763e(slot); + gl9763e_hw_setting(slot); sdhci_enable_v4_mode(host); return 0; From 340be332e420ed37d15d4169a1b4174e912ad6cb Mon Sep 17 00:00:00 
2001 From: Victor Shih Date: Thu, 31 Jul 2025 14:57:52 +0800 Subject: [PATCH 2069/2411] mmc: sdhci-pci-gli: GL9763e: Mask the replay timer timeout of AER Due to a flaw in the hardware design, the GL9763e replay timer frequently times out when ASPM is enabled. As a result, the warning messages will often appear in the system log when the system accesses the GL9763e PCI config. Therefore, the replay timer timeout must be masked. Signed-off-by: Victor Shih Fixes: 1ae1d2d6e555 ("mmc: sdhci-pci-gli: Add Genesys Logic GL9763E support") Cc: stable@vger.kernel.org Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20250731065752.450231-4-victorshihgli@gmail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-gli.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index 436f0460222f..3a1de477e9af 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -1782,6 +1782,9 @@ static void gl9763e_hw_setting(struct sdhci_pci_slot *slot) value |= FIELD_PREP(GLI_9763E_HS400_RXDLY, GLI_9763E_HS400_RXDLY_5); pci_write_config_dword(pdev, PCIE_GLI_9763E_CLKRXDLY, value); + /* mask the replay timer timeout of AER */ + sdhci_gli_mask_replay_timer_timeout(pdev); + pci_read_config_dword(pdev, PCIE_GLI_9763E_VHS, &value); value &= ~GLI_9763E_VHS_REV; value |= FIELD_PREP(GLI_9763E_VHS_REV, GLI_9763E_VHS_REV_R); From e251709aaddb3ee1e8ac1ed5e361a608a1cc92de Mon Sep 17 00:00:00 2001 From: Sai Krishna Potthuri Date: Wed, 30 Jul 2025 11:35:43 +0530 Subject: [PATCH 2070/2411] mmc: sdhci-of-arasan: Ensure CD logic stabilization before power-up During SD suspend/resume without a full card rescan (when using non-removable SD cards for rootfs), the SD card initialization may fail after resume. This occurs because, after a host controller reset, the card detect logic may take time to stabilize due to debounce logic. 
Without waiting for stabilization, the host may attempt powering up the card prematurely, leading to command timeouts during resume flow. Add sdhci_arasan_set_power_and_bus_voltage() to wait for the card detect stable bit before power up the card. Since the stabilization time is not fixed, a maximum timeout of one second is used to ensure sufficient wait time for the card detect signal to stabilize. Signed-off-by: Sai Krishna Potthuri Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250730060543.1735971-1-sai.krishna.potthuri@amd.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-arasan.c | 33 ++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index 42878474e56e..60dbc815e501 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -99,6 +99,9 @@ #define HIWORD_UPDATE(val, mask, shift) \ ((val) << (shift) | (mask) << ((shift) + 16)) +#define CD_STABLE_TIMEOUT_US 1000000 +#define CD_STABLE_MAX_SLEEP_US 10 + /** * struct sdhci_arasan_soc_ctl_field - Field used in sdhci_arasan_soc_ctl_map * @@ -206,12 +209,15 @@ struct sdhci_arasan_data { * 19MHz instead */ #define SDHCI_ARASAN_QUIRK_CLOCK_25_BROKEN BIT(2) +/* Enable CD stable check before power-up */ +#define SDHCI_ARASAN_QUIRK_ENSURE_CD_STABLE BIT(3) }; struct sdhci_arasan_of_data { const struct sdhci_arasan_soc_ctl_map *soc_ctl_map; const struct sdhci_pltfm_data *pdata; const struct sdhci_arasan_clk_ops *clk_ops; + u32 quirks; }; static const struct sdhci_arasan_soc_ctl_map rk3399_soc_ctl_map = { @@ -514,6 +520,24 @@ static int sdhci_arasan_voltage_switch(struct mmc_host *mmc, return -EINVAL; } +static void sdhci_arasan_set_power_and_bus_voltage(struct sdhci_host *host, unsigned char mode, + unsigned short vdd) +{ + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host); + 
u32 reg; + + /* + * Ensure that the card detect logic has stabilized before powering up, this is + * necessary after a host controller reset. + */ + if (mode == MMC_POWER_UP && sdhci_arasan->quirks & SDHCI_ARASAN_QUIRK_ENSURE_CD_STABLE) + read_poll_timeout(sdhci_readl, reg, reg & SDHCI_CD_STABLE, CD_STABLE_MAX_SLEEP_US, + CD_STABLE_TIMEOUT_US, false, host, SDHCI_PRESENT_STATE); + + sdhci_set_power_and_bus_voltage(host, mode, vdd); +} + static const struct sdhci_ops sdhci_arasan_ops = { .set_clock = sdhci_arasan_set_clock, .get_max_clock = sdhci_pltfm_clk_get_max_clock, @@ -521,7 +545,7 @@ static const struct sdhci_ops sdhci_arasan_ops = { .set_bus_width = sdhci_set_bus_width, .reset = sdhci_arasan_reset, .set_uhs_signaling = sdhci_set_uhs_signaling, - .set_power = sdhci_set_power_and_bus_voltage, + .set_power = sdhci_arasan_set_power_and_bus_voltage, .hw_reset = sdhci_arasan_hw_reset, }; @@ -570,7 +594,7 @@ static const struct sdhci_ops sdhci_arasan_cqe_ops = { .set_bus_width = sdhci_set_bus_width, .reset = sdhci_arasan_reset, .set_uhs_signaling = sdhci_set_uhs_signaling, - .set_power = sdhci_set_power_and_bus_voltage, + .set_power = sdhci_arasan_set_power_and_bus_voltage, .irq = sdhci_arasan_cqhci_irq, }; @@ -1447,6 +1471,7 @@ static const struct sdhci_arasan_clk_ops zynqmp_clk_ops = { static struct sdhci_arasan_of_data sdhci_arasan_zynqmp_data = { .pdata = &sdhci_arasan_zynqmp_pdata, .clk_ops = &zynqmp_clk_ops, + .quirks = SDHCI_ARASAN_QUIRK_ENSURE_CD_STABLE, }; static const struct sdhci_arasan_clk_ops versal_clk_ops = { @@ -1457,6 +1482,7 @@ static const struct sdhci_arasan_clk_ops versal_clk_ops = { static struct sdhci_arasan_of_data sdhci_arasan_versal_data = { .pdata = &sdhci_arasan_zynqmp_pdata, .clk_ops = &versal_clk_ops, + .quirks = SDHCI_ARASAN_QUIRK_ENSURE_CD_STABLE, }; static const struct sdhci_arasan_clk_ops versal_net_clk_ops = { @@ -1467,6 +1493,7 @@ static const struct sdhci_arasan_clk_ops versal_net_clk_ops = { static struct sdhci_arasan_of_data 
sdhci_arasan_versal_net_data = { .pdata = &sdhci_arasan_versal_net_pdata, .clk_ops = &versal_net_clk_ops, + .quirks = SDHCI_ARASAN_QUIRK_ENSURE_CD_STABLE, }; static struct sdhci_arasan_of_data intel_keembay_emmc_data = { @@ -1937,6 +1964,8 @@ static int sdhci_arasan_probe(struct platform_device *pdev) if (of_device_is_compatible(np, "rockchip,rk3399-sdhci-5.1")) sdhci_arasan_update_clockmultiplier(host, 0x0); + sdhci_arasan->quirks |= data->quirks; + if (of_device_is_compatible(np, "intel,keembay-sdhci-5.1-emmc") || of_device_is_compatible(np, "intel,keembay-sdhci-5.1-sd") || of_device_is_compatible(np, "intel,keembay-sdhci-5.1-sdio")) { From 99d7ab8db9d8230b243f5ed20ba0229e54cc0dfa Mon Sep 17 00:00:00 2001 From: Jiayi Li Date: Mon, 4 Aug 2025 09:36:04 +0800 Subject: [PATCH 2071/2411] memstick: Fix deadlock by moving removing flag earlier The existing memstick core patch: commit 62c59a8786e6 ("memstick: Skip allocating card when removing host") sets host->removing in memstick_remove_host(),but still exists a critical time window where memstick_check can run after host->eject is set but before removing is set. In the rtsx_usb_ms driver, the problematic sequence is: rtsx_usb_ms_drv_remove: memstick_check: host->eject = true cancel_work_sync(handle_req) if(!host->removing) ... memstick_alloc_card() memstick_set_rw_addr() memstick_new_req() rtsx_usb_ms_request() if(!host->eject) skip schedule_work wait_for_completion() memstick_remove_host: [blocks indefinitely] host->removing = true flush_workqueue() [block] 1. rtsx_usb_ms_drv_remove sets host->eject = true 2. cancel_work_sync(&host->handle_req) runs 3. memstick_check work may be executed here <-- danger window 4. 
memstick_remove_host sets removing = 1 During this window (step 3), memstick_check calls memstick_alloc_card, which may wait indefinitely for an mrq_complete completion that will never occur: because rtsx_usb_ms_request sees eject=true and skips scheduling work, memstick_set_rw_addr waits forever for completion. This causes a deadlock when memstick_remove_host tries to flush_workqueue, waiting for memstick_check to complete, while memstick_check is blocked waiting for mrq_complete completion. Fix this by setting removing=true at the start of rtsx_usb_ms_drv_remove, before any work cancellation. This ensures memstick_check will see the removing flag immediately and exit early, avoiding the deadlock. Fixes: 62c59a8786e6 ("memstick: Skip allocating card when removing host") Signed-off-by: Jiayi Li Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250804013604.1311218-1-lijiayi@kylinos.cn Signed-off-by: Ulf Hansson --- drivers/memstick/core/memstick.c | 1 - drivers/memstick/host/rtsx_usb_ms.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index 7f3f47db4c98..e4275f8ee5db 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -555,7 +555,6 @@ EXPORT_SYMBOL(memstick_add_host); */ void memstick_remove_host(struct memstick_host *host) { - host->removing = 1; flush_workqueue(workqueue); mutex_lock(&host->lock); if (host->card) diff --git a/drivers/memstick/host/rtsx_usb_ms.c b/drivers/memstick/host/rtsx_usb_ms.c index 3878136227e4..5b5e9354fb2e 100644 --- a/drivers/memstick/host/rtsx_usb_ms.c +++ b/drivers/memstick/host/rtsx_usb_ms.c @@ -812,6 +812,7 @@ static void rtsx_usb_ms_drv_remove(struct platform_device *pdev) int err; host->eject = true; + msh->removing = true; cancel_work_sync(&host->handle_req); cancel_delayed_work_sync(&host->poll_card); From d8df126349dad855cdfedd6bbf315bad2e901c2f Mon Sep 17 00:00:00 2001 From: Tianxiang Peng Date:
Mon, 23 Jun 2025 17:31:53 +0800 Subject: [PATCH 2072/2411] x86/cpu/hygon: Add missing resctrl_cpu_detect() in bsp_init helper Since 923f3a2b48bd ("x86/resctrl: Query LLC monitoring properties once during boot") resctrl_cpu_detect() has been moved from common CPU initialization code to the vendor-specific BSP init helper, while Hygon didn't put that call in their code. This triggers a division by zero fault during early booting stage on our machines with X86_FEATURE_CQM* supported, where get_rdt_mon_resources() tries to calculate mon_l3_config with uninitialized boot_cpu_data.x86_cache_occ_scale. Add the missing resctrl_cpu_detect() in the Hygon BSP init helper. [ bp: Massage commit message. ] Fixes: 923f3a2b48bd ("x86/resctrl: Query LLC monitoring properties once during boot") Signed-off-by: Tianxiang Peng Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Hui Li Cc: Link: https://lore.kernel.org/20250623093153.3016937-1-txpeng@tencent.com --- arch/x86/kernel/cpu/hygon.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 2154f12766fb..1fda6c3a2b65 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "cpu.h" @@ -117,6 +118,8 @@ static void bsp_init_hygon(struct cpuinfo_x86 *c) x86_amd_ls_cfg_ssbd_mask = 1ULL << 10; } } + + resctrl_cpu_detect(c); } static void early_init_hygon(struct cpuinfo_x86 *c) From 89f0addeee3cb2dc49837599330ed9c4612f05b0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 18 Aug 2025 12:59:45 +0300 Subject: [PATCH 2073/2411] ALSA: usb-audio: Fix size validation in convert_chmap_v3() The "p" pointer is void so sizeof(*p) is 1. The intent was to check sizeof(*cs_desc), which is 3, instead. 
Fixes: ecfd41166b72 ("ALSA: usb-audio: Validate UAC3 cluster segment descriptors") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/aKL5kftC1qGt6lpv@stanley.mountain Signed-off-by: Takashi Iwai --- sound/usb/stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/stream.c b/sound/usb/stream.c index acf3dc2d79e0..5c235a5ba7e1 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -349,7 +349,7 @@ snd_pcm_chmap_elem *convert_chmap_v3(struct uac3_cluster_header_descriptor u16 cs_len; u8 cs_type; - if (len < sizeof(*p)) + if (len < sizeof(*cs_desc)) break; cs_len = le16_to_cpu(cs_desc->wLength); if (len < cs_len) From 5f1c8965e748c150d580a2ea8fbee1bd80d07a24 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 4 Aug 2025 22:11:28 +1000 Subject: [PATCH 2074/2411] ovl: use I_MUTEX_PARENT when locking parent in ovl_create_temp() ovl_create_temp() treats "workdir" as a parent in which it creates an object so it should use I_MUTEX_PARENT. Prior to the commit identified below the lock was taken by the caller which sometimes used I_MUTEX_PARENT and sometimes used I_MUTEX_NORMAL. The use of I_MUTEX_NORMAL was incorrect but unfortunately copied into ovl_create_temp(). Note to backporters: This patch only applies after the last Fixes given below (post v6.16). To fix the bug in v6.7 and later the inode_lock() call in ovl_copy_up_workdir() needs to nest using I_MUTEX_PARENT. 
Link: https://lore.kernel.org/all/67a72070.050a0220.3d72c.0022.GAE@google.com/ Cc: stable@vger.kernel.org Reported-by: syzbot+7836a68852a10ec3d790@syzkaller.appspotmail.com Tested-by: syzbot+7836a68852a10ec3d790@syzkaller.appspotmail.com Fixes: c63e56a4a652 ("ovl: do not open/llseek lower file with upper sb_writers held") Fixes: d2c995581c7c ("ovl: Call ovl_create_temp() without lock held.") Signed-off-by: NeilBrown Signed-off-by: Amir Goldstein --- fs/overlayfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 70b8687dc45e..dbd63a74df4b 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -225,7 +225,7 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir, struct ovl_cattr *attr) { struct dentry *ret; - inode_lock(workdir->d_inode); + inode_lock_nested(workdir->d_inode, I_MUTEX_PARENT); ret = ovl_create_real(ofs, workdir, ovl_lookup_temp(ofs, workdir), attr); inode_unlock(workdir->d_inode); From e8bd877fb76bb9f35253e8f41ce0c772269934dd Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 18 Aug 2025 11:23:55 +0200 Subject: [PATCH 2075/2411] ovl: fix possible double unlink commit 9d23967b18c6 ("ovl: simplify an error path in ovl_copy_up_workdir()") introduced the helper ovl_cleanup_unlocked(), which is later used in several following patches to re-acquire the parent inode lock and unlink a dentry that was earlier found using lookup. This helper was eventually renamed to ovl_cleanup(). The helper ovl_parent_lock() is used to re-acquire the parent inode lock. After acquiring the parent inode lock, the helper verifies that the dentry has not since been moved to another parent, but it failed to verify that the dentry wasn't unlinked from the parent. This means that now every call to ovl_cleanup() could potentially race with another thread, unlinking the dentry to be cleaned up underneath overlayfs and trigger a vfs assertion. 
Reported-by: syzbot+ec9fab8b7f0386b98a17@syzkaller.appspotmail.com Tested-by: syzbot+ec9fab8b7f0386b98a17@syzkaller.appspotmail.com Fixes: 9d23967b18c6 ("ovl: simplify an error path in ovl_copy_up_workdir()") Suggested-by: NeilBrown Signed-off-by: Amir Goldstein --- fs/overlayfs/util.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index a33115e7384c..41033bac96cb 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -1552,7 +1552,8 @@ void ovl_copyattr(struct inode *inode) int ovl_parent_lock(struct dentry *parent, struct dentry *child) { inode_lock_nested(parent->d_inode, I_MUTEX_PARENT); - if (!child || child->d_parent == parent) + if (!child || + (!d_unhashed(child) && child->d_parent == parent)) return 0; inode_unlock(parent->d_inode); From 0227af355b50c526bf83ca52d67aef5d102e9b07 Mon Sep 17 00:00:00 2001 From: Akhilesh Patil Date: Sun, 17 Aug 2025 15:06:05 +0530 Subject: [PATCH 2076/2411] selftests: ublk: Use ARRAY_SIZE() macro to improve code Use ARRAY_SIZE() macro while calculating size of an array to improve code readability and reduce potential sizing errors. Implement this suggestion given by spatch tool by running coccinelle script - scripts/coccinelle/misc/array_size.cocci Follow ARRAY_SIZE() macro usage pattern in ublk.c introduced by, commit ec120093180b9 ("selftests: ublk: fix ublk_find_tgt()") wherever appropriate to maintain consistency. 
Signed-off-by: Akhilesh Patil Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/aKGihYui6/Pcijbk@bhairav-test.ee.iitb.ac.in Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 95188065b2e9..6512dfbdbce3 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -1400,7 +1400,7 @@ static int cmd_dev_get_features(void) if (!((1ULL << i) & features)) continue; - if (i < sizeof(feat_map) / sizeof(feat_map[0])) + if (i < ARRAY_SIZE(feat_map)) feat = feat_map[i]; else feat = "unknown"; @@ -1477,7 +1477,7 @@ static void __cmd_create_help(char *exe, bool recovery) printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); printf("\tdefault: nthreads=nr_queues"); - for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) { + for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++) { const struct ublk_tgt_ops *ops = tgt_ops_list[i]; if (ops->usage) From 447be50598c05499f7ccc2b1f6ddb3da30f8099a Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 15 Aug 2025 12:52:09 +0800 Subject: [PATCH 2077/2411] regulator: pca9450: Use devm_register_sys_off_handler With module test, there is error dump: ------------[ cut here ]------------ notifier callback pca9450_i2c_restart_handler already registered WARNING: kernel/notifier.c:23 at notifier_chain_register+0x5c/0x88, CPU#0: kworker/u16:3/50 Call trace: notifier_chain_register+0x5c/0x88 (P) atomic_notifier_chain_register+0x30/0x58 register_restart_handler+0x1c/0x28 pca9450_i2c_probe+0x418/0x538 i2c_device_probe+0x220/0x3d0 really_probe+0x114/0x410 __driver_probe_device+0xa0/0x150 driver_probe_device+0x40/0x114 __device_attach_driver+0xd4/0x12c So use devm_register_sys_off_handler to let kernel handle the resource free to avoid kernel dump. 
Fixes: 6157e62b07d9 ("regulator: pca9450: Add restart handler") Signed-off-by: Peng Fan Link: https://patch.msgid.link/20250815-pca9450-v1-1-7748e362dc97@nxp.com Signed-off-by: Mark Brown --- drivers/regulator/pca9450-regulator.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/regulator/pca9450-regulator.c b/drivers/regulator/pca9450-regulator.c index feadb21a8f30..4be270f4d6c3 100644 --- a/drivers/regulator/pca9450-regulator.c +++ b/drivers/regulator/pca9450-regulator.c @@ -40,7 +40,6 @@ struct pca9450 { struct device *dev; struct regmap *regmap; struct gpio_desc *sd_vsel_gpio; - struct notifier_block restart_nb; enum pca9450_chip_type type; unsigned int rcnt; int irq; @@ -1100,10 +1099,9 @@ static irqreturn_t pca9450_irq_handler(int irq, void *data) return IRQ_HANDLED; } -static int pca9450_i2c_restart_handler(struct notifier_block *nb, - unsigned long action, void *data) +static int pca9450_i2c_restart_handler(struct sys_off_data *data) { - struct pca9450 *pca9450 = container_of(nb, struct pca9450, restart_nb); + struct pca9450 *pca9450 = data->cb_data; struct i2c_client *i2c = container_of(pca9450->dev, struct i2c_client, dev); dev_dbg(&i2c->dev, "Restarting device..\n"); @@ -1261,10 +1259,9 @@ static int pca9450_i2c_probe(struct i2c_client *i2c) pca9450->sd_vsel_fixed_low = of_property_read_bool(ldo5->dev.of_node, "nxp,sd-vsel-fixed-low"); - pca9450->restart_nb.notifier_call = pca9450_i2c_restart_handler; - pca9450->restart_nb.priority = PCA9450_RESTART_HANDLER_PRIORITY; - - if (register_restart_handler(&pca9450->restart_nb)) + if (devm_register_sys_off_handler(&i2c->dev, SYS_OFF_MODE_RESTART, + PCA9450_RESTART_HANDLER_PRIORITY, + pca9450_i2c_restart_handler, pca9450)) dev_warn(&i2c->dev, "Failed to register restart handler\n"); dev_info(&i2c->dev, "%s probed.\n", From 8ea815399c3fcce1889bd951fec25b5b9a3979c1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 14 Apr 2025 16:41:07 +0200 Subject: [PATCH 2078/2411] 
compiler: remove __ADDRESSABLE_ASM{_STR,}() again __ADDRESSABLE_ASM_STR() is where the necessary stringification happens. As long as "sym" doesn't contain any odd characters, no quoting is required for its use with .quad / .long. In fact the quotation gets in the way with gas 2.25; it's only from 2.26 onwards that quoted symbols are half-way properly supported. However, assembly being different from C anyway, drop __ADDRESSABLE_ASM_STR() and its helper macro altogether. A simple .global directive will suffice to get the symbol "declared", i.e. into the symbol table. While there also stop open-coding STATIC_CALL_TRAMP() and STATIC_CALL_KEY(). Fixes: 0ef8047b737d ("x86/static-call: provide a way to do very early static-call updates") Signed-off-by: Jan Beulich Acked-by: Josh Poimboeuf Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Message-ID: <609d2c74-de13-4fae-ab1a-1ec44afb948d@suse.com> --- arch/x86/include/asm/xen/hypercall.h | 5 +++-- include/linux/compiler.h | 8 -------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 59a62c3780a2..a16d4631547c 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -94,12 +94,13 @@ DECLARE_STATIC_CALL(xen_hypercall, xen_hypercall_func); #ifdef MODULE #define __ADDRESSABLE_xen_hypercall #else -#define __ADDRESSABLE_xen_hypercall __ADDRESSABLE_ASM_STR(__SCK__xen_hypercall) +#define __ADDRESSABLE_xen_hypercall \ + __stringify(.global STATIC_CALL_KEY(xen_hypercall);) #endif #define __HYPERCALL \ __ADDRESSABLE_xen_hypercall \ - "call __SCT__xen_hypercall" + __stringify(call STATIC_CALL_TRAMP(xen_hypercall)) #define __HYPERCALL_ENTRY(x) "a" (x) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 6f04a1d8c720..64ff73c533e5 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -288,14 +288,6 @@ static inline void *offset_to_ptr(const int *off) #define 
__ADDRESSABLE(sym) \ ___ADDRESSABLE(sym, __section(".discard.addressable")) -#define __ADDRESSABLE_ASM(sym) \ - .pushsection .discard.addressable,"aw"; \ - .align ARCH_SEL(8,4); \ - ARCH_SEL(.quad, .long) __stringify(sym); \ - .popsection; - -#define __ADDRESSABLE_ASM_STR(sym) __stringify(__ADDRESSABLE_ASM(sym)) - /* * This returns a constant expression while determining if an argument is * a constant expression, most importantly without evaluating the argument. From 8fe8a092043f28d3c8e467cb2bbfe1e1ccf7f996 Mon Sep 17 00:00:00 2001 From: Akhilesh Patil Date: Sun, 10 Aug 2025 19:04:01 +0530 Subject: [PATCH 2079/2411] iommufd: viommu: free memory allocated by kvcalloc() using kvfree() Use kvfree() instead of kfree() to free pages allocated by kvcalloc() in iommufd_hw_queue_alloc_phys() to fix potential memory corruption. Ensure the memory is properly freed, as kvcalloc may internally use vmalloc or kmalloc depending on available memory in the system. Fixes: 2238ddc2b056 ("iommufd/viommu: Add IOMMUFD_CMD_HW_QUEUE_ALLOC ioctl") Link: https://patch.msgid.link/r/aJifyVV2PL6WGEs6@bhairav-test.ee.iitb.ac.in Signed-off-by: Akhilesh Patil Reviewed-by: Kevin Tian Reviewed-by: Nicolin Chen Reviewed-by: Pranjal Shrivastava Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/viommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c index 2ca5809b238b..462b457ffd0c 100644 --- a/drivers/iommu/iommufd/viommu.c +++ b/drivers/iommu/iommufd/viommu.c @@ -339,7 +339,7 @@ iommufd_hw_queue_alloc_phys(struct iommu_hw_queue_alloc *cmd, } *base_pa = (page_to_pfn(pages[0]) << PAGE_SHIFT) + offset; - kfree(pages); + kvfree(pages); return access; out_unpin: @@ -349,7 +349,7 @@ iommufd_hw_queue_alloc_phys(struct iommu_hw_queue_alloc *cmd, out_destroy: iommufd_access_destroy_internal(viommu->ictx, access); out_free: - kfree(pages); + kvfree(pages); return ERR_PTR(rc); } From
447c6141e8ea68ef4e56c55144fd18f43e6c8dca Mon Sep 17 00:00:00 2001 From: Alessandro Ratti Date: Fri, 15 Aug 2025 17:37:26 +0200 Subject: [PATCH 2080/2411] iommufd: Fix spelling errors in iommufd.rst This patch corrects two minor spelling issues found in Documentation/userspace-api/iommufd.rst: - "primarly" -> "primarily" - "sharable" -> "shareable" Found using codespell(1). Link: https://patch.msgid.link/r/20250815153840.188213-2-alessandro@0x65c.net Signed-off-by: Alessandro Ratti Signed-off-by: Jason Gunthorpe --- Documentation/userspace-api/iommufd.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/userspace-api/iommufd.rst b/Documentation/userspace-api/iommufd.rst index 03f7510384d2..f1c4d21e5c5e 100644 --- a/Documentation/userspace-api/iommufd.rst +++ b/Documentation/userspace-api/iommufd.rst @@ -43,7 +43,7 @@ Following IOMMUFD objects are exposed to userspace: - IOMMUFD_OBJ_HWPT_PAGING, representing an actual hardware I/O page table (i.e. a single struct iommu_domain) managed by the iommu driver. "PAGING" - primarly indicates this type of HWPT should be linked to an IOAS. It also + primarily indicates this type of HWPT should be linked to an IOAS. It also indicates that it is backed by an iommu_domain with __IOMMU_DOMAIN_PAGING feature flag. This can be either an UNMANAGED stage-1 domain for a device running in the user space, or a nesting parent stage-2 domain for mappings @@ -76,7 +76,7 @@ Following IOMMUFD objects are exposed to userspace: * Security namespace for guest owned ID, e.g. guest-controlled cache tags * Non-device-affiliated event reporting, e.g. invalidation queue errors - * Access to a sharable nesting parent pagetable across physical IOMMUs + * Access to a shareable nesting parent pagetable across physical IOMMUs * Virtualization of various platforms IDs, e.g. 
RIDs and others * Delivery of paravirtualized invalidation * Direct assigned invalidation queues From e9576e078220c50ace9e9087355423de23e25fa5 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 21 Jul 2025 18:11:54 +0000 Subject: [PATCH 2081/2411] x86/CPU/AMD: Ignore invalid reset reason value The reset reason value may be "all bits set", e.g. 0xFFFFFFFF. This is a commonly used error response from hardware. This may occur due to a real hardware issue or when running in a VM. The user will see all reset reasons reported in this case. Check for an error response value and return early to avoid decoding invalid data. Also, adjust the data variable type to match the hardware register size. Fixes: ab8131028710 ("x86/CPU/AMD: Print the reason for the last reset") Reported-by: Libing He Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Mario Limonciello Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20250721181155.3536023-1-yazen.ghannam@amd.com --- arch/x86/kernel/cpu/amd.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a5ece6ebe8a7..a6f88ca1a6b4 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1326,8 +1326,8 @@ static const char * const s5_reset_reason_txt[] = { static __init int print_s5_reset_status_mmio(void) { - unsigned long value; void __iomem *addr; + u32 value; int i; if (!cpu_feature_enabled(X86_FEATURE_ZEN)) @@ -1340,12 +1340,16 @@ static __init int print_s5_reset_status_mmio(void) value = ioread32(addr); iounmap(addr); + /* Value with "all bits set" is an error response and should be ignored. 
*/ + if (value == U32_MAX) + return 0; + for (i = 0; i < ARRAY_SIZE(s5_reset_reason_txt); i++) { if (!(value & BIT(i))) continue; if (s5_reset_reason_txt[i]) { - pr_info("x86/amd: Previous system reset reason [0x%08lx]: %s\n", + pr_info("x86/amd: Previous system reset reason [0x%08x]: %s\n", value, s5_reset_reason_txt[i]); } } From 1ba9fbe40337e448b32e2831a7051191d61f0382 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 11 Aug 2025 09:44:42 +0200 Subject: [PATCH 2082/2411] drm/msm: Don't use %pK through printk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the past %pK was preferable to %p as it would not leak raw pointer values into the kernel log. Since commit ad67b74d2469 ("printk: hash addresses printed with %p") the regular %p has been improved to avoid this issue. Furthermore, restricted pointers ("%pK") were never meant to be used through printk(). They can still unintentionally leak raw pointers or acquire sleeping locks in atomic contexts. Switch to the regular pointer formatting which is safer and easier to reason about. 
Signed-off-by: Thomas Weißschuh Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/667895/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c | 2 +- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c | 4 ++-- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 4 ++-- drivers/gpu/drm/msm/msm_mdss.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index d4b545448d74..94912b4708fb 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -596,7 +596,7 @@ static void _dpu_crtc_complete_flip(struct drm_crtc *crtc) spin_lock_irqsave(&dev->event_lock, flags); if (dpu_crtc->event) { - DRM_DEBUG_VBL("%s: send event: %pK\n", dpu_crtc->name, + DRM_DEBUG_VBL("%s: send event: %p\n", dpu_crtc->name, dpu_crtc->event); trace_dpu_crtc_complete_flip(DRMID(crtc)); drm_crtc_send_vblank_event(crtc, dpu_crtc->event); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c index 11fb1bc54fa9..54b20faa0b69 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c @@ -31,14 +31,14 @@ static void dpu_setup_dspp_pcc(struct dpu_hw_dspp *ctx, u32 base; if (!ctx) { - DRM_ERROR("invalid ctx %pK\n", ctx); + DRM_ERROR("invalid ctx %p\n", ctx); return; } base = ctx->cap->sblk->pcc.base; if (!base) { - DRM_ERROR("invalid ctx %pK pcc base 0x%x\n", ctx, base); + DRM_ERROR("invalid ctx %p pcc base 0x%x\n", ctx, base); return; } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 12dcb32b4724..a306077647c3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -1345,7 +1345,7 @@ static int dpu_kms_mmap_mdp5(struct dpu_kms *dpu_kms) dpu_kms->mmio = NULL; return ret; } - DRM_DEBUG("mapped dpu address space @%pK\n", dpu_kms->mmio); + 
DRM_DEBUG("mapped dpu address space @%p\n", dpu_kms->mmio); dpu_kms->vbif[VBIF_RT] = msm_ioremap_mdss(mdss_dev, dpu_kms->pdev, @@ -1380,7 +1380,7 @@ static int dpu_kms_mmap_dpu(struct dpu_kms *dpu_kms) dpu_kms->mmio = NULL; return ret; } - DRM_DEBUG("mapped dpu address space @%pK\n", dpu_kms->mmio); + DRM_DEBUG("mapped dpu address space @%p\n", dpu_kms->mmio); dpu_kms->vbif[VBIF_RT] = msm_ioremap(pdev, "vbif"); if (IS_ERR(dpu_kms->vbif[VBIF_RT])) { diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 1f5fe7811e01..39885b333910 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -423,7 +423,7 @@ static struct msm_mdss *msm_mdss_init(struct platform_device *pdev, bool is_mdp5 if (IS_ERR(msm_mdss->mmio)) return ERR_CAST(msm_mdss->mmio); - dev_dbg(&pdev->dev, "mapped mdss address space @%pK\n", msm_mdss->mmio); + dev_dbg(&pdev->dev, "mapped mdss address space @%p\n", msm_mdss->mmio); ret = msm_mdss_parse_data_bus_icc_path(&pdev->dev, msm_mdss); if (ret) From 4876b391654142dcf31ac6da619ace357b6b902d Mon Sep 17 00:00:00 2001 From: Antonino Maniscalco Date: Wed, 13 Aug 2025 15:04:44 +0200 Subject: [PATCH 2083/2411] drm/msm: skip re-emitting IBs for unusable VMs When a VM is marked as unusable we disallow new submissions from it; however, submissions that were already scheduled on the ring would still be re-sent. Since this can lead to further hangs, avoid emitting the actual IBs.
Fixes: 6a4d287a1ae6 ("drm/msm: Mark VM as unusable on GPU hangs") Signed-off-by: Antonino Maniscalco Reviewed-by: Akhil P Oommen Patchwork: https://patchwork.freedesktop.org/patch/668314/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gpu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 416d47185ef0..26c5ce897cbb 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -558,8 +558,15 @@ static void recover_worker(struct kthread_work *work) unsigned long flags; spin_lock_irqsave(&ring->submit_lock, flags); - list_for_each_entry(submit, &ring->submits, node) + list_for_each_entry(submit, &ring->submits, node) { + /* + * If the submit uses an unusable vm make sure + * we don't actually run it + */ + if (to_msm_vm(submit->vm)->unusable) + submit->nr_cmds = 0; gpu->funcs->submit(gpu, submit); + } spin_unlock_irqrestore(&ring->submit_lock, flags); } } From 6c705851499172c0ce863e816946fb5a564ff69f Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 14 Aug 2025 09:17:06 -0700 Subject: [PATCH 2084/2411] ACPI: APEI: EINJ: Check if user asked for EINJV2 injection On an EINJV2 capable system, users may still use the old injection interface but einj_get_parameter_address() takes the EINJV2 path to map the parameter structure. This results in the address the user supplied being stored to the wrong location and the BIOS injecting based on an uninitialized field (0x0 in the reported case). Check the version of the request when mapping the EINJ parameter structure in BIOS reserved memory. Fixes: 691a0f0a557b ("ACPI: APEI: EINJ: Discover EINJv2 parameters") Reported-by: Lai, Yi1 Signed-off-by: Tony Luck Reviewed-by: Zaid Alali Reviewed-by: Hanjun Guo Link: https://patch.msgid.link/20250814161706.4489-1-tony.luck@intel.com Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/apei/einj-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c index bf8dc92a373a..99f1b841fba9 100644 --- a/drivers/acpi/apei/einj-core.c +++ b/drivers/acpi/apei/einj-core.c @@ -315,7 +315,7 @@ static void __iomem *einj_get_parameter_address(void) memcpy_fromio(&v5param, p, v5param_size); acpi5 = 1; check_vendor_extension(pa_v5, &v5param); - if (available_error_type & ACPI65_EINJV2_SUPP) { + if (is_v2 && available_error_type & ACPI65_EINJV2_SUPP) { len = v5param.einjv2_struct.length; offset = offsetof(struct einjv2_extension_struct, component_arr); max_nr_components = (len - offset) / From 7459e87ae1d78ba27b728172fa2aa912a5b8640d Mon Sep 17 00:00:00 2001 From: Charles Han Date: Fri, 15 Aug 2025 10:42:06 +0800 Subject: [PATCH 2085/2411] ACPI: APEI: EINJ: fix potential NULL dereference in __einj_error_inject() The __einj_error_inject() function allocates memory via kmalloc() without checking for allocation failure, which could lead to a NULL pointer dereference. Return -ENOMEM in case allocation fails. Fixes: b47610296d17 ("ACPI: APEI: EINJ: Enable EINJv2 error injections") Signed-off-by: Charles Han Reviewed-by: Tony Luck Reviewed-by: Hanjun Guo Link: https://patch.msgid.link/20250815024207.3038-1-hanchunchao@inspur.com Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/apei/einj-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c index 99f1b841fba9..b489ae684a1b 100644 --- a/drivers/acpi/apei/einj-core.c +++ b/drivers/acpi/apei/einj-core.c @@ -540,6 +540,9 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, struct set_error_type_with_address *v5param; v5param = kmalloc(v5param_size, GFP_KERNEL); + if (!v5param) + return -ENOMEM; + memcpy_fromio(v5param, einj_param, v5param_size); v5param->type = type; if (type & ACPI5_VENDOR_BIT) { From 61ca3b891b4b9667334c1356a73f28954c92d43a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Aug 2025 06:54:50 +0200 Subject: [PATCH 2086/2411] block: handle pi_tuple_size in queue_limits_stack_integrity queue_limits_stack_integrity needs to handle the new pi_tuple_size field, otherwise stacking PI-capable devices will always fail. Fixes: 76e45252a4ce ("block: introduce pi_tuple_size field in blk_integrity") Signed-off-by: Christoph Hellwig Reviewed-by: Anuj Gupta Reviewed-by: Martin K. 
Petersen Link: https://lore.kernel.org/r/20250818045456.1482889-2-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-settings.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/blk-settings.c b/block/blk-settings.c index 07874e9b609f..491c0c48d52b 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -972,6 +972,8 @@ bool queue_limits_stack_integrity(struct queue_limits *t, goto incompatible; if (ti->csum_type != bi->csum_type) goto incompatible; + if (ti->pi_tuple_size != bi->pi_tuple_size) + goto incompatible; if ((ti->flags & BLK_INTEGRITY_REF_TAG) != (bi->flags & BLK_INTEGRITY_REF_TAG)) goto incompatible; @@ -980,6 +982,7 @@ bool queue_limits_stack_integrity(struct queue_limits *t, ti->flags |= (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) | (bi->flags & BLK_INTEGRITY_REF_TAG); ti->csum_type = bi->csum_type; + ti->pi_tuple_size = bi->pi_tuple_size; ti->metadata_size = bi->metadata_size; ti->pi_offset = bi->pi_offset; ti->interval_exp = bi->interval_exp; From f4ae1744033d54b63c31a3664a4fdf5cebec7f27 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Aug 2025 06:54:51 +0200 Subject: [PATCH 2087/2411] block: remove newlines from the warnings in blk_validate_integrity_limits Otherwise they are very hard to read in the kernel log. Signed-off-by: Christoph Hellwig Reviewed-by: Anuj Gupta Reviewed-by: Martin K. 
Petersen Link: https://lore.kernel.org/r/20250818045456.1482889-3-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-settings.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 491c0c48d52b..d6438e6c276d 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -157,16 +157,14 @@ static int blk_validate_integrity_limits(struct queue_limits *lim) switch (bi->csum_type) { case BLK_INTEGRITY_CSUM_NONE: if (bi->pi_tuple_size) { - pr_warn("pi_tuple_size must be 0 when checksum type \ - is none\n"); + pr_warn("pi_tuple_size must be 0 when checksum type is none\n"); return -EINVAL; } break; case BLK_INTEGRITY_CSUM_CRC: case BLK_INTEGRITY_CSUM_IP: if (bi->pi_tuple_size != sizeof(struct t10_pi_tuple)) { - pr_warn("pi_tuple_size mismatch for T10 PI: expected \ - %zu, got %u\n", + pr_warn("pi_tuple_size mismatch for T10 PI: expected %zu, got %u\n", sizeof(struct t10_pi_tuple), bi->pi_tuple_size); return -EINVAL; @@ -174,8 +172,7 @@ static int blk_validate_integrity_limits(struct queue_limits *lim) break; case BLK_INTEGRITY_CSUM_CRC64: if (bi->pi_tuple_size != sizeof(struct crc64_pi_tuple)) { - pr_warn("pi_tuple_size mismatch for CRC64 PI: \ - expected %zu, got %u\n", + pr_warn("pi_tuple_size mismatch for CRC64 PI: expected %zu, got %u\n", sizeof(struct crc64_pi_tuple), bi->pi_tuple_size); return -EINVAL; From b21d1fbb97c814c76ffa392cd603f8cd3ecc0355 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 14 Aug 2025 07:11:57 +0200 Subject: [PATCH 2088/2411] ACPI: APEI: EINJ: Fix resource leak by remove callback in .exit.text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback is also used during error handling in faux_probe(). As einj_remove() was marked with __exit it's not linked into the kernel if the driver is built-in, potentially resulting in resource leaks. 
Also remove the comment justifying the __exit annotation which doesn't apply any more since the driver was converted to the faux device interface. Fixes: 6cb9441bfe8d ("ACPI: APEI: EINJ: Transition to the faux device interface") Signed-off-by: Uwe Kleine-König Cc: 6.16+ # 6.16+ Link: https://patch.msgid.link/20250814051157.35867-2-u.kleine-koenig@baylibre.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/einj-core.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c index b489ae684a1b..2561b045acc7 100644 --- a/drivers/acpi/apei/einj-core.c +++ b/drivers/acpi/apei/einj-core.c @@ -1094,7 +1094,7 @@ static int __init einj_probe(struct faux_device *fdev) return rc; } -static void __exit einj_remove(struct faux_device *fdev) +static void einj_remove(struct faux_device *fdev) { struct apei_exec_context ctx; @@ -1117,15 +1117,9 @@ static void __exit einj_remove(struct faux_device *fdev) } static struct faux_device *einj_dev; -/* - * einj_remove() lives in .exit.text. For drivers registered via - * platform_driver_probe() this is ok because they cannot get unbound at - * runtime. So mark the driver struct with __refdata to prevent modpost - * triggering a section mismatch warning. - */ -static struct faux_device_ops einj_device_ops __refdata = { +static struct faux_device_ops einj_device_ops = { .probe = einj_probe, - .remove = __exit_p(einj_remove), + .remove = einj_remove, }; static int __init einj_init(void) From 779b1a1cb13ae17028aeddb2fbbdba97357a1e15 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 13 Aug 2025 12:25:58 +0200 Subject: [PATCH 2089/2411] cpuidle: governors: menu: Avoid selecting states with too much latency Occasionally, the exit latency of the idle state selected by the menu governor may exceed the PM QoS CPU wakeup latency limit. 
Namely, if the scheduler tick has been stopped already and predicted_ns is greater than the tick period length, the governor may return an idle state whose exit latency exceeds latency_req because that decision is made before checking the current idle state's exit latency. For instance, say that there are 3 idle states, 0, 1, and 2. For idle states 0 and 1, the exit latency is equal to the target residency and the values are 0 and 5 us, respectively. State 2 is deeper and has the exit latency and target residency of 200 us and 2 ms (which is greater than the tick period length), respectively. Say that predicted_ns is equal to TICK_NSEC and the PM QoS latency limit is 20 us. After the first two iterations of the main loop in menu_select(), idx becomes 1 and in the third iteration of it the target residency of the current state (state 2) is greater than predicted_ns. State 2 is not a polling one and predicted_ns is not less than TICK_NSEC, so the check on whether or not the tick has been stopped is done. Say that the tick has been stopped already and there are no imminent timers (that is, delta_tick is greater than the target residency of state 2). In that case, idx becomes 2 and it is returned immediately, but the exit latency of state 2 exceeds the latency limit. Address this issue by modifying the code to compare the exit latency of the current idle state (idle state i) with the latency limit before comparing its target residency with predicted_ns, which allows one more exit_latency_ns check that becomes redundant to be dropped. However, after the above change, latency_req cannot take the predicted_ns value any more, which takes place after commit 38f83090f515 ("cpuidle: menu: Remove iowait influence"), because it may cause a polling state to be returned prematurely. In the context of the previous example say that predicted_ns is 3000 and the PM QoS latency limit is still 20 us. Additionally, say that idle state 0 is a polling one. 
Moving the exit_latency_ns check before the target_residency_ns one causes the loop to terminate in the second iteration, before the target_residency_ns check, so idle state 0 will be returned even though previously state 1 would be returned if there were no imminent timers. For this reason, remove the assignment of the predicted_ns value to latency_req from the code. Fixes: 5ef499cd571c ("cpuidle: menu: Handle stopped tick more aggressively") Cc: 4.17+ # 4.17+ Signed-off-by: Rafael J. Wysocki Reviewed-by: Christian Loehle Link: https://patch.msgid.link/5043159.31r3eYUQgx@rafael.j.wysocki --- drivers/cpuidle/governors/menu.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 81306612a5c6..b2e3d0b0a116 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -287,20 +287,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, return 0; } - if (tick_nohz_tick_stopped()) { - /* - * If the tick is already stopped, the cost of possible short - * idle duration misprediction is much higher, because the CPU - * may be stuck in a shallow idle state for a long time as a - * result of it. In that case say we might mispredict and use - * the known time till the closest timer event for the idle - * state selection. - */ - if (predicted_ns < TICK_NSEC) - predicted_ns = data->next_timer_ns; - } else if (latency_req > predicted_ns) { - latency_req = predicted_ns; - } + /* + * If the tick is already stopped, the cost of possible short idle + * duration misprediction is much higher, because the CPU may be stuck + * in a shallow idle state for a long time as a result of it. In that + * case, say we might mispredict and use the known time till the closest + * timer event for the idle state selection. 
+ */ + if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC) + predicted_ns = data->next_timer_ns; /* * Find the idle state with the lowest power while satisfying @@ -316,13 +311,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, if (idx == -1) idx = i; /* first enabled state */ + if (s->exit_latency_ns > latency_req) + break; + if (s->target_residency_ns > predicted_ns) { /* * Use a physical idle state, not busy polling, unless * a timer is going to trigger soon enough. */ if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) && - s->exit_latency_ns <= latency_req && s->target_residency_ns <= data->next_timer_ns) { predicted_ns = s->target_residency_ns; idx = i; @@ -354,8 +351,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, return idx; } - if (s->exit_latency_ns > latency_req) - break; idx = i; } From af24c20c4633a667ac5b5e20cf9d96f6176a0ca3 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Fri, 15 Aug 2025 10:47:29 +0800 Subject: [PATCH 2090/2411] ASoC: codecs: ES8389: Modify the standby configuration Modify the standby configuration Signed-off-by: Zhang Yi Link: https://patch.msgid.link/20250815024729.3051-1-zhangyi@everest-semi.com Signed-off-by: Mark Brown --- sound/soc/codecs/es8389.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/es8389.c b/sound/soc/codecs/es8389.c index ba1763f36f17..6e4c75d288ef 100644 --- a/sound/soc/codecs/es8389.c +++ b/sound/soc/codecs/es8389.c @@ -636,7 +636,7 @@ static int es8389_set_bias_level(struct snd_soc_component *component, regmap_write(es8389->regmap, ES8389_ANA_CTL1, 0x59); regmap_write(es8389->regmap, ES8389_ADC_EN, 0x00); regmap_write(es8389->regmap, ES8389_CLK_OFF1, 0x00); - regmap_write(es8389->regmap, ES8389_RESET, 0x7E); + regmap_write(es8389->regmap, ES8389_RESET, 0x3E); regmap_update_bits(es8389->regmap, ES8389_DAC_INV, 0x80, 0x80); usleep_range(8000, 8500); regmap_update_bits(es8389->regmap, ES8389_DAC_INV, 0x80,
0x00); From 43c0f6456f801181a80b73d95def0e0fd134e1cc Mon Sep 17 00:00:00 2001 From: Salah Triki Date: Mon, 18 Aug 2025 10:27:30 +0100 Subject: [PATCH 2091/2411] iio: pressure: bmp280: Use IS_ERR() in bmp280_common_probe() `devm_gpiod_get_optional()` may return non-NULL error pointer on failure. Check its return value using `IS_ERR()` and propagate the error if necessary. Fixes: df6e71256c84 ("iio: pressure: bmp280: Explicitly mark GPIO optional") Signed-off-by: Salah Triki Reviewed-by: David Lechner Link: https://patch.msgid.link/20250818092740.545379-2-salah.triki@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/bmp280-core.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index 74505c9ec1a0..6cdc8ed53520 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -3213,11 +3213,12 @@ int bmp280_common_probe(struct device *dev, /* Bring chip out of reset if there is an assigned GPIO line */ gpiod = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(gpiod)) + return dev_err_probe(dev, PTR_ERR(gpiod), "failed to get reset GPIO\n"); + /* Deassert the signal */ - if (gpiod) { - dev_info(dev, "release reset\n"); - gpiod_set_value(gpiod, 0); - } + dev_info(dev, "release reset\n"); + gpiod_set_value(gpiod, 0); data->regmap = regmap; From 22ec0faa0eda30acdd6dcb3c29c872629da677bb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 15 Aug 2025 09:41:22 -0700 Subject: [PATCH 2092/2411] perf test: Fix a build error in x86 topdown test There's an environment that caused the following build error. Include "debug.h" (under util directory) to fix it. 
arch/x86/tests/topdown.c: In function 'event_cb': arch/x86/tests/topdown.c:53:25: error: implicit declaration of function 'pr_debug' [-Werror=implicit-function-declaration] 53 | pr_debug("Broken topdown information for '%s'\n", evsel__name(evsel)); | ^~~~~~~~ cc1: all warnings being treated as errors Link: https://lore.kernel.org/r/20250815164122.289651-1-namhyung@kernel.org Fixes: 5b546de9cc177936 ("perf topdown: Use attribute to see an event is a topdown metic or slots") Reported-by: Naresh Kamboju Signed-off-by: Namhyung Kim --- tools/perf/arch/x86/tests/topdown.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/arch/x86/tests/topdown.c b/tools/perf/arch/x86/tests/topdown.c index 8d0ea7a4bbc1..1eba3b4594ef 100644 --- a/tools/perf/arch/x86/tests/topdown.c +++ b/tools/perf/arch/x86/tests/topdown.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "arch-tests.h" #include "../util/topdown.h" +#include "debug.h" #include "evlist.h" #include "parse-events.h" #include "pmu.h" From bd842ff41543af424c2473dc16c678ac8ba2b43f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 2093/2411] tools headers: Sync KVM headers with the kernel source To pick up the changes in this cset: f55ce5a6cd33211c KVM: arm64: Expose new KVM cap for cacheable PFNMAP 28224ef02b56fcee KVM: TDX: Report supported optional TDVMCALLs in TDX capabilities 4580dbef5ce0f95a KVM: TDX: Exit to userspace for SetupEventNotifyInterrupt 25e8b1dd4883e6c2 KVM: TDX: Exit to userspace for GetTdVmCallInfo cf207eac06f661fb KVM: TDX: Handle TDG.VP.VMCALL This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h Please see tools/include/uapi/README for further details. 
Cc: Paolo Bonzini Cc: kvm@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/arch/x86/include/uapi/asm/kvm.h | 8 +++++++- tools/include/uapi/linux/kvm.h | 27 +++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 6f3499507c5e..0f15d683817d 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -965,7 +965,13 @@ struct kvm_tdx_cmd { struct kvm_tdx_capabilities { __u64 supported_attrs; __u64 supported_xfam; - __u64 reserved[254]; + + __u64 kernel_tdvmcallinfo_1_r11; + __u64 user_tdvmcallinfo_1_r11; + __u64 kernel_tdvmcallinfo_1_r12; + __u64 user_tdvmcallinfo_1_r12; + + __u64 reserved[250]; /* Configurable CPUID bits for userspace */ struct kvm_cpuid2 cpuid; diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 7415a3863891..f0f0d49d2544 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -178,6 +178,7 @@ struct kvm_xen_exit { #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 +#define KVM_EXIT_TDX 40 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -447,6 +448,31 @@ struct kvm_run { __u64 gpa; __u64 size; } memory_fault; + /* KVM_EXIT_TDX */ + struct { + __u64 flags; + __u64 nr; + union { + struct { + __u64 ret; + __u64 data[5]; + } unknown; + struct { + __u64 ret; + __u64 gpa; + __u64 size; + } get_quote; + struct { + __u64 ret; + __u64 leaf; + __u64 r11, r12, r13, r14; + } get_tdvmcall_info; + struct { + __u64 ret; + __u64 vector; + } setup_event_notify; + }; + } tdx; /* Fix the size of the union. 
*/ char padding[256]; }; @@ -935,6 +961,7 @@ struct kvm_enable_cap { #define KVM_CAP_ARM_EL2 240 #define KVM_CAP_ARM_EL2_E2H0 241 #define KVM_CAP_RISCV_MP_STATE_RESET 242 +#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 struct kvm_irq_routing_irqchip { __u32 irqchip; From 8aa5a3b68ad144da49a3d17f165e6561255e3529 Mon Sep 17 00:00:00 2001 From: Rajeev Mishra Date: Mon, 18 Aug 2025 18:48:20 +0000 Subject: [PATCH 2094/2411] loop: Consolidate size calculation logic into lo_calculate_size() Renamed get_size to lo_calculate_size and merged the logic from get_size and get_loop_size into a single function. Update all callers to use lo_calculate_size. This is done in preparation for improving the size detection logic. Signed-off-by: Rajeev Mishra Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20250818184821.115033-2-rajeevm@hpe.com [axboe: massage commit message] Signed-off-by: Jens Axboe --- drivers/block/loop.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 1b6ee91f8eb9..0e1b9eb9db10 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -137,20 +137,18 @@ static void loop_global_unlock(struct loop_device *lo, bool global) static int max_part; static int part_shift; -static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) +static loff_t lo_calculate_size(struct loop_device *lo, struct file *file) { loff_t loopsize; - /* Compute loopsize in bytes */ loopsize = i_size_read(file->f_mapping->host); - if (offset > 0) - loopsize -= offset; + if (lo->lo_offset > 0) + loopsize -= lo->lo_offset; /* offset is beyond i_size, weird but possible */ if (loopsize < 0) return 0; - - if (sizelimit > 0 && sizelimit < loopsize) - loopsize = sizelimit; + if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize) + loopsize = lo->lo_sizelimit; /* * Unfortunately, if we want to do I/O on the device, * the number of 512-byte sectors has to fit into a sector_t. 
@@ -158,11 +156,6 @@ static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) return loopsize >> 9; } -static loff_t get_loop_size(struct loop_device *lo, struct file *file) -{ - return get_size(lo->lo_offset, lo->lo_sizelimit, file); -} - /* * We support direct I/O only if lo_offset is aligned with the logical I/O size * of backing device, and the logical block size of loop is bigger than that of @@ -569,7 +562,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, error = -EINVAL; /* size of the new backing store needs to be the same */ - if (get_loop_size(lo, file) != get_loop_size(lo, old_file)) + if (lo_calculate_size(lo, file) != lo_calculate_size(lo, old_file)) goto out_err; /* @@ -1063,7 +1056,7 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, loop_update_dio(lo); loop_sysfs_init(lo); - size = get_loop_size(lo, file); + size = lo_calculate_size(lo, file); loop_set_size(lo, size); /* Order wrt reading lo_state in loop_validate_file(). 
*/ @@ -1255,8 +1248,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (partscan) clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); if (!err && size_changed) { - loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit, - lo->lo_backing_file); + loff_t new_size = lo_calculate_size(lo, lo->lo_backing_file); loop_set_size(lo, new_size); } out_unlock: @@ -1399,7 +1391,7 @@ static int loop_set_capacity(struct loop_device *lo) if (unlikely(lo->lo_state != Lo_bound)) return -ENXIO; - size = get_loop_size(lo, lo->lo_backing_file); + size = lo_calculate_size(lo, lo->lo_backing_file); loop_set_size(lo, size); return 0; From 47b71abd58461a67cae71d2f2a9d44379e4e2fcf Mon Sep 17 00:00:00 2001 From: Rajeev Mishra Date: Mon, 18 Aug 2025 18:48:21 +0000 Subject: [PATCH 2095/2411] loop: use vfs_getattr_nosec for accurate file size Use vfs_getattr_nosec() in lo_calculate_size() for getting the file size, rather than just read the cached inode size via i_size_read(). This provides better results than cached inode data, particularly for network filesystems where metadata may be stale. Signed-off-by: Rajeev Mishra Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20250818184821.115033-3-rajeevm@hpe.com [axboe: massage commit message] Signed-off-by: Jens Axboe --- drivers/block/loop.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 0e1b9eb9db10..57263c273f0f 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -139,9 +139,20 @@ static int part_shift; static loff_t lo_calculate_size(struct loop_device *lo, struct file *file) { + struct kstat stat; loff_t loopsize; - /* Compute loopsize in bytes */ - loopsize = i_size_read(file->f_mapping->host); + int ret; + + /* + * Get the accurate file size. This provides better results than + * cached inode data, particularly for network filesystems where + * metadata may be stale. 
+ */ + ret = vfs_getattr_nosec(&file->f_path, &stat, STATX_SIZE, 0); + if (ret) + return 0; + + loopsize = stat.size; if (lo->lo_offset > 0) loopsize -= lo->lo_offset; /* offset is beyond i_size, weird but possible */ From d0a2b527d8c32e46ccb8a34053468d4ff0c27e5c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Aug 2025 12:11:02 +0200 Subject: [PATCH 2096/2411] block: tone down bio_check_eod bdev_nr_sectors() == 0 is a pattern used for block devices that have been hot removed, don't spam the log about them. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250818101102.1604551-1-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index fdac48aec5ef..4201504158a1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -557,7 +557,7 @@ static inline int bio_check_eod(struct bio *bio) sector_t maxsector = bdev_nr_sectors(bio->bi_bdev); unsigned int nr_sectors = bio_sectors(bio); - if (nr_sectors && + if (nr_sectors && maxsector && (nr_sectors > maxsector || bio->bi_iter.bi_sector > maxsector - nr_sectors)) { pr_info_ratelimited("%s: attempt to access beyond end of device\n" From 6cb8607934d937f4ad24ec9ad26aeb669e266937 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 2097/2411] tools headers: Sync linux/bits.h with the kernel source To pick up the changes in this cset: 104ea1c84b91c9f4 bits: unify the non-asm GENMASK*() 6d4471252ccc1722 bits: split the definition of the asm and non-asm GENMASK*() This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/linux/bits.h include/linux/bits.h Please see tools/include/uapi/README for further details. 
Cc: Yury Norov Signed-off-by: Namhyung Kim --- tools/include/linux/bits.h | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 7ad056219115..a40cc861b3a7 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -2,10 +2,8 @@ #ifndef __LINUX_BITS_H #define __LINUX_BITS_H -#include #include #include -#include #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) @@ -50,10 +48,14 @@ (type_max(t) << (l) & \ type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h))))) +#define GENMASK(h, l) GENMASK_TYPE(unsigned long, h, l) +#define GENMASK_ULL(h, l) GENMASK_TYPE(unsigned long long, h, l) + #define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l) #define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l) #define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l) #define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l) +#define GENMASK_U128(h, l) GENMASK_TYPE(u128, h, l) /* * Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The @@ -79,28 +81,9 @@ * BUILD_BUG_ON_ZERO is not available in h files included from asm files, * disable the input check if that is the case. */ -#define GENMASK_INPUT_CHECK(h, l) 0 +#define GENMASK(h, l) __GENMASK(h, l) +#define GENMASK_ULL(h, l) __GENMASK_ULL(h, l) #endif /* !defined(__ASSEMBLY__) */ -#define GENMASK(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) -#define GENMASK_ULL(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) - -#if !defined(__ASSEMBLY__) -/* - * Missing asm support - * - * __GENMASK_U128() depends on _BIT128() which would not work - * in the asm code, as it shifts an 'unsigned __int128' data - * type instead of direct representation of 128 bit constants - * such as long and unsigned long. The fundamental problem is - * that a 128 bit constant will get silently truncated by the - * gcc compiler. 
- */ -#define GENMASK_U128(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l)) -#endif - #endif /* __LINUX_BITS_H */ From aa34642f6fc36a436de5ae5b30d414578b3622f5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 2098/2411] tools headers: Sync linux/cfi_types.h with the kernel source To pick up the changes in this cset: 5ccaeedb489b41ce cfi: add C CFI type macro This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/linux/cfi_types.h include/linux/cfi_types.h Please see tools/include/uapi/README for further details. Cc: Mark Rutland Signed-off-by: Namhyung Kim --- tools/include/linux/cfi_types.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h index 6b8713675765..685f7181780f 100644 --- a/tools/include/linux/cfi_types.h +++ b/tools/include/linux/cfi_types.h @@ -41,5 +41,28 @@ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_CFI_CLANG +#define DEFINE_CFI_TYPE(name, func) \ + /* \ + * Force a reference to the function so the compiler generates \ + * __kcfi_typeid_<func>.
\ + */ \ + __ADDRESSABLE(func); \ + /* u32 name __ro_after_init = __kcfi_typeid_<func> */ \ + extern u32 name; \ + asm ( \ + " .pushsection .data..ro_after_init,\"aw\",\%progbits \n" \ + " .type " #name ",\%object \n" \ + " .globl " #name " \n" \ + " .p2align 2, 0x0 \n" \ + #name ": \n" \ + " .4byte __kcfi_typeid_" #func " \n" \ + " .size " #name ", 4 \n" \ + " .popsection \n" \ + ); +#endif + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_CFI_TYPES_H */ From 619f55c859014e2235f83ba6cde8c59edc492f39 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 2099/2411] tools headers: Sync x86 headers with the kernel source To pick up the changes in this cset: 7b306dfa326f7011 x86/sev: Evict cache lines during SNP memory validation 65f55a30176662ee x86/CPU/AMD: Add CPUID faulting support d8010d4ba43e9f79 x86/bugs: Add a Transient Scheduler Attacks mitigation a3c4f3396b82849a x86/msr-index: Add AMD workload classification MSRs 17ec2f965344ee3f KVM: VMX: Allow guest to set DEBUGCTL.RTM_DEBUG if RTM is supported This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Please see tools/include/uapi/README for further details.
Cc: x86@kernel.org Signed-off-by: Namhyung Kim --- tools/arch/x86/include/asm/cpufeatures.h | 10 +++++++++- tools/arch/x86/include/asm/msr-index.h | 7 +++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index ee176236c2be..06fc0479a23f 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -218,6 +218,7 @@ #define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */ #define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */ #define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */ +#define X86_FEATURE_COHERENCY_SFW_NO ( 8*32+ 4) /* SNP cache coherency software work around not needed */ #define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */ #define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */ @@ -456,10 +457,14 @@ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ #define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ #define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */ #define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */ + #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ #define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ +#define X86_FEATURE_GP_ON_USER_CPUID (20*32+17) /* User CPUID faulting */ + #define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */ #define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ @@ -487,6 +492,9 @@ #define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to 
downclocking */ #define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */ #define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */ +#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */ +#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ +#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ /* * BUG word(s) @@ -542,5 +550,5 @@ #define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */ #define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ #define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ - +#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 5cfb5d74dd5f..b65c3ba5fa14 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -419,6 +419,7 @@ #define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1UL << 12) #define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14 #define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT) +#define DEBUGCTLMSR_RTM_DEBUG BIT(15) #define MSR_PEBS_FRONTEND 0x000003f7 @@ -733,6 +734,11 @@ #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 +/* AMD Hardware Feedback Support MSRs */ +#define MSR_AMD_WORKLOAD_CLASS_CONFIG 0xc0000500 +#define MSR_AMD_WORKLOAD_CLASS_ID 0xc0000501 +#define MSR_AMD_WORKLOAD_HRST 0xc0000502 + /* AMD Last Branch Record MSRs */ #define MSR_AMD64_LBR_SELECT 0xc000010e @@ -831,6 +837,7 @@ #define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT) #define MSR_K7_HWCR_IRPERF_EN_BIT 30 #define MSR_K7_HWCR_IRPERF_EN 
BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) +#define MSR_K7_HWCR_CPUID_USER_DIS_BIT 35 #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 #define MSR_K7_HWCR_CPB_DIS_BIT 25 From 14ec8ce45611c767656e4fa575f17b05344aa80a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 2100/2411] tools headers: Sync arm64 headers with the kernel source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To pick up the changes in this cset: efe676a1a7554219 arm64: proton-pack: Add new CPUs 'k' values for branch mitigation e18c09b204e81702 arm64: Add support for HIP09 Spectre-BHB mitigation a9b5bd81b294d30a arm64: cputype: Add MIDR_CORTEX_A76AE 53a52a0ec7680287 arm64: cputype: Add comments about Qualcomm Kryo 5XX and 6XX cores 401c3333bb2396aa arm64: cputype: Add QCOM_CPU_PART_KRYO_3XX_GOLD 86edf6bdcf0571c0 smccc/kvm_guest: Enable errata based on implementation CPUs 0bc9a9e85fcf4ffb KVM: arm64: Work around x1e's CNTVOFF_EL2 bogosity This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/arch/arm64/include/asm/cputype.h arch/arm64/include/asm/cputype.h But the following two changes cannot be applied since they introduced new build errors in util/arm-spe.c. So it still has the warning after this change. c8c2647e69bedf80 arm64: Make _midr_in_range_list() an exported function e3121298c7fcaf48 arm64: Modify _midr_range() functions to read MIDR/REVIDR internally Please see tools/include/uapi/README for further details.
Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Namhyung Kim perf build: [WIP] Fix arm-spe build errors Signed-off-by: Namhyung Kim --- tools/arch/arm64/include/asm/cputype.h | 28 ++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 9a5d85cfd1fb..139d5e87dc95 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -75,11 +75,13 @@ #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_CORTEX_A76AE 0xD0E #define ARM_CPU_PART_NEOVERSE_V1 0xD40 #define ARM_CPU_PART_CORTEX_A78 0xD41 #define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 +#define ARM_CPU_PART_CORTEX_X1C 0xD4C #define ARM_CPU_PART_CORTEX_A520 0xD80 #define ARM_CPU_PART_CORTEX_A710 0xD47 #define ARM_CPU_PART_CORTEX_A715 0xD4D @@ -119,9 +121,11 @@ #define QCOM_CPU_PART_KRYO 0x200 #define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800 #define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801 +#define QCOM_CPU_PART_KRYO_3XX_GOLD 0x802 #define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803 #define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804 #define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805 +#define QCOM_CPU_PART_ORYON_X1 0x001 #define NVIDIA_CPU_PART_DENVER 0x003 #define NVIDIA_CPU_PART_CARMEL 0x004 @@ -129,6 +133,7 @@ #define FUJITSU_CPU_PART_A64FX 0x001 #define HISI_CPU_PART_TSV110 0xD01 +#define HISI_CPU_PART_HIP09 0xD02 #define HISI_CPU_PART_HIP12 0xD06 #define APPLE_CPU_PART_M1_ICESTORM 0x022 @@ -159,11 +164,13 @@ #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_CORTEX_A76AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, 
ARM_CPU_PART_CORTEX_A76AE) #define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) #define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) #define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) #define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) #define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715) @@ -196,13 +203,26 @@ #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) #define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD) #define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER) +#define MIDR_QCOM_KRYO_3XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_GOLD) #define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER) #define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD) #define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) +#define MIDR_QCOM_ORYON_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_ORYON_X1) + +/* + * NOTES: + * - Qualcomm Kryo 5XX Prime / Gold ID themselves as MIDR_CORTEX_A77 + * - Qualcomm Kryo 5XX Silver IDs itself as MIDR_QCOM_KRYO_4XX_SILVER + * - Qualcomm Kryo 6XX Prime IDs itself as MIDR_CORTEX_X1 + * - Qualcomm Kryo 6XX Gold IDs itself as ARM_CPU_PART_CORTEX_A78 + * - Qualcomm Kryo 6XX Silver IDs itself as MIDR_CORTEX_A55 + */ + #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) #define MIDR_NVIDIA_CARMEL 
MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) +#define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09) #define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12) #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) @@ -291,6 +311,14 @@ static inline u32 __attribute_const__ read_cpuid_id(void) return read_cpuid(MIDR_EL1); } +struct target_impl_cpu { + u64 midr; + u64 revidr; + u64 aidr; +}; + +bool cpu_errata_set_target_impl(u64 num, void *impl_cpus); + static inline u64 __attribute_const__ read_cpuid_mpidr(void) { return read_cpuid(MPIDR_EL1); From c85538c4e3c7111958057d15ea8ee444116891c3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 2101/2411] tools headers: Sync powerpc headers with the kernel source To pick up the changes in this cset: 69bf2053608423cb powerpc: Drop GPL boilerplate text with obsolete FSF address This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/arch/powerpc/include/uapi/asm/kvm.h arch/powerpc/include/uapi/asm/kvm.h Please see tools/include/uapi/README for further details. 
Cc: Madhavan Srinivasan Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Namhyung Kim --- tools/arch/powerpc/include/uapi/asm/kvm.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index eaeda001784e..077c5437f521 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -1,18 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * Copyright IBM Corp. 
2007 * * Authors: Hollis Blanchard From 52174e0eb13876654f56701c26a672890aa5e7e3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 2102/2411] tools headers: Sync syscall tables with the kernel source To pick up the changes in this cset: be7efb2d20d67f33 fs: introduce file_getattr and file_setattr syscalls This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h diff -u tools/scripts/syscall.tbl scripts/syscall.tbl diff -u tools/perf/arch/x86/entry/syscalls/syscall_32.tbl arch/x86/entry/syscalls/syscall_32.tbl diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl diff -u tools/perf/arch/arm/entry/syscalls/syscall.tbl arch/arm/tools/syscall.tbl diff -u tools/perf/arch/sh/entry/syscalls/syscall.tbl arch/sh/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/sparc/entry/syscalls/syscall.tbl arch/sparc/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/xtensa/entry/syscalls/syscall.tbl arch/xtensa/kernel/syscalls/syscall.tbl Please see tools/include/uapi/README for further details. 
Cc: Arnd Bergmann CC: linux-api@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/asm-generic/unistd.h | 8 +++++++- tools/perf/arch/arm/entry/syscalls/syscall.tbl | 2 ++ tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl | 2 ++ tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 2 ++ tools/perf/arch/s390/entry/syscalls/syscall.tbl | 2 ++ tools/perf/arch/sh/entry/syscalls/syscall.tbl | 2 ++ tools/perf/arch/sparc/entry/syscalls/syscall.tbl | 2 ++ tools/perf/arch/x86/entry/syscalls/syscall_32.tbl | 2 ++ tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 2 ++ tools/perf/arch/xtensa/entry/syscalls/syscall.tbl | 2 ++ tools/scripts/syscall.tbl | 2 ++ 11 files changed, 27 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 2892a45023af..04e0077fb4c9 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -852,8 +852,14 @@ __SYSCALL(__NR_removexattrat, sys_removexattrat) #define __NR_open_tree_attr 467 __SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) +/* fs/inode.c */ +#define __NR_file_getattr 468 +__SYSCALL(__NR_file_getattr, sys_file_getattr) +#define __NR_file_setattr 469 +__SYSCALL(__NR_file_setattr, sys_file_setattr) + #undef __NR_syscalls -#define __NR_syscalls 468 +#define __NR_syscalls 470 /* * 32 bit systems traditionally used different diff --git a/tools/perf/arch/arm/entry/syscalls/syscall.tbl b/tools/perf/arch/arm/entry/syscalls/syscall.tbl index 27c1d5ebcd91..b07e699aaa3c 100644 --- a/tools/perf/arch/arm/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/arm/entry/syscalls/syscall.tbl @@ -482,3 +482,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl 
b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 1e8c44c7b614..7a7049c2c307 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -382,3 +382,5 @@ 465 n64 listxattrat sys_listxattrat 466 n64 removexattrat sys_removexattrat 467 n64 open_tree_attr sys_open_tree_attr +468 n64 file_getattr sys_file_getattr +469 n64 file_setattr sys_file_setattr diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 9a084bdb8926..b453e80dfc00 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -558,3 +558,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index a4569b96ef06..8a6744d658db 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -470,3 +470,5 @@ 465 common listxattrat sys_listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr sys_file_setattr diff --git a/tools/perf/arch/sh/entry/syscalls/syscall.tbl b/tools/perf/arch/sh/entry/syscalls/syscall.tbl index 52a7652fcff6..5e9c9eff5539 100644 --- a/tools/perf/arch/sh/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sh/entry/syscalls/syscall.tbl @@ -471,3 +471,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git 
a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl index 83e45eb6c095..ebb7d06d1044 100644 --- a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl @@ -513,3 +513,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl index ac007ea00979..4877e16da69a 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl @@ -473,3 +473,5 @@ 465 i386 listxattrat sys_listxattrat 466 i386 removexattrat sys_removexattrat 467 i386 open_tree_attr sys_open_tree_attr +468 i386 file_getattr sys_file_getattr +469 i386 file_setattr sys_file_setattr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index cfb5ca41e30d..92cf0fe2291e 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -391,6 +391,8 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl index f657a77314f8..374e4cb788d8 100644 --- a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl @@ -438,3 +438,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr 
sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/scripts/syscall.tbl b/tools/scripts/syscall.tbl index 580b4e246aec..d1ae5e92c615 100644 --- a/tools/scripts/syscall.tbl +++ b/tools/scripts/syscall.tbl @@ -408,3 +408,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr From b18aabe283a10774977d698c075d2296a2336aef Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 2103/2411] tools headers: Sync uapi/linux/fcntl.h with the kernel source To pick up the changes in this cset: 3941e37f62fe2c3c uapi/fcntl: add FD_PIDFS_ROOT cd5d2006327b6d84 uapi/fcntl: add FD_INVALID 67fcec2919e4ed31 fcntl/pidfd: redefine PIDFD_SELF_THREAD_GROUP a4c746f06853f91d uapi/fcntl: mark range as reserved This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/uapi/linux/fcntl.h include/uapi/linux/fcntl.h Please see tools/include/uapi/README for further details. Cc: Christian Brauner Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Namhyung Kim --- .../trace/beauty/include/uapi/linux/fcntl.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h index a15ac2fa4b20..f291ab4f94eb 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h @@ -90,10 +90,28 @@ #define DN_ATTRIB 0x00000020 /* File changed attibutes */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ +/* Reserved kernel ranges [-100], [-10000, -40000]. */ #define AT_FDCWD -100 /* Special value for dirfd used to indicate openat should use the current working directory. 
*/ +/* + * The concept of process and threads in userland and the kernel is a confusing + * one - within the kernel every thread is a 'task' with its own individual PID, + * however from userland's point of view threads are grouped by a single PID, + * which is that of the 'thread group leader', typically the first thread + * spawned. + * + * To cut the Gideon knot, for internal kernel usage, we refer to + * PIDFD_SELF_THREAD to refer to the current thread (or task from a kernel + * perspective), and PIDFD_SELF_THREAD_GROUP to refer to the current thread + * group leader... + */ +#define PIDFD_SELF_THREAD -10000 /* Current thread. */ +#define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ + +#define FD_PIDFS_ROOT -10002 /* Root of the pidfs filesystem */ +#define FD_INVALID -10009 /* Invalid file descriptor: -10000 - EBADF = -10009 */ /* Generic flags for the *at(2) family of syscalls. */ From 4a4083af03a7a75a86c392fd60cb37ce23ed87b6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 2104/2411] tools headers: Sync uapi/linux/fs.h with the kernel source To pick up the changes in this cset: 76fdb7eb4e1c9108 uapi: export PROCFS_ROOT_INO ca115d7e754691c0 tree-wide: s/struct fileattr/struct file_kattr/g be7efb2d20d67f33 fs: introduce file_getattr and file_setattr syscalls 9eb22f7fedfc9eb1 fs: add ioctl to query metadata and protection info capabilities This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/uapi/linux/fs.h include/uapi/linux/fs.h Please see tools/include/uapi/README for further details. 
Cc: Christian Brauner Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Namhyung Kim --- .../perf/trace/beauty/include/uapi/linux/fs.h | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 0098b0ce8ccb..0bd678a4a10e 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -60,6 +60,17 @@ #define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ #define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ +/* + * The root inode of procfs is guaranteed to always have the same inode number. + * For programs that make heavy use of procfs, verifying that the root is a + * real procfs root and using openat2(RESOLVE_{NO_{XDEV,MAGICLINKS},BENEATH}) + * will allow you to make sure you are never tricked into operating on the + * wrong procfs file. + */ +enum procfs_ino { + PROCFS_ROOT_INO = 1, +}; + struct file_clone_range { __s64 src_fd; __u64 src_offset; @@ -91,6 +102,63 @@ struct fs_sysfs_path { __u8 name[128]; }; +/* Protection info capability flags */ +#define LBMD_PI_CAP_INTEGRITY (1 << 0) +#define LBMD_PI_CAP_REFTAG (1 << 1) + +/* Checksum types for Protection Information */ +#define LBMD_PI_CSUM_NONE 0 +#define LBMD_PI_CSUM_IP 1 +#define LBMD_PI_CSUM_CRC16_T10DIF 2 +#define LBMD_PI_CSUM_CRC64_NVME 4 + +/* sizeof first published struct */ +#define LBMD_SIZE_VER0 16 + +/* + * Logical block metadata capability descriptor + * If the device does not support metadata, all the fields will be zero. + * Applications must check lbmd_flags to determine whether metadata is + * supported or not. 
+ */ +struct logical_block_metadata_cap { + /* Bitmask of logical block metadata capability flags */ + __u32 lbmd_flags; + /* + * The amount of data described by each unit of logical block + * metadata + */ + __u16 lbmd_interval; + /* + * Size in bytes of the logical block metadata associated with each + * interval + */ + __u8 lbmd_size; + /* + * Size in bytes of the opaque block tag associated with each + * interval + */ + __u8 lbmd_opaque_size; + /* + * Offset in bytes of the opaque block tag within the logical block + * metadata + */ + __u8 lbmd_opaque_offset; + /* Size in bytes of the T10 PI tuple associated with each interval */ + __u8 lbmd_pi_size; + /* Offset in bytes of T10 PI tuple within the logical block metadata */ + __u8 lbmd_pi_offset; + /* T10 PI guard tag type */ + __u8 lbmd_guard_tag_type; + /* Size in bytes of the T10 PI application tag */ + __u8 lbmd_app_tag_size; + /* Size in bytes of the T10 PI reference tag */ + __u8 lbmd_ref_tag_size; + /* Size in bytes of the T10 PI storage tag */ + __u8 lbmd_storage_tag_size; + __u8 pad; +}; + /* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */ #define FILE_DEDUPE_RANGE_SAME 0 #define FILE_DEDUPE_RANGE_DIFFERS 1 @@ -148,6 +216,24 @@ struct fsxattr { unsigned char fsx_pad[8]; }; +/* + * Variable size structure for file_[sg]et_attr(). + * + * Note. This is alternative to the structure 'struct file_kattr'/'struct fsxattr'. + * As this structure is passed to/from userspace with its size, this can + * be versioned based on the size. 
+ */ +struct file_attr { + __u64 fa_xflags; /* xflags field value (get/set) */ + __u32 fa_extsize; /* extsize field value (get/set)*/ + __u32 fa_nextents; /* nextents field value (get) */ + __u32 fa_projid; /* project identifier (get/set) */ + __u32 fa_cowextsize; /* CoW extsize field value (get/set) */ +}; + +#define FILE_ATTR_SIZE_VER0 24 +#define FILE_ATTR_SIZE_LATEST FILE_ATTR_SIZE_VER0 + /* * Flags for the fsx_xflags field */ @@ -247,6 +333,8 @@ struct fsxattr { * also /sys/kernel/debug/ for filesystems with debugfs exports */ #define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path) +/* Get logical block metadata capability details */ +#define FS_IOC_GETLBMD_CAP _IOWR(0x15, 2, struct logical_block_metadata_cap) /* * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) From e7e79e99726190a5a83d158576cd448896d68102 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 2105/2411] tools headers: Sync uapi/linux/prctl.h with the kernel source To pick up the changes in this cset: b1fabef37bd504f3 prctl: Introduce PR_MTE_STORE_ONLY a2fc422ed75748ee syscall_user_dispatch: Add PR_SYS_DISPATCH_INCLUSIVE_ON This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/uapi/linux/prctl.h include/uapi/linux/prctl.h Please see tools/include/uapi/README for further details. 
Signed-off-by: Namhyung Kim --- tools/perf/trace/beauty/include/uapi/linux/prctl.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index 3b93fb906e3c..ed3aed264aeb 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -244,6 +244,8 @@ struct prctl_mm_map { # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Unused; kept only for source compatibility */ # define PR_MTE_TCF_SHIFT 1 +/* MTE tag check store only */ +# define PR_MTE_STORE_ONLY (1UL << 19) /* RISC-V pointer masking tag length */ # define PR_PMLEN_SHIFT 24 # define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT) @@ -255,7 +257,12 @@ struct prctl_mm_map { /* Dispatch syscalls to a userspace handler */ #define PR_SET_SYSCALL_USER_DISPATCH 59 # define PR_SYS_DISPATCH_OFF 0 -# define PR_SYS_DISPATCH_ON 1 +/* Enable dispatch except for the specified range */ +# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1 +/* Enable dispatch for the specified range */ +# define PR_SYS_DISPATCH_INCLUSIVE_ON 2 +/* Legacy name for backwards compatibility */ +# define PR_SYS_DISPATCH_ON PR_SYS_DISPATCH_EXCLUSIVE_ON /* The control values for the user space selector when dispatch is enabled */ # define SYSCALL_DISPATCH_FILTER_ALLOW 0 # define SYSCALL_DISPATCH_FILTER_BLOCK 1 From f79a62f4b3c750759e60a402e8fe5180fc5771f0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 2106/2411] tools headers: Sync uapi/linux/vhost.h with the kernel source To pick up the changes in this cset: 7d9896e9f6d02d8a vhost: Reintroduce kthread API and add mode selection 333c515d189657c9 vhost-net: allow configuring extended features This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/uapi/linux/vhost.h include/uapi/linux/vhost.h Please see 
tools/include/uapi/README for further details. Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: kvm@vger.kernel.org Cc: virtualization@lists.linux.dev Signed-off-by: Namhyung Kim --- .../trace/beauty/include/uapi/linux/vhost.h | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tools/perf/trace/beauty/include/uapi/linux/vhost.h b/tools/perf/trace/beauty/include/uapi/linux/vhost.h index d4b3e2ae1314..c57674a6aa0d 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/vhost.h +++ b/tools/perf/trace/beauty/include/uapi/linux/vhost.h @@ -235,4 +235,39 @@ */ #define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \ struct vhost_vring_state) + +/* Extended features manipulation */ +#define VHOST_GET_FEATURES_ARRAY _IOR(VHOST_VIRTIO, 0x83, \ + struct vhost_features_array) +#define VHOST_SET_FEATURES_ARRAY _IOW(VHOST_VIRTIO, 0x83, \ + struct vhost_features_array) + +/* fork_owner values for vhost */ +#define VHOST_FORK_OWNER_KTHREAD 0 +#define VHOST_FORK_OWNER_TASK 1 + +/** + * VHOST_SET_FORK_FROM_OWNER - Set the fork_owner flag for the vhost device, + * This ioctl must called before VHOST_SET_OWNER. + * Only available when CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y + * + * @param fork_owner: An 8-bit value that determines the vhost thread mode + * + * When fork_owner is set to VHOST_FORK_OWNER_TASK(default value): + * - Vhost will create vhost worker as tasks forked from the owner, + * inheriting all of the owner's attributes. + * + * When fork_owner is set to VHOST_FORK_OWNER_KTHREAD: + * - Vhost will create vhost workers as kernel threads. + */ +#define VHOST_SET_FORK_FROM_OWNER _IOW(VHOST_VIRTIO, 0x84, __u8) + +/** + * VHOST_GET_FORK_OWNER - Get the current fork_owner flag for the vhost device. + * Only available when CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y + * + * @return: An 8-bit value indicating the current thread mode. 
+ */ +#define VHOST_GET_FORK_FROM_OWNER _IOR(VHOST_VIRTIO, 0x85, __u8) + #endif From 0aa86640ebd98d77fb64acef5684e42fba517d2d Mon Sep 17 00:00:00 2001 From: Gabe Teeger Date: Wed, 9 Jul 2025 14:12:22 -0400 Subject: [PATCH 2107/2411] drm/amd/display: Revert Add HPO encoder support to Replay This reverts commits: commit 1f26214d268b ("drm/amd/display: Add HPO encoder support to Replay") commit 3bfce48b109f ("drm/amd/display: Add support for Panel Replay on DP1 eDP (panel_inst=1)") due to visual confirm issue. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Gabe Teeger Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 92f68f6a1b297633159a3f3759e4dfc7e5b58abb) --- .../gpu/drm/amd/display/dc/dce/dmub_replay.c | 43 ++----------------- .../gpu/drm/amd/display/dc/dce/dmub_replay.h | 2 +- .../link/protocols/link_edp_panel_control.c | 2 +- .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 20 --------- 4 files changed, 5 insertions(+), 62 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index e7a318e26d38..fcd3d86ad517 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -4,7 +4,6 @@ #include "dc.h" #include "dc_dmub_srv.h" -#include "dc_dp_types.h" #include "dmub/dmub_srv.h" #include "core_types.h" #include "dmub_replay.h" @@ -44,45 +43,21 @@ static void dmub_replay_get_state(struct dmub_replay *dmub, enum replay_state *s /* * Enable/Disable Replay. 
*/ -static void dmub_replay_enable(struct dmub_replay *dmub, bool enable, bool wait, uint8_t panel_inst, - struct dc_link *link) +static void dmub_replay_enable(struct dmub_replay *dmub, bool enable, bool wait, uint8_t panel_inst) { union dmub_rb_cmd cmd; struct dc_context *dc = dmub->ctx; uint32_t retry_count; enum replay_state state = REPLAY_STATE_0; - struct pipe_ctx *pipe_ctx = NULL; - struct resource_context *res_ctx = &link->ctx->dc->current_state->res_ctx; - uint8_t i; memset(&cmd, 0, sizeof(cmd)); cmd.replay_enable.header.type = DMUB_CMD__REPLAY; cmd.replay_enable.data.panel_inst = panel_inst; cmd.replay_enable.header.sub_type = DMUB_CMD__REPLAY_ENABLE; - if (enable) { + if (enable) cmd.replay_enable.data.enable = REPLAY_ENABLE; - // hpo stream/link encoder assignments are not static, need to update everytime we try to enable replay - if (link->cur_link_settings.link_rate >= LINK_RATE_UHBR10) { - for (i = 0; i < MAX_PIPES; i++) { - if (res_ctx && - res_ctx->pipe_ctx[i].stream && - res_ctx->pipe_ctx[i].stream->link && - res_ctx->pipe_ctx[i].stream->link == link && - res_ctx->pipe_ctx[i].stream->link->connector_signal == SIGNAL_TYPE_EDP) { - pipe_ctx = &res_ctx->pipe_ctx[i]; - //TODO: refactor for multi edp support - break; - } - } - - if (!pipe_ctx) - return; - - cmd.replay_enable.data.hpo_stream_enc_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; - cmd.replay_enable.data.hpo_link_enc_inst = pipe_ctx->link_res.hpo_dp_link_enc->inst; - } - } else + else cmd.replay_enable.data.enable = REPLAY_DISABLE; cmd.replay_enable.header.payload_bytes = sizeof(struct dmub_rb_cmd_replay_enable_data); @@ -174,17 +149,6 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, copy_settings_data->digbe_inst = replay_context->digbe_inst; copy_settings_data->digfe_inst = replay_context->digfe_inst; - if (link->cur_link_settings.link_rate >= LINK_RATE_UHBR10) { - if (pipe_ctx->stream_res.hpo_dp_stream_enc) - copy_settings_data->hpo_stream_enc_inst = 
pipe_ctx->stream_res.hpo_dp_stream_enc->inst; - else - copy_settings_data->hpo_stream_enc_inst = 0; - if (pipe_ctx->link_res.hpo_dp_link_enc) - copy_settings_data->hpo_link_enc_inst = pipe_ctx->link_res.hpo_dp_link_enc->inst; - else - copy_settings_data->hpo_link_enc_inst = 0; - } - if (pipe_ctx->plane_res.dpp) copy_settings_data->dpp_inst = pipe_ctx->plane_res.dpp->inst; else @@ -247,7 +211,6 @@ static void dmub_replay_set_coasting_vtotal(struct dmub_replay *dmub, pCmd->header.type = DMUB_CMD__REPLAY; pCmd->header.sub_type = DMUB_CMD__REPLAY_SET_COASTING_VTOTAL; pCmd->header.payload_bytes = sizeof(struct dmub_cmd_replay_set_coasting_vtotal_data); - pCmd->replay_set_coasting_vtotal_data.panel_inst = panel_inst; pCmd->replay_set_coasting_vtotal_data.coasting_vtotal = (coasting_vtotal & 0xFFFF); pCmd->replay_set_coasting_vtotal_data.coasting_vtotal_high = (coasting_vtotal & 0xFFFF0000) >> 16; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h index ccbe385e132c..e6346c0ffc0e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h @@ -19,7 +19,7 @@ struct dmub_replay_funcs { void (*replay_get_state)(struct dmub_replay *dmub, enum replay_state *state, uint8_t panel_inst); void (*replay_enable)(struct dmub_replay *dmub, bool enable, bool wait, - uint8_t panel_inst, struct dc_link *link); + uint8_t panel_inst); bool (*replay_copy_settings)(struct dmub_replay *dmub, struct dc_link *link, struct replay_context *replay_context, uint8_t panel_inst); void (*replay_set_power_opt)(struct dmub_replay *dmub, unsigned int power_opt, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index e7927b8f5ba3..98ec9b5a559c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ 
b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -944,7 +944,7 @@ bool edp_set_replay_allow_active(struct dc_link *link, const bool *allow_active, // TODO: Handle mux change case if force_static is set // If force_static is set, just change the replay_allow_active state directly if (replay != NULL && link->replay_settings.replay_feature_enabled) - replay->funcs->replay_enable(replay, *allow_active, wait, panel_inst, link); + replay->funcs->replay_enable(replay, *allow_active, wait, panel_inst); link->replay_settings.replay_allow_active = *allow_active; } diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index c587b3441e07..6a69a788abe8 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -4047,14 +4047,6 @@ struct dmub_cmd_replay_copy_settings_data { * DIG BE HW instance. */ uint8_t digbe_inst; - /** - * @hpo_stream_enc_inst: HPO stream encoder instance - */ - uint8_t hpo_stream_enc_inst; - /** - * @hpo_link_enc_inst: HPO link encoder instance - */ - uint8_t hpo_link_enc_inst; /** * AUX HW instance. */ @@ -4159,18 +4151,6 @@ struct dmub_rb_cmd_replay_enable_data { * This does not support HDMI/DP2 for now. */ uint8_t phy_rate; - /** - * @hpo_stream_enc_inst: HPO stream encoder instance - */ - uint8_t hpo_stream_enc_inst; - /** - * @hpo_link_enc_inst: HPO link encoder instance - */ - uint8_t hpo_link_enc_inst; - /** - * @pad: Align structure to 4 byte boundary. - */ - uint8_t pad[2]; }; /** From 79e25cd06e85105c75701ef1773c6c64bb304091 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 8 Aug 2025 13:12:07 -0400 Subject: [PATCH 2108/2411] drm/amdgpu/swm14: Update power limit logic Take into account the limits from the vbios. Ported from the SMU13 code. 
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4352 Reviewed-by: Jesse Zhang Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher (cherry picked from commit 203cc7f1dd86f2c8de5c3c6182f19adac7c9c206) Cc: stable@vger.kernel.org --- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 3aea32baea3d..f32474af90b3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -1697,9 +1697,11 @@ static int smu_v14_0_2_get_power_limit(struct smu_context *smu, uint32_t *min_power_limit) { struct smu_table_context *table_context = &smu->smu_table; + struct smu_14_0_2_powerplay_table *powerplay_table = + table_context->power_play_table; PPTable_t *pptable = table_context->driver_pptable; CustomSkuTable_t *skutable = &pptable->CustomSkuTable; - uint32_t power_limit; + uint32_t power_limit, od_percent_upper = 0, od_percent_lower = 0; uint32_t msg_limit = pptable->SkuTable.MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; if (smu_v14_0_get_current_power_limit(smu, &power_limit)) @@ -1712,11 +1714,29 @@ static int smu_v14_0_2_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (max_power_limit) - *max_power_limit = msg_limit; + if (powerplay_table) { + if (smu->od_enabled && + smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_PPT_BIT)) { + od_percent_upper = pptable->SkuTable.OverDriveLimitsBasicMax.Ppt; + od_percent_lower = pptable->SkuTable.OverDriveLimitsBasicMin.Ppt; + } else if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_PPT_BIT)) { + od_percent_upper = 0; + od_percent_lower = pptable->SkuTable.OverDriveLimitsBasicMin.Ppt; + } + } - if (min_power_limit) - *min_power_limit = 0; + dev_dbg(smu->adev->dev, "od percent upper:%d, od percent 
lower:%d (default power: %d)\n", + od_percent_upper, od_percent_lower, power_limit); + + if (max_power_limit) { + *max_power_limit = msg_limit * (100 + od_percent_upper); + *max_power_limit /= 100; + } + + if (min_power_limit) { + *min_power_limit = power_limit * (100 + od_percent_lower); + *min_power_limit /= 100; + } return 0; } From 07b93a5704b0b72002f0c4bd1076214af67dc661 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 24 Jul 2025 15:00:43 -0500 Subject: [PATCH 2109/2411] drm/amd/display: Avoid a NULL pointer dereference [WHY] Although unlikely drm_atomic_get_new_connector_state() or drm_atomic_get_old_connector_state() can return NULL. [HOW] Check returns before dereference. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Harry Wentland Signed-off-by: Mario Limonciello Signed-off-by: Alex Hung Tested-by: Dan Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 1e5e8d672fec9f2ab352be121be971877bff2af9) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index cd0e2976e268..a0ca3b2c6bd8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7792,6 +7792,9 @@ amdgpu_dm_connector_atomic_check(struct drm_connector *conn, struct amdgpu_dm_connector *aconn = to_amdgpu_dm_connector(conn); int ret; + if (WARN_ON(unlikely(!old_con_state || !new_con_state))) + return -EINVAL; + trace_amdgpu_dm_connector_atomic_check(new_con_state); if (conn->connector_type == DRM_MODE_CONNECTOR_DisplayPort) { From 66af73a1c319336694a8610fe4c2943f7b33066c Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Fri, 18 Jul 2025 18:25:08 +0800 Subject: [PATCH 2110/2411] drm/amd/display: Fix Xorg desktop unresponsive on Replay panel [WHY & HOW] IPS & self-refresh feature can cause vblank counter resets between vblank disable and
enable. It may cause system stuck due to waiting for the vblank counter. Call the drm_crtc_vblank_restore() during vblank enable to estimate missed vblanks by using timestamps and update the vblank counter in DRM. It can make the vblank counter increase smoothly and resolve this issue. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Sun peng (Leo) Li Signed-off-by: Tom Chung Signed-off-by: Alex Hung Tested-by: Dan Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 34d66bc7ff10e146a4cec76cf286979740a10954) Cc: stable@vger.kernel.org --- .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 010172f930ae..45feb404b097 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -299,6 +299,25 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, acrtc->crtc_id); if (enable) { + struct dc *dc = adev->dm.dc; + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); + struct psr_settings *psr = &acrtc_state->stream->link->psr_settings; + struct replay_settings *pr = &acrtc_state->stream->link->replay_settings; + bool sr_supported = (psr->psr_version != DC_PSR_VERSION_UNSUPPORTED) || + pr->config.replay_supported; + + /* + * IPS & self-refresh feature can cause vblank counter resets between + * vblank disable and enable. + * It may cause system stuck due to waiting for the vblank counter. + * Call this function to estimate missed vblanks by using timestamps and + * update the vblank counter in DRM.
+ */ + if (dc->caps.ips_support && + dc->config.disable_ips != DMUB_IPS_DISABLE_ALL && + sr_supported && vblank->config.disable_immediate) + drm_crtc_vblank_restore(crtc); + /* vblank irq on -> Only need vupdate irq in vrr mode */ if (amdgpu_dm_crtc_vrr_active(acrtc_state)) rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, true); From 7a2ca2ea64b1b63c8baa94a8f5deb70b2248d119 Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Wed, 23 Jul 2025 21:36:41 -0500 Subject: [PATCH 2111/2411] drm/amd/display: Add null pointer check in mod_hdcp_hdcp1_create_session() The function mod_hdcp_hdcp1_create_session() calls the function get_first_active_display(), but does not check its return value. The return value is a null pointer if the display list is empty. This will lead to a null pointer dereference. Add a null pointer check for get_first_active_display() and return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND if the function returns null. This is similar to the commit c3e9826a2202 ("drm/amd/display: Add null pointer check for get_first_active_display()").
Fixes: 2deade5ede56 ("drm/amd/display: Remove hdcp display state with mst fix") Signed-off-by: Chenyuan Yang Reviewed-by: Alex Hung Tested-by: Dan Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 5e43eb3cd731649c4f8b9134f857be62a416c893) --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c index e58e7b93810b..6b7db8ec9a53 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c @@ -260,6 +260,9 @@ enum mod_hdcp_status mod_hdcp_hdcp1_create_session(struct mod_hdcp *hdcp) return MOD_HDCP_STATUS_FAILURE; } + if (!display) + return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; mutex_lock(&psp->hdcp_context.mutex); From cb7b7ae53b557d168b4af5cd8549f3eff920bfb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Thu, 31 Jul 2025 11:43:46 +0200 Subject: [PATCH 2112/2411] drm/amd/display: Don't overclock DCE 6 by 15% MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The extra 15% clock was added as a workaround for a Polaris issue which uses DCE 11, and should not have been used on DCE 6 which is already hardcoded to the highest possible display clock. Unfortunately, the extra 15% was mistakenly copied and kept even on code paths which don't affect Polaris. This commit fixes that and also adds a check to make sure not to exceed the maximum DCE 6 display clock. 
Fixes: 8cd61c313d8b ("drm/amd/display: Raise dispclk value for Polaris") Fixes: dc88b4a684d2 ("drm/amd/display: make clk mgr soc specific") Fixes: 3ecb3b794e2c ("drm/amd/display: dc/clk_mgr: add support for SI parts (v2)") Signed-off-by: Timur Kristóf Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit 427980c1cbd22bb256b9385f5ce73c0937562408) Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c index 0267644717b2..cfd7309f2c6a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c @@ -123,11 +123,9 @@ static void dce60_update_clocks(struct clk_mgr *clk_mgr_base, { struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dm_pp_power_level_change_request level_change_req; - int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz; - - /*TODO: W/A for dal3 linux, investigate why this works */ - if (!clk_mgr_dce->dfs_bypass_active) - patched_disp_clk = patched_disp_clk * 115 / 100; + const int max_disp_clk = + clk_mgr_dce->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz; + int patched_disp_clk = MIN(max_disp_clk, context->bw_ctx.bw.dce.dispclk_khz); level_change_req.power_level = dce_get_required_clocks_state(clk_mgr_base, context); /* get max clock state from PPLIB */ From 1fc931be2f47fde23ca5aff6f19421375c312fb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Thu, 31 Jul 2025 11:43:47 +0200 Subject: [PATCH 2113/2411] drm/amd/display: Adjust DCE 8-10 clock, don't overclock by 15% MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adjust the nominal (and performance) clocks 
for DCE 8-10, and set them to 625 MHz, which is the value used by the legacy display code in amdgpu_atombios_get_clock_info. This was tested with Hawaii, Tonga and Fiji. These GPUs can output 4K 60Hz (10-bit depth) at 625 MHz. The extra 15% clock was added as a workaround for a Polaris issue which uses DCE 11, and should not have been used on DCE 8-10 which are already hardcoded to the highest possible display clock. Unfortunately, the extra 15% was mistakenly copied and kept even on code paths which don't affect Polaris. This commit fixes that and also adds a check to make sure not to exceed the maximum DCE 8-10 display clock. Fixes: 8cd61c313d8b ("drm/amd/display: Raise dispclk value for Polaris") Fixes: dc88b4a684d2 ("drm/amd/display: make clk mgr soc specific") Signed-off-by: Timur Kristóf Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit 1ae45b5d4f371af8ae51a3827d0ec9fe27eeb867) --- .../drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c index f5ad0a177038..a324dbd4543c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c @@ -72,9 +72,9 @@ static const struct state_dependent_clocks dce80_max_clks_by_state[] = { /* ClocksStateLow */ { .display_clk_khz = 352000, .pixel_clk_khz = 330000}, /* ClocksStateNominal */ -{ .display_clk_khz = 600000, .pixel_clk_khz = 400000 }, +{ .display_clk_khz = 625000, .pixel_clk_khz = 400000 }, /* ClocksStatePerformance */ -{ .display_clk_khz = 600000, .pixel_clk_khz = 400000 } }; +{ .display_clk_khz = 625000, .pixel_clk_khz = 400000 } }; int dentist_get_divider_from_did(int did) { @@ -405,11 +405,9 @@ static void dce_update_clocks(struct clk_mgr *clk_mgr_base, { struct 
clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dm_pp_power_level_change_request level_change_req; - int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz; - - /*TODO: W/A for dal3 linux, investigate why this works */ - if (!clk_mgr_dce->dfs_bypass_active) - patched_disp_clk = patched_disp_clk * 115 / 100; + const int max_disp_clk = + clk_mgr_dce->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz; + int patched_disp_clk = MIN(max_disp_clk, context->bw_ctx.bw.dce.dispclk_khz); level_change_req.power_level = dce_get_required_clocks_state(clk_mgr_base, context); /* get max clock state from PPLIB */ From 669f73a26f6112eedbadac53a2f2707ac6d0b9c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Thu, 31 Jul 2025 11:43:48 +0200 Subject: [PATCH 2114/2411] drm/amd/display: Find first CRTC and its line time in dce110_fill_display_configs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dce110_fill_display_configs is shared between DCE 6-11, and finding the first CRTC and its line time is relevant to DCE 6 too. Move the code to find it from DCE 11 specific code. 
Signed-off-by: Timur Kristóf Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit 4ab09785f8d5d03df052827af073d5c508ff5f63) Cc: stable@vger.kernel.org --- .../dc/clk_mgr/dce110/dce110_clk_mgr.c | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c index f8409453434c..baeac8f1c04f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c @@ -120,9 +120,12 @@ void dce110_fill_display_configs( const struct dc_state *context, struct dm_pp_display_configuration *pp_display_cfg) { + struct dc *dc = context->clk_mgr->ctx->dc; int j; int num_cfgs = 0; + pp_display_cfg->crtc_index = dc->res_pool->res_cap->num_timing_generator; + for (j = 0; j < context->stream_count; j++) { int k; @@ -164,6 +167,23 @@ void dce110_fill_display_configs( cfg->v_refresh /= stream->timing.h_total; cfg->v_refresh = (cfg->v_refresh + stream->timing.v_total / 2) / stream->timing.v_total; + + /* Find first CRTC index and calculate its line time. + * This is necessary for DPM on SI GPUs. 
+ */ + if (cfg->pipe_idx < pp_display_cfg->crtc_index) { + const struct dc_crtc_timing *timing = + &context->streams[0]->timing; + + pp_display_cfg->crtc_index = cfg->pipe_idx; + pp_display_cfg->line_time_in_us = + timing->h_total * 10000 / timing->pix_clk_100hz; + } + } + + if (!num_cfgs) { + pp_display_cfg->crtc_index = 0; + pp_display_cfg->line_time_in_us = 0; } pp_display_cfg->display_count = num_cfgs; @@ -232,16 +252,6 @@ void dce11_pplib_apply_display_requirements( dce110_fill_display_configs(context, pp_display_cfg); - /* TODO: is this still applicable?*/ - if (pp_display_cfg->display_count == 1) { - const struct dc_crtc_timing *timing = - &context->streams[0]->timing; - - pp_display_cfg->crtc_index = - pp_display_cfg->disp_configs[0].pipe_idx; - pp_display_cfg->line_time_in_us = timing->h_total * 10000 / timing->pix_clk_100hz; - } - if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0) dm_pp_apply_display_requirements(dc->ctx, pp_display_cfg); } From 7d07140d37f792f01cfdb8ca9a6a792ab1d29126 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Thu, 31 Jul 2025 11:43:49 +0200 Subject: [PATCH 2115/2411] drm/amd/display: Fill display clock and vblank time in dce110_fill_display_configs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also needed by DCE 6. This way the code that gathers this info can be shared between different DCE versions and doesn't have to be repeated. 
Signed-off-by: Timur Kristóf Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit 8107432dff37db26fcb641b6cebeae8981cd73a0) Cc: stable@vger.kernel.org --- .../drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c | 2 -- .../drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c | 10 +++------- .../drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c | 2 -- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c index a324dbd4543c..dbd6ef1b60a0 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c @@ -391,8 +391,6 @@ static void dce_pplib_apply_display_requirements( { struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg; - pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context); - dce110_fill_display_configs(context, pp_display_cfg); if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c index baeac8f1c04f..13cf415e38e5 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c @@ -124,6 +124,9 @@ void dce110_fill_display_configs( int j; int num_cfgs = 0; + pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context); + pp_display_cfg->disp_clk_khz = dc->clk_mgr->clks.dispclk_khz; + pp_display_cfg->avail_mclk_switch_time_in_disp_active_us = 0; pp_display_cfg->crtc_index = dc->res_pool->res_cap->num_timing_generator; for (j = 0; j < context->stream_count; j++) { @@ -243,13 +246,6 @@ void dce11_pplib_apply_display_requirements( 
pp_display_cfg->min_engine_clock_deep_sleep_khz = context->bw_ctx.bw.dce.sclk_deep_sleep_khz; - pp_display_cfg->avail_mclk_switch_time_us = - dce110_get_min_vblank_time_us(context); - /* TODO: dce11.2*/ - pp_display_cfg->avail_mclk_switch_time_in_disp_active_us = 0; - - pp_display_cfg->disp_clk_khz = dc->clk_mgr->clks.dispclk_khz; - dce110_fill_display_configs(context, pp_display_cfg); if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c index cfd7309f2c6a..7044b437fe9d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c @@ -109,8 +109,6 @@ static void dce60_pplib_apply_display_requirements( { struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg; - pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context); - dce110_fill_display_configs(context, pp_display_cfg); if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0) From 8246147f1fbaed522b8bcc02ca34e4260747dcfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Thu, 31 Jul 2025 11:43:50 +0200 Subject: [PATCH 2116/2411] drm/amd/display: Don't warn when missing DCE encoder caps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On some GPUs the VBIOS just doesn't have encoder caps, or maybe not for every encoder. This isn't really a problem and it's handled well, so let's not litter the logs with it. 
Signed-off-by: Timur Kristóf Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit 33e0227ee96e62d034781e91f215e32fd0b1d512) --- drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c index 4a9d07c31bc5..0c50fe266c8a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c @@ -896,13 +896,13 @@ void dce110_link_encoder_construct( enc110->base.id, &bp_cap_info); /* Override features with DCE-specific values */ - if (BP_RESULT_OK == result) { + if (result == BP_RESULT_OK) { enc110->base.features.flags.bits.IS_HBR2_CAPABLE = bp_cap_info.DP_HBR2_EN; enc110->base.features.flags.bits.IS_HBR3_CAPABLE = bp_cap_info.DP_HBR3_EN; enc110->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN; - } else { + } else if (result != BP_RESULT_NORECORD) { DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n", __func__, result); @@ -1798,13 +1798,13 @@ void dce60_link_encoder_construct( enc110->base.id, &bp_cap_info); /* Override features with DCE-specific values */ - if (BP_RESULT_OK == result) { + if (result == BP_RESULT_OK) { enc110->base.features.flags.bits.IS_HBR2_CAPABLE = bp_cap_info.DP_HBR2_EN; enc110->base.features.flags.bits.IS_HBR3_CAPABLE = bp_cap_info.DP_HBR3_EN; enc110->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN; - } else { + } else if (result != BP_RESULT_NORECORD) { DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n", __func__, result); From f14ee2e7a86c5e57295b48b8e198cae7189b3b93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Thu, 31 Jul 2025 11:43:51 +0200 Subject: [PATCH 2117/2411] drm/amd/display: Don't print errors for 
nonexistent connectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When getting the number of connectors, the VBIOS reports the number of valid indices, but it doesn't say which indices are valid, and not every valid index has an actual connector. If we don't find a connector on an index, that is not an error. Considering these are not actual errors, don't litter the logs. Fixes: 60df5628144b ("drm/amd/display: handle invalid connector indices") Signed-off-by: Timur Kristóf Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit 249d4bc5f1935f04bb45b3b63c0f8922565124f7) --- drivers/gpu/drm/amd/display/dc/bios/bios_parser.c | 5 +---- drivers/gpu/drm/amd/display/dc/core/dc.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index 67f08495b7e6..154fd2c18e88 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -174,11 +174,8 @@ static struct graphics_object_id bios_parser_get_connector_id( return object_id; } - if (tbl->ucNumberOfObjects <= i) { - dm_error("Can't find connector id %d in connector table of size %d.\n", - i, tbl->ucNumberOfObjects); + if (tbl->ucNumberOfObjects <= i) return object_id; - } id = le16_to_cpu(tbl->asObjects[i].usObjectID); object_id = object_id_from_bios_object_id(id); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 9ab0ee20ca6f..dcc48b5238e5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -217,11 +217,24 @@ static bool create_links( connectors_num, num_virtual_links); - // condition loop on link_count to allow skipping invalid indices + /* When getting the number of connectors, the VBIOS reports the number of 
valid indices, + * but it doesn't say which indices are valid, and not every index has an actual connector. + * So, if we don't find a connector on an index, that is not an error. + * + * - There is no guarantee that the first N indices will be valid + * - VBIOS may report a higher amount of valid indices than there are actual connectors + * - Some VBIOS have valid configurations for more connectors than there actually are + * on the card. This may be because the manufacturer used the same VBIOS for different + * variants of the same card. + */ for (i = 0; dc->link_count < connectors_num && i < MAX_LINKS; i++) { + struct graphics_object_id connector_id = bios->funcs->get_connector_id(bios, i); struct link_init_data link_init_params = {0}; struct dc_link *link; + if (connector_id.id == CONNECTOR_ID_UNKNOWN) + continue; + DC_LOG_DC("BIOS object table - printing link object info for connector number: %d, link_index: %d", i, dc->link_count); link_init_params.ctx = dc->ctx; From 10507478468f165ea681605d133991ed05cdff62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Thu, 31 Jul 2025 11:43:52 +0200 Subject: [PATCH 2118/2411] drm/amd/display: Fix fractional fb divider in set_pixel_clock_v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For later VBIOS versions, the fractional feedback divider is calculated as the remainder of dividing the feedback divider by a factor, which is set to 1000000. For reference, see: - calculate_fb_and_fractional_fb_divider - calc_pll_max_vco_construct However, in case of old VBIOS versions that have set_pixel_clock_v3, they only have 1 byte available for the fractional feedback divider, and it's expected to be set to the remainder from dividing the feedback divider by 10. 
For reference see the legacy display code: - amdgpu_pll_compute - amdgpu_atombios_crtc_program_pll This commit fixes set_pixel_clock_v3 by dividing the fractional feedback divider passed to the function by 100000. Fixes: 4562236b3bc0 ("drm/amd/dc: Add dc display driver (v2)") Signed-off-by: Timur Kristóf Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit 027e7acc7e17802ebf28e1edb88a404836ad50d6) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/bios/command_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c index 2bcae0643e61..58e88778da7f 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c @@ -993,7 +993,7 @@ static enum bp_result set_pixel_clock_v3( allocation.sPCLKInput.usFbDiv = cpu_to_le16((uint16_t)bp_params->feedback_divider); allocation.sPCLKInput.ucFracFbDiv = - (uint8_t)bp_params->fractional_feedback_divider; + (uint8_t)(bp_params->fractional_feedback_divider / 100000); allocation.sPCLKInput.ucPostDiv = (uint8_t)bp_params->pixel_clock_post_divider; From 297a4833a68aac3316eb808b4123eb016ef242d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Sat, 2 Aug 2025 17:51:53 +0200 Subject: [PATCH 2119/2411] drm/amd/display: Fix DP audio DTO1 clock source on DCE 6. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On DCE 6, DP audio was not working. However, it worked when an HDMI monitor was also plugged in. Looking at dce_aud_wall_dto_setup it seems that the main difference is that we use DTO1 when only DP is plugged in. 
When programming DTO1, it uses audio_dto_source_clock_in_khz which is set from get_dp_ref_freq_khz The dce60_get_dp_ref_freq_khz implementation looks incorrect, because DENTIST_DISPCLK_CNTL seems to be always zero on DCE 6, so it isn't usable. I compared dce60_get_dp_ref_freq_khz to the legacy display code, specifically dce_v6_0_audio_set_dto, and it turns out that in case of DCE 6, it needs to use the display clock. With that, DP audio started working on Pitcairn, Oland and Cape Verde. However, it still didn't work on Tahiti. Despite having the same DCE version, Tahiti seems to have a different audio device. After some trial and error I realized that it works with the default display clock as reported by the VBIOS, not the current display clock. The patch was tested on all four SI GPUs: * Pitcairn (DCE 6.0) * Oland (DCE 6.4) * Cape Verde (DCE 6.0) * Tahiti (DCE 6.0 but different) The testing was done on Samsung Odyssey G7 LS28BG700EPXEN on each of the above GPUs, at the following settings: * 4K 60 Hz * 1080p 60 Hz * 1080p 144 Hz Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher (cherry picked from commit 645cc7863da5de700547d236697dffd6760cf051) Cc: stable@vger.kernel.org --- .../display/dc/clk_mgr/dce60/dce60_clk_mgr.c | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c index 7044b437fe9d..a39641a0ff09 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c @@ -83,22 +83,13 @@ static const struct state_dependent_clocks dce60_max_clks_by_state[] = { static int dce60_get_dp_ref_freq_khz(struct clk_mgr *clk_mgr_base) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); - int dprefclk_wdivider; - int dp_ref_clk_khz; - int target_div; + struct 
dc_context *ctx = clk_mgr_base->ctx; + int dp_ref_clk_khz = 0; - /* DCE6 has no DPREFCLK_CNTL to read DP Reference Clock source */ - - /* Read the mmDENTIST_DISPCLK_CNTL to get the currently - * programmed DID DENTIST_DPREFCLK_WDIVIDER*/ - REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DPREFCLK_WDIVIDER, &dprefclk_wdivider); - - /* Convert DENTIST_DPREFCLK_WDIVIDERto actual divider*/ - target_div = dentist_get_divider_from_did(dprefclk_wdivider); - - /* Calculate the current DFS clock, in kHz.*/ - dp_ref_clk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; + if (ASIC_REV_IS_TAHITI_P(ctx->asic_id.hw_internal_rev)) + dp_ref_clk_khz = ctx->dc_bios->fw_info.default_display_engine_pll_frequency; + else + dp_ref_clk_khz = clk_mgr_base->clks.dispclk_khz; return dce_adjust_dp_ref_freq_for_ss(clk_mgr, dp_ref_clk_khz); } From 864e3396976ef41de6cc7bc366276bf4e084fff2 Mon Sep 17 00:00:00 2001 From: Jakub Ramaseuski Date: Thu, 14 Aug 2025 12:51:19 +0200 Subject: [PATCH 2120/2411] net: gso: Forbid IPv6 TSO with extensions on devices with only IPV6_CSUM When performing Generic Segmentation Offload (GSO) on an IPv6 packet that contains extension headers, the kernel incorrectly requests checksum offload if the egress device only advertises NETIF_F_IPV6_CSUM feature, which has a strict contract: it supports checksum offload only for plain TCP or UDP over IPv6 and explicitly does not support packets with extension headers. The current GSO logic violates this contract by failing to disable the feature for packets with extension headers, such as those used in GREoIPv6 tunnels. This violation results in the device being asked to perform an operation it cannot support, leading to a `skb_warn_bad_offload` warning and a collapse of network throughput. While device TSO/USO is correctly bypassed in favor of software GSO for these packets, the GSO stack must be explicitly told not to request checksum offload. 
Mask NETIF_F_IPV6_CSUM, NETIF_F_TSO6 and NETIF_F_GSO_UDP_L4 in gso_features_check if the IPv6 header contains extension headers to compute checksum in software. The exception is a BIG TCP extension, which, as stated in commit 68e068cabd2c6c53 ("net: reenable NETIF_F_IPV6_CSUM offload for BIG TCP packets"): "The feature is only enabled on devices that support BIG TCP TSO. The header is only present for PF_PACKET taps like tcpdump, and not transmitted by physical devices." kernel log output (truncated): WARNING: CPU: 1 PID: 5273 at net/core/dev.c:3535 skb_warn_bad_offload+0x81/0x140 ... Call Trace: skb_checksum_help+0x12a/0x1f0 validate_xmit_skb+0x1a3/0x2d0 validate_xmit_skb_list+0x4f/0x80 sch_direct_xmit+0x1a2/0x380 __dev_xmit_skb+0x242/0x670 __dev_queue_xmit+0x3fc/0x7f0 ip6_finish_output2+0x25e/0x5d0 ip6_finish_output+0x1fc/0x3f0 ip6_tnl_xmit+0x608/0xc00 [ip6_tunnel] ip6gre_tunnel_xmit+0x1c0/0x390 [ip6_gre] dev_hard_start_xmit+0x63/0x1c0 __dev_queue_xmit+0x6d0/0x7f0 ip6_finish_output2+0x214/0x5d0 ip6_finish_output+0x1fc/0x3f0 ip6_xmit+0x2ca/0x6f0 ip6_finish_output+0x1fc/0x3f0 ip6_xmit+0x2ca/0x6f0 inet6_csk_xmit+0xeb/0x150 __tcp_transmit_skb+0x555/0xa80 tcp_write_xmit+0x32a/0xe90 tcp_sendmsg_locked+0x437/0x1110 tcp_sendmsg+0x2f/0x50 ... 
skb linear: 00000000: e4 3d 1a 7d ec 30 e4 3d 1a 7e 5d 90 86 dd 60 0e skb linear: 00000010: 00 0a 1b 34 3c 40 20 11 00 00 00 00 00 00 00 00 skb linear: 00000020: 00 00 00 00 00 12 20 11 00 00 00 00 00 00 00 00 skb linear: 00000030: 00 00 00 00 00 11 2f 00 04 01 04 01 01 00 00 00 skb linear: 00000040: 86 dd 60 0e 00 0a 1b 00 06 40 20 23 00 00 00 00 skb linear: 00000050: 00 00 00 00 00 00 00 00 00 12 20 23 00 00 00 00 skb linear: 00000060: 00 00 00 00 00 00 00 00 00 11 bf 96 14 51 13 f9 skb linear: 00000070: ae 27 a0 a8 2b e3 80 18 00 40 5b 6f 00 00 01 01 skb linear: 00000080: 08 0a 42 d4 50 d5 4b 70 f8 1a Fixes: 04c20a9356f283da ("net: skip offload for NETIF_F_IPV6_CSUM if ipv6 header contains extension") Reported-by: Tianhao Zhao Suggested-by: Michal Schmidt Suggested-by: Willem de Bruijn Signed-off-by: Jakub Ramaseuski Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250814105119.1525687-1-jramaseu@redhat.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 5a3c0f40a93f..93a25d87b86b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3779,6 +3779,18 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, features &= ~NETIF_F_TSO_MANGLEID; } + /* NETIF_F_IPV6_CSUM does not support IPv6 extension headers, + * so neither does TSO that depends on it. 
+ */ + if (features & NETIF_F_IPV6_CSUM && + (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + vlan_get_protocol(skb) == htons(ETH_P_IPV6))) && + skb_transport_header_was_set(skb) && + skb_network_header_len(skb) != sizeof(struct ipv6hdr) && + !ipv6_has_hopopt_jumbo(skb)) + features &= ~(NETIF_F_IPV6_CSUM | NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4); + return features; } From 84967deee9d9870b15bc4c3acb50f1d401807902 Mon Sep 17 00:00:00 2001 From: Minhong He Date: Fri, 15 Aug 2025 14:38:45 +0800 Subject: [PATCH 2121/2411] ipv6: sr: validate HMAC algorithm ID in seg6_hmac_info_add The seg6_genl_sethmac() directly uses the algorithm ID provided by the userspace without verifying whether it is an HMAC algorithm supported by the system. If an unsupported HMAC algorithm ID is configured, packets using SRv6 HMAC will be dropped during encapsulation or decapsulation. Fixes: 4f4853dc1c9c ("ipv6: sr: implement API to control SR HMAC structure") Signed-off-by: Minhong He Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250815063845.85426-1-heminhong@kylinos.cn Signed-off-by: Jakub Kicinski --- net/ipv6/seg6_hmac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index f78ecb6ad838..d77b52523b6a 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -304,6 +304,9 @@ int seg6_hmac_info_add(struct net *net, u32 key, struct seg6_hmac_info *hinfo) struct seg6_pernet_data *sdata = seg6_pernet(net); int err; + if (!__hmac_get_algo(hinfo->alg_id)) + return -EINVAL; + err = rhashtable_lookup_insert_fast(&sdata->hmac_infos, &hinfo->node, rht_params); From ccab044697980c6c01ab51f43f48f13b8a3e5c33 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Fri, 15 Aug 2025 19:28:19 +0200 Subject: [PATCH 2122/2411] mptcp: drop skb if MPTCP skb extension allocation fails When skb_ext_add(skb, SKB_EXT_MPTCP) fails in mptcp_incoming_options(), we used to return true, letting the segment 
proceed through the TCP receive path without a DSS mapping. Such segments can leave inconsistent mapping state and trigger a mid-stream fallback to TCP, which in testing (with failures artificially forced in skb_ext_add) collapsed throughput to zero. Return false instead so the TCP input path drops the skb (see tcp_data_queue() and step-7 processing). This is the safer choice under memory pressure: it preserves MPTCP correctness and provides backpressure to the sender. Control packets remain unaffected: ACK updates and DATA_FIN handling happen before attempting the extension allocation, and tcp_reset() continues to ignore the return value. With this change, MPTCP continues to work at high throughput if we artificially inject failures into skb_ext_add. Fixes: 6787b7e350d3 ("mptcp: avoid processing packet if a subflow reset") Cc: stable@vger.kernel.org Signed-off-by: Christoph Paasch Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-1-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/options.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 70c0ab0ecf90..2a8ea28442b2 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1118,7 +1118,9 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk, return hmac == mp_opt->ahmac; } -/* Return false if a subflow has been reset, else return true */ +/* Return false in case of error (or subflow has been reset), + * else return true.
+ */ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); @@ -1222,7 +1224,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) mpext = skb_ext_add(skb, SKB_EXT_MPTCP); if (!mpext) - return true; + return false; memset(mpext, 0, sizeof(*mpext)); From 68fc0f4b0d25692940cdc85c68e366cae63e1757 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 15 Aug 2025 19:28:20 +0200 Subject: [PATCH 2123/2411] mptcp: pm: kernel: flush: do not reset ADD_ADDR limit A flush of the MPTCP endpoints should not affect the MPTCP limits. In other words, 'ip mptcp endpoint flush' should not change 'ip mptcp limits'. But it was the case: the MPTCP_PM_ATTR_RCV_ADD_ADDRS (add_addr_accepted) limit was reset by accident. Removing the reset of this counter during a flush fixes this issue. Fixes: 01cacb00b35c ("mptcp: add netlink-based PM") Cc: stable@vger.kernel.org Reported-by: Thomas Dreibholz Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/579 Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-2-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_kernel.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index d39e7c178460..667803d72b64 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c @@ -1085,7 +1085,6 @@ static void __flush_addrs(struct list_head *list) static void __reset_counters(struct pm_nl_pernet *pernet) { WRITE_ONCE(pernet->add_addr_signal_max, 0); - WRITE_ONCE(pernet->add_addr_accept_max, 0); WRITE_ONCE(pernet->local_addr_max, 0); pernet->addrs = 0; } From 452690be7de2f91cc0de68cb9e95252875b33503 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 15 Aug 2025 19:28:21 +0200 Subject: [PATCH 2124/2411] selftests: mptcp: pm: check flush doesn't reset limits This modification is linked 
to the parent commit where the received ADD_ADDR limit was accidentally reset when the endpoints were flushed. To validate that, the test is now flushing endpoints after having set new limits, and before checking them. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: 01cacb00b35c ("mptcp: add netlink-based PM") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-3-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/pm_netlink.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 2e6648a2b2c0..ac7ec6f94023 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -198,6 +198,7 @@ set_limits 1 9 2>/dev/null check "get_limits" "${default_limits}" "subflows above hard limit" set_limits 8 8 +flush_endpoint ## to make sure it doesn't affect the limits check "get_limits" "$(format_limits 8 8)" "set limits" flush_endpoint From 5d13349472ac8abcbcb94407969aa0fdc2e1f1be Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 15 Aug 2025 19:28:22 +0200 Subject: [PATCH 2125/2411] mptcp: remove duplicate sk_reset_timer call sk_reset_timer() was called twice in mptcp_pm_alloc_anno_list. Simplify the code by using a 'goto' statement to eliminate the duplication. Note that this is not a fix, but it will help backporting the following patch. The same "Fixes" tag has been added for this reason. 
Fixes: 93f323b9cccc ("mptcp: add a new sysctl add_addr_timeout") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-4-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 420d416e2603..c5f6a53ce5f1 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -353,9 +353,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk))) return false; - sk_reset_timer(sk, &add_entry->add_timer, - jiffies + mptcp_get_add_addr_timeout(net)); - return true; + goto reset_timer; } add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); @@ -369,6 +367,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, add_entry->retrans_times = 0; timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); +reset_timer: sk_reset_timer(sk, &add_entry->add_timer, jiffies + mptcp_get_add_addr_timeout(net)); From f5ce0714623cffd00bf2a83e890d09c609b7f50a Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 15 Aug 2025 19:28:23 +0200 Subject: [PATCH 2126/2411] mptcp: disable add_addr retransmission when timeout is 0 When add_addr_timeout was set to 0, this caused the ADD_ADDR to be retransmitted immediately, which looks like a buggy behaviour. Instead, interpret 0 as "no retransmissions needed". The documentation is updated to explicitly state that setting the timeout to 0 disables retransmission. 
Fixes: 93f323b9cccc ("mptcp: add a new sysctl add_addr_timeout") Cc: stable@vger.kernel.org Suggested-by: Matthieu Baerts Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-5-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/networking/mptcp-sysctl.rst | 2 ++ net/mptcp/pm.c | 13 ++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst index 5bfab01eff5a..1683c139821e 100644 --- a/Documentation/networking/mptcp-sysctl.rst +++ b/Documentation/networking/mptcp-sysctl.rst @@ -12,6 +12,8 @@ add_addr_timeout - INTEGER (seconds) resent to an MPTCP peer that has not acknowledged a previous ADD_ADDR message. + Do not retransmit if set to 0. + The default value matches TCP_RTO_MAX. This is a per-namespace sysctl. diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index c5f6a53ce5f1..136a380602ca 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -274,6 +274,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer) add_timer); struct mptcp_sock *msk = entry->sock; struct sock *sk = (struct sock *)msk; + unsigned int timeout; pr_debug("msk=%p\n", msk); @@ -291,6 +292,10 @@ static void mptcp_pm_add_timer(struct timer_list *timer) goto out; } + timeout = mptcp_get_add_addr_timeout(sock_net(sk)); + if (!timeout) + goto out; + spin_lock_bh(&msk->pm.lock); if (!mptcp_pm_should_add_signal_addr(msk)) { @@ -302,7 +307,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer) if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) sk_reset_timer(sk, timer, - jiffies + mptcp_get_add_addr_timeout(sock_net(sk))); + jiffies + timeout); spin_unlock_bh(&msk->pm.lock); @@ -344,6 +349,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, struct mptcp_pm_add_entry *add_entry = NULL; struct sock *sk = (struct sock *)msk; struct net *net = 
sock_net(sk); + unsigned int timeout; lockdep_assert_held(&msk->pm.lock); @@ -368,8 +374,9 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); reset_timer: - sk_reset_timer(sk, &add_entry->add_timer, - jiffies + mptcp_get_add_addr_timeout(net)); + timeout = mptcp_get_add_addr_timeout(net); + if (timeout) + sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout); return true; } From f92199f551e617fae028c5c5905ddd63e3616e18 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 15 Aug 2025 19:28:24 +0200 Subject: [PATCH 2127/2411] selftests: mptcp: disable add_addr retrans in endpoint_tests To prevent test instability in the "delete re-add signal" test caused by ADD_ADDR retransmissions, disable retransmissions for this test by setting net.mptcp.add_addr_timeout to 0. Suggested-by: Matthieu Baerts Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-6-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index b8af65373b3a..82cae37d9c20 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3842,6 +3842,7 @@ endpoint_tests() # remove and re-add if reset_with_events "delete re-add signal" && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=0 pm_nl_set_limits $ns1 0 3 pm_nl_set_limits $ns2 3 3 pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal From 2eefbed30d46d5e68593baf6b52923e00e7678af Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 15 Aug 2025 19:28:25 +0200 Subject: [PATCH 2128/2411] selftests: mptcp: connect: fix C23 extension warning 
GCC was complaining about the new label: mptcp_connect.c:187:2: warning: label followed by a declaration is a C23 extension [-Wc23-extensions] 187 | int err = getaddrinfo(node, service, hints, res); | ^ Simply declare 'err' before the label to avoid this warning. Fixes: a862771d1aa4 ("selftests: mptcp: use IPPROTO_MPTCP for getaddrinfo") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-7-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index ac1349c4b9e5..4f07ac9fa207 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -183,9 +183,10 @@ static void xgetaddrinfo(const char *node, const char *service, struct addrinfo *hints, struct addrinfo **res) { -again: - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { const char *errstr; From 3259889fd3c0cc165b7e9ee375c789875dd32326 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 15 Aug 2025 19:28:26 +0200 Subject: [PATCH 2129/2411] selftests: mptcp: sockopt: fix C23 extension warning GCC was complaining about the new label: mptcp_inq.c:79:2: warning: label followed by a declaration is a C23 extension [-Wc23-extensions] 79 | int err = getaddrinfo(node, service, hints, res); | ^ mptcp_sockopt.c:166:2: warning: label followed by a declaration is a C23 extension [-Wc23-extensions] 166 | int err = getaddrinfo(node, service, hints, res); | ^ Simply declare 'err' before the label to avoid this warning. 
Fixes: dd367e81b79a ("selftests: mptcp: sockopt: use IPPROTO_MPTCP for getaddrinfo") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-8-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_inq.c | 5 +++-- tools/testing/selftests/net/mptcp/mptcp_sockopt.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index 3cf1e2a612ce..f3bcaa48df8f 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -75,9 +75,10 @@ static void xgetaddrinfo(const char *node, const char *service, struct addrinfo *hints, struct addrinfo **res) { -again: - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { const char *errstr; diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index 9934a68df237..e934dd26a59d 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -162,9 +162,10 @@ static void xgetaddrinfo(const char *node, const char *service, struct addrinfo *hints, struct addrinfo **res) { -again: - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { const char *errstr; From 89eb9a62aed77b409663ba1eac152e8f758815b7 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Fri, 15 Aug 2025 22:18:09 +0200 Subject: [PATCH 2130/2411] net: dsa: b53: fix reserved register access in b53_fdb_dump() When BCM5325 support was added in c45655386e53 ("net: dsa: b53: add support for FDB operations on 5325/5365"), the register used for ARL access was made conditional on the chip. 
But in b53_fdb_dump(), instead of the register argument the page argument was replaced, causing it to write to a reserved page 0x50 on !BCM5325*. Writing to this page seems to completely lock the switch up: [ 89.680000] b53-switch spi0.1 lan2: Link is Down [ 89.680000] WARNING: CPU: 1 PID: 26 at drivers/net/phy/phy.c:1350 _phy_state_machine+0x1bc/0x454 [ 89.720000] phy_check_link_status+0x0/0x114: returned: -5 [ 89.730000] Modules linked in: nft_fib_inet nf_flow_table_inet nft_reject_ipv6 nft_reject_ipv4 nft_reject_inet nft_reject nft_redir nft_quota nft_numgen nft_nat nft_masq nft_log nft_limit nft_hash nft_flow_offload nft_fib_ipv6 nft_fib_ipv4 nft_fib nft_ct nft_chain_nat nf_tables nf_nat nf_flow_table nf_conntrack nfnetlink nf_reject_ipv6 nf_reject_ipv4 nf_log_syslog nf_defrag_ipv6 nf_defrag_ipv4 cls_flower sch_tbf sch_ingress sch_htb sch_hfsc em_u32 cls_u32 cls_route cls_matchall cls_fw cls_flow cls_basic act_skbedit act_mirred act_gact vrf md5 crc32c_cryptoapi [ 89.780000] CPU: 1 UID: 0 PID: 26 Comm: kworker/u10:0 Tainted: G W 6.16.0-rc1+ #0 NONE [ 89.780000] Tainted: [W]=WARN [ 89.780000] Hardware name: Netgear DGND3700 v1 [ 89.780000] Workqueue: events_power_efficient phy_state_machine [ 89.780000] Stack : 809c762c 8006b050 00000001 820a9ce3 0000114c 000affff 805d22d0 8200ba00 [ 89.780000] 82005000 6576656e 74735f70 6f776572 5f656666 10008b00 820a9cb8 82088700 [ 89.780000] 00000000 00000000 809c762c 820a9a98 00000000 00000000 ffffefff 80a7a76c [ 89.780000] 80a70000 820a9af8 80a70000 80a70000 80a70000 00000000 809c762c 820a9dd4 [ 89.780000] 00000000 805d1494 80a029e4 80a70000 00000003 00000000 00000004 81a60004 [ 89.780000] ... 
[ 89.780000] Call Trace: [ 89.780000] [<800228b8>] show_stack+0x38/0x118 [ 89.780000] [<8001afc4>] dump_stack_lvl+0x6c/0xac [ 89.780000] [<80046b90>] __warn+0x9c/0x114 [ 89.780000] [<80046da8>] warn_slowpath_fmt+0x1a0/0x1b0 [ 89.780000] [<805d1494>] _phy_state_machine+0x1bc/0x454 [ 89.780000] [<805d22fc>] phy_state_machine+0x2c/0x70 [ 89.780000] [<80066b08>] process_one_work+0x1e8/0x3e0 [ 89.780000] [<80067a1c>] worker_thread+0x354/0x4e4 [ 89.780000] [<800706cc>] kthread+0x130/0x274 [ 89.780000] [<8001d808>] ret_from_kernel_thread+0x14/0x1c And any further accesses fail: [ 120.790000] b53-switch spi0.1: timeout waiting for ARL to finish: 0x81 [ 120.800000] b53-switch spi0.1: port 2 failed to add 2c:b0:5d:27:9a:bd vid 3 to fdb: -145 [ 121.010000] b53-switch spi0.1: timeout waiting for ARL to finish: 0xbf [ 121.020000] b53-switch spi0.1: port 3 failed to add 2c:b0:5d:27:9a:bd vid 3 to fdb: -145 Restore the correct page B53_ARLIO_PAGE again, and move the offset argument to the correct place. *On BCM5325, this became a write to the MIB page of Port 1. Still a reserved offset, but likely less brokenness from that write. 
Fixes: c45655386e53 ("net: dsa: b53: add support for FDB operations on 5325/5365") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250815201809.549195-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 9942fb6f7f4b..829b1f087e9e 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2078,7 +2078,7 @@ int b53_fdb_dump(struct dsa_switch *ds, int port, /* Start search operation */ reg = ARL_SRCH_STDN; - b53_write8(priv, offset, B53_ARL_SRCH_CTL, reg); + b53_write8(priv, B53_ARLIO_PAGE, offset, reg); do { ret = b53_arl_search_wait(priv); From 4611d88a37cfc18cbabc6978aaf7325d1ae3f53a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sat, 16 Aug 2025 11:38:50 -0700 Subject: [PATCH 2131/2411] bnxt_en: Fix lockdep warning during rmmod The commit under the Fixes tag added a netdev_assert_locked() in bnxt_free_ntp_fltrs(). The lock should be held during normal run-time but the assert will be triggered (see below) during bnxt_remove_one() which should not need the lock. The netdev is already unregistered by then. Fix it by calling netdev_assert_locked_or_invisible() which will not assert if the netdev is unregistered. WARNING: CPU: 5 PID: 2241 at ./include/net/netdev_lock.h:17 bnxt_free_ntp_fltrs+0xf8/0x100 [bnxt_en] Modules linked in: rpcrdma rdma_cm iw_cm ib_cm configfs ib_core bnxt_en(-) bridge stp llc x86_pkg_temp_thermal xfs tg3 [last unloaded: bnxt_re] CPU: 5 UID: 0 PID: 2241 Comm: rmmod Tainted: G S W 6.16.0 #2 PREEMPT(voluntary) Tainted: [S]=CPU_OUT_OF_SPEC, [W]=WARN Hardware name: Dell Inc. 
PowerEdge R730/072T6D, BIOS 2.4.3 01/17/2017 RIP: 0010:bnxt_free_ntp_fltrs+0xf8/0x100 [bnxt_en] Code: 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc 48 8b 47 60 be ff ff ff ff 48 8d b8 28 0c 00 00 e8 d0 cf 41 c3 85 c0 0f 85 2e ff ff ff <0f> 0b e9 27 ff ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 RSP: 0018:ffffa92082387da0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff9e5b593d8000 RCX: 0000000000000001 RDX: 0000000000000001 RSI: ffffffff83dc9a70 RDI: ffffffff83e1a1cf RBP: ffff9e5b593d8c80 R08: 0000000000000000 R09: ffffffff8373a2b3 R10: 000000008100009f R11: 0000000000000001 R12: 0000000000000001 R13: ffffffffc01c4478 R14: dead000000000122 R15: dead000000000100 FS: 00007f3a8a52c740(0000) GS:ffff9e631ad1c000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055bb289419c8 CR3: 000000011274e001 CR4: 00000000003706f0 Call Trace: bnxt_remove_one+0x57/0x180 [bnxt_en] pci_device_remove+0x39/0xc0 device_release_driver_internal+0xa5/0x130 driver_detach+0x42/0x90 bus_remove_driver+0x61/0xc0 pci_unregister_driver+0x38/0x90 bnxt_exit+0xc/0x7d0 [bnxt_en] Fixes: 004b5008016a ("eth: bnxt: remove most dependencies on RTNL") Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250816183850.4125033-1-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2800a90fba1f..207a8bb36ae5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5332,7 +5332,7 @@ static void bnxt_free_ntp_fltrs(struct bnxt *bp, bool all) { int i; - netdev_assert_locked(bp->dev); + netdev_assert_locked_or_invisible(bp->dev); /* Under netdev instance lock and all our NAPIs have been disabled. * It's safe to delete the hash table. 
From eabcac808ca3ee9878223d4b49b750979029016b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Aug 2025 08:58:23 -0700 Subject: [PATCH 2132/2411] scsi: ufs: core: Fix IRQ lock inversion for the SCSI host lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 3c7ac40d7322 ("scsi: ufs: core: Delegate the interrupt service routine to a threaded IRQ handler") introduced an IRQ lock inversion issue. Fix this lock inversion by changing the spin_lock_irq() calls into spin_lock_irqsave() calls in code that can be called either from interrupt context or from thread context. This patch fixes the following lockdep complaint: WARNING: possible irq lock inversion dependency detected 6.12.30-android16-5-maybe-dirty-4k #1 Tainted: G W OE -------------------------------------------------------- kworker/u28:0/12 just changed the state of lock: ffffff881e29dd60 (&hba->clk_gating.lock){-...}-{2:2}, at: ufshcd_release_scsi_cmd+0x60/0x110 but this lock took another, HARDIRQ-unsafe lock in the past: (shost->host_lock){+.+.}-{2:2} and interrupts could create inverse lock ordering between them. 
other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(shost->host_lock); local_irq_disable(); lock(&hba->clk_gating.lock); lock(shost->host_lock); lock(&hba->clk_gating.lock); *** DEADLOCK *** 4 locks held by kworker/u28:0/12: #0: ffffff8800ac6158 ((wq_completion)async){+.+.}-{0:0}, at: process_one_work+0x1bc/0x65c #1: ffffffc085c93d70 ((work_completion)(&entry->work)){+.+.}-{0:0}, at: process_one_work+0x1e4/0x65c #2: ffffff881e29c0e0 (&shost->scan_mutex){+.+.}-{3:3}, at: __scsi_add_device+0x74/0x120 #3: ffffff881960ea00 (&hwq->cq_lock){-...}-{2:2}, at: ufshcd_mcq_poll_cqe_lock+0x28/0x104 the shortest dependencies between 2nd lock and 1st lock: -> (shost->host_lock){+.+.}-{2:2} { HARDIRQ-ON-W at: lock_acquire+0x134/0x2b4 _raw_spin_lock+0x48/0x64 ufshcd_sl_intr+0x4c/0xa08 ufshcd_threaded_intr+0x70/0x12c irq_thread_fn+0x48/0xa8 irq_thread+0x130/0x1ec kthread+0x110/0x134 ret_from_fork+0x10/0x20 SOFTIRQ-ON-W at: lock_acquire+0x134/0x2b4 _raw_spin_lock+0x48/0x64 ufshcd_sl_intr+0x4c/0xa08 ufshcd_threaded_intr+0x70/0x12c irq_thread_fn+0x48/0xa8 irq_thread+0x130/0x1ec kthread+0x110/0x134 ret_from_fork+0x10/0x20 INITIAL USE at: lock_acquire+0x134/0x2b4 _raw_spin_lock+0x48/0x64 ufshcd_sl_intr+0x4c/0xa08 ufshcd_threaded_intr+0x70/0x12c irq_thread_fn+0x48/0xa8 irq_thread+0x130/0x1ec kthread+0x110/0x134 ret_from_fork+0x10/0x20 } ... key at: [] scsi_host_alloc.__key+0x0/0x10 ... 
acquired at: _raw_spin_lock_irqsave+0x5c/0x80 __ufshcd_release+0x78/0x118 ufshcd_send_uic_cmd+0xe4/0x118 ufshcd_dme_set_attr+0x88/0x1c8 ufs_google_phy_initialization+0x68/0x418 [ufs] ufs_google_link_startup_notify+0x78/0x27c [ufs] ufshcd_link_startup+0x84/0x720 ufshcd_init+0xf3c/0x1330 ufshcd_pltfrm_init+0x728/0x7d8 ufs_google_probe+0x30/0x84 [ufs] platform_probe+0xa0/0xe0 really_probe+0x114/0x454 __driver_probe_device+0xa4/0x160 driver_probe_device+0x44/0x23c __driver_attach_async_helper+0x60/0xd4 async_run_entry_fn+0x4c/0x17c process_one_work+0x26c/0x65c worker_thread+0x33c/0x498 kthread+0x110/0x134 ret_from_fork+0x10/0x20 -> (&hba->clk_gating.lock){-...}-{2:2} { IN-HARDIRQ-W at: lock_acquire+0x134/0x2b4 _raw_spin_lock_irqsave+0x5c/0x80 ufshcd_release_scsi_cmd+0x60/0x110 ufshcd_compl_one_cqe+0x2c0/0x3f4 ufshcd_mcq_poll_cqe_lock+0xb0/0x104 ufs_google_mcq_intr+0x80/0xa0 [ufs] __handle_irq_event_percpu+0x104/0x32c handle_irq_event+0x40/0x9c handle_fasteoi_irq+0x170/0x2e8 generic_handle_domain_irq+0x58/0x80 gic_handle_irq+0x48/0x104 call_on_irq_stack+0x3c/0x50 do_interrupt_handler+0x7c/0xd8 el1_interrupt+0x34/0x58 el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x68/0x6c _raw_spin_unlock_irqrestore+0x3c/0x6c debug_object_assert_init+0x16c/0x21c __mod_timer+0x4c/0x48c schedule_timeout+0xd4/0x16c io_schedule_timeout+0x48/0x70 do_wait_for_common+0x100/0x194 wait_for_completion_io_timeout+0x48/0x6c blk_execute_rq+0x124/0x17c scsi_execute_cmd+0x18c/0x3f8 scsi_probe_and_add_lun+0x204/0xd74 __scsi_add_device+0xbc/0x120 ufshcd_async_scan+0x80/0x3c0 async_run_entry_fn+0x4c/0x17c process_one_work+0x26c/0x65c worker_thread+0x33c/0x498 kthread+0x110/0x134 ret_from_fork+0x10/0x20 INITIAL USE at: lock_acquire+0x134/0x2b4 _raw_spin_lock_irqsave+0x5c/0x80 ufshcd_hold+0x34/0x14c ufshcd_send_uic_cmd+0x28/0x118 ufshcd_dme_set_attr+0x88/0x1c8 ufs_google_phy_initialization+0x68/0x418 [ufs] ufs_google_link_startup_notify+0x78/0x27c [ufs] ufshcd_link_startup+0x84/0x720 ufshcd_init+0xf3c/0x1330 
ufshcd_pltfrm_init+0x728/0x7d8 ufs_google_probe+0x30/0x84 [ufs] platform_probe+0xa0/0xe0 really_probe+0x114/0x454 __driver_probe_device+0xa4/0x160 driver_probe_device+0x44/0x23c __driver_attach_async_helper+0x60/0xd4 async_run_entry_fn+0x4c/0x17c process_one_work+0x26c/0x65c worker_thread+0x33c/0x498 kthread+0x110/0x134 ret_from_fork+0x10/0x20 } ... key at: [] ufshcd_init.__key+0x0/0x10 ... acquired at: mark_lock+0x1c4/0x224 __lock_acquire+0x438/0x2e1c lock_acquire+0x134/0x2b4 _raw_spin_lock_irqsave+0x5c/0x80 ufshcd_release_scsi_cmd+0x60/0x110 ufshcd_compl_one_cqe+0x2c0/0x3f4 ufshcd_mcq_poll_cqe_lock+0xb0/0x104 ufs_google_mcq_intr+0x80/0xa0 [ufs] __handle_irq_event_percpu+0x104/0x32c handle_irq_event+0x40/0x9c handle_fasteoi_irq+0x170/0x2e8 generic_handle_domain_irq+0x58/0x80 gic_handle_irq+0x48/0x104 call_on_irq_stack+0x3c/0x50 do_interrupt_handler+0x7c/0xd8 el1_interrupt+0x34/0x58 el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x68/0x6c _raw_spin_unlock_irqrestore+0x3c/0x6c debug_object_assert_init+0x16c/0x21c __mod_timer+0x4c/0x48c schedule_timeout+0xd4/0x16c io_schedule_timeout+0x48/0x70 do_wait_for_common+0x100/0x194 wait_for_completion_io_timeout+0x48/0x6c blk_execute_rq+0x124/0x17c scsi_execute_cmd+0x18c/0x3f8 scsi_probe_and_add_lun+0x204/0xd74 __scsi_add_device+0xbc/0x120 ufshcd_async_scan+0x80/0x3c0 async_run_entry_fn+0x4c/0x17c process_one_work+0x26c/0x65c worker_thread+0x33c/0x498 kthread+0x110/0x134 ret_from_fork+0x10/0x20 stack backtrace: CPU: 6 UID: 0 PID: 12 Comm: kworker/u28:0 Tainted: G W OE 6.12.30-android16-5-maybe-dirty-4k #1 ccd4020fe444bdf629efc3b86df6be920b8df7d0 Tainted: [W]=WARN, [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: Spacecraft board based on MALIBU (DT) Workqueue: async async_run_entry_fn Call trace: dump_backtrace+0xfc/0x17c show_stack+0x18/0x28 dump_stack_lvl+0x40/0xa0 dump_stack+0x18/0x24 print_irq_inversion_bug+0x2fc/0x304 mark_lock_irq+0x388/0x4fc mark_lock+0x1c4/0x224 __lock_acquire+0x438/0x2e1c lock_acquire+0x134/0x2b4 
_raw_spin_lock_irqsave+0x5c/0x80 ufshcd_release_scsi_cmd+0x60/0x110 ufshcd_compl_one_cqe+0x2c0/0x3f4 ufshcd_mcq_poll_cqe_lock+0xb0/0x104 ufs_google_mcq_intr+0x80/0xa0 [ufs dd6f385554e109da094ab91d5f7be18625a2222a] __handle_irq_event_percpu+0x104/0x32c handle_irq_event+0x40/0x9c handle_fasteoi_irq+0x170/0x2e8 generic_handle_domain_irq+0x58/0x80 gic_handle_irq+0x48/0x104 call_on_irq_stack+0x3c/0x50 do_interrupt_handler+0x7c/0xd8 el1_interrupt+0x34/0x58 el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x68/0x6c _raw_spin_unlock_irqrestore+0x3c/0x6c debug_object_assert_init+0x16c/0x21c __mod_timer+0x4c/0x48c schedule_timeout+0xd4/0x16c io_schedule_timeout+0x48/0x70 do_wait_for_common+0x100/0x194 wait_for_completion_io_timeout+0x48/0x6c blk_execute_rq+0x124/0x17c scsi_execute_cmd+0x18c/0x3f8 scsi_probe_and_add_lun+0x204/0xd74 __scsi_add_device+0xbc/0x120 ufshcd_async_scan+0x80/0x3c0 async_run_entry_fn+0x4c/0x17c process_one_work+0x26c/0x65c worker_thread+0x33c/0x498 kthread+0x110/0x134 ret_from_fork+0x10/0x20 Cc: Neil Armstrong Cc: André Draszik Reviewed-by: Peter Wang Fixes: 3c7ac40d7322 ("scsi: ufs: core: Delegate the interrupt service routine to a threaded IRQ handler") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20250815155842.472867-2-bvanassche@acm.org Signed-off-by: Martin K. 
Petersen --- drivers/ufs/core/ufshcd.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index efd7a811a002..b047325a3669 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5559,7 +5559,7 @@ static irqreturn_t ufshcd_uic_cmd_compl(struct ufs_hba *hba, u32 intr_status) irqreturn_t retval = IRQ_NONE; struct uic_command *cmd; - spin_lock(hba->host->host_lock); + guard(spinlock_irqsave)(hba->host->host_lock); cmd = hba->active_uic_cmd; if (WARN_ON_ONCE(!cmd)) goto unlock; @@ -5586,8 +5586,6 @@ static irqreturn_t ufshcd_uic_cmd_compl(struct ufs_hba *hba, u32 intr_status) ufshcd_add_uic_command_trace(hba, cmd, UFS_CMD_COMP); unlock: - spin_unlock(hba->host->host_lock); - return retval; } @@ -6920,7 +6918,7 @@ static irqreturn_t ufshcd_check_errors(struct ufs_hba *hba, u32 intr_status) bool queue_eh_work = false; irqreturn_t retval = IRQ_NONE; - spin_lock(hba->host->host_lock); + guard(spinlock_irqsave)(hba->host->host_lock); hba->errors |= UFSHCD_ERROR_MASK & intr_status; if (hba->errors & INT_FATAL_ERRORS) { @@ -6979,7 +6977,7 @@ static irqreturn_t ufshcd_check_errors(struct ufs_hba *hba, u32 intr_status) */ hba->errors = 0; hba->uic_error = 0; - spin_unlock(hba->host->host_lock); + return retval; } From e5203d89d59bfcbe1f348aa0d2dc4449a8ba644c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Aug 2025 08:58:24 -0700 Subject: [PATCH 2133/2411] scsi: ufs: core: Remove WARN_ON_ONCE() call from ufshcd_uic_cmd_compl() The UIC completion interrupt may be disabled while an UIC command is being processed. When the UIC completion interrupt is reenabled, an UIC interrupt is triggered and the WARN_ON_ONCE(!cmd) statement is hit. Hence this patch that removes this kernel warning. 
Fixes: fcd8b0450a9a ("scsi: ufs: core: Make ufshcd_uic_cmd_compl() easier to analyze") Reviewed-by: Peter Wang Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20250815155842.472867-3-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index b047325a3669..2097efe3a990 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5561,7 +5561,7 @@ static irqreturn_t ufshcd_uic_cmd_compl(struct ufs_hba *hba, u32 intr_status) guard(spinlock_irqsave)(hba->host->host_lock); cmd = hba->active_uic_cmd; - if (WARN_ON_ONCE(!cmd)) + if (!cmd) goto unlock; if (ufshcd_is_auto_hibern8_error(hba, intr_status)) From 9ee35fd43f94bf19dbd27cffc213a31314b623d2 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Aug 2025 08:58:25 -0700 Subject: [PATCH 2134/2411] scsi: ufs: core: Fix the return value documentation ufshcd_wait_for_dev_cmd() and all its callers can return an OCS error. OCS errors are represented by positive integers. Remove the WARN_ONCE() statements that complain about positive error codes and update the documentation. Keep the behavior of ufshcd_wait_for_dev_cmd() because this return value may end up being passed as the second argument of bsg_job_done() and bsg_job_done() handles positive and negative error codes differently. Cc: Peter Wang Fixes: cc59f3b68542 ("scsi: ufs: core: Improve return value documentation") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20250815155842.472867-4-bvanassche@acm.org Reviewed-by: Peter Wang Signed-off-by: Martin K.
Petersen --- drivers/ufs/core/ufshcd.c | 62 ++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 2097efe3a990..f471288a5c70 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -3199,7 +3199,8 @@ ufshcd_dev_cmd_completion(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) } /* - * Return: 0 upon success; < 0 upon failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, struct ufshcd_lrb *lrbp, int max_timeout) @@ -3275,7 +3276,6 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, } } - WARN_ONCE(err > 0, "Incorrect return value %d > 0\n", err); return err; } @@ -3294,7 +3294,8 @@ static void ufshcd_dev_man_unlock(struct ufs_hba *hba) } /* - * Return: 0 upon success; < 0 upon failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ static int ufshcd_issue_dev_cmd(struct ufs_hba *hba, struct ufshcd_lrb *lrbp, const u32 tag, int timeout) @@ -3317,7 +3318,8 @@ static int ufshcd_issue_dev_cmd(struct ufs_hba *hba, struct ufshcd_lrb *lrbp, * @cmd_type: specifies the type (NOP, Query...) * @timeout: timeout in milliseconds * - * Return: 0 upon success; < 0 upon failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. * * NOTE: Since there is only one available tag for device management commands, * it is expected you hold the hba->dev_cmd.lock mutex. @@ -3363,6 +3365,10 @@ static inline void ufshcd_init_query(struct ufs_hba *hba, (*request)->upiu_req.selector = selector; } +/* + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. 
+ */ static int ufshcd_query_flag_retry(struct ufs_hba *hba, enum query_opcode opcode, enum flag_idn idn, u8 index, bool *flag_res) { @@ -3383,7 +3389,6 @@ static int ufshcd_query_flag_retry(struct ufs_hba *hba, dev_err(hba->dev, "%s: query flag, opcode %d, idn %d, failed with error %d after %d retries\n", __func__, opcode, idn, ret, retries); - WARN_ONCE(ret > 0, "Incorrect return value %d > 0\n", ret); return ret; } @@ -3395,7 +3400,8 @@ static int ufshcd_query_flag_retry(struct ufs_hba *hba, * @index: flag index to access * @flag_res: the flag value after the query request completes * - * Return: 0 for success; < 0 upon failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode, enum flag_idn idn, u8 index, bool *flag_res) @@ -3451,7 +3457,6 @@ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode, out_unlock: ufshcd_dev_man_unlock(hba); - WARN_ONCE(err > 0, "Incorrect return value %d > 0\n", err); return err; } @@ -3464,8 +3469,9 @@ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode, * @selector: selector field * @attr_val: the attribute value after the query request completes * - * Return: 0 upon success; < 0 upon failure. -*/ + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. + */ int ufshcd_query_attr(struct ufs_hba *hba, enum query_opcode opcode, enum attr_idn idn, u8 index, u8 selector, u32 *attr_val) { @@ -3513,7 +3519,6 @@ int ufshcd_query_attr(struct ufs_hba *hba, enum query_opcode opcode, out_unlock: ufshcd_dev_man_unlock(hba); - WARN_ONCE(err > 0, "Incorrect return value %d > 0\n", err); return err; } @@ -3528,8 +3533,9 @@ int ufshcd_query_attr(struct ufs_hba *hba, enum query_opcode opcode, * @attr_val: the attribute value after the query request * completes * - * Return: 0 for success; < 0 upon failure. 
-*/ + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. + */ int ufshcd_query_attr_retry(struct ufs_hba *hba, enum query_opcode opcode, enum attr_idn idn, u8 index, u8 selector, u32 *attr_val) @@ -3551,12 +3557,12 @@ int ufshcd_query_attr_retry(struct ufs_hba *hba, dev_err(hba->dev, "%s: query attribute, idn %d, failed with error %d after %d retries\n", __func__, idn, ret, QUERY_REQ_RETRIES); - WARN_ONCE(ret > 0, "Incorrect return value %d > 0\n", ret); return ret; } /* - * Return: 0 if successful; < 0 upon failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ static int __ufshcd_query_descriptor(struct ufs_hba *hba, enum query_opcode opcode, enum desc_idn idn, u8 index, @@ -3615,7 +3621,6 @@ static int __ufshcd_query_descriptor(struct ufs_hba *hba, out_unlock: hba->dev_cmd.query.descriptor = NULL; ufshcd_dev_man_unlock(hba); - WARN_ONCE(err > 0, "Incorrect return value %d > 0\n", err); return err; } @@ -3632,7 +3637,8 @@ static int __ufshcd_query_descriptor(struct ufs_hba *hba, * The buf_len parameter will contain, on return, the length parameter * received on the response. * - * Return: 0 for success; < 0 upon failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ int ufshcd_query_descriptor_retry(struct ufs_hba *hba, enum query_opcode opcode, @@ -3650,7 +3656,6 @@ int ufshcd_query_descriptor_retry(struct ufs_hba *hba, break; } - WARN_ONCE(err > 0, "Incorrect return value %d > 0\n", err); return err; } @@ -3663,7 +3668,8 @@ int ufshcd_query_descriptor_retry(struct ufs_hba *hba, * @param_read_buf: pointer to buffer where parameter would be read * @param_size: sizeof(param_read_buf) * - * Return: 0 in case of success; < 0 upon failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. 
*/ int ufshcd_read_desc_param(struct ufs_hba *hba, enum desc_idn desc_id, @@ -3730,7 +3736,6 @@ int ufshcd_read_desc_param(struct ufs_hba *hba, out: if (is_kmalloc) kfree(desc_buf); - WARN_ONCE(ret > 0, "Incorrect return value %d > 0\n", ret); return ret; } @@ -4781,7 +4786,8 @@ EXPORT_SYMBOL_GPL(ufshcd_config_pwr_mode); * * Set fDeviceInit flag and poll until device toggles it. * - * Return: 0 upon success; < 0 upon failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ static int ufshcd_complete_dev_init(struct ufs_hba *hba) { @@ -5135,7 +5141,8 @@ static int ufshcd_link_startup(struct ufs_hba *hba) * not respond with NOP IN UPIU within timeout of %NOP_OUT_TIMEOUT * and we retry sending NOP OUT for %NOP_OUT_RETRIES iterations. * - * Return: 0 upon success; < 0 upon failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ static int ufshcd_verify_dev_init(struct ufs_hba *hba) { @@ -5867,7 +5874,8 @@ static inline int ufshcd_enable_ee(struct ufs_hba *hba, u16 mask) * as the device is allowed to manage its own way of handling background * operations. * - * Return: zero on success, non-zero on failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ static int ufshcd_enable_auto_bkops(struct ufs_hba *hba) { @@ -5906,7 +5914,8 @@ static int ufshcd_enable_auto_bkops(struct ufs_hba *hba) * host is idle so that BKOPS are managed effectively without any negative * impacts. * - * Return: zero on success, non-zero on failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. 
*/ static int ufshcd_disable_auto_bkops(struct ufs_hba *hba) { @@ -6056,6 +6065,10 @@ static void ufshcd_bkops_exception_event_handler(struct ufs_hba *hba) __func__, err); } +/* + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. + */ int ufshcd_read_device_lvl_exception_id(struct ufs_hba *hba, u64 *exception_id) { struct utp_upiu_query_v4_0 *upiu_resp; @@ -7452,7 +7465,8 @@ int ufshcd_exec_raw_upiu_cmd(struct ufs_hba *hba, * @sg_list: Pointer to SG list when DATA IN/OUT UPIU is required in ARPMB operation * @dir: DMA direction * - * Return: zero on success, non-zero on failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ int ufshcd_advanced_rpmb_req_handler(struct ufs_hba *hba, struct utp_upiu_req *req_upiu, struct utp_upiu_req *rsp_upiu, struct ufs_ehs *req_ehs, From 09d57d68ba9a36117eadb75d3ecf817a3c091acc Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Aug 2025 08:58:26 -0700 Subject: [PATCH 2135/2411] scsi: ufs: core: Rename ufshcd_wait_for_doorbell_clr() The name ufshcd_wait_for_doorbell_clr() refers to legacy mode. Commit 8d077ede48c1 ("scsi: ufs: Optimize the command queueing code") added support for MCQ mode in this function. Since then the name of this function is misleading. Hence change the name of this function into something that is appropriate for both legacy and MCQ mode. Reviewed-by: Peter Wang Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20250815155842.472867-5-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index f471288a5c70..9a43102b2b21 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -1303,7 +1303,7 @@ static u32 ufshcd_pending_cmds(struct ufs_hba *hba) * * Return: 0 upon success; -EBUSY upon timeout. 
*/ -static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba, +static int ufshcd_wait_for_pending_cmds(struct ufs_hba *hba, u64 wait_timeout_us) { int ret = 0; @@ -1431,7 +1431,7 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba, u64 timeout_us) down_write(&hba->clk_scaling_lock); if (!hba->clk_scaling.is_allowed || - ufshcd_wait_for_doorbell_clr(hba, timeout_us)) { + ufshcd_wait_for_pending_cmds(hba, timeout_us)) { ret = -EBUSY; up_write(&hba->clk_scaling_lock); mutex_unlock(&hba->wb_mutex); From 6300d5c5438724c0876828da2f6e2c1a661871fc Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Mon, 11 Aug 2025 13:03:30 +0530 Subject: [PATCH 2136/2411] scsi: ufs: ufs-qcom: Fix ESI null pointer dereference ESI/MSI is a performance optimization feature that provides dedicated interrupts per MCQ hardware queue. This is an optional feature, and UFS MCQ should work with and without the ESI feature. Commit e46a28cea29a ("scsi: ufs: qcom: Remove the MSI descriptor abuse") brings a regression in ESI (Enhanced System Interrupt) configuration that causes a null pointer dereference when Platform MSI allocation fails. The issue occurs when platform_device_msi_init_and_alloc_irqs() in ufs_qcom_config_esi() fails (returns -EINVAL) but the current code uses the __free() macro for automatic cleanup, which frees MSI resources that were never successfully allocated. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Call trace: mutex_lock+0xc/0x54 (P) platform_device_msi_free_irqs_all+0x1c/0x40 ufs_qcom_config_esi+0x1d0/0x220 [ufs_qcom] ufshcd_config_mcq+0x28/0x104 ufshcd_init+0xa3c/0xf40 ufshcd_pltfrm_init+0x504/0x7d4 ufs_qcom_probe+0x20/0x58 [ufs_qcom] Fix this by restructuring the ESI configuration to try MSI allocation first, before any other resource allocation, and use explicit cleanup instead of the __free() macro to avoid cleanup of unallocated resources. Tested on SM8750 platform with MCQ enabled, both with and without Platform ESI support.
Fixes: e46a28cea29a ("scsi: ufs: qcom: Remove the MSI descriptor abuse") Cc: Manivannan Sadhasivam Cc: Thomas Gleixner Cc: James Bottomley Signed-off-by: Nitin Rawat Link: https://lore.kernel.org/r/20250811073330.20230-1-quic_nitirawa@quicinc.com Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-qcom.c | 39 ++++++++++++++----------------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 76fc70503a62..9574fdc2bb0f 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -2070,17 +2070,6 @@ static irqreturn_t ufs_qcom_mcq_esi_handler(int irq, void *data) return IRQ_HANDLED; } -static void ufs_qcom_irq_free(struct ufs_qcom_irq *uqi) -{ - for (struct ufs_qcom_irq *q = uqi; q->irq; q++) - devm_free_irq(q->hba->dev, q->irq, q->hba); - - platform_device_msi_free_irqs_all(uqi->hba->dev); - devm_kfree(uqi->hba->dev, uqi); -} - -DEFINE_FREE(ufs_qcom_irq, struct ufs_qcom_irq *, if (_T) ufs_qcom_irq_free(_T)) - static int ufs_qcom_config_esi(struct ufs_hba *hba) { struct ufs_qcom_host *host = ufshcd_get_variant(hba); @@ -2095,18 +2084,18 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba) */ nr_irqs = hba->nr_hw_queues - hba->nr_queues[HCTX_TYPE_POLL]; - struct ufs_qcom_irq *qi __free(ufs_qcom_irq) = - devm_kcalloc(hba->dev, nr_irqs, sizeof(*qi), GFP_KERNEL); - if (!qi) - return -ENOMEM; - /* Preset so __free() has a pointer to hba in all error paths */ - qi[0].hba = hba; - ret = platform_device_msi_init_and_alloc_irqs(hba->dev, nr_irqs, ufs_qcom_write_msi_msg); if (ret) { - dev_err(hba->dev, "Failed to request Platform MSI %d\n", ret); - return ret; + dev_warn(hba->dev, "Platform MSI not supported or failed, continuing without ESI\n"); + return ret; /* Continue without ESI */ + } + + struct ufs_qcom_irq *qi = devm_kcalloc(hba->dev, nr_irqs, sizeof(*qi), GFP_KERNEL); + + if (!qi) { + platform_device_msi_free_irqs_all(hba->dev); + return -ENOMEM; } for (int idx = 
0; idx < nr_irqs; idx++) { @@ -2117,15 +2106,17 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba) ret = devm_request_irq(hba->dev, qi[idx].irq, ufs_qcom_mcq_esi_handler, IRQF_SHARED, "qcom-mcq-esi", qi + idx); if (ret) { - dev_err(hba->dev, "%s: Fail to request IRQ for %d, err = %d\n", + dev_err(hba->dev, "%s: Failed to request IRQ for %d, err = %d\n", __func__, qi[idx].irq, ret); - qi[idx].irq = 0; + /* Free previously allocated IRQs */ + for (int j = 0; j < idx; j++) + devm_free_irq(hba->dev, qi[j].irq, qi + j); + platform_device_msi_free_irqs_all(hba->dev); + devm_kfree(hba->dev, qi); return ret; } } - retain_and_null_ptr(qi); - if (host->hw_ver.major >= 6) { ufshcd_rmwl(hba, ESI_VEC_MASK, FIELD_PREP(ESI_VEC_MASK, MAX_ESI_VEC - 1), REG_UFS_CFG3); From 018f659753fd38bb6fdba7fa8c751121b495e1f4 Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Mon, 18 Aug 2025 23:42:43 +0300 Subject: [PATCH 2137/2411] ALSA: hda/realtek: Fix headset mic on ASUS Zenbook 14 Add a PCI quirk to enable microphone input on the headphone jack on the ASUS Zenbook 14 UM3406HA laptop. This model uses an ALC294 codec with CS35L41 amplifiers over I2C, and the existing fixup for it did not enable the headset microphone. A new fix is introduced to get the mic working while keeping the amplifier settings correct. 
Fixes: 61cbc08fdb04 ("ALSA: hda/realtek: Add quirks for ASUS 2024 Zenbooks") Signed-off-by: Vasiliy Kovalev Link: https://patch.msgid.link/20250818204243.247297-1-kovalev@altlinux.org Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index db8e6352b942..6c78a286172c 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -3579,6 +3579,7 @@ enum { ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE, ALC294_FIXUP_ASUS_MIC, ALC294_FIXUP_ASUS_HEADSET_MIC, + ALC294_FIXUP_ASUS_I2C_HEADSET_MIC, ALC294_FIXUP_ASUS_SPK, ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE, ALC285_FIXUP_LENOVO_PC_BEEP_IN_NOISE, @@ -4889,6 +4890,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MIC }, + [ALC294_FIXUP_ASUS_I2C_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x03a19020 }, /* use as headset mic */ + { } + }, + .chained = true, + .chain_id = ALC287_FIXUP_CS35L41_I2C_2 + }, [ALC294_FIXUP_ASUS_SPK] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -6730,7 +6740,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1b13, "ASUS U41SV/GA403U", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1b93, "ASUS G614JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1043, 0x1c03, "ASUS UM3406HA", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x1c03, "ASUS UM3406HA", ALC294_FIXUP_ASUS_I2C_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x1c33, "ASUS UX5304MA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1c43, "ASUS UX8406MA", ALC245_FIXUP_CS35L41_SPI_2), From f4b3cef55f5f96fdb4e7f9ca90b7d6213689faeb 
Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 19 Aug 2025 14:03:44 +0800 Subject: [PATCH 2138/2411] ALSA: hda/realtek: Audio disappears on HP 15-fc000 after warm boot again There was a similar bug in the past (Bug 217440), which was fixed for this laptop. The same issue is occurring again as of kernel v6.12.2. The symptoms are very similar - initially audio works but after a warm reboot, the audio completely disappears until the computer is powered off (there is no audio output at all). The issue is related, but it is caused by a different change this time. By bisecting different kernel versions, I found that reverting cc3d0b5dd989 in patch_realtek.c[*] restores the sound and it works fine after the reboot. [*] https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/sound/pci/hda/patch_realtek.c?h=v6.12.2&id=4ed7f16070a8475c088ff423b2eb11ba15eb89b6 [ patch description reformatted by tiwai ] Fixes: cc3d0b5dd989 ("ALSA: hda/realtek: Update ALC256 depop procedure") Link: https://bugzilla.kernel.org/show_bug.cgi?id=220109 Signed-off-by: Kailang Yang Link: https://lore.kernel.org/5317ca723c82447a938414fcca85cbf5@realtek.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 6c78a286172c..0323606b3d6d 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -510,6 +510,15 @@ static void alc256_shutup(struct hda_codec *codec) hp_pin = 0x21; alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */ + + /* 3k pull low control for Headset jack. */ + /* NOTE: call this before clearing the pin, otherwise codec stalls */ + /* If disable 3k pulldown control for alc257, the Mic detection will not work correctly + * when booting with headset plugged.
So skip setting it for the codec alc257 + */ + if (spec->en_3kpull_low) + alc_update_coef_idx(codec, 0x46, 0, 3 << 12); + hp_pin_sense = snd_hda_jack_detect(codec, hp_pin); if (hp_pin_sense) { @@ -520,14 +529,6 @@ static void alc256_shutup(struct hda_codec *codec) msleep(75); - /* 3k pull low control for Headset jack. */ - /* NOTE: call this before clearing the pin, otherwise codec stalls */ - /* If disable 3k pulldown control for alc257, the Mic detection will not work correctly - * when booting with headset plugged. So skip setting it for the codec alc257 - */ - if (spec->en_3kpull_low) - alc_update_coef_idx(codec, 0x46, 0, 3 << 12); - if (!spec->no_shutup_pins) snd_hda_codec_write(codec, hp_pin, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); From ff646d033783068cc5b38924873cab4a536b17c1 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 5 Aug 2025 14:56:56 +0300 Subject: [PATCH 2139/2411] drm/i915: silence rpm wakeref asserts on GEN11_GU_MISC_IIR access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 8d9908e8fe9c ("drm/i915/display: remove small micro-optimizations in irq handling") not only removed the optimizations, it also enabled wakeref asserts for the GEN11_GU_MISC_IIR access. Silence the asserts by wrapping the access inside intel_display_rpm_assert_{block,unblock}(). Reported-by: "Jason A. 
Donenfeld" Closes: https://lore.kernel.org/r/aG0tWkfmxWtxl_xc@zx2c4.com Fixes: 8d9908e8fe9c ("drm/i915/display: remove small micro-optimizations in irq handling") Cc: stable@vger.kernel.org # v6.13+ Suggested-by: Ville Syrjälä Reviewed-by: Jouni Högander Link: https://lore.kernel.org/r/20250805115656.832235-1-jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit cbd3baeffbc08052ce7dc53f11bf5524b4411056) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_display_irq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index fb25ec8adae3..68157f177b6a 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -1506,10 +1506,14 @@ u32 gen11_gu_misc_irq_ack(struct intel_display *display, const u32 master_ctl) if (!(master_ctl & GEN11_GU_MISC_IRQ)) return 0; + intel_display_rpm_assert_block(display); + iir = intel_de_read(display, GEN11_GU_MISC_IIR); if (likely(iir)) intel_de_write(display, GEN11_GU_MISC_IIR, iir); + intel_display_rpm_assert_unblock(display); + return iir; } From 8236820fd767f400d1baefb71bc7e36e37730a1e Mon Sep 17 00:00:00 2001 From: Sebastian Brzezinka Date: Mon, 11 Aug 2025 09:12:31 +0000 Subject: [PATCH 2140/2411] drm/i915/gt: Relocate compression repacking WA for JSL/EHL CACHE_MODE_0 registers should be saved and restored as part of the context, not during engine reset. Move the related workaround (Disable Repacking for Compression) from rcs_engine_wa_init() to icl_ctx_workarounds_init() for Jasper Lake and Elkhart Lake platforms. This ensures the WA is applied during context initialisation. 
BSPEC: 11322 Fixes: 0ddae025ab6c ("drm/i915: Disable compression tricks on JSL") Closes: Fixes: 0ddae025ab6c ("drm/i915: Disable compression tricks on JSL") Signed-off-by: Sebastian Brzezinka Cc: stable@vger.kernel.org # v6.13+ Reviewed-by: Andi Shyti Reviewed-by: Krzysztof Karas Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/4feaa24094e019e000ceb6011d8cd419b0361b3f.1754902406.git.sebastian.brzezinka@intel.com (cherry picked from commit c9932f0d604e4c8f2c6018e598a322acb43c68a2) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b37e400f74e5..5a95f06900b5 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -634,6 +634,8 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { + struct drm_i915_private *i915 = engine->i915; + /* Wa_1406697149 (WaDisableBankHangMode:icl) */ wa_write(wal, GEN8_L3CNTLREG, GEN8_ERRDETBCTRL); @@ -669,6 +671,15 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_1406306137:icl,ehl */ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); + + if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) { + /* + * Disable Repacking for Compression (masked R/W access) + * before rendering compressed surfaces for display. + */ + wa_masked_en(wal, CACHE_MODE_0_GEN7, + DISABLE_REPACKING_FOR_COMPRESSION); + } } /* @@ -2306,15 +2317,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN8_RC_SEMA_IDLE_MSG_DISABLE); } - if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) { - /* - * "Disable Repacking for Compression (masked R/W access) - * before rendering compressed surfaces for display." 
- */ - wa_masked_en(wal, CACHE_MODE_0_GEN7, - DISABLE_REPACKING_FOR_COMPRESSION); - } - if (GRAPHICS_VER(i915) == 11) { /* This is not an Wa. Enable for better image quality */ wa_masked_en(wal, From d4932a1b148bb6121121e56bad312c4339042d70 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 19 Aug 2025 10:33:56 +0800 Subject: [PATCH 2141/2411] x86/bugs: Fix GDS mitigation selecting when mitigation is off The current GDS mitigation logic incorrectly returns early when the attack vector mitigation is turned off, which leads to two problems: 1. CPUs without ARCH_CAP_GDS_CTRL support are incorrectly marked with GDS_MITIGATION_OFF when they should be marked as GDS_MITIGATION_UCODE_NEEDED. 2. The mitigation state checks and locking verification that follow are skipped, which means: - fail to detect if the mitigation was locked - miss the warning when trying to disable a locked mitigation Remove the early return to ensure proper mitigation state handling. This allows: - Proper mitigation classification for non-ARCH_CAP_GDS_CTRL CPUs - Complete mitigation state verification This also addresses the failed MSR 0x123 write attempt at boot on non-ARCH_CAP_GDS_CTRL CPUs: unchecked MSR access error: WRMSR to 0x123 (tried to write 0x0000000000000010) at rIP: ... (update_gds_msr) Call Trace: identify_secondary_cpu start_secondary common_startup_64 WARNING: CPU: 1 PID: 0 at arch/x86/kernel/cpu/bugs.c:1053 update_gds_msr [ bp: Massage, zap superfluous braces. 
] Fixes: 8c7261abcb7ad ("x86/bugs: Add attack vector controls for GDS") Suggested-by: Pawan Gupta Signed-off-by: Li RongQing Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Pawan Gupta Link: https://lore.kernel.org/20250819023356.2012-1-lirongqing@baidu.com --- arch/x86/kernel/cpu/bugs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2186a771b9fc..49ef1b832c1a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1068,10 +1068,8 @@ static void __init gds_select_mitigation(void) if (gds_mitigation == GDS_MITIGATION_AUTO) { if (should_mitigate_vuln(X86_BUG_GDS)) gds_mitigation = GDS_MITIGATION_FULL; - else { + else gds_mitigation = GDS_MITIGATION_OFF; - return; - } } /* No microcode */ From 62c30c544359aa18b8fb2734166467a07d435c2d Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Thu, 14 Aug 2025 09:25:57 +0800 Subject: [PATCH 2142/2411] net: ethernet: mtk_ppe: add RCU lock around dev_fill_forward_path Ensure ndo_fill_forward_path() is called with RCU lock held. 
Fixes: 2830e314778d ("net: ethernet: mtk-ppe: fix traffic offload with bridged wlan") Signed-off-by: Qingfang Deng Link: https://patch.msgid.link/20250814012559.3705-1-dqfext@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index c855fb799ce1..e9bd32741983 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -101,7 +101,9 @@ mtk_flow_get_wdma_info(struct net_device *dev, const u8 *addr, struct mtk_wdma_i if (!IS_ENABLED(CONFIG_NET_MEDIATEK_SOC_WED)) return -1; + rcu_read_lock(); err = dev_fill_forward_path(dev, addr, &stack); + rcu_read_unlock(); if (err) return err; From 0417adf367a0af11adf7ace849af4638cfb573f7 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Thu, 14 Aug 2025 09:25:58 +0800 Subject: [PATCH 2143/2411] ppp: fix race conditions in ppp_fill_forward_path ppp_fill_forward_path() has two race conditions: 1. The ppp->channels list can change between list_empty() and list_first_entry(), as ppp_lock() is not held. If the only channel is deleted in ppp_disconnect_channel(), list_first_entry() may access an empty head or a freed entry, and trigger a panic. 2. pch->chan can be NULL. When ppp_unregister_channel() is called, pch->chan is set to NULL before pch is removed from ppp->channels. Fix these by using a lockless RCU approach: - Use list_first_or_null_rcu() to safely test and access the first list entry. - Convert list modifications on ppp->channels to their RCU variants and add synchronize_net() after removal. - Check for a NULL pch->chan before dereferencing it. 
Fixes: f6efc675c9dd ("net: ppp: resolve forwarding path for bridge pppoe devices") Signed-off-by: Qingfang Deng Link: https://patch.msgid.link/20250814012559.3705-2-dqfext@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ppp/ppp_generic.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 8c98cbd4b06d..824c8dc4120b 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1598,11 +1599,14 @@ static int ppp_fill_forward_path(struct net_device_path_ctx *ctx, if (ppp->flags & SC_MULTILINK) return -EOPNOTSUPP; - if (list_empty(&ppp->channels)) + pch = list_first_or_null_rcu(&ppp->channels, struct channel, clist); + if (!pch) + return -ENODEV; + + chan = READ_ONCE(pch->chan); + if (!chan) return -ENODEV; - pch = list_first_entry(&ppp->channels, struct channel, clist); - chan = pch->chan; if (!chan->ops->fill_forward_path) return -EOPNOTSUPP; @@ -2994,7 +2998,7 @@ ppp_unregister_channel(struct ppp_channel *chan) */ down_write(&pch->chan_sem); spin_lock_bh(&pch->downl); - pch->chan = NULL; + WRITE_ONCE(pch->chan, NULL); spin_unlock_bh(&pch->downl); up_write(&pch->chan_sem); ppp_disconnect_channel(pch); @@ -3515,7 +3519,7 @@ ppp_connect_channel(struct channel *pch, int unit) hdrlen = pch->file.hdrlen + 2; /* for protocol bytes */ if (hdrlen > ppp->dev->hard_header_len) ppp->dev->hard_header_len = hdrlen; - list_add_tail(&pch->clist, &ppp->channels); + list_add_tail_rcu(&pch->clist, &ppp->channels); ++ppp->n_channels; pch->ppp = ppp; refcount_inc(&ppp->file.refcnt); @@ -3545,10 +3549,11 @@ ppp_disconnect_channel(struct channel *pch) if (ppp) { /* remove it from the ppp unit's list */ ppp_lock(ppp); - list_del(&pch->clist); + list_del_rcu(&pch->clist); if (--ppp->n_channels == 0) wake_up_interruptible(&ppp->file.rwait); ppp_unlock(ppp); + synchronize_net(); if 
(refcount_dec_and_test(&ppp->file.refcnt)) ppp_destroy_interface(ppp); err = 0; From 01792bc3e5bdafa171dd83c7073f00e7de93a653 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Thu, 14 Aug 2025 16:21:06 +0530 Subject: [PATCH 2144/2411] net: ti: icssg-prueth: Fix HSR and switch offload Enablement during firmware reload. To enable HSR / Switch offload, certain configurations are needed. Currently they are done inside icssg_change_mode(). This function only gets called if we move from one mode to another without bringing the links up / down. Once in HSR / Switch mode, if we bring the links down and bring them back up again, the callback sequence is: - emac_ndo_stop() Firmwares are stopped - emac_ndo_open() Firmwares are loaded In this path icssg_change_mode() doesn't get called and as a result the configurations needed for HSR / Switch are not done. To fix this, put all these configurations in a separate function icssg_enable_fw_offload() and call this from both icssg_change_mode() and emac_ndo_open(). Fixes: 56375086d093 ("net: ti: icssg-prueth: Enable HSR Tx duplication, Tx Tag and Rx Tag offload") Signed-off-by: MD Danish Anwar Link: https://patch.msgid.link/20250814105106.1491871-1-danishanwar@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 72 +++++++++++--------- 1 file changed, 41 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 6c7d776ae4ee..dadce6009791 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -203,6 +203,44 @@ static void prueth_emac_stop(struct prueth *prueth) } } +static void icssg_enable_fw_offload(struct prueth *prueth) +{ + struct prueth_emac *emac; + int mac; + + for (mac = PRUETH_MAC0; mac < PRUETH_NUM_MACS; mac++) { + emac = prueth->emac[mac]; + if (prueth->is_hsr_offload_mode) { + if (emac->ndev->features & NETIF_F_HW_HSR_TAG_RM) +
icssg_set_port_state(emac, ICSSG_EMAC_HSR_RX_OFFLOAD_ENABLE); + else + icssg_set_port_state(emac, ICSSG_EMAC_HSR_RX_OFFLOAD_DISABLE); + } + + if (prueth->is_switch_mode || prueth->is_hsr_offload_mode) { + if (netif_running(emac->ndev)) { + icssg_fdb_add_del(emac, eth_stp_addr, prueth->default_vlan, + ICSSG_FDB_ENTRY_P0_MEMBERSHIP | + ICSSG_FDB_ENTRY_P1_MEMBERSHIP | + ICSSG_FDB_ENTRY_P2_MEMBERSHIP | + ICSSG_FDB_ENTRY_BLOCK, + true); + icssg_vtbl_modify(emac, emac->port_vlan | DEFAULT_VID, + BIT(emac->port_id) | DEFAULT_PORT_MASK, + BIT(emac->port_id) | DEFAULT_UNTAG_MASK, + true); + if (prueth->is_hsr_offload_mode) + icssg_vtbl_modify(emac, DEFAULT_VID, + DEFAULT_PORT_MASK, + DEFAULT_UNTAG_MASK, true); + icssg_set_pvid(prueth, emac->port_vlan, emac->port_id); + if (prueth->is_switch_mode) + icssg_set_port_state(emac, ICSSG_EMAC_PORT_VLAN_AWARE_ENABLE); + } + } + } +} + static int prueth_emac_common_start(struct prueth *prueth) { struct prueth_emac *emac; @@ -753,6 +791,7 @@ static int emac_ndo_open(struct net_device *ndev) ret = prueth_emac_common_start(prueth); if (ret) goto free_rx_irq; + icssg_enable_fw_offload(prueth); } flow_cfg = emac->dram.va + ICSSG_CONFIG_OFFSET + PSI_L_REGULAR_FLOW_ID_BASE_OFFSET; @@ -1360,8 +1399,7 @@ static int prueth_emac_restart(struct prueth *prueth) static void icssg_change_mode(struct prueth *prueth) { - struct prueth_emac *emac; - int mac, ret; + int ret; ret = prueth_emac_restart(prueth); if (ret) { @@ -1369,35 +1407,7 @@ static void icssg_change_mode(struct prueth *prueth) return; } - for (mac = PRUETH_MAC0; mac < PRUETH_NUM_MACS; mac++) { - emac = prueth->emac[mac]; - if (prueth->is_hsr_offload_mode) { - if (emac->ndev->features & NETIF_F_HW_HSR_TAG_RM) - icssg_set_port_state(emac, ICSSG_EMAC_HSR_RX_OFFLOAD_ENABLE); - else - icssg_set_port_state(emac, ICSSG_EMAC_HSR_RX_OFFLOAD_DISABLE); - } - - if (netif_running(emac->ndev)) { - icssg_fdb_add_del(emac, eth_stp_addr, prueth->default_vlan, - ICSSG_FDB_ENTRY_P0_MEMBERSHIP | - 
ICSSG_FDB_ENTRY_P1_MEMBERSHIP | - ICSSG_FDB_ENTRY_P2_MEMBERSHIP | - ICSSG_FDB_ENTRY_BLOCK, - true); - icssg_vtbl_modify(emac, emac->port_vlan | DEFAULT_VID, - BIT(emac->port_id) | DEFAULT_PORT_MASK, - BIT(emac->port_id) | DEFAULT_UNTAG_MASK, - true); - if (prueth->is_hsr_offload_mode) - icssg_vtbl_modify(emac, DEFAULT_VID, - DEFAULT_PORT_MASK, - DEFAULT_UNTAG_MASK, true); - icssg_set_pvid(prueth, emac->port_vlan, emac->port_id); - if (prueth->is_switch_mode) - icssg_set_port_state(emac, ICSSG_EMAC_PORT_VLAN_AWARE_ENABLE); - } - } + icssg_enable_fw_offload(prueth); } static int prueth_netdevice_port_link(struct net_device *ndev, From 70fb252a84a47430240d924528a40e84c2b027e4 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 18 Aug 2025 10:59:07 -0400 Subject: [PATCH 2145/2411] USB: core: Update kerneldoc for usb_hcd_giveback_urb() The kerneldoc added for usb_hcd_giveback_urb() by commit 41631d3616c3 ("usb: core: Replace in_interrupt() in comments") is unclear and incorrect. Update the text for greater clarity and to say that URBs for a root hub will always use a BH context for their completion. Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/41eaae05-116a-4568-940c-eeb94ab6baa0@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 6270fbb5c699..9dd79769cad1 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1717,10 +1717,10 @@ static void usb_giveback_urb_bh(struct work_struct *work) * @urb: urb being returned to the USB device driver. * @status: completion status code for the URB. * - * Context: atomic. The completion callback is invoked in caller's context. - * For HCDs with HCD_BH flag set, the completion callback is invoked in BH - * context (except for URBs submitted to the root hub which always complete in - * caller's context). + * Context: atomic. 
The completion callback is invoked either in a work queue + * (BH) context or in the caller's context, depending on whether the HCD_BH + * flag is set in the @hcd structure, except that URBs submitted to the + * root hub always complete in BH context. * * This hands the URB from HCD to its USB device driver, using its * completion function. The HCD has freed all per-urb resources From 309b6341d5570fb2b41b923de2fc9bb147106b80 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 18 Aug 2025 18:50:19 +0200 Subject: [PATCH 2146/2411] usb: typec: fusb302: Revert incorrect threaded irq fix The fusb302 irq handler has been carefully optimized by Hans de Goede in commit 207338ec5a27 ("usb: typec: fusb302: Improve suspend/resume handling"). A recent 'fix' undid most of that work to avoid a virtio-gpio driver bug. This reverts the incorrect fix, since it is of very low quality. It reverts the quirks from Hans change (and thus reintroduces the problems fixed by Hans) while keeping the overhead from the original change. The proper fix to support using fusb302 with an interrupt line provided by virtio-gpio must be implemented in the virtio driver instead, which should support disabling the IRQ from the fusb302 interrupt routine. 
Cc: Hans de Goede Cc: Yongbo Zhang Fixes: 1c2d81bded19 ("usb: typec: fusb302: fix scheduling while atomic when using virtio-gpio") Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20250818-fusb302-unthreaded-irq-v1-1-3a9a11a9f56f@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/fusb302.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/tcpm/fusb302.c b/drivers/usb/typec/tcpm/fusb302.c index a4ff2403ddd6..870a71f953f6 100644 --- a/drivers/usb/typec/tcpm/fusb302.c +++ b/drivers/usb/typec/tcpm/fusb302.c @@ -1485,6 +1485,9 @@ static irqreturn_t fusb302_irq_intn(int irq, void *dev_id) struct fusb302_chip *chip = dev_id; unsigned long flags; + /* Disable our level triggered IRQ until our irq_work has cleared it */ + disable_irq_nosync(chip->gpio_int_n_irq); + spin_lock_irqsave(&chip->irq_lock, flags); if (chip->irq_suspended) chip->irq_while_suspended = true; @@ -1627,6 +1630,7 @@ static void fusb302_irq_work(struct work_struct *work) } done: mutex_unlock(&chip->lock); + enable_irq(chip->gpio_int_n_irq); } static int init_gpio(struct fusb302_chip *chip) @@ -1751,10 +1755,9 @@ static int fusb302_probe(struct i2c_client *client) goto destroy_workqueue; } - ret = devm_request_threaded_irq(dev, chip->gpio_int_n_irq, - NULL, fusb302_irq_intn, - IRQF_ONESHOT | IRQF_TRIGGER_LOW, - "fsc_interrupt_int_n", chip); + ret = request_irq(chip->gpio_int_n_irq, fusb302_irq_intn, + IRQF_ONESHOT | IRQF_TRIGGER_LOW, + "fsc_interrupt_int_n", chip); if (ret < 0) { dev_err(dev, "cannot request IRQ for GPIO Int_N, ret=%d", ret); goto tcpm_unregister_port; @@ -1779,6 +1782,7 @@ static void fusb302_remove(struct i2c_client *client) struct fusb302_chip *chip = i2c_get_clientdata(client); disable_irq_wake(chip->gpio_int_n_irq); + free_irq(chip->gpio_int_n_irq, chip); cancel_work_sync(&chip->irq_work); cancel_delayed_work_sync(&chip->bc_lvl_handler); tcpm_unregister_port(chip->tcpm_port); From 
1ca61060de92a4320d73adfe5dc8d335653907ac Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Wed, 6 Aug 2025 15:06:05 +0800 Subject: [PATCH 2147/2411] fpga: zynq_fpga: Fix the wrong usage of dma_map_sgtable() dma_map_sgtable() returns only 0 or the error code. Read sgt->nents to get the number of mapped segments. Fixes: 37e00703228a ("zynq_fpga: use sgtable-based scatterlist wrappers") Reported-by: Pavel Pisa Closes: https://lore.kernel.org/linux-fpga/202508041548.22955.pisa@fel.cvut.cz/ Reviewed-by: Jason Gunthorpe Reviewed-by: Marek Szyprowski Signed-off-by: Xu Yilun Tested-by: Pavel Pisa Link: https://lore.kernel.org/r/20250806070605.1920909-2-yilun.xu@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/fpga/zynq-fpga.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c index 0be0d569589d..b7629a0e4813 100644 --- a/drivers/fpga/zynq-fpga.c +++ b/drivers/fpga/zynq-fpga.c @@ -405,12 +405,12 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr, struct sg_table *sgt) } } - priv->dma_nelms = - dma_map_sgtable(mgr->dev.parent, sgt, DMA_TO_DEVICE, 0); - if (priv->dma_nelms == 0) { + err = dma_map_sgtable(mgr->dev.parent, sgt, DMA_TO_DEVICE, 0); + if (err) { dev_err(&mgr->dev, "Unable to DMA map (TO_DEVICE)\n"); - return -ENOMEM; + return err; } + priv->dma_nelms = sgt->nents; /* enable clock */ err = clk_enable(priv->clk); From 300a0cfe9f375b2843bcb331bcfa7503475ef5dd Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 6 Aug 2025 11:05:09 +0200 Subject: [PATCH 2148/2411] cdx: Fix off-by-one error in cdx_rpmsg_probe() In cdx_rpmsg_probe(), strscpy() is incorrectly called with the length of the source string (excluding the NUL terminator) rather than the size of the destination buffer. This results in one character less being copied from 'cdx_rpmsg_id_table[0].name' to 'chinfo.name'. Use the destination buffer size instead to ensure the name is copied correctly. 
Cc: stable Fixes: 2a226927d9b8 ("cdx: add rpmsg communication channel for CDX") Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20250806090512.121260-2-thorsten.blum@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/cdx/controller/cdx_rpmsg.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cdx/controller/cdx_rpmsg.c b/drivers/cdx/controller/cdx_rpmsg.c index 04b578a0be17..61f1a290ff08 100644 --- a/drivers/cdx/controller/cdx_rpmsg.c +++ b/drivers/cdx/controller/cdx_rpmsg.c @@ -129,8 +129,7 @@ static int cdx_rpmsg_probe(struct rpmsg_device *rpdev) chinfo.src = RPMSG_ADDR_ANY; chinfo.dst = rpdev->dst; - strscpy(chinfo.name, cdx_rpmsg_id_table[0].name, - strlen(cdx_rpmsg_id_table[0].name)); + strscpy(chinfo.name, cdx_rpmsg_id_table[0].name, sizeof(chinfo.name)); cdx_mcdi->ept = rpmsg_create_ept(rpdev, cdx_rpmsg_cb, NULL, chinfo); if (!cdx_mcdi->ept) { From 96cb948408b3adb69df7e451ba7da9d21f814d00 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Mon, 7 Jul 2025 20:39:58 +0800 Subject: [PATCH 2149/2411] comedi: pcl726: Prevent invalid irq number The reproducer passed in an irq number(0x80008000) that was too large, which triggered the oob. Added an interrupt number check to prevent users from passing in an irq number that was too large. If `it->options[1]` is 31, then `1 << it->options[1]` is still invalid because it shifts a 1-bit into the sign bit (which is UB in C). Possible solutions include reducing the upper bound on the `it->options[1]` value to 30 or lower, or using `1U << it->options[1]`. The old code would just not attempt to request the IRQ if the `options[1]` value were invalid. And it would still configure the device without interrupts even if the call to `request_irq` returned an error. So it would be better to combine this test with the test below. 
Fixes: fff46207245c ("staging: comedi: pcl726: enable the interrupt support code") Cc: stable # 5.13+ Reported-by: syzbot+5cd373521edd68bebcb3@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=5cd373521edd68bebcb3 Tested-by: syzbot+5cd373521edd68bebcb3@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Reviewed-by: Ian Abbott Link: https://lore.kernel.org/r/tencent_3C66983CC1369E962436264A50759176BF09@qq.com Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/drivers/pcl726.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/comedi/drivers/pcl726.c b/drivers/comedi/drivers/pcl726.c index 0430630e6ebb..b542896fa0e4 100644 --- a/drivers/comedi/drivers/pcl726.c +++ b/drivers/comedi/drivers/pcl726.c @@ -328,7 +328,8 @@ static int pcl726_attach(struct comedi_device *dev, * Hook up the external trigger source interrupt only if the * user config option is valid and the board supports interrupts. */ - if (it->options[1] && (board->irq_mask & (1 << it->options[1]))) { + if (it->options[1] > 0 && it->options[1] < 16 && + (board->irq_mask & (1U << it->options[1]))) { ret = request_irq(it->options[1], pcl726_interrupt, 0, dev->board_name, dev); if (ret == 0) { From 3cd212e895ca2d58963fdc6422502b10dd3966bb Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 25 Jul 2025 13:53:24 +0100 Subject: [PATCH 2150/2411] comedi: Fix use of uninitialized memory in do_insn_ioctl() and do_insnlist_ioctl() syzbot reports a KMSAN kernel-infoleak in `do_insn_ioctl()`. A kernel buffer is allocated to hold `insn->n` samples (each of which is an `unsigned int`). For some instruction types, `insn->n` samples are copied back to user-space, unless an error code is being returned. The problem is that not all the instruction handlers that need to return data to userspace fill in the whole `insn->n` samples, so that there is an information leak. 
There is a similar syzbot report for `do_insnlist_ioctl()`, although it does not have a reproducer for it at the time of writing. One culprit is `insn_rw_emulate_bits()` which is used as the handler for `INSN_READ` or `INSN_WRITE` instructions for subdevices that do not have a specific handler for that instruction, but do have an `INSN_BITS` handler. For `INSN_READ` it only fills in at most 1 sample, so if `insn->n` is greater than 1, the remaining `insn->n - 1` samples copied to userspace will be uninitialized kernel data. Another culprit is `vmk80xx_ai_insn_read()` in the "vmk80xx" driver. It never returns an error, even if it fails to fill the buffer. Fix it in `do_insn_ioctl()` and `do_insnlist_ioctl()` by making sure that uninitialized parts of the allocated buffer are zeroed before handling each instruction. Thanks to Arnaud Lecomte for their fix to `do_insn_ioctl()`. That fix replaced the call to `kmalloc_array()` with `kcalloc()`, but it is not always necessary to clear the whole buffer.
Fixes: ed9eccbe8970 ("Staging: add comedi core") Reported-by: syzbot+a5e45f768aab5892da5d@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=a5e45f768aab5892da5d Reported-by: syzbot+fb4362a104d45ab09cf9@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=fb4362a104d45ab09cf9 Cc: stable # 5.13+ Cc: Arnaud Lecomte Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20250725125324.80276-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/comedi_fops.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/comedi/comedi_fops.c b/drivers/comedi/comedi_fops.c index 23b7178522ae..7e2f2b1a1c36 100644 --- a/drivers/comedi/comedi_fops.c +++ b/drivers/comedi/comedi_fops.c @@ -1587,6 +1587,9 @@ static int do_insnlist_ioctl(struct comedi_device *dev, memset(&data[n], 0, (MIN_SAMPLES - n) * sizeof(unsigned int)); } + } else { + memset(data, 0, max_t(unsigned int, n, MIN_SAMPLES) * + sizeof(unsigned int)); } ret = parse_insn(dev, insns + i, data, file); if (ret < 0) @@ -1670,6 +1673,8 @@ static int do_insn_ioctl(struct comedi_device *dev, memset(&data[insn->n], 0, (MIN_SAMPLES - insn->n) * sizeof(unsigned int)); } + } else { + memset(data, 0, n_data * sizeof(unsigned int)); } ret = parse_insn(dev, insn, data, file); if (ret < 0) From 7afba9221f70d4cbce0f417c558879cba0eb5e66 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 25 Jul 2025 15:10:34 +0100 Subject: [PATCH 2151/2411] comedi: Make insn_rw_emulate_bits() do insn->n samples The `insn_rw_emulate_bits()` function is used as a default handler for `INSN_READ` instructions for subdevices that have a handler for `INSN_BITS` but not for `INSN_READ`. Similarly, it is used as a default handler for `INSN_WRITE` instructions for subdevices that have a handler for `INSN_BITS` but not for `INSN_WRITE`. It works by emulating the `INSN_READ` or `INSN_WRITE` instruction handling with a constructed `INSN_BITS` instruction. 
However, `INSN_READ` and `INSN_WRITE` instructions are supposed to be able to read or write multiple samples, indicated by the `insn->n` value, but `insn_rw_emulate_bits()` currently only handles a single sample. For `INSN_READ`, the comedi core will copy `insn->n` samples back to user-space. (That triggered KMSAN kernel-infoleak errors when `insn->n` was greater than 1, but that is being fixed more generally elsewhere in the comedi core.) Make `insn_rw_emulate_bits()` either handle `insn->n` samples, or return an error, to conform to the general expectation for `INSN_READ` and `INSN_WRITE` handlers. Fixes: ed9eccbe8970 ("Staging: add comedi core") Cc: stable # 5.13+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20250725141034.87297-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/drivers.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/comedi/drivers.c b/drivers/comedi/drivers.c index f1dc854928c1..c9ebaadc5e82 100644 --- a/drivers/comedi/drivers.c +++ b/drivers/comedi/drivers.c @@ -620,11 +620,9 @@ static int insn_rw_emulate_bits(struct comedi_device *dev, unsigned int chan = CR_CHAN(insn->chanspec); unsigned int base_chan = (chan < 32) ? 0 : chan; unsigned int _data[2]; + unsigned int i; int ret; - if (insn->n == 0) - return 0; - memset(_data, 0, sizeof(_data)); memset(&_insn, 0, sizeof(_insn)); _insn.insn = INSN_BITS; @@ -635,18 +633,21 @@ static int insn_rw_emulate_bits(struct comedi_device *dev, if (insn->insn == INSN_WRITE) { if (!(s->subdev_flags & SDF_WRITABLE)) return -EINVAL; - _data[0] = 1U << (chan - base_chan); /* mask */ - _data[1] = data[0] ? (1U << (chan - base_chan)) : 0; /* bits */ + _data[0] = 1U << (chan - base_chan); /* mask */ + } + for (i = 0; i < insn->n; i++) { + if (insn->insn == INSN_WRITE) + _data[1] = data[i] ?
_data[0] : 0; /* bits */ + + ret = s->insn_bits(dev, s, &_insn, _data); + if (ret < 0) + return ret; + + if (insn->insn == INSN_READ) + data[i] = (_data[1] >> (chan - base_chan)) & 1; } - ret = s->insn_bits(dev, s, &_insn, _data); - if (ret < 0) - return ret; - - if (insn->insn == INSN_READ) - data[0] = (_data[1] >> (chan - base_chan)) & 1; - - return 1; + return insn->n; } static int __comedi_device_postconfig_async(struct comedi_device *dev, From b47b493d6387ae437098112936f32be27f73516c Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 4 Aug 2025 12:29:55 +0400 Subject: [PATCH 2152/2411] most: core: Drop device reference after usage in get_channel() In get_channel(), the reference obtained by bus_find_device_by_name() was dropped via put_device() before accessing the device's driver data. Move put_device() after usage to avoid potential issues. Fixes: 2485055394be ("staging: most: core: drop device reference") Cc: stable Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20250804082955.3621026-1-linmq006@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/most/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/most/core.c b/drivers/most/core.c index a635d5082ebb..da319d108ea1 100644 --- a/drivers/most/core.c +++ b/drivers/most/core.c @@ -538,8 +538,8 @@ static struct most_channel *get_channel(char *mdev, char *mdev_ch) dev = bus_find_device_by_name(&mostbus, NULL, mdev); if (!dev) return NULL; - put_device(dev); iface = dev_get_drvdata(dev); + put_device(dev); list_for_each_entry_safe(c, tmp, &iface->p->channel_list, list) { if (!strcmp(dev_name(&c->dev), mdev_ch)) return c; From 7375f22495e7cd1c5b3b5af9dcc4f6dffe34ce49 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Mon, 11 Aug 2025 22:18:30 +0800 Subject: [PATCH 2153/2411] fs/buffer: fix use-after-free when call bh_read() helper There's an issue as follows: BUG: KASAN: stack-out-of-bounds in end_buffer_read_sync+0xe3/0x110 Read of size 8 at addr ffffc9000168f7f8 by task
swapper/3/0 CPU: 3 UID: 0 PID: 0 Comm: swapper/3 Not tainted 6.16.0-862.14.0.6.x86_64 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) Call Trace: dump_stack_lvl+0x55/0x70 print_address_description.constprop.0+0x2c/0x390 print_report+0xb4/0x270 kasan_report+0xb8/0xf0 end_buffer_read_sync+0xe3/0x110 end_bio_bh_io_sync+0x56/0x80 blk_update_request+0x30a/0x720 scsi_end_request+0x51/0x2b0 scsi_io_completion+0xe3/0x480 ? scsi_device_unbusy+0x11e/0x160 blk_complete_reqs+0x7b/0x90 handle_softirqs+0xef/0x370 irq_exit_rcu+0xa5/0xd0 sysvec_apic_timer_interrupt+0x6e/0x90 Above issue happens when do ntfs3 filesystem mount, issue may happens as follows: mount IRQ ntfs_fill_super read_cache_page do_read_cache_folio filemap_read_folio mpage_read_folio do_mpage_readpage ntfs_get_block_vbo bh_read submit_bh wait_on_buffer(bh); blk_complete_reqs scsi_io_completion scsi_end_request blk_update_request end_bio_bh_io_sync end_buffer_read_sync __end_buffer_read_notouch unlock_buffer wait_on_buffer(bh);--> return will return to caller put_bh --> trigger stack-out-of-bounds In the mpage_read_folio() function, the stack variable 'map_bh' is passed to ntfs_get_block_vbo(). Once unlock_buffer() unlocks and wait_on_buffer() returns to continue processing, the stack variable is likely to be reclaimed. Consequently, during the end_buffer_read_sync() process, calling put_bh() may result in stack overrun. If the bh is not allocated on the stack, it belongs to a folio. Freeing a buffer head which belongs to a folio is done by drop_buffers() which will fail to free buffers which are still locked. So it is safe to call put_bh() before __end_buffer_read_notouch(). 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Ye Bin Link: https://lore.kernel.org/20250811141830.343774-1-yebin@huaweicloud.com Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Christian Brauner --- fs/buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/buffer.c b/fs/buffer.c index ead4dc85debd..6a8752f7bbed 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -157,8 +157,8 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate) */ void end_buffer_read_sync(struct buffer_head *bh, int uptodate) { - __end_buffer_read_notouch(bh, uptodate); put_bh(bh); + __end_buffer_read_notouch(bh, uptodate); } EXPORT_SYMBOL(end_buffer_read_sync); From 589c12edcd8a7b3b24f407b58443bab3560125e4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 19 Aug 2025 12:41:15 +0300 Subject: [PATCH 2154/2411] coredump: Fix return value in coredump_parse() The coredump_parse() function is bool type. It should return true on success and false on failure. The cn_printf() returns zero on success or negative error codes. This mismatch means that when "return err;" here, it is treated as success instead of failure. Change it to return false instead. 
Fixes: a5715af549b2 ("coredump: make coredump_parse() return bool") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/aKRGu14w5vPSZLgv@stanley.mountain Signed-off-by: Christian Brauner --- fs/coredump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/coredump.c b/fs/coredump.c index fedbead956ed..5dce257c67fc 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -345,7 +345,7 @@ static bool coredump_parse(struct core_name *cn, struct coredump_params *cprm, was_space = false; err = cn_printf(cn, "%c", '\0'); if (err) - return err; + return false; (*argv)[(*argc)++] = cn->used; } } From c237aa9884f238e1480897463ca034877ca7530b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 19 Aug 2025 12:08:58 +0200 Subject: [PATCH 2155/2411] kernfs: don't fail listing extended attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userspace doesn't expect a failure to list extended attributes: $ ls -lA /sys/ ls: /sys/: No data available ls: /sys/kernel: No data available ls: /sys/power: No data available ls: /sys/class: No data available ls: /sys/devices: No data available ls: /sys/dev: No data available ls: /sys/hypervisor: No data available ls: /sys/fs: No data available ls: /sys/bus: No data available ls: /sys/firmware: No data available ls: /sys/block: No data available ls: /sys/module: No data available total 0 drwxr-xr-x 2 root root 0 Jan 1 1970 block drwxr-xr-x 52 root root 0 Jan 1 1970 bus drwxr-xr-x 88 root root 0 Jan 1 1970 class drwxr-xr-x 4 root root 0 Jan 1 1970 dev drwxr-xr-x 11 root root 0 Jan 1 1970 devices drwxr-xr-x 3 root root 0 Jan 1 1970 firmware drwxr-xr-x 10 root root 0 Jan 1 1970 fs drwxr-xr-x 2 root root 0 Jul 2 09:43 hypervisor drwxr-xr-x 14 root root 0 Jan 1 1970 kernel drwxr-xr-x 251 root root 0 Jan 1 1970 module drwxr-xr-x 3 root root 0 Jul 2 09:43 power Fix it by simply reporting success when no extended attributes are available instead of reporting ENODATA. 
Link: https://lore.kernel.org/78b13bcdae82ade95e88f315682966051f461dde.camel@linaro.org Fixes: d1f4e9026007 ("kernfs: remove iattr_mutex") # mainline only Reported-by: André Draszik Link: https://lore.kernel.org/20250819-ahndung-abgaben-524a535f8101@brauner Signed-off-by: Christian Brauner --- fs/kernfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index 3c293a5a21b1..457f91c412d4 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -142,9 +142,9 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size) struct kernfs_node *kn = kernfs_dentry_node(dentry); struct kernfs_iattrs *attrs; - attrs = kernfs_iattrs_noalloc(kn); + attrs = kernfs_iattrs(kn); if (!attrs) - return -ENODATA; + return -ENOMEM; return simple_xattr_list(d_inode(dentry), &attrs->xattrs, buf, size); } From a2c1f82618b0b65f1ef615aa9cfdac8122537d69 Mon Sep 17 00:00:00 2001 From: "Adrian Huang (Lenovo)" Date: Mon, 18 Aug 2025 21:43:10 +0800 Subject: [PATCH 2156/2411] signal: Fix memory leak for PIDFD_SELF* sentinels Commit f08d0c3a7111 ("pidfd: add PIDFD_SELF* sentinels to refer to own thread/process") introduced a leak by acquiring a pid reference through get_task_pid(), which increments pid->count but never drops it with put_pid(). As a result, kmemleak reports unreferenced pid objects after running tools/testing/selftests/pidfd/pidfd_test, for example: unreferenced object 0xff1100206757a940 (size 160): comm "pidfd_test", pid 16965, jiffies 4294853028 hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 fd 57 50 04 .............WP. 5e 44 00 00 00 00 00 00 18 de 34 17 01 00 11 ff ^D........4..... backtrace (crc cd8844d4): kmem_cache_alloc_noprof+0x2f4/0x3f0 alloc_pid+0x54/0x3d0 copy_process+0xd58/0x1740 kernel_clone+0x99/0x3b0 __do_sys_clone3+0xbe/0x100 do_syscall_64+0x7b/0x2c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fix this by calling put_pid() after do_pidfd_send_signal() returns. 
Fixes: f08d0c3a7111 ("pidfd: add PIDFD_SELF* sentinels to refer to own thread/process") Signed-off-by: Adrian Huang (Lenovo) Link: https://lore.kernel.org/20250818134310.12273-1-adrianhuang0701@gmail.com Tested-by: Lorenzo Stoakes Reviewed-by: Lorenzo Stoakes Signed-off-by: Christian Brauner --- kernel/signal.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/signal.c b/kernel/signal.c index e2c928de7d2c..fe9190d84f28 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -4067,6 +4067,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, { struct pid *pid; enum pid_type type; + int ret; /* Enforce flags be set to 0 until we add an extension. */ if (flags & ~PIDFD_SEND_SIGNAL_FLAGS) @@ -4108,7 +4109,10 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, } } - return do_pidfd_send_signal(pid, sig, type, info, flags); + ret = do_pidfd_send_signal(pid, sig, type, info, flags); + put_pid(pid); + + return ret; } static int From 23800ad1265f10c2bc6f42154ce4d20e59f2900e Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Thu, 14 Aug 2025 13:34:29 -0500 Subject: [PATCH 2157/2411] gpiolib: acpi: Add quirk for ASUS ProArt PX13 The ASUS ProArt PX13 has a spurious wakeup event from the touchpad a few moments after entering hardware sleep. This can be avoided by preventing the touchpad from being a wake source. Add to the wakeup ignore list. 
Reported-by: Amit Chaudhari Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4482 Tested-by: Amit Chaudhari Signed-off-by: Mario Limonciello (AMD) Reviewed-by: Mika Westerberg Link: https://lore.kernel.org/20250814183430.3887973-1-superm1@kernel.org Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-acpi-quirks.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpio/gpiolib-acpi-quirks.c b/drivers/gpio/gpiolib-acpi-quirks.c index c13545dce349..bfb04e67c4bc 100644 --- a/drivers/gpio/gpiolib-acpi-quirks.c +++ b/drivers/gpio/gpiolib-acpi-quirks.c @@ -344,6 +344,20 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = { .ignore_interrupt = "AMDI0030:00@8", }, }, + { + /* + * Spurious wakeups from TP_ATTN# pin + * Found in BIOS 5.35 + * https://gitlab.freedesktop.org/drm/amd/-/issues/4482 + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_FAMILY, "ProArt PX13"), + }, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_wake = "ASCP1A00:00@8", + }, + }, {} /* Terminating entry */ }; From 2eb03376151bb8585caa23ed2673583107bb5193 Mon Sep 17 00:00:00 2001 From: Weitao Wang Date: Tue, 19 Aug 2025 15:58:43 +0300 Subject: [PATCH 2158/2411] usb: xhci: Fix slot_id resource race conflict xHC controller may immediately reuse a slot_id after it's disabled, giving it to a new enumerating device before the xhci driver freed all resources related to the disabled device. In such a scenario, device-A with slot_id equal to 1 is disconnecting while device-B is enumerating, device-B will fail to enumerate in the following sequence.
1.[device-A] send disable slot command 2.[device-B] send enable slot command 3.[device-A] disable slot command completed and wakeup waiting thread 4.[device-B] enable slot command completed with slot_id equal to 1 and wakeup waiting thread 5.[device-B] driver checks that slot_id is still in use (by device-A) in xhci_alloc_virt_device, and fails to enumerate due to this conflict 6.[device-A] xhci->devs[slot_id] set to NULL in xhci_free_virt_device To fix driver's slot_id resources conflict, clear xhci->devs[slot_id] and xhci->dcbaa->dev_context_ptrs[slot_id] pointers in the interrupt context when disable slot command completes successfully. Simultaneously, adjust function xhci_free_virt_device to accurately handle device release. [minor smatch warning and commit message fix -Mathias] Cc: stable@vger.kernel.org Fixes: 7faac1953ed1 ("xhci: avoid race between disable slot command and host runtime suspend") Signed-off-by: Weitao Wang Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250819125844.2042452-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 3 +-- drivers/usb/host/xhci-mem.c | 22 +++++++++++----------- drivers/usb/host/xhci-ring.c | 9 +++++++-- drivers/usb/host/xhci.c | 21 ++++++++++++++------- drivers/usb/host/xhci.h | 3 ++- 5 files changed, 35 insertions(+), 23 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 92bb84f8132a..b3a59ce1b3f4 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -704,8 +704,7 @@ static int xhci_enter_test_mode(struct xhci_hcd *xhci, if (!xhci->devs[i]) continue; - retval = xhci_disable_slot(xhci, i); - xhci_free_virt_device(xhci, i); + retval = xhci_disable_and_free_slot(xhci, i); if (retval) xhci_err(xhci, "Failed to disable slot %d, %d.
Enter test mode anyway\n", i, retval); diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 07289333a1e8..81eaad87a3d9 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -865,21 +865,20 @@ int xhci_alloc_tt_info(struct xhci_hcd *xhci, * will be manipulated by the configure endpoint, allocate device, or update * hub functions while this function is removing the TT entries from the list. */ -void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id) +void xhci_free_virt_device(struct xhci_hcd *xhci, struct xhci_virt_device *dev, + int slot_id) { - struct xhci_virt_device *dev; int i; int old_active_eps = 0; /* Slot ID 0 is reserved */ - if (slot_id == 0 || !xhci->devs[slot_id]) + if (slot_id == 0 || !dev) return; - dev = xhci->devs[slot_id]; - - xhci->dcbaa->dev_context_ptrs[slot_id] = 0; - if (!dev) - return; + /* If device ctx array still points to _this_ device, clear it */ + if (dev->out_ctx && + xhci->dcbaa->dev_context_ptrs[slot_id] == cpu_to_le64(dev->out_ctx->dma)) + xhci->dcbaa->dev_context_ptrs[slot_id] = 0; trace_xhci_free_virt_device(dev); @@ -920,8 +919,9 @@ void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id) dev->udev->slot_id = 0; if (dev->rhub_port && dev->rhub_port->slot_id == slot_id) dev->rhub_port->slot_id = 0; - kfree(xhci->devs[slot_id]); - xhci->devs[slot_id] = NULL; + if (xhci->devs[slot_id] == dev) + xhci->devs[slot_id] = NULL; + kfree(dev); } /* @@ -962,7 +962,7 @@ static void xhci_free_virt_devices_depth_first(struct xhci_hcd *xhci, int slot_i out: /* we are now at a leaf device */ xhci_debugfs_remove_slot(xhci, slot_id); - xhci_free_virt_device(xhci, slot_id); + xhci_free_virt_device(xhci, vdev, slot_id); } int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id, diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index ecd757d482c5..4f8f5aab109d 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1592,7 +1592,8 @@ 
static void xhci_handle_cmd_enable_slot(int slot_id, struct xhci_command *comman command->slot_id = 0; } -static void xhci_handle_cmd_disable_slot(struct xhci_hcd *xhci, int slot_id) +static void xhci_handle_cmd_disable_slot(struct xhci_hcd *xhci, int slot_id, + u32 cmd_comp_code) { struct xhci_virt_device *virt_dev; struct xhci_slot_ctx *slot_ctx; @@ -1607,6 +1608,10 @@ static void xhci_handle_cmd_disable_slot(struct xhci_hcd *xhci, int slot_id) if (xhci->quirks & XHCI_EP_LIMIT_QUIRK) /* Delete default control endpoint resources */ xhci_free_device_endpoint_resources(xhci, virt_dev, true); + if (cmd_comp_code == COMP_SUCCESS) { + xhci->dcbaa->dev_context_ptrs[slot_id] = 0; + xhci->devs[slot_id] = NULL; + } } static void xhci_handle_cmd_config_ep(struct xhci_hcd *xhci, int slot_id) @@ -1856,7 +1861,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, xhci_handle_cmd_enable_slot(slot_id, cmd, cmd_comp_code); break; case TRB_DISABLE_SLOT: - xhci_handle_cmd_disable_slot(xhci, slot_id); + xhci_handle_cmd_disable_slot(xhci, slot_id, cmd_comp_code); break; case TRB_CONFIG_EP: if (!cmd->completion) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 47151ca527bf..0e03691f03bf 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3932,8 +3932,7 @@ static int xhci_discover_or_reset_device(struct usb_hcd *hcd, * Obtaining a new device slot to inform the xHCI host that * the USB device has been reset. 
*/ - ret = xhci_disable_slot(xhci, udev->slot_id); - xhci_free_virt_device(xhci, udev->slot_id); + ret = xhci_disable_and_free_slot(xhci, udev->slot_id); if (!ret) { ret = xhci_alloc_dev(hcd, udev); if (ret == 1) @@ -4090,7 +4089,7 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) xhci_disable_slot(xhci, udev->slot_id); spin_lock_irqsave(&xhci->lock, flags); - xhci_free_virt_device(xhci, udev->slot_id); + xhci_free_virt_device(xhci, virt_dev, udev->slot_id); spin_unlock_irqrestore(&xhci->lock, flags); } @@ -4139,6 +4138,16 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) return 0; } +int xhci_disable_and_free_slot(struct xhci_hcd *xhci, u32 slot_id) +{ + struct xhci_virt_device *vdev = xhci->devs[slot_id]; + int ret; + + ret = xhci_disable_slot(xhci, slot_id); + xhci_free_virt_device(xhci, vdev, slot_id); + return ret; +} + /* * Checks if we have enough host controller resources for the default control * endpoint. @@ -4245,8 +4254,7 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev) return 1; disable_slot: - xhci_disable_slot(xhci, udev->slot_id); - xhci_free_virt_device(xhci, udev->slot_id); + xhci_disable_and_free_slot(xhci, udev->slot_id); return 0; } @@ -4382,8 +4390,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, dev_warn(&udev->dev, "Device not responding to setup %s.\n", act); mutex_unlock(&xhci->mutex); - ret = xhci_disable_slot(xhci, udev->slot_id); - xhci_free_virt_device(xhci, udev->slot_id); + ret = xhci_disable_and_free_slot(xhci, udev->slot_id); if (!ret) { if (xhci_alloc_dev(hcd, udev) == 1) xhci_setup_addressable_virt_dev(xhci, udev); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index a20f4e7cd43a..85d5b964bf1e 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1791,7 +1791,7 @@ void xhci_dbg_trace(struct xhci_hcd *xhci, void (*trace)(struct va_format *), /* xHCI memory management */ void xhci_mem_cleanup(struct xhci_hcd 
*xhci); int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags); -void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id); +void xhci_free_virt_device(struct xhci_hcd *xhci, struct xhci_virt_device *dev, int slot_id); int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id, struct usb_device *udev, gfp_t flags); int xhci_setup_addressable_virt_dev(struct xhci_hcd *xhci, struct usb_device *udev); void xhci_copy_ep0_dequeue_into_input_ctx(struct xhci_hcd *xhci, @@ -1888,6 +1888,7 @@ void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev); int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, struct usb_tt *tt, gfp_t mem_flags); int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id); +int xhci_disable_and_free_slot(struct xhci_hcd *xhci, u32 slot_id); int xhci_ext_cap_init(struct xhci_hcd *xhci); int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup); From ff9a09b3e09c7b794b56f2f5858f5ce42ba46cb3 Mon Sep 17 00:00:00 2001 From: Niklas Neronin Date: Tue, 19 Aug 2025 15:58:44 +0300 Subject: [PATCH 2159/2411] usb: xhci: fix host not responding after suspend and resume Partially revert commit e1db856bd288 ("usb: xhci: remove '0' write to write-1-to-clear register") because the patch cleared the Interrupt Pending bit during interrupt enabling and disabling. The Interrupt Pending bit should only be cleared when the driver has handled the interrupt. Ideally, all interrupts should be handled before disabling the interrupt; consequently, no interrupt should be pending when enabling the interrupt. For this reason, keep the debug message informing if an interrupt is still pending when an interrupt is disabled. Because the Interrupt Pending bit is write-1-to-clear, writing '0' to it ensures that the state does not change. 
Link: https://lore.kernel.org/linux-usb/20250818231103.672ec7ed@foxbook Fixes: e1db856bd288 ("usb: xhci: remove '0' write to write-1-to-clear register") Closes: https://bbs.archlinux.org/viewtopic.php?id=307641 cc: stable@vger.kernel.org # 6.16+ Signed-off-by: Niklas Neronin Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250819125844.2042452-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 0e03691f03bf..742c23826e17 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -309,6 +309,7 @@ int xhci_enable_interrupter(struct xhci_interrupter *ir) return -EINVAL; iman = readl(&ir->ir_set->iman); + iman &= ~IMAN_IP; iman |= IMAN_IE; writel(iman, &ir->ir_set->iman); @@ -325,6 +326,7 @@ int xhci_disable_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir) return -EINVAL; iman = readl(&ir->ir_set->iman); + iman &= ~IMAN_IP; iman &= ~IMAN_IE; writel(iman, &ir->ir_set->iman); From 658a1c8e0a66d0777e0e37a11ba19f27a81e77f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Pi=C3=B3rkowski?= Date: Mon, 11 Aug 2025 12:43:57 +0200 Subject: [PATCH 2160/2411] drm/xe: Assign ioctl xe file handler to vm in xe_vm_create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In several code paths, such as xe_pt_create(), the vm->xef field is used to determine whether a VM originates from userspace or the kernel. Previously, this handler was only assigned in xe_vm_create_ioctl(), after the VM was created by xe_vm_create(). However, xe_vm_create() triggers page table creation, and that function assumes vm->xef should be already set. This could lead to incorrect origin detection. To fix this problem and ensure consistency in the initialization of the VM object, let's move the assignment of this handler to xe_vm_create. 
v2: - take reference to the xe file object only when xef is not NULL - release the reference to the xe file object on the error path (Matthew) Fixes: 7f387e6012b6 ("drm/xe: add XE_BO_FLAG_PINNED_LATE_RESTORE") Signed-off-by: Piotr Piórkowski Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20250811104358.2064150-2-piotr.piorkowski@intel.com Signed-off-by: Michał Winiarski (cherry picked from commit 9337166fa1d80f7bb7c7d3a8f901f21c348c0f2a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_pxp_submit.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 11 ++++++----- drivers/gpu/drm/xe/xe_vm.h | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 7d20ac4bb633..84f412fd3c5d 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -408,7 +408,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) /* Special layout, prepared below.. 
*/ vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION | - XE_VM_FLAG_SET_TILE_ID(tile)); + XE_VM_FLAG_SET_TILE_ID(tile), NULL); if (IS_ERR(vm)) return ERR_CAST(vm); diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.c b/drivers/gpu/drm/xe/xe_pxp_submit.c index d92ec0f515b0..ca95f2a4d4ef 100644 --- a/drivers/gpu/drm/xe/xe_pxp_submit.c +++ b/drivers/gpu/drm/xe/xe_pxp_submit.c @@ -101,7 +101,7 @@ static int allocate_gsc_client_resources(struct xe_gt *gt, xe_assert(xe, hwe); /* PXP instructions must be issued from PPGTT */ - vm = xe_vm_create(xe, XE_VM_FLAG_GSC); + vm = xe_vm_create(xe, XE_VM_FLAG_GSC, NULL); if (IS_ERR(vm)) return PTR_ERR(vm); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 2035604121e6..5bff317e335a 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1640,7 +1640,7 @@ static void xe_vm_free_scratch(struct xe_vm *vm) } } -struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) +struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) { struct drm_gem_object *vm_resv_obj; struct xe_vm *vm; @@ -1661,9 +1661,10 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->xe = xe; vm->size = 1ull << xe->info.va_bits; - vm->flags = flags; + if (xef) + vm->xef = xe_file_get(xef); /** * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be * manipulated under the PXP mutex. 
However, the PXP mutex can be taken @@ -1814,6 +1815,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) for_each_tile(tile, xe, id) xe_range_fence_tree_fini(&vm->rftree[id]); ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); + if (vm->xef) + xe_file_put(vm->xef); kfree(vm); if (flags & XE_VM_FLAG_LR_MODE) xe_pm_runtime_put(xe); @@ -2097,7 +2100,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) flags |= XE_VM_FLAG_FAULT_MODE; - vm = xe_vm_create(xe, flags); + vm = xe_vm_create(xe, flags, xef); if (IS_ERR(vm)) return PTR_ERR(vm); @@ -2113,8 +2116,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, vm->usm.asid = asid; } - vm->xef = xe_file_get(xef); - /* Record BO memory for VM pagetable created against client */ for_each_tile(tile, xe, id) if (vm->pt_root[id]) diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 3475a118f666..2f213737c7e5 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -26,7 +26,7 @@ struct xe_sync_entry; struct xe_svm_range; struct drm_exec; -struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags); +struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef); struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id); int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node); From 11cd7a5c21db020b8001aedcae27bd3fa9e1e901 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 19 Aug 2025 12:40:41 +0300 Subject: [PATCH 2161/2411] regulator: tps65219: regulator: tps65219: Fix error codes in probe() There is a copy and paste error and we accidentally use "PTR_ERR(rdev)" instead of "error". The "rdev" pointer is valid at this point. Also there is no need to print the error code in the error message because dev_err_probe() already prints that. So clean up the error message a bit. 
Fixes: 38c9f98db20a ("regulator: tps65219: Add support for TPS65215 Regulator IRQs") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/aKRGmVdbvT1HBvm8@stanley.mountain Signed-off-by: Mark Brown --- drivers/regulator/tps65219-regulator.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/regulator/tps65219-regulator.c b/drivers/regulator/tps65219-regulator.c index 5e67fdc88f49..d77ca486879f 100644 --- a/drivers/regulator/tps65219-regulator.c +++ b/drivers/regulator/tps65219-regulator.c @@ -454,9 +454,9 @@ static int tps65219_regulator_probe(struct platform_device *pdev) irq_type->irq_name, irq_data); if (error) - return dev_err_probe(tps->dev, PTR_ERR(rdev), - "Failed to request %s IRQ %d: %d\n", - irq_type->irq_name, irq, error); + return dev_err_probe(tps->dev, error, + "Failed to request %s IRQ %d\n", + irq_type->irq_name, irq); } for (i = 0; i < pmic->dev_irq_size; ++i) { @@ -477,9 +477,9 @@ static int tps65219_regulator_probe(struct platform_device *pdev) irq_type->irq_name, irq_data); if (error) - return dev_err_probe(tps->dev, PTR_ERR(rdev), - "Failed to request %s IRQ %d: %d\n", - irq_type->irq_name, irq, error); + return dev_err_probe(tps->dev, error, + "Failed to request %s IRQ %d\n", + irq_type->irq_name, irq); } return 0; From 0ddfb62f5d018edcb571a3d8ea30ad5332cf2a69 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Aug 2025 15:38:14 -0400 Subject: [PATCH 2162/2411] fix the softlockups in attach_recursive_mnt() In case when we mounting something on top of a large stack of overmounts, all of them being peers of each other, we get quadratic time by the depth of overmount stack. Easily fixed by doing commit_tree() before reparenting the overmount; simplifies commit_tree() as well - it doesn't need to skip the already mounted stuff that had been reparented on top of the new mounts. 
Since we are holding mount_lock through both reparenting and call of commit_tree(), the order does not matter from the mount hash point of view. Reported-by: "Lai, Yi" Tested-by: "Lai, Yi" Reviewed-by: Christian Brauner Fixes: 663206854f02 "copy_tree(): don't link the mounts via mnt_list" Signed-off-by: Al Viro --- fs/namespace.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index ddfd4457d338..1c97f93d1865 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1197,10 +1197,7 @@ static void commit_tree(struct mount *mnt) if (!mnt_ns_attached(mnt)) { for (struct mount *m = mnt; m; m = next_mnt(m, mnt)) - if (unlikely(mnt_ns_attached(m))) - m = skip_mnt_tree(m); - else - mnt_add_to_ns(n, m); + mnt_add_to_ns(n, m); n->nr_mounts += n->pending_mounts; n->pending_mounts = 0; } @@ -2704,6 +2701,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, lock_mnt_tree(child); q = __lookup_mnt(&child->mnt_parent->mnt, child->mnt_mountpoint); + commit_tree(child); if (q) { struct mountpoint *mp = root.mp; struct mount *r = child; @@ -2713,7 +2711,6 @@ static int attach_recursive_mnt(struct mount *source_mnt, mp = shorter; mnt_change_mountpoint(r, mp, q); } - commit_tree(child); } unpin_mountpoint(&root); unlock_mount_hash(); From da025cdb97a23c1916d8491925b878f3e1de0bca Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 14 Aug 2025 23:32:26 -0400 Subject: [PATCH 2163/2411] propagate_umount(): only surviving overmounts should be reparented ... as the comments in reparent() clearly say. As it is, we reparent *all* overmounts of the mounts being taken out, including those that are taken out themselves. It's not only a potentially massive slowdown (on a pathological setup we might end up with O(N^2) time for N mounts being kicked out), it can end up with incorrect ->overmount in the surviving mounts. 
Fixes: f0d0ba19985d "Rewrite of propagate_umount()" Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/pnode.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/pnode.c b/fs/pnode.c index 81f7599bdac4..1c789f88b3d2 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -637,10 +637,11 @@ void propagate_umount(struct list_head *set) } // now to_umount consists of all acceptable candidates - // deal with reparenting of remaining overmounts on those + // deal with reparenting of surviving overmounts on those list_for_each_entry(m, &to_umount, mnt_list) { - if (m->overmount) - reparent(m->overmount); + struct mount *over = m->overmount; + if (over && !will_be_unmounted(over)) + reparent(over); } // and fold them into the set From cffd0441872e7f6b1fce5e78fb1c99187a291330 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 14 Aug 2025 01:44:31 -0400 Subject: [PATCH 2164/2411] use uniform permission checks for all mount propagation changes do_change_type() and do_set_group() are operating on different aspects of the same thing - propagation graph. The latter asks for mounts involved to be mounted in namespace(s) the caller has CAP_SYS_ADMIN for. The former is a mess - originally it didn't even check that mount *is* mounted. That got fixed, but the resulting check turns out to be too strict for userland - in effect, we check that mount is in our namespace, having already checked that we have CAP_SYS_ADMIN there. What we really need (in both cases) is * only touch mounts that are mounted. That's a must-have constraint - data corruption happens if it gets violated. * don't allow messing with a namespace unless you already have enough permissions to do so (i.e. CAP_SYS_ADMIN in its userns). That's an equivalent of what do_set_group() does; let's extract that into a helper (may_change_propagation()) and use it in both do_set_group() and do_change_type().
Fixes: 12f147ddd6de "do_change_type(): refuse to operate on unmounted/not ours mounts" Acked-by: Andrei Vagin Reviewed-by: Pavel Tikhomirov Tested-by: Pavel Tikhomirov Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/namespace.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 1c97f93d1865..88db58061919 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2859,6 +2859,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) return attach_recursive_mnt(mnt, p, mp); } +static int may_change_propagation(const struct mount *m) +{ + struct mnt_namespace *ns = m->mnt_ns; + + // it must be mounted in some namespace + if (IS_ERR_OR_NULL(ns)) // is_mounted() + return -EINVAL; + // and the caller must be admin in userns of that namespace + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + return 0; +} + /* * Sanity check the flags to change_mnt_propagation. 
*/ @@ -2895,10 +2908,10 @@ static int do_change_type(struct path *path, int ms_flags) return -EINVAL; namespace_lock(); - if (!check_mnt(mnt)) { - err = -EINVAL; + err = may_change_propagation(mnt); + if (err) goto out_unlock; - } + if (type == MS_SHARED) { err = invent_group_ids(mnt, recurse); if (err) @@ -3344,18 +3357,11 @@ static int do_set_group(struct path *from_path, struct path *to_path) namespace_lock(); - err = -EINVAL; - /* To and From must be mounted */ - if (!is_mounted(&from->mnt)) + err = may_change_propagation(from); + if (err) goto out; - if (!is_mounted(&to->mnt)) - goto out; - - err = -EPERM; - /* We should be allowed to modify mount namespaces of both mounts */ - if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN)) - goto out; - if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN)) + err = may_change_propagation(to); + if (err) goto out; err = -EINVAL; From fb924b7b8669503582e003dd7b7340ee49029801 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Aug 2025 15:23:08 -0400 Subject: [PATCH 2165/2411] change_mnt_propagation(): calculate propagation source only if we'll need it We only need it when mount in question was sending events downstream (then recipients need to switch to new master) or the mount is being turned into slave (then we need a new master for it). That wouldn't be a big deal, except that it causes quite a bit of work when umount_tree() is taking a large peer group out. Adding a trivial "don't bother calling propagation_source() unless we are going to use its results" logic improves things quite a bit. We are still doing unnecessary work on bulk removals from propagation graph, but the full solution for that will have to wait for the next merge window.
Fixes: 955336e204ab "do_make_slave(): choose new master sanely" Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/pnode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/pnode.c b/fs/pnode.c index 1c789f88b3d2..6f7d02f3fa98 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -111,7 +111,8 @@ void change_mnt_propagation(struct mount *mnt, int type) return; } if (IS_MNT_SHARED(mnt)) { - m = propagation_source(mnt); + if (type == MS_SLAVE || !hlist_empty(&mnt->mnt_slave_list)) + m = propagation_source(mnt); if (list_empty(&mnt->mnt_share)) { mnt_release_group_id(mnt); } else { From 453a6d2a68e54a483d67233c6e1e24c4095ee4be Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 19 Aug 2025 16:27:36 +0100 Subject: [PATCH 2166/2411] cifs: Fix oops due to uninitialised variable Fix smb3_init_transform_rq() to initialise buffer to NULL before calling netfs_alloc_folioq_buffer() as netfs assumes it can append to the buffer it is given. Setting it to NULL means it should start a fresh buffer, but the value is currently undefined. 
Fixes: a2906d3316fc ("cifs: Switch crypto buffer to use a folio_queue rather than an xarray") Signed-off-by: David Howells cc: Steve French cc: Paulo Alcantara cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 3b251de874ec..94b1d7a395d5 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -4496,7 +4496,7 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst, for (int i = 1; i < num_rqst; i++) { struct smb_rqst *old = &old_rq[i - 1]; struct smb_rqst *new = &new_rq[i]; - struct folio_queue *buffer; + struct folio_queue *buffer = NULL; size_t size = iov_iter_count(&old->rq_iter); orig_len += smb_rqst_len(server, old); From 76d2e3890fb169168c73f2e4f8375c7cc24a765e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 16 Aug 2025 07:25:20 -0700 Subject: [PATCH 2167/2411] NFS: Fix a race when updating an existing write After nfs_lock_and_join_requests() tests for whether the request is still attached to the mapping, nothing prevents a call to nfs_inode_remove_request() from succeeding until we actually lock the page group. The reason is that whoever called nfs_inode_remove_request() doesn't necessarily have a lock on the page group head. So in order to avoid races, let's take the page group lock earlier in nfs_lock_and_join_requests(), and hold it across the removal of the request in nfs_inode_remove_request(). 
Reported-by: Jeff Layton Tested-by: Joe Quanaim Tested-by: Andrew Steffen Reviewed-by: Jeff Layton Fixes: bd37d6fce184 ("NFSv4: Convert nfs_lock_and_join_requests() to use nfs_page_find_head_request()") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 9 +++++---- fs/nfs/write.c | 29 ++++++++++------------------- include/linux/nfs_page.h | 1 + 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 11968dcb7243..6e69ce43a13f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -253,13 +253,14 @@ nfs_page_group_unlock(struct nfs_page *req) nfs_page_clear_headlock(req); } -/* - * nfs_page_group_sync_on_bit_locked +/** + * nfs_page_group_sync_on_bit_locked - Test if all requests have @bit set + * @req: request in page group + * @bit: PG_* bit that is used to sync page group * * must be called with page group lock held */ -static bool -nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) +bool nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) { struct nfs_page *head = req->wb_head; struct nfs_page *tmp; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index fa5c41d0989a..8b7c04737967 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -153,20 +153,10 @@ nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode) } } -static int -nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) +static void nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) { - int ret; - - if (!test_bit(PG_REMOVE, &req->wb_flags)) - return 0; - ret = nfs_page_group_lock(req); - if (ret) - return ret; if (test_and_clear_bit(PG_REMOVE, &req->wb_flags)) nfs_page_set_inode_ref(req, inode); - nfs_page_group_unlock(req); - return 0; } /** @@ -585,19 +575,18 @@ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) } } + ret = nfs_page_group_lock(head); + if (ret < 0) + goto out_unlock; + /* Ensure that nobody removed the 
request before we locked it */ if (head != folio->private) { + nfs_page_group_unlock(head); nfs_unlock_and_release_request(head); goto retry; } - ret = nfs_cancel_remove_inode(head, inode); - if (ret < 0) - goto out_unlock; - - ret = nfs_page_group_lock(head); - if (ret < 0) - goto out_unlock; + nfs_cancel_remove_inode(head, inode); /* lock each request in the page group */ for (subreq = head->wb_this_page; @@ -786,7 +775,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req)); - if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { + nfs_page_group_lock(req); + if (nfs_page_group_sync_on_bit_locked(req, PG_REMOVE)) { struct folio *folio = nfs_page_to_folio(req->wb_head); struct address_space *mapping = folio->mapping; @@ -798,6 +788,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) } spin_unlock(&mapping->i_private_lock); } + nfs_page_group_unlock(req); if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) { atomic_long_dec(&nfsi->nrequests); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 169b4ae30ff4..9aed39abc94b 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -160,6 +160,7 @@ extern void nfs_join_page_group(struct nfs_page *head, extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); +extern bool nfs_page_group_sync_on_bit_locked(struct nfs_page *, unsigned int); extern int nfs_page_set_headlock(struct nfs_page *req); extern void nfs_page_clear_headlock(struct nfs_page *req); extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); From d41e327582e172f30e4e15f9124796a10fd1b0f9 Mon Sep 17 00:00:00 2001 From: Devyn Liu Date: Tue, 19 Aug 2025 10:03:49 +0800 Subject: [PATCH 2168/2411] MAINTAINERS: i2c: Update i2c_hisi entry Because Yicong Yang will no longer work on i2c_hisi driver, update the 
maintainer information for i2c_hisi. Signed-off-by: Devyn Liu Acked-by: Yicong Yang Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250819020349.4027842-1-liudingyuan@h-partners.com --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index daf520a13bdf..9d1270ddcbff 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11013,7 +11013,7 @@ F: Documentation/admin-guide/perf/hns3-pmu.rst F: drivers/perf/hisilicon/hns3_pmu.c HISILICON I2C CONTROLLER DRIVER -M: Yicong Yang +M: Devyn Liu L: linux-i2c@vger.kernel.org S: Maintained W: https://www.hisilicon.com From 57f312b955938fc4663f430cb57a71f2414f601b Mon Sep 17 00:00:00 2001 From: Alex Guo Date: Sun, 10 Aug 2025 20:05:13 +0200 Subject: [PATCH 2169/2411] i2c: rtl9300: Fix out-of-bounds bug in rtl9300_i2c_smbus_xfer The data->block[0] variable comes from user. Without proper check, the variable may be very large to cause an out-of-bounds bug. Fix this bug by checking the value of data->block[0] first. 1. commit 39244cc75482 ("i2c: ismt: Fix an out-of-bounds bug in ismt_access()") 2. 
commit 92fbb6d1296f ("i2c: xgene-slimpro: Fix out-of-bounds bug in xgene_slimpro_i2c_xfer()") Fixes: c366be720235 ("i2c: Add driver for the RTL9300 I2C controller") Signed-off-by: Alex Guo Cc: # v6.13+ Reviewed-by: Chris Packham Tested-by: Chris Packham Reviewed-by: Wolfram Sang Signed-off-by: Sven Eckelmann Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250810-i2c-rtl9300-multi-byte-v5-1-cd9dca0db722@narfation.org --- drivers/i2c/busses/i2c-rtl9300.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/i2c/busses/i2c-rtl9300.c b/drivers/i2c/busses/i2c-rtl9300.c index e064e8a4a1f0..568495720810 100644 --- a/drivers/i2c/busses/i2c-rtl9300.c +++ b/drivers/i2c/busses/i2c-rtl9300.c @@ -281,6 +281,10 @@ static int rtl9300_i2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, unsigned s ret = rtl9300_i2c_reg_addr_set(i2c, command, 1); if (ret) goto out_unlock; + if (data->block[0] < 1 || data->block[0] > I2C_SMBUS_BLOCK_MAX) { + ret = -EINVAL; + goto out_unlock; + } ret = rtl9300_i2c_config_xfer(i2c, chan, addr, data->block[0]); if (ret) goto out_unlock; From d67b740b9edfa46310355e2b68050f79ebf05a4c Mon Sep 17 00:00:00 2001 From: Harshal Gohel Date: Sun, 10 Aug 2025 20:05:14 +0200 Subject: [PATCH 2170/2411] i2c: rtl9300: Fix multi-byte I2C write The RTL93xx I2C controller has 4 32 bit registers to store the bytes for the upcoming I2C transmission. The first byte is stored in the least-significant byte of the first register. And the last byte in the most significant byte of the last register. A map of the transferred bytes to their order in the registers is: reg 0: 0x04_03_02_01 reg 1: 0x08_07_06_05 reg 2: 0x0c_0b_0a_09 reg 3: 0x10_0f_0e_0d The i2c_read() function basically demonstrates how the hardware would pick up bytes from this register set. But the i2c_write() function was just pushing bytes one after another to the least significant byte of a register AFTER shifting the last one to the next more significant byte position. 
If you would then have tried to send a buffer with numbers 1-11 using i2c_write(), you would have ended up with the following register content: reg 0: 0x01_02_03_04 reg 1: 0x05_06_07_08 reg 2: 0x00_09_0a_0b reg 3: 0x00_00_00_00 On the wire, you would then have seen: Sr Addr Wr [A] 04 A 03 A 02 A 01 A 08 A 07 A 06 A 05 A 0b A 0a A 09 A P But the correct data transmission was expected to be Sr Addr Wr [A] 01 A 02 A 03 A 04 A 05 A 06 A 07 A 08 A 09 A 0a A 0b A P Because of this multi-byte ordering problem, only single byte i2c_write() operations were executed correctly (on the wire). By shifting the byte directly to the correct end position in the register, it is possible to avoid this incorrect byte ordering and fix multi-byte transmissions. The second initialization (to 0) of vals was also dropped because this array is initialized to 0 on the stack by using `= {};`. This makes the fix a lot more readable. Fixes: c366be720235 ("i2c: Add driver for the RTL9300 I2C controller") Signed-off-by: Harshal Gohel Cc: # v6.13+ Co-developed-by: Sven Eckelmann Signed-off-by: Sven Eckelmann Reviewed-by: Chris Packham Tested-by: Chris Packham Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250810-i2c-rtl9300-multi-byte-v5-2-cd9dca0db722@narfation.org --- drivers/i2c/busses/i2c-rtl9300.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-rtl9300.c b/drivers/i2c/busses/i2c-rtl9300.c index 568495720810..4a538b266080 100644 --- a/drivers/i2c/busses/i2c-rtl9300.c +++ b/drivers/i2c/busses/i2c-rtl9300.c @@ -143,10 +143,10 @@ static int rtl9300_i2c_write(struct rtl9300_i2c *i2c, u8 *buf, int len) return -EIO; for (i = 0; i < len; i++) { - if (i % 4 == 0) - vals[i/4] = 0; - vals[i/4] <<= 8; - vals[i/4] |= buf[i]; + unsigned int shift = (i % 4) * 8; + unsigned int reg = i / 4; + + vals[reg] |= buf[i] << shift; } return regmap_bulk_write(i2c->regmap, i2c->reg_base + RTL9300_I2C_MST_DATA_WORD0, From
ceee7776c010c5f09d30985c9e5223b363a6172a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 10 Aug 2025 20:05:15 +0200 Subject: [PATCH 2171/2411] i2c: rtl9300: Increase timeout for transfer polling The timeout for transfers was only set to 2ms. Because of this relatively low limit, 12-byte read operations to the frontend MCU of a RTL8239 POE PSE chip cluster was consistently resulting in a timeout. The original OpenWrt downstream driver [1] was not using any timeout limit at all. This is also possible by setting the timeout_us parameter of regmap_read_poll_timeout() to 0. But since the driver currently implements the ETIMEDOUT error, it is more sensible to increase the timeout in such a way that communication with the (quite common) Realtek I2C-connected POE management solution is possible. [1] https://git.openwrt.org/?p=openwrt/openwrt.git;a=blob;f=target/linux/realtek/files-6.12/drivers/i2c/busses/i2c-rtl9300.c;h=c4d973195ef39dc56d6207e665d279745525fcac#l202 Fixes: c366be720235 ("i2c: Add driver for the RTL9300 I2C controller") Signed-off-by: Sven Eckelmann Cc: # v6.13+ Reviewed-by: Chris Packham Tested-by: Chris Packham Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250810-i2c-rtl9300-multi-byte-v5-3-cd9dca0db722@narfation.org --- drivers/i2c/busses/i2c-rtl9300.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-rtl9300.c b/drivers/i2c/busses/i2c-rtl9300.c index 4a538b266080..4a282d57e2c1 100644 --- a/drivers/i2c/busses/i2c-rtl9300.c +++ b/drivers/i2c/busses/i2c-rtl9300.c @@ -175,7 +175,7 @@ static int rtl9300_i2c_execute_xfer(struct rtl9300_i2c *i2c, char read_write, return ret; ret = regmap_read_poll_timeout(i2c->regmap, i2c->reg_base + RTL9300_I2C_MST_CTRL1, - val, !(val & RTL9300_I2C_MST_CTRL1_I2C_TRIG), 100, 2000); + val, !(val & RTL9300_I2C_MST_CTRL1_I2C_TRIG), 100, 100000); if (ret) return ret; From 82b350dd8185ce790e61555c436f90b6501af23c Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 10 
Aug 2025 20:05:16 +0200 Subject: [PATCH 2172/2411] i2c: rtl9300: Add missing count byte for SMBus Block Ops The expected on-wire format of an SMBus Block Write is S Addr Wr [A] Comm [A] Count [A] Data [A] Data [A] ... [A] Data [A] P Everything starting from the Count byte is provided by the I2C subsystem in the array data->block. But the driver was skipping the Count byte (data->block[0]) when sending it to the RTL93xx I2C controller. Only the actual data could be seen on the wire: S Addr Wr [A] Comm [A] Data [A] Data [A] ... [A] Data [A] P This wire format is not SMBus Block Write compatible but matches the format of an I2C Block Write. Simply adding the count byte to the buffer for the I2C controller is enough to fix the transmission. This also affects read because the I2C controller must receive the count byte + $count * data bytes. Fixes: c366be720235 ("i2c: Add driver for the RTL9300 I2C controller") Signed-off-by: Sven Eckelmann Cc: # v6.13+ Reviewed-by: Chris Packham Tested-by: Chris Packham Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250810-i2c-rtl9300-multi-byte-v5-4-cd9dca0db722@narfation.org --- drivers/i2c/busses/i2c-rtl9300.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-rtl9300.c b/drivers/i2c/busses/i2c-rtl9300.c index 4a282d57e2c1..cfafe089102a 100644 --- a/drivers/i2c/busses/i2c-rtl9300.c +++ b/drivers/i2c/busses/i2c-rtl9300.c @@ -285,15 +285,15 @@ static int rtl9300_i2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, unsigned s ret = -EINVAL; goto out_unlock; } - ret = rtl9300_i2c_config_xfer(i2c, chan, addr, data->block[0]); + ret = rtl9300_i2c_config_xfer(i2c, chan, addr, data->block[0] + 1); if (ret) goto out_unlock; if (read_write == I2C_SMBUS_WRITE) { - ret = rtl9300_i2c_write(i2c, &data->block[1], data->block[0]); + ret = rtl9300_i2c_write(i2c, &data->block[0], data->block[0] + 1); if (ret) goto out_unlock; } - len = data->block[0]; + len = data->block[0] + 1; break; default: 
From 63b17b653df30e90f95338083cb44c35d64bcae4 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Fri, 8 Aug 2025 20:18:02 +0000 Subject: [PATCH 2173/2411] kho: init new_physxa->phys_bits to fix lockdep Patch series "Several KHO Hotfixes". Three unrelated fixes for Kexec Handover. This patch (of 3): Lockdep shows the following warning: INFO: trying to register non-static key. The code is fine but needs lockdep annotation, or maybe you didn't initialize this object before use? turning off the locking correctness validator. [] dump_stack_lvl+0x66/0xa0 [] assign_lock_key+0x10c/0x120 [] register_lock_class+0xf4/0x2f0 [] __lock_acquire+0x7f/0x2c40 [] ? __pfx_hlock_conflict+0x10/0x10 [] ? native_flush_tlb_global+0x8e/0xa0 [] ? __flush_tlb_all+0x4e/0xa0 [] ? __kernel_map_pages+0x112/0x140 [] ? xa_load_or_alloc+0x67/0xe0 [] lock_acquire+0xe6/0x280 [] ? xa_load_or_alloc+0x67/0xe0 [] _raw_spin_lock+0x30/0x40 [] ? xa_load_or_alloc+0x67/0xe0 [] xa_load_or_alloc+0x67/0xe0 [] kho_preserve_folio+0x90/0x100 [] __kho_finalize+0xcf/0x400 [] kho_finalize+0x34/0x70 This is because xa has its own lock, that is not initialized in xa_load_or_alloc.
Modify __kho_preserve_order() to properly call xa_init(&new_physxa->phys_bits); Link: https://lkml.kernel.org/r/20250808201804.772010-2-pasha.tatashin@soleen.com Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation") Signed-off-by: Pasha Tatashin Acked-by: Mike Rapoport (Microsoft) Cc: Alexander Graf Cc: Arnd Bergmann Cc: Baoquan He Cc: Changyuan Lyu Cc: Coiby Xu Cc: Dave Vasilevsky Cc: Eric Biggers Cc: Kees Cook Cc: Pratyush Yadav Cc: Signed-off-by: Andrew Morton --- kernel/kexec_handover.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index e49743ae52c5..65145972d6d6 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -144,14 +144,34 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn, unsigned int order) { struct kho_mem_phys_bits *bits; - struct kho_mem_phys *physxa; + struct kho_mem_phys *physxa, *new_physxa; const unsigned long pfn_high = pfn >> order; might_sleep(); - physxa = xa_load_or_alloc(&track->orders, order, sizeof(*physxa)); - if (IS_ERR(physxa)) - return PTR_ERR(physxa); + physxa = xa_load(&track->orders, order); + if (!physxa) { + int err; + + new_physxa = kzalloc(sizeof(*physxa), GFP_KERNEL); + if (!new_physxa) + return -ENOMEM; + + xa_init(&new_physxa->phys_bits); + physxa = xa_cmpxchg(&track->orders, order, NULL, new_physxa, + GFP_KERNEL); + + err = xa_err(physxa); + if (err || physxa) { + xa_destroy(&new_physxa->phys_bits); + kfree(new_physxa); + + if (err) + return err; + } else { + physxa = new_physxa; + } + } bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS, sizeof(*bits)); From 8b66ed2c3f42cc462e05704af6b94e6a7bad2f5e Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Fri, 8 Aug 2025 20:18:03 +0000 Subject: [PATCH 2174/2411] kho: mm: don't allow deferred struct page with KHO KHO uses struct pages for the preserved memory early in boot, however, with
deferred struct page initialization, only a small portion of memory has properly initialized struct pages. This problem was detected where vmemmap is poisoned, and illegal flag combinations are detected. Don't allow them to be enabled together, and later we will have to teach KHO to work properly with deferred struct page init kernel feature. Link: https://lkml.kernel.org/r/20250808201804.772010-3-pasha.tatashin@soleen.com Fixes: 4e1d010e3bda ("kexec: add config option for KHO") Signed-off-by: Pasha Tatashin Acked-by: Mike Rapoport (Microsoft) Acked-by: Pratyush Yadav Cc: Alexander Graf Cc: Arnd Bergmann Cc: Baoquan He Cc: Changyuan Lyu Cc: Coiby Xu Cc: Dave Vasilevsky Cc: Eric Biggers Cc: Kees Cook Cc: Signed-off-by: Andrew Morton --- kernel/Kconfig.kexec | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index 2ee603a98813..1224dd937df0 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -97,6 +97,7 @@ config KEXEC_JUMP config KEXEC_HANDOVER bool "kexec handover" depends on ARCH_SUPPORTS_KEXEC_HANDOVER && ARCH_SUPPORTS_KEXEC_FILE + depends on !DEFERRED_STRUCT_PAGE_INIT select MEMBLOCK_KHO_SCRATCH select KEXEC_FILE select DEBUG_FS From 44958f2025ed3f29fc3e93bb1f6c16121d7847ad Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Fri, 8 Aug 2025 20:18:04 +0000 Subject: [PATCH 2175/2411] kho: warn if KHO is disabled due to an error During boot scratch area is allocated based on command line parameters or auto calculated. However, scratch area may fail to allocate, and in that case KHO is disabled. Currently, no warning is printed that KHO is disabled, which makes it confusing for the end user to figure out why KHO is not available. Add the missing warning message. 
Link: https://lkml.kernel.org/r/20250808201804.772010-4-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Acked-by: Mike Rapoport (Microsoft) Acked-by: Pratyush Yadav Cc: Alexander Graf Cc: Arnd Bergmann Cc: Baoquan He Cc: Changyuan Lyu Cc: Coiby Xu Cc: Dave Vasilevsky Cc: Eric Biggers Cc: Kees Cook Cc: Signed-off-by: Andrew Morton --- kernel/kexec_handover.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index 65145972d6d6..ecd1ac210dbd 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -564,6 +564,7 @@ static void __init kho_reserve_scratch(void) err_free_scratch_desc: memblock_free(kho_scratch, kho_scratch_cnt * sizeof(*kho_scratch)); err_disable_kho: + pr_warn("Failed to reserve scratch area, disabling kexec handover\n"); kho_enable = false; } From b64700d41bdc4e9f82f1346c15a3678ebb91a89c Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Mon, 11 Aug 2025 23:37:40 +0100 Subject: [PATCH 2176/2411] squashfs: fix memory leak in squashfs_fill_super If sb_min_blocksize returns 0, squashfs_fill_super exits without freeing allocated memory (sb->s_fs_info). Fix this by moving the call to sb_min_blocksize to before memory is allocated. 
Link: https://lkml.kernel.org/r/20250811223740.110392-1-phillip@squashfs.org.uk Fixes: 734aa85390ea ("Squashfs: check return result of sb_min_blocksize") Signed-off-by: Phillip Lougher Reported-by: Scott GUO Closes: https://lore.kernel.org/all/20250811061921.3807353-1-scott_gzh@163.com Cc: Signed-off-by: Andrew Morton --- fs/squashfs/super.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 992ea0e37257..4465cf05603a 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -187,10 +187,15 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) unsigned short flags; unsigned int fragments; u64 lookup_table_start, xattr_id_table_start, next_table; - int err; + int err, devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); TRACE("Entered squashfs_fill_superblock\n"); + if (!devblksize) { + errorf(fc, "squashfs: unable to set blocksize\n"); + return -EINVAL; + } + sb->s_fs_info = kzalloc(sizeof(*msblk), GFP_KERNEL); if (sb->s_fs_info == NULL) { ERROR("Failed to allocate squashfs_sb_info\n"); @@ -201,12 +206,7 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) msblk->panic_on_errors = (opts->errors == Opt_errors_panic); - msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); - if (!msblk->devblksize) { - errorf(fc, "squashfs: unable to set blocksize\n"); - return -EINVAL; - } - + msblk->devblksize = devblksize; msblk->devblksize_log2 = ffz(~msblk->devblksize); mutex_init(&msblk->meta_index_mutex); From dde30854bddfb5d69f30022b53c5955a41088b33 Mon Sep 17 00:00:00 2001 From: "Herton R. Krzesinski" Date: Thu, 31 Jul 2025 18:40:51 -0300 Subject: [PATCH 2177/2411] mm/debug_vm_pgtable: clear page table entries at destroy_args() The mm/debug_vm_pagetable test allocates manually page table entries for the tests it runs, using also its manually allocated mm_struct. 
That in itself is ok, but when it exits, at destroy_args() it fails to clear those entries with the *_clear functions. The problem is that leaves stale entries. If another process allocates an mm_struct with a pgd at the same address, it may end up running into the stale entry. This is happening in practice on a debug kernel with CONFIG_DEBUG_VM_PGTABLE=y, for example this is the output with some extra debugging I added (it prints a warning trace if pgtables_bytes goes negative, in addition to the warning at check_mm() function): [ 2.539353] debug_vm_pgtable: [get_random_vaddr ]: random_vaddr is 0x7ea247140000 [ 2.539366] kmem_cache info [ 2.539374] kmem_cachep 0x000000002ce82385 - freelist 0x0000000000000000 - offset 0x508 [ 2.539447] debug_vm_pgtable: [init_args ]: args->mm is 0x000000002267cc9e (...) [ 2.552800] WARNING: CPU: 5 PID: 116 at include/linux/mm.h:2841 free_pud_range+0x8bc/0x8d0 [ 2.552816] Modules linked in: [ 2.552843] CPU: 5 UID: 0 PID: 116 Comm: modprobe Not tainted 6.12.0-105.debug_vm2.el10.ppc64le+debug #1 VOLUNTARY [ 2.552859] Hardware name: IBM,9009-41A POWER9 (architected) 0x4e0202 0xf000005 of:IBM,FW910.00 (VL910_062) hv:phyp pSeries [ 2.552872] NIP: c0000000007eef3c LR: c0000000007eef30 CTR: c0000000003d8c90 [ 2.552885] REGS: c0000000622e73b0 TRAP: 0700 Not tainted (6.12.0-105.debug_vm2.el10.ppc64le+debug) [ 2.552899] MSR: 800000000282b033 CR: 24002822 XER: 0000000a [ 2.552954] CFAR: c0000000008f03f0 IRQMASK: 0 [ 2.552954] GPR00: c0000000007eef30 c0000000622e7650 c000000002b1ac00 0000000000000001 [ 2.552954] GPR04: 0000000000000008 0000000000000000 c0000000007eef30 ffffffffffffffff [ 2.552954] GPR08: 00000000ffff00f5 0000000000000001 0000000000000048 0000000000004000 [ 2.552954] GPR12: 00000003fa440000 c000000017ffa300 c0000000051d9f80 ffffffffffffffdb [ 2.552954] GPR16: 0000000000000000 0000000000000008 000000000000000a 60000000000000e0 [ 2.552954] GPR20: 4080000000000000 c0000000113af038 00007fffcf130000 0000700000000000 [ 2.552954] 
GPR24: c000000062a6a000 0000000000000001 8000000062a68000 0000000000000001 [ 2.552954] GPR28: 000000000000000a c000000062ebc600 0000000000002000 c000000062ebc760 [ 2.553170] NIP [c0000000007eef3c] free_pud_range+0x8bc/0x8d0 [ 2.553185] LR [c0000000007eef30] free_pud_range+0x8b0/0x8d0 [ 2.553199] Call Trace: [ 2.553207] [c0000000622e7650] [c0000000007eef30] free_pud_range+0x8b0/0x8d0 (unreliable) [ 2.553229] [c0000000622e7750] [c0000000007f40b4] free_pgd_range+0x284/0x3b0 [ 2.553248] [c0000000622e7800] [c0000000007f4630] free_pgtables+0x450/0x570 [ 2.553274] [c0000000622e78e0] [c0000000008161c0] exit_mmap+0x250/0x650 [ 2.553292] [c0000000622e7a30] [c0000000001b95b8] __mmput+0x98/0x290 [ 2.558344] [c0000000622e7a80] [c0000000001d1018] exit_mm+0x118/0x1b0 [ 2.558361] [c0000000622e7ac0] [c0000000001d141c] do_exit+0x2ec/0x870 [ 2.558376] [c0000000622e7b60] [c0000000001d1ca8] do_group_exit+0x88/0x150 [ 2.558391] [c0000000622e7bb0] [c0000000001d1db8] sys_exit_group+0x48/0x50 [ 2.558407] [c0000000622e7be0] [c00000000003d810] system_call_exception+0x1e0/0x4c0 [ 2.558423] [c0000000622e7e50] [c00000000000d05c] system_call_vectored_common+0x15c/0x2ec (...) [ 2.558892] ---[ end trace 0000000000000000 ]--- [ 2.559022] BUG: Bad rss-counter state mm:000000002267cc9e type:MM_ANONPAGES val:1 [ 2.559037] BUG: non-zero pgtables_bytes on freeing mm: -6144 Here the modprobe process ended up with an allocated mm_struct from the mm_struct slab that was used before by the debug_vm_pgtable test. That is not a problem, since the mm_struct is initialized again etc., however, if it ends up using the same pgd table, it bumps into the old stale entry when clearing/freeing the page table entries, so it tries to free an entry already gone (that one which was allocated by the debug_vm_pgtable test), which also explains the negative pgtables_bytes since it's accounting for not allocated entries in the current process. As far as I looked pgd_{alloc,free} etc. 
does not clear entries, and clearing of the entries is explicitly done in the free_pgtables-> free_pgd_range->free_p4d_range->free_pud_range->free_pmd_range-> free_pte_range path. However, the debug_vm_pgtable test does not call free_pgtables, since it allocates mm_struct and entries manually for its test and eg. not goes through page faults. So it also should clear manually the entries before exit at destroy_args(). This problem was noticed on a reboot X number of times test being done on a powerpc host, with a debug kernel with CONFIG_DEBUG_VM_PGTABLE enabled. Depends on the system, but on a 100 times reboot loop the problem could manifest once or twice, if a process ends up getting the right mm->pgd entry with the stale entries used by mm/debug_vm_pagetable. After using this patch, I couldn't reproduce/experience the problems anymore. I was able to reproduce the problem as well on latest upstream kernel (6.16). I also modified destroy_args() to use mmput() instead of mmdrop(), there is no reason to hold mm_users reference and not release the mm_struct entirely, and in the output above with my debugging prints I already had patched it to use mmput, it did not fix the problem, but helped in the debugging as well. Link: https://lkml.kernel.org/r/20250731214051.4115182-1-herton@redhat.com Fixes: 3c9b84f044a9 ("mm/debug_vm_pgtable: introduce struct pgtable_debug_args") Signed-off-by: Herton R. 
Krzesinski Cc: Anshuman Khandual Cc: Christophe Leroy Cc: Gavin Shan Cc: Gerald Schaefer Cc: Signed-off-by: Andrew Morton --- mm/debug_vm_pgtable.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index d19031f275a3..830107b6dd08 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -990,29 +990,34 @@ static void __init destroy_args(struct pgtable_debug_args *args) /* Free page table entries */ if (args->start_ptep) { + pmd_clear(args->pmdp); pte_free(args->mm, args->start_ptep); mm_dec_nr_ptes(args->mm); } if (args->start_pmdp) { + pud_clear(args->pudp); pmd_free(args->mm, args->start_pmdp); mm_dec_nr_pmds(args->mm); } if (args->start_pudp) { + p4d_clear(args->p4dp); pud_free(args->mm, args->start_pudp); mm_dec_nr_puds(args->mm); } - if (args->start_p4dp) + if (args->start_p4dp) { + pgd_clear(args->pgdp); p4d_free(args->mm, args->start_p4dp); + } /* Free vma and mm struct */ if (args->vma) vm_area_free(args->vma); if (args->mm) - mmdrop(args->mm); + mmput(args->mm); } static struct page * __init From 9a6a6a3191574a01dcf7a7d9385246d7bc8736bc Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 11 Aug 2025 06:26:54 +0100 Subject: [PATCH 2178/2411] tools/testing: add linux/args.h header and fix radix, VMA tests Commit 857d18f23ab1 ("cleanup: Introduce ACQUIRE() and ACQUIRE_ERR() for conditional locks") accidentally broke the radix tree, VMA userland tests by including linux/args.h which is not present in the tools/include directory. This patch copies this over and adds an #ifdef block to avoid duplicate __CONCAT declaration in conflict with system headers when we ultimately include this. 
Link: https://lkml.kernel.org/r/20250811052654.33286-1-lorenzo.stoakes@oracle.com Fixes: 857d18f23ab1 ("cleanup: Introduce ACQUIRE() and ACQUIRE_ERR() for conditional locks") Signed-off-by: Lorenzo Stoakes Cc: Jann Horn Cc: John Hubbard Cc: Liam Howlett Cc: Sidhartha Kumar Cc: Vlastimil Babka Cc: Dan Williams Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- tools/include/linux/args.h | 28 ++++++++++++++++++++++++++++ tools/testing/shared/linux/idr.h | 4 ++++ 2 files changed, 32 insertions(+) create mode 100644 tools/include/linux/args.h diff --git a/tools/include/linux/args.h b/tools/include/linux/args.h new file mode 100644 index 000000000000..2e8e65d975c7 --- /dev/null +++ b/tools/include/linux/args.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_ARGS_H +#define _LINUX_ARGS_H + +/* + * How do these macros work? + * + * In __COUNT_ARGS() _0 to _12 are just placeholders from the start + * in order to make sure _n is positioned over the correct number + * from 12 to 0 (depending on X, which is a variadic argument list). + * They serve no purpose other than occupying a position. Since each + * macro parameter must have a distinct identifier, those identifiers + * are as good as any. + * + * In COUNT_ARGS() we use actual integers, so __COUNT_ARGS() returns + * that as _n. + */ + +/* This counts to 15. Any more, it will return 16th argument. */ +#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _n, X...) _n +#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +/* Concatenate two parameters, but allow them to be expanded beforehand. 
*/ +#define __CONCAT(a, b) a ## b +#define CONCATENATE(a, b) __CONCAT(a, b) + +#endif /* _LINUX_ARGS_H */ diff --git a/tools/testing/shared/linux/idr.h b/tools/testing/shared/linux/idr.h index 4e342f2e37cf..676c5564e33f 100644 --- a/tools/testing/shared/linux/idr.h +++ b/tools/testing/shared/linux/idr.h @@ -1 +1,5 @@ +/* Avoid duplicate definitions due to system headers. */ +#ifdef __CONCAT +#undef __CONCAT +#endif #include "../../../../include/linux/idr.h" From 63f5dec16760f2cd7d3f9034d18fc1fa0d83652f Mon Sep 17 00:00:00 2001 From: Sang-Heon Jeon Date: Sun, 10 Aug 2025 21:42:01 +0900 Subject: [PATCH 2179/2411] mm/damon/core: fix commit_ops_filters by using correct nth function damos_commit_ops_filters() incorrectly uses damos_nth_filter() which iterates core_filters. As a result, performing a commit unintentionally corrupts ops_filters. Add damos_nth_ops_filter() which iterates ops_filters. Use this function to fix issues caused by wrong iteration. Link: https://lkml.kernel.org/r/20250810124201.15743-1-ekffu200098@gmail.com Fixes: 3607cc590f18 ("mm/damon/core: support committing ops_filters") # 6.15.x Signed-off-by: Sang-Heon Jeon Reviewed-by: SeongJae Park Cc: Signed-off-by: Andrew Morton --- mm/damon/core.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 52a48c9316bc..467c2d78126f 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -845,6 +845,18 @@ static struct damos_filter *damos_nth_filter(int n, struct damos *s) return NULL; } +static struct damos_filter *damos_nth_ops_filter(int n, struct damos *s) +{ + struct damos_filter *filter; + int i = 0; + + damos_for_each_ops_filter(filter, s) { + if (i++ == n) + return filter; + } + return NULL; +} + static void damos_commit_filter_arg( struct damos_filter *dst, struct damos_filter *src) { @@ -908,7 +920,7 @@ static int damos_commit_ops_filters(struct damos *dst, struct damos *src) int i = 0, j = 0; 
damos_for_each_ops_filter_safe(dst_filter, next, dst) { - src_filter = damos_nth_filter(i++, src); + src_filter = damos_nth_ops_filter(i++, src); if (src_filter) damos_commit_filter(dst_filter, src_filter); else From 7c91e0b91aaa3fa1f897efb06565af0ceb75195c Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sun, 3 Aug 2025 12:11:21 +0100 Subject: [PATCH 2180/2411] mm/mremap: allow multi-VMA move when filesystem uses thp_get_unmapped_area The multi-VMA move functionality introduced in commit d23cb648e365 ("mm/mremap: permit mremap() move of multiple VMAs") doesn't allow moves of file-backed mappings which specify a custom f_op->get_unmapped_area handler excepting hugetlb and shmem. We expand this to include thp_get_unmapped_area to support file-backed mappings for filesystems which use large folios. Additionally, when the first VMA in a range is not compatible with a multi-VMA move, instead of moving the first VMA and returning an error, this series results in us not moving anything and returning an error immediately. Examining this second change in detail: The semantics of multi-VMA moves in mremap() very clearly indicate that a failure can result in a partial move of VMAs. This is in line with other aggregate operations within the kernel, which share these semantics. There are two classes of failures we're concerned with - eligibility for multi-VMA move, and transient failures that would occur even if the user individually moved each VMA. The latter is due to out-of-memory conditions (which, given the allocations involved are small, would likely be fatal in any case), or hitting the mapping limit. Regardless of the cause, transient issues would be fatal anyway, so it isn't really material which VMAs succeeded at being moved or not.
However, when it comes to multi-VMA move eligibility, we face another issue - we must allow a single VMA to succeed regardless of this eligibility (as, of course, it is not a multi-VMA move) - but we must then fail multi-VMA operations. The two means by which VMAs may fail the eligibility test are - the VMAs being UFFD-armed, or the VMA being file-backed and providing its own f_op->get_unmapped_area() helper (because this may result in MREMAP_FIXED being disregarded), excepting those known to correctly handle MREMAP_FIXED. It is therefore conceivable that a user could erroneously try to use this functionality in these instances, and would prefer to not perform any move at all should that occur. This series therefore avoids any move of subsequent VMAs should the first be multi-VMA move ineligible and the input span exceeds that of the first VMA. We also add detailed test logic to assert that multi VMA move with ineligible VMAs functions as expected. This patch (of 3): We currently restrict multi-VMA move to avoid filesystems or drivers which provide a custom f_op->get_unmapped_area handler unless it is known to correctly handle MREMAP_FIXED. We do this so we do not get unexpected results when moving from one area to another (for instance, if the handler would align things resulting in the moved VMAs having different gaps than the original mapping). More and more filesystems are moving to using large folios, and typically do so (in part) by setting f_op->get_unmapped_area to thp_get_unmapped_area. When mremap() invokes the filesystem's get_unmapped_area handler for an MREMAP_FIXED move, it does so via get_unmapped_area(), called in vrm_set_new_addr(). In order to do so, it converts the MREMAP_FIXED flag to a MAP_FIXED flag and passes this to the unmapped area handler. The __get_unmapped_area() function (called by get_unmapped_area()) in turn invokes the filesystem or driver's f_op->get_unmapped_area() handler.
Therefore this is a point at which thp_get_unmapped_area() may be called (also, this is the case for anonymous mappings where the size is huge page aligned). thp_get_unmapped_area() calls thp_get_unmapped_area_vmflags() and __thp_get_unmapped_area() in turn (falling back to mm_get_unmapped_area_vmflags() which is known to handle MAP_FIXED correctly). The __thp_get_unmapped_area() function in turn does nothing to change the address hint, nor the MAP_FIXED flag, only adjusting alignment parameters. It then calls mm_get_unmapped_area_vmflags(), and in turn arch-specific unmapped area functions, all of which honour MAP_FIXED correctly. Therefore, we can safely add thp_get_unmapped_area to the known-good handlers. Link: https://lkml.kernel.org/r/cover.1754218667.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/4f2542340c29c84d3d470b0c605e916b192f6c81.1754218667.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Cc: David Hildenbrand Cc: Jann Horn Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- mm/mremap.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/mm/mremap.c b/mm/mremap.c index 9afa8cd524f5..eb37d1668770 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -1620,7 +1620,7 @@ static void notify_uffd(struct vma_remap_struct *vrm, bool failed) static bool vma_multi_allowed(struct vm_area_struct *vma) { - struct file *file; + struct file *file = vma->vm_file; /* * We can't support moving multiple uffd VMAs as notify requires @@ -1633,15 +1633,17 @@ static bool vma_multi_allowed(struct vm_area_struct *vma) * Custom get unmapped area might result in MREMAP_FIXED not * being obeyed. */ - file = vma->vm_file; - if (file && !vma_is_shmem(vma) && !is_vm_hugetlb_page(vma)) { - const struct file_operations *fop = file->f_op; + if (!file || !file->f_op->get_unmapped_area) + return true; + /* Known good.
*/ + if (vma_is_shmem(vma)) + return true; + if (is_vm_hugetlb_page(vma)) + return true; + if (file->f_op->get_unmapped_area == thp_get_unmapped_area) + return true; - if (fop->get_unmapped_area) - return false; - } - - return true; + return false; } static int check_prep_vma(struct vma_remap_struct *vrm) From d5f416c7c36456676c2cf5ab98776db2e7601f27 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sun, 3 Aug 2025 12:11:22 +0100 Subject: [PATCH 2181/2411] mm/mremap: catch invalid multi VMA moves earlier Previously, any attempt to solely move a VMA would require that the span specified reside within the span of that single VMA, with no gaps before or afterwards. After commit d23cb648e365 ("mm/mremap: permit mremap() move of multiple VMAs"), the multi VMA move permitted a gap to exist only after VMAs. This was done to provide maximum flexibility. However, We have consequently permitted this behaviour for the move of a single VMA including those not eligible for multi VMA move. The change introduced here means that we no longer permit non-eligible VMAs from being moved in this way. This is consistent, as it means all eligible VMA moves are treated the same, and all non-eligible moves are treated as they were before. This change does not break previous behaviour, which equally would have disallowed such a move (only in all cases). 
[lorenzo.stoakes@oracle.com: do not incorrectly reference invalid VMA in VM_WARN_ON_ONCE()] Link: https://lkml.kernel.org/r/b6dbda20-667e-4053-abae-8ed4fa84bb6c@lucifer.local Link: https://lkml.kernel.org/r/2b5aad5681573be85b5b8fac61399af6fb6b68b6.1754218667.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Cc: David Hildenbrand Cc: Jann Horn Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- mm/mremap.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/mm/mremap.c b/mm/mremap.c index eb37d1668770..33b642076205 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -1820,10 +1820,11 @@ static unsigned long remap_move(struct vma_remap_struct *vrm) unsigned long start = vrm->addr; unsigned long end = vrm->addr + vrm->old_len; unsigned long new_addr = vrm->new_addr; - bool allowed = true, seen_vma = false; unsigned long target_addr = new_addr; unsigned long res = -EFAULT; unsigned long last_end; + bool seen_vma = false; + VMA_ITERATOR(vmi, current->mm, start); /* @@ -1836,9 +1837,7 @@ static unsigned long remap_move(struct vma_remap_struct *vrm) unsigned long addr = max(vma->vm_start, start); unsigned long len = min(end, vma->vm_end) - addr; unsigned long offset, res_vma; - - if (!allowed) - return -EFAULT; + bool multi_allowed; /* No gap permitted at the start of the range. */ if (!seen_vma && start < vma->vm_start) @@ -1867,9 +1866,15 @@ static unsigned long remap_move(struct vma_remap_struct *vrm) vrm->new_addr = target_addr + offset; vrm->old_len = vrm->new_len = len; - allowed = vma_multi_allowed(vma); - if (seen_vma && !allowed) - return -EFAULT; + multi_allowed = vma_multi_allowed(vma); + if (!multi_allowed) { + /* This is not the first VMA, abort immediately. */ + if (seen_vma) + return -EFAULT; + /* This is the first, but there are more, abort. 
*/ + if (vma->vm_end < end) + return -EFAULT; + } res_vma = check_prep_vma(vrm); if (!res_vma) @@ -1878,7 +1883,7 @@ static unsigned long remap_move(struct vma_remap_struct *vrm) return res_vma; if (!seen_vma) { - VM_WARN_ON_ONCE(allowed && res_vma != new_addr); + VM_WARN_ON_ONCE(multi_allowed && res_vma != new_addr); res = res_vma; } From 742d3663a5775cb7b957f4ca2ddb4ccd26badb94 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sun, 3 Aug 2025 12:11:23 +0100 Subject: [PATCH 2182/2411] selftests/mm: add test for invalid multi VMA operations We can use UFFD to easily assert invalid multi VMA moves, so do so, asserting expected behaviour when VMAs invalid for a multi VMA operation are encountered. We assert both that such operations are not permitted, and that we do not even attempt to move the first VMA under these circumstances. We also assert that we can still move a single VMA regardless. We then assert that a partial failure can occur if the invalid VMA appears later in the range of multiple VMAs, both at the very next VMA, and also at the end of the range. As part of this change, we are using the is_range_valid() helper more aggressively. Therefore, fix a bug where stale buffered data would hang around on success, causing subsequent calls to is_range_valid() to potentially give invalid results. We simply have to fflush() the stream on success to resolve this issue. 
Link: https://lkml.kernel.org/r/c4fb86dd5ba37610583ad5fc0e0c2306ddf318b9.1754218667.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Cc: David Hildenbrand Cc: Jann Horn Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/mremap_test.c | 264 ++++++++++++++++++++++- 1 file changed, 261 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index fccf9e797a0c..5bd52a951cbd 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -5,10 +5,14 @@ #define _GNU_SOURCE #include +#include +#include #include #include #include +#include #include +#include #include #include @@ -168,6 +172,7 @@ static bool is_range_mapped(FILE *maps_fp, unsigned long start, if (first_val <= start && second_val >= end) { success = true; + fflush(maps_fp); break; } } @@ -175,6 +180,15 @@ static bool is_range_mapped(FILE *maps_fp, unsigned long start, return success; } +/* Check if [ptr, ptr + size) mapped in /proc/self/maps. */ +static bool is_ptr_mapped(FILE *maps_fp, void *ptr, unsigned long size) +{ + unsigned long start = (unsigned long)ptr; + unsigned long end = start + size; + + return is_range_mapped(maps_fp, start, end); +} + /* * Returns the start address of the mapping on success, else returns * NULL on failure. @@ -733,6 +747,249 @@ static void mremap_move_multiple_vmas_split(unsigned int pattern_seed, dont_unmap ? 
" [dontunnmap]" : ""); } +#ifdef __NR_userfaultfd +static void mremap_move_multi_invalid_vmas(FILE *maps_fp, + unsigned long page_size) +{ + char *test_name = "mremap move multiple invalid vmas"; + const size_t size = 10 * page_size; + bool success = true; + char *ptr, *tgt_ptr; + int uffd, err, i; + void *res; + struct uffdio_api api = { + .api = UFFD_API, + .features = UFFD_EVENT_PAGEFAULT, + }; + + uffd = syscall(__NR_userfaultfd, O_NONBLOCK); + if (uffd == -1) { + err = errno; + perror("userfaultfd"); + if (err == EPERM) { + ksft_test_result_skip("%s - missing uffd", test_name); + return; + } + success = false; + goto out; + } + if (ioctl(uffd, UFFDIO_API, &api)) { + perror("ioctl UFFDIO_API"); + success = false; + goto out_close_uffd; + } + + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_close_uffd; + } + + tgt_ptr = mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (tgt_ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_close_uffd; + } + if (munmap(tgt_ptr, size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + + /* + * Unmap so we end up with: + * + * 0 2 4 6 8 10 offset in buffer + * |*| |*| |*| |*| |*| + * |*| |*| |*| |*| |*| + * + * Additionally, register each with UFFD. + */ + for (i = 0; i < 10; i += 2) { + void *unmap_ptr = &ptr[(i + 1) * page_size]; + unsigned long start = (unsigned long)&ptr[i * page_size]; + struct uffdio_register reg = { + .range = { + .start = start, + .len = page_size, + }, + .mode = UFFDIO_REGISTER_MODE_MISSING, + }; + + if (ioctl(uffd, UFFDIO_REGISTER, ®) == -1) { + perror("ioctl UFFDIO_REGISTER"); + success = false; + goto out_unmap; + } + if (munmap(unmap_ptr, page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + } + + /* + * Now try to move the entire range which is invalid for multi VMA move. 
+ * + * This will fail, and no VMA should be moved, as we check this ahead of + * time. + */ + res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + err = errno; + if (res != MAP_FAILED) { + fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n"); + success = false; + goto out_unmap; + } + if (err != EFAULT) { + errno = err; + perror("mrmeap() unexpected error"); + success = false; + goto out_unmap; + } + if (is_ptr_mapped(maps_fp, tgt_ptr, page_size)) { + fprintf(stderr, + "Invalid uffd-armed VMA at start of multi range moved\n"); + success = false; + goto out_unmap; + } + + /* + * Now try to move a single VMA, this should succeed as not multi VMA + * move. + */ + res = mremap(ptr, page_size, page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + if (res == MAP_FAILED) { + perror("mremap single invalid-multi VMA"); + success = false; + goto out_unmap; + } + + /* + * Unmap the VMA, and remap a non-uffd registered (therefore, multi VMA + * move valid) VMA at the start of ptr range. + */ + if (munmap(tgt_ptr, page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + res = mmap(ptr, page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); + if (res == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_unmap; + } + + /* + * Now try to move the entire range, we should succeed in moving the + * first VMA, but no others, and report a failure. 
+ */ + res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + err = errno; + if (res != MAP_FAILED) { + fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n"); + success = false; + goto out_unmap; + } + if (err != EFAULT) { + errno = err; + perror("mrmeap() unexpected error"); + success = false; + goto out_unmap; + } + if (!is_ptr_mapped(maps_fp, tgt_ptr, page_size)) { + fprintf(stderr, "Valid VMA not moved\n"); + success = false; + goto out_unmap; + } + + /* + * Unmap the VMA, and map valid VMA at start of ptr range, and replace + * all existing multi-move invalid VMAs, except the last, with valid + * multi-move VMAs. + */ + if (munmap(tgt_ptr, page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + if (munmap(ptr, size - 2 * page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + for (i = 0; i < 8; i += 2) { + res = mmap(&ptr[i * page_size], page_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); + if (res == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_unmap; + } + } + + /* + * Now try to move the entire range, we should succeed in moving all but + * the last VMA, and report a failure. 
+ */ + res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + err = errno; + if (res != MAP_FAILED) { + fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n"); + success = false; + goto out_unmap; + } + if (err != EFAULT) { + errno = err; + perror("mrmeap() unexpected error"); + success = false; + goto out_unmap; + } + + for (i = 0; i < 10; i += 2) { + bool is_mapped = is_ptr_mapped(maps_fp, + &tgt_ptr[i * page_size], page_size); + + if (i < 8 && !is_mapped) { + fprintf(stderr, "Valid VMA not moved at %d\n", i); + success = false; + goto out_unmap; + } else if (i == 8 && is_mapped) { + fprintf(stderr, "Invalid VMA moved at %d\n", i); + success = false; + goto out_unmap; + } + } + +out_unmap: + if (munmap(tgt_ptr, size)) + perror("munmap tgt"); + if (munmap(ptr, size)) + perror("munmap src"); +out_close_uffd: + close(uffd); +out: + if (success) + ksft_test_result_pass("%s\n", test_name); + else + ksft_test_result_fail("%s\n", test_name); +} +#else +static void mremap_move_multi_invalid_vmas(FILE *maps_fp, unsigned long page_size) +{ + char *test_name = "mremap move multiple invalid vmas"; + + ksft_test_result_skip("%s - missing uffd", test_name); +} +#endif /* __NR_userfaultfd */ + /* Returns the time taken for the remap on success else returns -1. 
*/ static long long remap_region(struct config c, unsigned int threshold_mb, char *rand_addr) @@ -1074,7 +1331,7 @@ int main(int argc, char **argv) char *rand_addr; size_t rand_size; int num_expand_tests = 2; - int num_misc_tests = 8; + int num_misc_tests = 9; struct test test_cases[MAX_TEST] = {}; struct test perf_test_cases[MAX_PERF_TEST]; int page_size; @@ -1197,8 +1454,6 @@ int main(int argc, char **argv) mremap_expand_merge(maps_fp, page_size); mremap_expand_merge_offset(maps_fp, page_size); - fclose(maps_fp); - mremap_move_within_range(pattern_seed, rand_addr); mremap_move_1mb_from_start(pattern_seed, rand_addr); mremap_shrink_multiple_vmas(page_size, /* inplace= */true); @@ -1207,6 +1462,9 @@ int main(int argc, char **argv) mremap_move_multiple_vmas(pattern_seed, page_size, /* dontunmap= */ true); mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ false); mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ true); + mremap_move_multi_invalid_vmas(maps_fp, page_size); + + fclose(maps_fp); if (run_perf_tests) { ksft_print_msg("\n%s\n", From 8b26f0a8b4f220c1ec410e1350e8594911ec5742 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 12 Aug 2025 18:02:14 +0000 Subject: [PATCH 2183/2411] .mailmap: add entry for Easwar Hariharan Map my old, obsolete work email address to my current one. Link: https://lkml.kernel.org/r/20250812180218.92755-1-easwar.hariharan@linux.microsoft.com Signed-off-by: Easwar Hariharan Cc: Carlos Bilbao Cc: Jarkko Sakkinen Cc: Shannon Nelson Cc: Dmitry Baryshkov Cc: Hans Verkuil Signed-off-by: Andrew Morton --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index d9fa1b555116..a124aeed52a2 100644 --- a/.mailmap +++ b/.mailmap @@ -226,6 +226,8 @@ Domen Puncer Douglas Gilbert Drew Fustini +Easwar Hariharan +Easwar Hariharan Ed L. 
Cashin Elliot Berman Enric Balletbo i Serra From 0cc2a4880ced1486acc34898cd85edc6ee109c4c Mon Sep 17 00:00:00 2001 From: Sang-Heon Jeon Date: Tue, 12 Aug 2025 23:00:46 +0900 Subject: [PATCH 2184/2411] selftests/damon: fix selftests by installing drgn related script drgn_dump_damon_status is not installed during kselftest setup. It can break other tests which depend on drgn_dump_damon_status. Install drgn_dump_damon_status files to fix broken test. Link: https://lkml.kernel.org/r/20250812140046.660486-1-ekffu200098@gmail.com Fixes: f3e8e1e51362 ("selftests/damon: add drgn script for extracting damon status") Signed-off-by: Sang-Heon Jeon Reviewed-by: SeongJae Park Cc: Alexandre Ghiti Cc: Honggyu Kim Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 5b230deb19e8..9a3499827d4b 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -4,6 +4,7 @@ TEST_GEN_FILES += access_memory access_memory_even TEST_FILES = _damon_sysfs.py +TEST_FILES += drgn_dump_damon_status.py # functionality tests TEST_PROGS += sysfs.sh From 808471ddb0fa785559c3e7aee59be20a13b46ef5 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Wed, 13 Aug 2025 15:04:55 +0900 Subject: [PATCH 2185/2411] iov_iter: iterate_folioq: fix handling of offset >= folio size It's apparently possible to get an iov advanced all the way up to the end of the current page we're looking at, e.g. 
(gdb) p *iter $24 = {iter_type = 4 '\004', nofault = false, data_source = false, iov_offset = 4096, {__ubuf_iovec = { iov_base = 0xffff88800f5bc000, iov_len = 655}, {{__iov = 0xffff88800f5bc000, kvec = 0xffff88800f5bc000, bvec = 0xffff88800f5bc000, folioq = 0xffff88800f5bc000, xarray = 0xffff88800f5bc000, ubuf = 0xffff88800f5bc000}, count = 655}}, {nr_segs = 2, folioq_slot = 2 '\002', xarray_start = 2}} Where iov_offset is 4k with 4k-sized folios This should have been fine because we're only in the 2nd slot and there's another one after this, but iterate_folioq should not try to map a folio that skips the whole size, and more importantly part here does not end up zero (because 'PAGE_SIZE - skip % PAGE_SIZE' ends up PAGE_SIZE and not zero..), so skip forward to the "advance to next folio" code Link: https://lkml.kernel.org/r/20250813-iot_iter_folio-v3-0-a0ffad2b665a@codewreck.org Link: https://lkml.kernel.org/r/20250813-iot_iter_folio-v3-1-a0ffad2b665a@codewreck.org Signed-off-by: Dominique Martinet Fixes: db0aa2e9566f ("mm: Define struct folio_queue and ITER_FOLIOQ to handle a sequence of folios") Reported-by: Maximilian Bosch Reported-by: Ryan Lahfa Reported-by: Christian Theune Reported-by: Arnout Engelen Link: https://lkml.kernel.org/r/D4LHHUNLG79Y.12PI0X6BEHRHW@mbosch.me/ Acked-by: David Howells Cc: Al Viro Cc: Christian Brauner Cc: Matthew Wilcox (Oracle) Cc: [6.12+] Signed-off-by: Andrew Morton --- include/linux/iov_iter.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h index c4aa58032faf..f9a17fbbd398 100644 --- a/include/linux/iov_iter.h +++ b/include/linux/iov_iter.h @@ -160,7 +160,7 @@ size_t iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2 do { struct folio *folio = folioq_folio(folioq, slot); - size_t part, remain, consumed; + size_t part, remain = 0, consumed; size_t fsize; void *base; @@ -168,14 +168,16 @@ size_t iterate_folioq(struct 
iov_iter *iter, size_t len, void *priv, void *priv2 break; fsize = folioq_folio_size(folioq, slot); - base = kmap_local_folio(folio, skip); - part = umin(len, PAGE_SIZE - skip % PAGE_SIZE); - remain = step(base, progress, part, priv, priv2); - kunmap_local(base); - consumed = part - remain; - len -= consumed; - progress += consumed; - skip += consumed; + if (skip < fsize) { + base = kmap_local_folio(folio, skip); + part = umin(len, PAGE_SIZE - skip % PAGE_SIZE); + remain = step(base, progress, part, priv, priv2); + kunmap_local(base); + consumed = part - remain; + len -= consumed; + progress += consumed; + skip += consumed; + } if (skip >= fsize) { skip = 0; slot++; From c7b70f76db0703a93b39a85b540d5b3911e166e8 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Thu, 14 Aug 2025 07:54:54 +0000 Subject: [PATCH 2186/2411] mm: rust: add page.rs to MEMORY MANAGEMENT - RUST The page.rs file currently isn't included anywhere, and I think it's a good fit for the MEMORY MANAGEMENT - RUST entry. The file was originally added for use by Rust Binder, but I believe there is also work to use it in the upcoming scatterlist abstractions. 
Link: https://lkml.kernel.org/r/20250814075454.1596482-1-aliceryhl@google.com Signed-off-by: Alice Ryhl Acked-by: Danilo Krummrich Cc: Danilo Krummrich Cc: Liam Howlett Cc: "Uladzislau Rezki (Sony)" Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index daf520a13bdf..afd7663c367f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16248,8 +16248,10 @@ S: Maintained W: http://www.linux-mm.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm F: rust/helpers/mm.c +F: rust/helpers/page.c F: rust/kernel/mm.rs F: rust/kernel/mm/ +F: rust/kernel/page.rs MEMORY MAPPING M: Andrew Morton From 44958000badae5488d91431de194f747acc5dcac Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Fri, 15 Aug 2025 14:59:14 -0700 Subject: [PATCH 2187/2411] MAINTAINERS: mark MGLRU as maintained The three folks being added here are actively working on MGLRU within Google, so we can review patches for this feature and plan to contribute some improvements / extensions to it on an ongoing basis. With three of us we may have some hope filling Yu Zhao's shoes, since he has moved on to other projects these days. 
Link: https://lkml.kernel.org/r/20250815215914.3671925-1-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Cc: Lorenzo Stoakes Cc: Axel Rasmussen Cc: Shakeel Butt Cc: Wei Xu Cc: Yuanchu Xie Cc: Yu Zhao Signed-off-by: Andrew Morton --- MAINTAINERS | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index afd7663c367f..64e6a25b3aa2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16058,6 +16058,23 @@ F: mm/mempolicy.c F: mm/migrate.c F: mm/migrate_device.c +MEMORY MANAGEMENT - MGLRU (MULTI-GEN LRU) +M: Andrew Morton +M: Axel Rasmussen +M: Yuanchu Xie +R: Wei Xu +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: Documentation/admin-guide/mm/multigen_lru.rst +F: Documentation/mm/multigen_lru.rst +F: include/linux/mm_inline.h +F: include/linux/mmzone.h +F: mm/swap.c +F: mm/vmscan.c +F: mm/workingset.c + MEMORY MANAGEMENT - MISC M: Andrew Morton M: David Hildenbrand From 2e6053fea379806269c4f7f5e36b523c9c0fb35c Mon Sep 17 00:00:00 2001 From: Jinjiang Tu Date: Fri, 15 Aug 2025 15:32:09 +0800 Subject: [PATCH 2188/2411] mm/memory-failure: fix infinite UCE for VM_PFNMAP pfn When memory_failure() is called for an already hwpoisoned pfn, kill_accessing_process() will be called to kill current task. However, if the vma of the accessing vaddr is VM_PFNMAP, walk_page_range() will skip the vma in walk_page_test() and return 0. Before commit aaf99ac2ceb7 ("mm/hwpoison: do not send SIGBUS to processes with recovered clean pages"), kill_accessing_process() will return EFAULT. For x86, the current task will be killed in kill_me_maybe(). However, after this commit, kill_accessing_process() simply returns 0, which means the UCE is reported as handled properly even though it actually is not. In such a case, the user task will trigger UCE infinitely. To fix it, add .test_walk callback for hwpoison_walk_ops to scan all vmas. 
Link: https://lkml.kernel.org/r/20250815073209.1984582-1-tujinjiang@huawei.com Fixes: aaf99ac2ceb7 ("mm/hwpoison: do not send SIGBUS to processes with recovered clean pages") Signed-off-by: Jinjiang Tu Acked-by: David Hildenbrand Acked-by: Miaohe Lin Reviewed-by: Jane Chu Cc: Kefeng Wang Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Shuai Xue Cc: Zi Yan Cc: Signed-off-by: Andrew Morton --- mm/memory-failure.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index e2e685b971bb..fc30ca4804bf 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -853,9 +853,17 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask, #define hwpoison_hugetlb_range NULL #endif +static int hwpoison_test_walk(unsigned long start, unsigned long end, + struct mm_walk *walk) +{ + /* We also want to consider pages mapped into VM_PFNMAP. */ + return 0; +} + static const struct mm_walk_ops hwpoison_walk_ops = { .pmd_entry = hwpoison_pte_range, .hugetlb_entry = hwpoison_hugetlb_range, + .test_walk = hwpoison_test_walk, .walk_lock = PGWALK_RDLOCK, }; From b3dee902b6c26b7d8031a4df19753e27dcfcba01 Mon Sep 17 00:00:00 2001 From: Sang-Heon Jeon Date: Sat, 16 Aug 2025 10:51:16 +0900 Subject: [PATCH 2189/2411] mm/damon/core: fix damos_commit_filter not changing allow Current damos_commit_filter() does not persist the `allow' value of the filter. As a result, changing the `allow' value of a filter and committing doesn't change the `allow' value. Add the missing `allow' value update, so committing the filter persistently changes the `allow' value well. 
Link: https://lkml.kernel.org/r/20250816015116.194589-1-ekffu200098@gmail.com Fixes: fe6d7fdd6249 ("mm/damon/core: add damos_filter->allow field") Signed-off-by: Sang-Heon Jeon Reviewed-by: SeongJae Park Cc: [6.14.x] Signed-off-by: Andrew Morton --- mm/damon/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/damon/core.c b/mm/damon/core.c index 467c2d78126f..70eff5cbe6ee 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -883,6 +883,7 @@ static void damos_commit_filter( { dst->type = src->type; dst->matching = src->matching; + dst->allow = src->allow; damos_commit_filter_arg(dst, src); } From 053c8ebe74f7e1f4c072e59428da80b9d78bc4b7 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 17 Aug 2025 23:17:59 +0800 Subject: [PATCH 2190/2411] mm/migrate: fix NULL movable_ops if CONFIG_ZSMALLOC=m After commit 84caf98838a3e5f4bdb34 ("mm: stop storing migration_ops in page->mapping") we get such an error message if CONFIG_ZSMALLOC=m: WARNING: CPU: 3 PID: 42 at mm/migrate.c:142 isolate_movable_ops_page+0xa8/0x1c0 CPU: 3 UID: 0 PID: 42 Comm: kcompactd0 Not tainted 6.16.0-rc5+ #2133 PREEMPT pc 9000000000540bd8 ra 9000000000540b84 tp 9000000100420000 sp 9000000100423a60 a0 9000000100193a80 a1 000000000000000c a2 000000000000001b a3 ffffffffffffffff a4 ffffffffffffffff a5 0000000000000267 a6 0000000000000000 a7 9000000100423ae0 t0 00000000000000f1 t1 00000000000000f6 t2 0000000000000000 t3 0000000000000001 t4 ffffff00010eb834 t5 0000000000000040 t6 900000010c89d380 t7 90000000023fcc70 t8 0000000000000018 u0 0000000000000000 s9 ffffff00010eb800 s0 ffffff00010eb800 s1 000000000000000c s2 0000000000043ae0 s3 0000800000000000 s4 900000000219cc40 s5 0000000000000000 s6 ffffff00010eb800 s7 0000000000000001 s8 90000000025b4000 ra: 9000000000540b84 isolate_movable_ops_page+0x54/0x1c0 ERA: 9000000000540bd8 isolate_movable_ops_page+0xa8/0x1c0 CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) PRMD: 00000004 (PPLV0 +PIE -PWE) EUEN: 00000000 (-FPE -SXE -ASXE -BTE) ECFG: 
00071c1d (LIE=0,2-4,10-12 VS=7) ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) CPU: 3 UID: 0 PID: 42 Comm: kcompactd0 Not tainted 6.16.0-rc5+ #2133 PREEMPT Stack : 90000000021fd000 0000000000000000 9000000000247720 9000000100420000 90000001004236a0 90000001004236a8 0000000000000000 90000001004237e8 90000001004237e0 90000001004237e0 9000000100423550 0000000000000001 0000000000000001 90000001004236a8 725a84864a19e2d9 90000000023fcc58 9000000100420000 90000000024c6848 9000000002416848 0000000000000001 0000000000000000 000000000000000a 0000000007fe0000 ffffff00010eb800 0000000000000000 90000000021fd000 0000000000000000 900000000205cf30 000000000000008e 0000000000000009 ffffff00010eb800 0000000000000001 90000000025b4000 0000000000000000 900000000024773c 00007ffff103d748 00000000000000b0 0000000000000004 0000000000000000 0000000000071c1d ... Call Trace: [<900000000024773c>] show_stack+0x5c/0x190 [<90000000002415e0>] dump_stack_lvl+0x70/0x9c [<90000000004abe6c>] isolate_migratepages_block+0x3bc/0x16e0 [<90000000004af408>] compact_zone+0x558/0x1000 [<90000000004b0068>] compact_node+0xa8/0x1e0 [<90000000004b0aa4>] kcompactd+0x394/0x410 [<90000000002b3c98>] kthread+0x128/0x140 [<9000000001779148>] ret_from_kernel_thread+0x28/0xc0 [<9000000000245528>] ret_from_kernel_thread_asm+0x10/0x88 The reason is that defined(CONFIG_ZSMALLOC) evaluates to 1 only when CONFIG_ZSMALLOC=y, we should use IS_ENABLED(CONFIG_ZSMALLOC) instead. But when I use IS_ENABLED(CONFIG_ZSMALLOC), page_movable_ops() cannot access zsmalloc_mops because zsmalloc_mops is in a module. To solve this problem, we define a set_movable_ops() interface to register and unregister offline_movable_ops / zsmalloc_movable_ops in mm/migrate.c, and call them at mm/balloon_compaction.c & mm/zsmalloc.c. Since offline_movable_ops / zsmalloc_movable_ops are always accessible, all #ifdef / #endif are removed in page_movable_ops(). 
Link: https://lkml.kernel.org/r/20250817151759.2525174-1-chenhuacai@loongson.cn Fixes: 84caf98838a3 ("mm: stop storing migration_ops in page->mapping") Signed-off-by: Huacai Chen Acked-by: Zi Yan Acked-by: David Hildenbrand Cc: Huacai Chen Cc: Huacai Chen Cc: Lorenzo Stoakes Cc: "Michael S. Tsirkin" Cc: Minchan Kim Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton --- include/linux/migrate.h | 5 +++++ mm/balloon_compaction.c | 6 ++++++ mm/migrate.c | 38 ++++++++++++++++++++++++++++++-------- mm/zsmalloc.c | 10 ++++++++++ 4 files changed, 51 insertions(+), 8 deletions(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index acadd41e0b5c..9009e27b5f44 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -79,6 +79,7 @@ void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl) void folio_migrate_flags(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, struct folio *newfolio, struct folio *folio, int extra_count); +int set_movable_ops(const struct movable_operations *ops, enum pagetype type); #else @@ -100,6 +101,10 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, { return -ENOSYS; } +static inline int set_movable_ops(const struct movable_operations *ops, enum pagetype type) +{ + return -ENOSYS; +} #endif /* CONFIG_MIGRATION */ diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 2a4a649805c1..03c5dbabb156 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -254,4 +254,10 @@ const struct movable_operations balloon_mops = { .putback_page = balloon_page_putback, }; +static int __init balloon_init(void) +{ + return set_movable_ops(&balloon_mops, PGTY_offline); +} +core_initcall(balloon_init); + #endif /* CONFIG_BALLOON_COMPACTION */ diff --git a/mm/migrate.c b/mm/migrate.c index 425401b2d4e1..9e5ef39ce73a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -43,8 +43,6 @@ #include #include #include -#include 
-#include #include @@ -53,6 +51,33 @@ #include "internal.h" #include "swap.h" +static const struct movable_operations *offline_movable_ops; +static const struct movable_operations *zsmalloc_movable_ops; + +int set_movable_ops(const struct movable_operations *ops, enum pagetype type) +{ + /* + * We only allow for selected types and don't handle concurrent + * registration attempts yet. + */ + switch (type) { + case PGTY_offline: + if (offline_movable_ops && ops) + return -EBUSY; + offline_movable_ops = ops; + break; + case PGTY_zsmalloc: + if (zsmalloc_movable_ops && ops) + return -EBUSY; + zsmalloc_movable_ops = ops; + break; + default: + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL_GPL(set_movable_ops); + static const struct movable_operations *page_movable_ops(struct page *page) { VM_WARN_ON_ONCE_PAGE(!page_has_movable_ops(page), page); @@ -62,15 +87,12 @@ static const struct movable_operations *page_movable_ops(struct page *page) * it as movable, the page type must be sticky until the page gets freed * back to the buddy. */ -#ifdef CONFIG_BALLOON_COMPACTION if (PageOffline(page)) /* Only balloon compaction sets PageOffline pages movable. 
*/ - return &balloon_mops; -#endif /* CONFIG_BALLOON_COMPACTION */ -#if defined(CONFIG_ZSMALLOC) && defined(CONFIG_COMPACTION) + return offline_movable_ops; if (PageZsmalloc(page)) - return &zsmalloc_mops; -#endif /* defined(CONFIG_ZSMALLOC) && defined(CONFIG_COMPACTION) */ + return zsmalloc_movable_ops; + return NULL; } diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 2c5e56a65354..805a10b41266 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -2246,8 +2246,15 @@ EXPORT_SYMBOL_GPL(zs_destroy_pool); static int __init zs_init(void) { + int rc __maybe_unused; + #ifdef CONFIG_ZPOOL zpool_register_driver(&zs_zpool_driver); +#endif +#ifdef CONFIG_COMPACTION + rc = set_movable_ops(&zsmalloc_mops, PGTY_zsmalloc); + if (rc) + return rc; #endif zs_stat_init(); return 0; @@ -2257,6 +2264,9 @@ static void __exit zs_exit(void) { #ifdef CONFIG_ZPOOL zpool_unregister_driver(&zs_zpool_driver); +#endif +#ifdef CONFIG_COMPACTION + set_movable_ops(NULL, PGTY_zsmalloc); #endif zs_stat_exit(); } From ba1dd7ac735d604249f1e614d997dc66b30844ab Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 16 Aug 2025 09:55:59 -0700 Subject: [PATCH 2191/2411] mm/damon/sysfs-schemes: put damos dests dir after removing its files damon_sysfs_scheme_rm_dirs() puts dests directory kobject before removing its internal files. Since putting the kobject frees its container struct, and the internal files removal accesses the container, use-after-free happens. Fix it by putting the reference _after_ removing the files. 
Link: https://lkml.kernel.org/r/20250816165559.2601-1-sj@kernel.org Fixes: 2cd0bf85a203 ("mm/damon/sysfs-schemes: implement DAMOS action destinations directory") Signed-off-by: SeongJae Park Reported-by: Alexandre Ghiti Closes: https://lore.kernel.org/2d39a734-320d-4341-8f8a-4019eec2dbf2@ghiti.fr Tested-by: Alexandre Ghiti Signed-off-by: Andrew Morton --- mm/damon/sysfs-schemes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 74056bcd6a2c..6536f16006c9 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -2158,8 +2158,8 @@ static void damon_sysfs_scheme_rm_dirs(struct damon_sysfs_scheme *scheme) { damon_sysfs_access_pattern_rm_dirs(scheme->access_pattern); kobject_put(&scheme->access_pattern->kobj); - kobject_put(&scheme->dests->kobj); damos_sysfs_dests_rm_dirs(scheme->dests); + kobject_put(&scheme->dests->kobj); damon_sysfs_quotas_rm_dirs(scheme->quotas); kobject_put(&scheme->quotas->kobj); kobject_put(&scheme->watermarks->kobj); From 772e5b4a5e8360743645b9a466842d16092c4f94 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 18 Aug 2025 19:53:58 +0200 Subject: [PATCH 2192/2411] mm/mremap: fix WARN with uffd that has remap events disabled Registering userfaultfd on a VMA that spans at least one PMD and then mremap()'ing that VMA can trigger a WARN when recovering from a failed page table move due to a page table allocation error. 
The code ends up doing the right thing (recurse, avoiding moving actual page tables), but triggering that WARN is unpleasant: WARNING: CPU: 2 PID: 6133 at mm/mremap.c:357 move_normal_pmd mm/mremap.c:357 [inline] WARNING: CPU: 2 PID: 6133 at mm/mremap.c:357 move_pgt_entry mm/mremap.c:595 [inline] WARNING: CPU: 2 PID: 6133 at mm/mremap.c:357 move_page_tables+0x3832/0x44a0 mm/mremap.c:852 Modules linked in: CPU: 2 UID: 0 PID: 6133 Comm: syz.0.19 Not tainted 6.17.0-rc1-syzkaller-00004-g53e760d89498 #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:move_normal_pmd mm/mremap.c:357 [inline] RIP: 0010:move_pgt_entry mm/mremap.c:595 [inline] RIP: 0010:move_page_tables+0x3832/0x44a0 mm/mremap.c:852 Code: ... RSP: 0018:ffffc900037a76d8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000032930007 RCX: ffffffff820c6645 RDX: ffff88802e56a440 RSI: ffffffff820c7201 RDI: 0000000000000007 RBP: ffff888037728fc0 R08: 0000000000000007 R09: 0000000000000000 R10: 0000000032930007 R11: 0000000000000000 R12: 0000000000000000 R13: ffffc900037a79a8 R14: 0000000000000001 R15: dffffc0000000000 FS: 000055556316a500(0000) GS:ffff8880d68bc000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b30863fff CR3: 0000000050171000 CR4: 0000000000352ef0 Call Trace: copy_vma_and_data+0x468/0x790 mm/mremap.c:1215 move_vma+0x548/0x1780 mm/mremap.c:1282 mremap_to+0x1b7/0x450 mm/mremap.c:1406 do_mremap+0xfad/0x1f80 mm/mremap.c:1921 __do_sys_mremap+0x119/0x170 mm/mremap.c:1977 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xcd/0x4c0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f00d0b8ebe9 Code: ... 
RSP: 002b:00007ffe5ea5ee98 EFLAGS: 00000246 ORIG_RAX: 0000000000000019 RAX: ffffffffffffffda RBX: 00007f00d0db5fa0 RCX: 00007f00d0b8ebe9 RDX: 0000000000400000 RSI: 0000000000c00000 RDI: 0000200000000000 RBP: 00007ffe5ea5eef0 R08: 0000200000c00000 R09: 0000000000000000 R10: 0000000000000003 R11: 0000000000000246 R12: 0000000000000002 R13: 00007f00d0db5fa0 R14: 00007f00d0db5fa0 R15: 0000000000000005 The underlying issue is that we recurse during the original page table move, but not during the recovery move. Fix it by checking for both VMAs and performing the check before the pmd_none() sanity check. Add a new helper where we perform+document that check for the PMD and PUD level. Thanks to Harry for bisecting. Link: https://lkml.kernel.org/r/20250818175358.1184757-1-david@redhat.com Fixes: 0cef0bb836e3 ("mm: clear uffd-wp PTE/PMD state on mremap()") Signed-off-by: David Hildenbrand Reported-by: syzbot+4d9a13f0797c46a29e42@syzkaller.appspotmail.com Closes: https://lkml.kernel.org/r/689bb893.050a0220.7f033.013a.GAE@google.com Tested-by: Harry Yoo Cc: "Liam R. Howlett" Cc: Lorenzo Stoakes Cc: Vlastimil Babka Cc: Jann Horn Cc: Pedro Falcato Cc: Signed-off-by: Andrew Morton --- mm/mremap.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/mm/mremap.c b/mm/mremap.c index 33b642076205..e618a706aff5 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -323,6 +323,25 @@ static inline bool arch_supports_page_table_move(void) } #endif +static inline bool uffd_supports_page_table_move(struct pagetable_move_control *pmc) +{ + /* + * If we are moving a VMA that has uffd-wp registered but with + * remap events disabled (new VMA will not be registered with uffd), we + * need to ensure that the uffd-wp state is cleared from all pgtables. + * This means recursing into lower page tables in move_page_tables(). + * + * We might get called with VMAs reversed when recovering from a + * failed page table move. 
In that case, the + * "old"-but-actually-"originally new" VMA during recovery will not have + * a uffd context. Recursing into lower page tables during the original + * move but not during the recovery move will cause trouble, because we + * run into already-existing page tables. So check both VMAs. + */ + return !vma_has_uffd_without_event_remap(pmc->old) && + !vma_has_uffd_without_event_remap(pmc->new); +} + #ifdef CONFIG_HAVE_MOVE_PMD static bool move_normal_pmd(struct pagetable_move_control *pmc, pmd_t *old_pmd, pmd_t *new_pmd) @@ -335,6 +354,8 @@ static bool move_normal_pmd(struct pagetable_move_control *pmc, if (!arch_supports_page_table_move()) return false; + if (!uffd_supports_page_table_move(pmc)) + return false; /* * The destination pmd shouldn't be established, free_pgtables() * should have released it. @@ -361,15 +382,6 @@ static bool move_normal_pmd(struct pagetable_move_control *pmc, if (WARN_ON_ONCE(!pmd_none(*new_pmd))) return false; - /* If this pmd belongs to a uffd vma with remap events disabled, we need - * to ensure that the uffd-wp state is cleared from all pgtables. This - * means recursing into lower page tables in move_page_tables(), and we - * can reuse the existing code if we simply treat the entry as "not - * moved". - */ - if (vma_has_uffd_without_event_remap(vma)) - return false; - /* * We don't have to worry about the ordering of src and dst * ptlocks because exclusive mmap_lock prevents deadlock. @@ -418,6 +430,8 @@ static bool move_normal_pud(struct pagetable_move_control *pmc, if (!arch_supports_page_table_move()) return false; + if (!uffd_supports_page_table_move(pmc)) + return false; /* * The destination pud shouldn't be established, free_pgtables() * should have released it. 
@@ -425,15 +439,6 @@ static bool move_normal_pud(struct pagetable_move_control *pmc, if (WARN_ON_ONCE(!pud_none(*new_pud))) return false; - /* If this pud belongs to a uffd vma with remap events disabled, we need - * to ensure that the uffd-wp state is cleared from all pgtables. This - * means recursing into lower page tables in move_page_tables(), and we - * can reuse the existing code if we simply treat the entry as "not - * moved". - */ - if (vma_has_uffd_without_event_remap(vma)) - return false; - /* * We don't have to worry about the ordering of src and dst * ptlocks because exclusive mmap_lock prevents deadlock. From 4a73a36cb704813f588af13d9842d0ba5a185758 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Thu, 14 Aug 2025 17:42:14 +0200 Subject: [PATCH 2193/2411] cdc_ncm: Flag Intel OEM version of Fibocom L850-GL as WWAN This lets NetworkManager/ModemManager know that this is a modem and needs to be connected first. Signed-off-by: Lubomir Rintel Link: https://patch.msgid.link/20250814154214.250103-1-lkundrak@v3.sk Signed-off-by: Jakub Kicinski --- drivers/net/usb/cdc_ncm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index ea0e5e276cd6..5d123df0a866 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -2087,6 +2087,13 @@ static const struct usb_device_id cdc_devs[] = { .driver_info = (unsigned long)&wwan_info, }, + /* Intel modem (label from OEM reads Fibocom L850-GL) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x8087, 0x095a, + USB_CLASS_COMM, + USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, + }, + /* DisplayLink docking stations */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_VENDOR, From 26ebba25e210116053609f4c7ee701bffa7ebd7d Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Fri, 8 Aug 2025 12:05:26 +0800 Subject: [PATCH 2194/2411] tools/latency-collector: Check pkg-config install The tool pkg-config used to check 
libtraceevent and libtracefs, if not installed, it will report the libs not found, even though they have already been installed. Before: libtraceevent is missing. Please install libtraceevent-dev/libtraceevent-devel libtracefs is missing. Please install libtracefs-dev/libtracefs-devel After: Makefile.config:10: *** Error: pkg-config needed by libtraceevent/libtracefs is missing on this system, please install it. Link: https://lore.kernel.org/20250808040527.2036023-1-chen.dylane@linux.dev Fixes: 9d56c88e5225 ("tools/tracing: Use tools/build makefiles on latency-collector") Signed-off-by: Tao Chen Signed-off-by: Steven Rostedt (Google) --- tools/tracing/latency/Makefile.config | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/tracing/latency/Makefile.config b/tools/tracing/latency/Makefile.config index 0fe6b50f029b..6efa13e3ca93 100644 --- a/tools/tracing/latency/Makefile.config +++ b/tools/tracing/latency/Makefile.config @@ -1,7 +1,15 @@ # SPDX-License-Identifier: GPL-2.0-only +include $(srctree)/tools/scripts/utilities.mak + STOP_ERROR := +ifndef ($(NO_LIBTRACEEVENT),1) + ifeq ($(call get-executable,$(PKG_CONFIG)),) + $(error Error: $(PKG_CONFIG) needed by libtraceevent/libtracefs is missing on this system, please install it) + endif +endif + define lib_setup $(eval LIB_INCLUDES += $(shell sh -c "$(PKG_CONFIG) --cflags lib$(1)")) $(eval LDFLAGS += $(shell sh -c "$(PKG_CONFIG) --libs-only-L lib$(1)")) From 7b128f1d53dcaa324d4aa05d821a6bf4a7b203e7 Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Fri, 8 Aug 2025 12:05:27 +0800 Subject: [PATCH 2195/2411] rtla: Check pkg-config install The tool pkg-config used to check libtraceevent and libtracefs, if not installed, it will report the libs not found, even though they have already been installed. Before: libtraceevent is missing. Please install libtraceevent-dev/libtraceevent-devel libtracefs is missing. 
Please install libtracefs-dev/libtracefs-devel After: Makefile.config:10: *** Error: pkg-config needed by libtraceevent/libtracefs is missing on this system, please install it. Link: https://lore.kernel.org/20250808040527.2036023-2-chen.dylane@linux.dev Fixes: 01474dc706ca ("tools/rtla: Use tools/build makefiles to build rtla") Signed-off-by: Tao Chen Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/Makefile.config | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/tracing/rtla/Makefile.config b/tools/tracing/rtla/Makefile.config index 5f2231d8d626..07ff5e8f3006 100644 --- a/tools/tracing/rtla/Makefile.config +++ b/tools/tracing/rtla/Makefile.config @@ -1,10 +1,18 @@ # SPDX-License-Identifier: GPL-2.0-only +include $(srctree)/tools/scripts/utilities.mak + STOP_ERROR := LIBTRACEEVENT_MIN_VERSION = 1.5 LIBTRACEFS_MIN_VERSION = 1.6 +ifndef ($(NO_LIBTRACEEVENT),1) + ifeq ($(call get-executable,$(PKG_CONFIG)),) + $(error Error: $(PKG_CONFIG) needed by libtraceevent/libtracefs is missing on this system, please install it) + endif +endif + define lib_setup $(eval LIB_INCLUDES += $(shell sh -c "$(PKG_CONFIG) --cflags lib$(1)")) $(eval LDFLAGS += $(shell sh -c "$(PKG_CONFIG) --libs-only-L lib$(1)")) From f179f5bc158f07693b74c264f8933c8b0f07503f Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 15 Aug 2025 10:53:17 -0300 Subject: [PATCH 2196/2411] net/sched: sch_dualpi2: Run prob update timer in softirq to avoid deadlock When a user creates a dualpi2 qdisc it automatically sets a timer. This timer will run constantly and update the qdisc's probability field. The issue is that the timer acquires the qdisc root lock and runs in hardirq. The qdisc root lock is also acquired in dev.c whenever a packet arrives for this qdisc. Since the dualpi2 timer callback runs in hardirq, it may interrupt the packet processing running in softirq. If that happens and it runs on the same CPU, it will acquire the same lock and cause a deadlock. 
The following splat shows up when running a kernel compiled with lock debugging: [ +0.000224] WARNING: inconsistent lock state [ +0.000224] 6.16.0+ #10 Not tainted [ +0.000169] -------------------------------- [ +0.000029] inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. [ +0.000000] ping/156 [HC0[0]:SC0[2]:HE1:SE0] takes: [ +0.000000] ffff897841242110 (&sch->root_lock_key){?.-.}-{3:3}, at: __dev_queue_xmit+0x86d/0x1140 [ +0.000000] {IN-HARDIRQ-W} state was registered at: [ +0.000000] lock_acquire.part.0+0xb6/0x220 [ +0.000000] _raw_spin_lock+0x31/0x80 [ +0.000000] dualpi2_timer+0x6f/0x270 [ +0.000000] __hrtimer_run_queues+0x1c5/0x360 [ +0.000000] hrtimer_interrupt+0x115/0x260 [ +0.000000] __sysvec_apic_timer_interrupt+0x6d/0x1a0 [ +0.000000] sysvec_apic_timer_interrupt+0x6e/0x80 [ +0.000000] asm_sysvec_apic_timer_interrupt+0x1a/0x20 [ +0.000000] pv_native_safe_halt+0xf/0x20 [ +0.000000] default_idle+0x9/0x10 [ +0.000000] default_idle_call+0x7e/0x1e0 [ +0.000000] do_idle+0x1e8/0x250 [ +0.000000] cpu_startup_entry+0x29/0x30 [ +0.000000] rest_init+0x151/0x160 [ +0.000000] start_kernel+0x6f3/0x700 [ +0.000000] x86_64_start_reservations+0x24/0x30 [ +0.000000] x86_64_start_kernel+0xc8/0xd0 [ +0.000000] common_startup_64+0x13e/0x148 [ +0.000000] irq event stamp: 6884 [ +0.000000] hardirqs last enabled at (6883): [] neigh_resolve_output+0x223/0x270 [ +0.000000] hardirqs last disabled at (6882): [] neigh_resolve_output+0x1e8/0x270 [ +0.000000] softirqs last enabled at (6880): [] neigh_resolve_output+0x1db/0x270 [ +0.000000] softirqs last disabled at (6884): [] __dev_queue_xmit+0x73/0x1140 [ +0.000000] other info that might help us debug this: [ +0.000000] Possible unsafe locking scenario: [ +0.000000] CPU0 [ +0.000000] ---- [ +0.000000] lock(&sch->root_lock_key); [ +0.000000] [ +0.000000] lock(&sch->root_lock_key); [ +0.000000] *** DEADLOCK *** [ +0.000000] 4 locks held by ping/156: [ +0.000000] #0: ffff897842332e08 (sk_lock-AF_INET){+.+.}-{0:0}, at: 
raw_sendmsg+0x41e/0xf40 [ +0.000000] #1: ffffffffa816f880 (rcu_read_lock){....}-{1:3}, at: ip_output+0x2c/0x190 [ +0.000000] #2: ffffffffa816f880 (rcu_read_lock){....}-{1:3}, at: ip_finish_output2+0xad/0x950 [ +0.000000] #3: ffffffffa816f840 (rcu_read_lock_bh){....}-{1:3}, at: __dev_queue_xmit+0x73/0x1140 I am able to reproduce it consistently when running the following: tc qdisc add dev lo handle 1: root dualpi2 ping -f 127.0.0.1 To fix it, make the timer run in softirq. Fixes: 320d031ad6e4 ("sched: Struct definition and parsing of dualpi2 qdisc") Reviewed-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250815135317.664993-1-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/sch_dualpi2.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_dualpi2.c b/net/sched/sch_dualpi2.c index 845375ebd4ea..4b975feb52b1 100644 --- a/net/sched/sch_dualpi2.c +++ b/net/sched/sch_dualpi2.c @@ -927,7 +927,8 @@ static int dualpi2_init(struct Qdisc *sch, struct nlattr *opt, q->sch = sch; dualpi2_reset_default(sch); - hrtimer_setup(&q->pi2_timer, dualpi2_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); + hrtimer_setup(&q->pi2_timer, dualpi2_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS_PINNED_SOFT); if (opt && nla_len(opt)) { err = dualpi2_change(sch, opt, extack); @@ -937,7 +938,7 @@ static int dualpi2_init(struct Qdisc *sch, struct nlattr *opt, } hrtimer_start(&q->pi2_timer, next_pi2_timeout(q), - HRTIMER_MODE_ABS_PINNED); + HRTIMER_MODE_ABS_PINNED_SOFT); return 0; } From bc1a59cff9f797bfbf8f3104507584d89e9ecf2e Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 18 Aug 2025 10:10:29 +0200 Subject: [PATCH 2197/2411] phy: mscc: Fix timestamping for vsc8584 There was a problem when we received frames and the frames were timestamped. The driver is configured to store the nanosecond part of the timestamp in the ptp reserved bits and it would take the second part by reading the LTC.
The problem is that when reading the LTC we are in atomic context and to read the second part will go over mdio bus which might sleep, so we get an error. The fix consists in actually put all the frames in a queue and start the aux work and in that work to read the LTC and then calculate the full received time. Fixes: 7d272e63e0979d ("net: phy: mscc: timestamping and PHC support") Signed-off-by: Horatiu Vultur Reviewed-by: Vadim Fedorenko Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250818081029.1300780-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mscc/mscc.h | 12 ++++++++ drivers/net/phy/mscc/mscc_main.c | 12 ++++++++ drivers/net/phy/mscc/mscc_ptp.c | 49 ++++++++++++++++++++++++-------- 3 files changed, 61 insertions(+), 12 deletions(-) diff --git a/drivers/net/phy/mscc/mscc.h b/drivers/net/phy/mscc/mscc.h index 6a3d8a754eb8..58c6d47fbe04 100644 --- a/drivers/net/phy/mscc/mscc.h +++ b/drivers/net/phy/mscc/mscc.h @@ -362,6 +362,13 @@ struct vsc85xx_hw_stat { u16 mask; }; +struct vsc8531_skb_cb { + u32 ns; +}; + +#define VSC8531_SKB_CB(skb) \ + ((struct vsc8531_skb_cb *)((skb)->cb)) + struct vsc8531_private { int rate_magic; u16 supp_led_modes; @@ -410,6 +417,11 @@ struct vsc8531_private { */ struct mutex ts_lock; struct mutex phc_lock; + + /* list of skbs that were received and need timestamp information but it + * didn't received it yet + */ + struct sk_buff_head rx_skbs_list; }; /* Shared structure between the PHYs of the same package. 
diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index 7ed6522fb0ef..f1c9ce351ab4 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -2335,6 +2335,13 @@ static int vsc85xx_probe(struct phy_device *phydev) return vsc85xx_dt_led_modes_get(phydev, default_mode); } +static void vsc85xx_remove(struct phy_device *phydev) +{ + struct vsc8531_private *priv = phydev->priv; + + skb_queue_purge(&priv->rx_skbs_list); +} + /* Microsemi VSC85xx PHYs */ static struct phy_driver vsc85xx_driver[] = { { @@ -2589,6 +2596,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, + .remove = &vsc85xx_remove, .probe = &vsc8574_probe, .set_wol = &vsc85xx_wol_set, .get_wol = &vsc85xx_wol_get, @@ -2614,6 +2622,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, + .remove = &vsc85xx_remove, .probe = &vsc8574_probe, .set_wol = &vsc85xx_wol_set, .get_wol = &vsc85xx_wol_get, @@ -2639,6 +2648,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, + .remove = &vsc85xx_remove, .probe = &vsc8584_probe, .get_tunable = &vsc85xx_get_tunable, .set_tunable = &vsc85xx_set_tunable, @@ -2662,6 +2672,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, + .remove = &vsc85xx_remove, .probe = &vsc8584_probe, .get_tunable = &vsc85xx_get_tunable, .set_tunable = &vsc85xx_set_tunable, @@ -2685,6 +2696,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, + .remove = &vsc85xx_remove, .probe = &vsc8584_probe, .get_tunable = &vsc85xx_get_tunable, .set_tunable = &vsc85xx_set_tunable, diff --git 
a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index 275706de5847..de6c7312e8f2 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -1194,9 +1194,7 @@ static bool vsc85xx_rxtstamp(struct mii_timestamper *mii_ts, { struct vsc8531_private *vsc8531 = container_of(mii_ts, struct vsc8531_private, mii_ts); - struct skb_shared_hwtstamps *shhwtstamps = NULL; struct vsc85xx_ptphdr *ptphdr; - struct timespec64 ts; unsigned long ns; if (!vsc8531->ptp->configured) @@ -1206,27 +1204,52 @@ static bool vsc85xx_rxtstamp(struct mii_timestamper *mii_ts, type == PTP_CLASS_NONE) return false; - vsc85xx_gettime(&vsc8531->ptp->caps, &ts); - ptphdr = get_ptp_header_rx(skb, vsc8531->ptp->rx_filter); if (!ptphdr) return false; - shhwtstamps = skb_hwtstamps(skb); - memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps)); - ns = ntohl(ptphdr->rsrvd2); - /* nsec is in reserved field */ - if (ts.tv_nsec < ns) - ts.tv_sec--; + VSC8531_SKB_CB(skb)->ns = ns; + skb_queue_tail(&vsc8531->rx_skbs_list, skb); - shhwtstamps->hwtstamp = ktime_set(ts.tv_sec, ns); - netif_rx(skb); + ptp_schedule_worker(vsc8531->ptp->ptp_clock, 0); return true; } +static long vsc85xx_do_aux_work(struct ptp_clock_info *info) +{ + struct vsc85xx_ptp *ptp = container_of(info, struct vsc85xx_ptp, caps); + struct skb_shared_hwtstamps *shhwtstamps = NULL; + struct phy_device *phydev = ptp->phydev; + struct vsc8531_private *priv = phydev->priv; + struct sk_buff_head received; + struct sk_buff *rx_skb; + struct timespec64 ts; + unsigned long flags; + + __skb_queue_head_init(&received); + spin_lock_irqsave(&priv->rx_skbs_list.lock, flags); + skb_queue_splice_tail_init(&priv->rx_skbs_list, &received); + spin_unlock_irqrestore(&priv->rx_skbs_list.lock, flags); + + vsc85xx_gettime(info, &ts); + while ((rx_skb = __skb_dequeue(&received)) != NULL) { + shhwtstamps = skb_hwtstamps(rx_skb); + memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps)); + + if (ts.tv_nsec < 
VSC8531_SKB_CB(rx_skb)->ns) + ts.tv_sec--; + + shhwtstamps->hwtstamp = ktime_set(ts.tv_sec, + VSC8531_SKB_CB(rx_skb)->ns); + netif_rx(rx_skb); + } + + return -1; +} + static const struct ptp_clock_info vsc85xx_clk_caps = { .owner = THIS_MODULE, .name = "VSC85xx timer", @@ -1240,6 +1263,7 @@ static const struct ptp_clock_info vsc85xx_clk_caps = { .adjfine = &vsc85xx_adjfine, .gettime64 = &vsc85xx_gettime, .settime64 = &vsc85xx_settime, + .do_aux_work = &vsc85xx_do_aux_work, }; static struct vsc8531_private *vsc8584_base_priv(struct phy_device *phydev) @@ -1567,6 +1591,7 @@ int vsc8584_ptp_probe(struct phy_device *phydev) mutex_init(&vsc8531->phc_lock); mutex_init(&vsc8531->ts_lock); + skb_queue_head_init(&vsc8531->rx_skbs_list); /* Retrieve the shared load/save GPIO. Request it as non exclusive as * the same GPIO can be requested by all the PHYs of the same package. From 24ef2f53c07f273bad99173e27ee88d44d135b1c Mon Sep 17 00:00:00 2001 From: Yuichiro Tsuji Date: Mon, 18 Aug 2025 17:45:07 +0900 Subject: [PATCH 2198/2411] net: usb: asix_devices: Fix PHY address mask in MDIO bus initialization Syzbot reported shift-out-of-bounds exception on MDIO bus initialization. The PHY address should be masked to 5 bits (0-31). Without this mask, invalid PHY addresses could be used, potentially causing issues with MDIO bus operations. Fix this by masking the PHY address with 0x1f (31 decimal) to ensure it stays within the valid range. 
Fixes: 4faff70959d5 ("net: usb: asix_devices: add phy_mask for ax88772 mdio bus") Reported-by: syzbot+20537064367a0f98d597@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=20537064367a0f98d597 Tested-by: syzbot+20537064367a0f98d597@syzkaller.appspotmail.com Signed-off-by: Yuichiro Tsuji Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250818084541.1958-1-yuichtsu@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/asix_devices.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index d9f5942ccc44..792ddda1ad49 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -676,7 +676,7 @@ static int ax88772_init_mdio(struct usbnet *dev) priv->mdio->read = &asix_mdio_bus_read; priv->mdio->write = &asix_mdio_bus_write; priv->mdio->name = "Asix MDIO Bus"; - priv->mdio->phy_mask = ~(BIT(priv->phy_addr) | BIT(AX_EMBD_PHY_ADDR)); + priv->mdio->phy_mask = ~(BIT(priv->phy_addr & 0x1f) | BIT(AX_EMBD_PHY_ADDR)); /* mii bus name is usb-- */ snprintf(priv->mdio->id, MII_BUS_ID_SIZE, "usb-%03d:%03d", dev->udev->bus->busnum, dev->udev->devnum); From 75a9a46d67f46d608205888f9b34e315c1786345 Mon Sep 17 00:00:00 2001 From: Jordan Rhee Date: Mon, 18 Aug 2025 14:12:45 -0700 Subject: [PATCH 2199/2411] gve: prevent ethtool ops after shutdown A crash can occur if an ethtool operation is invoked after shutdown() is called. shutdown() is invoked during system shutdown to stop DMA operations without performing expensive deallocations. It is discouraged to unregister the netdev in this path, so the device may still be visible to userspace and kernel helpers. In gve, shutdown() tears down most internal data structures. If an ethtool operation is dispatched after shutdown(), it will dereference freed or NULL pointers, leading to a kernel panic. 
While graceful shutdown normally quiesces userspace before invoking the reboot syscall, forced shutdowns (as observed on GCP VMs) can still trigger this path. Fix by calling netif_device_detach() in shutdown(). This marks the device as detached so the ethtool ioctl handler will skip dispatching operations to the driver. Fixes: 974365e51861 ("gve: Implement suspend/resume/shutdown") Signed-off-by: Jordan Rhee Signed-off-by: Jeroen de Borst Link: https://patch.msgid.link/20250818211245.1156919-1-jeroendb@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 1f411d7c4373..1be1b1ef31ee 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -2870,6 +2870,8 @@ static void gve_shutdown(struct pci_dev *pdev) struct gve_priv *priv = netdev_priv(netdev); bool was_up = netif_running(priv->dev); + netif_device_detach(netdev); + rtnl_lock(); netdev_lock(netdev); if (was_up && gve_close(priv->dev)) { From 6d6714bf0c4e8eb2274081b4b023dfa01581c123 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Fri, 15 Aug 2025 10:48:03 +0000 Subject: [PATCH 2200/2411] net: stmmac: thead: Enable TX clock before MAC initialization The clk_tx_i clock must be supplied to the MAC for successful initialization. On TH1520 SoC, the clock is provided by an internal divider configured through GMAC_PLLCLK_DIV register when using RGMII interface. 
However, currently we don't set up the divider before initialization of the MAC, resulting in DMA reset failures if the bootloader/firmware doesn't enable the divider, [ 7.839601] thead-dwmac ffe7060000.ethernet eth0: Register MEM_TYPE_PAGE_POOL RxQ-0 [ 7.938338] thead-dwmac ffe7060000.ethernet eth0: PHY [stmmac-0:02] driver [RTL8211F Gigabit Ethernet] (irq=POLL) [ 8.160746] thead-dwmac ffe7060000.ethernet eth0: Failed to reset the dma [ 8.170118] thead-dwmac ffe7060000.ethernet eth0: stmmac_hw_setup: DMA engine initialization failed [ 8.179384] thead-dwmac ffe7060000.ethernet eth0: __stmmac_open: Hw setup failed Let's simply write GMAC_PLLCLK_DIV_EN to GMAC_PLLCLK_DIV to enable the divider before MAC initialization. Note that for reconfiguring the divisor, the divider must be disabled first and re-enabled later to make sure the new divisor takes effect. The exact clock rate doesn't affect MAC's initialization according to my test. It's set to the speed required by RGMII when the linkspeed is 1Gbps and could be reclocked later after link is up if necessary.
Fixes: 33a1a01e3afa ("net: stmmac: Add glue layer for T-HEAD TH1520 SoC") Signed-off-by: Yao Zi Reviewed-by: Drew Fustini Link: https://patch.msgid.link/20250815104803.55294-1-ziyao@disroot.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c index f2946bea0bc2..6c6c49e4b66f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c @@ -152,7 +152,7 @@ static int thead_set_clk_tx_rate(void *bsp_priv, struct clk *clk_tx_i, static int thead_dwmac_enable_clk(struct plat_stmmacenet_data *plat) { struct thead_dwmac *dwmac = plat->bsp_priv; - u32 reg; + u32 reg, div; switch (plat->mac_interface) { case PHY_INTERFACE_MODE_MII: @@ -164,6 +164,13 @@ static int thead_dwmac_enable_clk(struct plat_stmmacenet_data *plat) case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_TXID: /* use pll */ + div = clk_get_rate(plat->stmmac_clk) / rgmii_clock(SPEED_1000); + reg = FIELD_PREP(GMAC_PLLCLK_DIV_EN, 1) | + FIELD_PREP(GMAC_PLLCLK_DIV_NUM, div); + + writel(0, dwmac->apb_base + GMAC_PLLCLK_DIV); + writel(reg, dwmac->apb_base + GMAC_PLLCLK_DIV); + writel(GMAC_GTXCLK_SEL_PLL, dwmac->apb_base + GMAC_GTXCLK_SEL); reg = GMAC_TX_CLK_EN | GMAC_TX_CLK_N_EN | GMAC_TX_CLK_OUT_EN | GMAC_RX_CLK_EN | GMAC_RX_CLK_N_EN; From d9cef55ed49117bd63695446fb84b4b91815c0b4 Mon Sep 17 00:00:00 2001 From: "D. 
Wythe" Date: Mon, 18 Aug 2025 13:46:18 +0800 Subject: [PATCH 2201/2411] net/smc: fix UAF on smcsk after smc_listen_out() BPF CI testing report a UAF issue: [ 16.446633] BUG: kernel NULL pointer dereference, address: 000000000000003 0 [ 16.447134] #PF: supervisor read access in kernel mod e [ 16.447516] #PF: error_code(0x0000) - not-present pag e [ 16.447878] PGD 0 P4D 0 [ 16.448063] Oops: Oops: 0000 [#1] PREEMPT SMP NOPT I [ 16.448409] CPU: 0 UID: 0 PID: 9 Comm: kworker/0:1 Tainted: G OE 6.13.0-rc3-g89e8a75fda73-dirty #4 2 [ 16.449124] Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODUL E [ 16.449502] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/201 4 [ 16.450201] Workqueue: smc_hs_wq smc_listen_wor k [ 16.450531] RIP: 0010:smc_listen_work+0xc02/0x159 0 [ 16.452158] RSP: 0018:ffffb5ab40053d98 EFLAGS: 0001024 6 [ 16.452526] RAX: 0000000000000001 RBX: 0000000000000002 RCX: 000000000000030 0 [ 16.452994] RDX: 0000000000000280 RSI: 00003513840053f0 RDI: 000000000000000 0 [ 16.453492] RBP: ffffa097808e3800 R08: ffffa09782dba1e0 R09: 000000000000000 5 [ 16.453987] R10: 0000000000000000 R11: 0000000000000000 R12: ffffa0978274640 0 [ 16.454497] R13: 0000000000000000 R14: 0000000000000000 R15: ffffa09782d4092 0 [ 16.454996] FS: 0000000000000000(0000) GS:ffffa097bbc00000(0000) knlGS:000000000000000 0 [ 16.455557] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003 3 [ 16.455961] CR2: 0000000000000030 CR3: 0000000102788004 CR4: 0000000000770ef 0 [ 16.456459] PKRU: 5555555 4 [ 16.456654] Call Trace : [ 16.456832] [ 16.456989] ? __die+0x23/0x7 0 [ 16.457215] ? page_fault_oops+0x180/0x4c 0 [ 16.457508] ? __lock_acquire+0x3e6/0x249 0 [ 16.457801] ? exc_page_fault+0x68/0x20 0 [ 16.458080] ? asm_exc_page_fault+0x26/0x3 0 [ 16.458389] ? smc_listen_work+0xc02/0x159 0 [ 16.458689] ? smc_listen_work+0xc02/0x159 0 [ 16.458987] ? 
lock_is_held_type+0x8f/0x10 0 [ 16.459284] process_one_work+0x1ea/0x6d 0 [ 16.459570] worker_thread+0x1c3/0x38 0 [ 16.459839] ? __pfx_worker_thread+0x10/0x1 0 [ 16.460144] kthread+0xe0/0x11 0 [ 16.460372] ? __pfx_kthread+0x10/0x1 0 [ 16.460640] ret_from_fork+0x31/0x5 0 [ 16.460896] ? __pfx_kthread+0x10/0x1 0 [ 16.461166] ret_from_fork_asm+0x1a/0x3 0 [ 16.461453] [ 16.461616] Modules linked in: bpf_testmod(OE) [last unloaded: bpf_testmod(OE) ] [ 16.462134] CR2: 000000000000003 0 [ 16.462380] ---[ end trace 0000000000000000 ]--- [ 16.462710] RIP: 0010:smc_listen_work+0xc02/0x1590 The direct cause of this issue is that after smc_listen_out_connected(), newclcsock->sk may be NULL since it will releases the smcsk. Therefore, if the application closes the socket immediately after accept, newclcsock->sk can be NULL. A possible execution order could be as follows: smc_listen_work | userspace ----------------------------------------------------------------- lock_sock(sk) | smc_listen_out_connected() | | \- smc_listen_out | | | \- release_sock | | |- sk->sk_data_ready() | | fd = accept(); | close(fd); | \- socket->sk = NULL; /* newclcsock->sk is NULL now */ SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk)) Since smc_listen_out_connected() will not fail, simply swapping the order of the code can easily fix this issue. Fixes: 3b2dec2603d5 ("net/smc: restructure client and server code in af_smc") Signed-off-by: D. 
Wythe Reviewed-by: Guangguan Wang Reviewed-by: Alexandra Winter Reviewed-by: Dust Li Link: https://patch.msgid.link/20250818054618.41615-1-alibuda@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 9311c38f7abe..e0e48f24cd61 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -2568,8 +2568,9 @@ static void smc_listen_work(struct work_struct *work) goto out_decl; } - smc_listen_out_connected(new_smc); SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini); + /* smc_listen_out() will release smcsk */ + smc_listen_out_connected(new_smc); goto out_free; out_unlock: From 2462c1b9217246a889ec318b3894d84e4dd709c6 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 17 Aug 2025 23:23:17 +0300 Subject: [PATCH 2202/2411] net/mlx5: HWS, fix bad parameter in CQ creation 'cqe_sz' valid value should be 0 for 64-byte CQE. Fixes: 2ca62599aa0b ("net/mlx5: HWS, added send engine and context handling") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Vlad Dogaru Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250817202323.308604-2-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c index c4b22be19a9b..b0595c9b09e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c @@ -964,7 +964,6 @@ static int hws_send_ring_open_cq(struct mlx5_core_dev *mdev, return -ENOMEM; MLX5_SET(cqc, cqc_data, uar_page, mdev->priv.uar->index); - MLX5_SET(cqc, cqc_data, cqe_sz, queue->num_entries); MLX5_SET(cqc, cqc_data, log_cq_size, ilog2(queue->num_entries)); err = hws_send_ring_alloc_cq(mdev, numa_node, queue, cqc_data, cq); From 
615b690612b7785ab8632f6a5a941550622e4e36 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 17 Aug 2025 23:23:18 +0300 Subject: [PATCH 2203/2411] net/mlx5: HWS, fix simple rules rehash error flow Moving rules from matcher to matcher should not fail. However, if it does fail due to various reasons, the error flow should allow the kernel to continue functioning (albeit with broken steering rules) instead of going into a series of soft lock-ups or some other problematic behaviour. This patch fixes the error flow for moving simple rules: - If new rule creation fails before it was even enqueued, do not poll for completion - If TIMEOUT happened while moving the rule, no point trying to poll for completions for other rules. Something is broken, completion won't come, just abort the rehash sequence. - If some other completion with error received, don't give up. Continue handling rest of the rules to minimize the damage. - Make sure that the first error code that was received will be actually returned to the caller instead of replacing it with the generic error code. All the aforementioned issues stem from the same bad error flow, so no point fixing them one by one and leaving partially broken code - fixing them in one patch.
Fixes: ef94799a8741 ("net/mlx5: HWS, rework rehash loop") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Vlad Dogaru Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250817202323.308604-3-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/steering/hws/bwc.c | 61 +++++++++++++------ 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c index 92de4b761a83..0219a49b2326 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c @@ -74,9 +74,9 @@ static void hws_bwc_matcher_init_attr(struct mlx5hws_bwc_matcher *bwc_matcher, static int hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) { - bool move_error = false, poll_error = false, drain_error = false; struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; struct mlx5hws_matcher *matcher = bwc_matcher->matcher; + int drain_error = 0, move_error = 0, poll_error = 0; u16 bwc_queues = mlx5hws_bwc_queues(ctx); struct mlx5hws_rule_attr rule_attr; struct mlx5hws_bwc_rule *bwc_rule; @@ -99,11 +99,15 @@ hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) ret = mlx5hws_matcher_resize_rule_move(matcher, bwc_rule->rule, &rule_attr); - if (unlikely(ret && !move_error)) { - mlx5hws_err(ctx, - "Moving BWC rule: move failed (%d), attempting to move rest of the rules\n", - ret); - move_error = true; + if (unlikely(ret)) { + if (!move_error) { + mlx5hws_err(ctx, + "Moving BWC rule: move failed (%d), attempting to move rest of the rules\n", + ret); + move_error = ret; + } + /* Rule wasn't queued, no need to poll */ + continue; } pending_rules++; @@ -111,11 +115,19 @@ hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) rule_attr.queue_id, &pending_rules, false); - if (unlikely(ret && !poll_error)) { - mlx5hws_err(ctx, - "Moving BWC 
rule: poll failed (%d), attempting to move rest of the rules\n", - ret); - poll_error = true; + if (unlikely(ret)) { + if (ret == -ETIMEDOUT) { + mlx5hws_err(ctx, + "Moving BWC rule: timeout polling for completions (%d), aborting rehash\n", + ret); + return ret; + } + if (!poll_error) { + mlx5hws_err(ctx, + "Moving BWC rule: polling for completions failed (%d), attempting to move rest of the rules\n", + ret); + poll_error = ret; + } } } @@ -126,17 +138,30 @@ hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) rule_attr.queue_id, &pending_rules, true); - if (unlikely(ret && !drain_error)) { - mlx5hws_err(ctx, - "Moving BWC rule: drain failed (%d), attempting to move rest of the rules\n", - ret); - drain_error = true; + if (unlikely(ret)) { + if (ret == -ETIMEDOUT) { + mlx5hws_err(ctx, + "Moving bwc rule: timeout draining completions (%d), aborting rehash\n", + ret); + return ret; + } + if (!drain_error) { + mlx5hws_err(ctx, + "Moving bwc rule: drain failed (%d), attempting to move rest of the rules\n", + ret); + drain_error = ret; + } } } } - if (move_error || poll_error || drain_error) - ret = -EINVAL; + /* Return the first error that happened */ + if (unlikely(move_error)) + return move_error; + if (unlikely(poll_error)) + return poll_error; + if (unlikely(drain_error)) + return drain_error; return ret; } From 4a842b1bf18a32ee0c25dd6dd98728b786a76fe4 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 17 Aug 2025 23:23:19 +0300 Subject: [PATCH 2204/2411] net/mlx5: HWS, fix complex rules rehash error flow Moving rules from matcher to matcher should not fail. However, if it does fail due to various reasons, the error flow should allow the kernel to continue functioning (albeit with broken steering rules) instead of going into series of soft lock-ups or some other problematic behaviour. Similar to the simple rules, complex rules rehash logic suffers from the same problems. 
This patch fixes the error flow for moving complex rules: - If new rule creation fails before it was even enqueued, do not poll for completion - If TIMEOUT happened while moving the rule, no point trying to poll for completions for other rules. Something is broken, completion won't come, just abort the rehash sequence. - If some other completion with error received, don't give up. Continue handling rest of the rules to minimize the damage. - Make sure that the first error code that was received will be actually returned to the caller instead of replacing it with the generic error code. All the aforementioned issues stem from the same bad error flow, so no point fixing them one by one and leaving partially broken code - fixing them in one patch. Fixes: 17e0accac577 ("net/mlx5: HWS, support complex matchers") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Vlad Dogaru Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250817202323.308604-4-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../mlx5/core/steering/hws/bwc_complex.c | 41 +++++++++++++------ 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c index ca7501c57468..14e79579c719 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c @@ -1328,11 +1328,11 @@ mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher) { struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; struct mlx5hws_matcher *matcher = bwc_matcher->matcher; - bool move_error = false, poll_error = false; u16 bwc_queues = mlx5hws_bwc_queues(ctx); struct mlx5hws_bwc_rule *tmp_bwc_rule; struct mlx5hws_rule_attr rule_attr; struct mlx5hws_table *isolated_tbl; + int move_error = 0, poll_error = 0; struct mlx5hws_rule *tmp_rule; struct list_head *rules_list; u32
expected_completions = 1; @@ -1391,11 +1391,15 @@ mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher) ret = mlx5hws_matcher_resize_rule_move(matcher, tmp_rule, &rule_attr); - if (unlikely(ret && !move_error)) { - mlx5hws_err(ctx, - "Moving complex BWC rule failed (%d), attempting to move rest of the rules\n", - ret); - move_error = true; + if (unlikely(ret)) { + if (!move_error) { + mlx5hws_err(ctx, + "Moving complex BWC rule: move failed (%d), attempting to move rest of the rules\n", + ret); + move_error = ret; + } + /* Rule wasn't queued, no need to poll */ + continue; } expected_completions = 1; @@ -1403,11 +1407,19 @@ mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher) rule_attr.queue_id, &expected_completions, true); - if (unlikely(ret && !poll_error)) { - mlx5hws_err(ctx, - "Moving complex BWC rule: poll failed (%d), attempting to move rest of the rules\n", - ret); - poll_error = true; + if (unlikely(ret)) { + if (ret == -ETIMEDOUT) { + mlx5hws_err(ctx, + "Moving complex BWC rule: timeout polling for completions (%d), aborting rehash\n", + ret); + return ret; + } + if (!poll_error) { + mlx5hws_err(ctx, + "Moving complex BWC rule: polling for completions failed (%d), attempting to move rest of the rules\n", + ret); + poll_error = ret; + } } /* Done moving the rule to the new matcher, @@ -1422,8 +1434,11 @@ mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher) } } - if (move_error || poll_error) - ret = -EINVAL; + /* Return the first error that happened */ + if (unlikely(move_error)) + return move_error; + if (unlikely(poll_error)) + return poll_error; return ret; } From 1a72298d27ce4d41b3fd405f6921e8711815767a Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 17 Aug 2025 23:23:20 +0300 Subject: [PATCH 2205/2411] net/mlx5: HWS, prevent rehash from filling up the queues While moving the rules during rehash, CQ is not drained. 
The flush and drain happens only when all the rules of a certain queue have been moved. This behaviour can lead to accumulating large quantity of rules that haven't got their completion yet, and eventually will fill up the queue and will cause the rehash to fail. Fix this problem by requiring drain once the number of outstanding completions reaches a certain threshold. Fixes: ef94799a8741 ("net/mlx5: HWS, rework rehash loop") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Vlad Dogaru Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250817202323.308604-5-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c index 0219a49b2326..2a59be11fe55 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c @@ -84,6 +84,7 @@ hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) struct list_head *rules_list; u32 pending_rules; int i, ret = 0; + bool drain; mlx5hws_bwc_rule_fill_attr(bwc_matcher, 0, 0, &rule_attr); @@ -111,10 +112,12 @@ hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) } pending_rules++; + drain = pending_rules >= + hws_bwc_get_burst_th(ctx, rule_attr.queue_id); ret = mlx5hws_bwc_queue_poll(ctx, rule_attr.queue_id, &pending_rules, - false); + drain); if (unlikely(ret)) { if (ret == -ETIMEDOUT) { mlx5hws_err(ctx, From 7c60952f83584bc4950057cfed2cc3c87343b5db Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 17 Aug 2025 23:23:21 +0300 Subject: [PATCH 2206/2411] net/mlx5: HWS, don't rehash on every kind of insertion failure If rule creation failed due to a full queue, due to timeout in polling for completion, or due to matcher being in resize, don't try to initiate rehash sequence - 
rehash would have failed anyway. Fixes: 2111bb970c78 ("net/mlx5: HWS, added backward-compatible API handling") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Vlad Dogaru Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250817202323.308604-6-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/steering/hws/bwc.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c index 2a59be11fe55..adeccc588e5d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c @@ -1063,6 +1063,21 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, return 0; /* rule inserted successfully */ } + /* Rule insertion could fail due to queue being full, timeout, or + * matcher in resize. In such cases, no point in trying to rehash. + */ + if (ret == -EBUSY || ret == -ETIMEDOUT || ret == -EAGAIN) { + mutex_unlock(queue_lock); + mlx5hws_err(ctx, + "BWC rule insertion failed - %s (%d)\n", + ret == -EBUSY ? "queue is full" : + ret == -ETIMEDOUT ? "timeout" : + ret == -EAGAIN ? "matcher in resize" : "N/A", + ret); + hws_bwc_rule_cnt_dec(bwc_rule); + return ret; + } + /* At this point the rule wasn't added. * It could be because there was collision, or some other problem. * Try rehash by size and insert rule again - last chance. From 8a51507320ebddaab32610199774f69cd7d53e78 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Sun, 17 Aug 2025 23:23:22 +0300 Subject: [PATCH 2207/2411] net/mlx5: HWS, Fix table creation UID During table creation, caller passes a UID using ft_attr. The UID value was ignored, which leads to problems when the caller sets the UID to a non-zero value, such as SHARED_RESOURCE_UID (0xffff) - the internal FT objects will be created with UID=0. 
Fixes: 0869701cba3d ("net/mlx5: HWS, added FW commands handling") Signed-off-by: Alex Vesker Reviewed-by: Yevgeny Kliteynik Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250817202323.308604-7-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/steering/hws/cmd.c | 1 + .../ethernet/mellanox/mlx5/core/steering/hws/cmd.h | 1 + .../mellanox/mlx5/core/steering/hws/fs_hws.c | 1 + .../mellanox/mlx5/core/steering/hws/matcher.c | 5 ++++- .../mellanox/mlx5/core/steering/hws/mlx5hws.h | 1 + .../mellanox/mlx5/core/steering/hws/table.c | 13 ++++++++++--- .../mellanox/mlx5/core/steering/hws/table.h | 3 ++- 7 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c index 9c83753e4592..0bdcab2e5cf3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c @@ -55,6 +55,7 @@ int mlx5hws_cmd_flow_table_create(struct mlx5_core_dev *mdev, MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE); MLX5_SET(create_flow_table_in, in, table_type, ft_attr->type); + MLX5_SET(create_flow_table_in, in, uid, ft_attr->uid); ft_ctx = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context); MLX5_SET(flow_table_context, ft_ctx, level, ft_attr->level); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h index fa6bff210266..122ccc671628 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h @@ -36,6 +36,7 @@ struct mlx5hws_cmd_set_fte_attr { struct mlx5hws_cmd_ft_create_attr { u8 type; u8 level; + u16 uid; bool rtc_valid; bool decap_en; bool reformat_en; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c 
b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c index 57592b92e24b..131e74b2b774 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c @@ -267,6 +267,7 @@ static int mlx5_cmd_hws_create_flow_table(struct mlx5_flow_root_namespace *ns, tbl_attr.type = MLX5HWS_TABLE_TYPE_FDB; tbl_attr.level = ft_attr->level; + tbl_attr.uid = ft_attr->uid; tbl = mlx5hws_table_create(ctx, &tbl_attr); if (!tbl) { mlx5_core_err(ns->dev, "Failed creating hws flow_table\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c index f3ea09caba2b..32f87fdf3213 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c @@ -85,6 +85,7 @@ static int hws_matcher_create_end_ft_isolated(struct mlx5hws_matcher *matcher) ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, tbl, + 0, &matcher->end_ft_id); if (ret) { mlx5hws_err(tbl->ctx, "Isolated matcher: failed to create end flow table\n"); @@ -112,7 +113,9 @@ static int hws_matcher_create_end_ft(struct mlx5hws_matcher *matcher) if (mlx5hws_matcher_is_isolated(matcher)) ret = hws_matcher_create_end_ft_isolated(matcher); else - ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, tbl, + ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, + tbl, + 0, &matcher->end_ft_id); if (ret) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h index 59c14745ed0c..2498ceff2060 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h @@ -75,6 +75,7 @@ struct mlx5hws_context_attr { struct mlx5hws_table_attr { enum mlx5hws_table_type type; u32 level; + u16 uid; }; enum mlx5hws_matcher_flow_src { diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c index 568f691733f3..6113383ae47b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c @@ -9,6 +9,7 @@ u32 mlx5hws_table_get_id(struct mlx5hws_table *tbl) } static void hws_table_init_next_ft_attr(struct mlx5hws_table *tbl, + u16 uid, struct mlx5hws_cmd_ft_create_attr *ft_attr) { ft_attr->type = tbl->fw_ft_type; @@ -16,7 +17,9 @@ static void hws_table_init_next_ft_attr(struct mlx5hws_table *tbl, ft_attr->level = tbl->ctx->caps->fdb_ft.max_level - 1; else ft_attr->level = tbl->ctx->caps->nic_ft.max_level - 1; + ft_attr->rtc_valid = true; + ft_attr->uid = uid; } static void hws_table_set_cap_attr(struct mlx5hws_table *tbl, @@ -119,12 +122,12 @@ static int hws_table_connect_to_default_miss_tbl(struct mlx5hws_table *tbl, u32 int mlx5hws_table_create_default_ft(struct mlx5_core_dev *mdev, struct mlx5hws_table *tbl, - u32 *ft_id) + u16 uid, u32 *ft_id) { struct mlx5hws_cmd_ft_create_attr ft_attr = {0}; int ret; - hws_table_init_next_ft_attr(tbl, &ft_attr); + hws_table_init_next_ft_attr(tbl, uid, &ft_attr); hws_table_set_cap_attr(tbl, &ft_attr); ret = mlx5hws_cmd_flow_table_create(mdev, &ft_attr, ft_id); @@ -189,7 +192,10 @@ static int hws_table_init(struct mlx5hws_table *tbl) } mutex_lock(&ctx->ctrl_lock); - ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, tbl, &tbl->ft_id); + ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, + tbl, + tbl->uid, + &tbl->ft_id); if (ret) { mlx5hws_err(tbl->ctx, "Failed to create flow table object\n"); mutex_unlock(&ctx->ctrl_lock); @@ -239,6 +245,7 @@ struct mlx5hws_table *mlx5hws_table_create(struct mlx5hws_context *ctx, tbl->ctx = ctx; tbl->type = attr->type; tbl->level = attr->level; + tbl->uid = attr->uid; ret = hws_table_init(tbl); if (ret) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h 
b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h index 0400cce0c317..1246f9bd8422 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h @@ -18,6 +18,7 @@ struct mlx5hws_table { enum mlx5hws_table_type type; u32 fw_ft_type; u32 level; + u16 uid; struct list_head matchers_list; struct list_head tbl_list_node; struct mlx5hws_default_miss default_miss; @@ -47,7 +48,7 @@ u32 mlx5hws_table_get_res_fw_ft_type(enum mlx5hws_table_type tbl_type, int mlx5hws_table_create_default_ft(struct mlx5_core_dev *mdev, struct mlx5hws_table *tbl, - u32 *ft_id); + u16 uid, u32 *ft_id); void mlx5hws_table_destroy_default_ft(struct mlx5hws_table *tbl, u32 ft_id); From d2d6f950cb43be6845a41cac5956cb2a10e657e5 Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Sun, 17 Aug 2025 23:23:23 +0300 Subject: [PATCH 2208/2411] net/mlx5: CT: Use the correct counter offset Specifying the counter action is not enough, as it is used by multiple counters that were allocated in a bulk. By omitting the offset, rules will be associated with a different counter from the same bulk. Subsequently, the CT subsystem checks the correct counter, assumes that no traffic has triggered the rule, and ages out the rule. The end result is intermittent offloading of long lived connections, as rules are aged out then promptly re-added. Fix this by specifying the correct offset along with the counter rule. 
Fixes: 34eea5b12a10 ("net/mlx5e: CT: Add initial support for Hardware Steering") Signed-off-by: Vlad Dogaru Reviewed-by: Yevgeny Kliteynik Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250817202323.308604-8-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c index a4263137fef5..01d522b02947 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c @@ -173,6 +173,8 @@ static void mlx5_ct_fs_hmfs_fill_rule_actions(struct mlx5_ct_fs_hmfs *fs_hmfs, memset(rule_actions, 0, NUM_CT_HMFS_RULES * sizeof(*rule_actions)); rule_actions[0].action = mlx5_fc_get_hws_action(fs_hmfs->ctx, attr->counter); + rule_actions[0].counter.offset = + attr->counter->id - attr->counter->bulk->base_id; /* Modify header is special, it may require extra arguments outside the action itself. */ if (mh_action->mh_data) { rule_actions[1].modify_header.offset = mh_action->mh_data->offset; From 1683fd1b2fa79864d3c7a951d9cea0a9ba1a1923 Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Mon, 18 Aug 2025 11:35:13 +0530 Subject: [PATCH 2209/2411] microchip: lan865x: fix missing netif_start_queue() call on device open This fixes an issue where the transmit queue is started implicitly only the very first time the device is registered. When the device is taken down and brought back up again (using `ip` or `ifconfig`), the transmit queue is not restarted, causing packet transmission to hang. Adding an explicit call to netif_start_queue() in lan865x_net_open() ensures the transmit queue is properly started every time the device is reopened. 
Fixes: 5cd2340cb6a3 ("microchip: lan865x: add driver support for Microchip's LAN865X MAC-PHY") Signed-off-by: Parthiban Veerasooran Link: https://patch.msgid.link/20250818060514.52795-2-parthiban.veerasooran@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan865x/lan865x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan865x/lan865x.c b/drivers/net/ethernet/microchip/lan865x/lan865x.c index dd436bdff0f8..d03f5a8de58d 100644 --- a/drivers/net/ethernet/microchip/lan865x/lan865x.c +++ b/drivers/net/ethernet/microchip/lan865x/lan865x.c @@ -311,6 +311,8 @@ static int lan865x_net_open(struct net_device *netdev) phy_start(netdev->phydev); + netif_start_queue(netdev); + return 0; } From 2cd58fec912acec273cb155911ab8f06ddbb131a Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Mon, 18 Aug 2025 11:35:14 +0530 Subject: [PATCH 2210/2411] microchip: lan865x: fix missing Timer Increment config for Rev.B0/B1 Fix missing configuration for LAN865x silicon revisions B0 and B1 as per Microchip Application Note AN1760 (Rev F, June 2024). The Timer Increment register was not being set, which is required for accurate timestamping. As per the application note, configure the MAC to set timestamping at the end of the Start of Frame Delimiter (SFD), and set the Timer Increment register to 40 ns (corresponding to a 25 MHz internal clock). 
Link: https://www.microchip.com/en-us/application-notes/an1760 Fixes: 5cd2340cb6a3 ("microchip: lan865x: add driver support for Microchip's LAN865X MAC-PHY") Signed-off-by: Parthiban Veerasooran Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250818060514.52795-3-parthiban.veerasooran@microchip.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/microchip/lan865x/lan865x.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan865x/lan865x.c b/drivers/net/ethernet/microchip/lan865x/lan865x.c index d03f5a8de58d..84c41f193561 100644 --- a/drivers/net/ethernet/microchip/lan865x/lan865x.c +++ b/drivers/net/ethernet/microchip/lan865x/lan865x.c @@ -32,6 +32,10 @@ /* MAC Specific Addr 1 Top Reg */ #define LAN865X_REG_MAC_H_SADDR1 0x00010023 +/* MAC TSU Timer Increment Register */ +#define LAN865X_REG_MAC_TSU_TIMER_INCR 0x00010077 +#define MAC_TSU_TIMER_INCR_COUNT_NANOSECONDS 0x0028 + struct lan865x_priv { struct work_struct multicast_work; struct net_device *netdev; @@ -346,6 +350,21 @@ static int lan865x_probe(struct spi_device *spi) goto free_netdev; } + /* LAN865x Rev.B0/B1 configuration parameters from AN1760 + * As per the Configuration Application Note AN1760 published in the + * link, https://www.microchip.com/en-us/application-notes/an1760 + * Revision F (DS60001760G - June 2024), configure the MAC to set time + * stamping at the end of the Start of Frame Delimiter (SFD) and set the + * Timer Increment reg to 40 ns to be used as a 25 MHz internal clock. + */ + ret = oa_tc6_write_register(priv->tc6, LAN865X_REG_MAC_TSU_TIMER_INCR, + MAC_TSU_TIMER_INCR_COUNT_NANOSECONDS); + if (ret) { + dev_err(&spi->dev, "Failed to config TSU Timer Incr reg: %d\n", + ret); + goto oa_tc6_exit; + } + /* As per the point s3 in the below errata, SPI receive Ethernet frame * transfer may halt when starting the next frame in the same data block * (chunk) as the end of a previous frame. 
The RFA field should be From 3f4422e7c9436abf81a00270be7e4d6d3760ec0e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 20 Aug 2025 07:19:01 +0200 Subject: [PATCH 2211/2411] ALSA: hda: tas2781: Fix wrong reference of tasdevice_priv During the conversion to unify the calibration data management, the reference to tasdevice_priv was wrongly set to h->hda_priv instead of h->priv. This resulted in memory corruption and crashes eventually. Unfortunately it's a void pointer, hence the compiler couldn't know that it's wrong. Fixes: 4fe238513407 ("ALSA: hda/tas2781: Move and unified the calibrated-data getting function for SPI and I2C into the tas2781_hda lib") Link: https://bugzilla.suse.com/show_bug.cgi?id=1248270 Cc: Link: https://patch.msgid.link/20250820051902.4523-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/hda/codecs/side-codecs/tas2781_hda_i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c index b91fff3fde97..e34b17f0c9b9 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c +++ b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c @@ -305,7 +305,7 @@ static int tas2563_save_calibration(struct tas2781_hda *h) efi_char16_t efi_name[TAS2563_CAL_VAR_NAME_MAX]; unsigned long max_size = TAS2563_CAL_DATA_SIZE; unsigned char var8[TAS2563_CAL_VAR_NAME_MAX]; - struct tasdevice_priv *p = h->hda_priv; + struct tasdevice_priv *p = h->priv; struct calidata *cd = &p->cali_data; struct cali_reg *r = &cd->cali_reg_array; unsigned int offset = 0; From 02c1b0824eb1873b15676257cf1dc80070927e1e Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Wed, 6 Aug 2025 15:56:07 +1000 Subject: [PATCH 2212/2411] KVM: PPC: Fix misleading interrupts comment in kvmppc_prepare_to_enter() Until commit 6c85f52b10fd ("kvm/ppc: IRQ disabling cleanup"), kvmppc_prepare_to_enter() was called with interrupts already disabled by the caller, which was documented in the comment 
above the function. Post-cleanup, the function is now called with interrupts enabled, and disables interrupts itself. Fix the comment to reflect the current behaviour. Fixes: 6c85f52b10fd ("kvm/ppc: IRQ disabling cleanup") Signed-off-by: Andrew Donnellan Reviewed-by: Amit Machhiwal Reviewed-by: Gautam Menghani Reviewed-by: Shrikanth Hegde [Fixed the double colon in Reviewed-by line] Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250806055607.17081-1-ajd@linux.ibm.com --- arch/powerpc/kvm/powerpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 153587741864..2ba057171ebe 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -69,7 +69,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) /* * Common checks before entering the guest world. Call with interrupts - * disabled. + * enabled. * * returns: * From b018bb26c42049e05d3d65b057cc1250d17d9b0a Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 23 Jul 2025 08:28:42 +0200 Subject: [PATCH 2213/2411] powerpc: Use dev_fwnode() irq_domain_create_simple() takes fwnode as the first argument. It can be extracted from the struct device using dev_fwnode() helper instead of using of_node with of_fwnode_handle(). So use the dev_fwnode() helper. 
Signed-off-by: Jiri Slaby (SUSE) Acked-by: Christophe Leroy Link: https://lore.kernel.org/all/4bc0e1ca-a523-424a-8759-59e353317fba@kernel.org/ Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250723062842.1831271-1-jirislaby@kernel.org --- arch/powerpc/platforms/8xx/cpm1-ic.c | 3 +-- arch/powerpc/sysdev/fsl_msi.c | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/8xx/cpm1-ic.c b/arch/powerpc/platforms/8xx/cpm1-ic.c index a49d4a9ab3bc..3292071e4da3 100644 --- a/arch/powerpc/platforms/8xx/cpm1-ic.c +++ b/arch/powerpc/platforms/8xx/cpm1-ic.c @@ -110,8 +110,7 @@ static int cpm_pic_probe(struct platform_device *pdev) out_be32(&data->reg->cpic_cimr, 0); - data->host = irq_domain_create_linear(of_fwnode_handle(dev->of_node), - 64, &cpm_pic_host_ops, data); + data->host = irq_domain_create_linear(dev_fwnode(dev), 64, &cpm_pic_host_ops, data); if (!data->host) return -ENODEV; diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 4fe8a7b1b288..2a007bfb038d 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -412,9 +412,8 @@ static int fsl_of_msi_probe(struct platform_device *dev) } platform_set_drvdata(dev, msi); - msi->irqhost = irq_domain_create_linear(of_fwnode_handle(dev->dev.of_node), - NR_MSI_IRQS_MAX, &fsl_msi_host_ops, msi); - + msi->irqhost = irq_domain_create_linear(dev_fwnode(&dev->dev), NR_MSI_IRQS_MAX, + &fsl_msi_host_ops, msi); if (msi->irqhost == NULL) { dev_err(&dev->dev, "No memory for MSI irqhost\n"); err = -ENOMEM; From 8b5d86a63bc9510e094a15d7268c60bd4347b95c Mon Sep 17 00:00:00 2001 From: Xichao Zhao Date: Fri, 1 Aug 2025 11:59:08 +0800 Subject: [PATCH 2214/2411] powerpc/64: Drop unnecessary 'rc' variable Simplify the code to enhance readability and maintain a consistent coding style. 
Signed-off-by: Xichao Zhao Acked-by: Gautam Menghani Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250801035908.370463-1-zhao.xichao@vivo.com --- arch/powerpc/kernel/setup_64.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 7284c8021eeb..8fd7cbf3bd04 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -141,10 +141,7 @@ void __init check_smt_enabled(void) smt_enabled_at_boot = 0; else { int smt; - int rc; - - rc = kstrtoint(smt_enabled_cmdline, 10, &smt); - if (!rc) + if (!kstrtoint(smt_enabled_cmdline, 10, &smt)) smt_enabled_at_boot = min(threads_per_core, smt); } From eb59d4c5948d93e940b5dde9d1bf3b33367fbcb8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 3 Jun 2025 01:32:24 +0900 Subject: [PATCH 2215/2411] powerpc: use always-y instead of extra-y in Makefiles The extra-y syntax is planned for deprecation because it is similar to always-y. When building the boot wrapper, always-y and extra-y are equivalent. Use always-y instead. In arch/powerpc/kernel/Makefile, I added ifdef KBUILD_BUILTIN to keep the current behavior: prom_init_check is skipped when building only modular objects. 
Signed-off-by: Masahiro Yamada Acked-by: Michael Ellerman Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250602163302.478765-1-masahiroy@kernel.org --- arch/powerpc/boot/Makefile | 6 +++--- arch/powerpc/kernel/Makefile | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index a7ab087d412c..c47b78c1d3e7 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -243,13 +243,13 @@ $(obj)/wrapper.a: $(obj-wlib) FORCE hostprogs := addnote hack-coff mktree targets += $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a) zImage.lds -extra-y := $(obj)/wrapper.a $(obj-plat) $(obj)/empty.o \ +always-y := $(obj)/wrapper.a $(obj-plat) $(obj)/empty.o \ $(obj)/zImage.lds $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds dtstree := $(src)/dts wrapper := $(src)/wrapper -wrapperbits := $(extra-y) $(addprefix $(obj)/,addnote hack-coff mktree) \ +wrapperbits := $(always-y) $(addprefix $(obj)/,addnote hack-coff mktree) \ $(wrapper) FORCE ############# @@ -456,7 +456,7 @@ WRAPPER_DTSDIR := /usr/lib/kernel-wrapper/dts WRAPPER_BINDIR := /usr/sbin INSTALL := install -extra-installed := $(patsubst $(obj)/%, $(DESTDIR)$(WRAPPER_OBJDIR)/%, $(extra-y)) +extra-installed := $(patsubst $(obj)/%, $(DESTDIR)$(WRAPPER_OBJDIR)/%, $(always-y)) hostprogs-installed := $(patsubst %, $(DESTDIR)$(WRAPPER_BINDIR)/%, $(hostprogs)) wrapper-installed := $(DESTDIR)$(WRAPPER_BINDIR)/wrapper dts-installed := $(patsubst $(dtstree)/%, $(DESTDIR)$(WRAPPER_DTSDIR)/%, $(wildcard $(dtstree)/*.dts)) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index fb2b95267022..2f0a2e69c607 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -199,7 +199,9 @@ obj-$(CONFIG_ALTIVEC) += vector.o obj-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o obj64-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_entry_64.o -extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check +ifdef KBUILD_BUILTIN 
+always-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check +endif obj-$(CONFIG_PPC64) += $(obj64-y) obj-$(CONFIG_PPC32) += $(obj32-y) From 6a859f1a19d1f8756ffb097f5973dfebbca4811a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 7 Jun 2025 19:13:51 +0900 Subject: [PATCH 2216/2411] powerpc: unify two CONFIG_POWERPC64_CPU entries in the same choice block There are two CONFIG_POWERPC64_CPU entries in the "CPU selection" choice block. I guess the intent is to display a different prompt depending on CPU_LITTLE_ENDIAN: "Generic (POWER5 and PowerPC 970 and above)" for big endian, and "Generic (POWER8 and above)" for little endian. I stumbled on this tricky use case, and worked around it on Kconfig with commit 4d46b5b623e0 ("kconfig: fix infinite loop in sym_calc_choice()"). However, I doubt that supporting multiple entries with the same symbol in a choice block is worth the complexity - this is the only such case in the kernel tree. This commit merges the two entries. Once this cleanup is accepted in the powerpc subsystem, I will proceed to refactor the Kconfig parser. Signed-off-by: Masahiro Yamada Reviewed-by: Christophe Leroy Acked-by: Michael Ellerman Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250607102005.1965409-1-masahiroy@kernel.org --- arch/powerpc/platforms/Kconfig.cputype | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 613b383ed8b3..7b527d18aa5e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -122,16 +122,11 @@ choice If unsure, select Generic. 
config POWERPC64_CPU - bool "Generic (POWER5 and PowerPC 970 and above)" - depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN + bool "Generic 64 bits powerpc" + depends on PPC_BOOK3S_64 + select ARCH_HAS_FAST_MULTIPLIER if CPU_LITTLE_ENDIAN select PPC_64S_HASH_MMU - -config POWERPC64_CPU - bool "Generic (POWER8 and above)" - depends on PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN - select ARCH_HAS_FAST_MULTIPLIER - select PPC_64S_HASH_MMU - select PPC_HAS_LBARX_LHARX + select PPC_HAS_LBARX_LHARX if CPU_LITTLE_ENDIAN config POWERPC_CPU bool "Generic 32 bits powerpc" From 88688a2c8ac6c8036d983ad8b34ce191c46a10aa Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 18 May 2025 10:11:04 +0530 Subject: [PATCH 2217/2411] powerpc/kvm: Fix ifdef to remove build warning When compiling for pseries or powernv defconfig with "make C=1", these warnings were reported by the sparse tool in powerpc/kernel/kvm.c arch/powerpc/kernel/kvm.c:635:9: warning: switch with no cases arch/powerpc/kernel/kvm.c:646:9: warning: switch with no cases Currently #ifdef were added after the switch case which are specific for BOOKE and PPC_BOOK3S_32. These are not enabled in pseries/powernv defconfig.
Fix it by moving the #ifdef before switch(){} Fixes: cbe487fac7fc0 ("KVM: PPC: Add mtsrin PV code") Tested-by: Venkat Rao Bagalkote Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250518044107.39928-1-maddy@linux.ibm.com --- arch/powerpc/kernel/kvm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 5b3c093611ba..7209d00a9c25 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -632,19 +632,19 @@ static void __init kvm_check_ins(u32 *inst, u32 features) #endif } - switch (inst_no_rt & ~KVM_MASK_RB) { #ifdef CONFIG_PPC_BOOK3S_32 + switch (inst_no_rt & ~KVM_MASK_RB) { case KVM_INST_MTSRIN: if (features & KVM_MAGIC_FEAT_SR) { u32 inst_rb = _inst & KVM_MASK_RB; kvm_patch_ins_mtsrin(inst, inst_rt, inst_rb); } break; -#endif } +#endif - switch (_inst) { #ifdef CONFIG_BOOKE + switch (_inst) { case KVM_INST_WRTEEI_0: kvm_patch_ins_wrteei_0(inst); break; @@ -652,8 +652,8 @@ static void __init kvm_check_ins(u32 *inst, u32 features) case KVM_INST_WRTEEI_1: kvm_patch_ins_wrtee(inst, 0, 1); break; -#endif } +#endif } extern u32 kvm_template_start[]; From d40ae9033418095642f65f4fd54dc5a7d292ee39 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 18 May 2025 10:11:06 +0530 Subject: [PATCH 2218/2411] powerpc/prom_init: Fix shellcheck warnings Fix "Double quote to prevent globbing and word splitting." 
warning from shellcheck Tested-by: Venkat Rao Bagalkote Reviewed-by: Stephen Rothwell Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250518044107.39928-3-maddy@linux.ibm.com --- arch/powerpc/kernel/prom_init_check.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 69623b9045d5..3090b97258ae 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -15,8 +15,8 @@ has_renamed_memintrinsics() { - grep -q "^CONFIG_KASAN=y$" ${KCONFIG_CONFIG} && \ - ! grep -q "^CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX=y" ${KCONFIG_CONFIG} + grep -q "^CONFIG_KASAN=y$" "${KCONFIG_CONFIG}" && \ + ! grep -q "^CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX=y" "${KCONFIG_CONFIG}" } if has_renamed_memintrinsics @@ -42,15 +42,15 @@ check_section() { file=$1 section=$2 - size=$(objdump -h -j $section $file 2>/dev/null | awk "\$2 == \"$section\" {print \$3}") + size=$(objdump -h -j "$section" "$file" 2>/dev/null | awk "\$2 == \"$section\" {print \$3}") size=${size:-0} - if [ $size -ne 0 ]; then + if [ "$size" -ne 0 ]; then ERROR=1 echo "Error: Section $section not empty in prom_init.c" >&2 fi } -for UNDEF in $($NM -u $OBJ | awk '{print $2}') +for UNDEF in $($NM -u "$OBJ" | awk '{print $2}') do # On 64-bit nm gives us the function descriptors, which have # a leading . on the name, so strip it off here. @@ -87,8 +87,8 @@ do fi done -check_section $OBJ .data -check_section $OBJ .bss -check_section $OBJ .init.data +check_section "$OBJ" .data +check_section "$OBJ" .bss +check_section "$OBJ" .init.data exit $ERROR From 8763d2257f5231cfdfd8a53594647927dbf8bb06 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 18 May 2025 10:11:07 +0530 Subject: [PATCH 2219/2411] powerpc/boot/install.sh: Fix shellcheck warnings Fix shellcheck warning such as "Double quote to prevent globbing and word splitting." and Use $(...) 
notation instead of legacy backticks `...`. Tested-by: Venkat Rao Bagalkote Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250518044107.39928-4-maddy@linux.ibm.com --- arch/powerpc/boot/install.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/boot/install.sh b/arch/powerpc/boot/install.sh index 101fcb397a0f..c3df6c27ce75 100755 --- a/arch/powerpc/boot/install.sh +++ b/arch/powerpc/boot/install.sh @@ -19,19 +19,19 @@ set -e # this should work for both the pSeries zImage and the iSeries vmlinux.sm -image_name=`basename $2` +image_name=$(basename "$2") echo "Warning: '${INSTALLKERNEL}' command not available... Copying" \ "directly to $4/$image_name-$1" >&2 -if [ -f $4/$image_name-$1 ]; then - mv $4/$image_name-$1 $4/$image_name-$1.old +if [ -f "$4"/"$image_name"-"$1" ]; then + mv "$4"/"$image_name"-"$1" "$4"/"$image_name"-"$1".old fi -if [ -f $4/System.map-$1 ]; then - mv $4/System.map-$1 $4/System-$1.old +if [ -f "$4"/System.map-"$1" ]; then + mv "$4"/System.map-"$1" "$4"/System-"$1".old fi -cat $2 > $4/$image_name-$1 -cp $3 $4/System.map-$1 +cat "$2" > "$4"/"$image_name"-"$1" +cp "$3" "$4"/System.map-"$1" From 6a909ea83f226803ea0e718f6e88613df9234d58 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Wed, 13 Aug 2025 04:02:32 +0000 Subject: [PATCH 2220/2411] tracing: Limit access to parser->buffer when trace_get_user failed When the length of the string written to set_ftrace_filter exceeds FTRACE_BUFF_MAX, the following KASAN alarm will be triggered: BUG: KASAN: slab-out-of-bounds in strsep+0x18c/0x1b0 Read of size 1 at addr ffff0000d00bd5ba by task ash/165 CPU: 1 UID: 0 PID: 165 Comm: ash Not tainted 6.16.0-g6bcdbd62bd56-dirty Hardware name: linux,dummy-virt (DT) Call trace: show_stack+0x34/0x50 (C) dump_stack_lvl+0xa0/0x158 print_address_description.constprop.0+0x88/0x398 print_report+0xb0/0x280 kasan_report+0xa4/0xf0 __asan_report_load1_noabort+0x20/0x30 strsep+0x18c/0x1b0 
ftrace_process_regex.isra.0+0x100/0x2d8 ftrace_regex_release+0x484/0x618 __fput+0x364/0xa58 ____fput+0x28/0x40 task_work_run+0x154/0x278 do_notify_resume+0x1f0/0x220 el0_svc+0xec/0xf0 el0t_64_sync_handler+0xa0/0xe8 el0t_64_sync+0x1ac/0x1b0 The reason is that trace_get_user will fail when processing a string longer than FTRACE_BUFF_MAX, but not set the end of parser->buffer to 0. Then an OOB access will be triggered in ftrace_regex_release-> ftrace_process_regex->strsep->strpbrk. We can solve this problem by limiting access to parser->buffer when trace_get_user failed. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20250813040232.1344527-1-pulehui@huaweicloud.com Fixes: 8c9af478c06b ("ftrace: Handle commands when closing set_ftrace_filter file") Signed-off-by: Pu Lehui Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 18 ++++++++++++------ kernel/trace/trace.h | 8 +++++++- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4283ed4e8f59..8d8935ed416d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1816,7 +1816,7 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, ret = get_user(ch, ubuf++); if (ret) - return ret; + goto fail; read++; cnt--; @@ -1830,7 +1830,7 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, while (cnt && isspace(ch)) { ret = get_user(ch, ubuf++); if (ret) - return ret; + goto fail; read++; cnt--; } @@ -1848,12 +1848,14 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, while (cnt && !isspace(ch) && ch) { if (parser->idx < parser->size - 1) parser->buffer[parser->idx++] = ch; - else - return -EINVAL; + else { + ret = -EINVAL; + goto fail; + } ret = get_user(ch, ubuf++); if (ret) - return ret; + goto fail; read++; cnt--; } @@ -1868,11 +1870,15 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, /* Make sure the parsed string always terminates with 
'\0'. */ parser->buffer[parser->idx] = 0; } else { - return -EINVAL; + ret = -EINVAL; + goto fail; } *ppos += read; return read; +fail: + trace_parser_fail(parser); + return ret; } /* TODO add a seq_buf_to_buffer() */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 1dbf1d3cf2f1..be6654899cae 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1292,6 +1292,7 @@ bool ftrace_event_is_function(struct trace_event_call *call); */ struct trace_parser { bool cont; + bool fail; char *buffer; unsigned idx; unsigned size; @@ -1299,7 +1300,7 @@ struct trace_parser { static inline bool trace_parser_loaded(struct trace_parser *parser) { - return (parser->idx != 0); + return !parser->fail && parser->idx != 0; } static inline bool trace_parser_cont(struct trace_parser *parser) @@ -1313,6 +1314,11 @@ static inline void trace_parser_clear(struct trace_parser *parser) parser->idx = 0; } +static inline void trace_parser_fail(struct trace_parser *parser) +{ + parser->fail = true; +} + extern int trace_parser_get_init(struct trace_parser *parser, int size); extern void trace_parser_put(struct trace_parser *parser); extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, From cd6e4faba96fe41d6b686e144b96dad5e6f2e771 Mon Sep 17 00:00:00 2001 From: Liao Yuanhong Date: Wed, 13 Aug 2025 17:51:14 +0800 Subject: [PATCH 2221/2411] ring-buffer: Remove redundant semicolons Remove unnecessary semicolons. 
Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250813095114.559530-1-liaoyuanhong@vivo.com Signed-off-by: Liao Yuanhong Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index bb71a0dc9d69..43460949ad3f 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -7666,7 +7666,7 @@ static __init int test_ringbuffer(void) rb_test_started = true; set_current_state(TASK_INTERRUPTIBLE); - /* Just run for 10 seconds */; + /* Just run for 10 seconds */ schedule_timeout(10 * HZ); kthread_stop(rb_hammer); From edede7a6dcd7435395cf757d053974aaab6ab1c2 Mon Sep 17 00:00:00 2001 From: Ye Weihua Date: Mon, 18 Aug 2025 07:33:32 +0000 Subject: [PATCH 2222/2411] trace/fgraph: Fix the warning caused by missing unregister notifier This warning was triggered during testing on v6.16: notifier callback ftrace_suspend_notifier_call already registered WARNING: CPU: 2 PID: 86 at kernel/notifier.c:23 notifier_chain_register+0x44/0xb0 ... Call Trace: blocking_notifier_chain_register+0x34/0x60 register_ftrace_graph+0x330/0x410 ftrace_profile_write+0x1e9/0x340 vfs_write+0xf8/0x420 ? filp_flush+0x8a/0xa0 ? filp_close+0x1f/0x30 ? do_dup2+0xaf/0x160 ksys_write+0x65/0xe0 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x77/0x7f When writing to the function_profile_enabled interface, the notifier was not unregistered after start_graph_tracing failed, causing a warning the next time function_profile_enabled was written. Fixed by adding unregister_pm_notifier in the exception path. 
Link: https://lore.kernel.org/20250818073332.3890629-1-yeweihua4@huawei.com Fixes: 4a2b8dda3f870 ("tracing/function-graph-tracer: fix a regression while suspend to disk") Acked-by: Masami Hiramatsu (Google) Signed-off-by: Ye Weihua Signed-off-by: Steven Rostedt (Google) --- kernel/trace/fgraph.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index f4d200f0c610..2a42c1036ea8 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -1397,6 +1397,7 @@ int register_ftrace_graph(struct fgraph_ops *gops) ftrace_graph_active--; gops->saved_func = NULL; fgraph_lru_release_index(i); + unregister_pm_notifier(&ftrace_suspend_notifier); } return ret; } From 8151320c747efb22d30b035af989fed0d502176e Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 22 Jul 2025 22:32:33 +0800 Subject: [PATCH 2223/2411] ACPI: pfr_update: Fix the driver update version check The security-version-number check should be used rather than the runtime version check for driver updates. Otherwise, the firmware update would fail when the update binary had a lower runtime version number than the current one. Fixes: 0db89fa243e5 ("ACPI: Introduce Platform Firmware Runtime Update device driver") Cc: 5.17+ # 5.17+ Reported-by: "Govindarajulu, Hariganesh" Signed-off-by: Chen Yu Link: https://patch.msgid.link/20250722143233.3970607-1-yu.c.chen@intel.com [ rjw: Changelog edits ] Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/pfr_update.c | 2 +- include/uapi/linux/pfrut.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/pfr_update.c b/drivers/acpi/pfr_update.c index 318683744ed1..11b1c2828005 100644 --- a/drivers/acpi/pfr_update.c +++ b/drivers/acpi/pfr_update.c @@ -329,7 +329,7 @@ static bool applicable_image(const void *data, struct pfru_update_cap_info *cap, if (type == PFRU_CODE_INJECT_TYPE) return payload_hdr->rt_ver >= cap->code_rt_version; - return payload_hdr->rt_ver >= cap->drv_rt_version; + return payload_hdr->svn_ver >= cap->drv_svn; } static void print_update_debug_info(struct pfru_updated_result *result, diff --git a/include/uapi/linux/pfrut.h b/include/uapi/linux/pfrut.h index 42fa15f8310d..b77d5c210c26 100644 --- a/include/uapi/linux/pfrut.h +++ b/include/uapi/linux/pfrut.h @@ -89,6 +89,7 @@ struct pfru_payload_hdr { __u32 hw_ver; __u32 rt_ver; __u8 platform_id[16]; + __u32 svn_ver; }; enum pfru_dsm_status { From 4647c4deadcc17f40858be06bcf416369a8f1d57 Mon Sep 17 00:00:00 2001 From: Pratyush Brahma Date: Wed, 20 Aug 2025 06:29:34 +0530 Subject: [PATCH 2224/2411] mm: numa,memblock: Use SZ_1M macro to denote bytes to MB conversion Replace the manual bitwise conversion of bytes to MB with SZ_1M macro, a standard macro used within the mm subsystem, to improve readability. 
Signed-off-by: Pratyush Brahma Link: https://lore.kernel.org/r/20250820-numa-memblks-refac-v2-1-43bf1af02acd@oss.qualcomm.com Signed-off-by: Mike Rapoport (Microsoft) --- mm/memblock.c | 4 ++-- mm/numa_emulation.c | 4 ++-- mm/numa_memblks.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/memblock.c b/mm/memblock.c index 154f1d73b61f..8a0ed3074af4 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -780,9 +780,9 @@ bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_byt } if ((nr_pages << PAGE_SHIFT) > threshold_bytes) { - mem_size_mb = memblock_phys_mem_size() >> 20; + mem_size_mb = memblock_phys_mem_size() / SZ_1M; pr_err("NUMA: no nodes coverage for %luMB of %luMB RAM\n", - (nr_pages << PAGE_SHIFT) >> 20, mem_size_mb); + (nr_pages << PAGE_SHIFT) / SZ_1M, mem_size_mb); return false; } diff --git a/mm/numa_emulation.c b/mm/numa_emulation.c index 9d55679d99ce..703c8fa05048 100644 --- a/mm/numa_emulation.c +++ b/mm/numa_emulation.c @@ -73,7 +73,7 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei, } printk(KERN_INFO "Faking node %d at [mem %#018Lx-%#018Lx] (%LuMB)\n", - nid, eb->start, eb->end - 1, (eb->end - eb->start) >> 20); + nid, eb->start, eb->end - 1, (eb->end - eb->start) / SZ_1M); return 0; } @@ -264,7 +264,7 @@ static int __init split_nodes_size_interleave_uniform(struct numa_meminfo *ei, min_size = ALIGN(max(min_size, FAKE_NODE_MIN_SIZE), FAKE_NODE_MIN_SIZE); if (size < min_size) { pr_err("Fake node size %LuMB too small, increasing to %LuMB\n", - size >> 20, min_size >> 20); + size / SZ_1M, min_size / SZ_1M); size = min_size; } size = ALIGN_DOWN(size, FAKE_NODE_MIN_SIZE); diff --git a/mm/numa_memblks.c b/mm/numa_memblks.c index de626525a87c..5b009a9cd8b4 100644 --- a/mm/numa_memblks.c +++ b/mm/numa_memblks.c @@ -427,9 +427,9 @@ static int __init numa_register_meminfo(struct numa_meminfo *mi) unsigned long pfn_align = node_map_pfn_alignment(); if (pfn_align && pfn_align < PAGES_PER_SECTION) { - 
unsigned long node_align_mb = PFN_PHYS(pfn_align) >> 20; + unsigned long node_align_mb = PFN_PHYS(pfn_align) / SZ_1M; - unsigned long sect_align_mb = PFN_PHYS(PAGES_PER_SECTION) >> 20; + unsigned long sect_align_mb = PFN_PHYS(PAGES_PER_SECTION) / SZ_1M; pr_warn("Node alignment %luMB < min %luMB, rejecting NUMA config\n", node_align_mb, sect_align_mb); From efdaa61d73a1deb066ccc3b4d56257cc63ab5be9 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 15 Aug 2025 09:40:52 +0200 Subject: [PATCH 2225/2411] drivers/xen/xenbus: remove quirk for Xen 3.x The kernel is not supported to run as a Xen guest on Xen versions older than 4.0. Remove xen_strict_xenbus_quirk() which is testing the Xen version to be at least 4.0. Acked-by: Stefano Stabellini Reviewed-by: Jason Andryuk Signed-off-by: Juergen Gross Message-ID: <20250815074052.13792-1-jgross@suse.com> --- drivers/xen/xenbus/xenbus_xs.c | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 3c9da446b85d..528682bf0c7f 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -718,26 +718,6 @@ int xs_watch_msg(struct xs_watch_event *event) return 0; } -/* - * Certain older XenBus toolstack cannot handle reading values that are - * not populated. Some Xen 3.4 installation are incapable of doing this - * so if we are running on anything older than 4 do not attempt to read - * control/platform-feature-xs_reset_watches. 
- */ -static bool xen_strict_xenbus_quirk(void) -{ -#ifdef CONFIG_X86 - uint32_t eax, ebx, ecx, edx, base; - - base = xen_cpuid_base(); - cpuid(base + 1, &eax, &ebx, &ecx, &edx); - - if ((eax >> 16) < 4) - return true; -#endif - return false; - -} static void xs_reset_watches(void) { int err; @@ -745,9 +725,6 @@ static void xs_reset_watches(void) if (!xen_hvm_domain() || xen_initial_domain()) return; - if (xen_strict_xenbus_quirk()) - return; - if (!xenbus_read_unsigned("control", "platform-feature-xs_reset_watches", 0)) return; From a47bc954cf0eb51f2828e1607d169d487df7f11f Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 20 Aug 2025 22:23:15 +0800 Subject: [PATCH 2226/2411] objtool/LoongArch: Get table size correctly if LTO is enabled When compiling with LLVM and CONFIG_LTO_CLANG is set, there exist many objtool warnings "sibling call from callable instruction with modified stack frame". For this special case, the related object file shows that there is no generated relocation section '.rela.discard.tablejump_annotate' for the table jump instruction jirl, thus objtool can not know that what is the actual destination address. It needs to do something on the LLVM side to make sure that there is the relocation section '.rela.discard.tablejump_annotate' if LTO is enabled, but in order to maintain compatibility for the current LLVM compiler, this can be done in the kernel Makefile for now. Ensure it is aware of linker with LTO, '--loongarch-annotate-tablejump' needs to be passed via '-mllvm' to ld.lld. Before doing the above changes, it should handle the special case of the relocation section '.rela.discard.tablejump_annotate' to get the correct table size first, otherwise there are many objtool warnings and errors if LTO is enabled. There are many different rodata for each function if LTO is enabled, it is necessary to enhance get_rodata_table_size_by_table_annotate(). 
Fixes: b95f852d3af2 ("objtool/LoongArch: Add support for switch table") Closes: https://lore.kernel.org/loongarch/20250731175655.GA1455142@ax162/ Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- tools/objtool/arch/loongarch/special.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/objtool/arch/loongarch/special.c b/tools/objtool/arch/loongarch/special.c index e39f86d97002..a80b75f7b061 100644 --- a/tools/objtool/arch/loongarch/special.c +++ b/tools/objtool/arch/loongarch/special.c @@ -27,6 +27,7 @@ static void get_rodata_table_size_by_table_annotate(struct objtool_file *file, struct table_info *next_table; unsigned long tmp_insn_offset; unsigned long tmp_rodata_offset; + bool is_valid_list = false; rsec = find_section_by_name(file->elf, ".rela.discard.tablejump_annotate"); if (!rsec) @@ -35,6 +36,12 @@ static void get_rodata_table_size_by_table_annotate(struct objtool_file *file, INIT_LIST_HEAD(&table_list); for_each_reloc(rsec, reloc) { + if (reloc->sym->sec->rodata) + continue; + + if (strcmp(insn->sec->name, reloc->sym->sec->name)) + continue; + orig_table = malloc(sizeof(struct table_info)); if (!orig_table) { WARN("malloc failed"); @@ -49,6 +56,22 @@ static void get_rodata_table_size_by_table_annotate(struct objtool_file *file, if (reloc_idx(reloc) + 1 == sec_num_entries(rsec)) break; + + if (strcmp(insn->sec->name, (reloc + 1)->sym->sec->name)) { + list_for_each_entry(orig_table, &table_list, jump_info) { + if (orig_table->insn_offset == insn->offset) { + is_valid_list = true; + break; + } + } + + if (!is_valid_list) { + list_del_init(&table_list); + continue; + } + + break; + } } list_for_each_entry(orig_table, &table_list, jump_info) { From 5dfea6644d201bfeffaa7e0d79d62309856613b7 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 20 Aug 2025 22:23:15 +0800 Subject: [PATCH 2227/2411] LoongArch: Pass annotate-tablejump option if LTO is enabled When 
compiling with LLVM and CONFIG_LTO_CLANG is set, there exist many objtool warnings "sibling call from callable instruction with modified stack frame". For this special case, the related object file shows that there is no generated relocation section '.rela.discard.tablejump_annotate' for the table jump instruction jirl, thus objtool can not know that what is the actual destination address. It needs to do something on the LLVM side to make sure that there is the relocation section '.rela.discard.tablejump_annotate' if LTO is enabled, but in order to maintain compatibility for the current LLVM compiler, this can be done in the kernel Makefile for now. Ensure it is aware of linker with LTO, '--loongarch-annotate-tablejump' needs to be passed via '-mllvm' to ld.lld. Note that it should also pass the compiler option -mannotate-tablejump rather than only pass '-mllvm --loongarch-annotate-tablejump' to ld.lld if LTO is enabled, otherwise there are no jump info for some table jump instructions. Fixes: e20ab7d454ee ("LoongArch: Enable jump table for objtool") Closes: https://lore.kernel.org/loongarch/20250731175655.GA1455142@ax162/ Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Co-developed-by: WANG Rui Signed-off-by: WANG Rui Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index b0703a4e02a2..a3a9759414f4 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -102,7 +102,13 @@ KBUILD_CFLAGS += $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma) ifdef CONFIG_OBJTOOL ifdef CONFIG_CC_HAS_ANNOTATE_TABLEJUMP +# The annotate-tablejump option can not be passed to LLVM backend when LTO is enabled. +# Ensure it is aware of linker with LTO, '--loongarch-annotate-tablejump' also needs to +# be passed via '-mllvm' to ld.lld. 
KBUILD_CFLAGS += -mannotate-tablejump +ifdef CONFIG_LTO_CLANG +KBUILD_LDFLAGS += -mllvm --loongarch-annotate-tablejump +endif else KBUILD_CFLAGS += -fno-jump-tables # keep compatibility with older compilers endif From f7794a4d92ade518c813de69a01b27ca6d8d86f3 Mon Sep 17 00:00:00 2001 From: Ming Wang Date: Wed, 20 Aug 2025 22:23:16 +0800 Subject: [PATCH 2228/2411] LoongArch: Increase COMMAND_LINE_SIZE up to 4096 The default COMMAND_LINE_SIZE of 512, inherited from asm-generic, is too small for modern use cases. For example, kdump configurations or extensive debugging parameters can easily exceed this limit. Therefore, increase the command line size to 4096 bytes, aligning LoongArch with the MIPS architecture. This change follows a broader trend among architectures to raise this limit to support modern needs; for instance, PowerPC increased its value for similar reasons in the commit a5980d064fe2 ("powerpc: Bump COMMAND_LINE_SIZE to 2048"). Similar to the change made for RISC-V in the commit 61fc1ee8be26 ("riscv: Bump COMMAND_LINE_SIZE value to 1024"), this is considered a safe change. The broader kernel community has reached a consensus that modifying COMMAND_LINE_SIZE from UAPI headers does not constitute a uABI breakage, as well-behaved userspace applications should not rely on this macro. 
Suggested-by: Huang Cun Signed-off-by: Ming Wang Signed-off-by: Huacai Chen --- arch/loongarch/include/uapi/asm/setup.h | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 arch/loongarch/include/uapi/asm/setup.h diff --git a/arch/loongarch/include/uapi/asm/setup.h b/arch/loongarch/include/uapi/asm/setup.h new file mode 100644 index 000000000000..d46363ce3e02 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/setup.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_ASM_LOONGARCH_SETUP_H +#define _UAPI_ASM_LOONGARCH_SETUP_H + +#define COMMAND_LINE_SIZE 4096 + +#endif /* _UAPI_ASM_LOONGARCH_SETUP_H */ From 8ef7f3132e4005a103b382e71abea7ad01fbeb86 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 20 Aug 2025 22:23:44 +0800 Subject: [PATCH 2229/2411] LoongArch: Add cpuhotplug hooks to fix high cpu usage of vCPU threads When the CPU is offline, the timer of LoongArch is not correctly closed. This is harmless for real machines, but resulting in an excessively high cpu usage rate of the offline vCPU thread in the virtual machines. To correctly close the timer, we have made the following modifications: Register the cpu hotplug event (CPUHP_AP_LOONGARCH_ARCH_TIMER_STARTING) for LoongArch. This event's hooks will be called to close the timer when the CPU is offline. Clear the timer interrupt when the timer is turned off. Since before the timer is turned off, there may be a timer interrupt that has already been in the pending state due to the interruption of the disabled, which also affects the halt state of the offline vCPU. 
Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- arch/loongarch/kernel/time.c | 22 ++++++++++++++++++++++ include/linux/cpuhotplug.h | 1 + 2 files changed, 23 insertions(+) diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index 367906b10f81..f3092f2de8b5 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -5,6 +5,7 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include +#include #include #include #include @@ -102,6 +103,23 @@ static int constant_timer_next_event(unsigned long delta, struct clock_event_dev return 0; } +static int arch_timer_starting(unsigned int cpu) +{ + set_csr_ecfg(ECFGF_TIMER); + + return 0; +} + +static int arch_timer_dying(unsigned int cpu) +{ + constant_set_state_shutdown(this_cpu_ptr(&constant_clockevent_device)); + + /* Clear Timer Interrupt */ + write_csr_tintclear(CSR_TINTCLR_TI); + + return 0; +} + static unsigned long get_loops_per_jiffy(void) { unsigned long lpj = (unsigned long)const_clock_freq; @@ -172,6 +190,10 @@ int constant_clockevent_init(void) lpj_fine = get_loops_per_jiffy(); pr_info("Constant clock event device register\n"); + cpuhp_setup_state(CPUHP_AP_LOONGARCH_ARCH_TIMER_STARTING, + "clockevents/loongarch/timer:starting", + arch_timer_starting, arch_timer_dying); + return 0; } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index edfa61d80702..62cd7b35a29c 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -168,6 +168,7 @@ enum cpuhp_state { CPUHP_AP_QCOM_TIMER_STARTING, CPUHP_AP_TEGRA_TIMER_STARTING, CPUHP_AP_ARMADA_TIMER_STARTING, + CPUHP_AP_LOONGARCH_ARCH_TIMER_STARTING, CPUHP_AP_MIPS_GIC_TIMER_STARTING, CPUHP_AP_ARC_TIMER_STARTING, CPUHP_AP_REALTEK_TIMER_STARTING, From 63dbd8fb2af3a89466538599a9acb2d11ef65c06 Mon Sep 17 00:00:00 2001 From: Kanglong Wang Date: Wed, 20 Aug 2025 22:23:44 +0800 Subject: [PATCH 2230/2411] LoongArch: Optimize module load time by optimizing PLT/GOT counting When 
enabling CONFIG_KASAN, CONFIG_PREEMPT_VOLUNTARY_BUILD and CONFIG_PREEMPT_VOLUNTARY at the same time, there will be soft deadlock, the relevant logs are as follows: rcu: INFO: rcu_sched self-detected stall on CPU ... Call Trace: [<900000000024f9e4>] show_stack+0x5c/0x180 [<90000000002482f4>] dump_stack_lvl+0x94/0xbc [<9000000000224544>] rcu_dump_cpu_stacks+0x1fc/0x280 [<900000000037ac80>] rcu_sched_clock_irq+0x720/0xf88 [<9000000000396c34>] update_process_times+0xb4/0x150 [<90000000003b2474>] tick_nohz_handler+0xf4/0x250 [<9000000000397e28>] __hrtimer_run_queues+0x1d0/0x428 [<9000000000399b2c>] hrtimer_interrupt+0x214/0x538 [<9000000000253634>] constant_timer_interrupt+0x64/0x80 [<9000000000349938>] __handle_irq_event_percpu+0x78/0x1a0 [<9000000000349a78>] handle_irq_event_percpu+0x18/0x88 [<9000000000354c00>] handle_percpu_irq+0x90/0xf0 [<9000000000348c74>] handle_irq_desc+0x94/0xb8 [<9000000001012b28>] handle_cpu_irq+0x68/0xa0 [<9000000001def8c0>] handle_loongarch_irq+0x30/0x48 [<9000000001def958>] do_vint+0x80/0xd0 [<9000000000268a0c>] kasan_mem_to_shadow.part.0+0x2c/0x2a0 [<90000000006344f4>] __asan_load8+0x4c/0x120 [<900000000025c0d0>] module_frob_arch_sections+0x5c8/0x6b8 [<90000000003895f0>] load_module+0x9e0/0x2958 [<900000000038b770>] __do_sys_init_module+0x208/0x2d0 [<9000000001df0c34>] do_syscall+0x94/0x190 [<900000000024d6fc>] handle_syscall+0xbc/0x158 After analysis, this is because the slow speed of loading the amdgpu module leads to the long time occupation of the cpu and then the soft deadlock. When loading a module, module_frob_arch_sections() tries to figure out the number of PLTs/GOTs that will be needed to handle all the RELAs. It will call the count_max_entries() to find in an out-of-order date which counting algorithm has O(n^2) complexity. To make it faster, we sort the relocation list by info and addend. That way, to check for a duplicate relocation, it just needs to compare with the previous entry. 
This reduces the complexity of the algorithm to O(n log n), as done in commit d4e0340919fb ("arm64/module: Optimize module load time by optimizing PLT counting"). This gives a significant reduction in module load time for modules with a large number of relocations. After applying this patch, the soft deadlock problem has been solved, and the kernel starts normally without "Call Trace". Using the default configuration to test some modules, the results are as follows: Module Size ip_tables 36K fat 143K radeon 2.5MB amdgpu 16MB Without this patch: Module Module load time (ms) Count(PLTs/GOTs) ip_tables 18 59/6 fat 0 162/14 radeon 54 1221/84 amdgpu 1411 4525/1098 With this patch: Module Module load time (ms) Count(PLTs/GOTs) ip_tables 18 59/6 fat 0 162/14 radeon 22 1221/84 amdgpu 45 4525/1098 Fixes: fcdfe9d22bed ("LoongArch: Add ELF and module support") Signed-off-by: Kanglong Wang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/module-sections.c | 38 ++++++++++++------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c index e2f30ff9afde..a43ba7f9f987 100644 --- a/arch/loongarch/kernel/module-sections.c +++ b/arch/loongarch/kernel/module-sections.c @@ -8,6 +8,7 @@ #include #include #include +#include Elf_Addr module_emit_got_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val) { @@ -61,39 +62,38 @@ Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr v return (Elf_Addr)&plt[nr]; } -static int is_rela_equal(const Elf_Rela *x, const Elf_Rela *y) +#define cmp_3way(a, b) ((a) < (b) ? 
-1 : (a) > (b)) + +static int compare_rela(const void *x, const void *y) { - return x->r_info == y->r_info && x->r_addend == y->r_addend; -} + int ret; + const Elf_Rela *rela_x = x, *rela_y = y; -static bool duplicate_rela(const Elf_Rela *rela, int idx) -{ - int i; + ret = cmp_3way(rela_x->r_info, rela_y->r_info); + if (ret == 0) + ret = cmp_3way(rela_x->r_addend, rela_y->r_addend); - for (i = 0; i < idx; i++) { - if (is_rela_equal(&rela[i], &rela[idx])) - return true; - } - - return false; + return ret; } static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts, unsigned int *gots) { - unsigned int i, type; + unsigned int i; + + sort(relas, num, sizeof(Elf_Rela), compare_rela, NULL); for (i = 0; i < num; i++) { - type = ELF_R_TYPE(relas[i].r_info); - switch (type) { + if (i && !compare_rela(&relas[i-1], &relas[i])) + continue; + + switch (ELF_R_TYPE(relas[i].r_info)) { case R_LARCH_SOP_PUSH_PLT_PCREL: case R_LARCH_B26: - if (!duplicate_rela(relas, i)) - (*plts)++; + (*plts)++; break; case R_LARCH_GOT_PC_HI20: - if (!duplicate_rela(relas, i)) - (*gots)++; + (*gots)++; break; default: break; /* Do nothing. */ From 112ca94f6c3b3e0b2002a240de43c487a33e0234 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 20 Aug 2025 22:23:44 +0800 Subject: [PATCH 2231/2411] LoongArch: Save LBT before FPU in setup_sigcontext() Now if preemption happens between protected_save_fpu_context() and protected_save_lbt_context(), FTOP context is lost. Because FTOP is saved by protected_save_lbt_context() but protected_save_fpu_context() disables TM before that. So save LBT before FPU in setup_sigcontext() to avoid this potential risk. 
Signed-off-by: Hanlu Li Signed-off-by: Huacai Chen --- arch/loongarch/kernel/signal.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index 4740cb5b2388..c9f7ca778364 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -677,6 +677,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, for (i = 1; i < 32; i++) err |= __put_user(regs->regs[i], &sc->sc_regs[i]); +#ifdef CONFIG_CPU_HAS_LBT + if (extctx->lbt.addr) + err |= protected_save_lbt_context(extctx); +#endif + if (extctx->lasx.addr) err |= protected_save_lasx_context(extctx); else if (extctx->lsx.addr) @@ -684,11 +689,6 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, else if (extctx->fpu.addr) err |= protected_save_fpu_context(extctx); -#ifdef CONFIG_CPU_HAS_LBT - if (extctx->lbt.addr) - err |= protected_save_lbt_context(extctx); -#endif - /* Set the "end" magic */ info = (struct sctx_info *)extctx->end.addr; err |= __put_user(0, &info->magic); From 0078e94a4733454d1ffa3888afe88bf19c81b91c Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 20 Aug 2025 22:23:44 +0800 Subject: [PATCH 2232/2411] LoongArch: Rename GCC_PLUGIN_STACKLEAK to KSTACK_ERASE Commit 57fbad15c2eee772 ("stackleak: Rename STACKLEAK to KSTACK_ERASE") misses the stackframe.h part for LoongArch, so fix it. 
Fixes: 57fbad15c2eee772 ("stackleak: Rename STACKLEAK to KSTACK_ERASE") Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/stackframe.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h index 3eda298702b1..5cb568a60cf8 100644 --- a/arch/loongarch/include/asm/stackframe.h +++ b/arch/loongarch/include/asm/stackframe.h @@ -58,7 +58,7 @@ .endm .macro STACKLEAK_ERASE -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK +#ifdef CONFIG_KSTACK_ERASE bl stackleak_erase_on_task_stack #endif .endm From f135fb24ef29335b94921077588cae445bc7f099 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 20 Aug 2025 15:22:00 +0100 Subject: [PATCH 2233/2411] ASoC: cs35l56: Update Firmware Addresses for CS35L63 for production silicon Production silicon for CS35L63 has some small differences compared to pre-production silicon. Update firmware addresses, which are different. No product was ever released with pre-production silicon so there is no need for the driver to include support for it.
Fixes: 978858791ced ("ASoC: cs35l56: Add initial support for CS35L63 for I2C and SoundWire") Signed-off-by: Stefan Binding Link: https://patch.msgid.link/20250820142209.127575-2-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index e17c4cadd04d..f44aabde805e 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -107,8 +107,8 @@ #define CS35L56_DSP1_PMEM_5114 0x3804FE8 #define CS35L63_DSP1_FW_VER CS35L56_DSP1_FW_VER -#define CS35L63_DSP1_HALO_STATE 0x280396C -#define CS35L63_DSP1_PM_CUR_STATE 0x28042C8 +#define CS35L63_DSP1_HALO_STATE 0x2803C04 +#define CS35L63_DSP1_PM_CUR_STATE 0x2804518 #define CS35L63_PROTECTION_STATUS 0x340009C #define CS35L63_TRANSDUCER_ACTUAL_PS 0x34000F4 #define CS35L63_MAIN_RENDER_USER_MUTE 0x3400020 From 8dadc11b67d4b83deff45e4889b3b5540b9c0a7f Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 20 Aug 2025 15:22:01 +0100 Subject: [PATCH 2234/2411] ASoC: cs35l56: Handle new algorithms IDs for CS35L63 CS35L63 uses different algorithm IDs from CS35L56. Add a new mechanism to handle different alg IDs between parts in the CS35L56 driver. 
Fixes: 978858791ced ("ASoC: cs35l56: Add initial support for CS35L63 for I2C and SoundWire") Signed-off-by: Richard Fitzgerald Signed-off-by: Stefan Binding Link: https://patch.msgid.link/20250820142209.127575-3-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 1 + sound/soc/codecs/cs35l56-shared.c | 29 ++++++++++++++++++++++++++--- sound/soc/codecs/cs35l56.c | 2 +- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index f44aabde805e..7c8bbe8ad1e2 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -306,6 +306,7 @@ struct cs35l56_base { struct gpio_desc *reset_gpio; struct cs35l56_spi_payload *spi_payload_buf; const struct cs35l56_fw_reg *fw_reg; + const struct cirrus_amp_cal_controls *calibration_controls; }; static inline bool cs35l56_is_otp_register(unsigned int reg) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index ba653f6ccfae..850fcf385996 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -838,6 +838,15 @@ const struct cirrus_amp_cal_controls cs35l56_calibration_controls = { }; EXPORT_SYMBOL_NS_GPL(cs35l56_calibration_controls, "SND_SOC_CS35L56_SHARED"); +static const struct cirrus_amp_cal_controls cs35l63_calibration_controls = { + .alg_id = 0xbf210, + .mem_region = WMFW_ADSP2_YM, + .ambient = "CAL_AMBIENT", + .calr = "CAL_R", + .status = "CAL_STATUS", + .checksum = "CAL_CHECKSUM", +}; + int cs35l56_get_calibration(struct cs35l56_base *cs35l56_base) { u64 silicon_uid = 0; @@ -912,19 +921,31 @@ EXPORT_SYMBOL_NS_GPL(cs35l56_read_prot_status, "SND_SOC_CS35L56_SHARED"); void cs35l56_log_tuning(struct cs35l56_base *cs35l56_base, struct cs_dsp *cs_dsp) { __be32 pid, sid, tid; + unsigned int alg_id; int ret; + switch (cs35l56_base->type) { + case 0x54: + case 0x56: + case 0x57: + alg_id = 0x9f212; + break; + default: + alg_id = 0xbf212; + break; + } + scoped_guard(mutex, 
&cs_dsp->pwr_lock) { ret = cs_dsp_coeff_read_ctrl(cs_dsp_get_ctl(cs_dsp, "AS_PRJCT_ID", - WMFW_ADSP2_XM, 0x9f212), + WMFW_ADSP2_XM, alg_id), 0, &pid, sizeof(pid)); if (!ret) ret = cs_dsp_coeff_read_ctrl(cs_dsp_get_ctl(cs_dsp, "AS_CHNNL_ID", - WMFW_ADSP2_XM, 0x9f212), + WMFW_ADSP2_XM, alg_id), 0, &sid, sizeof(sid)); if (!ret) ret = cs_dsp_coeff_read_ctrl(cs_dsp_get_ctl(cs_dsp, "AS_SNPSHT_ID", - WMFW_ADSP2_XM, 0x9f212), + WMFW_ADSP2_XM, alg_id), 0, &tid, sizeof(tid)); } @@ -974,8 +995,10 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base) case 0x35A54: case 0x35A56: case 0x35A57: + cs35l56_base->calibration_controls = &cs35l56_calibration_controls; break; case 0x35A630: + cs35l56_base->calibration_controls = &cs35l63_calibration_controls; devid = devid >> 4; break; default: diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index b1c65d8331e7..2c1edbd636ef 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -695,7 +695,7 @@ static int cs35l56_write_cal(struct cs35l56_private *cs35l56) return ret; ret = cs_amp_write_cal_coeffs(&cs35l56->dsp.cs_dsp, - &cs35l56_calibration_controls, + cs35l56->base.calibration_controls, &cs35l56->base.cal_data); wm_adsp_stop(&cs35l56->dsp); From 8d13d1bdb59d0a2c526869ee571ec51a3a887463 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 20 Aug 2025 15:22:02 +0100 Subject: [PATCH 2235/2411] ASoC: cs35l56: Remove SoundWire Clock Divider workaround for CS35L63 Production silicon for CS35L63 has some small differences compared to pre-production silicon. Remove soundwire clock workaround as it is no longer necessary. We don't want to do tricks with low-level clocking controls if we don't need to.
Fixes: 978858791ced ("ASoC: cs35l56: Add initial support for CS35L63 for I2C and SoundWire") Signed-off-by: Stefan Binding Link: https://patch.msgid.link/20250820142209.127575-4-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-sdw.c | 69 ---------------------------------- sound/soc/codecs/cs35l56.h | 3 -- 2 files changed, 72 deletions(-) diff --git a/sound/soc/codecs/cs35l56-sdw.c b/sound/soc/codecs/cs35l56-sdw.c index ee14031695a1..3905c9cb188a 100644 --- a/sound/soc/codecs/cs35l56-sdw.c +++ b/sound/soc/codecs/cs35l56-sdw.c @@ -393,74 +393,6 @@ static int cs35l56_sdw_update_status(struct sdw_slave *peripheral, return 0; } -static int cs35l63_sdw_kick_divider(struct cs35l56_private *cs35l56, - struct sdw_slave *peripheral) -{ - unsigned int curr_scale_reg, next_scale_reg; - int curr_scale, next_scale, ret; - - if (!cs35l56->base.init_done) - return 0; - - if (peripheral->bus->params.curr_bank) { - curr_scale_reg = SDW_SCP_BUSCLOCK_SCALE_B1; - next_scale_reg = SDW_SCP_BUSCLOCK_SCALE_B0; - } else { - curr_scale_reg = SDW_SCP_BUSCLOCK_SCALE_B0; - next_scale_reg = SDW_SCP_BUSCLOCK_SCALE_B1; - } - - /* - * Current clock scale value must be different to new value. - * Modify current to guarantee this. 
If next still has the dummy - * value we wrote when it was current, the core code has not set - * a new scale so restore its original good value - */ - curr_scale = sdw_read_no_pm(peripheral, curr_scale_reg); - if (curr_scale < 0) { - dev_err(cs35l56->base.dev, "Failed to read current clock scale: %d\n", curr_scale); - return curr_scale; - } - - next_scale = sdw_read_no_pm(peripheral, next_scale_reg); - if (next_scale < 0) { - dev_err(cs35l56->base.dev, "Failed to read next clock scale: %d\n", next_scale); - return next_scale; - } - - if (next_scale == CS35L56_SDW_INVALID_BUS_SCALE) { - next_scale = cs35l56->old_sdw_clock_scale; - ret = sdw_write_no_pm(peripheral, next_scale_reg, next_scale); - if (ret < 0) { - dev_err(cs35l56->base.dev, "Failed to modify current clock scale: %d\n", - ret); - return ret; - } - } - - cs35l56->old_sdw_clock_scale = curr_scale; - ret = sdw_write_no_pm(peripheral, curr_scale_reg, CS35L56_SDW_INVALID_BUS_SCALE); - if (ret < 0) { - dev_err(cs35l56->base.dev, "Failed to modify current clock scale: %d\n", ret); - return ret; - } - - dev_dbg(cs35l56->base.dev, "Next bus scale: %#x\n", next_scale); - - return 0; -} - -static int cs35l56_sdw_bus_config(struct sdw_slave *peripheral, - struct sdw_bus_params *params) -{ - struct cs35l56_private *cs35l56 = dev_get_drvdata(&peripheral->dev); - - if ((cs35l56->base.type == 0x63) && (cs35l56->base.rev < 0xa1)) - return cs35l63_sdw_kick_divider(cs35l56, peripheral); - - return 0; -} - static int __maybe_unused cs35l56_sdw_clk_stop(struct sdw_slave *peripheral, enum sdw_clk_stop_mode mode, enum sdw_clk_stop_type type) @@ -476,7 +408,6 @@ static const struct sdw_slave_ops cs35l56_sdw_ops = { .read_prop = cs35l56_sdw_read_prop, .interrupt_callback = cs35l56_sdw_interrupt, .update_status = cs35l56_sdw_update_status, - .bus_config = cs35l56_sdw_bus_config, #ifdef DEBUG .clk_stop = cs35l56_sdw_clk_stop, #endif diff --git a/sound/soc/codecs/cs35l56.h b/sound/soc/codecs/cs35l56.h index 
bd77a57249d7..40a1800a4585 100644 --- a/sound/soc/codecs/cs35l56.h +++ b/sound/soc/codecs/cs35l56.h @@ -20,8 +20,6 @@ #define CS35L56_SDW_GEN_INT_MASK_1 0xc1 #define CS35L56_SDW_INT_MASK_CODEC_IRQ BIT(0) -#define CS35L56_SDW_INVALID_BUS_SCALE 0xf - #define CS35L56_RX_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE) #define CS35L56_TX_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE \ | SNDRV_PCM_FMTBIT_S32_LE) @@ -52,7 +50,6 @@ struct cs35l56_private { u8 asp_slot_count; bool tdm_mode; bool sysclk_set; - u8 old_sdw_clock_scale; u8 sdw_link_num; u8 sdw_unique_id; }; From eb173ce0e23502e397eae75453936b3ecfb1fd84 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 14 Aug 2025 10:48:52 +0200 Subject: [PATCH 2236/2411] s390/configs: Update defconfigs The usual defconfig updates. Notable changes: - Enable ZONE_DEVICE, and with that DEV_DAX, FS_DAX, and FUSE_DAX - Enable CRYPTO_SELFTESTS_FULL for debug_defconfig - Enable CRASH_DM_CRYPT - Disable legacy IP_NF_FILTER Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/configs/debug_defconfig | 33 ++++++++++++++-------------- arch/s390/configs/defconfig | 32 ++++++++++++--------------- arch/s390/configs/zfcpdump_defconfig | 1 + 3 files changed, 31 insertions(+), 35 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 6b33429f1c4d..5e616bc988ac 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -5,6 +5,7 @@ CONFIG_WATCH_QUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_POSIX_AUX_CLOCKS=y CONFIG_BPF_SYSCALL=y CONFIG_BPF_JIT=y CONFIG_BPF_JIT_ALWAYS_ON=y @@ -19,6 +20,7 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_SCHED_PROXY_EXEC=y CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y CONFIG_BLK_CGROUP=y @@ -42,6 +44,7 @@ CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y +CONFIG_CRASH_DM_CRYPT=y 
CONFIG_LIVEPATCH=y CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 @@ -105,6 +108,7 @@ CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_ZONE_DEVICE=y CONFIG_PERCPU_STATS=y CONFIG_GUP_TEST=y CONFIG_ANON_VMA_NAME=y @@ -223,17 +227,19 @@ CONFIG_NETFILTER_XT_TARGET_CONNMARK=m CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m CONFIG_NETFILTER_XT_TARGET_CT=m CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HL=m CONFIG_NETFILTER_XT_TARGET_HMARK=m CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m CONFIG_NETFILTER_XT_TARGET_LOG=m CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_NAT=m CONFIG_NETFILTER_XT_TARGET_NETMAP=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m CONFIG_NETFILTER_XT_TARGET_REDIRECT=m +CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m CONFIG_NETFILTER_XT_TARGET_TEE=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m @@ -248,6 +254,7 @@ CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m CONFIG_NETFILTER_XT_MATCH_CONNMARK=m CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m CONFIG_NETFILTER_XT_MATCH_CPU=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m @@ -318,16 +325,8 @@ CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -340,15 +339,9 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m CONFIG_IP6_NF_MATCH_MH=m CONFIG_IP6_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m 
CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_SECURITY=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_NF_TABLES_BRIDGE=m +CONFIG_IP_SCTP=m CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m @@ -383,6 +376,7 @@ CONFIG_NET_SCH_FQ_CODEL=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_SCH_PLUG=m CONFIG_NET_SCH_ETS=m +CONFIG_NET_SCH_DUALPI2=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m @@ -504,6 +498,7 @@ CONFIG_DM_VDO=m CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m +CONFIG_OVPN=m CONFIG_EQUALIZER=m CONFIG_IFB=m CONFIG_MACVLAN=m @@ -641,6 +636,7 @@ CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m CONFIG_VHOST_VDPA=m +CONFIG_DEV_DAX=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -665,6 +661,7 @@ CONFIG_NILFS2_FS=m CONFIG_BCACHEFS_FS=y CONFIG_BCACHEFS_QUOTA=y CONFIG_BCACHEFS_POSIX_ACL=y +CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y @@ -755,6 +752,8 @@ CONFIG_HARDENED_USERCOPY=y CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_SELFTESTS=y +CONFIG_CRYPTO_SELFTESTS_FULL=y +CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_BENCHMARK=m @@ -783,7 +782,6 @@ CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_MD4=m @@ -822,6 +820,7 @@ CONFIG_SYSTEM_BLACKLIST_KEYRING=y CONFIG_CRYPTO_KRB5=m CONFIG_CRYPTO_KRB5_SELFTESTS=y CONFIG_CORDIC=m +CONFIG_TRACE_MMIO_ACCESS=y CONFIG_RANDOM32_SELFTEST=y CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index b75eb2775850..28f025051cdf 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -4,6 +4,7 @@ CONFIG_WATCH_QUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_POSIX_AUX_CLOCKS=y CONFIG_BPF_SYSCALL=y 
CONFIG_BPF_JIT=y CONFIG_BPF_JIT_ALWAYS_ON=y @@ -17,6 +18,7 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_SCHED_PROXY_EXEC=y CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y CONFIG_BLK_CGROUP=y @@ -40,6 +42,7 @@ CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y +CONFIG_CRASH_DM_CRYPT=y CONFIG_LIVEPATCH=y CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 @@ -97,6 +100,7 @@ CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_ZONE_DEVICE=y CONFIG_PERCPU_STATS=y CONFIG_ANON_VMA_NAME=y CONFIG_USERFAULTFD=y @@ -214,17 +218,19 @@ CONFIG_NETFILTER_XT_TARGET_CONNMARK=m CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m CONFIG_NETFILTER_XT_TARGET_CT=m CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HL=m CONFIG_NETFILTER_XT_TARGET_HMARK=m CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m CONFIG_NETFILTER_XT_TARGET_LOG=m CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_NAT=m CONFIG_NETFILTER_XT_TARGET_NETMAP=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m CONFIG_NETFILTER_XT_TARGET_REDIRECT=m +CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m CONFIG_NETFILTER_XT_TARGET_TEE=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m @@ -239,6 +245,7 @@ CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m CONFIG_NETFILTER_XT_MATCH_CONNMARK=m CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m CONFIG_NETFILTER_XT_MATCH_CPU=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m @@ -309,16 +316,8 @@ CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m 
-CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -331,15 +330,9 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m CONFIG_IP6_NF_MATCH_MH=m CONFIG_IP6_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_SECURITY=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_NF_TABLES_BRIDGE=m +CONFIG_IP_SCTP=m CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m @@ -373,6 +366,7 @@ CONFIG_NET_SCH_FQ_CODEL=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_SCH_PLUG=m CONFIG_NET_SCH_ETS=m +CONFIG_NET_SCH_DUALPI2=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m @@ -494,6 +488,7 @@ CONFIG_DM_VDO=m CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m +CONFIG_OVPN=m CONFIG_EQUALIZER=m CONFIG_IFB=m CONFIG_MACVLAN=m @@ -631,6 +626,7 @@ CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m CONFIG_VHOST_VDPA=m +CONFIG_DEV_DAX=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -652,6 +648,7 @@ CONFIG_NILFS2_FS=m CONFIG_BCACHEFS_FS=m CONFIG_BCACHEFS_QUOTA=y CONFIG_BCACHEFS_POSIX_ACL=y +CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y @@ -683,7 +680,6 @@ CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_INODE64=y CONFIG_TMPFS_QUOTA=y CONFIG_HUGETLBFS=y -CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_CRAMFS=m CONFIG_SQUASHFS=m @@ -741,6 +737,7 @@ CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_SELFTESTS=y +CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_BENCHMARK=m @@ -769,7 +766,6 @@ CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_MD4=m diff --git a/arch/s390/configs/zfcpdump_defconfig 
b/arch/s390/configs/zfcpdump_defconfig index 8163c1702720..23dd55dc41a3 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -1,5 +1,6 @@ CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_POSIX_AUX_CLOCKS=y CONFIG_BPF_SYSCALL=y # CONFIG_CPU_ISOLATION is not set # CONFIG_UTS_NS is not set From fcc43a7e294f877021c4fa71276920f543e8e298 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 14 Aug 2025 10:48:53 +0200 Subject: [PATCH 2237/2411] s390/configs: Set HZ=1000 Similar to powerpc, set HZ to 1000. See also commit a206d2334012 ("powerpc/defconfigs: Set HZ=1000 on ppc64 and powernv defconfigs"). Among other things, this will reduce the latency seen with synchronize_rcu(). Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/configs/defconfig | 2 +- arch/s390/configs/zfcpdump_defconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 28f025051cdf..094599cdaf4d 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -47,7 +47,7 @@ CONFIG_LIVEPATCH=y CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y -CONFIG_HZ_100=y +CONFIG_HZ_1000=y CONFIG_CERT_STORE=y CONFIG_EXPOLINE=y CONFIG_EXPOLINE_AUTO=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 23dd55dc41a3..ed0b137353ad 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -12,7 +12,7 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_KEXEC=y CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=2 -CONFIG_HZ_100=y +CONFIG_HZ_1000=y # CONFIG_CHSC_SCH is not set # CONFIG_SCM_BUS is not set # CONFIG_AP is not set From 430fa71027b6ac9bb0ce5532b8d0676777d4219a Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Mon, 18 Aug 2025 12:21:52 +0200 Subject: [PATCH 2238/2411] s390/sclp: Fix SCCB present check Tracing code called by the SCLP interrupt handler contains early exits if the SCCB address
associated with an interrupt is NULL. This check is performed after physical to virtual address translation. If the kernel identity mapping does not start at address zero, the resulting virtual address is never zero, so that the NULL checks won't work. Subsequently this may result in incorrect accesses to the first page of the identity mapping. Fix this by introducing a function that handles the NULL case before address translation. Fixes: ada1da31ce34 ("s390/sclp: sort out physical vs virtual pointers usage") Cc: stable@vger.kernel.org Reviewed-by: Alexander Gordeev Signed-off-by: Peter Oberparleiter Signed-off-by: Alexander Gordeev --- drivers/s390/char/sclp.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index f2e42c1d51aa..98e334724a62 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -77,6 +77,13 @@ unsigned long sclp_console_full; /* The currently active SCLP command word. */ static sclp_cmdw_t active_cmd; +static inline struct sccb_header *sclpint_to_sccb(u32 sccb_int) +{ + if (sccb_int) + return __va(sccb_int); + return NULL; +} + static inline void sclp_trace(int prio, char *id, u32 a, u64 b, bool err) { struct sclp_trace_entry e; @@ -620,7 +627,7 @@ __sclp_find_req(u32 sccb) static bool ok_response(u32 sccb_int, sclp_cmdw_t cmd) { - struct sccb_header *sccb = (struct sccb_header *)__va(sccb_int); + struct sccb_header *sccb = sclpint_to_sccb(sccb_int); struct evbuf_header *evbuf; u16 response; @@ -659,7 +666,7 @@ static void sclp_interrupt_handler(struct ext_code ext_code, /* INT: Interrupt received (a=intparm, b=cmd) */ sclp_trace_sccb(0, "INT", param32, active_cmd, active_cmd, - (struct sccb_header *)__va(finished_sccb), + sclpint_to_sccb(finished_sccb), !ok_response(finished_sccb, active_cmd)); if (finished_sccb) { From 93f616ff870a1fb7e84d472cad0af651b18f9f87 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 7 Aug 2025 17:04:27 +0200 
Subject: [PATCH 2239/2411] s390/mm: Do not map lowcore with identity mapping Since the identity mapping is pinned to address zero the lowcore is always also mapped to address zero, this happens regardless of the relocate_lowcore command line option. If the option is specified the lowcore is mapped twice, instead of only once. This means that NULL pointer accesses will succeed instead of causing an exception (low address protection still applies, but covers only parts). To fix this never map the first two pages of physical memory with the identity mapping. Fixes: 32db401965f1 ("s390/mm: Pin identity mapping base to zero") Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/vmem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 1d073acd05a7..cea3de4dce8c 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -530,6 +530,9 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l lowcore_address + sizeof(struct lowcore), POPULATE_LOWCORE); for_each_physmem_usable_range(i, &start, &end) { + /* Do not map lowcore with identity mapping */ + if (!start) + start = sizeof(struct lowcore); pgtable_populate((unsigned long)__identity_va(start), (unsigned long)__identity_va(end), POPULATE_IDENTITY); From ec879e1a0be8007aa232ffedcf6a6445dfc1a3d7 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sat, 16 Aug 2025 23:10:51 +0900 Subject: [PATCH 2240/2411] tracing: fprobe-event: Sanitize wildcard for fprobe event name Fprobe event accepts wildcards for the target functions, but unless user specifies its event name, it makes an event with the wildcards. /sys/kernel/tracing # echo 'f mutex*' >> dynamic_events /sys/kernel/tracing # cat dynamic_events f:fprobes/mutex*__entry mutex* /sys/kernel/tracing # ls events/fprobes/ enable filter mutex*__entry To fix this, replace the wildcard ('*') with an underscore. 
Link: https://lore.kernel.org/all/175535345114.282990.12294108192847938710.stgit@devnote2/ Fixes: 334e5519c375 ("tracing/probes: Add fprobe events for tracing function entry and exit.") Signed-off-by: Masami Hiramatsu (Google) Cc: stable@vger.kernel.org --- kernel/trace/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 1dbf1d3cf2f1..5a6688832da8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -2204,7 +2204,7 @@ static inline bool is_good_system_name(const char *name) static inline void sanitize_event_name(char *name) { while (*name++ != '\0') - if (*name == ':' || *name == '.') + if (*name == ':' || *name == '.' || *name == '*') *name = '_'; } From 4be8cefc132606b4a6e851f37f8e8c40c406c910 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Wed, 20 Aug 2025 22:51:14 +0800 Subject: [PATCH 2241/2411] LoongArch: KVM: Make function kvm_own_lbt() robust Add the flag KVM_LARCH_LBT checking in function kvm_own_lbt(), so that it can be called safely rather than duplicated enabling again. 
Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/vcpu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index d1b8c50941ca..ce478151466c 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -1283,9 +1283,11 @@ int kvm_own_lbt(struct kvm_vcpu *vcpu) return -EINVAL; preempt_disable(); - set_csr_euen(CSR_EUEN_LBTEN); - _restore_lbt(&vcpu->arch.lbt); - vcpu->arch.aux_inuse |= KVM_LARCH_LBT; + if (!(vcpu->arch.aux_inuse & KVM_LARCH_LBT)) { + set_csr_euen(CSR_EUEN_LBTEN); + _restore_lbt(&vcpu->arch.lbt); + vcpu->arch.aux_inuse |= KVM_LARCH_LBT; + } preempt_enable(); return 0; From 5c68549c81bcca70fc464e305ffeefd9af968287 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Wed, 20 Aug 2025 22:51:15 +0800 Subject: [PATCH 2242/2411] LoongArch: KVM: Fix stack protector issue in send_ipi_data() Function kvm_io_bus_read() is called in function send_ipi_data(), and the buffer size of parameter *val should be at least 8 bytes, since some emulation functions like loongarch_ipi_readl() and kvm_eiointc_read() will write the buffer *val with 8 bytes (sign-extended) regardless of parameter len. Otherwise there will be a buffer overflow issue when CONFIG_STACKPROTECTOR is enabled.
The bug report is shown as follows: Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: send_ipi_data+0x194/0x1a0 [kvm] CPU: 11 UID: 107 PID: 2692 Comm: CPU 0/KVM Not tainted 6.17.0-rc1+ #102 PREEMPT(full) Stack : 9000000005901568 0000000000000000 9000000003af371c 900000013c68c000 900000013c68f850 900000013c68f858 0000000000000000 900000013c68f998 900000013c68f990 900000013c68f990 900000013c68f6c0 fffffffffffdb058 fffffffffffdb0e0 900000013c68f858 911e1d4d39cf0ec2 9000000105657a00 0000000000000001 fffffffffffffffe 0000000000000578 282049464555206e 6f73676e6f6f4c20 0000000000000001 00000000086b4000 0000000000000000 0000000000000000 0000000000000000 9000000005709968 90000000058f9000 900000013c68fa68 900000013c68fab4 90000000029279f0 900000010153f940 900000010001f360 0000000000000000 9000000003af3734 000000004390000c 00000000000000b0 0000000000000004 0000000000000000 0000000000071c1d ... Call Trace: [<9000000003af3734>] show_stack+0x5c/0x180 [<9000000003aed168>] dump_stack_lvl+0x6c/0x9c [<9000000003ad0ab0>] vpanic+0x108/0x2c4 [<9000000003ad0ca8>] panic+0x3c/0x40 [<9000000004eb0a1c>] __stack_chk_fail+0x14/0x18 [] send_ipi_data+0x190/0x1a0 [kvm] [] __kvm_io_bus_write+0xa4/0xe8 [kvm] [] kvm_io_bus_write+0x54/0x90 [kvm] [] kvm_emu_iocsr+0x180/0x310 [kvm] [] kvm_handle_gspr+0x280/0x478 [kvm] [] kvm_handle_exit+0xc0/0x130 [kvm] Cc: stable@vger.kernel.org Fixes: daee2f9cae551 ("LoongArch: KVM: Add IPI read and write function") Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/intc/ipi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index e658d5b37c04..7925651d2ccf 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -99,7 +99,7 @@ static void write_mailbox(struct kvm_vcpu *vcpu, int offset, uint64_t data, int static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) { int i, idx, ret; - uint32_t 
val = 0, mask = 0; + uint64_t val = 0, mask = 0; /* * Bit 27-30 is mask for byte writing. @@ -108,7 +108,7 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) if ((data >> 27) & 0xf) { /* Read the old val */ idx = srcu_read_lock(&vcpu->kvm->srcu); - ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, 4, &val); srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) { kvm_err("%s: : read data from addr %llx failed\n", __func__, addr); @@ -124,7 +124,7 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) } val |= ((uint32_t)(data >> 32) & ~mask); idx = srcu_read_lock(&vcpu->kvm->srcu); - ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, 4, &val); srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) kvm_err("%s: : write data to addr %llx failed\n", __func__, addr); From 0dfd9ea7bf80fabe11f5b775d762a5cd168cdf41 Mon Sep 17 00:00:00 2001 From: Song Gao Date: Wed, 20 Aug 2025 22:51:15 +0800 Subject: [PATCH 2243/2411] LoongArch: KVM: Use kvm_get_vcpu_by_id() instead of kvm_get_vcpu() Since using kvm_get_vcpu() may fail to retrieve the vCPU context, kvm_get_vcpu_by_id() should be used instead. 
Fixes: 8e3054261bc3 ("LoongArch: KVM: Add IPI user mode read and write function") Fixes: 3956a52bc05b ("LoongArch: KVM: Add EIOINTC read and write functions") Reviewed-by: Yanteng Si Signed-off-by: Song Gao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/intc/eiointc.c | 7 ++++++- arch/loongarch/kvm/intc/ipi.c | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c index a3a12af9ecbf..026b139dcff2 100644 --- a/arch/loongarch/kvm/intc/eiointc.c +++ b/arch/loongarch/kvm/intc/eiointc.c @@ -45,7 +45,12 @@ static void eiointc_update_irq(struct loongarch_eiointc *s, int irq, int level) } cpu = s->sw_coremap[irq]; - vcpu = kvm_get_vcpu(s->kvm, cpu); + vcpu = kvm_get_vcpu_by_id(s->kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return; + } + if (level) { /* if not enable return false */ if (!test_bit(irq, (unsigned long *)s->enable.reg_u32)) diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index 7925651d2ccf..5a8481dda052 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -298,7 +298,7 @@ static int kvm_ipi_regs_access(struct kvm_device *dev, cpu = (attr->attr >> 16) & 0x3ff; addr = attr->attr & 0xff; - vcpu = kvm_get_vcpu(dev->kvm, cpu); + vcpu = kvm_get_vcpu_by_id(dev->kvm, cpu); if (unlikely(vcpu == NULL)) { kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); return -EINVAL; From 538c06e3964a8e94b645686cc58ccc4a06fa6330 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Wed, 20 Aug 2025 22:51:15 +0800 Subject: [PATCH 2244/2411] LoongArch: KVM: Add address alignment check in pch_pic register access With pch_pic device, its register is based on MMIO address space, different access size 1/2/4/8 is supported. And base address should be naturally aligned with its access size, here add alignment check in its register access emulation function. 
Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/intc/pch_pic.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/loongarch/kvm/intc/pch_pic.c b/arch/loongarch/kvm/intc/pch_pic.c index 6f00ffe05c54..119290bcea79 100644 --- a/arch/loongarch/kvm/intc/pch_pic.c +++ b/arch/loongarch/kvm/intc/pch_pic.c @@ -195,6 +195,11 @@ static int kvm_pch_pic_read(struct kvm_vcpu *vcpu, return -EINVAL; } + if (addr & (len - 1)) { + kvm_err("%s: pch pic not aligned addr %llx len %d\n", __func__, addr, len); + return -EINVAL; + } + /* statistics of pch pic reading */ vcpu->stat.pch_pic_read_exits++; ret = loongarch_pch_pic_read(s, addr, len, val); @@ -302,6 +307,11 @@ static int kvm_pch_pic_write(struct kvm_vcpu *vcpu, return -EINVAL; } + if (addr & (len - 1)) { + kvm_err("%s: pch pic not aligned addr %llx len %d\n", __func__, addr, len); + return -EINVAL; + } + /* statistics of pch pic writing */ vcpu->stat.pch_pic_write_exits++; ret = loongarch_pch_pic_write(s, addr, len, val); From 729dc340a4ed1267774fc8518284e976e2210bdc Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 17 Aug 2025 16:21:46 +0200 Subject: [PATCH 2245/2411] bootconfig: Fix negative seeks on 32-bit with LFS enabled Commit 26dda5769509 "tools/bootconfig: Cleanup bootconfig footer size calculations" replaced some expressions of type int with the BOOTCONFIG_FOOTER_SIZE macro, which expands to an expression of type size_t, which is unsigned. On 32-bit architectures with LFS enabled (i.e. off_t is 64-bit), the seek offset of -BOOTCONFIG_FOOTER_SIZE now turns into a positive value. Fix this by casting the size to off_t before negating it. Just in case someone changes BOOTCONFIG_MAGIC_LEN to have type size_t later, do the same thing to the seek offset of -BOOTCONFIG_MAGIC_LEN. 
Link: https://lore.kernel.org/all/aKHlevxeg6Y7UQrz@decadent.org.uk/ Fixes: 26dda5769509 ("tools/bootconfig: Cleanup bootconfig footer size calculations") Signed-off-by: Ben Hutchings Signed-off-by: Masami Hiramatsu (Google) --- tools/bootconfig/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 57c669d2aa90..55d59ed507d5 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -193,7 +193,7 @@ static int load_xbc_from_initrd(int fd, char **buf) if (stat.st_size < BOOTCONFIG_FOOTER_SIZE) return 0; - if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) + if (lseek(fd, -(off_t)BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) return pr_errno("Failed to lseek for magic", -errno); if (read(fd, magic, BOOTCONFIG_MAGIC_LEN) < 0) @@ -203,7 +203,7 @@ static int load_xbc_from_initrd(int fd, char **buf) if (memcmp(magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN) != 0) return 0; - if (lseek(fd, -BOOTCONFIG_FOOTER_SIZE, SEEK_END) < 0) + if (lseek(fd, -(off_t)BOOTCONFIG_FOOTER_SIZE, SEEK_END) < 0) return pr_errno("Failed to lseek for size", -errno); if (read(fd, &size, sizeof(uint32_t)) < 0) From c81f6ce16785cc07ae81f53deb07b662ed0bb3a5 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Mon, 18 Aug 2025 08:22:21 -0700 Subject: [PATCH 2246/2411] of: dynamic: Fix memleak when of_pci_add_properties() failed When of_pci_add_properties() failed, of_changeset_destroy() is called to free the changeset. And of_changeset_destroy() puts device tree node in each entry but does not free property in the entry. This leads to memory leak in the failure case. In of_changeset_add_prop_helper(), add the property to the device tree node deadprops list. Thus, the property will also be freed along with device tree node. 
Fixes: b544fc2b8606 ("of: dynamic: Add interfaces for creating device node dynamically") Reported-by: Lorenzo Pieralisi Closes: https://lore.kernel.org/all/aJms+YT8TnpzpCY8@lpieralisi/ Tested-by: Lorenzo Pieralisi Signed-off-by: Lizhi Hou Link: https://lore.kernel.org/r/20250818152221.3685724-1-lizhi.hou@amd.com Signed-off-by: Rob Herring (Arm) --- drivers/of/dynamic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 0aba760f7577..dd30b7d8b5e4 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -938,6 +938,9 @@ static int of_changeset_add_prop_helper(struct of_changeset *ocs, if (ret) __of_prop_free(new_pp); + new_pp->next = np->deadprops; + np->deadprops = new_pp; + return ret; } From 4d4d9ef9dfee877d494e5418f68a1016ef08cad6 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Tue, 19 Aug 2025 15:19:57 -0700 Subject: [PATCH 2247/2411] ixgbe: xsk: resolve the negative overflow of budget in ixgbe_xmit_zc Resolve the budget negative overflow which leads to returning true in ixgbe_xmit_zc even when the budget of descs are thoroughly consumed. Before this patch, when the budget is decreased to zero and finishes sending the last allowed desc in ixgbe_xmit_zc, it will always turn back and enter into the while() statement to see if it should keep processing packets, but in the meantime it unexpectedly decreases the value again to 'unsigned int (0--)', namely, UINT_MAX. Finally, the ixgbe_xmit_zc returns true, showing 'we complete cleaning the budget'. That also means 'clean_complete = true' in ixgbe_poll. The true theory behind this is if that budget number of descs are consumed, it implies that we might have more descs to be done. So we should return false in ixgbe_xmit_zc to tell napi poll to find another chance to start polling to handle the rest of descs. On the contrary, returning true here means job done and we know we finish all the possible descs this time and we don't intend to start a new napi poll. 
It is apparently against our expectations. Please also see how ixgbe_clean_tx_irq() handles the problem: it uses do..while() statement to make sure the budget can be decreased to zero at most and the negative overflow never happens. The patch adds 'likely' because we rarely would not hit the loop condition since the standard budget is 256. Fixes: 8221c5eba8c1 ("ixgbe: add AF_XDP zero-copy Tx support") Signed-off-by: Jason Xing Reviewed-by: Larysa Zaremba Reviewed-by: Paul Menzel Reviewed-by: Aleksandr Loktionov Tested-by: Priya Singh Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20250819222000.3504873-4-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index ac58964b2f08..7b941505a9d0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -398,7 +398,7 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) dma_addr_t dma; u32 cmd_type; - while (budget-- > 0) { + while (likely(budget)) { if (unlikely(!ixgbe_desc_unused(xdp_ring))) { work_done = false; break; @@ -433,6 +433,8 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) xdp_ring->next_to_use++; if (xdp_ring->next_to_use == xdp_ring->count) xdp_ring->next_to_use = 0; + + budget--; } if (tx_desc) { From f3d9f7fa7f5dbfd4fdb1e69c25fc5627700d19dd Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 19 Aug 2025 15:19:58 -0700 Subject: [PATCH 2248/2411] ixgbe: fix ndo_xdp_xmit() workloads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently ixgbe driver checks periodically in its watchdog subtask if there is anything to be transmitted (considering both Tx and XDP rings) under state of carrier not being 'ok'. 
Such event is interpreted as Tx hang and therefore results in interface reset. This is currently problematic for ndo_xdp_xmit() as it is allowed to produce descriptors when interface is going through reset or its carrier is turned off. Furthermore, XDP rings should not really be objects of Tx hang detection. This mechanism is rather a matter of ndo_tx_timeout() being called from dev_watchdog against Tx rings exposed to networking stack. Taking into account issues described above, let us have a two fold fix - do not respect XDP rings in local ixgbe watchdog and do not produce Tx descriptors in ndo_xdp_xmit callback when there is some problem with carrier currently. For now, keep the Tx hang checks in clean Tx irq routine, but adjust it to not execute for XDP rings. Cc: Tobias Böhm Reported-by: Marcus Wichelmann Closes: https://lore.kernel.org/netdev/eca1880f-253a-4955-afe6-732d7c6926ee@hetzner-cloud.de/ Fixes: 6453073987ba ("ixgbe: add initial support for xdp redirect") Fixes: 33fdc82f0883 ("ixgbe: add support for XDP_TX action") Reviewed-by: Aleksandr Loktionov Tested-by: Marcus Wichelmann Signed-off-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20250819222000.3504873-5-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 34 ++++++------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 6122a0abb41f..80e6a2ef1350 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -968,10 +968,6 @@ static void ixgbe_update_xoff_rx_lfc(struct ixgbe_adapter *adapter) for (i = 0; i < adapter->num_tx_queues; i++) clear_bit(__IXGBE_HANG_CHECK_ARMED, &adapter->tx_ring[i]->state); - - for (i = 0; i < adapter->num_xdp_queues; i++) - clear_bit(__IXGBE_HANG_CHECK_ARMED, - &adapter->xdp_ring[i]->state); } static void 
ixgbe_update_xoff_received(struct ixgbe_adapter *adapter) @@ -1214,7 +1210,7 @@ static void ixgbe_pf_handle_tx_hang(struct ixgbe_ring *tx_ring, struct ixgbe_adapter *adapter = netdev_priv(tx_ring->netdev); struct ixgbe_hw *hw = &adapter->hw; - e_err(drv, "Detected Tx Unit Hang%s\n" + e_err(drv, "Detected Tx Unit Hang\n" " Tx Queue <%d>\n" " TDH, TDT <%x>, <%x>\n" " next_to_use <%x>\n" @@ -1222,16 +1218,14 @@ static void ixgbe_pf_handle_tx_hang(struct ixgbe_ring *tx_ring, "tx_buffer_info[next_to_clean]\n" " time_stamp <%lx>\n" " jiffies <%lx>\n", - ring_is_xdp(tx_ring) ? " (XDP)" : "", tx_ring->queue_index, IXGBE_READ_REG(hw, IXGBE_TDH(tx_ring->reg_idx)), IXGBE_READ_REG(hw, IXGBE_TDT(tx_ring->reg_idx)), tx_ring->next_to_use, next, tx_ring->tx_buffer_info[next].time_stamp, jiffies); - if (!ring_is_xdp(tx_ring)) - netif_stop_subqueue(tx_ring->netdev, - tx_ring->queue_index); + netif_stop_subqueue(tx_ring->netdev, + tx_ring->queue_index); } /** @@ -1451,6 +1445,9 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, total_bytes); adapter->tx_ipsec += total_ipsec; + if (ring_is_xdp(tx_ring)) + return !!budget; + if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) { if (adapter->hw.mac.type == ixgbe_mac_e610) ixgbe_handle_mdd_event(adapter, tx_ring); @@ -1468,9 +1465,6 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, return true; } - if (ring_is_xdp(tx_ring)) - return !!budget; - #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index); if (!__netif_txq_completed_wake(txq, total_packets, total_bytes, @@ -7974,12 +7968,9 @@ static void ixgbe_check_hang_subtask(struct ixgbe_adapter *adapter) return; /* Force detection of hung controller */ - if (netif_carrier_ok(adapter->netdev)) { + if (netif_carrier_ok(adapter->netdev)) for (i = 0; i < adapter->num_tx_queues; i++) set_check_for_tx_hang(adapter->tx_ring[i]); - for (i = 0; i < adapter->num_xdp_queues; i++) - 
set_check_for_tx_hang(adapter->xdp_ring[i]); - } if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) { /* @@ -8199,13 +8190,6 @@ static bool ixgbe_ring_tx_pending(struct ixgbe_adapter *adapter) return true; } - for (i = 0; i < adapter->num_xdp_queues; i++) { - struct ixgbe_ring *ring = adapter->xdp_ring[i]; - - if (ring->next_to_use != ring->next_to_clean) - return true; - } - return false; } @@ -11005,6 +10989,10 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n, if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state))) return -ENETDOWN; + if (!netif_carrier_ok(adapter->netdev) || + !netif_running(adapter->netdev)) + return -ENETDOWN; + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; From 1468c1f97cf32418e34dbb40b784ed9333b9e123 Mon Sep 17 00:00:00 2001 From: ValdikSS Date: Tue, 19 Aug 2025 15:19:59 -0700 Subject: [PATCH 2249/2411] igc: fix disabling L1.2 PCI-E link substate on I226 on init Device ID comparison in igc_is_device_id_i226 is performed before the ID is set, resulting in always failing check on init. Before the patch: * L1.2 is not disabled on init * L1.2 is properly disabled after suspend-resume cycle With the patch: * L1.2 is properly disabled both on init and after suspend-resume How to test: Connect to the 1G link with 300+ mbit/s Internet speed, and run the download speed test, such as: curl -o /dev/null http://speedtest.selectel.ru/1GB Without L1.2 disabled, the speed would be no more than ~200 mbit/s. With L1.2 disabled, the speed would reach 1 gbit/s. Note: it's required that the latency between your host and the remote be around 3-5 ms, the test inside LAN (<1 ms latency) won't trigger the issue. 
Link: https://lore.kernel.org/intel-wired-lan/15248b4f-3271-42dd-8e35-02bfc92b25e1@intel.com Fixes: 0325143b59c6 ("igc: disable L1.2 PCI-E link substate to avoid performance issue") Signed-off-by: ValdikSS Reviewed-by: Vitaly Lifshits Reviewed-by: Paul Menzel Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20250819222000.3504873-6-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igc/igc_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 458e5eaa92e5..e79b14d50b24 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -7149,6 +7149,13 @@ static int igc_probe(struct pci_dev *pdev, adapter->port_num = hw->bus.func; adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); + /* PCI config space info */ + hw->vendor_id = pdev->vendor; + hw->device_id = pdev->device; + hw->revision_id = pdev->revision; + hw->subsystem_vendor_id = pdev->subsystem_vendor; + hw->subsystem_device_id = pdev->subsystem_device; + /* Disable ASPM L1.2 on I226 devices to avoid packet loss */ if (igc_is_device_id_i226(hw)) pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); @@ -7175,13 +7182,6 @@ static int igc_probe(struct pci_dev *pdev, netdev->mem_start = pci_resource_start(pdev, 0); netdev->mem_end = pci_resource_end(pdev, 0); - /* PCI config space info */ - hw->vendor_id = pdev->vendor; - hw->device_id = pdev->device; - hw->revision_id = pdev->revision; - hw->subsystem_vendor_id = pdev->subsystem_vendor; - hw->subsystem_device_id = pdev->subsystem_device; - /* Copy the default MAC and PHY function pointers */ memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); From e318cd6714592fb762fcab59c5684a442243a12f Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Mon, 18 Aug 2025 18:04:57 -0700 Subject: [PATCH 
2250/2411] net: dsa: microchip: Fix KSZ9477 HSR port setup issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ksz9477_hsr_join() is called once to setup the HSR port membership, but the port can be enabled later, or disabled and enabled back and the port membership is not set correctly inside ksz_update_port_member(). The added code always uses the correct HSR port membership for an HSR port that is enabled. Fixes: 2d61298fdd7b ("net: dsa: microchip: Enable HSR offloading for KSZ9477") Reported-by: Frieder Schrempf Signed-off-by: Tristram Ha Reviewed-by: Łukasz Majewski Tested-by: Frieder Schrempf Reviewed-by: Frieder Schrempf Link: https://patch.msgid.link/20250819010457.563286-1-Tristram.Ha@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz_common.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 4cb14288ff0f..9568cc391fe3 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -2457,6 +2457,12 @@ static void ksz_update_port_member(struct ksz_device *dev, int port) dev->dev_ops->cfg_port_member(dev, i, val | cpu_port); } + /* HSR ports are setup once so need to use the assigned membership + * when the port is enabled.
+ */ + if (!port_member && p->stp_state == BR_STATE_FORWARDING && + (dev->hsr_ports & BIT(port))) + port_member = dev->hsr_ports; dev->dev_ops->cfg_port_member(dev, port, port_member | cpu_port); } From 15de71d06a400f7fdc15bf377a2552b0ec437cf5 Mon Sep 17 00:00:00 2001 From: William Liu Date: Tue, 19 Aug 2025 03:36:28 +0000 Subject: [PATCH 2251/2411] net/sched: Make cake_enqueue return NET_XMIT_CN when past buffer_limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following setup can trigger a WARNING in htb_activate due to the condition: !cl->leaf.q->q.qlen tc qdisc del dev lo root tc qdisc add dev lo root handle 1: htb default 1 tc class add dev lo parent 1: classid 1:1 \ htb rate 64bit tc qdisc add dev lo parent 1:1 handle f: \ cake memlimit 1b ping -I lo -f -c1 -s64 -W0.001 127.0.0.1 This is because the low memlimit leads to a low buffer_limit, which causes packet dropping. However, cake_enqueue still returns NET_XMIT_SUCCESS, causing htb_enqueue to call htb_activate with an empty child qdisc. We should return NET_XMIT_CN when packets are dropped from the same tin and flow. I do not believe return value of NET_XMIT_CN is necessary for packet drops in the case of ack filtering, as that is meant to optimize performance, not to signal congestion. 
Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") Signed-off-by: William Liu Reviewed-by: Savino Dicanosa Acked-by: Toke Høiland-Jørgensen Reviewed-by: Jamal Hadi Salim Link: https://patch.msgid.link/20250819033601.579821-1-will@willsroot.io Signed-off-by: Jakub Kicinski --- net/sched/sch_cake.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index dbcfb948c867..32bacfc314c2 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1750,7 +1750,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, ktime_t now = ktime_get(); struct cake_tin_data *b; struct cake_flow *flow; - u32 idx; + u32 idx, tin; /* choose flow to insert into */ idx = cake_classify(sch, &b, skb, q->flow_mode, &ret); @@ -1760,6 +1760,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, __qdisc_drop(skb, to_free); return ret; } + tin = (u32)(b - q->tins); idx--; flow = &b->flows[idx]; @@ -1927,13 +1928,22 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, q->buffer_max_used = q->buffer_used; if (q->buffer_used > q->buffer_limit) { + bool same_flow = false; u32 dropped = 0; + u32 drop_id; while (q->buffer_used > q->buffer_limit) { dropped++; - cake_drop(sch, to_free); + drop_id = cake_drop(sch, to_free); + + if ((drop_id >> 16) == tin && + (drop_id & 0xFFFF) == idx) + same_flow = true; } b->drop_overlimit += dropped; + + if (same_flow) + return NET_XMIT_CN; } return NET_XMIT_SUCCESS; } From 2c2192e5f9c7c2892fe2363244d1387f62710d83 Mon Sep 17 00:00:00 2001 From: William Liu Date: Tue, 19 Aug 2025 03:36:59 +0000 Subject: [PATCH 2252/2411] net/sched: Remove unnecessary WARNING condition for empty child qdisc in htb_activate The WARN_ON trigger based on !cl->leaf.q->q.qlen is unnecessary in htb_activate. htb_dequeue_tree already accounts for that scenario. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: William Liu Reviewed-by: Savino Dicanosa Link: https://patch.msgid.link/20250819033632.579854-1-will@willsroot.io Signed-off-by: Jakub Kicinski --- net/sched/sch_htb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index c968ea763774..b5e40c51655a 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -592,7 +592,7 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff) */ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) { - WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen); + WARN_ON(cl->level || !cl->leaf.q); if (!cl->prio_activity) { cl->prio_activity = 1 << cl->prio; From 7af76e9d18a9fd6f8611b3313c86c190f9b6a5a7 Mon Sep 17 00:00:00 2001 From: Jakub Acs Date: Tue, 19 Aug 2025 08:28:42 +0000 Subject: [PATCH 2253/2411] net, hsr: reject HSR frame if skb can't hold tag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Receiving HSR frame with insufficient space to hold HSR tag in the skb can result in a crash (kernel BUG): [ 45.390915] skbuff: skb_under_panic: text:ffffffff86f32cac len:26 put:14 head:ffff888042418000 data:ffff888042417ff4 tail:0xe end:0x180 dev:bridge_slave_1 [ 45.392559] ------------[ cut here ]------------ [ 45.392912] kernel BUG at net/core/skbuff.c:211! 
[ 45.393276] Oops: invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI [ 45.393809] CPU: 1 UID: 0 PID: 2496 Comm: reproducer Not tainted 6.15.0 #12 PREEMPT(undef) [ 45.394433] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 [ 45.395273] RIP: 0010:skb_panic+0x15b/0x1d0 [ 45.402911] Call Trace: [ 45.403105] [ 45.404470] skb_push+0xcd/0xf0 [ 45.404726] br_dev_queue_push_xmit+0x7c/0x6c0 [ 45.406513] br_forward_finish+0x128/0x260 [ 45.408483] __br_forward+0x42d/0x590 [ 45.409464] maybe_deliver+0x2eb/0x420 [ 45.409763] br_flood+0x174/0x4a0 [ 45.410030] br_handle_frame_finish+0xc7c/0x1bc0 [ 45.411618] br_handle_frame+0xac3/0x1230 [ 45.413674] __netif_receive_skb_core.constprop.0+0x808/0x3df0 [ 45.422966] __netif_receive_skb_one_core+0xb4/0x1f0 [ 45.424478] __netif_receive_skb+0x22/0x170 [ 45.424806] process_backlog+0x242/0x6d0 [ 45.425116] __napi_poll+0xbb/0x630 [ 45.425394] net_rx_action+0x4d1/0xcc0 [ 45.427613] handle_softirqs+0x1a4/0x580 [ 45.427926] do_softirq+0x74/0x90 [ 45.428196] This issue was found by syzkaller. The panic happens in br_dev_queue_push_xmit() once it receives a corrupted skb with ETH header already pushed in linear data. When it attempts the skb_push() call, there's not enough headroom and skb_push() panics. The corrupted skb is put on the queue by HSR layer, which makes a sequence of unintended transformations when it receives a specific corrupted HSR frame (with incomplete TAG). Fix it by dropping and consuming frames that are not long enough to contain both ethernet and hsr headers. Alternative fix would be to check for enough headroom before skb_push() in br_dev_queue_push_xmit(). In the reproducer, this is injected via AF_PACKET, but I don't easily see why it couldn't be sent over the wire from adjacent network. 
Further Details: In the reproducer, the following network interface chain is set up: ┌────────────────┐ ┌────────────────┐ │ veth0_to_hsr ├───┤ hsr_slave0 ┼───┐ └────────────────┘ └────────────────┘ │ │ ┌──────┐ ├─┤ hsr0 ├───┐ │ └──────┘ │ ┌────────────────┐ ┌────────────────┐ │ │┌────────┐ │ veth1_to_hsr ┼───┤ hsr_slave1 ├───┘ └┤ │ └────────────────┘ └────────────────┘ ┌┼ bridge │ ││ │ │└────────┘ │ ┌───────┐ │ │ ... ├──────┘ └───────┘ To trigger the events leading up to the crash, the reproducer sends a corrupted HSR frame with incomplete TAG, via AF_PACKET socket on 'veth0_to_hsr'. The first HSR-layer function to process this frame is hsr_handle_frame(). It then checks if the protocol is ETH_P_PRP or ETH_P_HSR. If it is, it calls skb_set_network_header(skb, ETH_HLEN + HSR_HLEN), without checking that the skb is long enough. For the crashing frame it is not, and hence the skb->network_header and skb->mac_len fields are set incorrectly, pointing after the end of the linear buffer. I will call this BUG#1 and it is what is addressed by this patch. In the crashing scenario before the fix, the skb continues to go down the hsr path as follows. hsr_handle_frame() then calls this sequence hsr_forward_skb() fill_frame_info() hsr->proto_ops->fill_frame_info() hsr_fill_frame_info() hsr_fill_frame_info() contains a check that intends to check whether the skb actually contains the HSR header. But the check relies on the skb->mac_len field which was erroneously set up due to BUG#1, so the check passes and the execution continues back in the hsr_forward_skb(): hsr_forward_skb() hsr_forward_do() hsr->proto_ops->get_untagged_frame() hsr_get_untagged_frame() create_stripped_skb_hsr() In create_stripped_skb_hsr(), a copy of the skb is created and is further corrupted by operation that attempts to strip the HSR tag in a call to __pskb_copy(). The skb enters create_stripped_skb_hsr() with ethernet header pushed in linear buffer.
The skb_pull(skb_in, HSR_HLEN) thus pulls 6 bytes of ethernet header into the headroom, creating skb_in with a headroom of size 8. The subsequent __pskb_copy() then creates an skb with headroom of just 2 and skb->len of just 12, this is how it looks after the copy: (gdb) p skb->len $10 = 12 (gdb) p skb->data $11 = (unsigned char *) 0xffff888041e45382 "\252\252\252\252\252!\210\373", (gdb) p skb->head $12 = (unsigned char *) 0xffff888041e45380 "" It seems create_stripped_skb_hsr() assumes that ETH header is pulled in the headroom when it's entered, because it just pulls HSR header on top. But that is not the case in our code-path and we end up with the corrupted skb instead. I will call this BUG#2. *I got confused here because it seems that under no conditions can create_stripped_skb_hsr() work well, the assumption it makes is not true during the processing of hsr frames - since the skb_push() in hsr_handle_frame to skb_pull in hsr_deliver_master(). I wonder whether I missed something here.* Next, the execution arrives in hsr_deliver_master(). It calls skb_pull(ETH_HLEN), which just returns NULL - the SKB does not have enough space for the pull (as it only has 12 bytes in total at this point). *The skb_pull() here further suggests that ethernet header is meant to be pushed through the whole hsr processing and create_stripped_skb_hsr() should pull it before doing the HSR header pull.* hsr_deliver_master() then puts the corrupted skb on the queue, it is then picked up from there by bridge frame handling layer and finally lands in br_dev_queue_push_xmit where it panics.
Cc: stable@kernel.org Fixes: 48b491a5cc74 ("net: hsr: fix mac_len checks") Reported-by: syzbot+a81f2759d022496b40ab@syzkaller.appspotmail.com Signed-off-by: Jakub Acs Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250819082842.94378-1-acsjakub@amazon.de Signed-off-by: Jakub Kicinski --- net/hsr/hsr_slave.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index b87b6a6fe070..102eccf5ead7 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -63,8 +63,14 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) skb_push(skb, ETH_HLEN); skb_reset_mac_header(skb); if ((!hsr->prot_version && protocol == htons(ETH_P_PRP)) || - protocol == htons(ETH_P_HSR)) + protocol == htons(ETH_P_HSR)) { + if (!pskb_may_pull(skb, ETH_HLEN + HSR_HLEN)) { + kfree_skb(skb); + goto finish_consume; + } + skb_set_network_header(skb, ETH_HLEN + HSR_HLEN); + } skb_reset_mac_len(skb); /* Only the frames received over the interlink port will assign a From a458b2902115b26a25d67393b12ddd57d1216aaa Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 18 Aug 2025 13:27:24 -0700 Subject: [PATCH 2254/2411] ipv6: sr: Fix MAC comparison to be constant-time To prevent timing attacks, MACs need to be compared in constant time. Use the appropriate helper function for this. 
Fixes: bf355b8d2c30 ("ipv6: sr: add core files for SR HMAC support") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Reviewed-by: Andrea Mayer Link: https://patch.msgid.link/20250818202724.15713-1-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv6/seg6_hmac.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index d77b52523b6a..fd58426f222b 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -280,7 +281,7 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb) if (seg6_hmac_compute(hinfo, srh, &ipv6_hdr(skb)->saddr, hmac_output)) return false; - if (memcmp(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN) != 0) + if (crypto_memneq(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN)) return false; return true; From c42be534547d6e45c155c347dd792b6ad9c24def Mon Sep 17 00:00:00 2001 From: Ryan Wanner Date: Tue, 19 Aug 2025 09:32:30 -0700 Subject: [PATCH 2255/2411] Revert "net: cadence: macb: sama7g5_emac: Remove USARIO CLKEN flag" This reverts commit db400061b5e7cc55f9b4dd15443e9838964119ea. This commit can cause a Devicetree ABI break for older DTS files that rely on this flag for RMII configuration. Adding this back in ensures that the older DTBs will not break.
Fixes: db400061b5e7 ("net: cadence: macb: sama7g5_emac: Remove USARIO CLKEN flag") Signed-off-by: Ryan Wanner Link: https://patch.msgid.link/20250819163236.100680-1-Ryan.Wanner@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index ce95fad8cedd..9693f0289435 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5113,7 +5113,8 @@ static const struct macb_config sama7g5_gem_config = { static const struct macb_config sama7g5_emac_config = { .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | - MACB_CAPS_MIIONRGMII | MACB_CAPS_GEM_HAS_PTP, + MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_MIIONRGMII | + MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, .clk_init = macb_clk_init, .init = macb_init, From 5003a65790ed66be882d1987cc2ca86af0de3db1 Mon Sep 17 00:00:00 2001 From: Dewei Meng Date: Thu, 21 Aug 2025 09:43:17 +0800 Subject: [PATCH 2256/2411] ALSA: timer: fix ida_free call while not allocated In the snd_utimer_create() function, if the kasprintf() function returns NULL, snd_utimer_put_id() will be called, which finally uses ida_free() to free the unallocated id 0. Syzkaller reported the following information: ------------[ cut here ]------------ ida_free called for id=0 which is not allocated. WARNING: CPU: 1 PID: 1286 at lib/idr.c:592 ida_free+0x1fd/0x2f0 lib/idr.c:592 Modules linked in: CPU: 1 UID: 0 PID: 1286 Comm: syz-executor164 Not tainted 6.15.8 #3 PREEMPT(lazy) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-4.fc42 04/01/2014 RIP: 0010:ida_free+0x1fd/0x2f0 lib/idr.c:592 Code: f8 fc 41 83 fc 3e 76 69 e8 70 b2 f8 (...)
RSP: 0018:ffffc900007f79c8 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 1ffff920000fef3b RCX: ffffffff872176a5 RDX: ffff88800369d200 RSI: 0000000000000000 RDI: ffff88800369d200 RBP: 0000000000000000 R08: ffffffff87ba60a5 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000002 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f6f1abc1740(0000) GS:ffff8880d76a0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f6f1ad7a784 CR3: 000000007a6e2000 CR4: 00000000000006f0 Call Trace: snd_utimer_put_id sound/core/timer.c:2043 [inline] [snd_timer] snd_utimer_create+0x59b/0x6a0 sound/core/timer.c:2184 [snd_timer] snd_utimer_ioctl_create sound/core/timer.c:2202 [inline] [snd_timer] __snd_timer_user_ioctl.isra.0+0x724/0x1340 sound/core/timer.c:2287 [snd_timer] snd_timer_user_ioctl+0x75/0xc0 sound/core/timer.c:2298 [snd_timer] vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:907 [inline] __se_sys_ioctl fs/ioctl.c:893 [inline] __x64_sys_ioctl+0x198/0x200 fs/ioctl.c:893 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0x7b/0x160 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x76/0x7e [...] The utimer->id should be set properly before the kasprintf() call, which ensures that the snd_utimer_put_id() function will free the allocated id.
Fixes: 37745918e0e75 ("ALSA: timer: Introduce virtual userspace-driven timers") Signed-off-by: Dewei Meng Link: https://patch.msgid.link/20250821014317.40786-1-mengdewei@cqsoftware.com.cn Signed-off-by: Takashi Iwai --- sound/core/timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index 3ce12264eed8..d9fff5c87613 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -2139,14 +2139,14 @@ static int snd_utimer_create(struct snd_timer_uinfo *utimer_info, goto err_take_id; } + utimer->id = utimer_id; + utimer->name = kasprintf(GFP_KERNEL, "snd-utimer%d", utimer_id); if (!utimer->name) { err = -ENOMEM; goto err_get_name; } - utimer->id = utimer_id; - tid.dev_sclass = SNDRV_TIMER_SCLASS_APPLICATION; tid.dev_class = SNDRV_TIMER_CLASS_GLOBAL; tid.card = -1; From b64d035f77b1f02ab449393342264b44950a75ae Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 15 Aug 2025 06:19:58 +0000 Subject: [PATCH 2257/2411] bonding: update LACP activity flag after setting lacp_active The port's actor_oper_port_state activity flag should be updated immediately after changing the lacp_active option to reflect the current mode correctly. 
Fixes: 3a755cd8b7c6 ("bonding: add new option lacp_active") Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20250815062000.22220-2-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_3ad.c | 25 +++++++++++++++++++++++++ drivers/net/bonding/bond_options.c | 1 + include/net/bond_3ad.h | 1 + 3 files changed, 27 insertions(+) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 2fca8e84ab10..414fecfd2a0e 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2883,6 +2883,31 @@ void bond_3ad_update_lacp_rate(struct bonding *bond) spin_unlock_bh(&bond->mode_lock); } +/** + * bond_3ad_update_lacp_active - change the lacp active + * @bond: bonding struct + * + * Update actor_oper_port_state when lacp_active is modified. + */ +void bond_3ad_update_lacp_active(struct bonding *bond) +{ + struct port *port = NULL; + struct list_head *iter; + struct slave *slave; + int lacp_active; + + lacp_active = bond->params.lacp_active; + spin_lock_bh(&bond->mode_lock); + bond_for_each_slave(bond, slave, iter) { + port = &(SLAVE_AD_INFO(slave)->port); + if (lacp_active) + port->actor_oper_port_state |= LACP_STATE_LACP_ACTIVITY; + else + port->actor_oper_port_state &= ~LACP_STATE_LACP_ACTIVITY; + } + spin_unlock_bh(&bond->mode_lock); +} + size_t bond_3ad_stats_size(void) { return nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_RX */ diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 1d639a3be6ba..3b6f815c55ff 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1660,6 +1660,7 @@ static int bond_option_lacp_active_set(struct bonding *bond, netdev_dbg(bond->dev, "Setting LACP active to %s (%llu)\n", newval->string, newval->value); bond->params.lacp_active = newval->value; + bond_3ad_update_lacp_active(bond); return 0; } diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index 
2053cd8e788a..dba369a2cf27 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -307,6 +307,7 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); int bond_3ad_set_carrier(struct bonding *bond); void bond_3ad_update_lacp_rate(struct bonding *bond); +void bond_3ad_update_lacp_active(struct bonding *bond); void bond_3ad_update_ad_actor_settings(struct bonding *bond); int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats); size_t bond_3ad_stats_size(void); From 0599640a21e98f0d6a3e9ff85c0a687c90a8103b Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 15 Aug 2025 06:19:59 +0000 Subject: [PATCH 2258/2411] bonding: send LACPDUs periodically in passive mode after receiving partner's LACPDU When `lacp_active` is set to `off`, the bond operates in passive mode, meaning it only "speaks when spoken to." However, the current kernel implementation only sends an LACPDU in response when the partner's state changes. As a result, once LACP negotiation succeeds, the actor stops sending LACPDUs until the partner times out and sends an "expired" LACPDU. This causes continuous LACP state flapping. According to IEEE 802.1AX-2014, 6.4.13 Periodic Transmission machine. The values of Partner_Oper_Port_State.LACP_Activity and Actor_Oper_Port_State.LACP_Activity determine whether periodic transmissions take place. If either or both parameters are set to Active LACP, then periodic transmissions occur; if both are set to Passive LACP, then periodic transmissions do not occur. To comply with this, we remove the `!bond->params.lacp_active` check in `ad_periodic_machine()`. Instead, we initialize the actor's port's `LACP_STATE_LACP_ACTIVITY` state based on `lacp_active` setting. Additionally, we avoid setting the partner's state to `LACP_STATE_LACP_ACTIVITY` in the EXPIRED state, since we should not assume the partner is active by default. 
This ensures that in passive mode, the bond starts sending periodic LACPDUs after receiving one from the partner, and avoids flapping due to inactivity. Fixes: 3a755cd8b7c6 ("bonding: add new option lacp_active") Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20250815062000.22220-3-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_3ad.c | 42 +++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 414fecfd2a0e..4edc8e6b6b64 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -95,13 +95,13 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker); static void ad_mux_machine(struct port *port, bool *update_slave_arr); static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); static void ad_tx_machine(struct port *port); -static void ad_periodic_machine(struct port *port, struct bond_params *bond_params); +static void ad_periodic_machine(struct port *port); static void ad_port_selection_logic(struct port *port, bool *update_slave_arr); static void ad_agg_selection_logic(struct aggregator *aggregator, bool *update_slave_arr); static void ad_clear_agg(struct aggregator *aggregator); static void ad_initialize_agg(struct aggregator *aggregator); -static void ad_initialize_port(struct port *port, int lacp_fast); +static void ad_initialize_port(struct port *port, const struct bond_params *bond_params); static void ad_enable_collecting(struct port *port); static void ad_disable_distributing(struct port *port, bool *update_slave_arr); @@ -1307,10 +1307,16 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port) * case of EXPIRED even if LINK_DOWN didn't arrive for * the port. 
*/ - port->partner_oper.port_state &= ~LACP_STATE_SYNCHRONIZATION; port->sm_vars &= ~AD_PORT_MATCHED; + /* Based on IEEE 8021AX-2014, Figure 6-18 - Receive + * machine state diagram, the statue should be + * Partner_Oper_Port_State.Synchronization = FALSE; + * Partner_Oper_Port_State.LACP_Timeout = Short Timeout; + * start current_while_timer(Short Timeout); + * Actor_Oper_Port_State.Expired = TRUE; + */ + port->partner_oper.port_state &= ~LACP_STATE_SYNCHRONIZATION; port->partner_oper.port_state |= LACP_STATE_LACP_TIMEOUT; - port->partner_oper.port_state |= LACP_STATE_LACP_ACTIVITY; port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(AD_SHORT_TIMEOUT)); port->actor_oper_port_state |= LACP_STATE_EXPIRED; port->sm_vars |= AD_PORT_CHURNED; @@ -1417,11 +1423,10 @@ static void ad_tx_machine(struct port *port) /** * ad_periodic_machine - handle a port's periodic state machine * @port: the port we're looking at - * @bond_params: bond parameters we will use * * Turn ntt flag on priodically to perform periodic transmission of lacpdu's. 
*/ -static void ad_periodic_machine(struct port *port, struct bond_params *bond_params) +static void ad_periodic_machine(struct port *port) { periodic_states_t last_state; @@ -1430,8 +1435,7 @@ static void ad_periodic_machine(struct port *port, struct bond_params *bond_para /* check if port was reinitialized */ if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) || - (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) || - !bond_params->lacp_active) { + (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY))) { port->sm_periodic_state = AD_NO_PERIODIC; } /* check if state machine should change state */ @@ -1955,16 +1959,16 @@ static void ad_initialize_agg(struct aggregator *aggregator) /** * ad_initialize_port - initialize a given port's parameters * @port: the port we're looking at - * @lacp_fast: boolean. 
whether fast periodic should be used + * @bond_params: bond parameters we will use */ -static void ad_initialize_port(struct port *port, int lacp_fast) +static void ad_initialize_port(struct port *port, const struct bond_params *bond_params) { static const struct port_params tmpl = { .system_priority = 0xffff, .key = 1, .port_number = 1, .port_priority = 0xff, - .port_state = 1, + .port_state = 0, }; static const struct lacpdu lacpdu = { .subtype = 0x01, @@ -1982,12 +1986,14 @@ static void ad_initialize_port(struct port *port, int lacp_fast) port->actor_port_priority = 0xff; port->actor_port_aggregator_identifier = 0; port->ntt = false; - port->actor_admin_port_state = LACP_STATE_AGGREGATION | - LACP_STATE_LACP_ACTIVITY; - port->actor_oper_port_state = LACP_STATE_AGGREGATION | - LACP_STATE_LACP_ACTIVITY; + port->actor_admin_port_state = LACP_STATE_AGGREGATION; + port->actor_oper_port_state = LACP_STATE_AGGREGATION; + if (bond_params->lacp_active) { + port->actor_admin_port_state |= LACP_STATE_LACP_ACTIVITY; + port->actor_oper_port_state |= LACP_STATE_LACP_ACTIVITY; + } - if (lacp_fast) + if (bond_params->lacp_fast) port->actor_oper_port_state |= LACP_STATE_LACP_TIMEOUT; memcpy(&port->partner_admin, &tmpl, sizeof(tmpl)); @@ -2201,7 +2207,7 @@ void bond_3ad_bind_slave(struct slave *slave) /* port initialization */ port = &(SLAVE_AD_INFO(slave)->port); - ad_initialize_port(port, bond->params.lacp_fast); + ad_initialize_port(port, &bond->params); port->slave = slave; port->actor_port_number = SLAVE_AD_INFO(slave)->id; @@ -2513,7 +2519,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work) } ad_rx_machine(NULL, port); - ad_periodic_machine(port, &bond->params); + ad_periodic_machine(port); ad_port_selection_logic(port, &update_slave_arr); ad_mux_machine(port, &update_slave_arr); ad_tx_machine(port); From 87951b566446da04eed1fe8100f99a512ef02756 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 15 Aug 2025 06:20:00 +0000 Subject: [PATCH 2259/2411] 
selftests: bonding: add test for passive LACP mode Add a selftest to verify bonding behavior when `lacp_active` is set to `off`. The test checks the following: - The passive LACP bond should not send LACPDUs before receiving a partner's LACPDU. - The transmitted LACPDUs must not include the active flag. - After transitioning to EXPIRED and DEFAULTED states, the passive side should still not initiate LACPDUs. Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20250815062000.22220-4-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- .../selftests/drivers/net/bonding/Makefile | 3 +- .../drivers/net/bonding/bond_passive_lacp.sh | 105 ++++++++++++++++++ .../selftests/drivers/net/bonding/config | 1 + 3 files changed, 108 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 2b10854e4b1e..44b98f17f8ff 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -10,7 +10,8 @@ TEST_PROGS := \ mode-2-recovery-updelay.sh \ bond_options.sh \ bond-eth-type-change.sh \ - bond_macvlan_ipvlan.sh + bond_macvlan_ipvlan.sh \ + bond_passive_lacp.sh TEST_FILES := \ lag_lib.sh \ diff --git a/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh new file mode 100755 index 000000000000..9c3b089813df --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test if a bond interface works with lacp_active=off. 
+ +# shellcheck disable=SC2034 +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +# shellcheck disable=SC1091 +source "$lib_dir"/../../../net/forwarding/lib.sh + +# shellcheck disable=SC2317 +check_port_state() +{ + local netns=$1 + local port=$2 + local state=$3 + + ip -n "${netns}" -d -j link show "$port" | \ + jq -e ".[].linkinfo.info_slave_data.ad_actor_oper_port_state_str | index(\"${state}\") != null" > /dev/null +} + +check_pkt_count() +{ + RET=0 + local ns="$1" + local iface="$2" + + # wait 65s, one per 30s + slowwait_for_counter 65 2 tc_rule_handle_stats_get \ + "dev ${iface} egress" 101 ".packets" "-n ${ns}" &> /dev/null +} + +setup() { + setup_ns c_ns s_ns + + # shellcheck disable=SC2154 + ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}" + ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}" + + # Add tc filter to count the pkts + tc -n "${c_ns}" qdisc add dev eth0 clsact + tc -n "${c_ns}" filter add dev eth0 egress handle 101 protocol 0x8809 matchall action pass + tc -n "${s_ns}" qdisc add dev eth1 clsact + tc -n "${s_ns}" filter add dev eth1 egress handle 101 protocol 0x8809 matchall action pass + + ip -n "${s_ns}" link add bond0 type bond mode 802.3ad lacp_active on lacp_rate fast + ip -n "${s_ns}" link set eth0 master bond0 + ip -n "${s_ns}" link set eth1 master bond0 + + ip -n "${c_ns}" link add bond0 type bond mode 802.3ad lacp_active off lacp_rate fast + ip -n "${c_ns}" link set eth0 master bond0 + ip -n "${c_ns}" link set eth1 master bond0 + +} + +trap cleanup_all_ns EXIT +setup + +# The bond will send 2 lacpdu pkts during init time, let's wait at least 2s +# after interface up +ip -n "${c_ns}" link set bond0 up +sleep 2 + +# 1. The passive side shouldn't send LACPDU. +check_pkt_count "${c_ns}" "eth0" && RET=1 +log_test "802.3ad lacp_active off" "init port" + +ip -n "${s_ns}" link set bond0 up +# 2. The passive side should not have the 'active' flag. 
+RET=0 +slowwait 2 check_port_state "${c_ns}" "eth0" "active" && RET=1 +log_test "802.3ad lacp_active off" "port state active" + +# 3. The active side should have the 'active' flag. +RET=0 +slowwait 2 check_port_state "${s_ns}" "eth0" "active" || RET=1 +log_test "802.3ad lacp_active on" "port state active" + +# 4. Make sure the connection is not expired. +RET=0 +slowwait 5 check_port_state "${s_ns}" "eth0" "distributing" +slowwait 10 check_port_state "${s_ns}" "eth0" "expired" && RET=1 +log_test "bond 802.3ad lacp_active off" "port connection" + +# After testing, disconnect one port on each side to check the state. +ip -n "${s_ns}" link set eth0 nomaster +ip -n "${s_ns}" link set eth0 up +ip -n "${c_ns}" link set eth1 nomaster +ip -n "${c_ns}" link set eth1 up +# Due to Periodic Machine and Rx Machine state change, the bond will still +# send lacpdu pkts in a few seconds. sleep at lease 5s to make sure +# negotiation finished +sleep 5 + +# 5. The active side should keep sending LACPDU. +check_pkt_count "${s_ns}" "eth1" || RET=1 +log_test "bond 802.3ad lacp_active on" "port pkt after disconnect" + +# 6. The passive side shouldn't send LACPDU anymore. 
+check_pkt_count "${c_ns}" "eth0" && RET=1 +log_test "bond 802.3ad lacp_active off" "port pkt after disconnect" + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config index dad4e5fda4db..4d16a69ffc65 100644 --- a/tools/testing/selftests/drivers/net/bonding/config +++ b/tools/testing/selftests/drivers/net/bonding/config @@ -6,6 +6,7 @@ CONFIG_MACVLAN=y CONFIG_IPVLAN=y CONFIG_NET_ACT_GACT=y CONFIG_NET_CLS_FLOWER=y +CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_SCH_INGRESS=y CONFIG_NLMON=y CONFIG_VETH=y From d2d7a96b29ea6ab093973a1a37d26126db70c79f Mon Sep 17 00:00:00 2001 From: Judith Mendez Date: Wed, 20 Aug 2025 14:30:47 -0500 Subject: [PATCH 2260/2411] mmc: sdhci_am654: Disable HS400 for AM62P SR1.0 and SR1.1 This adds SDHCI_AM654_QUIRK_DISABLE_HS400 quirk which shall be used to disable HS400 support. AM62P SR1.0 and SR1.1 do not support HS400 due to errata i2458 [0] so disable HS400 for these SoC revisions. [0] https://www.ti.com/lit/er/sprz574a/sprz574a.pdf Fixes: 37f28165518f ("arm64: dts: ti: k3-am62p: Add ITAP/OTAP values for MMC") Cc: stable@vger.kernel.org Signed-off-by: Judith Mendez Reviewed-by: Andrew Davis Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20250820193047.4064142-1-jm@ti.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci_am654.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index e4fc345be7e5..17e62c61b6e6 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -156,6 +156,7 @@ struct sdhci_am654_data { #define SDHCI_AM654_QUIRK_FORCE_CDTEST BIT(0) #define SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA BIT(1) +#define SDHCI_AM654_QUIRK_DISABLE_HS400 BIT(2) }; struct window { @@ -765,6 +766,7 @@ static int sdhci_am654_init(struct sdhci_host *host) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_am654_data *sdhci_am654 = 
sdhci_pltfm_priv(pltfm_host); + struct device *dev = mmc_dev(host->mmc); u32 ctl_cfg_2 = 0; u32 mask; u32 val; @@ -820,6 +822,12 @@ static int sdhci_am654_init(struct sdhci_host *host) if (ret) goto err_cleanup_host; + if (sdhci_am654->quirks & SDHCI_AM654_QUIRK_DISABLE_HS400 && + host->mmc->caps2 & (MMC_CAP2_HS400 | MMC_CAP2_HS400_ES)) { + dev_info(dev, "HS400 mode not supported on this silicon revision, disabling it\n"); + host->mmc->caps2 &= ~(MMC_CAP2_HS400 | MMC_CAP2_HS400_ES); + } + ret = __sdhci_add_host(host); if (ret) goto err_cleanup_host; @@ -883,6 +891,12 @@ static int sdhci_am654_get_of_property(struct platform_device *pdev, return 0; } +static const struct soc_device_attribute sdhci_am654_descope_hs400[] = { + { .family = "AM62PX", .revision = "SR1.0" }, + { .family = "AM62PX", .revision = "SR1.1" }, + { /* sentinel */ } +}; + static const struct of_device_id sdhci_am654_of_match[] = { { .compatible = "ti,am654-sdhci-5.1", @@ -970,6 +984,10 @@ static int sdhci_am654_probe(struct platform_device *pdev) if (ret) return dev_err_probe(dev, ret, "parsing dt failed\n"); + soc = soc_device_match(sdhci_am654_descope_hs400); + if (soc) + sdhci_am654->quirks |= SDHCI_AM654_QUIRK_DISABLE_HS400; + host->mmc_host_ops.start_signal_voltage_switch = sdhci_am654_start_signal_voltage_switch; host->mmc_host_ops.execute_tuning = sdhci_am654_execute_tuning; From 9f6b606b6b37e61427412708411e8e04b1a858e8 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 18 Aug 2025 11:58:25 +0200 Subject: [PATCH 2261/2411] net: airoha: ppe: Do not invalid PPE entries in case of SW hash collision SW hash computed by airoha_ppe_foe_get_entry_hash routine (used for foe_flow hlist) can theoretically produce collisions between two different HW PPE entries. In airoha_ppe_foe_insert_entry() if the collision occurs we will mark the second PPE entry in the list as stale (setting the hw hash to 0xffff). 
Stale entries are no longer updated in the airoha_ppe_foe_flow_entry_update routine and so they are removed by Netfilter. Fix the problem by not marking the second entry as stale in the airoha_ppe_foe_insert_entry routine if we have already inserted the brand new entry in the PPE table, and let Netfilter remove real stale entries according to their timestamp. Please note this is just a theoretical issue spotted while reviewing the code, not one encountered while running the system. Fixes: cd53f622611f9 ("net: airoha: Add L2 hw acceleration support") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250818-airoha-en7581-hash-collision-fix-v1-1-d190c4b53d1c@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_ppe.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 47411d2cbd28..88694b08afa1 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -736,10 +736,8 @@ static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe, continue; } - if (commit_done || !airoha_ppe_foe_compare_entry(e, hwe)) { - e->hash = 0xffff; + if (!airoha_ppe_foe_compare_entry(e, hwe)) continue; - } airoha_ppe_foe_commit_entry(ppe, &e->data, hash); commit_done = true; From 2d82f3bd8910eb65e30bb2a3c9b945bfb3b6d661 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 15 Aug 2025 21:17:37 +0800 Subject: [PATCH 2262/2411] blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues Commit 5989bfe6ac6b ("block: restore two stage elevator switch while running nr_hw_queue update") reintroduced a lockdep warning by calling blk_mq_freeze_queue_nomemsave() before switching the I/O scheduler. The function blk_mq_elv_switch_none() calls elevator_change_done(). Running this while the queue is frozen causes a lockdep warning. Fix this by reordering the operations: first, switch the I/O scheduler to 'none', and then freeze the queue.
This ensures that elevator_change_done() is not called on an already frozen queue. And this way is safe because elevator_set_none() does freeze queue before switching to none. Also we still have to rely on blk_mq_elv_switch_back() for switching back, and it has to cover unfrozen queue case. Cc: Nilay Shroff Cc: Yu Kuai Fixes: 5989bfe6ac6b ("block: restore two stage elevator switch while running nr_hw_queue update") Signed-off-by: Ming Lei Reviewed-by: Yu Kuai Reviewed-by: Nilay Shroff Link: https://lore.kernel.org/r/20250815131737.331692-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index b67d6c02eceb..ba3a4b77f578 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -5033,6 +5033,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, unsigned int memflags; int i; struct xarray elv_tbl, et_tbl; + bool queues_frozen = false; lockdep_assert_held(&set->tag_list_lock); @@ -5056,9 +5057,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, blk_mq_sysfs_unregister_hctxs(q); } - list_for_each_entry(q, &set->tag_list, tag_set_list) - blk_mq_freeze_queue_nomemsave(q); - /* * Switch IO scheduler to 'none', cleaning up the data associated * with the previous scheduler. We will switch back once we are done @@ -5068,6 +5066,9 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, if (blk_mq_elv_switch_none(q, &elv_tbl)) goto switch_back; + list_for_each_entry(q, &set->tag_list, tag_set_list) + blk_mq_freeze_queue_nomemsave(q); + queues_frozen = true; if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0) goto switch_back; @@ -5091,8 +5092,12 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, } switch_back: /* The blk_mq_elv_switch_back unfreezes queue for us. 
*/ - list_for_each_entry(q, &set->tag_list, tag_set_list) + list_for_each_entry(q, &set->tag_list, tag_set_list) { + /* switch_back expects queue to be frozen */ + if (!queues_frozen) + blk_mq_freeze_queue_nomemsave(q); blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl); + } list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_sysfs_register_hctxs(q); From 7c7cda81159b1abe7d50bcef2ccc6f662e225c8b Mon Sep 17 00:00:00 2001 From: Raphael Gallais-Pou Date: Wed, 20 Aug 2025 20:03:10 +0200 Subject: [PATCH 2263/2411] spi: st: fix PM macros to use CONFIG_PM instead of CONFIG_PM_SLEEP pm_sleep_ptr() depends on CONFIG_PM_SLEEP while pm_ptr() depends on CONFIG_PM. Since ST SSC4 implements runtime PM it makes sense using pm_ptr() here. For the same reason replace PM macros that use CONFIG_PM. Doing so prevents from using __maybe_unused attribute of runtime PM functions. Link: https://lore.kernel.org/lkml/CAMuHMdX9nkROkAJJ5odv4qOWe0bFTmaFs=Rfxsfuc9+DT-bsEQ@mail.gmail.com Fixes: 6f8584a4826f ("spi: st: Switch from CONFIG_PM_SLEEP guards to pm_sleep_ptr()") Signed-off-by: Raphael Gallais-Pou Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20250820180310.9605-1-rgallaispou@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-st-ssc4.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-st-ssc4.c b/drivers/spi/spi-st-ssc4.c index 49ab4c515156..c07c61dc4938 100644 --- a/drivers/spi/spi-st-ssc4.c +++ b/drivers/spi/spi-st-ssc4.c @@ -378,7 +378,7 @@ static void spi_st_remove(struct platform_device *pdev) pinctrl_pm_select_sleep_state(&pdev->dev); } -static int __maybe_unused spi_st_runtime_suspend(struct device *dev) +static int spi_st_runtime_suspend(struct device *dev) { struct spi_controller *host = dev_get_drvdata(dev); struct spi_st *spi_st = spi_controller_get_devdata(host); @@ -391,7 +391,7 @@ static int __maybe_unused spi_st_runtime_suspend(struct device *dev) return 0; } -static int __maybe_unused 
spi_st_runtime_resume(struct device *dev) +static int spi_st_runtime_resume(struct device *dev) { struct spi_controller *host = dev_get_drvdata(dev); struct spi_st *spi_st = spi_controller_get_devdata(host); @@ -428,8 +428,8 @@ static int __maybe_unused spi_st_resume(struct device *dev) } static const struct dev_pm_ops spi_st_pm = { - SET_SYSTEM_SLEEP_PM_OPS(spi_st_suspend, spi_st_resume) - SET_RUNTIME_PM_OPS(spi_st_runtime_suspend, spi_st_runtime_resume, NULL) + SYSTEM_SLEEP_PM_OPS(spi_st_suspend, spi_st_resume) + RUNTIME_PM_OPS(spi_st_runtime_suspend, spi_st_runtime_resume, NULL) }; static const struct of_device_id stm_spi_match[] = { @@ -441,7 +441,7 @@ MODULE_DEVICE_TABLE(of, stm_spi_match); static struct platform_driver spi_st_driver = { .driver = { .name = "spi-st", - .pm = pm_sleep_ptr(&spi_st_pm), + .pm = pm_ptr(&spi_st_pm), .of_match_table = of_match_ptr(stm_spi_match), }, .probe = spi_st_probe, From 275332877e2fa9d6efa7402b1e897f6c6ee695bb Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Thu, 14 Aug 2025 13:54:57 +0530 Subject: [PATCH 2264/2411] block: skip q->rq_qos check in rq_qos_done_bio() If a bio has BIO_QOS_THROTTLED or BIO_QOS_MERGED set, it implicitly guarantees that q->rq_qos is present. Avoid re-checking q->rq_qos in this case and call __rq_qos_done_bio() directly as a minor optimization. 
Suggested-by: Yu Kuai Signed-off-by: Nilay Shroff Reviewed-by: Ming Lei Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20250814082612.500845-2-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- block/blk-rq-qos.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 39749f4066fb..28125fc49eff 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -142,8 +142,14 @@ static inline void rq_qos_done_bio(struct bio *bio) bio->bi_bdev && (bio_flagged(bio, BIO_QOS_THROTTLED) || bio_flagged(bio, BIO_QOS_MERGED))) { struct request_queue *q = bdev_get_queue(bio->bi_bdev); - if (q->rq_qos) - __rq_qos_done_bio(q->rq_qos, bio); + + /* + * If a bio has BIO_QOS_xxx set, it implicitly implies that + * q->rq_qos is present. So, we skip re-checking q->rq_qos + * here as an extra optimization and directly call + * __rq_qos_done_bio(). + */ + __rq_qos_done_bio(q->rq_qos, bio); } } From ade1beea1c27657712aa8f594226d461639382ff Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Thu, 14 Aug 2025 13:54:58 +0530 Subject: [PATCH 2265/2411] block: decrement block_rq_qos static key in rq_qos_del() rq_qos_add() increments the block_rq_qos static key when a QoS policy is attached. When a QoS policy is removed via rq_qos_del(), we must symmetrically decrement the static key. If this removal drops the last QoS policy from the queue (q->rq_qos becomes NULL), the static branch can be disabled and the jump label patched to a NOP, avoiding overhead on the hot path. This change ensures rq_qos_add()/rq_qos_del() keep the block_rq_qos static key balanced and prevents leaving the branch permanently enabled after the last policy is removed.
Fixes: 033b667a823e ("block: blk-rq-qos: guard rq-qos helpers by static key") Signed-off-by: Nilay Shroff Reviewed-by: Ming Lei Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20250814082612.500845-3-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- block/blk-rq-qos.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index 848591fb3c57..b1e24bb85ad2 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -374,6 +374,7 @@ void rq_qos_del(struct rq_qos *rqos) for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) { if (*cur == rqos) { *cur = rqos->next; + static_branch_dec(&block_rq_qos); break; } } From 370ac285f23aecae40600851fb4a1a9e75e50973 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Thu, 14 Aug 2025 13:54:59 +0530 Subject: [PATCH 2266/2411] block: avoid cpu_hotplug_lock dependency on freeze_lock A recent lockdep[1] splat observed while running blktest block/005 reveals a potential deadlock caused by the cpu_hotplug_lock dependency on ->freeze_lock. This dependency was introduced by commit 033b667a823e ("block: blk-rq-qos: guard rq-qos helpers by static key"). That change added a static key to avoid fetching q->rq_qos when neither blk-wbt nor blk-iolatency is configured. The static key dynamically patches kernel text to a NOP when disabled, eliminating overhead of fetching q->rq_qos in the I/O hot path. However, enabling a static key at runtime requires acquiring both cpu_hotplug_lock and jump_label_mutex. When this happens after the queue has already been frozen (i.e., while holding ->freeze_lock), it creates a locking dependency from cpu_hotplug_lock to ->freeze_lock, which leads to a potential deadlock reported by lockdep [1]. To resolve this, replace the static key mechanism with q->queue_flags: QUEUE_FLAG_QOS_ENABLED. This flag is evaluated in the fast path before accessing q->rq_qos. If the flag is set, we proceed to fetch q->rq_qos; otherwise, the access is skipped.
Since q->queue_flags is commonly accessed in IO hotpath and resides in the first cacheline of struct request_queue, checking it imposes minimal overhead while eliminating the deadlock risk. This change avoids the lockdep splat without introducing performance regressions. [1] https://lore.kernel.org/linux-block/4fdm37so3o4xricdgfosgmohn63aa7wj3ua4e5vpihoamwg3ui@fq42f5q5t5ic/ Reported-by: Shinichiro Kawasaki Closes: https://lore.kernel.org/linux-block/4fdm37so3o4xricdgfosgmohn63aa7wj3ua4e5vpihoamwg3ui@fq42f5q5t5ic/ Fixes: 033b667a823e ("block: blk-rq-qos: guard rq-qos helpers by static key") Tested-by: Shin'ichiro Kawasaki Signed-off-by: Nilay Shroff Reviewed-by: Ming Lei Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20250814082612.500845-4-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 + block/blk-rq-qos.c | 9 ++++---- block/blk-rq-qos.h | 52 ++++++++++++++++++++++++------------------ include/linux/blkdev.h | 1 + 4 files changed, 36 insertions(+), 27 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 7ed3e71f2fc0..32c65efdda46 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -95,6 +95,7 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(SQ_SCHED), QUEUE_FLAG_NAME(DISABLE_WBT_DEF), QUEUE_FLAG_NAME(NO_ELV_SWITCH), + QUEUE_FLAG_NAME(QOS_ENABLED), }; #undef QUEUE_FLAG_NAME diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index b1e24bb85ad2..654478dfbc20 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -2,8 +2,6 @@ #include "blk-rq-qos.h" -__read_mostly DEFINE_STATIC_KEY_FALSE(block_rq_qos); - /* * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded, * false if 'v' + 1 would be bigger than 'below'. 
@@ -319,8 +317,8 @@ void rq_qos_exit(struct request_queue *q) struct rq_qos *rqos = q->rq_qos; q->rq_qos = rqos->next; rqos->ops->exit(rqos); - static_branch_dec(&block_rq_qos); } + blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q); mutex_unlock(&q->rq_qos_mutex); } @@ -346,7 +344,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, goto ebusy; rqos->next = q->rq_qos; q->rq_qos = rqos; - static_branch_inc(&block_rq_qos); + blk_queue_flag_set(QUEUE_FLAG_QOS_ENABLED, q); blk_mq_unfreeze_queue(q, memflags); @@ -374,10 +372,11 @@ void rq_qos_del(struct rq_qos *rqos) for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) { if (*cur == rqos) { *cur = rqos->next; - static_branch_dec(&block_rq_qos); break; } } + if (!q->rq_qos) + blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q); blk_mq_unfreeze_queue(q, memflags); mutex_lock(&q->debugfs_mutex); diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 28125fc49eff..1fe22000a379 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -12,7 +12,6 @@ #include "blk-mq-debugfs.h" struct blk_mq_debugfs_attr; -extern struct static_key_false block_rq_qos; enum rq_qos_id { RQ_QOS_WBT, @@ -113,49 +112,55 @@ void __rq_qos_queue_depth_changed(struct rq_qos *rqos); static inline void rq_qos_cleanup(struct request_queue *q, struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) __rq_qos_cleanup(q->rq_qos, bio); } static inline void rq_qos_done(struct request_queue *q, struct request *rq) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos && - !blk_rq_is_passthrough(rq)) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos && !blk_rq_is_passthrough(rq)) __rq_qos_done(q->rq_qos, rq); } static inline void rq_qos_issue(struct request_queue *q, struct request *rq) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, 
&q->queue_flags)) && + q->rq_qos) __rq_qos_issue(q->rq_qos, rq); } static inline void rq_qos_requeue(struct request_queue *q, struct request *rq) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) __rq_qos_requeue(q->rq_qos, rq); } static inline void rq_qos_done_bio(struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && - bio->bi_bdev && (bio_flagged(bio, BIO_QOS_THROTTLED) || - bio_flagged(bio, BIO_QOS_MERGED))) { - struct request_queue *q = bdev_get_queue(bio->bi_bdev); + struct request_queue *q; - /* - * If a bio has BIO_QOS_xxx set, it implicitly implies that - * q->rq_qos is present. So, we skip re-checking q->rq_qos - * here as an extra optimization and directly call - * __rq_qos_done_bio(). - */ - __rq_qos_done_bio(q->rq_qos, bio); - } + if (!bio->bi_bdev || (!bio_flagged(bio, BIO_QOS_THROTTLED) && + !bio_flagged(bio, BIO_QOS_MERGED))) + return; + + q = bdev_get_queue(bio->bi_bdev); + + /* + * If a bio has BIO_QOS_xxx set, it implicitly implies that + * q->rq_qos is present. So, we skip re-checking q->rq_qos + * here as an extra optimization and directly call + * __rq_qos_done_bio(). 
+ */ + __rq_qos_done_bio(q->rq_qos, bio); } static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) { + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) { bio_set_flag(bio, BIO_QOS_THROTTLED); __rq_qos_throttle(q->rq_qos, bio); } @@ -164,14 +169,16 @@ static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio) static inline void rq_qos_track(struct request_queue *q, struct request *rq, struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) __rq_qos_track(q->rq_qos, rq, bio); } static inline void rq_qos_merge(struct request_queue *q, struct request *rq, struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) { + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) { bio_set_flag(bio, BIO_QOS_MERGED); __rq_qos_merge(q->rq_qos, rq, bio); } @@ -179,7 +186,8 @@ static inline void rq_qos_merge(struct request_queue *q, struct request *rq, static inline void rq_qos_queue_depth_changed(struct request_queue *q) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) __rq_qos_queue_depth_changed(q->rq_qos); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 95886b404b16..fe1797bbec42 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -656,6 +656,7 @@ enum { QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */ QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */ QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */ + QUEUE_FLAG_QOS_ENABLED, /* qos is enabled */ QUEUE_FLAG_MAX }; From 62708b9452f8eb77513115b17c4f8d1a22ebf843 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Aug 2025 19:19:51 -0700 Subject: [PATCH 2267/2411] tls: fix handling of 
zero-length records on the rx_list Each recvmsg() call must process either - only contiguous DATA records (any number of them) - one non-DATA record If the next record has different type than what has already been processed we break out of the main processing loop. If the record has already been decrypted (which may be the case for TLS 1.3 where we don't know type until decryption) we queue the pending record to the rx_list. Next recvmsg() will pick it up from there. Queuing the skb to rx_list after zero-copy decrypt is not possible, since in that case we decrypted directly to the user space buffer, and we don't have an skb to queue (darg.skb points to the ciphertext skb for access to metadata like length). Only data records are allowed zero-copy, and we break the processing loop after each non-data record. So we should never zero-copy and then find out that the record type has changed. The corner case we missed is when the initial record comes from rx_list, and it's zero length. Reported-by: Muhammad Alifa Ramdhan Reported-by: Billy Jheng Bing-Jhong Fixes: 84c61fe1a75b ("tls: rx: do not use the standard strparser") Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20250820021952.143068-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 51c98a007dda..bac65d0d4e3e 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1808,6 +1808,9 @@ int decrypt_skb(struct sock *sk, struct scatterlist *sgout) return tls_decrypt_sg(sk, NULL, sgout, &darg); } +/* All records returned from a recvmsg() call must have the same type. + * 0 is not a valid content type. Use it as "no type reported, yet". 
+ */ static int tls_record_content_type(struct msghdr *msg, struct tls_msg *tlm, u8 *control) { @@ -2051,8 +2054,10 @@ int tls_sw_recvmsg(struct sock *sk, if (err < 0) goto end; + /* process_rx_list() will set @control if it processed any records */ copied = err; - if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more) + if (len <= copied || rx_more || + (control && control != TLS_RECORD_TYPE_DATA)) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); From a61a3e961baff65b0a49f862fe21ce304f279b24 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Aug 2025 19:19:52 -0700 Subject: [PATCH 2268/2411] selftests: tls: add tests for zero-length records Test various combinations of zero-length records. Unfortunately, kernel cannot be coerced into producing those, so hardcode the ciphertext messages in the test. Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20250820021952.143068-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 300 +++++++++++++++++++++++++++++- 1 file changed, 295 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 2b8387a83bc7..0f5640d8dc7f 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -181,13 +181,12 @@ static int tls_send_cmsg(int fd, unsigned char record_type, return sendmsg(fd, &msg, flags); } -static int tls_recv_cmsg(struct __test_metadata *_metadata, - int fd, unsigned char record_type, - void *data, size_t len, int flags) +static int __tls_recv_cmsg(struct __test_metadata *_metadata, + int fd, unsigned char *ctype, + void *data, size_t len, int flags) { char cbuf[CMSG_SPACE(sizeof(char))]; struct cmsghdr *cmsg; - unsigned char ctype; struct msghdr msg; struct iovec vec; int n; @@ -206,7 +205,20 @@ static int tls_recv_cmsg(struct __test_metadata *_metadata, EXPECT_NE(cmsg, NULL); EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); EXPECT_EQ(cmsg->cmsg_type, 
TLS_GET_RECORD_TYPE); - ctype = *((unsigned char *)CMSG_DATA(cmsg)); + if (ctype) + *ctype = *((unsigned char *)CMSG_DATA(cmsg)); + + return n; +} + +static int tls_recv_cmsg(struct __test_metadata *_metadata, + int fd, unsigned char record_type, + void *data, size_t len, int flags) +{ + unsigned char ctype; + int n; + + n = __tls_recv_cmsg(_metadata, fd, &ctype, data, len, flags); EXPECT_EQ(ctype, record_type); return n; @@ -2164,6 +2176,284 @@ TEST_F(tls, rekey_poll_delay) } } +struct raw_rec { + unsigned int plain_len; + unsigned char plain_data[100]; + unsigned int cipher_len; + unsigned char cipher_data[128]; +}; + +/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: 'Hello world' */ +static const struct raw_rec id0_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0xa2, 0x33, + 0xde, 0x8d, 0x94, 0xf0, 0x29, 0x6c, 0xb1, 0xaf, + 0x6a, 0x75, 0xb2, 0x93, 0xad, 0x45, 0xd5, 0xfd, + 0x03, 0x51, 0x57, 0x8f, 0xf9, 0xcc, 0x3b, 0x42, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:0, plaintext: '' */ +static const struct raw_rec id0_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x38, 0x7b, + 0xa6, 0x1c, 0xdd, 0xa7, 0x19, 0x33, 0xab, 0xae, + 0x88, 0xe1, 0xd2, 0x08, 0x4f, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: '' */ +static const struct raw_rec id0_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc5, 0x37, 0x90, + 0x70, 0x45, 0x89, 0xfb, 0x5c, 0xc7, 0x89, 0x03, + 0x68, 0x80, 0xd3, 0xd8, 0xcc, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: 'Hello world' */ +static const struct raw_rec 
id1_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x3a, 0x1a, 0x9c, + 0xd0, 0xa8, 0x9a, 0xd6, 0x69, 0xd6, 0x1a, 0xe3, + 0xb5, 0x1f, 0x0d, 0x2c, 0xe2, 0x97, 0x46, 0xff, + 0x2b, 0xcc, 0x5a, 0xc4, 0xa3, 0xb9, 0xef, 0xba, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:1, plaintext: '' */ +static const struct raw_rec id1_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x3e, 0xf0, 0xfe, + 0xee, 0xd9, 0xe2, 0x5d, 0xc7, 0x11, 0x4c, 0xe6, + 0xb4, 0x7e, 0xef, 0x40, 0x2b, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: '' */ +static const struct raw_rec id1_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0xce, 0xfc, 0x86, + 0xc8, 0xf0, 0x55, 0xf9, 0x47, 0x3f, 0x74, 0xdc, + 0xc9, 0xbf, 0xfe, 0x5b, 0xb1, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: 'Hello world' */ +static const struct raw_rec id2_ctrl_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19, + 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87, + 0x2a, 0x04, 0x11, 0x3d, 0xf8, 0x64, 0x5f, 0x36, + 0x8b, 0xa8, 0xee, 0x4c, 0x6d, 0x62, 0xa5, 0x00, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: 'Hello world' */ +static const struct raw_rec id2_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 
0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19, + 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87, + 0x8e, 0xa1, 0xd0, 0xcd, 0x33, 0xb5, 0x86, 0x2b, + 0x17, 0xf1, 0x52, 0x2a, 0x55, 0x62, 0x65, 0x11, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: '' */ +static const struct raw_rec id2_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xdc, 0x5c, 0x0e, + 0x41, 0xdd, 0xba, 0xd3, 0xcc, 0xcf, 0x6d, 0xd9, + 0x06, 0xdb, 0x79, 0xe5, 0x5d, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: '' */ +static const struct raw_rec id2_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xc3, 0xca, 0x26, + 0x22, 0xe4, 0x25, 0xfb, 0x5f, 0x6d, 0xbf, 0x83, + 0x30, 0x48, 0x69, 0x1a, 0x47, + }, +}; + +FIXTURE(zero_len) +{ + int fd, cfd; + bool notls; +}; + +FIXTURE_VARIANT(zero_len) +{ + const struct raw_rec *recs[4]; + ssize_t recv_ret[4]; +}; + +FIXTURE_VARIANT_ADD(zero_len, data_data_data) +{ + .recs = { &id0_data_l11, &id1_data_l11, &id2_data_l11, }, + .recv_ret = { 33, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, data_0ctrl_data) +{ + .recs = { &id0_data_l11, &id1_ctrl_l0, &id2_data_l11, }, + .recv_ret = { 11, 0, 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0data) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l0, }, + .recv_ret = { -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_ctrl) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l11, }, + .recv_ret = { 0, 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0ctrl) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l0, }, + .recv_ret = { 0, 0, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0ctrl_0ctrl_0ctrl) +{ + .recs = { &id0_ctrl_l0, &id1_ctrl_l0, &id2_ctrl_l0, }, + .recv_ret = { 0, 0, 0, 
-EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_data) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l11, }, + .recv_ret = { 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, data_0data_0data) +{ + .recs = { &id0_data_l11, &id1_data_l0, &id2_data_l0, }, + .recv_ret = { 11, -EAGAIN, }, +}; + +FIXTURE_SETUP(zero_len) +{ + struct tls_crypto_info_keys tls12; + int ret; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, + &tls12, 0); + + ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); + if (self->notls) + return; + + /* Don't install keys on fd, we'll send raw records */ + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(zero_len) +{ + close(self->fd); + close(self->cfd); +} + +TEST_F(zero_len, test) +{ + const struct raw_rec *const *rec; + unsigned char buf[128]; + int rec_off; + int i; + + for (i = 0; i < 4 && variant->recs[i]; i++) + EXPECT_EQ(send(self->fd, variant->recs[i]->cipher_data, + variant->recs[i]->cipher_len, 0), + variant->recs[i]->cipher_len); + + rec = &variant->recs[0]; + rec_off = 0; + for (i = 0; i < 4; i++) { + int j, ret; + + ret = variant->recv_ret[i] >= 0 ? variant->recv_ret[i] : -1; + EXPECT_EQ(__tls_recv_cmsg(_metadata, self->cfd, NULL, + buf, sizeof(buf), MSG_DONTWAIT), ret); + if (ret == -1) + EXPECT_EQ(errno, -variant->recv_ret[i]); + if (variant->recv_ret[i] == -EAGAIN) + break; + + for (j = 0; j < ret; j++) { + while (rec_off == (*rec)->plain_len) { + rec++; + rec_off = 0; + } + EXPECT_EQ(buf[j], (*rec)->plain_data[rec_off]); + rec_off++; + } + } +}; + FIXTURE(tls_err) { int fd, cfd; From 8c5d95988c34f0aeba1f34cd5e4ba69494c90c5f Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Wed, 20 Aug 2025 12:09:18 +0530 Subject: [PATCH 2269/2411] Octeontx2-af: Skip overlap check for SPI field Octeontx2/CN10K silicon supports generating a 256-bit key per packet. 
The specific fields to be extracted from a packet for key generation are configurable via a Key Extraction (MKEX) Profile. The AF driver scans the configured extraction profile to ensure that fields from upper layers do not overwrite fields from lower layers in the key. Example Packet Field Layout: LA: DMAC + SMAC LB: VLAN LC: IPv4/IPv6 LD: TCP/UDP Valid MKEX Profile Configuration: LA -> DMAC -> key_offset[0-5] LC -> SIP -> key_offset[20-23] LD -> SPORT -> key_offset[30-31] Invalid MKEX profile configuration: LA -> DMAC -> key_offset[0-5] LC -> SIP -> key_offset[20-23] LD -> SPORT -> key_offset[2-3] // Overlaps with DMAC field In another scenario, if the MKEX profile is configured to extract the SPI field from both AH and ESP headers at the same key offset, the driver rejects this configuration. In regular traffic, an IPsec packet will have either an AH (LD) or an ESP (LE) header, so the two SPI extractions cannot collide. This patch relaxes the overlap check for this case. Fixes: 12aa0a3b93f3 ("octeontx2-af: Harden rule validation.") Signed-off-by: Hariprasad Kelam Link: https://patch.msgid.link/20250820063919.1463518-1-hkelam@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 1b765045aa63..b56395ac5a74 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -606,8 +606,8 @@ static void npc_set_features(struct rvu *rvu, int blkaddr, u8 intf) if (!npc_check_field(rvu, blkaddr, NPC_LB, intf)) *features &= ~BIT_ULL(NPC_OUTER_VID); - /* Set SPI flag only if AH/ESP and IPSEC_SPI are in the key */ - if (npc_check_field(rvu, blkaddr, NPC_IPSEC_SPI, intf) && + /* Allow extracting SPI field from AH and ESP headers at same offset */ + if (npc_is_field_present(rvu, NPC_IPSEC_SPI, intf) && (*features &
(BIT_ULL(NPC_IPPROTO_ESP) | BIT_ULL(NPC_IPPROTO_AH)))) *features |= BIT_ULL(NPC_IPSEC_SPI); From 1c67f9c54cdc70627e3f6472b89cd3d895df974c Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Wed, 20 Aug 2025 15:27:07 +0200 Subject: [PATCH 2270/2411] net: pse-pd: pd692x0: Fix power budget leak in manager setup error path Fix a resource leak where manager power budgets were freed on both success and error paths during manager setup. Power budgets should only be freed on error paths after regulator registration or during driver removal. Refactor cleanup logic by extracting OF node cleanup and power budget freeing into separate helper functions for better maintainability. Fixes: 359754013e6a ("net: pse-pd: pd692x0: Add support for PSE PI priority feature") Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20250820132708.837255-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/pse-pd/pd692x0.c | 59 +++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/drivers/net/pse-pd/pd692x0.c b/drivers/net/pse-pd/pd692x0.c index 399ce9febda4..395f6c662175 100644 --- a/drivers/net/pse-pd/pd692x0.c +++ b/drivers/net/pse-pd/pd692x0.c @@ -1162,12 +1162,44 @@ pd692x0_write_ports_matrix(struct pd692x0_priv *priv, return 0; } +static void pd692x0_of_put_managers(struct pd692x0_priv *priv, + struct pd692x0_manager *manager, + int nmanagers) +{ + int i, j; + + for (i = 0; i < nmanagers; i++) { + for (j = 0; j < manager[i].nports; j++) + of_node_put(manager[i].port_node[j]); + of_node_put(manager[i].node); + } +} + +static void pd692x0_managers_free_pw_budget(struct pd692x0_priv *priv) +{ + int i; + + for (i = 0; i < PD692X0_MAX_MANAGERS; i++) { + struct regulator *supply; + + if (!priv->manager_reg[i] || !priv->manager_pw_budget[i]) + continue; + + supply = priv->manager_reg[i]->supply; + if (!supply) + continue; + + regulator_free_power_budget(supply, + priv->manager_pw_budget[i]); + } +} + static int 
pd692x0_setup_pi_matrix(struct pse_controller_dev *pcdev) { struct pd692x0_manager *manager __free(kfree) = NULL; struct pd692x0_priv *priv = to_pd692x0_priv(pcdev); struct pd692x0_matrix port_matrix[PD692X0_MAX_PIS]; - int ret, i, j, nmanagers; + int ret, nmanagers; /* Should we flash the port matrix */ if (priv->fw_state != PD692X0_FW_OK && @@ -1185,31 +1217,27 @@ static int pd692x0_setup_pi_matrix(struct pse_controller_dev *pcdev) nmanagers = ret; ret = pd692x0_register_managers_regulator(priv, manager, nmanagers); if (ret) - goto out; + goto err_of_managers; ret = pd692x0_configure_managers(priv, nmanagers); if (ret) - goto out; + goto err_of_managers; ret = pd692x0_set_ports_matrix(priv, manager, nmanagers, port_matrix); if (ret) - goto out; + goto err_managers_req_pw; ret = pd692x0_write_ports_matrix(priv, port_matrix); if (ret) - goto out; + goto err_managers_req_pw; -out: - for (i = 0; i < nmanagers; i++) { - struct regulator *supply = priv->manager_reg[i]->supply; + pd692x0_of_put_managers(priv, manager, nmanagers); + return 0; - regulator_free_power_budget(supply, - priv->manager_pw_budget[i]); - - for (j = 0; j < manager[i].nports; j++) - of_node_put(manager[i].port_node[j]); - of_node_put(manager[i].node); - } +err_managers_req_pw: + pd692x0_managers_free_pw_budget(priv); +err_of_managers: + pd692x0_of_put_managers(priv, manager, nmanagers); return ret; } @@ -1748,6 +1776,7 @@ static void pd692x0_i2c_remove(struct i2c_client *client) { struct pd692x0_priv *priv = i2c_get_clientdata(client); + pd692x0_managers_free_pw_budget(priv); firmware_upload_unregister(priv->fwl); } From 7ef353879f714602b43f98662069f4fb86536761 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Wed, 20 Aug 2025 15:33:21 +0200 Subject: [PATCH 2271/2411] net: pse-pd: pd692x0: Skip power budget configuration when undefined If the power supply's power budget is not defined in the device tree, the current code still requests power and configures the PSE manager with a 0W power limit, 
which is undesirable behavior. Skip power budget configuration entirely when the budget is zero, avoiding unnecessary power requests and preventing invalid 0W limits from being set on the PSE manager. Fixes: 359754013e6a ("net: pse-pd: pd692x0: Add support for PSE PI priority feature") Signed-off-by: Kory Maincent Acked-by: Oleksij Rempel Link: https://patch.msgid.link/20250820133321.841054-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/pse-pd/pd692x0.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/pse-pd/pd692x0.c b/drivers/net/pse-pd/pd692x0.c index 395f6c662175..f4e91ba64a66 100644 --- a/drivers/net/pse-pd/pd692x0.c +++ b/drivers/net/pse-pd/pd692x0.c @@ -1041,6 +1041,10 @@ pd692x0_configure_managers(struct pd692x0_priv *priv, int nmanagers) int pw_budget; pw_budget = regulator_get_unclaimed_power_budget(supply); + if (!pw_budget) + /* Do nothing if no power budget */ + continue; + /* Max power budget per manager */ if (pw_budget > 6000000) pw_budget = 6000000; From bc17455bc843b2f4b206e0bb8139013eb3d3c08b Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Wed, 20 Aug 2025 16:32:02 +0300 Subject: [PATCH 2272/2411] net/mlx5: Base ECVF devlink port attrs from 0 Adjust the vport number by the base ECVF vport number so the port attributes start at 0. Previously the port attributes would start 1 after the maximum number of host VFs. 
Fixes: dc13180824b7 ("net/mlx5: Enable devlink port for embedded cpu VF vports") Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Reviewed-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250820133209.389065-2-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index b7102e14d23d..c33accadae0f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -47,10 +47,12 @@ static void mlx5_esw_offloads_pf_vf_devlink_port_attrs_set(struct mlx5_eswitch * devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum, vport_num - 1, external); } else if (mlx5_core_is_ec_vf_vport(esw->dev, vport_num)) { + u16 base_vport = mlx5_core_ec_vf_vport_base(dev); + memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); dl_port->attrs.switch_id.id_len = ppid.id_len; devlink_port_attrs_pci_vf_set(dl_port, 0, pfnum, - vport_num - 1, false); + vport_num - base_vport, false); } } From 330f0f6713a39581936decac72331e6ab7f13529 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Wed, 20 Aug 2025 16:32:03 +0300 Subject: [PATCH 2273/2411] net/mlx5: Remove default QoS group and attach vports directly to root TSAR Currently, the driver creates a default group (`node0`) and attaches all vports to it unless the user explicitly sets a parent group. As a result, when a user configures tx_share on a group and tx_share on a VF, the expectation is for the group and the VF to share bandwidth relatively. However, since the VF is not connected to the same parent (but to the default node), the proportional share logic is not applied correctly. 
To fix this, remove the default group (`node0`) and instead connect vports directly to the root TSAR when no parent is specified. This ensures that vports and groups share the same root scheduler and their tx_share values are compared directly under the same hierarchy. Fixes: 0fe132eac38c ("net/mlx5: E-switch, Allow to add vports to rate groups") Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250820133209.389065-3-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 97 +++++++------------ .../net/ethernet/mellanox/mlx5/core/eswitch.h | 5 - 2 files changed, 33 insertions(+), 69 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 91d863c8c152..cd58d3934596 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -462,6 +462,7 @@ static int esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node, struct netlink_ext_ack *extack) { + struct mlx5_esw_sched_node *parent = vport_node->parent; u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_core_dev *dev = vport_node->esw->dev; void *attr; @@ -477,7 +478,7 @@ esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node, attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); MLX5_SET(vport_element, attr, vport_number, vport_node->vport->vport); MLX5_SET(scheduling_context, sched_ctx, parent_element_id, - vport_node->parent->ix); + parent ? 
parent->ix : vport_node->esw->qos.root_tsar_ix); MLX5_SET(scheduling_context, sched_ctx, max_average_bw, vport_node->max_rate); @@ -786,48 +787,15 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta return err; } - if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) { - esw->qos.node0 = __esw_qos_create_vports_sched_node(esw, NULL, extack); - } else { - /* The eswitch doesn't support scheduling nodes. - * Create a software-only node0 using the root TSAR to attach vport QoS to. - */ - if (!__esw_qos_alloc_node(esw, - esw->qos.root_tsar_ix, - SCHED_NODE_TYPE_VPORTS_TSAR, - NULL)) - esw->qos.node0 = ERR_PTR(-ENOMEM); - else - list_add_tail(&esw->qos.node0->entry, - &esw->qos.domain->nodes); - } - if (IS_ERR(esw->qos.node0)) { - err = PTR_ERR(esw->qos.node0); - esw_warn(dev, "E-Switch create rate node 0 failed (%d)\n", err); - goto err_node0; - } refcount_set(&esw->qos.refcnt, 1); return 0; - -err_node0: - if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, - esw->qos.root_tsar_ix)) - esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n"); - - return err; } static void esw_qos_destroy(struct mlx5_eswitch *esw) { int err; - if (esw->qos.node0->ix != esw->qos.root_tsar_ix) - __esw_qos_destroy_node(esw->qos.node0, NULL); - else - __esw_qos_free_node(esw->qos.node0); - esw->qos.node0 = NULL; - err = mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, esw->qos.root_tsar_ix); @@ -990,13 +958,16 @@ esw_qos_vport_tc_enable(struct mlx5_vport *vport, enum sched_node_type type, struct netlink_ext_ack *extack) { struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; - int err, new_level, max_level; + struct mlx5_esw_sched_node *parent = vport_node->parent; + int err; if (type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) { + int new_level, max_level; + /* Increase the parent's level by 2 to account for both the * TC arbiter and the vports TC scheduling element. 
*/ - new_level = vport_node->parent->level + 2; + new_level = (parent ? parent->level : 2) + 2; max_level = 1 << MLX5_CAP_QOS(vport_node->esw->dev, log_esw_max_sched_depth); if (new_level > max_level) { @@ -1033,9 +1004,7 @@ esw_qos_vport_tc_enable(struct mlx5_vport *vport, enum sched_node_type type, err_sched_nodes: if (type == SCHED_NODE_TYPE_RATE_LIMITER) { esw_qos_node_destroy_sched_element(vport_node, NULL); - list_add_tail(&vport_node->entry, - &vport_node->parent->children); - vport_node->level = vport_node->parent->level + 1; + esw_qos_node_attach_to_parent(vport_node); } else { esw_qos_tc_arbiter_scheduling_teardown(vport_node, NULL); } @@ -1083,7 +1052,6 @@ static int esw_qos_set_vport_tcs_min_rate(struct mlx5_vport *vport, static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_ack *extack) { struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; - struct mlx5_esw_sched_node *parent = vport_node->parent; enum sched_node_type curr_type = vport_node->type; if (curr_type == SCHED_NODE_TYPE_VPORT) @@ -1093,7 +1061,7 @@ static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_a vport_node->bw_share = 0; list_del_init(&vport_node->entry); - esw_qos_normalize_min_rate(parent->esw, parent, extack); + esw_qos_normalize_min_rate(vport_node->esw, vport_node->parent, extack); trace_mlx5_esw_vport_qos_destroy(vport_node->esw->dev, vport); } @@ -1103,25 +1071,23 @@ static int esw_qos_vport_enable(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent, struct netlink_ext_ack *extack) { + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; int err; esw_assert_qos_lock_held(vport->dev->priv.eswitch); - esw_qos_node_set_parent(vport->qos.sched_node, parent); - if (type == SCHED_NODE_TYPE_VPORT) { - err = esw_qos_vport_create_sched_element(vport->qos.sched_node, - extack); - } else { + esw_qos_node_set_parent(vport_node, parent); + if (type == SCHED_NODE_TYPE_VPORT) + err = 
esw_qos_vport_create_sched_element(vport_node, extack); + else err = esw_qos_vport_tc_enable(vport, type, extack); - } if (err) return err; - vport->qos.sched_node->type = type; - esw_qos_normalize_min_rate(parent->esw, parent, extack); - trace_mlx5_esw_vport_qos_create(vport->dev, vport, - vport->qos.sched_node->max_rate, - vport->qos.sched_node->bw_share); + vport_node->type = type; + esw_qos_normalize_min_rate(vport_node->esw, parent, extack); + trace_mlx5_esw_vport_qos_create(vport->dev, vport, vport_node->max_rate, + vport_node->bw_share); return 0; } @@ -1132,6 +1098,7 @@ static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_t { struct mlx5_eswitch *esw = vport->dev->priv.eswitch; struct mlx5_esw_sched_node *sched_node; + struct mlx5_eswitch *parent_esw; int err; esw_assert_qos_lock_held(esw); @@ -1139,10 +1106,12 @@ static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_t if (err) return err; - parent = parent ?: esw->qos.node0; - sched_node = __esw_qos_alloc_node(parent->esw, 0, type, parent); + parent_esw = parent ? 
parent->esw : esw; + sched_node = __esw_qos_alloc_node(parent_esw, 0, type, parent); if (!sched_node) return -ENOMEM; + if (!parent) + list_add_tail(&sched_node->entry, &esw->qos.domain->nodes); sched_node->max_rate = max_rate; sched_node->min_rate = min_rate; @@ -1168,7 +1137,7 @@ void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport) goto unlock; parent = vport->qos.sched_node->parent; - WARN(parent != esw->qos.node0, "Disabling QoS on port before detaching it from node"); + WARN(parent, "Disabling QoS on port before detaching it from node"); esw_qos_vport_disable(vport, NULL); mlx5_esw_qos_vport_qos_free(vport); @@ -1268,7 +1237,6 @@ static int esw_qos_vport_update(struct mlx5_vport *vport, int err; esw_assert_qos_lock_held(vport->dev->priv.eswitch); - parent = parent ?: curr_parent; if (curr_type == type && curr_parent == parent) return 0; @@ -1306,16 +1274,16 @@ static int esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw esw_assert_qos_lock_held(esw); curr_parent = vport->qos.sched_node->parent; - parent = parent ?: esw->qos.node0; if (curr_parent == parent) return 0; /* Set vport QoS type based on parent node type if different from * default QoS; otherwise, use the vport's current QoS type. */ - if (parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) + if (parent && parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) type = SCHED_NODE_TYPE_RATE_LIMITER; - else if (curr_parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) + else if (curr_parent && + curr_parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) type = SCHED_NODE_TYPE_VPORT; else type = vport->qos.sched_node->type; @@ -1654,9 +1622,10 @@ static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw, static bool esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport *vport, u32 *tc_bw) { - struct mlx5_eswitch *esw = vport->qos.sched_node ? 
- vport->qos.sched_node->parent->esw : - vport->dev->priv.eswitch; + struct mlx5_esw_sched_node *node = vport->qos.sched_node; + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; + + esw = (node && node->parent) ? node->parent->esw : esw; return esw_qos_validate_unsupported_tc_bw(esw, tc_bw); } @@ -1763,7 +1732,7 @@ int mlx5_esw_devlink_rate_leaf_tc_bw_set(struct devlink_rate *rate_leaf, if (disable) { if (vport_node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) err = esw_qos_vport_update(vport, SCHED_NODE_TYPE_VPORT, - NULL, extack); + vport_node->parent, extack); goto unlock; } @@ -1775,7 +1744,7 @@ int mlx5_esw_devlink_rate_leaf_tc_bw_set(struct devlink_rate *rate_leaf, } else { err = esw_qos_vport_update(vport, SCHED_NODE_TYPE_TC_ARBITER_TSAR, - NULL, extack); + vport_node->parent, extack); } if (!err) esw_qos_set_tc_arbiter_bw_shares(vport_node, tc_bw, extack); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index b0b8ef3ec3c4..45506ad56847 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -373,11 +373,6 @@ struct mlx5_eswitch { refcount_t refcnt; u32 root_tsar_ix; struct mlx5_qos_domain *domain; - /* Contains all vports with QoS enabled but no explicit node. - * Cannot be NULL if QoS is enabled, but may be a fake node - * referencing the root TSAR if the esw doesn't support nodes. - */ - struct mlx5_esw_sched_node *node0; } qos; struct mlx5_esw_bridge_offloads *br_offloads; From e8f973576ca5387ffd2917b8ae661d3f9acde526 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Wed, 20 Aug 2025 16:32:04 +0300 Subject: [PATCH 2274/2411] net/mlx5e: Preserve tc-bw during parent changes When changing parent of a node/leaf with tc-bw configured, the code saves and restores tc-bw values. 
However, it was reading the converted hardware bw_share values (where 0 becomes 1) instead of the original user values, causing incorrect tc-bw calculations after parent change. Store original tc-bw values in the node structure and use them directly for save/restore operations. Fixes: cf7e73770d1b ("net/mlx5: Manage TC arbiter nodes and implement full support for tc-bw") Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250820133209.389065-4-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index cd58d3934596..4ed5968f1638 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -102,6 +102,8 @@ struct mlx5_esw_sched_node { u8 level; /* Valid only when this node represents a traffic class. */ u8 tc; + /* Valid only for a TC arbiter node or vport TC arbiter. 
*/ + u32 tc_bw[DEVLINK_RATE_TCS_MAX]; }; static void esw_qos_node_attach_to_parent(struct mlx5_esw_sched_node *node) @@ -609,10 +611,7 @@ static void esw_qos_tc_arbiter_get_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node, u32 *tc_bw) { - struct mlx5_esw_sched_node *vports_tc_node; - - list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) - tc_bw[vports_tc_node->tc] = vports_tc_node->bw_share; + memcpy(tc_bw, tc_arbiter_node->tc_bw, sizeof(tc_arbiter_node->tc_bw)); } static void @@ -629,6 +628,7 @@ esw_qos_set_tc_arbiter_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node, u8 tc = vports_tc_node->tc; u32 bw_share; + tc_arbiter_node->tc_bw[tc] = tc_bw[tc]; bw_share = tc_bw[tc] * fw_max_bw_share; bw_share = esw_qos_calc_bw_share(bw_share, divider, fw_max_bw_share); @@ -1060,6 +1060,7 @@ static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_a esw_qos_vport_tc_disable(vport, extack); vport_node->bw_share = 0; + memset(vport_node->tc_bw, 0, sizeof(vport_node->tc_bw)); list_del_init(&vport_node->entry); esw_qos_normalize_min_rate(vport_node->esw, vport_node->parent, extack); @@ -1231,8 +1232,9 @@ static int esw_qos_vport_update(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent, struct netlink_ext_ack *extack) { - struct mlx5_esw_sched_node *curr_parent = vport->qos.sched_node->parent; - enum sched_node_type curr_type = vport->qos.sched_node->type; + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; + struct mlx5_esw_sched_node *curr_parent = vport_node->parent; + enum sched_node_type curr_type = vport_node->type; u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0}; int err; @@ -1244,10 +1246,8 @@ static int esw_qos_vport_update(struct mlx5_vport *vport, if (err) return err; - if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) { - esw_qos_tc_arbiter_get_bw_shares(vport->qos.sched_node, - curr_tc_bw); - } + if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) + 
esw_qos_tc_arbiter_get_bw_shares(vport_node, curr_tc_bw); esw_qos_vport_disable(vport, extack); @@ -1258,8 +1258,8 @@ static int esw_qos_vport_update(struct mlx5_vport *vport, } if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) { - esw_qos_set_tc_arbiter_bw_shares(vport->qos.sched_node, - curr_tc_bw, extack); + esw_qos_set_tc_arbiter_bw_shares(vport_node, curr_tc_bw, + extack); } return err; From b697ef4d1d136948d282384e6cc3d1af469ea123 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Wed, 20 Aug 2025 16:32:05 +0300 Subject: [PATCH 2275/2411] net/mlx5: Destroy vport QoS element when no configuration remains If a VF has been configured and the user later clears all QoS settings, the vport element remains in the firmware QoS tree. This leads to inconsistent behavior compared to VFs that were never configured, since the FW assumes that unconfigured VFs are outside the QoS hierarchy. As a result, the bandwidth share across VFs may differ, even though none of them appear to have any configuration. Align the driver behavior with the FW expectation by destroying the vport QoS element when all configurations are removed. 
Fixes: c9497c98901c ("net/mlx5: Add support for setting VF min rate") Fixes: cf7e73770d1b ("net/mlx5: Manage TC arbiter nodes and implement full support for tc-bw") Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Signed-off-by: Mark Bloch Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/20250820133209.389065-5-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 57 ++++++++++++++++--- 1 file changed, 49 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 4ed5968f1638..452a948a3e6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -1127,6 +1127,19 @@ static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_t return err; } +static void mlx5_esw_qos_vport_disable_locked(struct mlx5_vport *vport) +{ + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; + + esw_assert_qos_lock_held(esw); + if (!vport->qos.sched_node) + return; + + esw_qos_vport_disable(vport, NULL); + mlx5_esw_qos_vport_qos_free(vport); + esw_qos_put(esw); +} + void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport) { struct mlx5_eswitch *esw = vport->dev->priv.eswitch; @@ -1140,9 +1153,7 @@ void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport) parent = vport->qos.sched_node->parent; WARN(parent, "Disabling QoS on port before detaching it from node"); - esw_qos_vport_disable(vport, NULL); - mlx5_esw_qos_vport_qos_free(vport); - esw_qos_put(esw); + mlx5_esw_qos_vport_disable_locked(vport); unlock: esw_qos_unlock(esw); } @@ -1642,6 +1653,21 @@ static bool esw_qos_tc_bw_disabled(u32 *tc_bw) return true; } +static void esw_vport_qos_prune_empty(struct mlx5_vport *vport) +{ + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; + + esw_assert_qos_lock_held(vport->dev->priv.eswitch); + if (!vport_node) + return; + + if 
(vport_node->parent || vport_node->max_rate || + vport_node->min_rate || !esw_qos_tc_bw_disabled(vport_node->tc_bw)) + return; + + mlx5_esw_qos_vport_disable_locked(vport); +} + int mlx5_esw_qos_init(struct mlx5_eswitch *esw) { if (esw->qos.domain) @@ -1675,6 +1701,10 @@ int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void esw_qos_lock(esw); err = mlx5_esw_qos_set_vport_min_rate(vport, tx_share, extack); + if (err) + goto out; + esw_vport_qos_prune_empty(vport); +out: esw_qos_unlock(esw); return err; } @@ -1696,6 +1726,10 @@ int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void * esw_qos_lock(esw); err = mlx5_esw_qos_set_vport_max_rate(vport, tx_max, extack); + if (err) + goto out; + esw_vport_qos_prune_empty(vport); +out: esw_qos_unlock(esw); return err; } @@ -1733,6 +1767,7 @@ int mlx5_esw_devlink_rate_leaf_tc_bw_set(struct devlink_rate *rate_leaf, if (vport_node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) err = esw_qos_vport_update(vport, SCHED_NODE_TYPE_VPORT, vport_node->parent, extack); + esw_vport_qos_prune_empty(vport); goto unlock; } @@ -1893,14 +1928,20 @@ int mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate *devlink_rate, void *priv, void *parent_priv, struct netlink_ext_ack *extack) { - struct mlx5_esw_sched_node *node; + struct mlx5_esw_sched_node *node = parent ? 
parent_priv : NULL; struct mlx5_vport *vport = priv; + int err; - if (!parent) - return mlx5_esw_qos_vport_update_parent(vport, NULL, extack); + err = mlx5_esw_qos_vport_update_parent(vport, node, extack); + if (!err) { + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; - node = parent_priv; - return mlx5_esw_qos_vport_update_parent(vport, node, extack); + esw_qos_lock(esw); + esw_vport_qos_prune_empty(vport); + esw_qos_unlock(esw); + } + + return err; } static bool esw_qos_is_node_empty(struct mlx5_esw_sched_node *node) From 3c114fb2afe493066df5b9e560ef37216b153c90 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Wed, 20 Aug 2025 16:32:06 +0300 Subject: [PATCH 2276/2411] net/mlx5: Fix QoS reference leak in vport enable error path Add missing esw_qos_put() call when __esw_qos_alloc_node() fails in mlx5_esw_qos_vport_enable(). Fixes: be034baba83e ("net/mlx5: Make vport QoS enablement more flexible for future extensions") Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250820133209.389065-6-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 452a948a3e6d..41aec07bb6c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -1109,8 +1109,10 @@ static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_t parent_esw = parent ? 
parent->esw : esw; sched_node = __esw_qos_alloc_node(parent_esw, 0, type, parent); - if (!sched_node) + if (!sched_node) { + esw_qos_put(esw); return -ENOMEM; + } if (!parent) list_add_tail(&sched_node->entry, &esw->qos.domain->nodes); From 51b17c98e3dbb2093a81b0490050a0eaa919ebee Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Wed, 20 Aug 2025 16:32:07 +0300 Subject: [PATCH 2277/2411] net/mlx5: Restore missing scheduling node cleanup on vport enable failure Restore the __esw_qos_free_node() call removed by the offending commit. Fixes: 97733d1e00a0 ("net/mlx5: Add traffic class scheduling support for vport QoS") Signed-off-by: Carolina Jubran Reviewed-by: Tariq Toukan Reviewed-by: Cosmin Ratiu Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250820133209.389065-7-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 41aec07bb6c2..8b4977650183 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -1122,6 +1122,7 @@ static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_t vport->qos.sched_node = sched_node; err = esw_qos_vport_enable(vport, type, parent, extack); if (err) { + __esw_qos_free_node(sched_node); esw_qos_put(esw); vport->qos.sched_node = NULL; } From 451d2849ea66659040b59ae3cb7e50cc97404733 Mon Sep 17 00:00:00 2001 From: Alexei Lazar Date: Wed, 20 Aug 2025 16:32:08 +0300 Subject: [PATCH 2278/2411] net/mlx5e: Query FW for buffer ownership The SW currently saves local buffer ownership when setting the buffer. This means that the SW assumes it has ownership of the buffer after the command is set. If setting the buffer fails and we remain in FW ownership, the local buffer ownership state incorrectly remains as SW-owned. 
This leads to incorrect behavior in subsequent PFC commands, causing failures. Instead of saving local buffer ownership in SW, query the FW for buffer ownership when setting the buffer. This ensures that the buffer ownership state is accurately reflected, avoiding the issues caused by incorrect ownership states. Fixes: ecdf2dadee8e ("net/mlx5e: Receive buffer support for DCBX") Signed-off-by: Alexei Lazar Reviewed-by: Shahar Shitrit Reviewed-by: Dragos Tatulea Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250820133209.389065-8-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/en/dcbnl.h | 1 - .../ethernet/mellanox/mlx5/core/en_dcbnl.c | 12 ++++++++--- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 2 ++ .../net/ethernet/mellanox/mlx5/core/port.c | 20 +++++++++++++++++++ 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h index b59aee75de94..2c98a5299df3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h @@ -26,7 +26,6 @@ struct mlx5e_dcbx { u8 cap; /* Buffer configuration */ - bool manual_buffer; u32 cable_len; u32 xoff; u16 port_buff_cell_sz; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 5fe016e477b3..d166c0d5189e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -362,6 +362,7 @@ static int mlx5e_dcbnl_ieee_getpfc(struct net_device *dev, static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc) { + u8 buffer_ownership = MLX5_BUF_OWNERSHIP_UNKNOWN; struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; u32 old_cable_len = priv->dcbx.cable_len; @@ -389,7 +390,14 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, if 
(MLX5_BUFFER_SUPPORTED(mdev)) { pfc_new.pfc_en = (changed & MLX5E_PORT_BUFFER_PFC) ? pfc->pfc_en : curr_pfc_en; - if (priv->dcbx.manual_buffer) + ret = mlx5_query_port_buffer_ownership(mdev, + &buffer_ownership); + if (ret) + netdev_err(dev, + "%s, Failed to get buffer ownership: %d\n", + __func__, ret); + + if (buffer_ownership == MLX5_BUF_OWNERSHIP_SW_OWNED) ret = mlx5e_port_manual_buffer_config(priv, changed, dev->mtu, &pfc_new, NULL, NULL); @@ -982,7 +990,6 @@ static int mlx5e_dcbnl_setbuffer(struct net_device *dev, if (!changed) return 0; - priv->dcbx.manual_buffer = true; err = mlx5e_port_manual_buffer_config(priv, changed, dev->mtu, NULL, buffer_size, prio2buffer); return err; @@ -1252,7 +1259,6 @@ void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) priv->dcbx.cap |= DCB_CAP_DCBX_HOST; priv->dcbx.port_buff_cell_sz = mlx5e_query_port_buffers_cell_size(priv); - priv->dcbx.manual_buffer = false; priv->dcbx.cable_len = MLX5E_DEFAULT_CABLE_LEN; mlx5e_ets_init(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index b6d53db27cd5..9d3504f5abfa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -367,6 +367,8 @@ int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out); int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in); int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state); int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state); +int mlx5_query_port_buffer_ownership(struct mlx5_core_dev *mdev, + u8 *buffer_ownership); int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio); int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 549f1066d2a5..2d7adf7444ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -968,6 +968,26 @@ int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state) return err; } +int mlx5_query_port_buffer_ownership(struct mlx5_core_dev *mdev, + u8 *buffer_ownership) +{ + u32 out[MLX5_ST_SZ_DW(pfcc_reg)] = {}; + int err; + + if (!MLX5_CAP_PCAM_FEATURE(mdev, buffer_ownership)) { + *buffer_ownership = MLX5_BUF_OWNERSHIP_UNKNOWN; + return 0; + } + + err = mlx5_query_pfcc_reg(mdev, out, sizeof(out)); + if (err) + return err; + + *buffer_ownership = MLX5_GET(pfcc_reg, out, buf_ownership); + + return 0; +} + int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio) { int sz = MLX5_ST_SZ_BYTES(qpdpm_reg); From 8b0587a885fdb34fd6090a3f8625cb7ac1444826 Mon Sep 17 00:00:00 2001 From: Armen Ratner Date: Wed, 20 Aug 2025 16:32:09 +0300 Subject: [PATCH 2279/2411] net/mlx5e: Preserve shared buffer capacity during headroom updates When port buffer headroom changes, port_update_shared_buffer() recalculates the shared buffer size and splits it in a 3:1 ratio (lossy:lossless) - Currently, the calculation is: lossless = shared / 4; lossy = (shared / 4) * 3; Meaning, the calculation dropped the remainder of shared % 4 due to integer division, unintentionally reducing the total shared buffer by up to three cells on each update. Over time, this could shrink the buffer below usable size. Fix it by changing the calculation to: lossless = shared / 4; lossy = shared - lossless; This retains all buffer cells while still approximating the intended 3:1 split, preventing capacity loss over time. While at it, perform headroom calculations in units of cells rather than in bytes for more accurate calculations avoiding extra divisions. 
Fixes: a440030d8946 ("net/mlx5e: Update shared buffer along with device buffer changes") Signed-off-by: Armen Ratner Signed-off-by: Maher Sanalla Reviewed-by: Tariq Toukan Signed-off-by: Alexei Lazar Signed-off-by: Mark Bloch Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/20250820133209.389065-9-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en/port_buffer.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 5ae787656a7c..3efa8bf1d14e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -272,8 +272,8 @@ static int port_update_shared_buffer(struct mlx5_core_dev *mdev, /* Total shared buffer size is split in a ratio of 3:1 between * lossy and lossless pools respectively. */ - lossy_epool_size = (shared_buffer_size / 4) * 3; lossless_ipool_size = shared_buffer_size / 4; + lossy_epool_size = shared_buffer_size - lossless_ipool_size; mlx5e_port_set_sbpr(mdev, 0, MLX5_EGRESS_DIR, MLX5_LOSSY_POOL, 0, lossy_epool_size); @@ -288,14 +288,12 @@ static int port_set_buffer(struct mlx5e_priv *priv, u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz; struct mlx5_core_dev *mdev = priv->mdev; int sz = MLX5_ST_SZ_BYTES(pbmc_reg); - u32 new_headroom_size = 0; - u32 current_headroom_size; + u32 current_headroom_cells = 0; + u32 new_headroom_cells = 0; void *in; int err; int i; - current_headroom_size = port_buffer->headroom_size; - in = kzalloc(sz, GFP_KERNEL); if (!in) return -ENOMEM; @@ -306,12 +304,14 @@ static int port_set_buffer(struct mlx5e_priv *priv, for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { void *buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]); + current_headroom_cells += MLX5_GET(bufferx_reg, buffer, size); + u64 size = port_buffer->buffer[i].size; u64 xoff = port_buffer->buffer[i].xoff; u64 
xon = port_buffer->buffer[i].xon; - new_headroom_size += size; do_div(size, port_buff_cell_sz); + new_headroom_cells += size; do_div(xoff, port_buff_cell_sz); do_div(xon, port_buff_cell_sz); MLX5_SET(bufferx_reg, buffer, size, size); @@ -320,10 +320,8 @@ static int port_set_buffer(struct mlx5e_priv *priv, MLX5_SET(bufferx_reg, buffer, xon_threshold, xon); } - new_headroom_size /= port_buff_cell_sz; - current_headroom_size /= port_buff_cell_sz; - err = port_update_shared_buffer(priv->mdev, current_headroom_size, - new_headroom_size); + err = port_update_shared_buffer(priv->mdev, current_headroom_cells, + new_headroom_cells); if (err) goto out; From 8410fe81093ff231e964891e215b624dabb734b0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 21 Aug 2025 17:08:34 +0200 Subject: [PATCH 2280/2411] ALSA: usb-audio: Use correct sub-type for UAC3 feature unit validation The entry of the validators table for UAC3 feature unit is defined with a wrong sub-type UAC_FEATURE (= 0x06) while it should have been UAC3_FEATURE (= 0x07). This patch corrects the entry value. 
Fixes: 57f8770620e9 ("ALSA: usb-audio: More validations of descriptor units") Link: https://patch.msgid.link/20250821150835.8894-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/validate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/validate.c b/sound/usb/validate.c index 4f4e8e87a14c..a0d55b77c994 100644 --- a/sound/usb/validate.c +++ b/sound/usb/validate.c @@ -285,7 +285,7 @@ static const struct usb_desc_validator audio_validators[] = { /* UAC_VERSION_3, UAC3_EXTENDED_TERMINAL: not implemented yet */ FUNC(UAC_VERSION_3, UAC3_MIXER_UNIT, validate_mixer_unit), FUNC(UAC_VERSION_3, UAC3_SELECTOR_UNIT, validate_selector_unit), - FUNC(UAC_VERSION_3, UAC_FEATURE_UNIT, validate_uac3_feature_unit), + FUNC(UAC_VERSION_3, UAC3_FEATURE_UNIT, validate_uac3_feature_unit), /* UAC_VERSION_3, UAC3_EFFECT_UNIT: not implemented yet */ FUNC(UAC_VERSION_3, UAC3_PROCESSING_UNIT, validate_processing_unit), FUNC(UAC_VERSION_3, UAC3_EXTENSION_UNIT, validate_processing_unit), From fec7bdfe7f8694a0c39e6c3ec026ff61ca1058b9 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Thu, 21 Aug 2025 14:35:40 +0200 Subject: [PATCH 2281/2411] s390/hypfs: Avoid unnecessary ioctl registration in debugfs Currently, hypfs registers ioctl callbacks for all debugfs files, despite only one file requiring them. This leads to unintended exposure of unused interfaces to user space and can trigger side effects such as restricted access when kernel lockdown is enabled. Restrict ioctl registration to only those files that implement ioctl functionality to avoid interface clutter and unnecessary access restrictions. 
Tested-by: Mete Durlu Reviewed-by: Vasily Gorbik Fixes: 5496197f9b08 ("debugfs: Restrict debugfs when the kernel is locked down") Signed-off-by: Peter Oberparleiter Signed-off-by: Alexander Gordeev --- arch/s390/hypfs/hypfs_dbfs.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index 5d9effb0867c..e74eb8f9b23a 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -66,23 +66,27 @@ static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) long rc; mutex_lock(&df->lock); - if (df->unlocked_ioctl) - rc = df->unlocked_ioctl(file, cmd, arg); - else - rc = -ENOTTY; + rc = df->unlocked_ioctl(file, cmd, arg); mutex_unlock(&df->lock); return rc; } -static const struct file_operations dbfs_ops = { +static const struct file_operations dbfs_ops_ioctl = { .read = dbfs_read, .unlocked_ioctl = dbfs_ioctl, }; +static const struct file_operations dbfs_ops = { + .read = dbfs_read, +}; + void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df) { - df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df, - &dbfs_ops); + const struct file_operations *fops = &dbfs_ops; + + if (df->unlocked_ioctl) + fops = &dbfs_ops_ioctl; + df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df, fops); mutex_init(&df->lock); } From 3868f910440c47cd5d158776be4ba4e2186beda7 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Thu, 21 Aug 2025 15:12:37 +0200 Subject: [PATCH 2282/2411] s390/hypfs: Enable limited access during lockdown When kernel lockdown is active, debugfs_locked_down() blocks access to hypfs files that register ioctl callbacks, even if the ioctl interface is not required for a function. This unnecessarily breaks userspace tools that only rely on read operations. Resolve this by registering a minimal set of file operations during lockdown, avoiding ioctl registration and preserving access for affected tooling. 
Note that this change restores hypfs functionality when lockdown is active from early boot (e.g. via lockdown=integrity kernel parameter), but does not apply to scenarios where lockdown is enabled dynamically while Linux is running. Tested-by: Mete Durlu Reviewed-by: Vasily Gorbik Fixes: 5496197f9b08 ("debugfs: Restrict debugfs when the kernel is locked down") Signed-off-by: Peter Oberparleiter Signed-off-by: Alexander Gordeev --- arch/s390/hypfs/hypfs_dbfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index e74eb8f9b23a..41a0d2066fa0 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -6,6 +6,7 @@ * Author(s): Michael Holzheu */ +#include <linux/security.h> #include <linux/slab.h> #include "hypfs.h" @@ -84,7 +85,7 @@ void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df) { const struct file_operations *fops = &dbfs_ops; - if (df->unlocked_ioctl) + if (df->unlocked_ioctl && !security_locked_down(LOCKDOWN_DEBUGFS)) fops = &dbfs_ops_ioctl; df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df, fops); mutex_init(&df->lock); From cb83b559bea39f207ee214ee2972657e8576ed18 Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Fri, 25 Jul 2025 11:42:18 -0700 Subject: [PATCH 2283/2411] idpf: add support for Tx refillqs in flow scheduling mode In certain production environments, it is possible for completion tags to collide, meaning N packets with the same completion tag are in flight at the same time. In this environment, any given Tx queue is effectively used to send both slower traffic and higher throughput traffic simultaneously. This is the result of a customer's specific configuration in the device pipeline, the details of which Intel cannot provide. This configuration results in a small number of out-of-order completions, i.e., a small number of packets in flight. The existing guardrails in the driver only protect against a large number of packets in flight.
The slower flow completions are delayed which causes the out-of-order completions. The fast flow will continue sending traffic and generating tags. Because tags are generated on the fly, the fast flow eventually uses the same tag for a packet that is still in flight from the slower flow. The driver has no idea which packet it should clean when it processes the completion with that tag, but it will look for the packet on the buffer ring before the hash table. If the slower flow packet completion is processed first, it will end up cleaning the fast flow packet on the ring prematurely. This leaves the descriptor ring in a bad state resulting in a crash or Tx timeout. In summary, generating a tag when a packet is sent can lead to the same tag being associated with multiple packets. This can lead to resource leaks, crashes, and/or Tx timeouts. Before we can replace the tag generation, we need a new mechanism for the send path to know what tag to use next. The driver will allocate and initialize a refillq for each TxQ with all of the possible free tag values. During send, the driver grabs the next free tag from the refillq from next_to_clean. While cleaning the packet, the clean routine posts the tag back to the refillq's next_to_use to indicate that it is now free to use. This mechanism works exactly the same way as the existing Rx refill queues, which post the cleaned buffer IDs back to the buffer queue to be reposted to HW. Since we're using the refillqs for both Rx and Tx now, genericize some of the existing refillq support. Note: the refillqs will not be used yet. This is only demonstrating how they will be used to pass free tags back to the send path. 
Signed-off-by: Joshua Hay Reviewed-by: Madhu Chittim Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 93 +++++++++++++++++++-- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 8 +- 2 files changed, 91 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 66a1b040639d..9b63944235fb 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -139,6 +139,9 @@ static void idpf_tx_desc_rel(struct idpf_tx_queue *txq) if (!txq->desc_ring) return; + if (txq->refillq) + kfree(txq->refillq->ring); + dmam_free_coherent(txq->dev, txq->size, txq->desc_ring, txq->dma); txq->desc_ring = NULL; txq->next_to_use = 0; @@ -244,6 +247,7 @@ static int idpf_tx_desc_alloc(const struct idpf_vport *vport, struct idpf_tx_queue *tx_q) { struct device *dev = tx_q->dev; + struct idpf_sw_queue *refillq; int err; err = idpf_tx_buf_alloc_all(tx_q); @@ -267,6 +271,29 @@ static int idpf_tx_desc_alloc(const struct idpf_vport *vport, tx_q->next_to_clean = 0; idpf_queue_set(GEN_CHK, tx_q); + if (!idpf_queue_has(FLOW_SCH_EN, tx_q)) + return 0; + + refillq = tx_q->refillq; + refillq->desc_count = tx_q->desc_count; + refillq->ring = kcalloc(refillq->desc_count, sizeof(u32), + GFP_KERNEL); + if (!refillq->ring) { + err = -ENOMEM; + goto err_alloc; + } + + for (unsigned int i = 0; i < refillq->desc_count; i++) + refillq->ring[i] = + FIELD_PREP(IDPF_RFL_BI_BUFID_M, i) | + FIELD_PREP(IDPF_RFL_BI_GEN_M, + idpf_queue_has(GEN_CHK, refillq)); + + /* Go ahead and flip the GEN bit since this counts as filling + * up the ring, i.e. we already ring wrapped. 
+ */ + idpf_queue_change(GEN_CHK, refillq); + return 0; err_alloc: @@ -603,18 +630,18 @@ static int idpf_rx_hdr_buf_alloc_all(struct idpf_buf_queue *bufq) } /** - * idpf_rx_post_buf_refill - Post buffer id to refill queue + * idpf_post_buf_refill - Post buffer id to refill queue * @refillq: refill queue to post to * @buf_id: buffer id to post */ -static void idpf_rx_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id) +static void idpf_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id) { u32 nta = refillq->next_to_use; /* store the buffer ID and the SW maintained GEN bit to the refillq */ refillq->ring[nta] = - FIELD_PREP(IDPF_RX_BI_BUFID_M, buf_id) | - FIELD_PREP(IDPF_RX_BI_GEN_M, + FIELD_PREP(IDPF_RFL_BI_BUFID_M, buf_id) | + FIELD_PREP(IDPF_RFL_BI_GEN_M, idpf_queue_has(GEN_CHK, refillq)); if (unlikely(++nta == refillq->desc_count)) { @@ -995,6 +1022,11 @@ static void idpf_txq_group_rel(struct idpf_vport *vport) struct idpf_txq_group *txq_grp = &vport->txq_grps[i]; for (j = 0; j < txq_grp->num_txq; j++) { + if (flow_sch_en) { + kfree(txq_grp->txqs[j]->refillq); + txq_grp->txqs[j]->refillq = NULL; + } + kfree(txq_grp->txqs[j]); txq_grp->txqs[j] = NULL; } @@ -1414,6 +1446,13 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) } idpf_queue_set(FLOW_SCH_EN, q); + + q->refillq = kzalloc(sizeof(*q->refillq), GFP_KERNEL); + if (!q->refillq) + goto err_alloc; + + idpf_queue_set(GEN_CHK, q->refillq); + idpf_queue_set(RFL_GEN_CHK, q->refillq); } if (!split) @@ -2005,6 +2044,8 @@ static void idpf_tx_handle_rs_completion(struct idpf_tx_queue *txq, compl_tag = le16_to_cpu(desc->q_head_compl_tag.compl_tag); + idpf_post_buf_refill(txq->refillq, compl_tag); + /* If we didn't clean anything on the ring, this packet must be * in the hash table. Go clean it there. 
*/ @@ -2364,6 +2405,37 @@ static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_tx_queue *txq, u16 ntu) return ntu; } +/** + * idpf_tx_get_free_buf_id - get a free buffer ID from the refill queue + * @refillq: refill queue to get buffer ID from + * @buf_id: return buffer ID + * + * Return: true if a buffer ID was found, false if not + */ +static bool idpf_tx_get_free_buf_id(struct idpf_sw_queue *refillq, + u16 *buf_id) +{ + u32 ntc = refillq->next_to_clean; + u32 refill_desc; + + refill_desc = refillq->ring[ntc]; + + if (unlikely(idpf_queue_has(RFL_GEN_CHK, refillq) != + !!(refill_desc & IDPF_RFL_BI_GEN_M))) + return false; + + *buf_id = FIELD_GET(IDPF_RFL_BI_BUFID_M, refill_desc); + + if (unlikely(++ntc == refillq->desc_count)) { + idpf_queue_change(RFL_GEN_CHK, refillq); + ntc = 0; + } + + refillq->next_to_clean = ntc; + + return true; +} + /** * idpf_tx_splitq_map - Build the Tx flex descriptor * @tx_q: queue to send buffer on @@ -2912,6 +2984,13 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, } if (idpf_queue_has(FLOW_SCH_EN, tx_q)) { + if (unlikely(!idpf_tx_get_free_buf_id(tx_q->refillq, + &tx_params.compl_tag))) { + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_inc(&tx_q->q_stats.q_busy); + u64_stats_update_end(&tx_q->stats_sync); + } + tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE; tx_params.eop_cmd = IDPF_TXD_FLEX_FLOW_CMD_EOP; /* Set the RE bit to catch any packets that may have not been @@ -3472,7 +3551,7 @@ static int idpf_rx_splitq_clean(struct idpf_rx_queue *rxq, int budget) skip_data: rx_buf->netmem = 0; - idpf_rx_post_buf_refill(refillq, buf_id); + idpf_post_buf_refill(refillq, buf_id); IDPF_RX_BUMP_NTC(rxq, ntc); /* skip if it is non EOP desc */ @@ -3580,10 +3659,10 @@ static void idpf_rx_clean_refillq(struct idpf_buf_queue *bufq, bool failure; if (idpf_queue_has(RFL_GEN_CHK, refillq) != - !!(refill_desc & IDPF_RX_BI_GEN_M)) + !!(refill_desc & IDPF_RFL_BI_GEN_M)) break; - buf_id = FIELD_GET(IDPF_RX_BI_BUFID_M, 
refill_desc); + buf_id = FIELD_GET(IDPF_RFL_BI_BUFID_M, refill_desc); failure = idpf_rx_update_bufq_desc(bufq, buf_id, buf_desc); if (failure) break; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 281de655a813..58232a1bd0a9 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -108,8 +108,8 @@ do { \ */ #define IDPF_TX_SPLITQ_RE_MIN_GAP 64 -#define IDPF_RX_BI_GEN_M BIT(16) -#define IDPF_RX_BI_BUFID_M GENMASK(15, 0) +#define IDPF_RFL_BI_GEN_M BIT(16) +#define IDPF_RFL_BI_BUFID_M GENMASK(15, 0) #define IDPF_RXD_EOF_SPLITQ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M #define IDPF_RXD_EOF_SINGLEQ VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M @@ -622,6 +622,7 @@ libeth_cacheline_set_assert(struct idpf_rx_queue, 64, * @cleaned_pkts: Number of packets cleaned for the above said case * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather * @stash: Tx buffer stash for Flow-based scheduling mode + * @refillq: Pointer to refill queue * @compl_tag_bufid_m: Completion tag buffer id mask * @compl_tag_cur_gen: Used to keep track of current completion tag generation * @compl_tag_gen_max: To determine when compl_tag_cur_gen should be reset @@ -671,6 +672,7 @@ struct idpf_tx_queue { u16 tx_max_bufs; struct idpf_txq_stash *stash; + struct idpf_sw_queue *refillq; u16 compl_tag_bufid_m; u16 compl_tag_cur_gen; @@ -692,7 +694,7 @@ struct idpf_tx_queue { __cacheline_group_end_aligned(cold); }; libeth_cacheline_set_assert(struct idpf_tx_queue, 64, - 112 + sizeof(struct u64_stats_sync), + 120 + sizeof(struct u64_stats_sync), 24); /** From f2d18e16479cac7a708d77cbfb4220a9114a71fc Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Fri, 25 Jul 2025 11:42:19 -0700 Subject: [PATCH 2284/2411] idpf: improve when to set RE bit logic Track the gap between next_to_use and the last RE index. Set RE again if the gap is large enough to ensure RE bit is set frequently. 
This is critical before removing the stashing mechanisms because the opportunistic descriptor ring cleaning from the out-of-order completions will go away. Previously the descriptors would be "cleaned" by both the descriptor (RE) completion and the out-of-order completions. Without the latter, we must ensure the RE bit is set more frequently. Otherwise, it's theoretically possible for the descriptor ring next_to_clean to never advance. The previous implementation was dependent on the start of a packet falling on a 64th index in the descriptor ring, which is not guaranteed with large packets. Signed-off-by: Luigi Rizzo Signed-off-by: Brian Vazquez Signed-off-by: Joshua Hay Reviewed-by: Madhu Chittim Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 20 +++++++++++++++++++- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 6 ++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 9b63944235fb..ee59153508af 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -294,6 +294,8 @@ static int idpf_tx_desc_alloc(const struct idpf_vport *vport, */ idpf_queue_change(GEN_CHK, refillq); + tx_q->last_re = tx_q->desc_count - IDPF_TX_SPLITQ_RE_MIN_GAP; + return 0; err_alloc: @@ -2912,6 +2914,21 @@ static void idpf_tx_set_tstamp_desc(union idpf_flex_tx_ctx_desc *ctx_desc, { } #endif /* CONFIG_PTP_1588_CLOCK */ +/** + * idpf_tx_splitq_need_re - check whether RE bit needs to be set + * @tx_q: pointer to Tx queue + * + * Return: true if RE bit needs to be set, false otherwise + */ +static bool idpf_tx_splitq_need_re(struct idpf_tx_queue *tx_q) +{ + int gap = tx_q->next_to_use - tx_q->last_re; + + gap += (gap < 0) ? 
tx_q->desc_count : 0; + + return gap >= IDPF_TX_SPLITQ_RE_MIN_GAP; +} + /** * idpf_tx_splitq_frame - Sends buffer on Tx ring using flex descriptors * @skb: send buffer @@ -2998,9 +3015,10 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, * MIN_RING size to ensure it will be set at least once each * time around the ring. */ - if (!(tx_q->next_to_use % IDPF_TX_SPLITQ_RE_MIN_GAP)) { + if (idpf_tx_splitq_need_re(tx_q)) { tx_params.eop_cmd |= IDPF_TXD_FLEX_FLOW_CMD_RE; tx_q->txq_grp->num_completions_pending++; + tx_q->last_re = tx_q->next_to_use; } if (skb->ip_summed == CHECKSUM_PARTIAL) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 58232a1bd0a9..c75ca5d3e57c 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -610,6 +610,8 @@ libeth_cacheline_set_assert(struct idpf_rx_queue, 64, * @netdev: &net_device corresponding to this queue * @next_to_use: Next descriptor to use * @next_to_clean: Next descriptor to clean + * @last_re: last descriptor index that RE bit was set + * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather * @cleaned_bytes: Splitq only, TXQ only: When a TX completion is received on * the TX completion queue, it can be for any TXQ associated * with that completion queue. This means we can clean up to @@ -620,7 +622,6 @@ libeth_cacheline_set_assert(struct idpf_rx_queue, 64, * only once at the end of the cleaning routine. 
* @clean_budget: singleq only, queue cleaning budget * @cleaned_pkts: Number of packets cleaned for the above said case - * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather * @stash: Tx buffer stash for Flow-based scheduling mode * @refillq: Pointer to refill queue * @compl_tag_bufid_m: Completion tag buffer id mask @@ -663,6 +664,8 @@ struct idpf_tx_queue { __cacheline_group_begin_aligned(read_write); u16 next_to_use; u16 next_to_clean; + u16 last_re; + u16 tx_max_bufs; union { u32 cleaned_bytes; @@ -670,7 +673,6 @@ struct idpf_tx_queue { }; u16 cleaned_pkts; - u16 tx_max_bufs; struct idpf_txq_stash *stash; struct idpf_sw_queue *refillq; From b61dfa9bc4430ad82b96d3a7c1c485350f91b467 Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Fri, 25 Jul 2025 11:42:20 -0700 Subject: [PATCH 2285/2411] idpf: simplify and fix splitq Tx packet rollback error path Move (and rename) the existing rollback logic to singleq.c since that will be the only consumer. Create a simplified splitq specific rollback function to loop through and unmap tx_bufs based on the completion tag. This is critical before replacing the Tx buffer ring with the buffer pool since the previous rollback indexing will not work to unmap the chained buffers from the pool. Cache the next_to_use index before any portion of the packet is put on the descriptor ring. In case of an error, the rollback will bump tail to the correct next_to_use value. Because the splitq path now supports different types of context descriptors (and potentially multiple in the future), this will take care of rolling back any and all context descriptors encoded on the ring for the erroneous packet. The previous rollback logic was broken for PTP packets since it would not account for the PTP context descriptor. 
Fixes: 1a49cf814fe1 ("idpf: add Tx timestamp flows") Signed-off-by: Joshua Hay Reviewed-by: Madhu Chittim Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- .../ethernet/intel/idpf/idpf_singleq_txrx.c | 57 +++++++++++- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 91 ++++++++----------- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 5 +- 3 files changed, 95 insertions(+), 58 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c index 555879b1248d..57c0f5ab8f9e 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -179,6 +179,58 @@ static int idpf_tx_singleq_csum(struct sk_buff *skb, return 1; } +/** + * idpf_tx_singleq_dma_map_error - handle TX DMA map errors + * @txq: queue to send buffer on + * @skb: send buffer + * @first: original first buffer info buffer for packet + * @idx: starting point on ring to unwind + */ +static void idpf_tx_singleq_dma_map_error(struct idpf_tx_queue *txq, + struct sk_buff *skb, + struct idpf_tx_buf *first, u16 idx) +{ + struct libeth_sq_napi_stats ss = { }; + struct libeth_cq_pp cp = { + .dev = txq->dev, + .ss = &ss, + }; + + u64_stats_update_begin(&txq->stats_sync); + u64_stats_inc(&txq->q_stats.dma_map_errs); + u64_stats_update_end(&txq->stats_sync); + + /* clear dma mappings for failed tx_buf map */ + for (;;) { + struct idpf_tx_buf *tx_buf; + + tx_buf = &txq->tx_buf[idx]; + libeth_tx_complete(tx_buf, &cp); + if (tx_buf == first) + break; + if (idx == 0) + idx = txq->desc_count; + idx--; + } + + if (skb_is_gso(skb)) { + union idpf_tx_flex_desc *tx_desc; + + /* If we failed a DMA mapping for a TSO packet, we will have + * used one additional descriptor for a context + * descriptor. Reset that here. 
+ */ + tx_desc = &txq->flex_tx[idx]; + memset(tx_desc, 0, sizeof(*tx_desc)); + if (idx == 0) + idx = txq->desc_count; + idx--; + } + + /* Update tail in case netdev_xmit_more was previously true */ + idpf_tx_buf_hw_update(txq, idx, false); +} + /** * idpf_tx_singleq_map - Build the Tx base descriptor * @tx_q: queue to send buffer on @@ -219,8 +271,9 @@ static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q, for (frag = &skb_shinfo(skb)->frags[0];; frag++) { unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; - if (dma_mapping_error(tx_q->dev, dma)) - return idpf_tx_dma_map_error(tx_q, skb, first, i); + if (unlikely(dma_mapping_error(tx_q->dev, dma))) + return idpf_tx_singleq_dma_map_error(tx_q, skb, + first, i); /* record length, and DMA address */ dma_unmap_len_set(tx_buf, len, size); diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index ee59153508af..527d56bcbbef 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -2339,57 +2339,6 @@ unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, return count; } -/** - * idpf_tx_dma_map_error - handle TX DMA map errors - * @txq: queue to send buffer on - * @skb: send buffer - * @first: original first buffer info buffer for packet - * @idx: starting point on ring to unwind - */ -void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, - struct idpf_tx_buf *first, u16 idx) -{ - struct libeth_sq_napi_stats ss = { }; - struct libeth_cq_pp cp = { - .dev = txq->dev, - .ss = &ss, - }; - - u64_stats_update_begin(&txq->stats_sync); - u64_stats_inc(&txq->q_stats.dma_map_errs); - u64_stats_update_end(&txq->stats_sync); - - /* clear dma mappings for failed tx_buf map */ - for (;;) { - struct idpf_tx_buf *tx_buf; - - tx_buf = &txq->tx_buf[idx]; - libeth_tx_complete(tx_buf, &cp); - if (tx_buf == first) - break; - if (idx == 0) - idx = txq->desc_count; - idx--; - } - - if 
(skb_is_gso(skb)) { - union idpf_tx_flex_desc *tx_desc; - - /* If we failed a DMA mapping for a TSO packet, we will have - * used one additional descriptor for a context - * descriptor. Reset that here. - */ - tx_desc = &txq->flex_tx[idx]; - memset(tx_desc, 0, sizeof(*tx_desc)); - if (idx == 0) - idx = txq->desc_count; - idx--; - } - - /* Update tail in case netdev_xmit_more was previously true */ - idpf_tx_buf_hw_update(txq, idx, false); -} - /** * idpf_tx_splitq_bump_ntu - adjust NTU and generation * @txq: the tx ring to wrap @@ -2438,6 +2387,37 @@ static bool idpf_tx_get_free_buf_id(struct idpf_sw_queue *refillq, return true; } +/** + * idpf_tx_splitq_pkt_err_unmap - Unmap buffers and bump tail in case of error + * @txq: Tx queue to unwind + * @params: pointer to splitq params struct + * @first: starting buffer for packet to unmap + */ +static void idpf_tx_splitq_pkt_err_unmap(struct idpf_tx_queue *txq, + struct idpf_tx_splitq_params *params, + struct idpf_tx_buf *first) +{ + struct libeth_sq_napi_stats ss = { }; + struct idpf_tx_buf *tx_buf = first; + struct libeth_cq_pp cp = { + .dev = txq->dev, + .ss = &ss, + }; + u32 idx = 0; + + u64_stats_update_begin(&txq->stats_sync); + u64_stats_inc(&txq->q_stats.dma_map_errs); + u64_stats_update_end(&txq->stats_sync); + + do { + libeth_tx_complete(tx_buf, &cp); + idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); + } while (idpf_tx_buf_compl_tag(tx_buf) == params->compl_tag); + + /* Update tail in case netdev_xmit_more was previously true. 
*/ + idpf_tx_buf_hw_update(txq, params->prev_ntu, false); +} + /** * idpf_tx_splitq_map - Build the Tx flex descriptor * @tx_q: queue to send buffer on @@ -2482,8 +2462,9 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, for (frag = &skb_shinfo(skb)->frags[0];; frag++) { unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; - if (dma_mapping_error(tx_q->dev, dma)) - return idpf_tx_dma_map_error(tx_q, skb, first, i); + if (unlikely(dma_mapping_error(tx_q->dev, dma))) + return idpf_tx_splitq_pkt_err_unmap(tx_q, params, + first); first->nr_frags++; idpf_tx_buf_compl_tag(tx_buf) = params->compl_tag; @@ -2939,7 +2920,9 @@ static bool idpf_tx_splitq_need_re(struct idpf_tx_queue *tx_q) static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, struct idpf_tx_queue *tx_q) { - struct idpf_tx_splitq_params tx_params = { }; + struct idpf_tx_splitq_params tx_params = { + .prev_ntu = tx_q->next_to_use, + }; union idpf_flex_tx_ctx_desc *ctx_desc; struct idpf_tx_buf *first; unsigned int count; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index c75ca5d3e57c..a7632d845a2a 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -196,6 +196,7 @@ struct idpf_tx_offload_params { * @compl_tag: Associated tag for completion * @td_tag: Descriptor tunneling tag * @offload: Offload parameters + * @prev_ntu: stored TxQ next_to_use in case of rollback */ struct idpf_tx_splitq_params { enum idpf_tx_desc_dtype_value dtype; @@ -206,6 +207,8 @@ struct idpf_tx_splitq_params { }; struct idpf_tx_offload_params offload; + + u16 prev_ntu; }; enum idpf_tx_ctx_desc_eipt_offload { @@ -1042,8 +1045,6 @@ void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, bool xmit_more); unsigned int idpf_size_to_txd_count(unsigned int size); netdev_tx_t idpf_tx_drop_skb(struct idpf_tx_queue *tx_q, struct sk_buff *skb); -void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct 
sk_buff *skb, - struct idpf_tx_buf *first, u16 ring_idx); unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, struct sk_buff *skb); void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue); From 5f417d551324d2894168b362f2429d120ab06243 Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Fri, 25 Jul 2025 11:42:21 -0700 Subject: [PATCH 2286/2411] idpf: replace flow scheduling buffer ring with buffer pool Replace the TxQ buffer ring with one large pool/array of buffers (only for flow scheduling). This eliminates the tag generation and makes it impossible for a tag to be associated with more than one packet. The completion tag passed to HW through the descriptor is the index into the array. That same completion tag is posted back to the driver in the completion descriptor, and used to index into the array to quickly retrieve the buffer during cleaning. In this way, the tags are treated as a fixed-size resource. If all tags are in use, no more packets can be sent on that particular queue (until some are freed up). The tag pool size is 64K since the completion tag width is 16 bits. For each packet, the driver pulls a free tag from the refillq to get the next free buffer index. When cleaning is complete, the tag is posted back to the refillq. A multi-frag packet spans multiple buffers in the driver, therefore it uses multiple buffer indexes/tags from the pool. Each frag pulls from the refillq to get the next free buffer index. These are tracked in a next_buf field that replaces the completion tag field in the buffer struct. This chains the buffers together so that the packet can be cleaned from the starting completion tag taken from the completion descriptor, then from the next_buf field for each subsequent buffer. In case a dma_mapping_error occurs or the refillq runs out of free buf_ids, the packet will execute the rollback error path. This unmaps any buffers previously mapped for the packet.
Since several free buf_ids could have already been pulled from the refillq, we need to restore its original state as well. Otherwise, the buf_ids/tags will be leaked and not used again until the queue is reallocated. Descriptor completions only advance the descriptor ring index to "clean" the descriptors. The packet completions only clean the buffers associated with the given packet completion tag and do not update the descriptor ring index. When operating in queue based scheduling mode, the array still acts as a ring and will only have TxQ descriptor count entries. The tx_bufs are still associated 1:1 with the descriptor ring entries and we can use the conventional indexing mechanisms. Fixes: c2d548cad150 ("idpf: add TX splitq napi poll support") Signed-off-by: Luigi Rizzo Signed-off-by: Brian Vazquez Signed-off-by: Joshua Hay Reviewed-by: Madhu Chittim Reviewed-by: Aleksandr Loktionov Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 206 +++++++++----------- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 10 +- 2 files changed, 104 insertions(+), 112 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 527d56bcbbef..5fe329a7c944 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -13,6 +13,7 @@ struct idpf_tx_stash { struct libeth_sqe buf; }; +#define idpf_tx_buf_next(buf) (*(u32 *)&(buf)->priv) #define idpf_tx_buf_compl_tag(buf) (*(u32 *)&(buf)->priv) LIBETH_SQE_CHECK_PRIV(u32); @@ -91,7 +92,7 @@ static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq) return; /* Free all the Tx buffer sk_buffs */ - for (i = 0; i < txq->desc_count; i++) + for (i = 0; i < txq->buf_pool_size; i++) libeth_tx_complete(&txq->tx_buf[i], &cp); kfree(txq->tx_buf); @@ -199,14 +200,17 @@ static void idpf_tx_desc_rel_all(struct idpf_vport *vport) static int idpf_tx_buf_alloc_all(struct idpf_tx_queue *tx_q) { struct 
idpf_buf_lifo *buf_stack; - int buf_size; int i; /* Allocate book keeping buffers only. Buffers to be supplied to HW * are allocated by kernel network stack and received as part of skb */ - buf_size = sizeof(struct idpf_tx_buf) * tx_q->desc_count; - tx_q->tx_buf = kzalloc(buf_size, GFP_KERNEL); + if (idpf_queue_has(FLOW_SCH_EN, tx_q)) + tx_q->buf_pool_size = U16_MAX; + else + tx_q->buf_pool_size = tx_q->desc_count; + tx_q->tx_buf = kcalloc(tx_q->buf_pool_size, sizeof(*tx_q->tx_buf), + GFP_KERNEL); if (!tx_q->tx_buf) return -ENOMEM; @@ -275,7 +279,7 @@ static int idpf_tx_desc_alloc(const struct idpf_vport *vport, return 0; refillq = tx_q->refillq; - refillq->desc_count = tx_q->desc_count; + refillq->desc_count = tx_q->buf_pool_size; refillq->ring = kcalloc(refillq->desc_count, sizeof(u32), GFP_KERNEL); if (!refillq->ring) { @@ -1869,6 +1873,12 @@ static bool idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, struct idpf_tx_buf *tx_buf; bool clean_complete = true; + if (descs_only) { + /* Bump ring index to mark as cleaned. */ + tx_q->next_to_clean = end; + return true; + } + tx_desc = &tx_q->flex_tx[ntc]; next_pending_desc = &tx_q->flex_tx[end]; tx_buf = &tx_q->tx_buf[ntc]; @@ -1935,87 +1945,43 @@ do { \ } while (0) /** - * idpf_tx_clean_buf_ring - clean flow scheduling TX queue buffers + * idpf_tx_clean_bufs - clean flow scheduling TX queue buffers * @txq: queue to clean - * @compl_tag: completion tag of packet to clean (from completion descriptor) + * @buf_id: packet's starting buffer ID, from completion descriptor * @cleaned: pointer to stats struct to track cleaned packets/bytes * @budget: Used to determine if we are in netpoll * - * Cleans all buffers associated with the input completion tag either from the - * TX buffer ring or from the hash table if the buffers were previously - * stashed. Returns the byte/segment count for the cleaned packet associated - * this completion tag. + * Clean all buffers associated with the packet starting at buf_id. 
Returns the + * byte/segment count for the cleaned packet. */ -static bool idpf_tx_clean_buf_ring(struct idpf_tx_queue *txq, u16 compl_tag, - struct libeth_sq_napi_stats *cleaned, - int budget) +static bool idpf_tx_clean_bufs(struct idpf_tx_queue *txq, u32 buf_id, + struct libeth_sq_napi_stats *cleaned, + int budget) { - u16 idx = compl_tag & txq->compl_tag_bufid_m; struct idpf_tx_buf *tx_buf = NULL; struct libeth_cq_pp cp = { .dev = txq->dev, .ss = cleaned, .napi = budget, }; - u16 ntc, orig_idx = idx; - - tx_buf = &txq->tx_buf[idx]; - - if (unlikely(tx_buf->type <= LIBETH_SQE_CTX || - idpf_tx_buf_compl_tag(tx_buf) != compl_tag)) - return false; + tx_buf = &txq->tx_buf[buf_id]; if (tx_buf->type == LIBETH_SQE_SKB) { if (skb_shinfo(tx_buf->skb)->tx_flags & SKBTX_IN_PROGRESS) idpf_tx_read_tstamp(txq, tx_buf->skb); libeth_tx_complete(tx_buf, &cp); + idpf_post_buf_refill(txq->refillq, buf_id); } - idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); + while (idpf_tx_buf_next(tx_buf) != IDPF_TXBUF_NULL) { + buf_id = idpf_tx_buf_next(tx_buf); - while (idpf_tx_buf_compl_tag(tx_buf) == compl_tag) { + tx_buf = &txq->tx_buf[buf_id]; libeth_tx_complete(tx_buf, &cp); - idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); + idpf_post_buf_refill(txq->refillq, buf_id); } - /* - * It's possible the packet we just cleaned was an out of order - * completion, which means we can stash the buffers starting from - * the original next_to_clean and reuse the descriptors. We need - * to compare the descriptor ring next_to_clean packet's "first" buffer - * to the "first" buffer of the packet we just cleaned to determine if - * this is the case. Howevever, next_to_clean can point to either a - * reserved buffer that corresponds to a context descriptor used for the - * next_to_clean packet (TSO packet) or the "first" buffer (single - * packet). The orig_idx from the packet we just cleaned will always - * point to the "first" buffer. 
If next_to_clean points to a reserved - * buffer, let's bump ntc once and start the comparison from there. - */ - ntc = txq->next_to_clean; - tx_buf = &txq->tx_buf[ntc]; - - if (tx_buf->type == LIBETH_SQE_CTX) - idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, tx_buf); - - /* - * If ntc still points to a different "first" buffer, clean the - * descriptor ring and stash all of the buffers for later cleaning. If - * we cannot stash all of the buffers, next_to_clean will point to the - * "first" buffer of the packet that could not be stashed and cleaning - * will start there next time. - */ - if (unlikely(tx_buf != &txq->tx_buf[orig_idx] && - !idpf_tx_splitq_clean(txq, orig_idx, budget, cleaned, - true))) - return true; - - /* - * Otherwise, update next_to_clean to reflect the cleaning that was - * done above. - */ - txq->next_to_clean = idx; - return true; } @@ -2046,12 +2012,10 @@ static void idpf_tx_handle_rs_completion(struct idpf_tx_queue *txq, compl_tag = le16_to_cpu(desc->q_head_compl_tag.compl_tag); - idpf_post_buf_refill(txq->refillq, compl_tag); - /* If we didn't clean anything on the ring, this packet must be * in the hash table. Go clean it there. 
*/ - if (!idpf_tx_clean_buf_ring(txq, compl_tag, cleaned, budget)) + if (!idpf_tx_clean_bufs(txq, compl_tag, cleaned, budget)) idpf_tx_clean_stashed_bufs(txq, compl_tag, cleaned, budget); } @@ -2364,7 +2328,7 @@ static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_tx_queue *txq, u16 ntu) * Return: true if a buffer ID was found, false if not */ static bool idpf_tx_get_free_buf_id(struct idpf_sw_queue *refillq, - u16 *buf_id) + u32 *buf_id) { u32 ntc = refillq->next_to_clean; u32 refill_desc; @@ -2397,25 +2361,34 @@ static void idpf_tx_splitq_pkt_err_unmap(struct idpf_tx_queue *txq, struct idpf_tx_splitq_params *params, struct idpf_tx_buf *first) { + struct idpf_sw_queue *refillq = txq->refillq; struct libeth_sq_napi_stats ss = { }; struct idpf_tx_buf *tx_buf = first; struct libeth_cq_pp cp = { .dev = txq->dev, .ss = &ss, }; - u32 idx = 0; u64_stats_update_begin(&txq->stats_sync); u64_stats_inc(&txq->q_stats.dma_map_errs); u64_stats_update_end(&txq->stats_sync); - do { + libeth_tx_complete(tx_buf, &cp); + while (idpf_tx_buf_next(tx_buf) != IDPF_TXBUF_NULL) { + tx_buf = &txq->tx_buf[idpf_tx_buf_next(tx_buf)]; libeth_tx_complete(tx_buf, &cp); - idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); - } while (idpf_tx_buf_compl_tag(tx_buf) == params->compl_tag); + } /* Update tail in case netdev_xmit_more was previously true. */ idpf_tx_buf_hw_update(txq, params->prev_ntu, false); + + if (!refillq) + return; + + /* Restore refillq state to avoid leaking tags. 
*/ + if (params->prev_refill_gen != idpf_queue_has(RFL_GEN_CHK, refillq)) + idpf_queue_change(RFL_GEN_CHK, refillq); + refillq->next_to_clean = params->prev_refill_ntc; } /** @@ -2439,6 +2412,7 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, struct netdev_queue *nq; struct sk_buff *skb; skb_frag_t *frag; + u32 next_buf_id; u16 td_cmd = 0; dma_addr_t dma; @@ -2456,18 +2430,16 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, tx_buf = first; first->nr_frags = 0; - params->compl_tag = - (tx_q->compl_tag_cur_gen << tx_q->compl_tag_gen_s) | i; - for (frag = &skb_shinfo(skb)->frags[0];; frag++) { unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; - if (unlikely(dma_mapping_error(tx_q->dev, dma))) + if (unlikely(dma_mapping_error(tx_q->dev, dma))) { + idpf_tx_buf_next(tx_buf) = IDPF_TXBUF_NULL; return idpf_tx_splitq_pkt_err_unmap(tx_q, params, first); + } first->nr_frags++; - idpf_tx_buf_compl_tag(tx_buf) = params->compl_tag; tx_buf->type = LIBETH_SQE_FRAG; /* record length, and DMA address */ @@ -2523,29 +2495,14 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, max_data); if (unlikely(++i == tx_q->desc_count)) { - tx_buf = tx_q->tx_buf; tx_desc = &tx_q->flex_tx[0]; i = 0; tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); } else { - tx_buf++; tx_desc++; } - /* Since this packet has a buffer that is going to span - * multiple descriptors, it's going to leave holes in - * to the TX buffer ring. To ensure these holes do not - * cause issues in the cleaning routines, we will clear - * them of any stale data and assign them the same - * completion tag as the current packet. Then when the - * packet is being cleaned, the cleaning routines will - * simply pass over these holes and finish cleaning the - * rest of the packet. - */ - tx_buf->type = LIBETH_SQE_EMPTY; - idpf_tx_buf_compl_tag(tx_buf) = params->compl_tag; - /* Adjust the DMA offset and the remaining size of the * fragment. 
On the first iteration of this loop, * max_data will be >= 12K and <= 16K-1. On any @@ -2570,15 +2527,26 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size); if (unlikely(++i == tx_q->desc_count)) { - tx_buf = tx_q->tx_buf; tx_desc = &tx_q->flex_tx[0]; i = 0; tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); } else { - tx_buf++; tx_desc++; } + if (idpf_queue_has(FLOW_SCH_EN, tx_q)) { + if (unlikely(!idpf_tx_get_free_buf_id(tx_q->refillq, + &next_buf_id))) { + idpf_tx_buf_next(tx_buf) = IDPF_TXBUF_NULL; + return idpf_tx_splitq_pkt_err_unmap(tx_q, params, + first); + } + } else { + next_buf_id = i; + } + idpf_tx_buf_next(tx_buf) = next_buf_id; + tx_buf = &tx_q->tx_buf[next_buf_id]; + size = skb_frag_size(frag); data_len -= size; @@ -2593,6 +2561,7 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, /* write last descriptor with RS and EOP bits */ first->rs_idx = i; + idpf_tx_buf_next(tx_buf) = IDPF_TXBUF_NULL; td_cmd |= params->eop_cmd; idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size); i = idpf_tx_splitq_bump_ntu(tx_q, i); @@ -2801,8 +2770,6 @@ idpf_tx_splitq_get_ctx_desc(struct idpf_tx_queue *txq) union idpf_flex_tx_ctx_desc *desc; int i = txq->next_to_use; - txq->tx_buf[i].type = LIBETH_SQE_CTX; - /* grab the next descriptor */ desc = &txq->flex_ctx[i]; txq->next_to_use = idpf_tx_splitq_bump_ntu(txq, i); @@ -2927,6 +2894,7 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, struct idpf_tx_buf *first; unsigned int count; int tso, idx; + u32 buf_id; count = idpf_tx_desc_count_required(tx_q, skb); if (unlikely(!count)) @@ -2970,26 +2938,28 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, idpf_tx_set_tstamp_desc(ctx_desc, idx); } - /* record the location of the first descriptor for this packet */ - first = &tx_q->tx_buf[tx_q->next_to_use]; - first->skb = skb; - - if (tso) { - first->packets = tx_params.offload.tso_segs; - first->bytes = skb->len + - 
((first->packets - 1) * tx_params.offload.tso_hdr_len); - } else { - first->packets = 1; - first->bytes = max_t(unsigned int, skb->len, ETH_ZLEN); - } - if (idpf_queue_has(FLOW_SCH_EN, tx_q)) { + struct idpf_sw_queue *refillq = tx_q->refillq; + + /* Save refillq state in case of a packet rollback. Otherwise, + * the tags will be leaked since they will be popped from the + * refillq but never reposted during cleaning. + */ + tx_params.prev_refill_gen = + idpf_queue_has(RFL_GEN_CHK, refillq); + tx_params.prev_refill_ntc = refillq->next_to_clean; + if (unlikely(!idpf_tx_get_free_buf_id(tx_q->refillq, - &tx_params.compl_tag))) { - u64_stats_update_begin(&tx_q->stats_sync); - u64_stats_inc(&tx_q->q_stats.q_busy); - u64_stats_update_end(&tx_q->stats_sync); + &buf_id))) { + if (tx_params.prev_refill_gen != + idpf_queue_has(RFL_GEN_CHK, refillq)) + idpf_queue_change(RFL_GEN_CHK, refillq); + refillq->next_to_clean = tx_params.prev_refill_ntc; + + tx_q->next_to_use = tx_params.prev_ntu; + return idpf_tx_drop_skb(tx_q, skb); } + tx_params.compl_tag = buf_id; tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE; tx_params.eop_cmd = IDPF_TXD_FLEX_FLOW_CMD_EOP; @@ -3008,6 +2978,8 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, tx_params.offload.td_cmd |= IDPF_TXD_FLEX_FLOW_CMD_CS_EN; } else { + buf_id = tx_q->next_to_use; + tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2; tx_params.eop_cmd = IDPF_TXD_LAST_DESC_CMD; @@ -3015,6 +2987,18 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, tx_params.offload.td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN; } + first = &tx_q->tx_buf[buf_id]; + first->skb = skb; + + if (tso) { + first->packets = tx_params.offload.tso_segs; + first->bytes = skb->len + + ((first->packets - 1) * tx_params.offload.tso_hdr_len); + } else { + first->packets = 1; + first->bytes = max_t(unsigned int, skb->len, ETH_ZLEN); + } + idpf_tx_splitq_map(tx_q, &tx_params, first); return NETDEV_TX_OK; diff --git 
a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index a7632d845a2a..d86246c320c8 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -137,6 +137,8 @@ do { \ ((++(txq)->compl_tag_cur_gen) >= (txq)->compl_tag_gen_max ? \ 0 : (txq)->compl_tag_cur_gen) +#define IDPF_TXBUF_NULL U32_MAX + #define IDPF_TXD_LAST_DESC_CMD (IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS) #define IDPF_TX_FLAGS_TSO BIT(0) @@ -197,6 +199,8 @@ struct idpf_tx_offload_params { * @td_tag: Descriptor tunneling tag * @offload: Offload parameters * @prev_ntu: stored TxQ next_to_use in case of rollback + * @prev_refill_ntc: stored refillq next_to_clean in case of packet rollback + * @prev_refill_gen: stored refillq generation bit in case of packet rollback */ struct idpf_tx_splitq_params { enum idpf_tx_desc_dtype_value dtype; @@ -209,6 +213,8 @@ struct idpf_tx_splitq_params { struct idpf_tx_offload_params offload; u16 prev_ntu; + u16 prev_refill_ntc; + bool prev_refill_gen; }; enum idpf_tx_ctx_desc_eipt_offload { @@ -638,6 +644,7 @@ libeth_cacheline_set_assert(struct idpf_rx_queue, 64, * @size: Length of descriptor ring in bytes * @dma: Physical address of ring * @q_vector: Backreference to associated vector + * @buf_pool_size: Total number of idpf_tx_buf */ struct idpf_tx_queue { __cacheline_group_begin_aligned(read_mostly); @@ -696,11 +703,12 @@ struct idpf_tx_queue { dma_addr_t dma; struct idpf_q_vector *q_vector; + u32 buf_pool_size; __cacheline_group_end_aligned(cold); }; libeth_cacheline_set_assert(struct idpf_tx_queue, 64, 120 + sizeof(struct u64_stats_sync), - 24); + 32); /** * struct idpf_buf_queue - software structure representing a buffer queue From 0c3f135e840d4a2ba4253e15d530ec61bc30718e Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Fri, 25 Jul 2025 11:42:22 -0700 Subject: [PATCH 2287/2411] idpf: stop Tx if there are insufficient buffer resources The Tx refillq logic will cause packets to be 
silently dropped if there are not enough buffer resources available to send a packet in flow scheduling mode. Instead, determine how many buffers are needed along with number of descriptors. Make sure there are enough of both resources to send the packet, and stop the queue if not. Fixes: 7292af042bcf ("idpf: fix a race in txq wakeup") Signed-off-by: Joshua Hay Reviewed-by: Madhu Chittim Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- .../ethernet/intel/idpf/idpf_singleq_txrx.c | 4 +- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 47 +++++++++++++------ drivers/net/ethernet/intel/idpf/idpf_txrx.h | 15 +++++- 3 files changed, 47 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c index 57c0f5ab8f9e..b19b462e0bb6 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -415,11 +415,11 @@ netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, { struct idpf_tx_offload_params offload = { }; struct idpf_tx_buf *first; + u32 count, buf_count = 1; int csum, tso, needed; - unsigned int count; __be16 protocol; - count = idpf_tx_desc_count_required(tx_q, skb); + count = idpf_tx_res_count_required(tx_q, skb, &buf_count); if (unlikely(!count)) return idpf_tx_drop_skb(tx_q, skb); diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 5fe329a7c944..fa5432a0566a 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -2191,15 +2191,22 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc, desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag); } -/* Global conditions to tell whether the txq (and related resources) - * has room to allow the use of "size" descriptors. 
+/** + * idpf_txq_has_room - check if enough Tx splitq resources are available + * @tx_q: the queue to be checked + * @descs_needed: number of descriptors required for this packet + * @bufs_needed: number of Tx buffers required for this packet + * + * Return: 0 if no room available, 1 otherwise */ -static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 size) +static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 descs_needed, + u32 bufs_needed) { - if (IDPF_DESC_UNUSED(tx_q) < size || + if (IDPF_DESC_UNUSED(tx_q) < descs_needed || IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) > IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq) || - IDPF_TX_BUF_RSV_LOW(tx_q)) + IDPF_TX_BUF_RSV_LOW(tx_q) || + idpf_tx_splitq_get_free_bufs(tx_q->refillq) < bufs_needed) return 0; return 1; } @@ -2208,14 +2215,21 @@ static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 size) * idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions * @tx_q: the queue to be checked * @descs_needed: number of descriptors required for this packet + * @bufs_needed: number of buffers needed for this packet * - * Returns 0 if stop is not needed + * Return: 0 if stop is not needed */ static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q, - unsigned int descs_needed) + u32 descs_needed, + u32 bufs_needed) { + /* Since we have multiple resources to check for splitq, our + * start,stop_thrs becomes a boolean check instead of a count + * threshold. 
+ */ if (netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx, - idpf_txq_has_room(tx_q, descs_needed), + idpf_txq_has_room(tx_q, descs_needed, + bufs_needed), 1, 1)) return 0; @@ -2257,14 +2271,16 @@ void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, } /** - * idpf_tx_desc_count_required - calculate number of Tx descriptors needed + * idpf_tx_res_count_required - get number of Tx resources needed for this pkt * @txq: queue to send buffer on * @skb: send buffer + * @bufs_needed: (output) number of buffers needed for this skb. * - * Returns number of data descriptors needed for this skb. + * Return: number of data descriptors and buffers needed for this skb. */ -unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, - struct sk_buff *skb) +unsigned int idpf_tx_res_count_required(struct idpf_tx_queue *txq, + struct sk_buff *skb, + u32 *bufs_needed) { const struct skb_shared_info *shinfo; unsigned int count = 0, i; @@ -2275,6 +2291,7 @@ unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, return count; shinfo = skb_shinfo(skb); + *bufs_needed += shinfo->nr_frags; for (i = 0; i < shinfo->nr_frags; i++) { unsigned int size; @@ -2892,11 +2909,11 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, }; union idpf_flex_tx_ctx_desc *ctx_desc; struct idpf_tx_buf *first; - unsigned int count; + u32 count, buf_count = 1; int tso, idx; u32 buf_id; - count = idpf_tx_desc_count_required(tx_q, skb); + count = idpf_tx_res_count_required(tx_q, skb, &buf_count); if (unlikely(!count)) return idpf_tx_drop_skb(tx_q, skb); @@ -2906,7 +2923,7 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, /* Check for splitq specific TX resources */ count += (IDPF_TX_DESCS_PER_CACHE_LINE + tso); - if (idpf_tx_maybe_stop_splitq(tx_q, count)) { + if (idpf_tx_maybe_stop_splitq(tx_q, count, buf_count)) { idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); return NETDEV_TX_BUSY; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h 
b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index d86246c320c8..9565e4dc3514 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -1026,6 +1026,17 @@ static inline void idpf_vport_intr_set_wb_on_itr(struct idpf_q_vector *q_vector) reg->dyn_ctl); } +/** + * idpf_tx_splitq_get_free_bufs - get number of free buf_ids in refillq + * @refillq: pointer to refillq containing buf_ids + */ +static inline u32 idpf_tx_splitq_get_free_bufs(struct idpf_sw_queue *refillq) +{ + return (refillq->next_to_use > refillq->next_to_clean ? + 0 : refillq->desc_count) + + refillq->next_to_use - refillq->next_to_clean - 1; +} + int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget); void idpf_vport_init_num_qs(struct idpf_vport *vport, struct virtchnl2_create_vport *vport_msg); @@ -1053,8 +1064,8 @@ void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, bool xmit_more); unsigned int idpf_size_to_txd_count(unsigned int size); netdev_tx_t idpf_tx_drop_skb(struct idpf_tx_queue *tx_q, struct sk_buff *skb); -unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, - struct sk_buff *skb); +unsigned int idpf_tx_res_count_required(struct idpf_tx_queue *txq, + struct sk_buff *skb, u32 *buf_count); void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue); netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, struct idpf_tx_queue *tx_q); From 6c4e68480238274f84aa50d54da0d9e262df6284 Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Fri, 25 Jul 2025 11:42:23 -0700 Subject: [PATCH 2288/2411] idpf: remove obsolete stashing code With the new Tx buffer management scheme, there is no need for all of the stashing mechanisms, the hash table, the reserve buffer stack, etc. Remove all of that. 
Signed-off-by: Joshua Hay Reviewed-by: Madhu Chittim Reviewed-by: Aleksandr Loktionov Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 314 ++------------------ drivers/net/ethernet/intel/idpf/idpf_txrx.h | 47 +-- 2 files changed, 22 insertions(+), 339 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index fa5432a0566a..eaad52a83b04 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -8,48 +8,12 @@ #include "idpf_ptp.h" #include "idpf_virtchnl.h" -struct idpf_tx_stash { - struct hlist_node hlist; - struct libeth_sqe buf; -}; - #define idpf_tx_buf_next(buf) (*(u32 *)&(buf)->priv) -#define idpf_tx_buf_compl_tag(buf) (*(u32 *)&(buf)->priv) LIBETH_SQE_CHECK_PRIV(u32); static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, unsigned int count); -/** - * idpf_buf_lifo_push - push a buffer pointer onto stack - * @stack: pointer to stack struct - * @buf: pointer to buf to push - * - * Returns 0 on success, negative on failure - **/ -static int idpf_buf_lifo_push(struct idpf_buf_lifo *stack, - struct idpf_tx_stash *buf) -{ - if (unlikely(stack->top == stack->size)) - return -ENOSPC; - - stack->bufs[stack->top++] = buf; - - return 0; -} - -/** - * idpf_buf_lifo_pop - pop a buffer pointer from stack - * @stack: pointer to stack struct - **/ -static struct idpf_tx_stash *idpf_buf_lifo_pop(struct idpf_buf_lifo *stack) -{ - if (unlikely(!stack->top)) - return NULL; - - return stack->bufs[--stack->top]; -} - /** * idpf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure @@ -78,14 +42,11 @@ void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue) static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq) { struct libeth_sq_napi_stats ss = { }; - struct idpf_buf_lifo *buf_stack; - struct idpf_tx_stash *stash; struct libeth_cq_pp cp = { .dev = 
txq->dev, .ss = &ss, }; - struct hlist_node *tmp; - u32 i, tag; + u32 i; /* Buffers already cleared, nothing to do */ if (!txq->tx_buf) @@ -97,33 +58,6 @@ static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq) kfree(txq->tx_buf); txq->tx_buf = NULL; - - if (!idpf_queue_has(FLOW_SCH_EN, txq)) - return; - - buf_stack = &txq->stash->buf_stack; - if (!buf_stack->bufs) - return; - - /* - * If a Tx timeout occurred, there are potentially still bufs in the - * hash table, free them here. - */ - hash_for_each_safe(txq->stash->sched_buf_hash, tag, tmp, stash, - hlist) { - if (!stash) - continue; - - libeth_tx_complete(&stash->buf, &cp); - hash_del(&stash->hlist); - idpf_buf_lifo_push(buf_stack, stash); - } - - for (i = 0; i < buf_stack->size; i++) - kfree(buf_stack->bufs[i]); - - kfree(buf_stack->bufs); - buf_stack->bufs = NULL; } /** @@ -199,9 +133,6 @@ static void idpf_tx_desc_rel_all(struct idpf_vport *vport) */ static int idpf_tx_buf_alloc_all(struct idpf_tx_queue *tx_q) { - struct idpf_buf_lifo *buf_stack; - int i; - /* Allocate book keeping buffers only. 
Buffers to be supplied to HW * are allocated by kernel network stack and received as part of skb */ @@ -214,29 +145,6 @@ static int idpf_tx_buf_alloc_all(struct idpf_tx_queue *tx_q) if (!tx_q->tx_buf) return -ENOMEM; - if (!idpf_queue_has(FLOW_SCH_EN, tx_q)) - return 0; - - buf_stack = &tx_q->stash->buf_stack; - - /* Initialize tx buf stack for out-of-order completions if - * flow scheduling offload is enabled - */ - buf_stack->bufs = kcalloc(tx_q->desc_count, sizeof(*buf_stack->bufs), - GFP_KERNEL); - if (!buf_stack->bufs) - return -ENOMEM; - - buf_stack->size = tx_q->desc_count; - buf_stack->top = tx_q->desc_count; - - for (i = 0; i < tx_q->desc_count; i++) { - buf_stack->bufs[i] = kzalloc(sizeof(*buf_stack->bufs[i]), - GFP_KERNEL); - if (!buf_stack->bufs[i]) - return -ENOMEM; - } - return 0; } @@ -350,8 +258,6 @@ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport) for (i = 0; i < vport->num_txq_grp; i++) { for (j = 0; j < vport->txq_grps[i].num_txq; j++) { struct idpf_tx_queue *txq = vport->txq_grps[i].txqs[j]; - u8 gen_bits = 0; - u16 bufidx_mask; err = idpf_tx_desc_alloc(vport, txq); if (err) { @@ -360,34 +266,6 @@ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport) i); goto err_out; } - - if (!idpf_is_queue_model_split(vport->txq_model)) - continue; - - txq->compl_tag_cur_gen = 0; - - /* Determine the number of bits in the bufid - * mask and add one to get the start of the - * generation bits - */ - bufidx_mask = txq->desc_count - 1; - while (bufidx_mask >> 1) { - txq->compl_tag_gen_s++; - bufidx_mask = bufidx_mask >> 1; - } - txq->compl_tag_gen_s++; - - gen_bits = IDPF_TX_SPLITQ_COMPL_TAG_WIDTH - - txq->compl_tag_gen_s; - txq->compl_tag_gen_max = GETMAXVAL(gen_bits); - - /* Set bufid mask based on location of first - * gen bit; it cannot simply be the descriptor - * ring size-1 since we can have size values - * where not all of those bits are set. 
- */ - txq->compl_tag_bufid_m = - GETMAXVAL(txq->compl_tag_gen_s); } if (!idpf_is_queue_model_split(vport->txq_model)) @@ -1042,9 +920,6 @@ static void idpf_txq_group_rel(struct idpf_vport *vport) kfree(txq_grp->complq); txq_grp->complq = NULL; - - if (flow_sch_en) - kfree(txq_grp->stashes); } kfree(vport->txq_grps); vport->txq_grps = NULL; @@ -1405,7 +1280,6 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) for (i = 0; i < vport->num_txq_grp; i++) { struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; struct idpf_adapter *adapter = vport->adapter; - struct idpf_txq_stash *stashes; int j; tx_qgrp->vport = vport; @@ -1418,15 +1292,6 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) goto err_alloc; } - if (split && flow_sch_en) { - stashes = kcalloc(num_txq, sizeof(*stashes), - GFP_KERNEL); - if (!stashes) - goto err_alloc; - - tx_qgrp->stashes = stashes; - } - for (j = 0; j < tx_qgrp->num_txq; j++) { struct idpf_tx_queue *q = tx_qgrp->txqs[j]; @@ -1446,11 +1311,6 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) if (!flow_sch_en) continue; - if (split) { - q->stash = &stashes[j]; - hash_init(q->stash->sched_buf_hash); - } - idpf_queue_set(FLOW_SCH_EN, q); q->refillq = kzalloc(sizeof(*q->refillq), GFP_KERNEL); @@ -1742,87 +1602,6 @@ static void idpf_tx_read_tstamp(struct idpf_tx_queue *txq, struct sk_buff *skb) spin_unlock_bh(&tx_tstamp_caps->status_lock); } -/** - * idpf_tx_clean_stashed_bufs - clean bufs that were stored for - * out of order completions - * @txq: queue to clean - * @compl_tag: completion tag of packet to clean (from completion descriptor) - * @cleaned: pointer to stats struct to track cleaned packets/bytes - * @budget: Used to determine if we are in netpoll - */ -static void idpf_tx_clean_stashed_bufs(struct idpf_tx_queue *txq, - u16 compl_tag, - struct libeth_sq_napi_stats *cleaned, - int budget) -{ - struct idpf_tx_stash *stash; - struct hlist_node *tmp_buf; - struct 
libeth_cq_pp cp = { - .dev = txq->dev, - .ss = cleaned, - .napi = budget, - }; - - /* Buffer completion */ - hash_for_each_possible_safe(txq->stash->sched_buf_hash, stash, tmp_buf, - hlist, compl_tag) { - if (unlikely(idpf_tx_buf_compl_tag(&stash->buf) != compl_tag)) - continue; - - hash_del(&stash->hlist); - - if (stash->buf.type == LIBETH_SQE_SKB && - (skb_shinfo(stash->buf.skb)->tx_flags & SKBTX_IN_PROGRESS)) - idpf_tx_read_tstamp(txq, stash->buf.skb); - - libeth_tx_complete(&stash->buf, &cp); - - /* Push shadow buf back onto stack */ - idpf_buf_lifo_push(&txq->stash->buf_stack, stash); - } -} - -/** - * idpf_stash_flow_sch_buffers - store buffer parameters info to be freed at a - * later time (only relevant for flow scheduling mode) - * @txq: Tx queue to clean - * @tx_buf: buffer to store - */ -static int idpf_stash_flow_sch_buffers(struct idpf_tx_queue *txq, - struct idpf_tx_buf *tx_buf) -{ - struct idpf_tx_stash *stash; - - if (unlikely(tx_buf->type <= LIBETH_SQE_CTX)) - return 0; - - stash = idpf_buf_lifo_pop(&txq->stash->buf_stack); - if (unlikely(!stash)) { - net_err_ratelimited("%s: No out-of-order TX buffers left!\n", - netdev_name(txq->netdev)); - - return -ENOMEM; - } - - /* Store buffer params in shadow buffer */ - stash->buf.skb = tx_buf->skb; - stash->buf.bytes = tx_buf->bytes; - stash->buf.packets = tx_buf->packets; - stash->buf.type = tx_buf->type; - stash->buf.nr_frags = tx_buf->nr_frags; - dma_unmap_addr_set(&stash->buf, dma, dma_unmap_addr(tx_buf, dma)); - dma_unmap_len_set(&stash->buf, len, dma_unmap_len(tx_buf, len)); - idpf_tx_buf_compl_tag(&stash->buf) = idpf_tx_buf_compl_tag(tx_buf); - - /* Add buffer to buf_hash table to be freed later */ - hash_add(txq->stash->sched_buf_hash, &stash->hlist, - idpf_tx_buf_compl_tag(&stash->buf)); - - tx_buf->type = LIBETH_SQE_EMPTY; - - return 0; -} - #define idpf_tx_splitq_clean_bump_ntc(txq, ntc, desc, buf) \ do { \ if (unlikely(++(ntc) == (txq)->desc_count)) { \ @@ -1850,14 +1629,8 @@ do { \ * Separate 
packet completion events will be reported on the completion queue, * and the buffers will be cleaned separately. The stats are not updated from * this function when using flow-based scheduling. - * - * Furthermore, in flow scheduling mode, check to make sure there are enough - * reserve buffers to stash the packet. If there are not, return early, which - * will leave next_to_clean pointing to the packet that failed to be stashed. - * - * Return: false in the scenario above, true otherwise. */ -static bool idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, +static void idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, int napi_budget, struct libeth_sq_napi_stats *cleaned, bool descs_only) @@ -1871,12 +1644,11 @@ static bool idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, .napi = napi_budget, }; struct idpf_tx_buf *tx_buf; - bool clean_complete = true; if (descs_only) { /* Bump ring index to mark as cleaned. */ tx_q->next_to_clean = end; - return true; + return; } tx_desc = &tx_q->flex_tx[ntc]; @@ -1897,53 +1669,24 @@ static bool idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, break; eop_idx = tx_buf->rs_idx; + libeth_tx_complete(tx_buf, &cp); - if (descs_only) { - if (IDPF_TX_BUF_RSV_UNUSED(tx_q) < tx_buf->nr_frags) { - clean_complete = false; - goto tx_splitq_clean_out; - } + /* unmap remaining buffers */ + while (ntc != eop_idx) { + idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, + tx_desc, tx_buf); - idpf_stash_flow_sch_buffers(tx_q, tx_buf); - - while (ntc != eop_idx) { - idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, - tx_desc, tx_buf); - idpf_stash_flow_sch_buffers(tx_q, tx_buf); - } - } else { + /* unmap any remaining paged data */ libeth_tx_complete(tx_buf, &cp); - - /* unmap remaining buffers */ - while (ntc != eop_idx) { - idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, - tx_desc, tx_buf); - - /* unmap any remaining paged data */ - libeth_tx_complete(tx_buf, &cp); - } } fetch_next_txq_desc: idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, tx_desc, 
tx_buf); } -tx_splitq_clean_out: tx_q->next_to_clean = ntc; - - return clean_complete; } -#define idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, buf) \ -do { \ - (buf)++; \ - (ntc)++; \ - if (unlikely((ntc) == (txq)->desc_count)) { \ - buf = (txq)->tx_buf; \ - ntc = 0; \ - } \ -} while (0) - /** * idpf_tx_clean_bufs - clean flow scheduling TX queue buffers * @txq: queue to clean @@ -1954,7 +1697,7 @@ do { \ * Clean all buffers associated with the packet starting at buf_id. Returns the * byte/segment count for the cleaned packet. */ -static bool idpf_tx_clean_bufs(struct idpf_tx_queue *txq, u32 buf_id, +static void idpf_tx_clean_bufs(struct idpf_tx_queue *txq, u32 buf_id, struct libeth_sq_napi_stats *cleaned, int budget) { @@ -1981,8 +1724,6 @@ static bool idpf_tx_clean_bufs(struct idpf_tx_queue *txq, u32 buf_id, libeth_tx_complete(tx_buf, &cp); idpf_post_buf_refill(txq->refillq, buf_id); } - - return true; } /** @@ -2001,22 +1742,17 @@ static void idpf_tx_handle_rs_completion(struct idpf_tx_queue *txq, struct libeth_sq_napi_stats *cleaned, int budget) { - u16 compl_tag; + /* RS completion contains queue head for queue based scheduling or + * completion tag for flow based scheduling. + */ + u16 rs_compl_val = le16_to_cpu(desc->q_head_compl_tag.q_head); if (!idpf_queue_has(FLOW_SCH_EN, txq)) { - u16 head = le16_to_cpu(desc->q_head_compl_tag.q_head); - - idpf_tx_splitq_clean(txq, head, budget, cleaned, false); + idpf_tx_splitq_clean(txq, rs_compl_val, budget, cleaned, false); return; } - compl_tag = le16_to_cpu(desc->q_head_compl_tag.compl_tag); - - /* If we didn't clean anything on the ring, this packet must be - * in the hash table. Go clean it there. 
- */ - if (!idpf_tx_clean_bufs(txq, compl_tag, cleaned, budget)) - idpf_tx_clean_stashed_bufs(txq, compl_tag, cleaned, budget); + idpf_tx_clean_bufs(txq, rs_compl_val, cleaned, budget); } /** @@ -2133,8 +1869,7 @@ static bool idpf_tx_clean_complq(struct idpf_compl_queue *complq, int budget, /* Update BQL */ nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); - dont_wake = !complq_ok || IDPF_TX_BUF_RSV_LOW(tx_q) || - np->state != __IDPF_VPORT_UP || + dont_wake = !complq_ok || np->state != __IDPF_VPORT_UP || !netif_carrier_ok(tx_q->netdev); /* Check if the TXQ needs to and can be restarted */ __netif_txq_completed_wake(nq, tx_q->cleaned_pkts, tx_q->cleaned_bytes, @@ -2205,7 +1940,6 @@ static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 descs_needed, if (IDPF_DESC_UNUSED(tx_q) < descs_needed || IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) > IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq) || - IDPF_TX_BUF_RSV_LOW(tx_q) || idpf_tx_splitq_get_free_bufs(tx_q->refillq) < bufs_needed) return 0; return 1; @@ -2329,10 +2063,8 @@ static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_tx_queue *txq, u16 ntu) { ntu++; - if (ntu == txq->desc_count) { + if (ntu == txq->desc_count) ntu = 0; - txq->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(txq); - } return ntu; } @@ -2514,8 +2246,6 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, if (unlikely(++i == tx_q->desc_count)) { tx_desc = &tx_q->flex_tx[0]; i = 0; - tx_q->compl_tag_cur_gen = - IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); } else { tx_desc++; } @@ -2546,7 +2276,6 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, if (unlikely(++i == tx_q->desc_count)) { tx_desc = &tx_q->flex_tx[0]; i = 0; - tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); } else { tx_desc++; } @@ -2980,10 +2709,9 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE; tx_params.eop_cmd = IDPF_TXD_FLEX_FLOW_CMD_EOP; - /* Set the RE bit to catch any packets that may 
have not been - * stashed during RS completion cleaning. MIN_GAP is set to - * MIN_RING size to ensure it will be set at least once each - * time around the ring. + /* Set the RE bit to periodically "clean" the descriptor ring. + * MIN_GAP is set to MIN_RING size to ensure it will be set at + * least once each time around the ring. */ if (idpf_tx_splitq_need_re(tx_q)) { tx_params.eop_cmd |= IDPF_TXD_FLEX_FLOW_CMD_RE; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 9565e4dc3514..52753dff381c 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -118,10 +118,6 @@ do { \ ((((txq)->next_to_clean > (txq)->next_to_use) ? 0 : (txq)->desc_count) + \ (txq)->next_to_clean - (txq)->next_to_use - 1) -#define IDPF_TX_BUF_RSV_UNUSED(txq) ((txq)->stash->buf_stack.top) -#define IDPF_TX_BUF_RSV_LOW(txq) (IDPF_TX_BUF_RSV_UNUSED(txq) < \ - (txq)->desc_count >> 2) - #define IDPF_TX_COMPLQ_OVERFLOW_THRESH(txcq) ((txcq)->desc_count >> 1) /* Determine the absolute number of completions pending, i.e. the number of * completions that are expected to arrive on the TX completion queue. @@ -131,12 +127,6 @@ do { \ 0 : U32_MAX) + \ (txq)->num_completions_pending - (txq)->complq->num_completions) -#define IDPF_TX_SPLITQ_COMPL_TAG_WIDTH 16 -/* Adjust the generation for the completion tag and wrap if necessary */ -#define IDPF_TX_ADJ_COMPL_TAG_GEN(txq) \ - ((++(txq)->compl_tag_cur_gen) >= (txq)->compl_tag_gen_max ? 
\ - 0 : (txq)->compl_tag_cur_gen) - #define IDPF_TXBUF_NULL U32_MAX #define IDPF_TXD_LAST_DESC_CMD (IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS) @@ -154,18 +144,6 @@ union idpf_tx_flex_desc { #define idpf_tx_buf libeth_sqe -/** - * struct idpf_buf_lifo - LIFO for managing OOO completions - * @top: Used to know how many buffers are left - * @size: Total size of LIFO - * @bufs: Backing array - */ -struct idpf_buf_lifo { - u16 top; - u16 size; - struct idpf_tx_stash **bufs; -}; - /** * struct idpf_tx_offload_params - Offload parameters for a given packet * @tx_flags: Feature flags enabled for this packet @@ -476,17 +454,6 @@ struct idpf_tx_queue_stats { #define IDPF_ITR_IDX_SPACING(spacing, dflt) (spacing ? spacing : dflt) #define IDPF_DIM_DEFAULT_PROFILE_IX 1 -/** - * struct idpf_txq_stash - Tx buffer stash for Flow-based scheduling mode - * @buf_stack: Stack of empty buffers to store buffer info for out of order - * buffer completions. See struct idpf_buf_lifo - * @sched_buf_hash: Hash table to store buffers - */ -struct idpf_txq_stash { - struct idpf_buf_lifo buf_stack; - DECLARE_HASHTABLE(sched_buf_hash, 12); -} ____cacheline_aligned; - /** * struct idpf_rx_queue - software structure representing a receive queue * @rx: universal receive descriptor array @@ -631,11 +598,7 @@ libeth_cacheline_set_assert(struct idpf_rx_queue, 64, * only once at the end of the cleaning routine. 
* @clean_budget: singleq only, queue cleaning budget * @cleaned_pkts: Number of packets cleaned for the above said case - * @stash: Tx buffer stash for Flow-based scheduling mode * @refillq: Pointer to refill queue - * @compl_tag_bufid_m: Completion tag buffer id mask - * @compl_tag_cur_gen: Used to keep track of current completion tag generation - * @compl_tag_gen_max: To determine when compl_tag_cur_gen should be reset * @cached_tstamp_caps: Tx timestamp capabilities negotiated with the CP * @tstamp_task: Work that handles Tx timestamp read * @stats_sync: See struct u64_stats_sync @@ -666,7 +629,6 @@ struct idpf_tx_queue { u16 desc_count; u16 tx_min_pkt_len; - u16 compl_tag_gen_s; struct net_device *netdev; __cacheline_group_end_aligned(read_mostly); @@ -683,13 +645,8 @@ struct idpf_tx_queue { }; u16 cleaned_pkts; - struct idpf_txq_stash *stash; struct idpf_sw_queue *refillq; - u16 compl_tag_bufid_m; - u16 compl_tag_cur_gen; - u16 compl_tag_gen_max; - struct idpf_ptp_vport_tx_tstamp_caps *cached_tstamp_caps; struct work_struct *tstamp_task; @@ -707,7 +664,7 @@ struct idpf_tx_queue { __cacheline_group_end_aligned(cold); }; libeth_cacheline_set_assert(struct idpf_tx_queue, 64, - 120 + sizeof(struct u64_stats_sync), + 104 + sizeof(struct u64_stats_sync), 32); /** @@ -918,7 +875,6 @@ struct idpf_rxq_group { * @vport: Vport back pointer * @num_txq: Number of TX queues associated * @txqs: Array of TX queue pointers - * @stashes: array of OOO stashes for the queues * @complq: Associated completion queue pointer, split queue only * @num_completions_pending: Total number of completions pending for the * completion queue, acculumated for all TX queues @@ -933,7 +889,6 @@ struct idpf_txq_group { u16 num_txq; struct idpf_tx_queue *txqs[IDPF_LARGE_MAX_Q]; - struct idpf_txq_stash *stashes; struct idpf_compl_queue *complq; From 91a79b792204313153e1bdbbe5acbfc28903b3a5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 Aug 2025 14:37:07 +0200 Subject: [PATCH 
2289/2411] netfilter: nf_reject: don't leak dst refcount for loopback packets recent patches to add a WARN() when replacing skb dst entry found an old bug: WARNING: include/linux/skbuff.h:1165 skb_dst_check_unset include/linux/skbuff.h:1164 [inline] WARNING: include/linux/skbuff.h:1165 skb_dst_set include/linux/skbuff.h:1210 [inline] WARNING: include/linux/skbuff.h:1165 nf_reject_fill_skb_dst+0x2a4/0x330 net/ipv4/netfilter/nf_reject_ipv4.c:234 [..] Call Trace: nf_send_unreach+0x17b/0x6e0 net/ipv4/netfilter/nf_reject_ipv4.c:325 nft_reject_inet_eval+0x4bc/0x690 net/netfilter/nft_reject_inet.c:27 expr_call_ops_eval net/netfilter/nf_tables_core.c:237 [inline] .. This is because blamed commit forgot about loopback packets. Such packets already have a dst_entry attached, even at PRE_ROUTING stage. Instead of checking hook just check if the skb already has a route attached to it. Fixes: f53b9b0bdc59 ("netfilter: introduce support for reject at prerouting stage") Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20250820123707.10671-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- net/ipv4/netfilter/nf_reject_ipv4.c | 6 ++---- net/ipv6/netfilter/nf_reject_ipv6.c | 5 ++--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 87fd945a0d27..0d3cb2ba6fc8 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -247,8 +247,7 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb, if (!oth) return; - if ((hook == NF_INET_PRE_ROUTING || hook == NF_INET_INGRESS) && - nf_reject_fill_skb_dst(oldskb) < 0) + if (!skb_dst(oldskb) && nf_reject_fill_skb_dst(oldskb) < 0) return; if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) @@ -321,8 +320,7 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook) if (iph->frag_off & htons(IP_OFFSET)) return; - if ((hook == NF_INET_PRE_ROUTING || hook == 
NF_INET_INGRESS) && - nf_reject_fill_skb_dst(skb_in) < 0) + if (!skb_dst(skb_in) && nf_reject_fill_skb_dst(skb_in) < 0) return; if (skb_csum_unnecessary(skb_in) || diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 838295fa32e3..cb2d38e80de9 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -293,7 +293,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, fl6.fl6_sport = otcph->dest; fl6.fl6_dport = otcph->source; - if (hook == NF_INET_PRE_ROUTING || hook == NF_INET_INGRESS) { + if (!skb_dst(oldskb)) { nf_ip6_route(net, &dst, flowi6_to_flowi(&fl6), false); if (!dst) return; @@ -397,8 +397,7 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) skb_in->dev = net->loopback_dev; - if ((hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_INGRESS) && - nf_reject6_fill_skb_dst(skb_in) < 0) + if (!skb_dst(skb_in) && nf_reject6_fill_skb_dst(skb_in) < 0) return; icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); From aea70964b5a7ca491a3701f2dde6c9d05d51878d Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 20 Aug 2025 14:28:04 -0500 Subject: [PATCH 2290/2411] of: reserved_mem: Add missing IORESOURCE_MEM flag on resources Commit f4fcfdda2fd8 ('of: reserved_mem: Add functions to parse "memory-region"') failed to set IORESOURCE_MEM flag on the resources. The result is functions such as devm_ioremap_resource_wc() will fail. Add the missing flag. 
Fixes: f4fcfdda2fd8 ('of: reserved_mem: Add functions to parse "memory-region"') Reported-by: Iuliana Prodan Reported-by: Daniel Baluta Tested-by: Iuliana Prodan Reviewed-by: Iuliana Prodan Reviewed-by: Saravana Kannan Link: https://lore.kernel.org/r/20250820192805.565568-1-robh@kernel.org Signed-off-by: Rob Herring (Arm) --- drivers/of/of_reserved_mem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 77016c0cc296..d3b7c4ae429c 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -771,6 +771,7 @@ int of_reserved_mem_region_to_resource(const struct device_node *np, return -EINVAL; resource_set_range(res, rmem->base, rmem->size); + res->flags = IORESOURCE_MEM; res->name = rmem->name; return 0; } From 8a30114073639fd97f2c7390abbc34fb8711327a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Pi=C3=B3rkowski?= Date: Mon, 11 Aug 2025 12:43:58 +0200 Subject: [PATCH 2291/2411] drm/xe: Move ASID allocation and user PT BO tracking into xe_vm_create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, ASID assignment for user VMs and page-table BO accounting for client memory tracking are performed in xe_vm_create_ioctl. To consolidate VM object initialization, move this logic to xe_vm_create. 
v2: - removed unnecessary duplicate BO tracking code - using the local variable xef to verify whether the VM is being created by userspace Fixes: 658a1c8e0a66 ("drm/xe: Assign ioctl xe file handler to vm in xe_vm_create") Suggested-by: Matthew Auld Signed-off-by: Piotr Piórkowski Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20250811104358.2064150-3-piotr.piorkowski@intel.com Signed-off-by: Michał Winiarski (cherry picked from commit 30e0c3f43a414616e0b6ca76cf7f7b2cd387e1d4) Signed-off-by: Rodrigo Vivi [Rodrigo: Added fixes tag] --- drivers/gpu/drm/xe/xe_vm.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 5bff317e335a..1bb73bb63406 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1795,6 +1795,20 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) if (number_tiles > 1) vm->composite_fence_ctx = dma_fence_context_alloc(1); + if (xef && xe->info.has_asid) { + u32 asid; + + down_write(&xe->usm.lock); + err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, + XA_LIMIT(1, XE_MAX_ASID - 1), + &xe->usm.next_asid, GFP_KERNEL); + up_write(&xe->usm.lock); + if (err < 0) + goto err_unlock_close; + + vm->usm.asid = asid; + } + trace_xe_vm_create(vm); return vm; @@ -2062,9 +2076,8 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = to_xe_file(file); struct drm_xe_vm_create *args = data; - struct xe_tile *tile; struct xe_vm *vm; - u32 id, asid; + u32 id; int err; u32 flags = 0; @@ -2104,23 +2117,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(vm)) return PTR_ERR(vm); - if (xe->info.has_asid) { - down_write(&xe->usm.lock); - err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, - XA_LIMIT(1, XE_MAX_ASID - 1), - &xe->usm.next_asid, GFP_KERNEL); - up_write(&xe->usm.lock); - if (err < 0) - goto 
err_close_and_put; - - vm->usm.asid = asid; - } - - /* Record BO memory for VM pagetable created against client */ - for_each_tile(tile, xe, id) - if (vm->pt_root[id]) - xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo); - #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) /* Warning: Security issue - never enable by default */ args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); From 111fb43a557726079a67ce3ab51f602ddbf7097e Mon Sep 17 00:00:00 2001 From: Christoph Manszewski Date: Wed, 13 Aug 2025 12:12:30 +0200 Subject: [PATCH 2292/2411] drm/xe: Fix vm_bind_ioctl double free bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the argument check during an array bind fails, the bind_ops are freed twice as seen below. Fix this by setting bind_ops to NULL after freeing. ================================================================== BUG: KASAN: double-free in xe_vm_bind_ioctl+0x1b2/0x21f0 [xe] Free of addr ffff88813bb9b800 by task xe_vm/14198 CPU: 5 UID: 0 PID: 14198 Comm: xe_vm Not tainted 6.16.0-xe-eudebug-cmanszew+ #520 PREEMPT(full) Hardware name: Intel Corporation Alder Lake Client Platform/AlderLake-P DDR5 RVP, BIOS ADLPFWI1.R00.2411.A02.2110081023 10/08/2021 Call Trace: dump_stack_lvl+0x82/0xd0 print_report+0xcb/0x610 ? __virt_addr_valid+0x19a/0x300 ? xe_vm_bind_ioctl+0x1b2/0x21f0 [xe] kasan_report_invalid_free+0xc8/0xf0 ? xe_vm_bind_ioctl+0x1b2/0x21f0 [xe] ? xe_vm_bind_ioctl+0x1b2/0x21f0 [xe] check_slab_allocation+0x102/0x130 kfree+0x10d/0x440 ? should_fail_ex+0x57/0x2f0 ? xe_vm_bind_ioctl+0x1b2/0x21f0 [xe] xe_vm_bind_ioctl+0x1b2/0x21f0 [xe] ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] ? __lock_acquire+0xab9/0x27f0 ? lock_acquire+0x165/0x300 ? drm_dev_enter+0x53/0xe0 [drm] ? find_held_lock+0x2b/0x80 ? drm_dev_exit+0x30/0x50 [drm] ? drm_ioctl_kernel+0x128/0x1c0 [drm] drm_ioctl_kernel+0x128/0x1c0 [drm] ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] ? find_held_lock+0x2b/0x80 ? 
__pfx_drm_ioctl_kernel+0x10/0x10 [drm] ? should_fail_ex+0x57/0x2f0 ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] drm_ioctl+0x352/0x620 [drm] ? __pfx_drm_ioctl+0x10/0x10 [drm] ? __pfx_rpm_resume+0x10/0x10 ? do_raw_spin_lock+0x11a/0x1b0 ? find_held_lock+0x2b/0x80 ? __pm_runtime_resume+0x61/0xc0 ? rcu_is_watching+0x20/0x50 ? trace_irq_enable.constprop.0+0xac/0xe0 xe_drm_ioctl+0x91/0xc0 [xe] __x64_sys_ioctl+0xb2/0x100 ? rcu_is_watching+0x20/0x50 do_syscall_64+0x68/0x2e0 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7fa9acb24ded Fixes: b43e864af0d4 ("drm/xe/uapi: Add DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR") Cc: Matthew Brost Cc: Himal Prasad Ghimiray Cc: Thomas Hellström Signed-off-by: Christoph Manszewski Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250813101231.196632-2-christoph.manszewski@intel.com (cherry picked from commit a01b704527c28a2fd43a17a85f8996b75ec8492a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 1bb73bb63406..ec04bef8ae40 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3418,6 +3418,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, free_bind_ops: if (args->num_binds > 1) kvfree(*bind_ops); + *bind_ops = NULL; return err; } @@ -3524,7 +3525,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct xe_exec_queue *q = NULL; u32 num_syncs, num_ufence = 0; struct xe_sync_entry *syncs = NULL; - struct drm_xe_vm_bind_op *bind_ops; + struct drm_xe_vm_bind_op *bind_ops = NULL; struct xe_vma_ops vops; struct dma_fence *fence; int err; From ac29e4487aa20a21b7c3facbd1f14f5093835dc9 Mon Sep 17 00:00:00 2001 From: Pritesh Patel Date: Mon, 16 Jun 2025 16:53:12 +0530 Subject: [PATCH 2293/2411] dt-bindings: vendor-prefixes: add eswin Add new vendor string to dt bindings. 
This new vendor string is used by - ESWIN EIC770X SoC - HiFive Premier P550 board which uses EIC7700 SoC. Link: https://www.eswin.com/en/ Signed-off-by: Pritesh Patel Reviewed-by: Samuel Holland Signed-off-by: Pinkesh Vaghela Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250616112316.3833343-4-pinkesh.vaghela@einfochips.com Signed-off-by: Rob Herring (Arm) --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 741b545e3ab0..982ef10eed76 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -499,6 +499,8 @@ patternProperties: description: Espressif Systems Co. Ltd. "^est,.*": description: ESTeem Wireless Modems + "^eswin,.*": + description: Beijing ESWIN Technology Group Co. Ltd. "^ettus,.*": description: NI Ettus Research "^eukrea,.*": From 508c1314b342b78591f51c4b5dadee31a88335df Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 21 Aug 2025 13:23:21 -0600 Subject: [PATCH 2294/2411] io_uring/futex: ensure io_futex_wait() cleans up properly on failure The io_futex_data is allocated upfront and assigned to the io_kiocb async_data field, but the request isn't marked with REQ_F_ASYNC_DATA at that point. Those two should always go together, as the flag tells io_uring whether the field is valid or not. Additionally, on failure cleanup, the futex handler frees the data but does not clear ->async_data. Clear the data and the flag in the error path as well. Thanks to Trend Micro Zero Day Initiative and particularly ReDress for reporting this. 
Cc: stable@vger.kernel.org Fixes: 194bb58c6090 ("io_uring: add support for futex wake and wait") Signed-off-by: Jens Axboe --- io_uring/futex.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/io_uring/futex.c b/io_uring/futex.c index 692462d50c8c..9113a44984f3 100644 --- a/io_uring/futex.c +++ b/io_uring/futex.c @@ -288,6 +288,7 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags) goto done_unlock; } + req->flags |= REQ_F_ASYNC_DATA; req->async_data = ifd; ifd->q = futex_q_init; ifd->q.bitset = iof->futex_mask; @@ -309,6 +310,8 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags) if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); + req->async_data = NULL; + req->flags &= ~REQ_F_ASYNC_DATA; kfree(ifd); return IOU_COMPLETE; } From e4e6aaea46b7be818eba0510ba68d30df8689ea3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 21 Aug 2025 13:24:57 -0600 Subject: [PATCH 2295/2411] io_uring: clear ->async_data as part of normal init Opcode handlers like POLL_ADD will use ->async_data as the pointer for double poll handling, which is a bit different than the usual case where it's strictly gated by the REQ_F_ASYNC_DATA flag. Be a bit more proactive in handling ->async_data, and clear it to NULL as part of regular init. Init is touching that cacheline anyway, so might as well clear it. 
Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 4ef69dd58734..93633613a165 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2119,6 +2119,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, req->file = NULL; req->tctx = current->io_uring; req->cancel_seq_set = false; + req->async_data = NULL; if (unlikely(opcode >= IORING_OP_LAST)) { req->opcode = 0; From b08a784a5d1495c42ff9b0c70887d49211cddfe0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 18 Aug 2025 19:03:54 +0100 Subject: [PATCH 2296/2411] net: Introduce skb_copy_datagram_from_iter_full() In a similar manner to copy_from_iter()/copy_from_iter_full(), introduce skb_copy_datagram_from_iter_full() which reverts the iterator to its initial state when returning an error. A subsequent fix for a vsock regression will make use of this new function. Cc: Christian Brauner Cc: Alexander Viro Signed-off-by: Will Deacon Acked-by: Michael S. 
Tsirkin Reviewed-by: Stefan Hajnoczi Link: https://patch.msgid.link/20250818180355.29275-2-will@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 ++ net/core/datagram.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 14b923ddb6df..fa633657e4c0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4172,6 +4172,8 @@ int skb_copy_and_crc32c_datagram_iter(const struct sk_buff *skb, int offset, struct iov_iter *to, int len, u32 *crcp); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); +int skb_copy_datagram_from_iter_full(struct sk_buff *skb, int offset, + struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); void skb_free_datagram(struct sock *sk, struct sk_buff *skb); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); diff --git a/net/core/datagram.c b/net/core/datagram.c index 94cc4705e91d..f474b9b120f9 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -618,6 +618,20 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, } EXPORT_SYMBOL(skb_copy_datagram_from_iter); +int skb_copy_datagram_from_iter_full(struct sk_buff *skb, int offset, + struct iov_iter *from, int len) +{ + struct iov_iter_state state; + int ret; + + iov_iter_save_state(from, &state); + ret = skb_copy_datagram_from_iter(skb, offset, from, len); + if (ret) + iov_iter_restore(from, &state); + return ret; +} +EXPORT_SYMBOL(skb_copy_datagram_from_iter_full); + int zerocopy_fill_skb_from_iter(struct sk_buff *skb, struct iov_iter *from, size_t length) { From 7fb1291257ea1e27dbc3f34c6a37b4d640aafdd7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 18 Aug 2025 19:03:55 +0100 Subject: [PATCH 2297/2411] vsock/virtio: Fix message iterator handling on transmit path Commit 6693731487a8 ("vsock/virtio: Allocate nonlinear SKBs for handling large 
transmit buffers") converted the virtio vsock transmit path to utilise nonlinear SKBs when handling large buffers. As part of this change, virtio_transport_fill_skb() was updated to call skb_copy_datagram_from_iter() instead of memcpy_from_msg() as the latter expects a single destination buffer and cannot handle nonlinear SKBs correctly. Unfortunately, during this conversion, I overlooked the error case when the copying function returns -EFAULT due to a fault on the input buffer in userspace. In this case, memcpy_from_msg() reverts the iterator to its initial state thanks to copy_from_iter_full() whereas skb_copy_datagram_from_iter() leaves the iterator partially advanced. This results in a WARN_ONCE() from the vsock code, which expects the iterator to stay in sync with the number of bytes transmitted so that virtio_transport_send_pkt_info() can return -EFAULT when it is called again: ------------[ cut here ]------------ 'send_pkt()' returns 0, but 65536 expected WARNING: CPU: 0 PID: 5503 at net/vmw_vsock/virtio_transport_common.c:428 virtio_transport_send_pkt_info+0xd11/0xf00 net/vmw_vsock/virtio_transport_common.c:426 Modules linked in: CPU: 0 UID: 0 PID: 5503 Comm: syz.0.17 Not tainted 6.16.0-syzkaller-12063-g37816488247d #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Call skb_copy_datagram_from_iter_full() to restore the previous iterator behaviour. Cc: Jason Wang Cc: Stefano Garzarella Fixes: 6693731487a8 ("vsock/virtio: Allocate nonlinear SKBs for handling large transmit buffers") Reported-by: syzbot+b4d960daf7a3c7c2b7b1@syzkaller.appspotmail.com Signed-off-by: Will Deacon Acked-by: Michael S. 
Tsirkin Reviewed-by: Stefan Hajnoczi Link: https://patch.msgid.link/20250818180355.29275-3-will@kernel.org Signed-off-by: Jakub Kicinski --- net/vmw_vsock/virtio_transport_common.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index fe92e5fa95b4..dcc8a1d5851e 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -105,12 +105,14 @@ static int virtio_transport_fill_skb(struct sk_buff *skb, size_t len, bool zcopy) { + struct msghdr *msg = info->msg; + if (zcopy) - return __zerocopy_sg_from_iter(info->msg, NULL, skb, - &info->msg->msg_iter, len, NULL); + return __zerocopy_sg_from_iter(msg, NULL, skb, + &msg->msg_iter, len, NULL); virtio_vsock_skb_put(skb, len); - return skb_copy_datagram_from_iter(skb, 0, &info->msg->msg_iter, len); + return skb_copy_datagram_from_iter_full(skb, 0, &msg->msg_iter, len); } static void virtio_transport_init_hdr(struct sk_buff *skb, From 01b9128c5db1b470575d07b05b67ffa3cb02ebf1 Mon Sep 17 00:00:00 2001 From: luoguangfei <15388634752@163.com> Date: Tue, 19 Aug 2025 07:25:27 +0800 Subject: [PATCH 2298/2411] net: macb: fix unregister_netdev call order in macb_remove() When removing a macb device, the driver calls phy_exit() before unregister_netdev(). This leads to a WARN from kernfs: ------------[ cut here ]------------ kernfs: can not remove 'attached_dev', no directory WARNING: CPU: 1 PID: 27146 at fs/kernfs/dir.c:1683 Call trace: kernfs_remove_by_name_ns+0xd8/0xf0 sysfs_remove_link+0x24/0x58 phy_detach+0x5c/0x168 phy_disconnect+0x4c/0x70 phylink_disconnect_phy+0x6c/0xc0 [phylink] macb_close+0x6c/0x170 [macb] ... macb_remove+0x60/0x168 [macb] platform_remove+0x5c/0x80 ... The warning happens because the PHY is being exited while the netdev is still registered. The correct order is to unregister the netdev before shutting down the PHY and cleaning up the MDIO bus. 
Fix this by moving unregister_netdev() ahead of phy_exit() in macb_remove(). Fixes: 8b73fa3ae02b ("net: macb: Added ZynqMP-specific initialization") Signed-off-by: luoguangfei <15388634752@163.com> Link: https://patch.msgid.link/20250818232527.1316-1-15388634752@163.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 9693f0289435..b29c3beae0b2 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5399,11 +5399,11 @@ static void macb_remove(struct platform_device *pdev) if (dev) { bp = netdev_priv(dev); + unregister_netdev(dev); phy_exit(bp->sgmii_phy); mdiobus_unregister(bp->mii_bus); mdiobus_free(bp->mii_bus); - unregister_netdev(dev); cancel_work_sync(&bp->hresp_err_bh_work); pm_runtime_disable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); From a12946bef0407cf2db0899c83d42c47c00af3fbc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 14 Aug 2025 19:27:21 -0700 Subject: [PATCH 2299/2411] pinctrl: STMFX: add missing HAS_IOMEM dependency When building on ARCH=um (which does not set HAS_IOMEM), kconfig reports an unmet dependency caused by PINCTRL_STMFX. It selects MFD_STMFX, which depends on HAS_IOMEM. To stop this warning, PINCTRL_STMFX should also depend on HAS_IOMEM. 
kconfig warning: WARNING: unmet direct dependencies detected for MFD_STMFX Depends on [n]: HAS_IOMEM [=n] && I2C [=y] && OF [=y] Selected by [y]: - PINCTRL_STMFX [=y] && PINCTRL [=y] && I2C [=y] && OF_GPIO [=y] Fixes: 1490d9f841b1 ("pinctrl: Add STMFX GPIO expander Pinctrl/GPIO driver") Signed-off-by: Randy Dunlap Link: https://lore.kernel.org/20250815022721.1650885-1-rdunlap@infradead.org Signed-off-by: Linus Walleij --- drivers/pinctrl/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig index ddd11668457c..be1ca8e85754 100644 --- a/drivers/pinctrl/Kconfig +++ b/drivers/pinctrl/Kconfig @@ -539,6 +539,7 @@ config PINCTRL_STMFX tristate "STMicroelectronics STMFX GPIO expander pinctrl driver" depends on I2C depends on OF_GPIO + depends on HAS_IOMEM select GENERIC_PINCONF select GPIOLIB_IRQCHIP select MFD_STMFX From 685ca577b408ffd9c5a4057a2acc0cd3e6978b36 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 31 Jul 2025 20:01:27 -0700 Subject: [PATCH 2300/2411] iommu/arm-smmu-v3: Fix smmu_domain->nr_ats_masters decrement The arm_smmu_attach_commit() updates master->ats_enabled before calling arm_smmu_remove_master_domain() that is supposed to clean up everything in the old domain, including the old domain's nr_ats_masters. So, it is supposed to use the old ats_enabled state of the device, not an updated state. This isn't a problem if switching between two domains where: - old ats_enabled = false; new ats_enabled = false - old ats_enabled = true; new ats_enabled = true but can fail cases where: - old ats_enabled = false; new ats_enabled = true (old domain should keep the counter but incorrectly decreased it) - old ats_enabled = true; new ats_enabled = false (old domain needed to decrease the counter but incorrectly missed it) Update master->ats_enabled after arm_smmu_remove_master_domain() to fix this. 
Fixes: 7497f4211f4f ("iommu/arm-smmu-v3: Make changing domains be hitless for ATS") Cc: stable@vger.kernel.org Signed-off-by: Nicolin Chen Acked-by: Will Deacon Reviewed-by: Jason Gunthorpe Reviewed-by: Pranjal Shrivastava Link: https://lore.kernel.org/r/20250801030127.2006979-1-nicolinc@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 5968043ac802..2a8b46b948f0 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2997,9 +2997,9 @@ void arm_smmu_attach_commit(struct arm_smmu_attach_state *state) /* ATS is being switched off, invalidate the entire ATC */ arm_smmu_atc_inv_master(master, IOMMU_NO_PASID); } - master->ats_enabled = state->ats_enabled; arm_smmu_remove_master_domain(master, state->old_domain, state->ssid); + master->ats_enabled = state->ats_enabled; } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) From 72b6f7cd89cea8251979b65528d302f9c0ed37bf Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 14 Aug 2025 17:47:16 +0100 Subject: [PATCH 2301/2411] iommu/virtio: Make instance lookup robust Much like arm-smmu in commit 7d835134d4e1 ("iommu/arm-smmu: Make instance lookup robust"), virtio-iommu appears to have the same issue where iommu_device_register() makes the IOMMU instance visible to other API callers (including itself) straight away, but internally the instance isn't ready to recognise itself for viommu_probe_device() to work correctly until after viommu_probe() has returned. This matters a lot more now that bus_iommu_probe() has the DT/VIOT knowledge to probe client devices the way that was always intended. Tweak the lookup and initialisation in much the same way as for arm-smmu, to ensure that what we register is functional and ready to go. 
Cc: stable@vger.kernel.org Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path") Signed-off-by: Robin Murphy Tested-by: Eric Auger Link: https://lore.kernel.org/r/308911aaa1f5be32a3a709996c7bd6cf71d30f33.1755190036.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/virtio-iommu.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 532db1de201b..b39d6f134ab2 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -998,8 +998,7 @@ static void viommu_get_resv_regions(struct device *dev, struct list_head *head) iommu_dma_get_resv_regions(dev, head); } -static const struct iommu_ops viommu_ops; -static struct virtio_driver virtio_iommu_drv; +static const struct bus_type *virtio_bus_type; static int viommu_match_node(struct device *dev, const void *data) { @@ -1008,8 +1007,9 @@ static int viommu_match_node(struct device *dev, const void *data) static struct viommu_dev *viommu_get_by_fwnode(struct fwnode_handle *fwnode) { - struct device *dev = driver_find_device(&virtio_iommu_drv.driver, NULL, - fwnode, viommu_match_node); + struct device *dev = bus_find_device(virtio_bus_type, NULL, fwnode, + viommu_match_node); + put_device(dev); return dev ? 
dev_to_virtio(dev)->priv : NULL; @@ -1160,6 +1160,9 @@ static int viommu_probe(struct virtio_device *vdev) if (!viommu) return -ENOMEM; + /* Borrow this for easy lookups later */ + virtio_bus_type = dev->bus; + spin_lock_init(&viommu->request_lock); ida_init(&viommu->domain_ids); viommu->dev = dev; @@ -1229,10 +1232,10 @@ static int viommu_probe(struct virtio_device *vdev) if (ret) goto err_free_vqs; - iommu_device_register(&viommu->iommu, &viommu_ops, parent_dev); - vdev->priv = viommu; + iommu_device_register(&viommu->iommu, &viommu_ops, parent_dev); + dev_info(dev, "input address: %u bits\n", order_base_2(viommu->geometry.aperture_end)); dev_info(dev, "page mask: %#llx\n", viommu->pgsize_bitmap); From 99d4d1a070870aa08163af8ce0522992b7f35d8c Mon Sep 17 00:00:00 2001 From: XianLiang Huang Date: Wed, 20 Aug 2025 15:22:48 +0800 Subject: [PATCH 2302/2411] iommu/riscv: prevent NULL deref in iova_to_phys The riscv_iommu_pte_fetch() function returns either NULL for unmapped/never-mapped iova, or a valid leaf pte pointer that requires no further validation. riscv_iommu_iova_to_phys() failed to handle NULL returns. Prevent null pointer dereference in riscv_iommu_iova_to_phys(), and remove the pte validation. 
Fixes: 488ffbf18171 ("iommu/riscv: Paging domain support") Cc: Tomasz Jeznach Signed-off-by: XianLiang Huang Link: https://lore.kernel.org/r/20250820072248.312-1-huangxianliang@lanxincomputing.com Signed-off-by: Joerg Roedel --- drivers/iommu/riscv/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c index 2d0d31ba2886..0eae2f4bdc5e 100644 --- a/drivers/iommu/riscv/iommu.c +++ b/drivers/iommu/riscv/iommu.c @@ -1283,7 +1283,7 @@ static phys_addr_t riscv_iommu_iova_to_phys(struct iommu_domain *iommu_domain, unsigned long *ptr; ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size); - if (_io_pte_none(*ptr) || !_io_pte_present(*ptr)) + if (!ptr) return 0; return pfn_to_phys(__page_val_to_pfn(*ptr)) | (iova & (pte_size - 1)); From 08fb45446ebf1e2e435f95163c59d692acb0b514 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 21 Aug 2025 08:40:12 +0200 Subject: [PATCH 2303/2411] drm/amdgpu: Pin buffers while vmap'ing exported dma-buf objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current dma-buf vmap semantics require that the mapped buffer remains in place until the corresponding vunmap has completed. For GEM-SHMEM, this used to be guaranteed by a pin operation while creating an S/G table in import. GEM-SHMEM can now import dma-buf objects without creating the S/G table, so the pin is missing. Leads to page-fault errors, such as the one shown below. [ 102.101726] BUG: unable to handle page fault for address: ffffc90127000000 [...] [ 102.157102] RIP: 0010:udl_compress_hline16+0x219/0x940 [udl] [...] [ 102.243250] Call Trace: [ 102.245695] [ 102.2477V95] ? validate_chain+0x24e/0x5e0 [ 102.251805] ? __lock_acquire+0x568/0xae0 [ 102.255807] udl_render_hline+0x165/0x341 [udl] [ 102.260338] ? __pfx_udl_render_hline+0x10/0x10 [udl] [ 102.265379] ? local_clock_noinstr+0xb/0x100 [ 102.269642] ? __lock_release.isra.0+0x16c/0x2e0 [ 102.274246] ? 
mark_held_locks+0x40/0x70 [ 102.278177] udl_primary_plane_helper_atomic_update+0x43e/0x680 [udl] [ 102.284606] ? __pfx_udl_primary_plane_helper_atomic_update+0x10/0x10 [udl] [ 102.291551] ? lockdep_hardirqs_on_prepare.part.0+0x92/0x170 [ 102.297208] ? lockdep_hardirqs_on+0x88/0x130 [ 102.301554] ? _raw_spin_unlock_irq+0x24/0x50 [ 102.305901] ? wait_for_completion_timeout+0x2bb/0x3a0 [ 102.311028] ? drm_atomic_helper_calc_timestamping_constants+0x141/0x200 [ 102.317714] ? drm_atomic_helper_commit_planes+0x3b6/0x1030 [ 102.323279] drm_atomic_helper_commit_planes+0x3b6/0x1030 [ 102.328664] drm_atomic_helper_commit_tail+0x41/0xb0 [ 102.333622] commit_tail+0x204/0x330 [...] [ 102.529946] ---[ end trace 0000000000000000 ]--- [ 102.651980] RIP: 0010:udl_compress_hline16+0x219/0x940 [udl] In this stack trace, udl (based on GEM-SHMEM) imported and vmap'ed a dma-buf from amdgpu. Amdgpu relocated the buffer, thereby invalidating the mapping. Provide a custom dma-buf vmap method in amdgpu that pins the object before mapping its buffer's pages into kernel address space. Do the opposite in vunmap. Note that dma-buf vmap differs from GEM vmap in how it handles relocation. While dma-buf vmap keeps the buffer in place, GEM vmap requires the caller to keep the buffer in place. Hence, this fix is in amdgpu's dma-buf code instead of its GEM code. A discussion of various approaches to solving the problem is available at [1]. 
v3: - try (GTT | VRAM); drop CPU domain (Christian) v2: - only use mapable domains (Christian) - try pinning to domains in preferred order Signed-off-by: Thomas Zimmermann Fixes: 660cd44659a0 ("drm/shmem-helper: Import dmabuf without mapping its sg_table") Reported-by: Thomas Zimmermann Closes: https://lore.kernel.org/dri-devel/ba1bdfb8-dbf7-4372-bdcb-df7e0511c702@suse.de/ Cc: Shixiong Ou Cc: Thomas Zimmermann Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: David Airlie Cc: Simona Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: dri-devel@lists.freedesktop.org Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Link: https://lore.kernel.org/dri-devel/9792c6c3-a2b8-4b2b-b5ba-fba19b153e21@suse.de/ # [1] Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250821064031.39090-1-tzimmermann@suse.de --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 34 +++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 5743ebb2f1b7..ce27cb5bb05e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -285,6 +285,36 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, return ret; } +static int amdgpu_dma_buf_vmap(struct dma_buf *dma_buf, struct iosys_map *map) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + int ret; + + /* + * Pin to keep buffer in place while it's vmap'ed. The actual + * domain is not that important as long as it's mapable. Using + * GTT and VRAM should be compatible with most use cases. 
+ */ + ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM); + if (ret) + return ret; + ret = drm_gem_dmabuf_vmap(dma_buf, map); + if (ret) + amdgpu_bo_unpin(bo); + + return ret; +} + +static void amdgpu_dma_buf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + drm_gem_dmabuf_vunmap(dma_buf, map); + amdgpu_bo_unpin(bo); +} + const struct dma_buf_ops amdgpu_dmabuf_ops = { .attach = amdgpu_dma_buf_attach, .pin = amdgpu_dma_buf_pin, @@ -294,8 +324,8 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = { .release = drm_gem_dmabuf_release, .begin_cpu_access = amdgpu_dma_buf_begin_cpu_access, .mmap = drm_gem_dmabuf_mmap, - .vmap = drm_gem_dmabuf_vmap, - .vunmap = drm_gem_dmabuf_vunmap, + .vmap = amdgpu_dma_buf_vmap, + .vunmap = amdgpu_dma_buf_vunmap, }; /** From 563fcd6475931c5c8c652a4dd548256314cc87ed Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 22 Aug 2025 14:14:18 +0200 Subject: [PATCH 2304/2411] pinctrl: airoha: Fix return value in pinconf callbacks Pinctrl stack requires ENOTSUPP error code if the parameter is not supported by the pinctrl driver. Fix the returned error code in pinconf callbacks if the operation is not supported. 
Fixes: 1c8ace2d0725 ("pinctrl: airoha: Add support for EN7581 SoC") Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/20250822-airoha-pinconf-err-val-fix-v1-1-87b4f264ced2@kernel.org Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-airoha.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-airoha.c b/drivers/pinctrl/mediatek/pinctrl-airoha.c index 5f1ec9e0de21..1b2f132d76f0 100644 --- a/drivers/pinctrl/mediatek/pinctrl-airoha.c +++ b/drivers/pinctrl/mediatek/pinctrl-airoha.c @@ -2696,7 +2696,7 @@ static int airoha_pinconf_get(struct pinctrl_dev *pctrl_dev, arg = 1; break; default: - return -EOPNOTSUPP; + return -ENOTSUPP; } *config = pinconf_to_config_packed(param, arg); @@ -2788,7 +2788,7 @@ static int airoha_pinconf_set(struct pinctrl_dev *pctrl_dev, break; } default: - return -EOPNOTSUPP; + return -ENOTSUPP; } } @@ -2805,10 +2805,10 @@ static int airoha_pinconf_group_get(struct pinctrl_dev *pctrl_dev, if (airoha_pinconf_get(pctrl_dev, airoha_pinctrl_groups[group].pins[i], config)) - return -EOPNOTSUPP; + return -ENOTSUPP; if (i && cur_config != *config) - return -EOPNOTSUPP; + return -ENOTSUPP; cur_config = *config; } From 5d7eba62e5eb68347de59b31b347b24f304cf21c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 22 Aug 2025 13:40:18 -0400 Subject: [PATCH 2305/2411] Bluetooth: hci_conn: Make unacked packet handling more robust This attempts to make unacked packet handling more robust by detecting if there are no connections left then restore all buffers of the respective pool. 
Fixes: 5638d9ea9c01 ("Bluetooth: hci_conn: Fix not restoring ISO buffer count on disconnect") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 58 ++++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 7a879290dd28..e524bb59bff2 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -149,8 +149,6 @@ static void hci_conn_cleanup(struct hci_conn *conn) hci_chan_list_flush(conn); - hci_conn_hash_del(hdev, conn); - if (HCI_CONN_HANDLE_UNSET(conn->handle)) ida_free(&hdev->unset_handle_ida, conn->handle); @@ -1152,28 +1150,54 @@ void hci_conn_del(struct hci_conn *conn) disable_delayed_work_sync(&conn->auto_accept_work); disable_delayed_work_sync(&conn->idle_work); - if (conn->type == ACL_LINK) { - /* Unacked frames */ - hdev->acl_cnt += conn->sent; - } else if (conn->type == LE_LINK) { - cancel_delayed_work(&conn->le_conn_timeout); + /* Remove the connection from the list so unacked logic can detect when + * a certain pool is not being utilized. + */ + hci_conn_hash_del(hdev, conn); - if (hdev->le_pkts) - hdev->le_cnt += conn->sent; + /* Handle unacked frames: + * + * - In case there are no connection, or if restoring the buffers + * considered in transist would overflow, restore all buffers to the + * pool. 
+ * - Otherwise restore just the buffers considered in transit for the + * hci_conn + */ + switch (conn->type) { + case ACL_LINK: + if (!hci_conn_num(hdev, ACL_LINK) || + hdev->acl_cnt + conn->sent > hdev->acl_pkts) + hdev->acl_cnt = hdev->acl_pkts; else hdev->acl_cnt += conn->sent; - } else { - /* Unacked ISO frames */ - if (conn->type == CIS_LINK || - conn->type == BIS_LINK || - conn->type == PA_LINK) { - if (hdev->iso_pkts) - hdev->iso_cnt += conn->sent; - else if (hdev->le_pkts) + break; + case LE_LINK: + cancel_delayed_work(&conn->le_conn_timeout); + + if (hdev->le_pkts) { + if (!hci_conn_num(hdev, LE_LINK) || + hdev->le_cnt + conn->sent > hdev->le_pkts) + hdev->le_cnt = hdev->le_pkts; + else hdev->le_cnt += conn->sent; + } else { + if ((!hci_conn_num(hdev, LE_LINK) && + !hci_conn_num(hdev, ACL_LINK)) || + hdev->acl_cnt + conn->sent > hdev->acl_pkts) + hdev->acl_cnt = hdev->acl_pkts; else hdev->acl_cnt += conn->sent; } + break; + case CIS_LINK: + case BIS_LINK: + case PA_LINK: + if (!hci_iso_count(hdev) || + hdev->iso_cnt + conn->sent > hdev->iso_pkts) + hdev->iso_cnt = hdev->iso_pkts; + else + hdev->iso_cnt += conn->sent; + break; } skb_queue_purge(&conn->data_q); From 2f050a5392b7a0928bf836d9891df4851463512c Mon Sep 17 00:00:00 2001 From: Ludovico de Nittis Date: Tue, 12 Aug 2025 17:55:26 +0200 Subject: [PATCH 2306/2411] Bluetooth: hci_event: Treat UNKNOWN_CONN_ID on disconnect as success When the host sends an HCI_OP_DISCONNECT command, the controller may respond with the status HCI_ERROR_UNKNOWN_CONN_ID (0x02). E.g. this can happen on resume from suspend, if the link was terminated by the remote device before the event mask was correctly set. 
This is a btmon snippet that shows the issue: ``` > ACL Data RX: Handle 3 flags 0x02 dlen 12 L2CAP: Disconnection Request (0x06) ident 5 len 4 Destination CID: 65 Source CID: 72 < ACL Data TX: Handle 3 flags 0x00 dlen 12 L2CAP: Disconnection Response (0x07) ident 5 len 4 Destination CID: 65 Source CID: 72 > ACL Data RX: Handle 3 flags 0x02 dlen 12 L2CAP: Disconnection Request (0x06) ident 6 len 4 Destination CID: 64 Source CID: 71 < ACL Data TX: Handle 3 flags 0x00 dlen 12 L2CAP: Disconnection Response (0x07) ident 6 len 4 Destination CID: 64 Source CID: 71 < HCI Command: Set Event Mask (0x03|0x0001) plen 8 Mask: 0x3dbff807fffbffff Inquiry Complete Inquiry Result Connection Complete Connection Request Disconnection Complete Authentication Complete [...] < HCI Command: Disconnect (0x01|0x0006) plen 3 Handle: 3 Address: 78:20:A5:4A:DF:28 (Nintendo Co.,Ltd) Reason: Remote User Terminated Connection (0x13) > HCI Event: Command Status (0x0f) plen 4 Disconnect (0x01|0x0006) ncmd 1 Status: Unknown Connection Identifier (0x02) ``` Currently, the hci_cs_disconnect function treats any non-zero status as a command failure. This can be misleading because the connection is indeed being terminated and the controller is confirming that it has no knowledge of that connection handle. Meaning that the initial request of disconnecting a device should be treated as done. With this change we allow the function to proceed, following the success path, which correctly calls `mgmt_device_disconnected` and ensures a consistent state.
Link: https://github.com/bluez/bluez/issues/1226 Fixes: 182ee45da083 ("Bluetooth: hci_sync: Rework hci_suspend_notifier") Signed-off-by: Ludovico de Nittis Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index fe7cdd67ad2a..6c67dfa139e2 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2703,7 +2703,7 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) if (!conn) goto unlock; - if (status) { + if (status && status != HCI_ERROR_UNKNOWN_CONN_ID) { mgmt_disconnect_failed(hdev, &conn->dst, conn->type, conn->dst_type, status); From b7fafbc499b5ee164018eb0eefe9027f5a6aaad2 Mon Sep 17 00:00:00 2001 From: Ludovico de Nittis Date: Tue, 12 Aug 2025 17:55:27 +0200 Subject: [PATCH 2307/2411] Bluetooth: hci_event: Mark connection as closed during suspend disconnect When suspending, the disconnect command for an active Bluetooth connection could be issued, but the corresponding `HCI_EV_DISCONN_COMPLETE` event might not be received before the system completes the suspend process. This can lead to an inconsistent state. On resume, the controller may auto-accept reconnections from the same device (due to suspend event filters), but these new connections are rejected by the kernel which still has connection objects from before suspend. Resulting in errors like: ``` kernel: Bluetooth: hci0: ACL packet for unknown connection handle 1 kernel: Bluetooth: hci0: Ignoring HCI_Connection_Complete for existing connection ``` This is a btmon snippet that shows the issue: ``` < HCI Command: Disconnect (0x01|0x0006) plen 3 Handle: 1 Address: 78:20:A5:4A:DF:28 (Nintendo Co.,Ltd) Reason: Remote User Terminated Connection (0x13) > HCI Event: Command Status (0x0f) plen 4 Disconnect (0x01|0x0006) ncmd 2 Status: Success (0x00) [...] 
// Host suspends with the event filter set for the device // On resume, the device tries to reconnect with a new handle > HCI Event: Connect Complete (0x03) plen 11 Status: Success (0x00) Handle: 2 Address: 78:20:A5:4A:DF:28 (Nintendo Co.,Ltd) // Kernel ignores this event because there is an existing connection with // handle 1 ``` By explicitly setting the connection state to BT_CLOSED we can ensure a consistent state, even if we don't receive the disconnect complete event in time. Link: https://github.com/bluez/bluez/issues/1226 Fixes: 182ee45da083 ("Bluetooth: hci_sync: Rework hci_suspend_notifier") Signed-off-by: Ludovico de Nittis Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6c67dfa139e2..ce0ff06f2f73 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2718,6 +2718,12 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) goto done; } + /* During suspend, mark connection as closed immediately + * since we might not receive HCI_EV_DISCONN_COMPLETE + */ + if (hdev->suspended) + conn->state = BT_CLOSED; + mgmt_conn = test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags); if (conn->type == ACL_LINK) { From 15bf2c6391bafb14a3020d06ec0761bce0803463 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 20 Aug 2025 17:04:00 -0400 Subject: [PATCH 2308/2411] Bluetooth: hci_event: Detect if HCI_EV_NUM_COMP_PKTS is unbalanced This attempts to detect if HCI_EV_NUM_COMP_PKTS contain an unbalanced (more than currently considered outstanding) number of packets otherwise it could cause the hcon->sent to underflow and loop around breaking the tracking of the outstanding packets pending acknowledgment. 
Fixes: f42809185896 ("Bluetooth: Simplify num_comp_pkts_evt function") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index ce0ff06f2f73..904bcff4f4ca 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4404,7 +4404,17 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, if (!conn) continue; - conn->sent -= count; + /* Check if there is really enough packets outstanding before + * attempting to decrease the sent counter otherwise it could + * underflow.. + */ + if (conn->sent >= count) { + conn->sent -= count; + } else { + bt_dev_warn(hdev, "hcon %p sent %u < count %u", + conn, conn->sent, count); + conn->sent = 0; + } for (i = 0; i < count; ++i) hci_conn_tx_dequeue(conn); From 55b9551fcdf6a2fe7f3422918d5697b56794da72 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 20 Aug 2025 10:16:17 +0800 Subject: [PATCH 2309/2411] Bluetooth: hci_event: Disconnect device when BIG sync is lost When a BIG sync is lost, the device should be set to "disconnected". This ensures symmetry with the ISO path setup, where the device is marked as "connected" once the path is established. Without this change, the device state remains inconsistent and may lead to a memory leak. 
Fixes: b2a5f2e1c127 ("Bluetooth: hci_event: Add support for handling LE BIG Sync Lost event") Signed-off-by: Yang Li Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 5 +++++ net/bluetooth/mgmt.c | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 904bcff4f4ca..7a2174851857 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -7024,6 +7024,7 @@ static void hci_le_big_sync_lost_evt(struct hci_dev *hdev, void *data, { struct hci_evt_le_big_sync_lost *ev = data; struct hci_conn *bis, *conn; + bool mgmt_conn; bt_dev_dbg(hdev, "big handle 0x%2.2x", ev->handle); @@ -7042,6 +7043,10 @@ static void hci_le_big_sync_lost_evt(struct hci_dev *hdev, void *data, while ((bis = hci_conn_hash_lookup_big_state(hdev, ev->handle, BT_CONNECTED, HCI_ROLE_SLAVE))) { + mgmt_conn = test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &bis->flags); + mgmt_device_disconnected(hdev, &bis->dst, bis->type, bis->dst_type, + ev->reason, mgmt_conn); + clear_bit(HCI_CONN_BIG_SYNC, &bis->flags); hci_disconn_cfm(bis, ev->reason); hci_conn_del(bis); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3166f5fb876b..90e37ff2c85d 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -9705,7 +9705,9 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, if (!mgmt_connected) return; - if (link_type != ACL_LINK && link_type != LE_LINK) + if (link_type != ACL_LINK && + link_type != LE_LINK && + link_type != BIS_LINK) return; bacpy(&ev.addr.bdaddr, bdaddr); From 6bbd0d3f0c23fc53c17409dd7476f38ae0ff0cd9 Mon Sep 17 00:00:00 2001 From: Pavel Shpakovskiy Date: Fri, 22 Aug 2025 12:20:55 +0300 Subject: [PATCH 2310/2411] Bluetooth: hci_sync: fix set_local_name race condition Function set_name_sync() uses hdev->dev_name field to send HCI_OP_WRITE_LOCAL_NAME command, but copying from data to hdev->dev_name is called after mgmt cmd was queued, so it is possible that 
function set_name_sync() will read old name value. This change adds name as a parameter for function hci_update_name_sync() to avoid race condition. Fixes: 6f6ff38a1e14 ("Bluetooth: hci_sync: Convert MGMT_OP_SET_LOCAL_NAME") Signed-off-by: Pavel Shpakovskiy Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 2 +- net/bluetooth/hci_sync.c | 6 +++--- net/bluetooth/mgmt.c | 5 ++++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 5224f57f6af2..e352a4e0ef8d 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -93,7 +93,7 @@ int hci_update_class_sync(struct hci_dev *hdev); int hci_update_eir_sync(struct hci_dev *hdev); int hci_update_class_sync(struct hci_dev *hdev); -int hci_update_name_sync(struct hci_dev *hdev); +int hci_update_name_sync(struct hci_dev *hdev, const u8 *name); int hci_write_ssp_mode_sync(struct hci_dev *hdev, u8 mode); int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 31d72b9683ef..b6f888d8354e 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -3481,13 +3481,13 @@ int hci_update_scan_sync(struct hci_dev *hdev) return hci_write_scan_enable_sync(hdev, scan); } -int hci_update_name_sync(struct hci_dev *hdev) +int hci_update_name_sync(struct hci_dev *hdev, const u8 *name) { struct hci_cp_write_local_name cp; memset(&cp, 0, sizeof(cp)); - memcpy(cp.name, hdev->dev_name, sizeof(cp.name)); + memcpy(cp.name, name, sizeof(cp.name)); return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp, @@ -3540,7 +3540,7 @@ int hci_powered_update_sync(struct hci_dev *hdev) hci_write_fast_connectable_sync(hdev, false); hci_update_scan_sync(hdev); hci_update_class_sync(hdev); - hci_update_name_sync(hdev); + hci_update_name_sync(hdev, hdev->dev_name); 
hci_update_eir_sync(hdev); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 90e37ff2c85d..50634ef5c8b7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3892,8 +3892,11 @@ static void set_name_complete(struct hci_dev *hdev, void *data, int err) static int set_name_sync(struct hci_dev *hdev, void *data) { + struct mgmt_pending_cmd *cmd = data; + struct mgmt_cp_set_local_name *cp = cmd->param; + if (lmp_bredr_capable(hdev)) { - hci_update_name_sync(hdev); + hci_update_name_sync(hdev, cp->name); hci_update_eir_sync(hdev); } From 7b28232921782aa38048249132899c337405eaa8 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Sun, 17 Aug 2025 14:49:06 +0200 Subject: [PATCH 2311/2411] mips: dts: lantiq: danube: add missing burst length property The upstream dts lacks the lantiq,{rx/tx}-burst-length property. Other issues were also fixed: arch/mips/boot/dts/lantiq/danube_easy50712.dtb: etop@e180000 (lantiq,etop-xway): 'interrupt-names' is a required property from schema $id: http://devicetree.org/schemas/net/lantiq,etop-xway.yaml# arch/mips/boot/dts/lantiq/danube_easy50712.dtb: etop@e180000 (lantiq,etop-xway): 'lantiq,tx-burst-length' is a required property from schema $id: http://devicetree.org/schemas/net/lantiq,etop-xway.yaml# arch/mips/boot/dts/lantiq/danube_easy50712.dtb: etop@e180000 (lantiq,etop-xway): 'lantiq,rx-burst-length' is a required property from schema $id: http://devicetree.org/schemas/net/lantiq,etop-xway.yaml# Fixes: 14d4e308e0aa ("net: lantiq: configure the burst length in ethernet drivers") Signed-off-by: Aleksander Jan Bajkowski Acked-by: Jakub Kicinski --- arch/mips/boot/dts/lantiq/danube_easy50712.dts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/boot/dts/lantiq/danube_easy50712.dts b/arch/mips/boot/dts/lantiq/danube_easy50712.dts index 1ce20b7d05cb..d8b3cd69eda3 100644 --- a/arch/mips/boot/dts/lantiq/danube_easy50712.dts +++ b/arch/mips/boot/dts/lantiq/danube_easy50712.dts @@ -87,8 +87,11 @@ 
etop@e180000 { reg = <0xe180000 0x40000>; interrupt-parent = <&icu0>; interrupts = <73 78>; + interrupt-names = "tx", "rx"; phy-mode = "rmii"; mac-address = [ 00 11 22 33 44 55 ]; + lantiq,rx-burst-length = <4>; + lantiq,tx-burst-length = <4>; }; stp0: stp@e100bb0 { From 8c431ea8f3f795c4b9cfa57a85bc4166b9cce0ac Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Sun, 17 Aug 2025 14:49:07 +0200 Subject: [PATCH 2312/2411] mips: lantiq: xway: sysctrl: rename the etop node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Binding requires a node name matching ‘^ethernet@[0-9a-f]+$’. This patch changes the clock name from “etop” to “ethernet”. This fixes the following warning: arch/mips/boot/dts/lantiq/danube_easy50712.dtb: etop@e180000 (lantiq,etop-xway): $nodename:0: 'etop@e180000' does not match '^ethernet@[0-9a-f]+$' from schema $id: http://devicetree.org/schemas/net/lantiq,etop-xway.yaml# Fixes: dac0bad93741 ("dt-bindings: net: lantiq,etop-xway: Document Lantiq Xway ETOP bindings") Signed-off-by: Aleksander Jan Bajkowski Acked-by: Jakub Kicinski --- arch/mips/boot/dts/lantiq/danube_easy50712.dts | 2 +- arch/mips/lantiq/xway/sysctrl.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/mips/boot/dts/lantiq/danube_easy50712.dts b/arch/mips/boot/dts/lantiq/danube_easy50712.dts index d8b3cd69eda3..c4d7aa5753b0 100644 --- a/arch/mips/boot/dts/lantiq/danube_easy50712.dts +++ b/arch/mips/boot/dts/lantiq/danube_easy50712.dts @@ -82,7 +82,7 @@ conf_out { }; }; - etop@e180000 { + ethernet@e180000 { compatible = "lantiq,etop-xway"; reg = <0xe180000 0x40000>; interrupt-parent = <&icu0>; diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 5a75283d17f1..6031a0272d87 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -497,7 +497,7 @@ void __init ltq_soc_init(void) ifccr = CGU_IFCCR_VR9; pcicr = CGU_PCICR_VR9; } else { -
clkdev_add_pmu("1e180000.etop", NULL, 1, 0, PMU_PPE); + clkdev_add_pmu("1e180000.ethernet", NULL, 1, 0, PMU_PPE); } if (!of_machine_is_compatible("lantiq,ase")) @@ -531,9 +531,9 @@ void __init ltq_soc_init(void) CLOCK_133M, CLOCK_133M); clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0); clkdev_add_pmu("1f203018.usb2-phy", "phy", 1, 0, PMU_USB0_P); - clkdev_add_pmu("1e180000.etop", "ppe", 1, 0, PMU_PPE); - clkdev_add_cgu("1e180000.etop", "ephycgu", CGU_EPHY); - clkdev_add_pmu("1e180000.etop", "ephy", 1, 0, PMU_EPHY); + clkdev_add_pmu("1e180000.ethernet", "ppe", 1, 0, PMU_PPE); + clkdev_add_cgu("1e180000.ethernet", "ephycgu", CGU_EPHY); + clkdev_add_pmu("1e180000.ethernet", "ephy", 1, 0, PMU_EPHY); clkdev_add_pmu("1e103000.sdio", NULL, 1, 0, PMU_ASE_SDIO); clkdev_add_pmu("1e116000.mei", "dfe", 1, 0, PMU_DFE); } else if (of_machine_is_compatible("lantiq,grx390")) { @@ -592,7 +592,7 @@ void __init ltq_soc_init(void) clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0 | PMU_AHBM); clkdev_add_pmu("1f203034.usb2-phy", "phy", 1, 0, PMU_USB1_P); clkdev_add_pmu("1e106000.usb", "otg", 1, 0, PMU_USB1 | PMU_AHBM); - clkdev_add_pmu("1e180000.etop", "switch", 1, 0, PMU_SWITCH); + clkdev_add_pmu("1e180000.ethernet", "switch", 1, 0, PMU_SWITCH); clkdev_add_pmu("1e103000.sdio", NULL, 1, 0, PMU_SDIO); clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); clkdev_add_pmu("1e116000.mei", "dfe", 1, 0, PMU_DFE); From 80af3745ca465c6c47e833c1902004a7fa944f37 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 22 Aug 2025 11:08:46 +0300 Subject: [PATCH 2313/2411] of: dynamic: Fix use after free in of_changeset_add_prop_helper() If the of_changeset_add_property() function call fails, then this code frees "new_pp" and then dereference it on the next line. Return the error code directly instead. 
Fixes: c81f6ce16785 ("of: dynamic: Fix memleak when of_pci_add_properties() failed") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/aKgljjhnpa4lVpdx@stanley.mountain Signed-off-by: Rob Herring (Arm) --- drivers/of/dynamic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index dd30b7d8b5e4..2eaaddcb0ec4 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -935,13 +935,15 @@ static int of_changeset_add_prop_helper(struct of_changeset *ocs, return -ENOMEM; ret = of_changeset_add_property(ocs, np, new_pp); - if (ret) + if (ret) { __of_prop_free(new_pp); + return ret; + } new_pp->next = np->deadprops; np->deadprops = new_pp; - return ret; + return 0; } /** From e3d01979e4bff5c87eb4054a22e7568bb679b1fe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 20 Aug 2025 19:55:22 -0400 Subject: [PATCH 2314/2411] fgraph: Copy args in intermediate storage with entry The output of the function graph tracer has two ways to display its entries. One way for leaf functions with no events recorded within them, and the other is for functions with events recorded inside it. As function graph has an entry and exit event, to simplify the output of leaf functions it combines the two, where as non leaf functions are separate: 2) | invoke_rcu_core() { 2) | raise_softirq() { 2) 0.391 us | __raise_softirq_irqoff(); 2) 1.191 us | } 2) 2.086 us | } The __raise_softirq_irqoff() function above is really two events that were merged into one. Otherwise it would have looked like: 2) | invoke_rcu_core() { 2) | raise_softirq() { 2) | __raise_softirq_irqoff() { 2) 0.391 us | } 2) 1.191 us | } 2) 2.086 us | } In order to do this merge, the reading of the trace output file needs to look at the next event before printing. 
But since the pointer to the event is on the ring buffer, it needs to save the entry event before it looks at the next event as the next event goes out of focus as soon as a new event is read from the ring buffer. After it reads the next event, it will print the entry event with either the '{' (non leaf) or ';' and timestamps (leaf). The iterator used to read the trace file has storage for this event. The problem happens when the function graph tracer has arguments attached to the entry event as the entry now has a variable length "args" field. This field only gets set when funcargs option is used. But the args are not recorded in this temp data and garbage could be printed. The entry field is copied via: data->ent = *curr; Where "curr" is the entry field. But this method only saves the non variable length fields from the structure. Add a helper structure to the iterator data that adds the max args size to the data storage in the iterator. Then simply copy the entire entry into this storage (with size protection). 
Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Mark Rutland Link: https://lore.kernel.org/20250820195522.51d4a268@gandalf.local.home Reported-by: Sasha Levin Tested-by: Sasha Levin Closes: https://lore.kernel.org/all/aJaxRVKverIjF4a6@lappy/ Fixes: ff5c9c576e75 ("ftrace: Add support for function argument to graph tracer") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_functions_graph.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 66e1a527cf1a..a7f4b9a47a71 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -27,14 +27,21 @@ struct fgraph_cpu_data { unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH]; }; +struct fgraph_ent_args { + struct ftrace_graph_ent_entry ent; + /* Force the sizeof of args[] to have FTRACE_REGS_MAX_ARGS entries */ + unsigned long args[FTRACE_REGS_MAX_ARGS]; +}; + struct fgraph_data { struct fgraph_cpu_data __percpu *cpu_data; /* Place to preserve last processed entry. */ union { - struct ftrace_graph_ent_entry ent; + struct fgraph_ent_args ent; + /* TODO allow retaddr to have args */ struct fgraph_retaddr_ent_entry rent; - } ent; + }; struct ftrace_graph_ret_entry ret; int failed; int cpu; @@ -627,10 +634,13 @@ get_return_for_leaf(struct trace_iterator *iter, * Save current and next entries for later reference * if the output fails. */ - if (unlikely(curr->ent.type == TRACE_GRAPH_RETADDR_ENT)) - data->ent.rent = *(struct fgraph_retaddr_ent_entry *)curr; - else - data->ent.ent = *curr; + if (unlikely(curr->ent.type == TRACE_GRAPH_RETADDR_ENT)) { + data->rent = *(struct fgraph_retaddr_ent_entry *)curr; + } else { + int size = min((int)sizeof(data->ent), (int)iter->ent_size); + + memcpy(&data->ent, curr, size); + } /* * If the next event is not a return type, then * we only care about what type it is. 
Otherwise we can From 4013aef2ced9b756a410f50d12df9ebe6a883e4a Mon Sep 17 00:00:00 2001 From: Tengda Wu Date: Fri, 22 Aug 2025 03:33:43 +0000 Subject: [PATCH 2315/2411] ftrace: Fix potential warning in trace_printk_seq during ftrace_dump When calling ftrace_dump_one() concurrently with reading trace_pipe, a WARN_ON_ONCE() in trace_printk_seq() can be triggered due to a race condition. The issue occurs because: CPU0 (ftrace_dump) CPU1 (reader) echo z > /proc/sysrq-trigger !trace_empty(&iter) trace_iterator_reset(&iter) <- len = size = 0 cat /sys/kernel/tracing/trace_pipe trace_find_next_entry_inc(&iter) __find_next_entry ring_buffer_empty_cpu <- all empty return NULL trace_printk_seq(&iter.seq) WARN_ON_ONCE(s->seq.len >= s->seq.size) In the context between trace_empty() and trace_find_next_entry_inc() during ftrace_dump, the ring buffer data was consumed by other readers. This caused trace_find_next_entry_inc to return NULL, failing to populate `iter.seq`. At this point, due to the prior trace_iterator_reset, both `iter.seq.len` and `iter.seq.size` were set to 0. Since they are equal, the WARN_ON_ONCE condition is triggered. Move the trace_printk_seq() into the if block that checks to make sure the return value of trace_find_next_entry_inc() is non-NULL in ftrace_dump_one(), ensuring the 'iter.seq' is properly populated before subsequent operations. 
Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Ingo Molnar Link: https://lore.kernel.org/20250822033343.3000289-1-wutengda@huaweicloud.com Fixes: d769041f8653 ("ring_buffer: implement new locking") Signed-off-by: Tengda Wu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8d8935ed416d..1b7db732c0b1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -10638,10 +10638,10 @@ static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_m ret = print_trace_line(&iter); if (ret != TRACE_TYPE_NO_CONSUME) trace_consume(&iter); + + trace_printk_seq(&iter.seq); } touch_nmi_watchdog(); - - trace_printk_seq(&iter.seq); } if (!cnt) From abadf0ff63be488dc502ecfc9f622929a21b7117 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Thu, 21 Aug 2025 03:03:46 +0000 Subject: [PATCH 2316/2411] page_pool: fix incorrect mp_ops error handling Minor fix to the memory provider error handling, we should be jumping to free_ptr_ring in this error case rather than returning directly. Found by code-inspection. 
Cc: skhawaja@google.com Fixes: b400f4b87430 ("page_pool: Set `dma_sync` to false for devmem memory provider") Signed-off-by: Mina Almasry Reviewed-by: Samiullah Khawaja Link: https://patch.msgid.link/20250821030349.705244-1-almasrymina@google.com Signed-off-by: Jakub Kicinski --- net/core/page_pool.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 343a6cac21e3..ba70569bd4b0 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -287,8 +287,10 @@ static int page_pool_init(struct page_pool *pool, } if (pool->mp_ops) { - if (!pool->dma_map || !pool->dma_sync) - return -EOPNOTSUPP; + if (!pool->dma_map || !pool->dma_sync) { + err = -EOPNOTSUPP; + goto free_ptr_ring; + } if (WARN_ON(!is_kernel_rodata((unsigned long)pool->mp_ops))) { err = -EFAULT; From a64494aafc56939564e3e9e57f99df5c27204e04 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Thu, 21 Aug 2025 11:55:28 +0530 Subject: [PATCH 2317/2411] Octeontx2-vf: Fix max packet length errors Once the driver submits the packets to the hardware, each packet traverses multiple transmit levels in the following order: SMQ -> TL4 -> TL3 -> TL2 -> TL1 The SMQ supports configurable minimum and maximum packet sizes. It enters a hang state if the driver submits packets with out-of-bounds lengths. To avoid this, implement packet length validation before submitting packets to the hardware. Increment tx_dropped counter on failure.
Fixes: 3184fb5ba96e ("octeontx2-vf: Virtual function driver support") Fixes: 22f858796758 ("octeontx2-pf: Add basic net_device_ops") Fixes: 3ca6c4c882a7 ("octeontx2-pf: Add packet transmission support") Signed-off-by: Hariprasad Kelam Link: https://patch.msgid.link/20250821062528.1697992-1-hkelam@marvell.com Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/octeontx2/nic/otx2_common.c | 4 +++- .../ethernet/marvell/octeontx2/nic/otx2_common.h | 1 + .../net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 3 +++ .../net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 10 ++++++++++ drivers/net/ethernet/marvell/octeontx2/nic/rep.c | 13 ++++++++++++- drivers/net/ethernet/marvell/octeontx2/nic/rep.h | 1 + 6 files changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index f674729124e6..aff17c37ddde 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -124,7 +124,9 @@ void otx2_get_dev_stats(struct otx2_nic *pfvf) dev_stats->rx_ucast_frames; dev_stats->tx_bytes = OTX2_GET_TX_STATS(TX_OCTS); - dev_stats->tx_drops = OTX2_GET_TX_STATS(TX_DROP); + dev_stats->tx_drops = OTX2_GET_TX_STATS(TX_DROP) + + (unsigned long)atomic_long_read(&dev_stats->tx_discards); + dev_stats->tx_bcast_frames = OTX2_GET_TX_STATS(TX_BCAST); dev_stats->tx_mcast_frames = OTX2_GET_TX_STATS(TX_MCAST); dev_stats->tx_ucast_frames = OTX2_GET_TX_STATS(TX_UCAST); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index e3765b73c434..1c8a3c078a64 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -153,6 +153,7 @@ struct otx2_dev_stats { u64 tx_bcast_frames; u64 tx_mcast_frames; u64 tx_drops; + atomic_long_t tx_discards; }; /* Driver counted stats */ diff --git 
a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index b23585c5e5c2..5027fae0aa77 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -2220,6 +2220,7 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) { struct otx2_nic *pf = netdev_priv(netdev); int qidx = skb_get_queue_mapping(skb); + struct otx2_dev_stats *dev_stats; struct otx2_snd_queue *sq; struct netdev_queue *txq; int sq_idx; @@ -2232,6 +2233,8 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) /* Check for minimum and maximum packet length */ if (skb->len <= ETH_HLEN || (!skb_shinfo(skb)->gso_size && skb->len > pf->tx_max_pktlen)) { + dev_stats = &pf->hw.dev_stats; + atomic_long_inc(&dev_stats->tx_discards); dev_kfree_skb(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 5589fccd370b..7ebb6e656884 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -417,9 +417,19 @@ static netdev_tx_t otx2vf_xmit(struct sk_buff *skb, struct net_device *netdev) { struct otx2_nic *vf = netdev_priv(netdev); int qidx = skb_get_queue_mapping(skb); + struct otx2_dev_stats *dev_stats; struct otx2_snd_queue *sq; struct netdev_queue *txq; + /* Check for minimum and maximum packet length */ + if (skb->len <= ETH_HLEN || + (!skb_shinfo(skb)->gso_size && skb->len > vf->tx_max_pktlen)) { + dev_stats = &vf->hw.dev_stats; + atomic_long_inc(&dev_stats->tx_discards); + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + sq = &vf->qset.sq[qidx]; txq = netdev_get_tx_queue(netdev, qidx); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c index 25af98034e2e..b476733a0234 100644 --- 
a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c @@ -371,7 +371,8 @@ static void rvu_rep_get_stats(struct work_struct *work) stats->rx_mcast_frames = rsp->rx.mcast; stats->tx_bytes = rsp->tx.octs; stats->tx_frames = rsp->tx.ucast + rsp->tx.bcast + rsp->tx.mcast; - stats->tx_drops = rsp->tx.drop; + stats->tx_drops = rsp->tx.drop + + (unsigned long)atomic_long_read(&stats->tx_discards); exit: mutex_unlock(&priv->mbox.lock); } @@ -418,6 +419,16 @@ static netdev_tx_t rvu_rep_xmit(struct sk_buff *skb, struct net_device *dev) struct otx2_nic *pf = rep->mdev; struct otx2_snd_queue *sq; struct netdev_queue *txq; + struct rep_stats *stats; + + /* Check for minimum and maximum packet length */ + if (skb->len <= ETH_HLEN || + (!skb_shinfo(skb)->gso_size && skb->len > pf->tx_max_pktlen)) { + stats = &rep->stats; + atomic_long_inc(&stats->tx_discards); + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } sq = &pf->qset.sq[rep->rep_id]; txq = netdev_get_tx_queue(dev, 0); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/rep.h b/drivers/net/ethernet/marvell/octeontx2/nic/rep.h index 38446b3e4f13..5bc9e2c7d800 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/rep.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/rep.h @@ -27,6 +27,7 @@ struct rep_stats { u64 tx_bytes; u64 tx_frames; u64 tx_drops; + atomic_long_t tx_discards; }; struct rep_dev { From e2fe0c54fb7401e6ecd3c10348519ab9e23bd639 Mon Sep 17 00:00:00 2001 From: James Jones Date: Mon, 11 Aug 2025 15:00:16 -0700 Subject: [PATCH 2318/2411] drm/nouveau/disp: Always accept linear modifier On some chipsets, which block-linear modifiers are supported is format-specific. However, linear modifiers are always supported. The prior modifier filtering logic was not accounting for the linear case.
Cc: stable@vger.kernel.org Fixes: c586f30bf74c ("drm/nouveau/kms: Add format mod prop to base/ovly/nvdisp") Signed-off-by: James Jones Link: https://lore.kernel.org/r/20250811220017.1337-3-jajones@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/dispnv50/wndw.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index 11d5b923d6e7..e2c55f4b9c5a 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -795,6 +795,10 @@ static bool nv50_plane_format_mod_supported(struct drm_plane *plane, struct nouveau_drm *drm = nouveau_drm(plane->dev); uint8_t i; + /* All chipsets can display all formats in linear layout */ + if (modifier == DRM_FORMAT_MOD_LINEAR) + return true; + if (drm->client.device.info.chipset < 0xc0) { const struct drm_format_info *info = drm_format_info(format); const uint8_t kind = (modifier >> 12) & 0xff; From bfb336cf97df7b37b2b2edec0f69773e06d11955 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 22 Aug 2025 18:36:06 -0400 Subject: [PATCH 2319/2411] ftrace: Also allocate and copy hash for reading of filter files Currently the reader of set_ftrace_filter and set_ftrace_notrace just adds the pointer to the global tracer hash to its iterator. Unlike the writer that allocates a copy of the hash, the reader keeps the pointer to the filter hashes. This is problematic because this pointer is static across function calls that release the locks that can update the global tracer hashes. This can cause UAF and similar bugs. Allocate and copy the hash for reading the filter files like it is done for the writers. This not only fixes UAF bugs, but also makes the code a bit simpler as it doesn't have to differentiate when to free the iterator's hash between writers and readers. 
Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Nathan Chancellor Cc: Linus Torvalds Link: https://lore.kernel.org/20250822183606.12962cc3@batman.local.home Fixes: c20489dad156 ("ftrace: Assign iter->hash to filter or notrace hashes on seq read") Closes: https://lore.kernel.org/all/20250813023044.2121943-1-wutengda@huaweicloud.com/ Closes: https://lore.kernel.org/all/20250822192437.GA458494@ax162/ Reported-by: Tengda Wu Tested-by: Tengda Wu Tested-by: Nathan Chancellor Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 00b76d450a89..a69067367c29 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4661,13 +4661,17 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, } else { iter->hash = alloc_and_copy_ftrace_hash(size_bits, hash); } + } else { + if (hash) + iter->hash = alloc_and_copy_ftrace_hash(hash->size_bits, hash); + else + iter->hash = EMPTY_HASH; + } - if (!iter->hash) { - trace_parser_put(&iter->parser); - goto out_unlock; - } - } else - iter->hash = hash; + if (!iter->hash) { + trace_parser_put(&iter->parser); + goto out_unlock; + } ret = 0; @@ -6543,9 +6547,6 @@ int ftrace_regex_release(struct inode *inode, struct file *file) ftrace_hash_move_and_update_ops(iter->ops, orig_hash, iter->hash, filter_hash); mutex_unlock(&ftrace_lock); - } else { - /* For read only, the hash is the ops hash */ - iter->hash = NULL; } mutex_unlock(&iter->ops->func_hash->regex_lock); From ec79003c5f9d2c7f9576fc69b8dbda80305cbe3a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 21 Aug 2025 02:18:24 +0000 Subject: [PATCH 2320/2411] atm: atmtcp: Prevent arbitrary write in atmtcp_recv_control(). syzbot reported the splat below. [0] When atmtcp_v_open() or atmtcp_v_close() is called via connect() or close(), atmtcp_send_control() is called to send an in-kernel special message. 
The message has ATMTCP_HDR_MAGIC in atmtcp_control.hdr.length. Also, a pointer of struct atm_vcc is set to atmtcp_control.vcc. The notable thing is struct atmtcp_control is uAPI but has a space for an in-kernel pointer. struct atmtcp_control { struct atmtcp_hdr hdr; /* must be first */ ... atm_kptr_t vcc; /* both directions */ ... } __ATM_API_ALIGN; typedef struct { unsigned char _[8]; } __ATM_API_ALIGN atm_kptr_t; The special message is processed in atmtcp_recv_control() called from atmtcp_c_send(). atmtcp_c_send() is vcc->dev->ops->send() and called from 2 paths: 1. .ndo_start_xmit() (vcc->send() == atm_send_aal0()) 2. vcc_sendmsg() The problem is sendmsg() does not validate the message length and userspace can abuse atmtcp_recv_control() to overwrite any kptr by atmtcp_control. Let's add a new ->pre_send() hook to validate messages from sendmsg(). [0]: Oops: general protection fault, probably for non-canonical address 0xdffffc00200000ab: 0000 [#1] SMP KASAN PTI KASAN: probably user-memory-access in range [0x0000000100000558-0x000000010000055f] CPU: 0 UID: 0 PID: 5865 Comm: syz-executor331 Not tainted 6.17.0-rc1-syzkaller-00215-gbab3ce404553 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 RIP: 0010:atmtcp_recv_control drivers/atm/atmtcp.c:93 [inline] RIP: 0010:atmtcp_c_send+0x1da/0x950 drivers/atm/atmtcp.c:297 Code: 4d 8d 75 1a 4c 89 f0 48 c1 e8 03 42 0f b6 04 20 84 c0 0f 85 15 06 00 00 41 0f b7 1e 4d 8d b7 60 05 00 00 4c 89 f0 48 c1 e8 03 <42> 0f b6 04 20 84 c0 0f 85 13 06 00 00 66 41 89 1e 4d 8d 75 1c 4c RSP: 0018:ffffc90003f5f810 EFLAGS: 00010203 RAX: 00000000200000ab RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff88802a510000 RSI: 00000000ffffffff RDI: ffff888030a6068c RBP: ffff88802699fb40 R08: ffff888030a606eb R09: 1ffff1100614c0dd R10: dffffc0000000000 R11: ffffffff8718fc40 R12: dffffc0000000000 R13: ffff888030a60680 R14: 000000010000055f R15: 00000000ffffffff FS: 00007f8d7e9236c0(0000) 
GS:ffff888125c1c000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000045ad50 CR3: 0000000075bde000 CR4: 00000000003526f0 Call Trace: vcc_sendmsg+0xa10/0xc60 net/atm/common.c:645 sock_sendmsg_nosec net/socket.c:714 [inline] __sock_sendmsg+0x219/0x270 net/socket.c:729 ____sys_sendmsg+0x505/0x830 net/socket.c:2614 ___sys_sendmsg+0x21f/0x2a0 net/socket.c:2668 __sys_sendmsg net/socket.c:2700 [inline] __do_sys_sendmsg net/socket.c:2705 [inline] __se_sys_sendmsg net/socket.c:2703 [inline] __x64_sys_sendmsg+0x19b/0x260 net/socket.c:2703 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f8d7e96a4a9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 51 18 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f8d7e923198 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f8d7e9f4308 RCX: 00007f8d7e96a4a9 RDX: 0000000000000000 RSI: 0000200000000240 RDI: 0000000000000005 RBP: 00007f8d7e9f4300 R08: 65732f636f72702f R09: 65732f636f72702f R10: 65732f636f72702f R11: 0000000000000246 R12: 00007f8d7e9c10ac R13: 00007f8d7e9231a0 R14: 0000200000000200 R15: 0000200000000250 Modules linked in: Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+1741b56d54536f4ec349@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68a6767c.050a0220.3d78fd.0011.GAE@google.com/ Tested-by: syzbot+1741b56d54536f4ec349@syzkaller.appspotmail.com Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250821021901.2814721-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- drivers/atm/atmtcp.c | 17 ++++++++++++++--- include/linux/atmdev.h | 1 + net/atm/common.c | 15 ++++++++++++--- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index 
eeae160c898d..fa3c76a2b49d 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -279,6 +279,19 @@ static struct atm_vcc *find_vcc(struct atm_dev *dev, short vpi, int vci) return NULL; } +static int atmtcp_c_pre_send(struct atm_vcc *vcc, struct sk_buff *skb) +{ + struct atmtcp_hdr *hdr; + + if (skb->len < sizeof(struct atmtcp_hdr)) + return -EINVAL; + + hdr = (struct atmtcp_hdr *)skb->data; + if (hdr->length == ATMTCP_HDR_MAGIC) + return -EINVAL; + + return 0; +} static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb) { @@ -288,9 +301,6 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb) struct sk_buff *new_skb; int result = 0; - if (skb->len < sizeof(struct atmtcp_hdr)) - goto done; - dev = vcc->dev_data; hdr = (struct atmtcp_hdr *) skb->data; if (hdr->length == ATMTCP_HDR_MAGIC) { @@ -347,6 +357,7 @@ static const struct atmdev_ops atmtcp_v_dev_ops = { static const struct atmdev_ops atmtcp_c_dev_ops = { .close = atmtcp_c_close, + .pre_send = atmtcp_c_pre_send, .send = atmtcp_c_send }; diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 45f2f278b50a..70807c679f1a 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -185,6 +185,7 @@ struct atmdev_ops { /* only send is required */ int (*compat_ioctl)(struct atm_dev *dev,unsigned int cmd, void __user *arg); #endif + int (*pre_send)(struct atm_vcc *vcc, struct sk_buff *skb); int (*send)(struct atm_vcc *vcc,struct sk_buff *skb); int (*send_bh)(struct atm_vcc *vcc, struct sk_buff *skb); int (*send_oam)(struct atm_vcc *vcc,void *cell,int flags); diff --git a/net/atm/common.c b/net/atm/common.c index d7f7976ea13a..881c7f259dbd 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -635,18 +635,27 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) skb->dev = NULL; /* for paths shared with net_device interfaces */ if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) { - atm_return_tx(vcc, skb); - kfree_skb(skb); error = 
-EFAULT; - goto out; + goto free_skb; } if (eff != size) memset(skb->data + size, 0, eff-size); + + if (vcc->dev->ops->pre_send) { + error = vcc->dev->ops->pre_send(vcc, skb); + if (error) + goto free_skb; + } + error = vcc->dev->ops->send(vcc, skb); error = error ? error : size; out: release_sock(sk); return error; +free_skb: + atm_return_tx(vcc, skb); + kfree_skb(skb); + goto out; } __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait) From 1b237f190eb3d36f52dffe07a40b5eb210280e00 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 24 Aug 2025 12:04:12 -0400 Subject: [PATCH 2321/2411] Linux 6.17-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d1adb78c3596..06c28b1d7e67 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Baby Opossum Posse # *DOCUMENTATION* From ef3e9c91ed87f13dba877a20569f4a0accf0612c Mon Sep 17 00:00:00 2001 From: Kamal Wadhwa Date: Sat, 23 Aug 2025 01:56:39 +0530 Subject: [PATCH 2322/2411] regulator: pm8008: fix probe failure due to negative voltage selector In the current design, the `pm8008_regulator_get_voltage_sel()` callback can return a negative value if the raw voltage value is read as 0 uV from the PMIC HW register. This can cause the probe to fail when the `machine_constraints_voltage()` check is called during the regulator registration flow. Fix this by using the helper `regulator_map_voltage_linear_range()` to convert the raw value to a voltage selector inside the mentioned get voltage selector function. This ensures that the value returned is always within the defined range. 
Signed-off-by: Kamal Wadhwa Message-ID: <20250823-pm8008-negitive-selector-v1-1-52b026a4b5e8@quicinc.com> Signed-off-by: Mark Brown --- drivers/regulator/qcom-pm8008-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/qcom-pm8008-regulator.c b/drivers/regulator/qcom-pm8008-regulator.c index da017c1969d0..90c78ee1c37b 100644 --- a/drivers/regulator/qcom-pm8008-regulator.c +++ b/drivers/regulator/qcom-pm8008-regulator.c @@ -96,7 +96,7 @@ static int pm8008_regulator_get_voltage_sel(struct regulator_dev *rdev) uV = le16_to_cpu(val) * 1000; - return (uV - preg->desc.min_uV) / preg->desc.uV_step; + return regulator_map_voltage_linear_range(rdev, uV, INT_MAX); } static const struct regulator_ops pm8008_regulator_ops = { From d14469ed7c00314fe8957b2841bda329e4eaf4ab Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 25 Aug 2025 17:32:05 +0800 Subject: [PATCH 2323/2411] loop: fix zero sized loop for block special file By default, /dev/sda is block special file from devtmpfs, getattr will return file size as zero, causing loop failed for raw block device. We can add bdev_statx() to return device size, however this may introduce changes that are not acknowledged by user. Fix this problem by reverting changes for block special file, file mapping host is set to bdev inode while opening, and use i_size_read() directly to get device size. 
Fixes: 47b71abd5846 ("loop: use vfs_getattr_nosec for accurate file size") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202508200409.b2459c02-lkp@intel.com Signed-off-by: Yu Kuai Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250825093205.3684121-1-yukuai1@huaweicloud.com [axboe: fix spelling error] Signed-off-by: Jens Axboe --- drivers/block/loop.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 57263c273f0f..053a086d547e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -139,20 +139,26 @@ static int part_shift; static loff_t lo_calculate_size(struct loop_device *lo, struct file *file) { - struct kstat stat; loff_t loopsize; int ret; - /* - * Get the accurate file size. This provides better results than - * cached inode data, particularly for network filesystems where - * metadata may be stale. - */ - ret = vfs_getattr_nosec(&file->f_path, &stat, STATX_SIZE, 0); - if (ret) - return 0; + if (S_ISBLK(file_inode(file)->i_mode)) { + loopsize = i_size_read(file->f_mapping->host); + } else { + struct kstat stat; + + /* + * Get the accurate file size. This provides better results than + * cached inode data, particularly for network filesystems where + * metadata may be stale. + */ + ret = vfs_getattr_nosec(&file->f_path, &stat, STATX_SIZE, 0); + if (ret) + return 0; + + loopsize = stat.size; + } - loopsize = stat.size; if (lo->lo_offset > 0) loopsize -= lo->lo_offset; /* offset is beyond i_size, weird but possible */ From 66e82b6e0a28d4970383e1ee5d60f431001128cd Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Tue, 12 Aug 2025 19:10:02 -0500 Subject: [PATCH 2324/2411] drm/nouveau: fix error path in nvkm_gsp_fwsec_v2 Function nvkm_gsp_fwsec_v2() sets 'ret' if the kmemdup() call fails, but it never uses or returns 'ret' after that point. 
We always need to release the firmware regardless, so do that and then check for error. Fixes: 176fdcbddfd2 ("drm/nouveau/gsp/r535: add support for booting GSP-RM") Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Timur Tabi Link: https://lore.kernel.org/r/20250813001004.2986092-1-ttabi@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c index 52412965fac1..5b721bd9d799 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c @@ -209,11 +209,12 @@ nvkm_gsp_fwsec_v2(struct nvkm_gsp *gsp, const char *name, fw->boot_addr = bld->start_tag << 8; fw->boot_size = bld->code_size; fw->boot = kmemdup(bl->data + hdr->data_offset + bld->code_off, fw->boot_size, GFP_KERNEL); - if (!fw->boot) - ret = -ENOMEM; nvkm_firmware_put(bl); + if (!fw->boot) + return -ENOMEM; + /* Patch in interface data. */ return nvkm_gsp_fwsec_patch(gsp, fw, desc->InterfaceOffset, init_cmd); } From f529b8915543fb9ceb732cec5571f7fe12bc9530 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Tue, 12 Aug 2025 19:10:03 -0500 Subject: [PATCH 2325/2411] drm/nouveau: remove unused increment in gm200_flcn_pio_imem_wr The 'tag' parameter is passed by value and is not actually used after being incremented, so remove the increment. It's the function that calls gm200_flcn_pio_imem_wr that is supposed to (and does) increment 'tag'. 
Fixes: 0e44c2170876 ("drm/nouveau/flcn: new code to load+boot simple HS FWs (VPR scrubber)") Reviewed-by: Philipp Stanner Signed-off-by: Timur Tabi Link: https://lore.kernel.org/r/20250813001004.2986092-2-ttabi@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c b/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c index b7da3ab44c27..6a004c6e6742 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c @@ -103,7 +103,7 @@ gm200_flcn_pio_imem_wr_init(struct nvkm_falcon *falcon, u8 port, bool sec, u32 i static void gm200_flcn_pio_imem_wr(struct nvkm_falcon *falcon, u8 port, const u8 *img, int len, u16 tag) { - nvkm_falcon_wr32(falcon, 0x188 + (port * 0x10), tag++); + nvkm_falcon_wr32(falcon, 0x188 + (port * 0x10), tag); while (len >= 4) { nvkm_falcon_wr32(falcon, 0x184 + (port * 0x10), *(u32 *)img); img += 4; From 64c722b5e7f6b909b0e448e580f64628a0d76208 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Tue, 12 Aug 2025 19:10:04 -0500 Subject: [PATCH 2326/2411] drm/nouveau: remove unused memory target test The memory target check is a hold-over from a refactor. It's harmless but distracting, so just remove it. 
Fixes: 2541626cfb79 ("drm/nouveau/acr: use common falcon HS FW code for ACR FWs") Signed-off-by: Timur Tabi Link: https://lore.kernel.org/r/20250813001004.2986092-3-ttabi@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c b/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c index 6a004c6e6742..7c43397c19e6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c @@ -249,9 +249,11 @@ int gm200_flcn_fw_load(struct nvkm_falcon_fw *fw) { struct nvkm_falcon *falcon = fw->falcon; - int target, ret; + int ret; if (fw->inst) { + int target; + nvkm_falcon_mask(falcon, 0x048, 0x00000001, 0x00000001); switch (nvkm_memory_target(fw->inst)) { @@ -285,15 +287,6 @@ gm200_flcn_fw_load(struct nvkm_falcon_fw *fw) } if (fw->boot) { - switch (nvkm_memory_target(&fw->fw.mem.memory)) { - case NVKM_MEM_TARGET_VRAM: target = 4; break; - case NVKM_MEM_TARGET_HOST: target = 5; break; - case NVKM_MEM_TARGET_NCOH: target = 6; break; - default: - WARN_ON(1); - return -EINVAL; - } - ret = nvkm_falcon_pio_wr(falcon, fw->boot, 0, 0, IMEM, falcon->code.limit - fw->boot_size, fw->boot_size, fw->boot_addr >> 8, false); From 60dfe2434eed13082f26eb7409665dfafb38fa51 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Tue, 24 Jun 2025 07:26:40 -0700 Subject: [PATCH 2327/2411] ice: fix NULL pointer dereference in ice_unplug_aux_dev() on reset Issuing a reset when the driver is loaded without RDMA support will result in a crash as it attempts to remove RDMA's non-existent auxbus device: echo 1 > /sys/class/net//device/reset BUG: kernel NULL pointer dereference, address: 0000000000000008 ... RIP: 0010:ice_unplug_aux_dev+0x29/0x70 [ice] ...
Call Trace: ice_prepare_for_reset+0x77/0x260 [ice] pci_dev_save_and_disable+0x2c/0x70 pci_reset_function+0x88/0x130 reset_store+0x5a/0xa0 kernfs_fop_write_iter+0x15e/0x210 vfs_write+0x273/0x520 ksys_write+0x6b/0xe0 do_syscall_64+0x79/0x3b0 entry_SYSCALL_64_after_hwframe+0x76/0x7e ice_unplug_aux_dev() checks pf->cdev_info->adev for NULL pointer, but pf->cdev_info will also be NULL, leading to the deref in the trace above. Introduce a flag to be set when the creation of the auxbus device is successful, to avoid multiple NULL pointer checks in ice_unplug_aux_dev(). Fixes: c24a65b6a27c7 ("iidc/ice/irdma: Update IDC to support multiple consumers") Signed-off-by: Emil Tantilov Reviewed-by: Przemek Kitszel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 1 + drivers/net/ethernet/intel/ice/ice_idc.c | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 2098f00b3cd3..8a8a01a4bb40 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -510,6 +510,7 @@ enum ice_pf_flags { ICE_FLAG_LINK_LENIENT_MODE_ENA, ICE_FLAG_PLUG_AUX_DEV, ICE_FLAG_UNPLUG_AUX_DEV, + ICE_FLAG_AUX_DEV_CREATED, ICE_FLAG_MTU_CHANGED, ICE_FLAG_GNSS, /* GNSS successfully initialized */ ICE_FLAG_DPLL, /* SyncE/PTP dplls initialized */ diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index 6ab53e430f91..420d45c2558b 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -336,6 +336,7 @@ int ice_plug_aux_dev(struct ice_pf *pf) mutex_lock(&pf->adev_mutex); cdev->adev = adev; mutex_unlock(&pf->adev_mutex); + set_bit(ICE_FLAG_AUX_DEV_CREATED, pf->flags); return 0; } @@ -347,15 +348,16 @@ void ice_unplug_aux_dev(struct ice_pf *pf) { struct auxiliary_device *adev; + if (!test_and_clear_bit(ICE_FLAG_AUX_DEV_CREATED, pf->flags)) + return; + 
mutex_lock(&pf->adev_mutex); adev = pf->cdev_info->adev; pf->cdev_info->adev = NULL; mutex_unlock(&pf->adev_mutex); - if (adev) { - auxiliary_device_delete(adev); - auxiliary_device_uninit(adev); - } + auxiliary_device_delete(adev); + auxiliary_device_uninit(adev); } /** From 86aae43f21cf784c1d7f6a9af93e5116b0f232ab Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 17 Jul 2025 09:57:09 -0700 Subject: [PATCH 2328/2411] ice: don't leave device non-functional if Tx scheduler config fails The ice_cfg_tx_topo function attempts to apply Tx scheduler topology configuration based on NVM parameters, selecting either a 5 or 9 layer topology. As part of this flow, the driver acquires the "Global Configuration Lock", which is a hardware resource associated with programming the DDP package to the device. This "lock" is implemented by firmware as a way to guarantee that only one PF can program the DDP for a device. Unlike a traditional lock, once a PF has acquired this lock, no other PF will be able to acquire it again (including that PF) until a CORER of the device. Future requests to acquire the lock report that global configuration has already completed. The following flow is used to program the Tx topology: * Read the DDP package for scheduler configuration data * Acquire the global configuration lock * Program Tx scheduler topology according to DDP package data * Trigger a CORER which clears the global configuration lock This is followed by the flow for programming the DDP package: * Acquire the global configuration lock (again) * Download the DDP package to the device * Release the global configuration lock. However, if configuration of the Tx topology fails, (i.e. ice_get_set_tx_topo returns an error code), the driver exits ice_cfg_tx_topo() immediately, and fails to trigger CORER. While the global configuration lock is held, the firmware rejects most AdminQ commands, as it is waiting for the DDP package download (or Tx scheduler topology programming) to occur. 
The current driver flows assume that the global configuration lock has been reset by CORER after programming the Tx topology. Thus, the same PF attempts to acquire the global lock again, and fails. This results in the driver reporting "an unknown error occurred when loading the DDP package". It then attempts to enter safe mode, but ultimately fails to finish ice_probe() since nearly all AdminQ commands report error codes, and the driver stops loading the device at some point during its initialization. The only currently known way that ice_get_set_tx_topo() can fail is with certain older DDP packages which contain invalid topology configuration, on firmware versions which strictly validate this data. The most recent releases of the DDP have resolved the invalid data. However, it is still poor practice to essentially brick the device, and prevent access to the device even through safe mode or recovery mode. It is also plausible that this command could fail for some other reason in the future. We cannot simply release the global lock after a failed call to ice_get_set_tx_topo(). Releasing the lock indicates to firmware that global configuration (downloading of the DDP) has completed. Future attempts by this or other PFs to load the DDP will fail with a report that the DDP package has already been downloaded. Then, PFs will enter safe mode as they realize that the package on the device does not meet the minimum version requirement to load. The reported error messages are confusing, as they indicate the version of the default "safe mode" package in the NVM, rather than the version of the file loaded from /lib/firmware. Instead, we need to trigger CORER to clear global configuration. This is the lowest level of hardware reset which clears the global configuration lock and related state. It also clears any already downloaded DDP. Crucially, it does *not* clear the Tx scheduler topology configuration.
Refactor ice_cfg_tx_topo() to always trigger a CORER after acquiring the global lock, regardless of success or failure of the topology configuration. We need to re-initialize the HW structure when we trigger the CORER. Thus, it makes sense for this to be the responsibility of ice_cfg_tx_topo() rather than its caller, ice_init_tx_topology(). This avoids needless re-initialization in cases where we don't attempt to update the Tx scheduler topology, such as if it has already been programmed. There is one catch: failure to re-initialize the HW struct should stop ice_probe(). If this function fails, we won't have a valid HW structure and cannot ensure the device is functioning properly. To handle this, ensure ice_cfg_tx_topo() returns a limited set of error codes. Set aside one specifically, -ENODEV, to indicate that the ice_init_tx_topology() should fail and stop probe. Other error codes indicate failure to apply the Tx scheduler topology. This is treated as a non-fatal error, with an informational message informing the system administrator that the updated Tx topology did not apply. This allows the device to load and function with the default Tx scheduler topology, rather than failing to load entirely. Note that this use of CORER will not result in loops with future PFs attempting to also load the invalid Tx topology configuration. The first PF will acquire the global configuration lock as part of programming the DDP. Each PF after this will attempt to acquire the global lock as part of programming the Tx topology, and will fail with the indication from firmware that global configuration is already complete. Tx scheduler topology configuration is only performed during driver init (probe or devlink reload) and not during cleanup for a CORER that happens after probe completes. 
Fixes: 91427e6d9030 ("ice: Support 5 layer topology") Signed-off-by: Jacob Keller Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ddp.c | 44 ++++++++++++++++------- drivers/net/ethernet/intel/ice/ice_main.c | 16 ++++++--- 2 files changed, 43 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c index e2a036ce76ca..3b2d9c436979 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.c +++ b/drivers/net/ethernet/intel/ice/ice_ddp.c @@ -2377,7 +2377,13 @@ ice_get_set_tx_topo(struct ice_hw *hw, u8 *buf, u16 buf_size, * The function will apply the new Tx topology from the package buffer * if available. * - * Return: zero when update was successful, negative values otherwise. + * Return: + * * 0 - Successfully applied topology configuration. + * * -EBUSY - Failed to acquire global configuration lock. + * * -EEXIST - Topology configuration has already been applied. + * * -EIO - Unable to apply topology configuration. + * * -ENODEV - Failed to re-initialize device after applying configuration. + * * Other negative error codes indicate unexpected failures. */ int ice_cfg_tx_topo(struct ice_hw *hw, const void *buf, u32 len) { @@ -2410,7 +2416,7 @@ int ice_cfg_tx_topo(struct ice_hw *hw, const void *buf, u32 len) if (status) { ice_debug(hw, ICE_DBG_INIT, "Get current topology is failed\n"); - return status; + return -EIO; } /* Is default topology already applied ? */ @@ -2497,31 +2503,45 @@ int ice_cfg_tx_topo(struct ice_hw *hw, const void *buf, u32 len) ICE_GLOBAL_CFG_LOCK_TIMEOUT); if (status) { ice_debug(hw, ICE_DBG_INIT, "Failed to acquire global lock\n"); - return status; + return -EBUSY; } /* Check if reset was triggered already. */ reg = rd32(hw, GLGEN_RSTAT); if (reg & GLGEN_RSTAT_DEVSTATE_M) { - /* Reset is in progress, re-init the HW again */ ice_debug(hw, ICE_DBG_INIT, "Reset is in progress. 
Layer topology might be applied already\n"); ice_check_reset(hw); - return 0; + /* Reset is in progress, re-init the HW again */ + goto reinit_hw; } /* Set new topology */ status = ice_get_set_tx_topo(hw, new_topo, size, NULL, NULL, true); if (status) { - ice_debug(hw, ICE_DBG_INIT, "Failed setting Tx topology\n"); - return status; + ice_debug(hw, ICE_DBG_INIT, "Failed to set Tx topology, status %pe\n", + ERR_PTR(status)); + /* only report -EIO here as the caller checks the error value + * and reports an informational error message informing that + * the driver failed to program Tx topology. + */ + status = -EIO; } - /* New topology is updated, delay 1 second before issuing the CORER */ + /* Even if Tx topology config failed, we need to CORE reset here to + * clear the global configuration lock. Delay 1 second to allow + * hardware to settle then issue a CORER + */ msleep(1000); ice_reset(hw, ICE_RESET_CORER); - /* CORER will clear the global lock, so no explicit call - * required for release. 
- */ + ice_check_reset(hw); - return 0; +reinit_hw: + /* Since we triggered a CORER, re-initialize hardware */ + ice_deinit_hw(hw); + if (ice_init_hw(hw)) { + ice_debug(hw, ICE_DBG_INIT, "Failed to re-init hardware after setting Tx topology\n"); + return -ENODEV; + } + + return status; } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 8e0b06c1e02b..cae992d8f03c 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4536,17 +4536,23 @@ ice_init_tx_topology(struct ice_hw *hw, const struct firmware *firmware) dev_info(dev, "Tx scheduling layers switching feature disabled\n"); else dev_info(dev, "Tx scheduling layers switching feature enabled\n"); - /* if there was a change in topology ice_cfg_tx_topo triggered - * a CORER and we need to re-init hw + return 0; + } else if (err == -ENODEV) { + /* If we failed to re-initialize the device, we can no longer + * continue loading. */ - ice_deinit_hw(hw); - err = ice_init_hw(hw); - + dev_warn(dev, "Failed to initialize hardware after applying Tx scheduling configuration.\n"); return err; } else if (err == -EIO) { dev_info(dev, "DDP package does not support Tx scheduling layers switching feature - please update to the latest DDP package and try again\n"); + return 0; + } else if (err == -EEXIST) { + return 0; } + /* Do not treat this as a fatal error. */ + dev_info(dev, "Failed to apply Tx scheduling configuration, err %pe\n", + ERR_PTR(err)); return 0; } From 5c5e5b52bf05c7fe88768318c041052c5fac36b8 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 1 Aug 2025 15:27:12 -0700 Subject: [PATCH 2329/2411] ice: use fixed adapter index for E825C embedded devices The ice_adapter structure is used by the ice driver to connect multiple physical functions of a device in software. 
It was introduced by commit 0e2bddf9e5f9 ("ice: add ice_adapter for shared data across PFs on the same NIC") and is primarily used for PTP support, as well as for handling certain cross-PF synchronization. The original design of ice_adapter used PCI address information to determine which devices should be connected. This was extended to support E825C devices by commit fdb7f54700b1 ("ice: Initial support for E825C hardware in ice_adapter"), which used the device ID for E825C devices instead of the PCI address. Later, commit 0093cb194a75 ("ice: use DSN instead of PCI BDF for ice_adapter index") replaced the use of Bus/Device/Function addressing with use of the device serial number. E825C devices may appear in "Dual NAC" configuration which has multiple physical devices tied to the same clock source and which need to use the same ice_adapter. Unfortunately, each "NAC" has its own NVM which has its own unique Device Serial Number. Thus, use of the DSN for connecting ice_adapter does not work properly. It "worked" in the pre-production systems because the DSN was not initialized on the test NVMs and all the NACs had the same zero'd serial number. Since we cannot rely on the DSN, let's fall back to the logic in the original E825C support which used the device ID. This is safe for E825C only because of the embedded nature of the device. It isn't a discrete adapter that can be plugged into an arbitrary system. All E825C devices on a given system are connected to the same clock source and need to be configured through the same PTP clock. To make this separation clear, reserve bit 63 of the 64-bit index values as a "fixed index" indicator. Always clear this bit when using the device serial number as an index. For E825C, use a fixed value defined as the 0x579C E825C backplane device ID bitwise ORed with the fixed index indicator. This is slightly different than the original logic of just using the device ID directly. 
Doing so prevents a potential issue with systems where only one of the NACs is connected with an external PHY over SGMII. In that case, one NAC would have the E825C_SGMII device ID, but the other would not. Separate the determination of the full 64-bit index from the 32-bit reduction logic. Provide both ice_adapter_index() and a wrapping ice_adapter_xa_index() which handles reducing the index to a long on 32-bit systems. As before, cache the full index value in the adapter structure to warn about collisions. This fixes issues with E825C not initializing PTP on both NACs, due to failure to connect the appropriate devices to the same ice_adapter. Fixes: 0093cb194a75 ("ice: use DSN instead of PCI BDF for ice_adapter index") Signed-off-by: Jacob Keller Reviewed-by: Grzegorz Nitka Reviewed-by: Aleksandr Loktionov Reviewed-by: Przemek Kitszel Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_adapter.c | 49 +++++++++++++++----- drivers/net/ethernet/intel/ice/ice_adapter.h | 4 +- 2 files changed, 40 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c index 9e4adc43e474..b53561c34708 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.c +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c @@ -13,16 +13,45 @@ static DEFINE_XARRAY(ice_adapters); static DEFINE_MUTEX(ice_adapters_mutex); -static unsigned long ice_adapter_index(u64 dsn) +#define ICE_ADAPTER_FIXED_INDEX BIT_ULL(63) + +#define ICE_ADAPTER_INDEX_E825C \ + (ICE_DEV_ID_E825C_BACKPLANE | ICE_ADAPTER_FIXED_INDEX) + +static u64 ice_adapter_index(struct pci_dev *pdev) { + switch (pdev->device) { + case ICE_DEV_ID_E825C_BACKPLANE: + case ICE_DEV_ID_E825C_QSFP: + case ICE_DEV_ID_E825C_SFP: + case ICE_DEV_ID_E825C_SGMII: + /* E825C devices have multiple NACs which are connected to the + * same clock source, and which must share the same + * ice_adapter structure. 
We can't use the serial number since + * each NAC has its own NVM generated with its own unique + * Device Serial Number. Instead, rely on the embedded nature + * of the E825C devices, and use a fixed index. This relies on + * the fact that all E825C physical functions in a given + * system are part of the same overall device. + */ + return ICE_ADAPTER_INDEX_E825C; + default: + return pci_get_dsn(pdev) & ~ICE_ADAPTER_FIXED_INDEX; + } +} + +static unsigned long ice_adapter_xa_index(struct pci_dev *pdev) +{ + u64 index = ice_adapter_index(pdev); + #if BITS_PER_LONG == 64 - return dsn; + return index; #else - return (u32)dsn ^ (u32)(dsn >> 32); + return (u32)index ^ (u32)(index >> 32); #endif } -static struct ice_adapter *ice_adapter_new(u64 dsn) +static struct ice_adapter *ice_adapter_new(struct pci_dev *pdev) { struct ice_adapter *adapter; @@ -30,7 +59,7 @@ static struct ice_adapter *ice_adapter_new(u64 dsn) if (!adapter) return NULL; - adapter->device_serial_number = dsn; + adapter->index = ice_adapter_index(pdev); spin_lock_init(&adapter->ptp_gltsyn_time_lock); spin_lock_init(&adapter->txq_ctx_lock); refcount_set(&adapter->refcount, 1); @@ -64,24 +93,23 @@ static void ice_adapter_free(struct ice_adapter *adapter) */ struct ice_adapter *ice_adapter_get(struct pci_dev *pdev) { - u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; unsigned long index; int err; - index = ice_adapter_index(dsn); + index = ice_adapter_xa_index(pdev); scoped_guard(mutex, &ice_adapters_mutex) { err = xa_insert(&ice_adapters, index, NULL, GFP_KERNEL); if (err == -EBUSY) { adapter = xa_load(&ice_adapters, index); refcount_inc(&adapter->refcount); - WARN_ON_ONCE(adapter->device_serial_number != dsn); + WARN_ON_ONCE(adapter->index != ice_adapter_index(pdev)); return adapter; } if (err) return ERR_PTR(err); - adapter = ice_adapter_new(dsn); + adapter = ice_adapter_new(pdev); if (!adapter) return ERR_PTR(-ENOMEM); xa_store(&ice_adapters, index, adapter, GFP_KERNEL); @@ -100,11 +128,10 @@ 
struct ice_adapter *ice_adapter_get(struct pci_dev *pdev) */ void ice_adapter_put(struct pci_dev *pdev) { - u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; unsigned long index; - index = ice_adapter_index(dsn); + index = ice_adapter_xa_index(pdev); scoped_guard(mutex, &ice_adapters_mutex) { adapter = xa_load(&ice_adapters, index); if (WARN_ON(!adapter)) diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h index db66d03c9f96..e95266c7f20b 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.h +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h @@ -33,7 +33,7 @@ struct ice_port_list { * @txq_ctx_lock: Spinlock protecting access to the GLCOMM_QTX_CNTX_CTL register * @ctrl_pf: Control PF of the adapter * @ports: Ports list - * @device_serial_number: DSN cached for collision detection on 32bit systems + * @index: 64-bit index cached for collision detection on 32bit systems */ struct ice_adapter { refcount_t refcount; @@ -44,7 +44,7 @@ struct ice_adapter { struct ice_pf *ctrl_pf; struct ice_port_list ports; - u64 device_serial_number; + u64 index; }; struct ice_adapter *ice_adapter_get(struct pci_dev *pdev); From b1a0c977c6f1130f7dd125ee3db8c2435d7e3d41 Mon Sep 17 00:00:00 2001 From: Michal Kubiak Date: Fri, 8 Aug 2025 17:53:10 +0200 Subject: [PATCH 2330/2411] ice: fix incorrect counter for buffer allocation failures Currently, the driver increments `alloc_page_failed` when buffer allocation fails in `ice_clean_rx_irq()`. However, this counter is intended for page allocation failures, not buffer allocation issues. This patch corrects the counter by incrementing `alloc_buf_failed` instead, ensuring accurate statistics reporting for buffer allocation failures. 
Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side") Reported-by: Jacob Keller Suggested-by: Paul Menzel Signed-off-by: Michal Kubiak Reviewed-by: Paul Menzel Reviewed-by: Jason Xing Reviewed-by: Aleksandr Loktionov Tested-by: Priya Singh Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 29e0088ab6b2..d2871757ec94 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1352,7 +1352,7 @@ static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) skb = ice_construct_skb(rx_ring, xdp); /* exit if we failed to retrieve a buffer */ if (!skb) { - rx_ring->ring_stats->rx_stats.alloc_page_failed++; + rx_ring->ring_stats->rx_stats.alloc_buf_failed++; xdp_verdict = ICE_XDP_CONSUMED; } ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); From ed913b343dcf9f623e7436fa1a153c89b22d109b Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Thu, 31 Jul 2025 14:45:33 +0200 Subject: [PATCH 2331/2411] ixgbe: fix ixgbe_orom_civd_info struct layout The current layout of struct ixgbe_orom_civd_info causes incorrect data storage due to compiler-inserted padding. This results in issues when writing OROM data into the structure. Add the __packed attribute to ensure the structure layout matches the expected binary format without padding. 
Fixes: 70db0788a262 ("ixgbe: read the OROM version information") Reviewed-by: Aleksandr Loktionov Signed-off-by: Jedrzej Jagielski Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c index d74116441d1c..bfeef5b0b99d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c @@ -3125,7 +3125,7 @@ static int ixgbe_get_orom_ver_info(struct ixgbe_hw *hw, if (err) return err; - combo_ver = le32_to_cpu(civd.combo_ver); + combo_ver = get_unaligned_le32(&civd.combo_ver); orom->major = (u8)FIELD_GET(IXGBE_OROM_VER_MASK, combo_ver); orom->patch = (u8)FIELD_GET(IXGBE_OROM_VER_PATCH_MASK, combo_ver); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h index d2f22d8558f8..ff8d640a50b1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h @@ -932,7 +932,7 @@ struct ixgbe_orom_civd_info { __le32 combo_ver; /* Combo Image Version number */ u8 combo_name_len; /* Length of the unicode combo image version string, max of 32 */ __le16 combo_name[32]; /* Unicode string representing the Combo Image version */ -}; +} __packed; /* Function specific capabilities */ struct ixgbe_hw_func_caps { From e228e7d382fa85005ee2ebf303e1bf194aca49a8 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 25 Aug 2025 09:22:09 +0000 Subject: [PATCH 2332/2411] drm/gpuvm: fix various typos in .c and .h gpuvm file After working with this code for a while, I came across several typos. This patch fixes them. 
Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20250825-gpuvm-typo-fix-v1-1-14e9e78e28e6@google.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/drm_gpuvm.c | 78 ++++++++++++++++++------------------- include/drm/drm_gpuvm.h | 10 ++--- 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index 74d949995a72..60b672d3fd83 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -40,7 +40,7 @@ * mapping's backing &drm_gem_object buffers. * * &drm_gem_object buffers maintain a list of &drm_gpuva objects representing - * all existent GPU VA mappings using this &drm_gem_object as backing buffer. + * all existing GPU VA mappings using this &drm_gem_object as backing buffer. * * GPU VAs can be flagged as sparse, such that drivers may use GPU VAs to also * keep track of sparse PTEs in order to support Vulkan 'Sparse Resources'. @@ -72,7 +72,7 @@ * but it can also be a 'dummy' object, which can be allocated with * drm_gpuvm_resv_object_alloc(). * - * In order to connect a struct drm_gpuva its backing &drm_gem_object each + * In order to connect a struct drm_gpuva to its backing &drm_gem_object each * &drm_gem_object maintains a list of &drm_gpuvm_bo structures, and each * &drm_gpuvm_bo contains a list of &drm_gpuva structures. * @@ -81,7 +81,7 @@ * This is ensured by the API through drm_gpuvm_bo_obtain() and * drm_gpuvm_bo_obtain_prealloc() which first look into the corresponding * &drm_gem_object list of &drm_gpuvm_bos for an existing instance of this - * particular combination. If not existent a new instance is created and linked + * particular combination. If not present, a new instance is created and linked * to the &drm_gem_object. * * &drm_gpuvm_bo structures, since unique for a given &drm_gpuvm, are also used @@ -108,7 +108,7 @@ * sequence of operations to satisfy a given map or unmap request. 
* * Therefore the DRM GPU VA manager provides an algorithm implementing splitting - * and merging of existent GPU VA mappings with the ones that are requested to + * and merging of existing GPU VA mappings with the ones that are requested to * be mapped or unmapped. This feature is required by the Vulkan API to * implement Vulkan 'Sparse Memory Bindings' - drivers UAPIs often refer to this * as VM BIND. @@ -119,7 +119,7 @@ * execute in order to integrate the new mapping cleanly into the current state * of the GPU VA space. * - * Depending on how the new GPU VA mapping intersects with the existent mappings + * Depending on how the new GPU VA mapping intersects with the existing mappings * of the GPU VA space the &drm_gpuvm_ops callbacks contain an arbitrary amount * of unmap operations, a maximum of two remap operations and a single map * operation. The caller might receive no callback at all if no operation is @@ -139,16 +139,16 @@ * one unmap operation and one or two map operations, such that drivers can * derive the page table update delta accordingly. * - * Note that there can't be more than two existent mappings to split up, one at + * Note that there can't be more than two existing mappings to split up, one at * the beginning and one at the end of the new mapping, hence there is a * maximum of two remap operations. * * Analogous to drm_gpuvm_sm_map() drm_gpuvm_sm_unmap() uses &drm_gpuvm_ops to * call back into the driver in order to unmap a range of GPU VA space. The - * logic behind this function is way simpler though: For all existent mappings + * logic behind this function is way simpler though: For all existing mappings * enclosed by the given range unmap operations are created. For mappings which - * are only partically located within the given range, remap operations are - * created such that those mappings are split up and re-mapped partically. 
+ * are only partially located within the given range, remap operations are + * created such that those mappings are split up and re-mapped partially. * * As an alternative to drm_gpuvm_sm_map() and drm_gpuvm_sm_unmap(), * drm_gpuvm_sm_map_ops_create() and drm_gpuvm_sm_unmap_ops_create() can be used @@ -168,7 +168,7 @@ * provided helper functions drm_gpuva_map(), drm_gpuva_remap() and * drm_gpuva_unmap() instead. * - * The following diagram depicts the basic relationships of existent GPU VA + * The following diagram depicts the basic relationships of existing GPU VA * mappings, a newly requested mapping and the resulting mappings as implemented * by drm_gpuvm_sm_map() - it doesn't cover any arbitrary combinations of these. * @@ -218,7 +218,7 @@ * * * 4) Existent mapping is a left aligned subset of the requested one, hence - * replace the existent one. + * replace the existing one. * * :: * @@ -236,9 +236,9 @@ * and/or non-contiguous BO offset. * * - * 5) Requested mapping's range is a left aligned subset of the existent one, + * 5) Requested mapping's range is a left aligned subset of the existing one, * but backed by a different BO. Hence, map the requested mapping and split - * the existent one adjusting its BO offset. + * the existing one adjusting its BO offset. * * :: * @@ -271,9 +271,9 @@ * new: |-----|-----| (a.bo_offset=n, a'.bo_offset=n+1) * * - * 7) Requested mapping's range is a right aligned subset of the existent one, + * 7) Requested mapping's range is a right aligned subset of the existing one, * but backed by a different BO. Hence, map the requested mapping and split - * the existent one, without adjusting the BO offset. + * the existing one, without adjusting the BO offset. * * :: * @@ -304,7 +304,7 @@ * * 9) Existent mapping is overlapped at the end by the requested mapping backed * by a different BO. Hence, map the requested mapping and split up the - * existent one, without adjusting the BO offset. 
+ * existing one, without adjusting the BO offset. * * :: * @@ -334,9 +334,9 @@ * new: |-----|-----------| (a'.bo_offset=n, a.bo_offset=n+1) * * - * 11) Requested mapping's range is a centered subset of the existent one + * 11) Requested mapping's range is a centered subset of the existing one * having a different backing BO. Hence, map the requested mapping and split - * up the existent one in two mappings, adjusting the BO offset of the right + * up the existing one in two mappings, adjusting the BO offset of the right * one accordingly. * * :: @@ -351,7 +351,7 @@ * new: |-----|-----|-----| (a.bo_offset=n,b.bo_offset=m,a'.bo_offset=n+2) * * - * 12) Requested mapping is a contiguous subset of the existent one. Split it + * 12) Requested mapping is a contiguous subset of the existing one. Split it * up, but indicate that the backing PTEs could be kept. * * :: @@ -367,7 +367,7 @@ * * * 13) Existent mapping is a right aligned subset of the requested one, hence - * replace the existent one. + * replace the existing one. * * :: * @@ -386,7 +386,7 @@ * * * 14) Existent mapping is a centered subset of the requested one, hence - * replace the existent one. + * replace the existing one. * * :: * @@ -406,7 +406,7 @@ * * 15) Existent mappings is overlapped at the beginning by the requested mapping * backed by a different BO. Hence, map the requested mapping and split up - * the existent one, adjusting its BO offset accordingly. + * the existing one, adjusting its BO offset accordingly. * * :: * @@ -469,8 +469,8 @@ * make use of them. * * The below code is strictly limited to illustrate the generic usage pattern. 
- * To maintain simplicitly, it doesn't make use of any abstractions for common - * code, different (asyncronous) stages with fence signalling critical paths, + * To maintain simplicity, it doesn't make use of any abstractions for common + * code, different (asynchronous) stages with fence signalling critical paths, * any other helpers or error handling in terms of freeing memory and dropping * previously taken locks. * @@ -479,7 +479,7 @@ * // Allocates a new &drm_gpuva. * struct drm_gpuva * driver_gpuva_alloc(void); * - * // Typically drivers would embedd the &drm_gpuvm and &drm_gpuva + * // Typically drivers would embed the &drm_gpuvm and &drm_gpuva * // structure in individual driver structures and lock the dma-resv with * // drm_exec or similar helpers. * int driver_mapping_create(struct drm_gpuvm *gpuvm, @@ -582,7 +582,7 @@ * .sm_step_unmap = driver_gpuva_unmap, * }; * - * // Typically drivers would embedd the &drm_gpuvm and &drm_gpuva + * // Typically drivers would embed the &drm_gpuvm and &drm_gpuva * // structure in individual driver structures and lock the dma-resv with * // drm_exec or similar helpers. * int driver_mapping_create(struct drm_gpuvm *gpuvm, @@ -680,7 +680,7 @@ * * This helper is here to provide lockless list iteration. Lockless as in, the * iterator releases the lock immediately after picking the first element from - * the list, so list insertion deletion can happen concurrently. + * the list, so list insertion and deletion can happen concurrently. * * Elements popped from the original list are kept in a local list, so removal * and is_empty checks can still happen while we're iterating the list. 
@@ -1160,7 +1160,7 @@ drm_gpuvm_prepare_objects_locked(struct drm_gpuvm *gpuvm, } /** - * drm_gpuvm_prepare_objects() - prepare all assoiciated BOs + * drm_gpuvm_prepare_objects() - prepare all associated BOs * @gpuvm: the &drm_gpuvm * @exec: the &drm_exec locking context * @num_fences: the amount of &dma_fences to reserve @@ -1230,13 +1230,13 @@ drm_gpuvm_prepare_range(struct drm_gpuvm *gpuvm, struct drm_exec *exec, EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_range); /** - * drm_gpuvm_exec_lock() - lock all dma-resv of all assoiciated BOs + * drm_gpuvm_exec_lock() - lock all dma-resv of all associated BOs * @vm_exec: the &drm_gpuvm_exec wrapper * * Acquires all dma-resv locks of all &drm_gem_objects the given * &drm_gpuvm contains mappings of. * - * Addionally, when calling this function with struct drm_gpuvm_exec::extra + * Additionally, when calling this function with struct drm_gpuvm_exec::extra * being set the driver receives the given @fn callback to lock additional * dma-resv in the context of the &drm_gpuvm_exec instance. Typically, drivers * would call drm_exec_prepare_obj() from within this callback. @@ -1293,7 +1293,7 @@ fn_lock_array(struct drm_gpuvm_exec *vm_exec) } /** - * drm_gpuvm_exec_lock_array() - lock all dma-resv of all assoiciated BOs + * drm_gpuvm_exec_lock_array() - lock all dma-resv of all associated BOs * @vm_exec: the &drm_gpuvm_exec wrapper * @objs: additional &drm_gem_objects to lock * @num_objs: the number of additional &drm_gem_objects to lock @@ -1588,7 +1588,7 @@ drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, EXPORT_SYMBOL_GPL(drm_gpuvm_bo_find); /** - * drm_gpuvm_bo_obtain() - obtains and instance of the &drm_gpuvm_bo for the + * drm_gpuvm_bo_obtain() - obtains an instance of the &drm_gpuvm_bo for the * given &drm_gpuvm and &drm_gem_object * @gpuvm: The &drm_gpuvm the @obj is mapped in. * @obj: The &drm_gem_object being mapped in the @gpuvm. 
@@ -1624,7 +1624,7 @@ drm_gpuvm_bo_obtain(struct drm_gpuvm *gpuvm, EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain); /** - * drm_gpuvm_bo_obtain_prealloc() - obtains and instance of the &drm_gpuvm_bo + * drm_gpuvm_bo_obtain_prealloc() - obtains an instance of the &drm_gpuvm_bo * for the given &drm_gpuvm and &drm_gem_object * @__vm_bo: A pre-allocated struct drm_gpuvm_bo. * @@ -1688,7 +1688,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_bo_extobj_add); * @vm_bo: the &drm_gpuvm_bo to add or remove * @evict: indicates whether the object is evicted * - * Adds a &drm_gpuvm_bo to or removes it from the &drm_gpuvms evicted list. + * Adds a &drm_gpuvm_bo to or removes it from the &drm_gpuvm's evicted list. */ void drm_gpuvm_bo_evict(struct drm_gpuvm_bo *vm_bo, bool evict) @@ -1790,7 +1790,7 @@ __drm_gpuva_remove(struct drm_gpuva *va) * drm_gpuva_remove() - remove a &drm_gpuva * @va: the &drm_gpuva to remove * - * This removes the given &va from the underlaying tree. + * This removes the given &va from the underlying tree. * * It is safe to use this function using the safe versions of iterating the GPU * VA space, such as drm_gpuvm_for_each_va_safe() and @@ -2358,7 +2358,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_sm_map); * * This function iterates the given range of the GPU VA space. It utilizes the * &drm_gpuvm_ops to call back into the driver providing the operations to - * unmap and, if required, split existent mappings. + * unmap and, if required, split existing mappings. * * Drivers may use these callbacks to update the GPU VA space right away within * the callback. In case the driver decides to copy and store the operations for @@ -2475,7 +2475,7 @@ static const struct drm_gpuvm_ops lock_ops = { * required without the earlier DRIVER_OP_MAP. This is safe because we've * already locked the GEM object in the earlier DRIVER_OP_MAP step. 
* - * Returns: 0 on success or a negative error codec + * Returns: 0 on success or a negative error code */ int drm_gpuvm_sm_map_exec_lock(struct drm_gpuvm *gpuvm, @@ -2619,12 +2619,12 @@ static const struct drm_gpuvm_ops gpuvm_list_ops = { * @req_offset: the offset within the &drm_gem_object * * This function creates a list of operations to perform splitting and merging - * of existent mapping(s) with the newly requested one. + * of existing mapping(s) with the newly requested one. * * The list can be iterated with &drm_gpuva_for_each_op and must be processed * in the given order. It can contain map, unmap and remap operations, but it * also can be empty if no operation is required, e.g. if the requested mapping - * already exists is the exact same way. + * already exists in the exact same way. * * There can be an arbitrary amount of unmap operations, a maximum of two remap * operations and a single map operation. The latter one represents the original diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index 274532facfd6..2e7088264355 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -103,7 +103,7 @@ struct drm_gpuva { } va; /** - * @gem: structure containing the &drm_gem_object and it's offset + * @gem: structure containing the &drm_gem_object and its offset */ struct { /** @@ -843,7 +843,7 @@ struct drm_gpuva_op_map { } va; /** - * @gem: structure containing the &drm_gem_object and it's offset + * @gem: structure containing the &drm_gem_object and its offset */ struct { /** @@ -1189,11 +1189,11 @@ struct drm_gpuvm_ops { /** * @sm_step_unmap: called from &drm_gpuvm_sm_map and - * &drm_gpuvm_sm_unmap to unmap an existent mapping + * &drm_gpuvm_sm_unmap to unmap an existing mapping * - * This callback is called when existent mapping needs to be unmapped. + * This callback is called when existing mapping needs to be unmapped. 
* This is the case when either a newly requested mapping encloses an - * existent mapping or an unmap of an existent mapping is requested. + * existing mapping or an unmap of an existing mapping is requested. * * The &priv pointer matches the one the driver passed to * &drm_gpuvm_sm_map or &drm_gpuvm_sm_unmap, respectively. From 7ab3b7579a6d2660a3425b9ea93b9a140b07f49c Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 9 Aug 2025 11:36:54 +0300 Subject: [PATCH 2333/2411] dt-bindings: display/msm: qcom,mdp5: drop lut clock None of the MDP5 platforms have a LUT clock on the display-controller; it was added by mistake. Drop it, fixing DT warnings on MSM8976 / MSM8956 platforms. Technically it's an ABI break, but no other platforms are affected. Fixes: 385c8ac763b3 ("dt-bindings: display/msm: convert MDP5 schema to YAML format") Signed-off-by: Dmitry Baryshkov Acked-by: Rob Herring (Arm) Patchwork: https://patchwork.freedesktop.org/patch/667822/ Signed-off-by: Rob Clark --- Documentation/devicetree/bindings/display/msm/qcom,mdp5.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/msm/qcom,mdp5.yaml b/Documentation/devicetree/bindings/display/msm/qcom,mdp5.yaml index e153f8d26e7a..2735c78b0b67 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,mdp5.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,mdp5.yaml @@ -60,7 +60,6 @@ properties: - const: bus - const: core - const: vsync - - const: lut - const: tbu - const: tbu_rt # MSM8996 has additional iommu clock From abebfed208515726760d79cf4f9f1a76b9a10a84 Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Tue, 22 Jul 2025 16:17:40 -0500 Subject: [PATCH 2334/2411] drm/msm/dpu: Add a null ptr check for dpu_encoder_needs_modeset The drm_atomic_get_new_connector_state() can return NULL if the connector is not part of the atomic state. Add a check to prevent a NULL pointer dereference. 
This follows the same pattern used in dpu_encoder_update_topology() within the same file, which checks for NULL before using conn_state. Signed-off-by: Chenyuan Yang Fixes: 1ce69c265a53 ("drm/msm/dpu: move resource allocation to CRTC") Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/665188/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 05e5f3463e30..258edaa18fc0 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -730,6 +730,8 @@ bool dpu_encoder_needs_modeset(struct drm_encoder *drm_enc, struct drm_atomic_st return false; conn_state = drm_atomic_get_new_connector_state(state, connector); + if (!conn_state) + return false; /** * These checks are duplicated from dpu_encoder_update_topology() since From 5cfd298cc0359697f26b2b6e25385c665e431a7e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 14 Aug 2025 10:22:20 +0300 Subject: [PATCH 2335/2411] soc: qcom: ubwc: use no-uwbc config for MSM8917 MSM8917 has MDSS 1.15 and Adreno 308, neither of which support UBWC. Change UBWC configuration to point out that UBWC is not supported on this platform. 
Fixes: 1924272b9ce1 ("soc: qcom: Add UBWC config provider") Signed-off-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/668500/ Signed-off-by: Rob Clark --- drivers/soc/qcom/ubwc_config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/qcom/ubwc_config.c b/drivers/soc/qcom/ubwc_config.c index 1490a7f63767..5113c2902bf2 100644 --- a/drivers/soc/qcom/ubwc_config.c +++ b/drivers/soc/qcom/ubwc_config.c @@ -225,7 +225,7 @@ static const struct of_device_id qcom_ubwc_configs[] __maybe_unused = { { .compatible = "qcom,apq8096", .data = &msm8998_data }, { .compatible = "qcom,msm8226", .data = &no_ubwc_data }, { .compatible = "qcom,msm8916", .data = &no_ubwc_data }, - { .compatible = "qcom,msm8917", .data = &msm8937_data }, + { .compatible = "qcom,msm8917", .data = &no_ubwc_data }, { .compatible = "qcom,msm8937", .data = &msm8937_data }, { .compatible = "qcom,msm8939", .data = &no_ubwc_data }, { .compatible = "qcom,msm8953", .data = &msm8937_data }, From 61f3c19af5ce6606a8f50ba9a0661881925d28c2 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 14 Aug 2025 10:22:21 +0300 Subject: [PATCH 2336/2411] soc: qcom: ubwc: add more missing platforms Add UBWC configuration for SDA660 (modem-less variant of SDM660), SDM450 (similar to MSM8953), SDM632 (similar to MSM8953) and SM7325 (similar to SC7280). 
Fixes: 1924272b9ce1 ("soc: qcom: Add UBWC config provider") Signed-off-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/668501/ Signed-off-by: Rob Clark --- drivers/soc/qcom/ubwc_config.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/soc/qcom/ubwc_config.c b/drivers/soc/qcom/ubwc_config.c index 5113c2902bf2..8b23b4d4e398 100644 --- a/drivers/soc/qcom/ubwc_config.c +++ b/drivers/soc/qcom/ubwc_config.c @@ -244,7 +244,10 @@ static const struct of_device_id qcom_ubwc_configs[] __maybe_unused = { { .compatible = "qcom,sc7280", .data = &sc7280_data, }, { .compatible = "qcom,sc8180x", .data = &sc8180x_data, }, { .compatible = "qcom,sc8280xp", .data = &sc8280xp_data, }, + { .compatible = "qcom,sda660", .data = &msm8937_data }, + { .compatible = "qcom,sdm450", .data = &msm8937_data }, { .compatible = "qcom,sdm630", .data = &msm8937_data }, + { .compatible = "qcom,sdm632", .data = &msm8937_data }, { .compatible = "qcom,sdm636", .data = &msm8937_data }, { .compatible = "qcom,sdm660", .data = &msm8937_data }, { .compatible = "qcom,sdm670", .data = &sdm670_data, }, @@ -258,6 +261,7 @@ static const struct of_device_id qcom_ubwc_configs[] __maybe_unused = { { .compatible = "qcom,sm7125", .data = &sc7180_data }, { .compatible = "qcom,sm7150", .data = &sm7150_data, }, { .compatible = "qcom,sm7225", .data = &sm6350_data, }, + { .compatible = "qcom,sm7325", .data = &sc7280_data, }, { .compatible = "qcom,sm8150", .data = &sm8150_data, }, { .compatible = "qcom,sm8250", .data = &sm8250_data, }, { .compatible = "qcom,sm8350", .data = &sm8350_data, }, From ec770bb2e19196b28868698a81321d3a3c74da9d Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 14 Aug 2025 10:22:22 +0300 Subject: [PATCH 2337/2411] soc: qcom: add configuration for MSM8929 MSM8929 is similar to MSM8939, it doesn't support UBWC. Provide no-UBWC config for the platform. 
Fixes: 197713d0cf01 ("soc: qcom: ubwc: provide no-UBWC configuration") Signed-off-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/668502/ Signed-off-by: Rob Clark --- drivers/soc/qcom/ubwc_config.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/qcom/ubwc_config.c b/drivers/soc/qcom/ubwc_config.c index 8b23b4d4e398..689e333ae443 100644 --- a/drivers/soc/qcom/ubwc_config.c +++ b/drivers/soc/qcom/ubwc_config.c @@ -227,6 +227,7 @@ static const struct of_device_id qcom_ubwc_configs[] __maybe_unused = { { .compatible = "qcom,msm8916", .data = &no_ubwc_data }, { .compatible = "qcom,msm8917", .data = &no_ubwc_data }, { .compatible = "qcom,msm8937", .data = &msm8937_data }, + { .compatible = "qcom,msm8929", .data = &no_ubwc_data }, { .compatible = "qcom,msm8939", .data = &no_ubwc_data }, { .compatible = "qcom,msm8953", .data = &msm8937_data }, { .compatible = "qcom,msm8956", .data = &msm8937_data }, From 3cf6147f2b51a569761e1ef010efbd891e3a3a15 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 14 Aug 2025 10:22:23 +0300 Subject: [PATCH 2338/2411] soc: qcom: use no-UBWC config for MSM8956/76 Both MSM8956 and MSM8976 have MDSS 1.11 which doesn't support UBWC (although they also have Adreno 510, which might support UBWC). Disable UBWC support for those platforms. 
Fixes: 1924272b9ce1 ("soc: qcom: Add UBWC config provider") Signed-off-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/668503/ Signed-off-by: Rob Clark --- drivers/soc/qcom/ubwc_config.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soc/qcom/ubwc_config.c b/drivers/soc/qcom/ubwc_config.c index 689e333ae443..15d373bff231 100644 --- a/drivers/soc/qcom/ubwc_config.c +++ b/drivers/soc/qcom/ubwc_config.c @@ -230,9 +230,9 @@ static const struct of_device_id qcom_ubwc_configs[] __maybe_unused = { { .compatible = "qcom,msm8929", .data = &no_ubwc_data }, { .compatible = "qcom,msm8939", .data = &no_ubwc_data }, { .compatible = "qcom,msm8953", .data = &msm8937_data }, - { .compatible = "qcom,msm8956", .data = &msm8937_data }, + { .compatible = "qcom,msm8956", .data = &no_ubwc_data }, { .compatible = "qcom,msm8974", .data = &no_ubwc_data }, - { .compatible = "qcom,msm8976", .data = &msm8937_data }, + { .compatible = "qcom,msm8976", .data = &no_ubwc_data }, { .compatible = "qcom,msm8996", .data = &msm8998_data }, { .compatible = "qcom,msm8998", .data = &msm8998_data }, { .compatible = "qcom,qcm2290", .data = &qcm2290_data, }, From ba0b7081f7a521d7c28b527a4f18666a148471e7 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 22 Aug 2025 17:00:23 -0700 Subject: [PATCH 2339/2411] perf symbol-minimal: Fix ehdr reading in filename__read_build_id The e_ident is part of the ehdr and so reading it a second time would mean the read ehdr was displaced by 16-bytes. Switch from stdio to open/read/lseek syscalls for similarity with the symbol-elf version of the function and so that later changes can alter the open flags.
Fixes: fef8f648bb47 ("perf symbol: Fix use-after-free in filename__read_build_id") Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250823000024.724394-2-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/symbol-minimal.c | 55 ++++++++++++++++---------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 7201494c5c20..8d41bd7842df 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -4,7 +4,6 @@ #include #include -#include #include #include #include @@ -88,11 +87,8 @@ int filename__read_debuglink(const char *filename __maybe_unused, */ int filename__read_build_id(const char *filename, struct build_id *bid) { - FILE *fp; - int ret = -1; + int fd, ret = -1; bool need_swap = false, elf32; - u8 e_ident[EI_NIDENT]; - int i; union { struct { Elf32_Ehdr ehdr32; @@ -103,28 +99,27 @@ int filename__read_build_id(const char *filename, struct build_id *bid) Elf64_Phdr *phdr64; }; } hdrs; - void *phdr; - size_t phdr_size; - void *buf = NULL; - size_t buf_size = 0; + void *phdr, *buf = NULL; + ssize_t phdr_size, ehdr_size, buf_size = 0; - fp = fopen(filename, "r"); - if (fp == NULL) + fd = open(filename, O_RDONLY); + if (fd < 0) return -1; - if (fread(e_ident, sizeof(e_ident), 1, fp) != 1) + if (read(fd, hdrs.ehdr32.e_ident, EI_NIDENT) != EI_NIDENT) goto out; - if (memcmp(e_ident, ELFMAG, SELFMAG) || - e_ident[EI_VERSION] != EV_CURRENT) + if (memcmp(hdrs.ehdr32.e_ident, ELFMAG, SELFMAG) || + hdrs.ehdr32.e_ident[EI_VERSION] != EV_CURRENT) goto out; - need_swap = check_need_swap(e_ident[EI_DATA]); - elf32 = e_ident[EI_CLASS] == ELFCLASS32; + need_swap = check_need_swap(hdrs.ehdr32.e_ident[EI_DATA]); + elf32 = hdrs.ehdr32.e_ident[EI_CLASS] == ELFCLASS32; + ehdr_size = (elf32 ? sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64)) - EI_NIDENT; - if (fread(elf32 ? (void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64, - elf32 ? 
sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64), - 1, fp) != 1) + if (read(fd, + (elf32 ? (void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64) + EI_NIDENT, + ehdr_size) != ehdr_size) goto out; if (need_swap) { @@ -138,14 +133,18 @@ int filename__read_build_id(const char *filename, struct build_id *bid) hdrs.ehdr64.e_phnum = bswap_16(hdrs.ehdr64.e_phnum); } } - phdr_size = elf32 ? hdrs.ehdr32.e_phentsize * hdrs.ehdr32.e_phnum - : hdrs.ehdr64.e_phentsize * hdrs.ehdr64.e_phnum; + if ((elf32 && hdrs.ehdr32.e_phentsize != sizeof(Elf32_Phdr)) || + (!elf32 && hdrs.ehdr64.e_phentsize != sizeof(Elf64_Phdr))) + goto out; + + phdr_size = elf32 ? sizeof(Elf32_Phdr) * hdrs.ehdr32.e_phnum + : sizeof(Elf64_Phdr) * hdrs.ehdr64.e_phnum; phdr = malloc(phdr_size); if (phdr == NULL) goto out; - fseek(fp, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET); - if (fread(phdr, phdr_size, 1, fp) != 1) + lseek(fd, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET); + if (read(fd, phdr, phdr_size) != phdr_size) goto out_free; if (elf32) @@ -153,8 +152,8 @@ int filename__read_build_id(const char *filename, struct build_id *bid) else hdrs.phdr64 = phdr; - for (i = 0; i < elf32 ? hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum; i++) { - size_t p_filesz; + for (int i = 0; i < (elf32 ? hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum); i++) { + ssize_t p_filesz; if (need_swap) { if (elf32) { @@ -180,8 +179,8 @@ int filename__read_build_id(const char *filename, struct build_id *bid) goto out_free; buf = tmp; } - fseek(fp, elf32 ? hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET); - if (fread(buf, p_filesz, 1, fp) != 1) + lseek(fd, elf32 ? 
hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET); + if (read(fd, buf, p_filesz) != p_filesz) goto out_free; ret = read_build_id(buf, p_filesz, bid, need_swap); @@ -194,7 +193,7 @@ int filename__read_build_id(const char *filename, struct build_id *bid) free(buf); free(phdr); out: - fclose(fp); + close(fd); return ret; } From 2c369d91d0933aaff96b6b807b22363e6a38a625 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 22 Aug 2025 17:00:24 -0700 Subject: [PATCH 2340/2411] perf symbol: Add blocking argument to filename__read_build_id When synthesizing build-ids, for build ID mmap2 events, they will be added for data mmaps if -d/--data is specified. The files opened for their build IDs may block on the open causing perf to hang during synthesis. There is some robustness in existing calls to filename__read_build_id by checking the file path is to a regular file, which unfortunately fails for symlinks. Rather than adding more is_regular_file calls, switch filename__read_build_id to take a "block" argument and specify O_NONBLOCK when this is false. The existing is_regular_file checking callers and the event synthesis callers are made to pass false and thereby avoiding the hang. 
Fixes: 53b00ff358dc ("perf record: Make --buildid-mmap the default") Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250823000024.724394-3-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/bench/inject-buildid.c | 2 +- tools/perf/builtin-buildid-cache.c | 8 ++++---- tools/perf/builtin-inject.c | 4 ++-- tools/perf/tests/sdt.c | 2 +- tools/perf/util/build-id.c | 4 ++-- tools/perf/util/debuginfo.c | 8 ++++++-- tools/perf/util/dsos.c | 4 ++-- tools/perf/util/symbol-elf.c | 9 +++++---- tools/perf/util/symbol-minimal.c | 6 +++--- tools/perf/util/symbol.c | 8 ++++---- tools/perf/util/symbol.h | 2 +- tools/perf/util/synthetic-events.c | 2 +- 12 files changed, 32 insertions(+), 27 deletions(-) diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index aad572a78d7f..12387ea88b9a 100644 --- a/tools/perf/bench/inject-buildid.c +++ b/tools/perf/bench/inject-buildid.c @@ -85,7 +85,7 @@ static int add_dso(const char *fpath, const struct stat *sb __maybe_unused, if (typeflag == FTW_D || typeflag == FTW_SL) return 0; - if (filename__read_build_id(fpath, &bid) < 0) + if (filename__read_build_id(fpath, &bid, /*block=*/true) < 0) return 0; dso->name = realpath(fpath, NULL); diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index c98104481c8a..2e0f2004696a 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -180,7 +180,7 @@ static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi) struct nscookie nsc; nsinfo__mountns_enter(nsi, &nsc); - err = filename__read_build_id(filename, &bid); + err = filename__read_build_id(filename, &bid, /*block=*/true); nsinfo__mountns_exit(&nsc); if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); @@ -204,7 +204,7 @@ static int build_id_cache__remove_file(const char *filename, struct nsinfo *nsi) int err; nsinfo__mountns_enter(nsi, &nsc); - err = filename__read_build_id(filename, &bid); + err 
= filename__read_build_id(filename, &bid, /*block=*/true); nsinfo__mountns_exit(&nsc); if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); @@ -280,7 +280,7 @@ static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) if (!dso__build_id_filename(dso, filename, sizeof(filename), false)) return true; - if (filename__read_build_id(filename, &bid) == -1) { + if (filename__read_build_id(filename, &bid, /*block=*/true) == -1) { if (errno == ENOENT) return false; @@ -309,7 +309,7 @@ static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi) int err; nsinfo__mountns_enter(nsi, &nsc); - err = filename__read_build_id(filename, &bid); + err = filename__read_build_id(filename, &bid, /*block=*/true); nsinfo__mountns_exit(&nsc); if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 40ba6a94f719..a114b3fa1bea 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -680,12 +680,12 @@ static int dso__read_build_id(struct dso *dso) mutex_lock(dso__lock(dso)); nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); - if (filename__read_build_id(dso__long_name(dso), &bid) > 0) + if (filename__read_build_id(dso__long_name(dso), &bid, /*block=*/true) > 0) dso__set_build_id(dso, &bid); else if (dso__nsinfo(dso)) { char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso)); - if (new_name && filename__read_build_id(new_name, &bid) > 0) + if (new_name && filename__read_build_id(new_name, &bid, /*block=*/true) > 0) dso__set_build_id(dso, &bid); free(new_name); } diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index 93baee2eae42..6132f1af3e22 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -31,7 +31,7 @@ static int build_id_cache__add_file(const char *filename) struct build_id bid = { .size = 0, }; int err; - err = filename__read_build_id(filename, &bid); + err = 
filename__read_build_id(filename, &bid, /*block=*/true); if (err < 0) { pr_debug("Failed to read build id of %s\n", filename); return err; diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index a7018a3b0437..bf7f3268b9a2 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -115,7 +115,7 @@ int filename__snprintf_build_id(const char *pathname, char *sbuild_id, size_t sb struct build_id bid = { .size = 0, }; int ret; - ret = filename__read_build_id(pathname, &bid); + ret = filename__read_build_id(pathname, &bid, /*block=*/true); if (ret < 0) return ret; @@ -841,7 +841,7 @@ static int filename__read_build_id_ns(const char *filename, int ret; nsinfo__mountns_enter(nsi, &nsc); - ret = filename__read_build_id(filename, bid); + ret = filename__read_build_id(filename, bid, /*block=*/true); nsinfo__mountns_exit(&nsc); return ret; diff --git a/tools/perf/util/debuginfo.c b/tools/perf/util/debuginfo.c index a44c70f93156..bb9ebd84ec2d 100644 --- a/tools/perf/util/debuginfo.c +++ b/tools/perf/util/debuginfo.c @@ -110,8 +110,12 @@ struct debuginfo *debuginfo__new(const char *path) if (!dso) goto out; - /* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */ - if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0) + /* + * Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO. Don't block + * incase the path isn't for a regular file. 
+ */ + assert(!dso__has_build_id(dso)); + if (filename__read_build_id(path, &bid, /*block=*/false) > 0) dso__set_build_id(dso, &bid); for (type = distro_dwarf_types; diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index 0a7645c7fae7..64c1d65b0149 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -81,13 +81,13 @@ static int dsos__read_build_ids_cb(struct dso *dso, void *data) return 0; } nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); - if (filename__read_build_id(dso__long_name(dso), &bid) > 0) { + if (filename__read_build_id(dso__long_name(dso), &bid, /*block=*/true) > 0) { dso__set_build_id(dso, &bid); args->have_build_id = true; } else if (errno == ENOENT && dso__nsinfo(dso)) { char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso)); - if (new_name && filename__read_build_id(new_name, &bid) > 0) { + if (new_name && filename__read_build_id(new_name, &bid, /*block=*/true) > 0) { dso__set_build_id(dso, &bid); args->have_build_id = true; } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 6d2c280a1730..033c79231a54 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -902,7 +902,7 @@ static int read_build_id(const char *filename, struct build_id *bid) #else // HAVE_LIBBFD_BUILDID_SUPPORT -static int read_build_id(const char *filename, struct build_id *bid) +static int read_build_id(const char *filename, struct build_id *bid, bool block) { size_t size = sizeof(bid->data); int fd, err = -1; @@ -911,7 +911,7 @@ static int read_build_id(const char *filename, struct build_id *bid) if (size < BUILD_ID_SIZE) goto out; - fd = open(filename, O_RDONLY); + fd = open(filename, block ? 
O_RDONLY : (O_RDONLY | O_NONBLOCK)); if (fd < 0) goto out; @@ -934,7 +934,7 @@ static int read_build_id(const char *filename, struct build_id *bid) #endif // HAVE_LIBBFD_BUILDID_SUPPORT -int filename__read_build_id(const char *filename, struct build_id *bid) +int filename__read_build_id(const char *filename, struct build_id *bid, bool block) { struct kmod_path m = { .name = NULL, }; char path[PATH_MAX]; @@ -958,9 +958,10 @@ int filename__read_build_id(const char *filename, struct build_id *bid) } close(fd); filename = path; + block = true; } - err = read_build_id(filename, bid); + err = read_build_id(filename, bid, block); if (m.comp) unlink(filename); diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 8d41bd7842df..41e4ebe5eac5 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -85,7 +85,7 @@ int filename__read_debuglink(const char *filename __maybe_unused, /* * Just try PT_NOTE header otherwise fails */ -int filename__read_build_id(const char *filename, struct build_id *bid) +int filename__read_build_id(const char *filename, struct build_id *bid, bool block) { int fd, ret = -1; bool need_swap = false, elf32; @@ -102,7 +102,7 @@ int filename__read_build_id(const char *filename, struct build_id *bid) void *phdr, *buf = NULL; ssize_t phdr_size, ehdr_size, buf_size = 0; - fd = open(filename, O_RDONLY); + fd = open(filename, block ? 
O_RDONLY : (O_RDONLY | O_NONBLOCK)); if (fd < 0) return -1; @@ -323,7 +323,7 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused, if (ret >= 0) RC_CHK_ACCESS(dso)->is_64_bit = ret; - if (filename__read_build_id(ss->name, &bid) > 0) + if (filename__read_build_id(ss->name, &bid, /*block=*/true) > 0) dso__set_build_id(dso, &bid); return 0; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index e816e4220d33..3fed54de5401 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1869,14 +1869,14 @@ int dso__load(struct dso *dso, struct map *map) /* * Read the build id if possible. This is required for - * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work + * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work. Don't block in case path + * isn't for a regular file. */ - if (!dso__has_build_id(dso) && - is_regular_file(dso__long_name(dso))) { + if (!dso__has_build_id(dso)) { struct build_id bid = { .size = 0, }; __symbol__join_symfs(name, PATH_MAX, dso__long_name(dso)); - if (filename__read_build_id(name, &bid) > 0) + if (filename__read_build_id(name, &bid, /*block=*/false) > 0) dso__set_build_id(dso, &bid); } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 3fb5d146d9b1..347106218799 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -140,7 +140,7 @@ struct symbol *dso__next_symbol(struct symbol *sym); enum dso_type dso__type_fd(int fd); -int filename__read_build_id(const char *filename, struct build_id *id); +int filename__read_build_id(const char *filename, struct build_id *id, bool block); int sysfs__read_build_id(const char *filename, struct build_id *bid); int modules__parse(const char *filename, void *arg, int (*process_module)(void *arg, const char *name, diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index cb2c1ace304a..fcd1fd13c30e 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -401,7 +401,7 @@ static 
void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event, nsi = nsinfo__new(event->pid); nsinfo__mountns_enter(nsi, &nc); - rc = filename__read_build_id(event->filename, &bid) > 0 ? 0 : -1; + rc = filename__read_build_id(event->filename, &bid, /*block=*/false) > 0 ? 0 : -1; nsinfo__mountns_exit(&nc); nsinfo__put(nsi); From 70c1595c181c48a022756116a6c46d5e8bad2c6f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 21 Aug 2025 16:23:14 +0900 Subject: [PATCH 2341/2411] ata: ahci: Allow ignoring the external/hotplug capability of ports Commit 4edf1505b76d ("ata: ahci: Disallow LPM policy control for external ports") introduced disabling link power management (LPM) for ports that are advertized as external/hotplug capable. This is necessary to force the maximum power policy (ATA_LPM_MAX_POWER) onto the port link to ensure that the hotplug capability of the port is functional. However, doing so blindly for all ports can prevent systems from going into a low power state, even if the external/hotplug ports on the system are unused. E.g., a laptop may see the internal SATA slot of a docking station as an external hotplug capable port, and in such case, the user may prefer to not use the port and to favor instead enabling LPM to allow the laptop to transition to low power states. Since there is no easy method to automatically detect such choice, introduce the new mask_port_ext module parameter to allow a user to ignore the external/hotplug capability of a port. The format for this parameter value is identical to the format used for the mask_port_map parameter: a mask can be defined for all AHCI adapters of a system or for a particular adapters identified with their PCI IDs (bus:dev.func format). The function ahci_get_port_map_mask() is renamed to ahci_get_port_mask() and modified to return a mask, either for the port map mask of an adapter (to ignore ports) or for the external/hotplug capability of an adapter. 
Differentiation between mask_port_map and mask_port_ext is done by passing the parameter string to ahci_get_port_mask() as a second argument. To be consistent with this change, the function ahci_apply_port_map_mask() is renamed ahci_port_mask() and changed to return a mask value. The mask for the external/hotplug capability for an adapter, if defined by the mask_port_ext parameter, is stored in the new field mask_port_ext of struct ahci_host_priv. ahci_mark_external_port() is modified to not set the ATA_PFLAG_EXTERNAL flag for a port if hpriv->mask_port_ext includes the number of the port. In such case, an information message is printed to notify that the external/hotplug capability is being ignored. Reported-by: Dieter Mummenschanz Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220465 Fixes: 4edf1505b76d ("ata: ahci: Disallow LPM policy control for external ports") Signed-off-by: Damien Le Moal Tested-by: Dieter Mummenschanz --- drivers/ata/ahci.c | 57 ++++++++++++++++++++++++++++++++-------------- drivers/ata/ahci.h | 1 + 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index e1c24bbacf64..7a7f88b3fa2b 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -689,40 +689,50 @@ MODULE_PARM_DESC(mask_port_map, "where is the PCI ID of an AHCI controller in the " "form \"domain:bus:dev.func\""); -static void ahci_apply_port_map_mask(struct device *dev, - struct ahci_host_priv *hpriv, char *mask_s) +static char *ahci_mask_port_ext; +module_param_named(mask_port_ext, ahci_mask_port_ext, charp, 0444); +MODULE_PARM_DESC(mask_port_ext, + "32-bits mask to ignore the external/hotplug capability of ports.
" + "Valid values are: " + "\"\" to apply the same mask to all AHCI controller " + "devices, and \"=,=,...\" to " + "specify different masks for the controllers specified, " + "where is the PCI ID of an AHCI controller in the " + "form \"domain:bus:dev.func\""); + +static u32 ahci_port_mask(struct device *dev, char *mask_s) { unsigned int mask; if (kstrtouint(mask_s, 0, &mask)) { dev_err(dev, "Invalid port map mask\n"); - return; + return 0; } - hpriv->mask_port_map = mask; + return mask; } -static void ahci_get_port_map_mask(struct device *dev, - struct ahci_host_priv *hpriv) +static u32 ahci_get_port_mask(struct device *dev, char *mask_p) { char *param, *end, *str, *mask_s; char *name; + u32 mask = 0; - if (!strlen(ahci_mask_port_map)) - return; + if (!mask_p || !strlen(mask_p)) + return 0; - str = kstrdup(ahci_mask_port_map, GFP_KERNEL); + str = kstrdup(mask_p, GFP_KERNEL); if (!str) - return; + return 0; /* Handle single mask case */ if (!strchr(str, '=')) { - ahci_apply_port_map_mask(dev, hpriv, str); + mask = ahci_port_mask(dev, str); goto free; } /* - * Mask list case: parse the parameter to apply the mask only if + * Mask list case: parse the parameter to get the mask only if * the device name matches. */ param = str; @@ -752,11 +762,13 @@ static void ahci_get_port_map_mask(struct device *dev, param++; } - ahci_apply_port_map_mask(dev, hpriv, mask_s); + mask = ahci_port_mask(dev, mask_s); } free: kfree(str); + + return mask; } static void ahci_pci_save_initial_config(struct pci_dev *pdev, @@ -782,8 +794,10 @@ static void ahci_pci_save_initial_config(struct pci_dev *pdev, } /* Handle port map masks passed as module parameter. 
*/ - if (ahci_mask_port_map) - ahci_get_port_map_mask(&pdev->dev, hpriv); + hpriv->mask_port_map = + ahci_get_port_mask(&pdev->dev, ahci_mask_port_map); + hpriv->mask_port_ext = + ahci_get_port_mask(&pdev->dev, ahci_mask_port_ext); ahci_save_initial_config(&pdev->dev, hpriv); } @@ -1757,11 +1771,20 @@ static void ahci_mark_external_port(struct ata_port *ap) void __iomem *port_mmio = ahci_port_base(ap); u32 tmp; - /* mark external ports (hotplug-capable, eSATA) */ + /* + * Mark external ports (hotplug-capable, eSATA), unless we were asked to + * ignore this feature. + */ tmp = readl(port_mmio + PORT_CMD); if (((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS)) || - (tmp & PORT_CMD_HPCP)) + (tmp & PORT_CMD_HPCP)) { + if (hpriv->mask_port_ext & (1U << ap->port_no)) { + ata_port_info(ap, + "Ignoring external/hotplug capability\n"); + return; + } ap->pflags |= ATA_PFLAG_EXTERNAL; + } } static void ahci_update_initial_lpm_policy(struct ata_port *ap) diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 2c10c8f440d1..293b7fb216b5 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -330,6 +330,7 @@ struct ahci_host_priv { /* Input fields */ unsigned int flags; /* AHCI_HFLAG_* */ u32 mask_port_map; /* Mask of valid ports */ + u32 mask_port_ext; /* Mask of ports ext capability */ void __iomem * mmio; /* bus-independent mem map */ u32 cap; /* cap to use */ From d280233fc86692f495d5e08092e5422bc2f583a8 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 22 Aug 2025 16:28:05 +0530 Subject: [PATCH 2342/2411] Octeontx2-af: Fix NIX X2P calibration failures Before configuring the NIX block, the AF driver initiates the "NIX block X2P bus calibration" and verifies that NIX interfaces such as CGX and LBK are active and functioning correctly. On few silicon variants(CNF10KA and CNF10KB), X2P calibration failures have been observed on some CGX blocks that are not mapped to the NIX block. 
Since both NIX-mapped and non-NIX-mapped CGX blocks share the same VENDOR,DEVICE,SUBSYS_DEVID, it's not possible to skip probe based on these parameters. This patch introduces "is_cgx_mapped_to_nix" API to detect and skip probe of non NIX mapped CGX blocks. Fixes: aba53d5dbcea ("octeontx2-af: NIX block admin queue init") Signed-off-by: Hariprasad Kelam Link: https://patch.msgid.link/20250822105805.2236528-1-hkelam@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 7 +++++++ drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 14 ++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 4ff19a04b23e..0c46ba8a5adc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1978,6 +1978,13 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_release_regions; } + if (!is_cn20k(pdev) && + !is_cgx_mapped_to_nix(pdev->subsystem_device, cgx->cgx_id)) { + dev_notice(dev, "CGX %d not mapped to NIX, skipping probe\n", + cgx->cgx_id); + goto err_release_regions; + } + cgx->lmac_count = cgx->mac_ops->get_nr_lmacs(cgx); if (!cgx->lmac_count) { dev_notice(dev, "CGX %d LMAC count is zero, skipping probe\n", cgx->cgx_id); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 7ee1fdeb5295..18c7bb39dbc7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -783,6 +783,20 @@ static inline bool is_cn10kb(struct rvu *rvu) return false; } +static inline bool is_cgx_mapped_to_nix(unsigned short id, u8 cgx_id) +{ + /* On CNF10KA and CNF10KB silicons only two CGX blocks are connected + * to NIX.
+ */ + if (id == PCI_SUBSYS_DEVID_CNF10K_A || id == PCI_SUBSYS_DEVID_CNF10K_B) + return cgx_id <= 1; + + return !(cgx_id && !(id == PCI_SUBSYS_DEVID_96XX || + id == PCI_SUBSYS_DEVID_98XX || + id == PCI_SUBSYS_DEVID_CN10K_A || + id == PCI_SUBSYS_DEVID_CN10K_B)); +} + static inline bool is_rvu_npc_hash_extract_en(struct rvu *rvu) { u64 npc_const3; From 97766512a9951b9fd6fc97f1b93211642bb0b220 Mon Sep 17 00:00:00 2001 From: Vladimir Riabchun Date: Fri, 22 Aug 2025 20:11:36 +0200 Subject: [PATCH 2343/2411] mISDN: hfcpci: Fix warning when deleting uninitialized timer With CONFIG_DEBUG_OBJECTS_TIMERS unloading hfcpci module leads to the following splat: [ 250.215892] ODEBUG: assert_init not available (active state 0) object: ffffffffc01a3dc0 object type: timer_list hint: 0x0 [ 250.217520] WARNING: CPU: 0 PID: 233 at lib/debugobjects.c:612 debug_print_object+0x1b6/0x2c0 [ 250.218775] Modules linked in: hfcpci(-) mISDN_core [ 250.219537] CPU: 0 UID: 0 PID: 233 Comm: rmmod Not tainted 6.17.0-rc2-g6f713187ac98 #2 PREEMPT(voluntary) [ 250.220940] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 250.222377] RIP: 0010:debug_print_object+0x1b6/0x2c0 [ 250.223131] Code: fc ff df 48 89 fa 48 c1 ea 03 80 3c 02 00 75 4f 41 56 48 8b 14 dd a0 4e 01 9f 48 89 ee 48 c7 c7 20 46 01 9f e8 cb 84d [ 250.225805] RSP: 0018:ffff888015ea7c08 EFLAGS: 00010286 [ 250.226608] RAX: 0000000000000000 RBX: 0000000000000005 RCX: ffffffff9be93a95 [ 250.227708] RDX: 1ffff1100d945138 RSI: 0000000000000008 RDI: ffff88806ca289c0 [ 250.228993] RBP: ffffffff9f014a00 R08: 0000000000000001 R09: ffffed1002bd4f39 [ 250.230043] R10: ffff888015ea79cf R11: 0000000000000001 R12: 0000000000000001 [ 250.231185] R13: ffffffff9eea0520 R14: 0000000000000000 R15: ffff888015ea7cc8 [ 250.232454] FS: 00007f3208f01540(0000) GS:ffff8880caf5a000(0000) knlGS:0000000000000000 [ 250.233851] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 250.234856] CR2: 00007f32090a7421 CR3: 
0000000004d63000 CR4: 00000000000006f0 [ 250.236117] Call Trace: [ 250.236599] [ 250.236967] ? trace_irq_enable.constprop.0+0xd4/0x130 [ 250.237920] debug_object_assert_init+0x1f6/0x310 [ 250.238762] ? __pfx_debug_object_assert_init+0x10/0x10 [ 250.239658] ? __lock_acquire+0xdea/0x1c70 [ 250.240369] __try_to_del_timer_sync+0x69/0x140 [ 250.241172] ? __pfx___try_to_del_timer_sync+0x10/0x10 [ 250.242058] ? __timer_delete_sync+0xc6/0x120 [ 250.242842] ? lock_acquire+0x30/0x80 [ 250.243474] ? __timer_delete_sync+0xc6/0x120 [ 250.244262] __timer_delete_sync+0x98/0x120 [ 250.245015] HFC_cleanup+0x10/0x20 [hfcpci] [ 250.245704] __do_sys_delete_module+0x348/0x510 [ 250.246461] ? __pfx___do_sys_delete_module+0x10/0x10 [ 250.247338] do_syscall_64+0xc1/0x360 [ 250.247924] entry_SYSCALL_64_after_hwframe+0x77/0x7f Fix this by initializing hfc_tl timer with DEFINE_TIMER macro. Also, use mod_timer instead of manual timeout update. Fixes: 87c5fa1bb426 ("mISDN: Add different different timer settings for hfc-pci") Fixes: 175302f6b79e ("mISDN: hfcpci: Fix use-after-free bug in hfcpci_softirq") Signed-off-by: Vladimir Riabchun Link: https://patch.msgid.link/aKiy2D_LiWpQ5kXq@vova-pc Signed-off-by: Jakub Kicinski --- drivers/isdn/hardware/mISDN/hfcpci.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index 2b05722d4dbe..ea8a0ab47afd 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -39,12 +39,13 @@ #include "hfc_pci.h" +static void hfcpci_softirq(struct timer_list *unused); static const char *hfcpci_revision = "2.0"; static int HFC_cnt; static uint debug; static uint poll, tics; -static struct timer_list hfc_tl; +static DEFINE_TIMER(hfc_tl, hfcpci_softirq); static unsigned long hfc_jiffies; MODULE_AUTHOR("Karsten Keil"); @@ -2305,8 +2306,7 @@ hfcpci_softirq(struct timer_list *unused) hfc_jiffies = jiffies + 1; else hfc_jiffies += tics; 
- hfc_tl.expires = hfc_jiffies; - add_timer(&hfc_tl); + mod_timer(&hfc_tl, hfc_jiffies); } static int __init @@ -2332,10 +2332,8 @@ HFC_init(void) if (poll != HFCPCI_BTRANS_THRESHOLD) { printk(KERN_INFO "%s: Using alternative poll value of %d\n", __func__, poll); - timer_setup(&hfc_tl, hfcpci_softirq, 0); - hfc_tl.expires = jiffies + tics; - hfc_jiffies = hfc_tl.expires; - add_timer(&hfc_tl); + hfc_jiffies = jiffies + tics; + mod_timer(&hfc_tl, hfc_jiffies); } else tics = 0; /* indicate the use of controller's timer */ From 007a5ffadc4fd51739527f1503b7cf048f31c413 Mon Sep 17 00:00:00 2001 From: Yeounsu Moon Date: Sun, 24 Aug 2025 03:29:24 +0900 Subject: [PATCH 2344/2411] net: dlink: fix multicast stats being counted incorrectly `McstFramesRcvdOk` counts the number of received multicast packets, and it reports the value correctly. However, reading `McstFramesRcvdOk` clears the register to zero. As a result, the driver was reporting only the packets since the last read, instead of the accumulated total. Fix this by updating the multicast statistics accumulatively instead of instantaneously. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Tested-on: D-Link DGE-550T Rev-A3 Signed-off-by: Yeounsu Moon Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250823182927.6063-3-yyyynoom@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/dlink/dl2k.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index cc60ee454bf9..6bbf6e5584e5 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -1099,7 +1099,7 @@ get_stats (struct net_device *dev) dev->stats.rx_bytes += dr32(OctetRcvOk); dev->stats.tx_bytes += dr32(OctetXmtOk); - dev->stats.multicast = dr32(McstFramesRcvdOk); + dev->stats.multicast += dr32(McstFramesRcvdOk); dev->stats.collisions += dr32(SingleColFrames) + dr32(MultiColFrames); From a39d13e291c2681e475d9fd41655764dab09be7b Mon Sep 17 00:00:00 2001 From: Liming Wu Date: Thu, 31 Jul 2025 17:27:57 +0800 Subject: [PATCH 2345/2411] virtio_pci: Fix misleading comment for queue vector This patch fixes misleading comments in both legacy and modern virtio-pci device implementations. The comments previously referred to the "config vector" for parameters and return values of the `vp_legacy_queue_vector()` and `vp_modern_queue_vector()` functions, which is incorrect. Signed-off-by: Liming Wu Message-Id: <20250731092757.1000-1-liming.wu@jaguarmicro.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_pci_legacy_dev.c | 4 ++-- drivers/virtio/virtio_pci_modern_dev.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_pci_legacy_dev.c b/drivers/virtio/virtio_pci_legacy_dev.c index 677d1f68bc9b..bbbf89c22880 100644 --- a/drivers/virtio/virtio_pci_legacy_dev.c +++ b/drivers/virtio/virtio_pci_legacy_dev.c @@ -140,9 +140,9 @@ EXPORT_SYMBOL_GPL(vp_legacy_set_status); * vp_legacy_queue_vector - set the MSIX vector for a specific virtqueue * @ldev: the legacy virtio-pci device * @index: queue index - * @vector: the config vector + * @vector: the queue vector * - * Returns the config vector read from the device + * Returns the queue vector read from the device */ u16 vp_legacy_queue_vector(struct virtio_pci_legacy_device *ldev, u16 index, u16 vector) diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index d665f8f73ea8..9e503b7a58d8 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c +++ b/drivers/virtio/virtio_pci_modern_dev.c @@ -546,9 +546,9 @@ EXPORT_SYMBOL_GPL(vp_modern_set_queue_reset); * vp_modern_queue_vector - set the MSIX vector for a specific virtqueue * @mdev: the modern virtio-pci device * @index: queue index - * @vector: the config vector + * @vector: the queue vector * - * Returns the config vector read from the device + * Returns the queue vector read from the device */ u16 vp_modern_queue_vector(struct virtio_pci_modern_device *mdev, u16 index, u16 vector) From dd54bcf86c91a4455b1f95cbc8e9ac91205f3193 Mon Sep 17 00:00:00 2001 From: Nikolay Kuratov Date: Tue, 5 Aug 2025 16:09:17 +0300 Subject: [PATCH 2346/2411] vhost/net: Protect ubufs with rcu read lock in vhost_net_ubuf_put() When operating on struct vhost_net_ubuf_ref, the following execution sequence is theoretically possible: CPU0 is finalizing DMA operation CPU1 is doing VHOST_NET_SET_BACKEND // ubufs->refcount == 2 vhost_net_ubuf_put() vhost_net_ubuf_put_wait_and_free(oldubufs) 
vhost_net_ubuf_put_and_wait() vhost_net_ubuf_put() int r = atomic_sub_return(1, &ubufs->refcount); // r = 1 int r = atomic_sub_return(1, &ubufs->refcount); // r = 0 wait_event(ubufs->wait, !atomic_read(&ubufs->refcount)); // no wait occurs here because condition is already true kfree(ubufs); if (unlikely(!r)) wake_up(&ubufs->wait); // use-after-free This leads to use-after-free on ubufs access. This happens because CPU1 skips waiting for wake_up() when refcount is already zero. To prevent that use a read-side RCU critical section in vhost_net_ubuf_put(), as suggested by Hillf Danton. For this lock to take effect, free ubufs with kfree_rcu(). Cc: stable@vger.kernel.org Fixes: 0ad8b480d6ee9 ("vhost: fix ref cnt checking deadlock") Reported-by: Andrey Ryabinin Suggested-by: Hillf Danton Signed-off-by: Nikolay Kuratov Message-Id: <20250805130917.727332-1-kniv@yandex-team.ru> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 6edac0c1ba9b..c6508fe0d5c8 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -99,6 +99,7 @@ struct vhost_net_ubuf_ref { atomic_t refcount; wait_queue_head_t wait; struct vhost_virtqueue *vq; + struct rcu_head rcu; }; #define VHOST_NET_BATCH 64 @@ -250,9 +251,13 @@ vhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy) static int vhost_net_ubuf_put(struct vhost_net_ubuf_ref *ubufs) { - int r = atomic_sub_return(1, &ubufs->refcount); + int r; + + rcu_read_lock(); + r = atomic_sub_return(1, &ubufs->refcount); if (unlikely(!r)) wake_up(&ubufs->wait); + rcu_read_unlock(); return r; } @@ -265,7 +270,7 @@ static void vhost_net_ubuf_put_and_wait(struct vhost_net_ubuf_ref *ubufs) static void vhost_net_ubuf_put_wait_and_free(struct vhost_net_ubuf_ref *ubufs) { vhost_net_ubuf_put_and_wait(ubufs); - kfree(ubufs); + kfree_rcu(ubufs, rcu); } static void vhost_net_clear_ubuf_info(struct vhost_net *n) From 
ced17ee32a9988b8a260628e7c31a100d7dc082e Mon Sep 17 00:00:00 2001 From: Igor Torrente Date: Thu, 7 Aug 2025 09:41:45 -0300 Subject: [PATCH 2347/2411] Revert "virtio: reject shm region if length is zero" The commit 206cc44588f7 ("virtio: reject shm region if length is zero") breaks the Virtio-gpu `host_visible` feature. As you can see in the snippet below, host_visible_region is zero because of the `kzalloc`. It's using the `vm_get_shm_region` (drivers/virtio/virtio_mmio.c:536) to read the `addr` and `len` from qemu/crosvm. ``` drivers/gpu/drm/virtio/virtgpu_kms.c 132 vgdev = drmm_kzalloc(dev, sizeof(struct virtio_gpu_device), GFP_KERNEL); [...] 177 if (virtio_get_shm_region(vgdev->vdev, &vgdev->host_visible_region, 178 VIRTIO_GPU_SHM_ID_HOST_VISIBLE)) { ``` Now it always fails. To fix, revert the offending commit. Fixes: 206cc44588f7 ("virtio: reject shm region if length is zero") Signed-off-by: Igor Torrente Message-Id: <20250807124145.81816-1-igor.torrente@collabora.com> Signed-off-by: Michael S. 
Tsirkin --- include/linux/virtio_config.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 918cf25cd3c6..8bf156dde554 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -328,8 +328,6 @@ static inline bool virtio_get_shm_region(struct virtio_device *vdev, struct virtio_shm_region *region, u8 id) { - if (!region->len) - return false; if (!vdev->config->get_shm_region) return false; return vdev->config->get_shm_region(vdev, region, id); From 24fc631539cc78225f5c61f99c7666fcff48024d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 23:39:57 -0700 Subject: [PATCH 2348/2411] vhost: Fix ioctl # for VHOST_[GS]ET_FORK_FROM_OWNER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VHOST_[GS]ET_FEATURES_ARRAY ioctl already took 0x83 and it would result in a build error when the vhost uapi header is used for perf tool build like below. In file included from trace/beauty/ioctl.c:93: tools/perf/trace/beauty/generated/ioctl/vhost_virtio_ioctl_array.c: In function ‘ioctl__scnprintf_vhost_virtio_cmd’: tools/perf/trace/beauty/generated/ioctl/vhost_virtio_ioctl_array.c:36:18: error: initialized field overwritten [-Werror=override-init] 36 | [0x83] = "SET_FORK_FROM_OWNER", | ^~~~~~~~~~~~~~~~~~~~~ tools/perf/trace/beauty/generated/ioctl/vhost_virtio_ioctl_array.c:36:18: note: (near initialization for ‘vhost_virtio_ioctl_cmds[131]’) Fixes: 7d9896e9f6d02d8a ("vhost: Reintroduce kthread API and add mode selection") Signed-off-by: Namhyung Kim Message-Id: <20250819063958.833770-1-namhyung@kernel.org> Signed-off-by: Michael S. 
Tsirkin Tested-by: Lei Yang --- include/uapi/linux/vhost.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 283348b64af9..c57674a6aa0d 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -260,7 +260,7 @@ * When fork_owner is set to VHOST_FORK_OWNER_KTHREAD: * - Vhost will create vhost workers as kernel threads. */ -#define VHOST_SET_FORK_FROM_OWNER _IOW(VHOST_VIRTIO, 0x83, __u8) +#define VHOST_SET_FORK_FROM_OWNER _IOW(VHOST_VIRTIO, 0x84, __u8) /** * VHOST_GET_FORK_OWNER - Get the current fork_owner flag for the vhost device. @@ -268,6 +268,6 @@ * * @return: An 8-bit value indicating the current thread mode. */ -#define VHOST_GET_FORK_FROM_OWNER _IOR(VHOST_VIRTIO, 0x84, __u8) +#define VHOST_GET_FORK_FROM_OWNER _IOR(VHOST_VIRTIO, 0x85, __u8) #endif From 528d92bfc0937a6a1ec837dbbcb3612a8545cd37 Mon Sep 17 00:00:00 2001 From: Ying Gao Date: Tue, 12 Aug 2025 17:51:18 +0800 Subject: [PATCH 2349/2411] virtio_input: Improve freeze handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When executing suspend to ram, if lacking the operations to reset device and free unused buffers before deleting a vq, resource leaks and inconsistent device status will appear. According to chapter "3.3.1 Driver Requirements: Device Cleanup:" of virtio-specification: Driver MUST ensure a virtqueue isn’t live (by device reset) before removing exposed buffers. Therefore, modify the virtinput_freeze function to reset the device and delete the unused buffers before deleting the virtqueue, just like virtinput_remove does. Co-developed-by: Ying Xu Signed-off-by: Ying Xu Co-developed-by: Junnan Wu Signed-off-by: Junnan Wu Signed-off-by: Ying Gao Message-Id: <20250812095118.3622717-1-ying01.gao@samsung.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_input.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c index a5d63269f20b..d0728285b6ce 100644 --- a/drivers/virtio/virtio_input.c +++ b/drivers/virtio/virtio_input.c @@ -360,11 +360,15 @@ static int virtinput_freeze(struct virtio_device *vdev) { struct virtio_input *vi = vdev->priv; unsigned long flags; + void *buf; spin_lock_irqsave(&vi->lock, flags); vi->ready = false; spin_unlock_irqrestore(&vi->lock, flags); + virtio_reset_device(vdev); + while ((buf = virtqueue_detach_unused_buf(vi->sts)) != NULL) + kfree(buf); vdev->config->del_vqs(vdev); return 0; } From 45d8ef6322b8a828d3b1e2cfb8893e2ff882cb23 Mon Sep 17 00:00:00 2001 From: Junnan Wu Date: Tue, 12 Aug 2025 17:08:17 +0800 Subject: [PATCH 2350/2411] virtio_net: adjust the execution order of function `virtnet_close` during freeze "Use after free" issue appears in suspend once race occurs when napi poll schedules after `netif_device_detach` and before napi disables. For details, during suspend flow of virtio-net, the tx queue state is set to "__QUEUE_STATE_DRV_XOFF" by CPU-A. And at some coincidental times, if a TCP connection is still working, CPU-B does `virtnet_poll` before napi disable. In this flow, the state "__QUEUE_STATE_DRV_XOFF" of tx queue will be cleared. This is not the normal process it expects. After that, CPU-A continues to close driver then virtqueue is removed. The sequence is like below: -------------------------------------------------------------------------- CPU-A CPU-B ----- ----- suspend is called A TCP based on virtio-net still work virtnet_freeze |- virtnet_freeze_down | |- netif_device_detach | | |- netif_tx_stop_all_queues | | |- netif_tx_stop_queue | | |- set_bit | | (__QUEUE_STATE_DRV_XOFF,...) | | softirq raised | | |- net_rx_action | | |- napi_poll | | |- virtnet_poll | | |- virtnet_poll_cleantx | | |- netif_tx_wake_queue | | |- test_and_clear_bit | | (__QUEUE_STATE_DRV_XOFF,...) 
| |- virtnet_close | |- virtnet_disable_queue_pair | |- virtnet_napi_tx_disable |- remove_vq_common -------------------------------------------------------------------------- When TCP delayack timer is up, a cpu gets softirq and irq handler `tcp_delack_timer_handler` will be called, which will finally call `start_xmit` in virtio net driver. Then the access to tx virtq will cause panic. The root cause of this issue is that napi tx is not disabled before `netif_tx_stop_queue`, once `virtnet_poll` schedules in such coincidental time, the tx queue state will be cleared. To solve this issue, adjust the order of function `virtnet_close` in `virtnet_freeze_down`. Co-developed-by: Ying Xu Signed-off-by: Ying Xu Signed-off-by: Junnan Wu Message-Id: <20250812090817.3463403-1-junnan01.wu@samsung.com> Signed-off-by: Michael S. Tsirkin --- drivers/net/virtio_net.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index d14e6d602273..975bdc5dab84 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -5758,14 +5758,15 @@ static void virtnet_freeze_down(struct virtio_device *vdev) disable_rx_mode_work(vi); flush_work(&vi->rx_mode_work); - netif_tx_lock_bh(vi->dev); - netif_device_detach(vi->dev); - netif_tx_unlock_bh(vi->dev); if (netif_running(vi->dev)) { rtnl_lock(); virtnet_close(vi->dev); rtnl_unlock(); } + + netif_tx_lock_bh(vi->dev); + netif_device_detach(vi->dev); + netif_tx_unlock_bh(vi->dev); } static int init_vqs(struct virtnet_info *vi); From b3dcc9d1d806fb1e175f85978713eef868531da4 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Tue, 26 Aug 2025 10:19:46 +0300 Subject: [PATCH 2351/2411] memblock: fix kernel-doc for MEMBLOCK_RSRV_NOINIT The kernel-doc description of MEMBLOCK_RSRV_NOINIT and memblock_reserved_mark_noinit() do not accurately describe their functionality. 
Expand their kernel doc to make it clear that the user of MEMBLOCK_RSRV_NOINIT is responsible to properly initialize the struct pages for such regions and add more details about effects of using this flag. Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/f8140a17-c4ec-489b-b314-d45abe48bf36@redhat.com Link: https://lore.kernel.org/r/20250826071947.1949725-1-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) --- include/linux/memblock.h | 5 +++-- mm/memblock.c | 15 +++++++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b96746376e17..fcda8481de9a 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -40,8 +40,9 @@ extern unsigned long long max_possible_pfn; * via a driver, and never indicated in the firmware-provided memory map as * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the * kernel resource tree. - * @MEMBLOCK_RSRV_NOINIT: memory region for which struct pages are - * not initialized (only for reserved regions). + * @MEMBLOCK_RSRV_NOINIT: reserved memory region for which struct pages are not + * fully initialized. Users of this flag are responsible to properly initialize + * struct pages of this region * @MEMBLOCK_RSRV_KERN: memory region that is reserved for kernel use, * either explictitly with memblock_reserve_kern() or via memblock * allocation APIs. All memblock allocations set this flag. diff --git a/mm/memblock.c b/mm/memblock.c index 8a0ed3074af4..117d963e677c 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1091,13 +1091,20 @@ int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size) /** * memblock_reserved_mark_noinit - Mark a reserved memory region with flag - * MEMBLOCK_RSRV_NOINIT which results in the struct pages not being initialized - * for this region. 
+ * MEMBLOCK_RSRV_NOINIT + * * @base: the base phys addr of the region * @size: the size of the region * - * struct pages will not be initialized for reserved memory regions marked with - * %MEMBLOCK_RSRV_NOINIT. + * The struct pages for the reserved regions marked %MEMBLOCK_RSRV_NOINIT will + * not be fully initialized to allow the caller optimize their initialization. + * + * When %CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled, setting this flag + * completely bypasses the initialization of struct pages for such region. + * + * When %CONFIG_DEFERRED_STRUCT_PAGE_INIT is disabled, struct pages in this + * region will be initialized with default values but won't be marked as + * reserved. * * Return: 0 on success, -errno on failure. */ From 16fdb3cc6af8460f23a706512c6f5e7dfdd4f338 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 15 Jul 2025 10:45:39 +0200 Subject: [PATCH 2352/2411] Revert "drm/tegra: Use dma_buf from GEM object instance" This reverts commit 482c7e296edc0f594e8869a789a40be53c49bd6a. The dma_buf field in struct drm_gem_object is not stable over the object instance's lifetime. The field becomes NULL when user space releases the final GEM handle on the buffer object. This resulted in a NULL-pointer deref. Workarounds in commit 5307dce878d4 ("drm/gem: Acquire references on GEM handles for framebuffers") and commit f6bfc9afc751 ("drm/framebuffer: Acquire internal references on GEM handles") only solved the problem partially. They especially don't work for buffer objects without a DRM framebuffer associated. Hence, this revert to going back to using .import_attach->dmabuf. 
Signed-off-by: Thomas Zimmermann Reviewed-by: Simona Vetter Link: https://lore.kernel.org/r/20250715084549.41473-1-tzimmermann@suse.de --- drivers/gpu/drm/tegra/gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index 41a285ec889f..8ede07fb7a21 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -526,7 +526,7 @@ void tegra_bo_free_object(struct drm_gem_object *gem) if (drm_gem_is_imported(gem)) { dma_buf_unmap_attachment_unlocked(gem->import_attach, bo->sgt, DMA_TO_DEVICE); - dma_buf_detach(gem->dma_buf, gem->import_attach); + dma_buf_detach(gem->import_attach->dmabuf, gem->import_attach); } } From e246518aa24f1460902725e97d0abf574aec6ade Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Aug 2025 13:43:47 +0200 Subject: [PATCH 2353/2411] PM: sleep: annotate RCU list iterations These iterations require the read lock, otherwise RCU lockdep will splat: ============================= WARNING: suspicious RCU usage 6.17.0-rc3-00014-g31419c045d64 #6 Tainted: G O ----------------------------- drivers/base/power/main.c:1333 RCU-list traversed in non-reader section!! 
other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 5 locks held by rtcwake/547: #0: 00000000643ab418 (sb_writers#6){.+.+}-{0:0}, at: file_start_write+0x2b/0x3a #1: 0000000067a0ca88 (&of->mutex#2){+.+.}-{4:4}, at: kernfs_fop_write_iter+0x181/0x24b #2: 00000000631eac40 (kn->active#3){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x191/0x24b #3: 00000000609a1308 (system_transition_mutex){+.+.}-{4:4}, at: pm_suspend+0xaf/0x30b #4: 0000000060c0fdb0 (device_links_srcu){.+.+}-{0:0}, at: device_links_read_lock+0x75/0x98 stack backtrace: CPU: 0 UID: 0 PID: 547 Comm: rtcwake Tainted: G O 6.17.0-rc3-00014-g31419c045d64 #6 VOLUNTARY Tainted: [O]=OOT_MODULE Stack: 223721b3a80 6089eac6 00000001 00000001 ffffff00 6089eac6 00000535 6086e528 721b3ac0 6003c294 00000000 60031fc0 Call Trace: [<600407ed>] show_stack+0x10e/0x127 [<6003c294>] dump_stack_lvl+0x77/0xc6 [<6003c2fd>] dump_stack+0x1a/0x20 [<600bc2f8>] lockdep_rcu_suspicious+0x116/0x13e [<603d8ea1>] dpm_async_suspend_superior+0x117/0x17e [<603d980f>] device_suspend+0x528/0x541 [<603da24b>] dpm_suspend+0x1a2/0x267 [<603da837>] dpm_suspend_start+0x5d/0x72 [<600ca0c9>] suspend_devices_and_enter+0xab/0x736 [...] Add the fourth argument to the iteration to annotate this and avoid the splat. Fixes: 06799631d522 ("PM: sleep: Make async suspend handle suppliers like parents") Fixes: ed18738fff02 ("PM: sleep: Make async resume handle consumers like children") Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20250826134348.aba79f6e6299.I9ecf55da46ccf33778f2c018a82e1819d815b348@changeid Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index dbf5456cd891..2ea6e05e6ec9 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -675,7 +675,7 @@ static void dpm_async_resume_subordinate(struct device *dev, async_func_t func) idx = device_links_read_lock(); /* Start processing the device's "async" consumers. */ - list_for_each_entry_rcu(link, &dev->links.consumers, s_node) + list_for_each_entry_rcu_locked(link, &dev->links.consumers, s_node) if (READ_ONCE(link->status) != DL_STATE_DORMANT) dpm_async_with_cleanup(link->consumer, func); @@ -1330,7 +1330,7 @@ static void dpm_async_suspend_superior(struct device *dev, async_func_t func) idx = device_links_read_lock(); /* Start processing the device's "async" suppliers. */ - list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) + list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) if (READ_ONCE(link->status) != DL_STATE_DORMANT) dpm_async_with_cleanup(link->supplier, func); From 04e1f683cd28dc9407b238543871a6e09a570dc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20Kempczy=C5=84ski?= Date: Wed, 20 Aug 2025 10:39:04 +0200 Subject: [PATCH 2354/2411] drm/xe/xe_sync: avoid race during ufence signaling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marking ufence as signalled after copy_to_user() is too late. Worker thread which signals ufence by memory write might be raced with another userspace vm-bind call. In map/unmap scenario unmap may still see ufence is not signalled causing -EBUSY. Change the order of marking / write to user-fence fixes this issue. 
Fixes: 977e5b82e090 ("drm/xe: Expose user fence from xe_sync_entry") Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5536 Signed-off-by: Zbigniew Kempczyński Cc: Matthew Brost Cc: Matthew Auld Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250820083903.2109891-2-zbigniew.kempczynski@intel.com (cherry picked from commit 8ae04fe9ffc93d6bc3bc63ac08375427d69cee06) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index f87276df18f2..82872a51f098 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -77,6 +77,7 @@ static void user_fence_worker(struct work_struct *w) { struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker); + WRITE_ONCE(ufence->signalled, 1); if (mmget_not_zero(ufence->mm)) { kthread_use_mm(ufence->mm); if (copy_to_user(ufence->addr, &ufence->value, sizeof(ufence->value))) @@ -91,7 +92,6 @@ static void user_fence_worker(struct work_struct *w) * Wake up waiters only after updating the ufence state, allowing the UMD * to safely reuse the same ufence without encountering -EBUSY errors. */ - WRITE_ONCE(ufence->signalled, 1); wake_up_all(&ufence->xe->ufence_wq); user_fence_put(ufence); } From 7551865cd12af2dc47e5a174eebcfb0b94b5449b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Thu, 21 Aug 2025 16:30:43 +0200 Subject: [PATCH 2355/2411] drm/xe/vm: Don't pin the vm_resv during validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pinning has the odd side-effect that unlocking *any* resv during validation triggers an "unlocking pinned lock" warning. 
Cc: Matthew Brost Fixes: 5cc3325584c4 ("drm/xe: Rework eviction rejection of bound external bos") Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250821143045.106005-2-thomas.hellstrom@linux.intel.com (cherry picked from commit 0a51bf3e54dd8b77e6f1febbbb66def0660862d2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 5 ++--- drivers/gpu/drm/xe/xe_vm.h | 15 ++------------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 18f27da47a36..d3ef79ebceee 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -2438,7 +2438,6 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) .no_wait_gpu = false, .gfp_retry_mayfail = true, }; - struct pin_cookie cookie; int ret; if (vm) { @@ -2449,10 +2448,10 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) ctx.resv = xe_vm_resv(vm); } - cookie = xe_vm_set_validating(vm, allow_res_evict); + xe_vm_set_validating(vm, allow_res_evict); trace_xe_bo_validate(bo); ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); - xe_vm_clear_validating(vm, allow_res_evict, cookie); + xe_vm_clear_validating(vm, allow_res_evict); return ret; } diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 2f213737c7e5..2ecb417c19a2 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -315,22 +315,14 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); * Register this task as currently making bos resident for the vm. Intended * to avoid eviction by the same task of shared bos bound to the vm. * Call with the vm's resv lock held. - * - * Return: A pin cookie that should be used for xe_vm_clear_validating(). 
*/ -static inline struct pin_cookie xe_vm_set_validating(struct xe_vm *vm, - bool allow_res_evict) +static inline void xe_vm_set_validating(struct xe_vm *vm, bool allow_res_evict) { - struct pin_cookie cookie = {}; - if (vm && !allow_res_evict) { xe_vm_assert_held(vm); - cookie = lockdep_pin_lock(&xe_vm_resv(vm)->lock.base); /* Pairs with READ_ONCE in xe_vm_is_validating() */ WRITE_ONCE(vm->validating, current); } - - return cookie; } /** @@ -338,17 +330,14 @@ static inline struct pin_cookie xe_vm_set_validating(struct xe_vm *vm, * @vm: Pointer to the vm or NULL * @allow_res_evict: Eviction from @vm was allowed. Must be set to the same * value as for xe_vm_set_validation(). - * @cookie: Cookie obtained from xe_vm_set_validating(). * * Register this task as currently making bos resident for the vm. Intended * to avoid eviction by the same task of shared bos bound to the vm. * Call with the vm's resv lock held. */ -static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict, - struct pin_cookie cookie) +static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict) { if (vm && !allow_res_evict) { - lockdep_unpin_lock(&xe_vm_resv(vm)->lock.base, cookie); /* Pairs with READ_ONCE in xe_vm_is_validating() */ WRITE_ONCE(vm->validating, NULL); } From 2b55ddf36229e0278c956215784ab1feeff510aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Thu, 21 Aug 2025 16:30:45 +0200 Subject: [PATCH 2356/2411] drm/xe/vm: Clear the scratch_pt pointer on error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid triggering a dereference of an error pointer on cleanup in xe_vm_free_scratch() by clearing any scratch_pt error pointer. 
Signed-off-by: Thomas Hellström Fixes: 06951c2ee72d ("drm/xe: Use NULL PTEs as scratch PTEs") Cc: Brian Welty Cc: Rodrigo Vivi Cc: Lucas De Marchi Cc: # v6.8+ Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250821143045.106005-4-thomas.hellstrom@linux.intel.com (cherry picked from commit 358ee50ab565f3c8ea32480e9d03127a81ba32f8) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index ec04bef8ae40..d60c4b115304 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1610,8 +1610,12 @@ static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); - if (IS_ERR(vm->scratch_pt[id][i])) - return PTR_ERR(vm->scratch_pt[id][i]); + if (IS_ERR(vm->scratch_pt[id][i])) { + int err = PTR_ERR(vm->scratch_pt[id][i]); + + vm->scratch_pt[id][i] = NULL; + return err; + } xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); } From 16ca06aa2c2218cb21907c0c45a746958c944def Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 25 Aug 2025 08:28:41 -0700 Subject: [PATCH 2357/2411] drm/xe: Don't trigger rebind on initial dma-buf validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On the first validate of an imported dma-buf (initial bind), the device has no GPU mappings, so a rebind is unnecessary. Rebinding here is harmful in multi-GPU setups and for VMs using preempt-fence mode, as it would evict in-flight GPU work. 
v2: - Drop dma_buf_validated, check for XE_PL_SYSTEM (Thomas) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://lore.kernel.org/r/20250825152841.3837378-1-matthew.brost@intel.com (cherry picked from commit ffdf968762e4fb3cdae54e811ec3525e67440a60) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index d3ef79ebceee..1be2415966df 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -812,7 +812,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } if (ttm_bo->type == ttm_bo_type_sg) { - ret = xe_bo_move_notify(bo, ctx); + if (new_mem->mem_type == XE_PL_SYSTEM) + ret = xe_bo_move_notify(bo, ctx); if (!ret) ret = xe_bo_move_dmabuf(ttm_bo, new_mem); return ret; From 75671d90fde8c78e940e15a1366a50ece56c6b69 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Mon, 25 Aug 2025 15:57:42 +0000 Subject: [PATCH 2358/2411] drm/xe: switch to local xbasename() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit b0a2ee5567ab ("drm/xe: prepare xe_gen_wa_oob to be multi-use") introduced a call to basename(). The GNU version of this function is not portable and fails to build with alternative libc implementations like musl or bionic. This causes the following build error: drivers/gpu/drm/xe/xe_gen_wa_oob.c:130:12: error: assignment to ‘const char *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] 130 | fn = basename(fn); | ^ While a POSIX version of basename() could be used, it would require a separate header plus the behavior differs from GNU version in that it might modify its argument. Not great. Instead, implement a local xbasename() helper based on strrchr() that provides the same functionality and avoids portability issues. 
Fixes: b0a2ee5567ab ("drm/xe: prepare xe_gen_wa_oob to be multi-use") Suggested-by: Lucas De Marchi Reviewed-by: Tiffany Yang Signed-off-by: Carlos Llamas Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250825155743.1132433-1-cmllamas@google.com Signed-off-by: Lucas De Marchi (cherry picked from commit 41be792f5baaf90d744a9a9e82994ce560ca9582) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gen_wa_oob.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c index 6581cb0f0e59..247e41c1c48d 100644 --- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c +++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c @@ -123,11 +123,19 @@ static int parse(FILE *input, FILE *csource, FILE *cheader, char *prefix) return 0; } +/* Avoid GNU vs POSIX basename() discrepancy, just use our own */ +static const char *xbasename(const char *s) +{ + const char *p = strrchr(s, '/'); + + return p ? p + 1 : s; +} + static int fn_to_prefix(const char *fn, char *prefix, size_t size) { size_t len; - fn = basename(fn); + fn = xbasename(fn); len = strlen(fn); if (len > size - 1) From 198f36f902ec7e99b645382505f74b87a4523ed9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 25 Aug 2025 11:27:19 -0700 Subject: [PATCH 2359/2411] blk-zoned: Fix a lockdep complaint about recursive locking If preparing a write bio fails then blk_zone_wplug_bio_work() calls bio_endio() with zwplug->lock held. If a device mapper driver is stacked on top of the zoned block device then this results in nested locking of zwplug->lock. The resulting lockdep complaint is a false positive because this is nested locking and not recursive locking. Suppress this false positive by calling blk_zone_wplug_bio_io_error() without holding zwplug->lock. This is safe because no code in blk_zone_wplug_bio_io_error() depends on zwplug->lock being held. 
This patch suppresses the following lockdep complaint: WARNING: possible recursive locking detected -------------------------------------------- kworker/3:0H/46 is trying to acquire lock: ffffff882968b830 (&zwplug->lock){-...}-{2:2}, at: blk_zone_write_plug_bio_endio+0x64/0x1f0 but task is already holding lock: ffffff88315bc230 (&zwplug->lock){-...}-{2:2}, at: blk_zone_wplug_bio_work+0x8c/0x48c other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&zwplug->lock); lock(&zwplug->lock); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by kworker/3:0H/46: #0: ffffff8809486758 ((wq_completion)sdd_zwplugs){+.+.}-{0:0}, at: process_one_work+0x1bc/0x65c #1: ffffffc085de3d70 ((work_completion)(&zwplug->bio_work)){+.+.}-{0:0}, at: process_one_work+0x1e4/0x65c #2: ffffff88315bc230 (&zwplug->lock){-...}-{2:2}, at: blk_zone_wplug_bio_work+0x8c/0x48c stack backtrace: CPU: 3 UID: 0 PID: 46 Comm: kworker/3:0H Tainted: G W OE 6.12.38-android16-5-maybe-dirty-4k #1 8b362b6f76e3645a58cd27d86982bce10d150025 Tainted: [W]=WARN, [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: Spacecraft board based on MALIBU (DT) Workqueue: sdd_zwplugs blk_zone_wplug_bio_work Call trace: dump_backtrace+0xfc/0x17c show_stack+0x18/0x28 dump_stack_lvl+0x40/0xa0 dump_stack+0x18/0x24 print_deadlock_bug+0x38c/0x398 __lock_acquire+0x13e8/0x2e1c lock_acquire+0x134/0x2b4 _raw_spin_lock_irqsave+0x5c/0x80 blk_zone_write_plug_bio_endio+0x64/0x1f0 bio_endio+0x9c/0x240 __dm_io_complete+0x214/0x260 clone_endio+0xe8/0x214 bio_endio+0x218/0x240 blk_zone_wplug_bio_work+0x204/0x48c process_one_work+0x26c/0x65c worker_thread+0x33c/0x498 kthread+0x110/0x134 ret_from_fork+0x10/0x20 Cc: stable@vger.kernel.org Cc: Damien Le Moal Cc: Christoph Hellwig Fixes: dd291d77cc90 ("block: Introduce zone write plugging") Signed-off-by: Bart Van Assche Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20250825182720.1697203-1-bvanassche@acm.org Signed-off-by: Jens 
Axboe --- block/blk-zoned.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index ef43aaca49f4..5e2a5788dc3b 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -1286,14 +1286,14 @@ static void blk_zone_wplug_bio_work(struct work_struct *work) struct block_device *bdev; unsigned long flags; struct bio *bio; + bool prepared; /* * Submit the next plugged BIO. If we do not have any, clear * the plugged flag. */ - spin_lock_irqsave(&zwplug->lock, flags); - again: + spin_lock_irqsave(&zwplug->lock, flags); bio = bio_list_pop(&zwplug->bio_list); if (!bio) { zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED; @@ -1304,13 +1304,14 @@ static void blk_zone_wplug_bio_work(struct work_struct *work) trace_blk_zone_wplug_bio(zwplug->disk->queue, zwplug->zone_no, bio->bi_iter.bi_sector, bio_sectors(bio)); - if (!blk_zone_wplug_prepare_bio(zwplug, bio)) { + prepared = blk_zone_wplug_prepare_bio(zwplug, bio); + spin_unlock_irqrestore(&zwplug->lock, flags); + + if (!prepared) { blk_zone_wplug_bio_io_error(zwplug, bio); goto again; } - spin_unlock_irqrestore(&zwplug->lock, flags); - bdev = bio->bi_bdev; /* From e3ef9445cd9d90e43de0bd3cd55d437773dfd139 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Tue, 26 Aug 2025 22:00:32 +0530 Subject: [PATCH 2360/2411] block: validate QoS before calling __rq_qos_done_bio() If a bio has BIO_QOS_xxx set, it doesn't guarantee that q->rq_qos is also present, at least for stacked block devices. For instance, in the case of NVMe with multipath enabled, the bottom device may have QoS enabled while the top device doesn't. So always validate that QoS is enabled and q->rq_qos is present before calling __rq_qos_done_bio().
Fixes: 370ac285f23a ("block: avoid cpu_hotplug_lock depedency on freeze_lock") Reported-by: Venkat Rao Bagalkote Closes: https://lore.kernel.org/all/3a07b752-06a4-4eee-b302-f4669feb859d@linux.ibm.com/ Signed-off-by: Nilay Shroff Link: https://lore.kernel.org/r/20250826163128.1952394-1-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- block/blk-rq-qos.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 1fe22000a379..b538f2c0febc 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -149,12 +149,15 @@ static inline void rq_qos_done_bio(struct bio *bio) q = bdev_get_queue(bio->bi_bdev); /* - * If a bio has BIO_QOS_xxx set, it implicitly implies that - * q->rq_qos is present. So, we skip re-checking q->rq_qos - * here as an extra optimization and directly call - * __rq_qos_done_bio(). + * A BIO may carry BIO_QOS_* flags even if the associated request_queue + * does not have rq_qos enabled. This can happen with stacked block + * devices — for example, NVMe multipath, where it's possible that the + * bottom device has QoS enabled but the top device does not. Therefore, + * always verify that q->rq_qos is present and QoS is enabled before + * calling __rq_qos_done_bio(). 
*/ - __rq_qos_done_bio(q->rq_qos, bio); + if (test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags) && q->rq_qos) + __rq_qos_done_bio(q->rq_qos, bio); } static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio) From e81a7f65288c7e2cfb7e7890f648e099fd885ab3 Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Fri, 22 Aug 2025 11:13:24 +0200 Subject: [PATCH 2361/2411] net: usb: qmi_wwan: add Telit Cinterion LE910C4-WWX new compositions Add the following Telit Cinterion LE910C4-WWX new compositions: 0x1034: tty (AT) + tty (AT) + rmnet T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1034 Rev=00.00 S: Manufacturer=Telit S: Product=LE910C4-WWX S: SerialNumber=93f617e7 C: #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x1037: tty (diag) + tty (Telit custom) + tty (AT) + tty (AT) + rmnet T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 15 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1037 Rev=00.00 S: Manufacturer=Telit S: Product=LE910C4-WWX S: SerialNumber=93f617e7 C: #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) 
Sub=ff Prot=ff Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x1038: tty (Telit custom) + tty (AT) + tty (AT) + rmnet T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 9 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1038 Rev=00.00 S: Manufacturer=Telit S: Product=LE910C4-WWX S: SerialNumber=93f617e7 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) 
MxPS= 64 Ivl=2ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Cc: stable@vger.kernel.org Signed-off-by: Fabio Porcedda Link: https://patch.msgid.link/20250822091324.39558-1-Fabio.Porcedda@telit.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index e56901bb6ebc..11352d85475a 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1355,6 +1355,9 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1031, 3)}, /* Telit LE910C1-EUX */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1034, 2)}, /* Telit LE910C4-WWX */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1037, 4)}, /* Telit LE910C4-WWX */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1038, 3)}, /* Telit LE910C4-WWX */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x103a, 0)}, /* Telit LE910C4-WWX */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */ From 882e57cbc7204662f6c5672d5b04336c1d790b03 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 25 Aug 2025 08:55:43 +0200 Subject: [PATCH 2362/2411] phy: mscc: Fix when PTP clock is register and unregister It looks like every time the interface was set down and up, the driver was creating a new ptp clock. On top of this, the function ptp_clock_unregister was never called. Therefore, fix this by calling ptp_clock_register and initializing the mii_ts struct inside the probe function, and by calling ptp_clock_unregister when the driver is removed.
Fixes: 7d272e63e0979d ("net: phy: mscc: timestamping and PHC support") Signed-off-by: Horatiu Vultur Reviewed-by: Vadim Fedorenko Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250825065543.2916334-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mscc/mscc.h | 4 ++++ drivers/net/phy/mscc/mscc_main.c | 4 +--- drivers/net/phy/mscc/mscc_ptp.c | 34 ++++++++++++++++++++------------ 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/net/phy/mscc/mscc.h b/drivers/net/phy/mscc/mscc.h index 58c6d47fbe04..2bfe314ef881 100644 --- a/drivers/net/phy/mscc/mscc.h +++ b/drivers/net/phy/mscc/mscc.h @@ -481,6 +481,7 @@ static inline void vsc8584_config_macsec_intr(struct phy_device *phydev) void vsc85xx_link_change_notify(struct phy_device *phydev); void vsc8584_config_ts_intr(struct phy_device *phydev); int vsc8584_ptp_init(struct phy_device *phydev); +void vsc8584_ptp_deinit(struct phy_device *phydev); int vsc8584_ptp_probe_once(struct phy_device *phydev); int vsc8584_ptp_probe(struct phy_device *phydev); irqreturn_t vsc8584_handle_ts_interrupt(struct phy_device *phydev); @@ -495,6 +496,9 @@ static inline int vsc8584_ptp_init(struct phy_device *phydev) { return 0; } +static inline void vsc8584_ptp_deinit(struct phy_device *phydev) +{ +} static inline int vsc8584_ptp_probe_once(struct phy_device *phydev) { return 0; diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index f1c9ce351ab4..24c75903f535 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -2337,9 +2337,7 @@ static int vsc85xx_probe(struct phy_device *phydev) static void vsc85xx_remove(struct phy_device *phydev) { - struct vsc8531_private *priv = phydev->priv; - - skb_queue_purge(&priv->rx_skbs_list); + vsc8584_ptp_deinit(phydev); } /* Microsemi VSC85xx PHYs */ diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index de6c7312e8f2..72847320cb65 100644 --- 
a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -1298,7 +1298,6 @@ static void vsc8584_set_input_clk_configured(struct phy_device *phydev) static int __vsc8584_init_ptp(struct phy_device *phydev) { - struct vsc8531_private *vsc8531 = phydev->priv; static const u32 ltc_seq_e[] = { 0, 400000, 0, 0, 0 }; static const u8 ltc_seq_a[] = { 8, 6, 5, 4, 2 }; u32 val; @@ -1515,17 +1514,7 @@ static int __vsc8584_init_ptp(struct phy_device *phydev) vsc85xx_ts_eth_cmp1_sig(phydev); - vsc8531->mii_ts.rxtstamp = vsc85xx_rxtstamp; - vsc8531->mii_ts.txtstamp = vsc85xx_txtstamp; - vsc8531->mii_ts.hwtstamp = vsc85xx_hwtstamp; - vsc8531->mii_ts.ts_info = vsc85xx_ts_info; - phydev->mii_ts = &vsc8531->mii_ts; - - memcpy(&vsc8531->ptp->caps, &vsc85xx_clk_caps, sizeof(vsc85xx_clk_caps)); - - vsc8531->ptp->ptp_clock = ptp_clock_register(&vsc8531->ptp->caps, - &phydev->mdio.dev); - return PTR_ERR_OR_ZERO(vsc8531->ptp->ptp_clock); + return 0; } void vsc8584_config_ts_intr(struct phy_device *phydev) @@ -1552,6 +1541,16 @@ int vsc8584_ptp_init(struct phy_device *phydev) return 0; } +void vsc8584_ptp_deinit(struct phy_device *phydev) +{ + struct vsc8531_private *vsc8531 = phydev->priv; + + if (vsc8531->ptp->ptp_clock) { + ptp_clock_unregister(vsc8531->ptp->ptp_clock); + skb_queue_purge(&vsc8531->rx_skbs_list); + } +} + irqreturn_t vsc8584_handle_ts_interrupt(struct phy_device *phydev) { struct vsc8531_private *priv = phydev->priv; @@ -1612,7 +1611,16 @@ int vsc8584_ptp_probe(struct phy_device *phydev) vsc8531->ptp->phydev = phydev; - return 0; + vsc8531->mii_ts.rxtstamp = vsc85xx_rxtstamp; + vsc8531->mii_ts.txtstamp = vsc85xx_txtstamp; + vsc8531->mii_ts.hwtstamp = vsc85xx_hwtstamp; + vsc8531->mii_ts.ts_info = vsc85xx_ts_info; + phydev->mii_ts = &vsc8531->mii_ts; + + memcpy(&vsc8531->ptp->caps, &vsc85xx_clk_caps, sizeof(vsc85xx_clk_caps)); + vsc8531->ptp->ptp_clock = ptp_clock_register(&vsc8531->ptp->caps, + &phydev->mdio.dev); + return 
PTR_ERR_OR_ZERO(vsc8531->ptp->ptp_clock); } int vsc8584_ptp_probe_once(struct phy_device *phydev) From 26c1f55f7ec8d1a4bde8c50e4ee04e3c8c6b27e8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 25 Aug 2025 08:57:53 -0700 Subject: [PATCH 2363/2411] MAINTAINERS: retire Boris from TLS maintainers There's a steady stream of TLS changes and bugs. We need active maintainers in this area, and Boris hasn't been participating much in upstream work. Move him to CREDITS. While at it also add Dave Watson there who was the author of the initial SW implementation, AFAIU. Link: https://patch.msgid.link/20250825155753.2178045-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- CREDITS | 7 +++++++ MAINTAINERS | 1 - 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index a357f9cbb05d..a687c3c35c4c 100644 --- a/CREDITS +++ b/CREDITS @@ -3222,6 +3222,10 @@ D: AIC5800 IEEE 1394, RAW I/O on 1394 D: Starter of Linux1394 effort S: ask per mail for current address +N: Boris Pismenny +E: borisp@mellanox.com +D: Kernel TLS implementation and offload support. + N: Nicolas Pitre E: nico@fluxnic.net D: StrongARM SA1100 support integrator & hacker @@ -4168,6 +4172,9 @@ S: 1513 Brewster Dr. S: Carrollton, TX 75010 S: USA +N: Dave Watson +D: Kernel TLS implementation. + N: Tim Waugh E: tim@cyberelk.net D: Co-architect of the parallel-port sharing system diff --git a/MAINTAINERS b/MAINTAINERS index 2720544cd91f..1897d8b45df4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17818,7 +17818,6 @@ F: net/ipv6/syncookies.c F: net/ipv6/tcp*.c NETWORKING [TLS] -M: Boris Pismenny M: John Fastabend M: Jakub Kicinski L: netdev@vger.kernel.org From 16c8a3a67ec799fc731919e3e51be9af6cdf541d Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Mon, 25 Aug 2025 13:21:34 -0400 Subject: [PATCH 2364/2411] net: macb: Fix offset error in gem_update_stats hw_stats now has only one variable for tx_octets/rx_octets, so we should only increment p once, not twice. 
This would cause the statistics to be reported under the wrong categories in `ethtool -S --all-groups` (which uses hw_stats) but not `ethtool -S` (which uses ethtool_stats). Signed-off-by: Sean Anderson Fixes: f6af690a295a ("net: cadence: macb: Report standard stats") Link: https://patch.msgid.link/20250825172134.681861-1-sean.anderson@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index b29c3beae0b2..106885451147 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3090,7 +3090,7 @@ static void gem_update_stats(struct macb *bp) /* Add GEM_OCTTXH, GEM_OCTRXH */ val = bp->macb_reg_readl(bp, offset + 4); bp->ethtool_stats[i] += ((u64)val) << 32; - *(p++) += ((u64)val) << 32; + *p += ((u64)val) << 32; } } From d9b0ca1334d8a9a03bef45e95825564c56ca3367 Mon Sep 17 00:00:00 2001 From: Boon Khai Ng Date: Mon, 25 Aug 2025 15:13:21 +0800 Subject: [PATCH 2365/2411] MAINTAINERS: Update maintainer information for Altera Triple Speed Ethernet Driver The previous maintainer, Joyce Ooi, is no longer with the company, and her email is no longer reachable. As a result, the maintainer information for the Altera Triple Speed Ethernet Driver has been updated. Changes: - Replaced Joyce Ooi's email with Boon Khai Ng's email address. - Kept the component's status as "Maintained". 
Signed-off-by: Boon Khai Ng Link: https://patch.msgid.link/20250825071321.30131-1-boon.khai.ng@altera.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1897d8b45df4..c5b47955d2a6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -937,7 +937,7 @@ S: Maintained F: drivers/gpio/gpio-altera.c ALTERA TRIPLE SPEED ETHERNET DRIVER -M: Joyce Ooi +M: Boon Khai Ng L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/altera/ From 2747328ba2714f1a7454208dbbc1dc0631990b4a Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Mon, 25 Aug 2025 10:59:25 -0700 Subject: [PATCH 2366/2411] bnxt_en: Fix memory corruption when FW resources change during ifdown bnxt_set_dflt_rings() assumes that it is always called before any TC has been created. So it doesn't take bp->num_tc into account and assumes that it is always 0 or 1. In the FW resource or capability change scenario, the FW will return flags in bnxt_hwrm_if_change() that will cause the driver to reinitialize and call bnxt_cancel_reservations(). This will lead to bnxt_init_dflt_ring_mode() calling bnxt_set_dflt_rings() and bp->num_tc may be greater than 1. This will cause bp->tx_ring[] to be sized too small and cause memory corruption in bnxt_alloc_cp_rings(). Fix it by properly scaling the TX rings by bp->num_tc in the code paths mentioned above. Add 2 helper functions to determine bp->tx_nr_rings and bp->tx_nr_rings_per_tc. 
Fixes: ec5d31e3c15d ("bnxt_en: Handle firmware reset status during IF_UP.") Reviewed-by: Kalesh AP Reviewed-by: Andy Gospodarek Signed-off-by: Sreekanth Reddy Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250825175927.459987-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 207a8bb36ae5..1f5c06f1296b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12851,6 +12851,17 @@ static int bnxt_set_xps_mapping(struct bnxt *bp) return rc; } +static int bnxt_tx_nr_rings(struct bnxt *bp) +{ + return bp->num_tc ? bp->tx_nr_rings_per_tc * bp->num_tc : + bp->tx_nr_rings_per_tc; +} + +static int bnxt_tx_nr_rings_per_tc(struct bnxt *bp) +{ + return bp->num_tc ? bp->tx_nr_rings / bp->num_tc : bp->tx_nr_rings; +} + static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) { int rc = 0; @@ -16325,7 +16336,7 @@ static void bnxt_trim_dflt_sh_rings(struct bnxt *bp) bp->cp_nr_rings = min_t(int, bp->tx_nr_rings_per_tc, bp->rx_nr_rings); bp->rx_nr_rings = bp->cp_nr_rings; bp->tx_nr_rings_per_tc = bp->cp_nr_rings; - bp->tx_nr_rings = bp->tx_nr_rings_per_tc; + bp->tx_nr_rings = bnxt_tx_nr_rings(bp); } static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) @@ -16357,7 +16368,7 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) bnxt_trim_dflt_sh_rings(bp); else bp->cp_nr_rings = bp->tx_nr_rings_per_tc + bp->rx_nr_rings; - bp->tx_nr_rings = bp->tx_nr_rings_per_tc; + bp->tx_nr_rings = bnxt_tx_nr_rings(bp); avail_msix = bnxt_get_max_func_irqs(bp) - bp->cp_nr_rings; if (avail_msix >= BNXT_MIN_ROCE_CP_RINGS) { @@ -16370,7 +16381,7 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) rc = __bnxt_reserve_rings(bp); if (rc && rc != -ENODEV) netdev_warn(bp->dev, 
"Unable to reserve tx rings\n"); - bp->tx_nr_rings_per_tc = bp->tx_nr_rings; + bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); if (sh) bnxt_trim_dflt_sh_rings(bp); @@ -16379,7 +16390,7 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) rc = __bnxt_reserve_rings(bp); if (rc && rc != -ENODEV) netdev_warn(bp->dev, "2nd rings reservation failed.\n"); - bp->tx_nr_rings_per_tc = bp->tx_nr_rings; + bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); } if (BNXT_CHIP_TYPE_NITRO_A0(bp)) { bp->rx_nr_rings++; @@ -16413,7 +16424,7 @@ static int bnxt_init_dflt_ring_mode(struct bnxt *bp) if (rc) goto init_dflt_ring_err; - bp->tx_nr_rings_per_tc = bp->tx_nr_rings; + bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); bnxt_set_dflt_rfs(bp); From 1ee581c24dfdcbc6de25aac95a48c1f08e9a542c Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Aug 2025 10:59:26 -0700 Subject: [PATCH 2367/2411] bnxt_en: Adjust TX rings if reservation is less than requested Before we accept an ethtool request to increase a resource (such as rings), we call the FW to check that the requested resource is likely available first before we commit. But it is still possible that the actual reservation or allocation can fail. The existing code is missing the logic to adjust the TX rings in case the reserved TX rings are less than requested. Add a warning message (a similar message for RX rings already exists) and add the logic to adjust the TX rings. Without this fix, the number of TX rings reported to the stack can exceed the actual TX rings and ethtool -l will report more than the actual TX rings. 
Fixes: 674f50a5b026 ("bnxt_en: Implement new method to reserve rings.") Reviewed-by: Kalesh AP Reviewed-by: Somnath Kotur Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250825175927.459987-3-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1f5c06f1296b..86fc9d340dab 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8024,6 +8024,11 @@ static int __bnxt_reserve_rings(struct bnxt *bp) hwr.rx = rx_rings << 1; tx_cp = bnxt_num_tx_to_cp(bp, hwr.tx); hwr.cp = sh ? max_t(int, tx_cp, rx_rings) : tx_cp + rx_rings; + if (hwr.tx != bp->tx_nr_rings) { + netdev_warn(bp->dev, + "Able to reserve only %d out of %d requested TX rings\n", + hwr.tx, bp->tx_nr_rings); + } bp->tx_nr_rings = hwr.tx; /* If we cannot reserve all the RX rings, reset the RSS map only @@ -12879,6 +12884,13 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) if (rc) return rc; + /* Make adjustments if reserved TX rings are less than requested */ + bp->tx_nr_rings -= bp->tx_nr_rings_xdp; + bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); + if (bp->tx_nr_rings_xdp) { + bp->tx_nr_rings_xdp = bp->tx_nr_rings_per_tc; + bp->tx_nr_rings += bp->tx_nr_rings_xdp; + } rc = bnxt_alloc_mem(bp, irq_re_init); if (rc) { netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc); From b4fc8faacfea2538184a1dbd616ae9447a361f3d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Aug 2025 10:59:27 -0700 Subject: [PATCH 2368/2411] bnxt_en: Fix stats context reservation logic The HW resource reservation logic allows the L2 driver to use the RoCE resources if the RoCE driver is not registered. 
When calculating the stats contexts available for L2, we should not blindly subtract the stats contexts reserved for RoCE unless the RoCE driver is registered. This bug may cause the L2 rings to be less than the number requested when we are close to running out of stats contexts. Fixes: 2e4592dc9bee ("bnxt_en: Change MSIX/NQs allocation policy") Reviewed-by: Kalesh AP Reviewed-by: Somnath Kotur Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250825175927.459987-4-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 86fc9d340dab..31e3d825b4bc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8016,7 +8016,8 @@ static int __bnxt_reserve_rings(struct bnxt *bp) } rx_rings = min_t(int, rx_rings, hwr.grp); hwr.cp = min_t(int, hwr.cp, bp->cp_nr_rings); - if (hwr.stat > bnxt_get_ulp_stat_ctxs(bp)) + if (bnxt_ulp_registered(bp->edev) && + hwr.stat > bnxt_get_ulp_stat_ctxs(bp)) hwr.stat -= bnxt_get_ulp_stat_ctxs(bp); hwr.cp = min_t(int, hwr.cp, hwr.stat); rc = bnxt_trim_rings(bp, &rx_rings, &hwr.tx, hwr.cp, sh); From 2c0a959bebdc1ada13cf9a8242f177c5400299e6 Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Mon, 25 Aug 2025 17:34:24 +0300 Subject: [PATCH 2369/2411] net/mlx5: HWS, Fix memory leak in hws_pool_buddy_init error path In the error path of hws_pool_buddy_init(), the buddy allocator cleanup doesn't free the allocator structure itself, causing a memory leak. Add the missing kfree() to properly release all allocated memory. 
Fixes: c61afff94373 ("net/mlx5: HWS, added memory management handling") Signed-off-by: Lama Kayal Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-2-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c index 7e37d6e9eb83..7b5071c3df36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c @@ -124,6 +124,7 @@ static int hws_pool_buddy_init(struct mlx5hws_pool *pool) mlx5hws_err(pool->ctx, "Failed to create resource type: %d size %zu\n", pool->type, pool->alloc_log_sz); mlx5hws_buddy_cleanup(buddy); + kfree(buddy); return -ENOMEM; } From a630f83592cdad1253523a1b760cfe78fef6cd9c Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Mon, 25 Aug 2025 17:34:25 +0300 Subject: [PATCH 2370/2411] net/mlx5: HWS, Fix memory leak in hws_action_get_shared_stc_nic error flow When an invalid stc_type is provided, the function allocates memory for shared_stc but jumps to unlock_and_out without freeing it, causing a memory leak. Fix by jumping to free_shared_stc label instead to ensure proper cleanup. 
Fixes: 504e536d9010 ("net/mlx5: HWS, added actions handling") Signed-off-by: Lama Kayal Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-3-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c index 396804369b00..6b36a4a7d895 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c @@ -117,7 +117,7 @@ static int hws_action_get_shared_stc_nic(struct mlx5hws_context *ctx, mlx5hws_err(ctx, "No such stc_type: %d\n", stc_type); pr_warn("HWS: Invalid stc_type: %d\n", stc_type); ret = -EINVAL; - goto unlock_and_out; + goto free_shared_stc; } ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, tbl_type, From 24b6e53140475b56cadcccd4e82a93aa5bacf1eb Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Mon, 25 Aug 2025 17:34:26 +0300 Subject: [PATCH 2371/2411] net/mlx5: HWS, Fix uninitialized variables in mlx5hws_pat_calc_nop error flow In mlx5hws_pat_calc_nop(), src_field and dst_field are passed to hws_action_modify_get_target_fields() which should set their values. However, if an invalid action type is encountered, these variables remain uninitialized and are later used to update prev_src_field and prev_dst_field. Initialize both variables to INVALID_FIELD to ensure they have defined values in all code paths. 
Fixes: 01e035fd0380 ("net/mlx5: HWS, handle modify header actions dependency") Signed-off-by: Lama Kayal Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-4-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c index 51e4c551e0ef..622fd579f140 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c @@ -527,7 +527,6 @@ int mlx5hws_pat_calc_nop(__be64 *pattern, size_t num_actions, u32 *nop_locations, __be64 *new_pat) { u16 prev_src_field = INVALID_FIELD, prev_dst_field = INVALID_FIELD; - u16 src_field, dst_field; u8 action_type; bool dependent; size_t i, j; @@ -539,6 +538,9 @@ int mlx5hws_pat_calc_nop(__be64 *pattern, size_t num_actions, return 0; for (i = 0, j = 0; i < num_actions; i++, j++) { + u16 src_field = INVALID_FIELD; + u16 dst_field = INVALID_FIELD; + if (j >= max_actions) return -EINVAL; From 00a50e4e8974cbf5d6a1dc91cfa5cce4aa7af05a Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Mon, 25 Aug 2025 17:34:27 +0300 Subject: [PATCH 2372/2411] net/mlx5: HWS, Fix pattern destruction in mlx5hws_pat_get_pattern error path In mlx5hws_pat_get_pattern(), when mlx5hws_pat_add_pattern_to_cache() fails, the function attempts to clean up the pattern created by mlx5hws_cmd_header_modify_pattern_create(). However, it incorrectly uses *pattern_id which hasn't been set yet, instead of the local ptrn_id variable that contains the actual pattern ID. This results in attempting to destroy a pattern using uninitialized data from the output parameter, rather than the valid pattern ID returned by the firmware. 
Use ptrn_id instead of *pattern_id in the cleanup path to properly destroy the created pattern. Fixes: aefc15a0fa1c ("net/mlx5: HWS, added modify header pattern and args handling") Signed-off-by: Lama Kayal Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-5-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c index 622fd579f140..d56271a9e4f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c @@ -279,7 +279,7 @@ int mlx5hws_pat_get_pattern(struct mlx5hws_context *ctx, return ret; clean_pattern: - mlx5hws_cmd_header_modify_pattern_destroy(ctx->mdev, *pattern_id); + mlx5hws_cmd_header_modify_pattern_destroy(ctx->mdev, ptrn_id); out_unlock: mutex_unlock(&ctx->pattern_cache->lock); return ret; From 34cc6a54914f478c93e176450fae6313404f9f74 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 25 Aug 2025 17:34:28 +0300 Subject: [PATCH 2373/2411] net/mlx5: Reload auxiliary drivers on fw_activate The devlink reload fw_activate command performs firmware activation followed by driver reload, while devlink reload driver_reinit triggers only driver reload. However, the driver reload logic differs between the two modes, as on driver_reinit mode mlx5 also reloads auxiliary drivers, while in fw_activate mode the auxiliary drivers are suspended where applicable. Additionally, following the cited commit, if the device has multiple PFs, the behavior during fw_activate may vary between PFs: one PF may suspend auxiliary drivers, while another reloads them. Align devlink dev reload fw_activate behavior with devlink dev reload driver_reinit, to reload all auxiliary drivers. 
Fixes: 72ed5d5624af ("net/mlx5: Suspend auxiliary devices only in case of PCI device suspend") Signed-off-by: Moshe Shemesh Reviewed-by: Tariq Toukan Reviewed-by: Akiva Goldberger Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-6-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 3ffa3fbacd16..26091e7536d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -160,7 +160,7 @@ static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netli if (err) return err; - mlx5_unload_one_devl_locked(dev, true); + mlx5_unload_one_devl_locked(dev, false); err = mlx5_health_wait_pci_up(dev); if (err) NL_SET_ERR_MSG_MOD(extack, "FW activate aborted, PCI reads fail after reset"); From 902a8bc23a24882200f57cadc270e15a2cfaf2bb Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 25 Aug 2025 17:34:29 +0300 Subject: [PATCH 2374/2411] net/mlx5: Fix lockdep assertion on sync reset unload event Fix lockdep assertion triggered during sync reset unload event. When the sync reset flow is initiated using the devlink reload fw_activate option, the PF already holds the devlink lock while handling unload event. In this case, delegate sync reset unload event handling back to the devlink callback process to avoid double-locking and resolve the lockdep warning. Kernel log: WARNING: CPU: 9 PID: 1578 at devl_assert_locked+0x31/0x40 [...] Call Trace: mlx5_unload_one_devl_locked+0x2c/0xc0 [mlx5_core] mlx5_sync_reset_unload_event+0xaf/0x2f0 [mlx5_core] process_one_work+0x222/0x640 worker_thread+0x199/0x350 kthread+0x10b/0x230 ? __pfx_worker_thread+0x10/0x10 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x8e/0x100 ? 
__pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Fixes: 7a9770f1bfea ("net/mlx5: Handle sync reset unload event") Signed-off-by: Moshe Shemesh Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-7-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/devlink.c | 2 +- .../ethernet/mellanox/mlx5/core/fw_reset.c | 126 ++++++++++-------- .../ethernet/mellanox/mlx5/core/fw_reset.h | 1 + 3 files changed, 72 insertions(+), 57 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 26091e7536d3..2c0e0c16ca90 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -160,7 +160,7 @@ static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netli if (err) return err; - mlx5_unload_one_devl_locked(dev, false); + mlx5_sync_reset_unload_flow(dev, true); err = mlx5_health_wait_pci_up(dev); if (err) NL_SET_ERR_MSG_MOD(extack, "FW activate aborted, PCI reads fail after reset"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 69933addd921..38b9b184ae01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -12,7 +12,8 @@ enum { MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, MLX5_FW_RESET_FLAGS_PENDING_COMP, MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, - MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED + MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, + MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, }; struct mlx5_fw_reset { @@ -219,7 +220,7 @@ int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false); } -static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unloaded) +static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = 
dev->priv.fw_reset; struct devlink *devlink = priv_to_devlink(dev); @@ -228,8 +229,7 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unload if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { complete(&fw_reset->done); } else { - if (!unloaded) - mlx5_unload_one(dev, false); + mlx5_sync_reset_unload_flow(dev, false); if (mlx5_health_wait_pci_up(dev)) mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); else @@ -272,7 +272,7 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work) mlx5_sync_reset_clear_reset_requested(dev, false); mlx5_enter_error_state(dev, true); - mlx5_fw_reset_complete_reload(dev, false); + mlx5_fw_reset_complete_reload(dev); } #define MLX5_RESET_POLL_INTERVAL (HZ / 10) @@ -586,65 +586,23 @@ static int mlx5_sync_pci_reset(struct mlx5_core_dev *dev, u8 reset_method) return err; } -static void mlx5_sync_reset_now_event(struct work_struct *work) +void mlx5_sync_reset_unload_flow(struct mlx5_core_dev *dev, bool locked) { - struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, - reset_now_work); - struct mlx5_core_dev *dev = fw_reset->dev; - int err; - - if (mlx5_sync_reset_clear_reset_requested(dev, false)) - return; - - mlx5_core_warn(dev, "Sync Reset now. 
Device is going to reset.\n"); - - err = mlx5_cmd_fast_teardown_hca(dev); - if (err) { - mlx5_core_warn(dev, "Fast teardown failed, no reset done, err %d\n", err); - goto done; - } - - err = mlx5_sync_pci_reset(dev, fw_reset->reset_method); - if (err) { - mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, no reset done, err %d\n", err); - set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags); - } - - mlx5_enter_error_state(dev, true); -done: - fw_reset->ret = err; - mlx5_fw_reset_complete_reload(dev, false); -} - -static void mlx5_sync_reset_unload_event(struct work_struct *work) -{ - struct mlx5_fw_reset *fw_reset; - struct mlx5_core_dev *dev; + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; unsigned long timeout; int poll_freq = 20; bool reset_action; u8 rst_state; int err; - fw_reset = container_of(work, struct mlx5_fw_reset, reset_unload_work); - dev = fw_reset->dev; - - if (mlx5_sync_reset_clear_reset_requested(dev, false)) - return; - - mlx5_core_warn(dev, "Sync Reset Unload. Function is forced down.\n"); - - err = mlx5_cmd_fast_teardown_hca(dev); - if (err) - mlx5_core_warn(dev, "Fast teardown failed, unloading, err %d\n", err); - else - mlx5_enter_error_state(dev, true); - - if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) + if (locked) mlx5_unload_one_devl_locked(dev, false); else mlx5_unload_one(dev, false); + if (!test_bit(MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, &fw_reset->reset_flags)) + return; + mlx5_set_fw_rst_ack(dev); mlx5_core_warn(dev, "Sync Reset Unload done, device reset expected\n"); @@ -672,17 +630,73 @@ static void mlx5_sync_reset_unload_event(struct work_struct *work) goto done; } - mlx5_core_warn(dev, "Sync Reset, got reset action. rst_state = %u\n", rst_state); + mlx5_core_warn(dev, "Sync Reset, got reset action. 
rst_state = %u\n", + rst_state); if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ) { err = mlx5_sync_pci_reset(dev, fw_reset->reset_method); if (err) { - mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, err %d\n", err); + mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, err %d\n", + err); fw_reset->ret = err; } } done: - mlx5_fw_reset_complete_reload(dev, true); + clear_bit(MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, &fw_reset->reset_flags); +} + +static void mlx5_sync_reset_now_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_now_work); + struct mlx5_core_dev *dev = fw_reset->dev; + int err; + + if (mlx5_sync_reset_clear_reset_requested(dev, false)) + return; + + mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n"); + + err = mlx5_cmd_fast_teardown_hca(dev); + if (err) { + mlx5_core_warn(dev, "Fast teardown failed, no reset done, err %d\n", err); + goto done; + } + + err = mlx5_sync_pci_reset(dev, fw_reset->reset_method); + if (err) { + mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, no reset done, err %d\n", err); + set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags); + } + + mlx5_enter_error_state(dev, true); +done: + fw_reset->ret = err; + mlx5_fw_reset_complete_reload(dev); +} + +static void mlx5_sync_reset_unload_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset; + struct mlx5_core_dev *dev; + int err; + + fw_reset = container_of(work, struct mlx5_fw_reset, reset_unload_work); + dev = fw_reset->dev; + + if (mlx5_sync_reset_clear_reset_requested(dev, false)) + return; + + set_bit(MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, &fw_reset->reset_flags); + mlx5_core_warn(dev, "Sync Reset Unload. 
Function is forced down.\n"); + + err = mlx5_cmd_fast_teardown_hca(dev); + if (err) + mlx5_core_warn(dev, "Fast teardown failed, unloading, err %d\n", err); + else + mlx5_enter_error_state(dev, true); + + mlx5_fw_reset_complete_reload(dev); } static void mlx5_sync_reset_abort_event(struct work_struct *work) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h index ea527d06a85f..d5b28525c960 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h @@ -12,6 +12,7 @@ int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev); int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev); +void mlx5_sync_reset_unload_flow(struct mlx5_core_dev *dev, bool locked); int mlx5_fw_reset_verify_fw_complete(struct mlx5_core_dev *dev, struct netlink_ext_ack *extack); void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev); From 26e42ec7712d392d561964514b1f253b1a96f42d Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 25 Aug 2025 17:34:30 +0300 Subject: [PATCH 2375/2411] net/mlx5: Nack sync reset when SFs are present If PF (Physical Function) has SFs (Sub-Functions), since the SFs are not taking part in the synchronization flow, sync reset can lead to fatal error on the SFs, as the function will be closed unexpectedly from the SF point of view. Add a check to prevent sync reset when there are SFs on a PF device which is not ECPF, as ECPF is torn down gracefully before reset.
Fixes: 92501fa6e421 ("net/mlx5: Ack on sync_reset_request only if PF can do reset_now") Signed-off-by: Moshe Shemesh Reviewed-by: Parav Pandit Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-8-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 6 ++++++ drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c | 10 ++++++++++ drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h | 6 ++++++ 3 files changed, 22 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 38b9b184ae01..22995131824a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -6,6 +6,7 @@ #include "fw_reset.h" #include "diag/fw_tracer.h" #include "lib/tout.h" +#include "sf/sf.h" enum { MLX5_FW_RESET_FLAGS_RESET_REQUESTED, @@ -428,6 +429,11 @@ static bool mlx5_is_reset_now_capable(struct mlx5_core_dev *dev, return false; } + if (!mlx5_core_is_ecpf(dev) && !mlx5_sf_table_empty(dev)) { + mlx5_core_warn(dev, "SFs should be removed before reset\n"); + return false; + } + #if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE) if (reset_method != MLX5_MFRL_REG_PCI_RESET_METHOD_HOT_RESET) { err = mlx5_check_hotplug_interrupt(dev, bridge); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index 0864ba625c07..3304f25cc805 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -518,3 +518,13 @@ void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev) WARN_ON(!xa_empty(&table->function_ids)); kfree(table); } + +bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev) +{ + struct mlx5_sf_table *table = dev->priv.sf_table; + + if (!table) + return true; + + return xa_empty(&table->function_ids); +} diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h index 860f9ddb7107..89559a37997a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h @@ -17,6 +17,7 @@ void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev); int mlx5_sf_table_init(struct mlx5_core_dev *dev); void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev); +bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev); int mlx5_devlink_sf_port_new(struct devlink *devlink, const struct devlink_port_new_attrs *add_attr, @@ -61,6 +62,11 @@ static inline void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev) { } +static inline bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev) +{ + return true; +} + #endif #endif From cf9a8627b9a369ba01d37be6f71b297beb688faa Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 25 Aug 2025 17:34:31 +0300 Subject: [PATCH 2376/2411] net/mlx5: Prevent flow steering mode changes in switchdev mode Changing flow steering modes is not allowed when eswitch is in switchdev mode. This fix ensures that any steering mode change, including to firmware steering, is correctly blocked while eswitch mode is switchdev. 
Fixes: e890acd5ff18 ("net/mlx5: Add devlink flow_steering_mode parameter") Signed-off-by: Moshe Shemesh Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-9-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index d87392360dbd..cb165085a4c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -3734,6 +3734,13 @@ static int mlx5_fs_mode_validate(struct devlink *devlink, u32 id, char *value = val.vstr; u8 eswitch_mode; + eswitch_mode = mlx5_eswitch_mode(dev); + if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Changing fs mode is not supported when eswitch offloads enabled."); + return -EOPNOTSUPP; + } + if (!strcmp(value, "dmfs")) return 0; @@ -3759,14 +3766,6 @@ static int mlx5_fs_mode_validate(struct devlink *devlink, u32 id, return -EINVAL; } - eswitch_mode = mlx5_eswitch_mode(dev); - if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) { - NL_SET_ERR_MSG_FMT_MOD(extack, - "Moving to %s is not supported when eswitch offloads enabled.", - value); - return -EOPNOTSUPP; - } - return 0; } From ceddedc969f0532b7c62ca971ee50d519d2bc0cb Mon Sep 17 00:00:00 2001 From: Alexei Lazar Date: Mon, 25 Aug 2025 17:34:32 +0300 Subject: [PATCH 2377/2411] net/mlx5e: Update and set Xon/Xoff upon MTU set Xon/Xoff sizes are derived from calculations that include the MTU size. Set Xon/Xoff when MTU is set. If setting Xon/Xoff fails, restore the previous MTU.
Fixes: 0696d60853d5 ("net/mlx5e: Receive buffer configuration") Signed-off-by: Alexei Lazar Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-10-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en/port_buffer.h | 12 ++++++++++++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 17 ++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h index f4a19ffbb641..66d276a1be83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h @@ -66,11 +66,23 @@ struct mlx5e_port_buffer { struct mlx5e_bufferx_reg buffer[MLX5E_MAX_NETWORK_BUFFER]; }; +#ifdef CONFIG_MLX5_CORE_EN_DCB int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, u32 change, unsigned int mtu, struct ieee_pfc *pfc, u32 *buffer_size, u8 *prio2buffer); +#else +static inline int +mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, + u32 change, unsigned int mtu, + void *pfc, + u32 *buffer_size, + u8 *prio2buffer) +{ + return 0; +} +#endif int mlx5e_port_query_buffer(struct mlx5e_priv *priv, struct mlx5e_port_buffer *port_buffer); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 21bb88c5d3dc..15eded36b872 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -49,6 +49,7 @@ #include "en.h" #include "en/dim.h" #include "en/txrx.h" +#include "en/port_buffer.h" #include "en_tc.h" #include "en_rep.h" #include "en_accel/ipsec.h" @@ -3040,9 +3041,11 @@ int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) struct mlx5e_params *params = &priv->channels.params; struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; - u16 mtu; + u16 mtu, prev_mtu; int err; + 
mlx5e_query_mtu(mdev, params, &prev_mtu); + err = mlx5e_set_mtu(mdev, params, params->sw_mtu); if (err) return err; @@ -3052,6 +3055,18 @@ int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n", __func__, mtu, params->sw_mtu); + if (mtu != prev_mtu && MLX5_BUFFER_SUPPORTED(mdev)) { + err = mlx5e_port_manual_buffer_config(priv, 0, mtu, + NULL, NULL, NULL); + if (err) { + netdev_warn(netdev, "%s: Failed to set Xon/Xoff values with MTU %d (err %d), setting back to previous MTU %d\n", + __func__, mtu, err, prev_mtu); + + mlx5e_set_mtu(mdev, params, prev_mtu); + return err; + } + } + params->sw_mtu = mtu; return 0; } From d24341740fe48add8a227a753e68b6eedf4b385a Mon Sep 17 00:00:00 2001 From: Alexei Lazar Date: Mon, 25 Aug 2025 17:34:33 +0300 Subject: [PATCH 2378/2411] net/mlx5e: Update and set Xon/Xoff upon port speed set Xon/Xoff sizes are derived from calculations that include the port speed. These settings need to be updated and applied whenever the port speed is changed. The port speed is typically set after the physical link goes down and is negotiated as part of the link-up process between the two connected interfaces. Xon/Xoff parameters are updated at the point where the new negotiated speed is established.
Fixes: 0696d60853d5 ("net/mlx5e: Receive buffer configuration") Signed-off-by: Alexei Lazar Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-11-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 15eded36b872..e680673ffb72 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -139,6 +139,8 @@ void mlx5e_update_carrier(struct mlx5e_priv *priv) if (up) { netdev_info(priv->netdev, "Link up\n"); netif_carrier_on(priv->netdev); + mlx5e_port_manual_buffer_config(priv, 0, priv->netdev->mtu, + NULL, NULL, NULL); } else { netdev_info(priv->netdev, "Link down\n"); netif_carrier_off(priv->netdev); From aca0c31af61e0d5cf1675a0cbd29460b95ae693c Mon Sep 17 00:00:00 2001 From: Alexei Lazar Date: Mon, 25 Aug 2025 17:34:34 +0300 Subject: [PATCH 2379/2411] net/mlx5e: Set local Xoff after FW update The local Xoff value is being set before the firmware (FW) update. In case of a failure where the FW is not updated with the new value, there is no fallback to the previous value. Update the local Xoff value after the FW has been successfully set. 
Fixes: 0696d60853d5 ("net/mlx5e: Receive buffer configuration") Signed-off-by: Alexei Lazar Reviewed-by: Tariq Toukan Reviewed-by: Dragos Tatulea Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-12-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 3efa8bf1d14e..4720523813b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -575,7 +575,6 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; } - priv->dcbx.xoff = xoff; /* Apply the settings */ if (update_buffer) { @@ -584,6 +583,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, return err; } + priv->dcbx.xoff = xoff; + if (update_prio2buffer) err = mlx5e_port_set_priority2buffer(priv->mdev, prio2buffer); From 4f23382841e67174211271a454811dd17c0ef3c5 Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Mon, 25 Aug 2025 12:36:52 +0800 Subject: [PATCH 2380/2411] net: stmmac: xgmac: Do not enable RX FIFO Overflow interrupts Enabling RX FIFO Overflow interrupts is counterproductive and causes an interrupt storm when RX FIFO overflows. Disabling this interrupt has no side effect and eliminates interrupt storms when the RX FIFO overflows. Commit 8a7cb245cf28 ("net: stmmac: Do not enable RX FIFO overflow interrupts") disables RX FIFO overflow interrupts for DWMAC4 IP and removes the corresponding handling of this interrupt. This patch is doing the same thing for XGMAC IP. 
Fixes: 2142754f8b9c ("net: stmmac: Add MAC related callbacks for XGMAC2") Signed-off-by: Rohan G Thomas Reviewed-by: Matthew Gerlach Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250825-xgmac-minor-fixes-v3-1-c225fe4444c0@altera.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 5dcc95bc0ad2..7201a3884265 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -203,10 +203,6 @@ static void dwxgmac2_dma_rx_mode(struct stmmac_priv *priv, void __iomem *ioaddr, } writel(value, ioaddr + XGMAC_MTL_RXQ_OPMODE(channel)); - - /* Enable MTL RX overflow */ - value = readl(ioaddr + XGMAC_MTL_QINTEN(channel)); - writel(value | XGMAC_RXOIE, ioaddr + XGMAC_MTL_QINTEN(channel)); } static void dwxgmac2_dma_tx_mode(struct stmmac_priv *priv, void __iomem *ioaddr, From 42ef11b2bff5b6a2910c28d2ea47cc00e0fbcaec Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Mon, 25 Aug 2025 12:36:53 +0800 Subject: [PATCH 2381/2411] net: stmmac: xgmac: Correct supported speed modes Correct supported speed modes as per the XGMAC databook. Commit 9cb54af214a7 ("net: stmmac: Fix IP-cores specific MAC capabilities") removes support for 10M, 100M and 1000HD. 1000HD is not supported by XGMAC IP, but it does support 10M and 100M FD mode for XGMAC version >= 2_20, and it also supports 10M and 100M HD mode if the HDSEL bit is set in the MAC_HW_FEATURE0 reg. This commit enables support for 10M and 100M speed modes for XGMAC IP based on XGMAC version and MAC capabilities. 
Fixes: 9cb54af214a7 ("net: stmmac: Fix IP-cores specific MAC capabilities") Signed-off-by: Rohan G Thomas Reviewed-by: Matthew Gerlach Link: https://patch.msgid.link/20250825-xgmac-minor-fixes-v3-2-c225fe4444c0@altera.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 13 +++++++++++-- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 5 +++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 6cadf8de4fdf..00e929bf280b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -49,6 +49,14 @@ static void dwxgmac2_core_init(struct mac_device_info *hw, writel(XGMAC_INT_DEFAULT_EN, ioaddr + XGMAC_INT_EN); } +static void dwxgmac2_update_caps(struct stmmac_priv *priv) +{ + if (!priv->dma_cap.mbps_10_100) + priv->hw->link.caps &= ~(MAC_10 | MAC_100); + else if (!priv->dma_cap.half_duplex) + priv->hw->link.caps &= ~(MAC_10HD | MAC_100HD); +} + static void dwxgmac2_set_mac(void __iomem *ioaddr, bool enable) { u32 tx = readl(ioaddr + XGMAC_TX_CONFIG); @@ -1424,6 +1432,7 @@ static void dwxgmac2_set_arp_offload(struct mac_device_info *hw, bool en, const struct stmmac_ops dwxgmac210_ops = { .core_init = dwxgmac2_core_init, + .update_caps = dwxgmac2_update_caps, .set_mac = dwxgmac2_set_mac, .rx_ipc = dwxgmac2_rx_ipc, .rx_queue_enable = dwxgmac2_rx_queue_enable, @@ -1532,8 +1541,8 @@ int dwxgmac2_setup(struct stmmac_priv *priv) mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | - MAC_1000FD | MAC_2500FD | MAC_5000FD | - MAC_10000FD; + MAC_10 | MAC_100 | MAC_1000FD | + MAC_2500FD | MAC_5000FD | MAC_10000FD; mac->link.duplex = 0; mac->link.speed10 = XGMAC_CONFIG_SS_10_MII; mac->link.speed100 = XGMAC_CONFIG_SS_100_MII; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c 
b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 7201a3884265..4d6bb995d8d8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -382,8 +382,11 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv, static int dwxgmac2_get_hw_feature(void __iomem *ioaddr, struct dma_features *dma_cap) { + struct stmmac_priv *priv; u32 hw_cap; + priv = container_of(dma_cap, struct stmmac_priv, dma_cap); + /* MAC HW feature 0 */ hw_cap = readl(ioaddr + XGMAC_HW_FEATURE0); dma_cap->edma = (hw_cap & XGMAC_HWFEAT_EDMA) >> 31; @@ -406,6 +409,8 @@ static int dwxgmac2_get_hw_feature(void __iomem *ioaddr, dma_cap->vlhash = (hw_cap & XGMAC_HWFEAT_VLHASH) >> 4; dma_cap->half_duplex = (hw_cap & XGMAC_HWFEAT_HDSEL) >> 3; dma_cap->mbps_1000 = (hw_cap & XGMAC_HWFEAT_GMIISEL) >> 1; + if (dma_cap->mbps_1000 && priv->synopsys_id >= DWXGMAC_CORE_2_20) + dma_cap->mbps_10_100 = 1; /* MAC HW feature 1 */ hw_cap = readl(ioaddr + XGMAC_HW_FEATURE1); From b1eded580ab28119de0b0f21efe37ee2b4419144 Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Mon, 25 Aug 2025 12:36:54 +0800 Subject: [PATCH 2382/2411] net: stmmac: Set CIC bit only for TX queues with COE Currently, in the AF_XDP transmit paths, the CIC bit of TX Desc3 is set for all packets. Setting this bit for packets transmitting through queues that don't support checksum offloading causes the TX DMA to get stuck after transmitting some packets. This patch ensures the CIC bit of TX Desc3 is set only if the TX queue supports checksum offloading. 
Fixes: 132c32ee5bc0 ("net: stmmac: Add TX via XDP zero-copy socket") Signed-off-by: Rohan G Thomas Reviewed-by: Matthew Gerlach Link: https://patch.msgid.link/20250825-xgmac-minor-fixes-v3-3-c225fe4444c0@altera.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f1abf4242cd2..7b16d1207b80 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2584,6 +2584,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) struct netdev_queue *nq = netdev_get_tx_queue(priv->dev, queue); struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue]; struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[queue]; + bool csum = !priv->plat->tx_queues_cfg[queue].coe_unsupported; struct xsk_buff_pool *pool = tx_q->xsk_pool; unsigned int entry = tx_q->cur_tx; struct dma_desc *tx_desc = NULL; @@ -2671,7 +2672,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) } stmmac_prepare_tx_desc(priv, tx_desc, 1, xdp_desc.len, - true, priv->mode, true, true, + csum, priv->mode, true, true, xdp_desc.len); stmmac_enable_dma_transmission(priv, priv->ioaddr, queue); @@ -4983,6 +4984,7 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, { struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[queue]; struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue]; + bool csum = !priv->plat->tx_queues_cfg[queue].coe_unsupported; unsigned int entry = tx_q->cur_tx; struct dma_desc *tx_desc; dma_addr_t dma_addr; @@ -5034,7 +5036,7 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, stmmac_set_desc_addr(priv, tx_desc, dma_addr); stmmac_prepare_tx_desc(priv, tx_desc, 1, xdpf->len, - true, priv->mode, true, true, + 
csum, priv->mode, true, true, xdpf->len); tx_q->tx_count_frames++; From 9448ccd853368582efa9db05db344f8bb9dffe0f Mon Sep 17 00:00:00 2001 From: Dipayaan Roy Date: Mon, 25 Aug 2025 04:56:27 -0700 Subject: [PATCH 2383/2411] net: hv_netvsc: fix loss of early receive events from host during channel open. The hv_netvsc driver currently enables NAPI after opening the primary and subchannels. This ordering creates a race: if the Hyper-V host places data in the host -> guest ring buffer and signals the channel before napi_enable() has been called, the channel callback will run but napi_schedule_prep() will return false. As a result, the NAPI poller never gets scheduled, the data in the ring buffer is not consumed, and the receive queue may remain permanently stuck until another interrupt happens to arrive. Fix this by enabling NAPI and registering it with the RX/TX queues before vmbus channel is opened. This guarantees that any early host signal after open will correctly trigger NAPI scheduling and the ring buffer will be drained. 
Fixes: 76bb5db5c749d ("netvsc: fix use after free on module removal") Signed-off-by: Dipayaan Roy Link: https://patch.msgid.link/20250825115627.GA32189@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/netvsc.c | 17 ++++++++--------- drivers/net/hyperv/rndis_filter.c | 23 ++++++++++++++++------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 720104661d7f..60a4629fe6ba 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1812,6 +1812,11 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, /* Enable NAPI handler before init callbacks */ netif_napi_add(ndev, &net_device->chan_table[0].napi, netvsc_poll); + napi_enable(&net_device->chan_table[0].napi); + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_RX, + &net_device->chan_table[0].napi); + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_TX, + &net_device->chan_table[0].napi); /* Open the channel */ device->channel->next_request_id_callback = vmbus_next_request_id; @@ -1831,12 +1836,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, /* Channel is opened */ netdev_dbg(ndev, "hv_netvsc channel opened successfully\n"); - napi_enable(&net_device->chan_table[0].napi); - netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_RX, - &net_device->chan_table[0].napi); - netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_TX, - &net_device->chan_table[0].napi); - /* Connect with the NetVsp */ ret = netvsc_connect_vsp(device, net_device, device_info); if (ret != 0) { @@ -1854,14 +1853,14 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, close: RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); - netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_TX, NULL); - netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_RX, NULL); - napi_disable(&net_device->chan_table[0].napi); /* Now, we can close the channel safely */ 
vmbus_close(device->channel); cleanup: + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_TX, NULL); + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_RX, NULL); + napi_disable(&net_device->chan_table[0].napi); netif_napi_del(&net_device->chan_table[0].napi); cleanup2: diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 9e73959e61ee..c35f9685b6bf 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1252,17 +1252,26 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) new_sc->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes); new_sc->max_pkt_size = NETVSC_MAX_PKT_SIZE; + /* Enable napi before opening the vmbus channel to avoid races + * as the host placing data on the host->guest ring may be left + * out if napi was not enabled. + */ + napi_enable(&nvchan->napi); + netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_RX, + &nvchan->napi); + netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_TX, + &nvchan->napi); + ret = vmbus_open(new_sc, netvsc_ring_bytes, netvsc_ring_bytes, NULL, 0, netvsc_channel_cb, nvchan); - if (ret == 0) { - napi_enable(&nvchan->napi); - netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_RX, - &nvchan->napi); - netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_TX, - &nvchan->napi); - } else { + if (ret != 0) { netdev_notice(ndev, "sub channel open failed: %d\n", ret); + netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_TX, + NULL); + netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_RX, + NULL); + napi_disable(&nvchan->napi); } if (atomic_inc_return(&nvscdev->open_chn) == nvscdev->num_chn) From c64eff368ac676e8540344d27a3de47e0ad90d21 Mon Sep 17 00:00:00 2001 From: Qingyue Zhang Date: Wed, 27 Aug 2025 19:43:39 +0800 Subject: [PATCH 2384/2411] io_uring/kbuf: fix signedness in this_len calculation When importing and using buffers, buf->len is considered unsigned. However, buf->len is converted to signed int when committing. 
This can lead to unexpected behavior if the buffer is large enough to be interpreted as a negative value. Make min_t calculation unsigned. Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption") Co-developed-by: Suoxing Zhang Signed-off-by: Suoxing Zhang Signed-off-by: Qingyue Zhang Link: https://lore.kernel.org/r/tencent_4DBB3674C0419BEC2C0C525949DA410CA307@qq.com Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index f2d2cc319faa..81a13338dfab 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -39,7 +39,7 @@ static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len) u32 this_len; buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask); - this_len = min_t(int, len, buf->len); + this_len = min_t(u32, len, buf->len); buf->len -= this_len; if (buf->len) { buf->addr += this_len; From dcb34659028f856c423a29ef9b4e2571d203444d Mon Sep 17 00:00:00 2001 From: Takamitsu Iwai Date: Sat, 23 Aug 2025 17:58:55 +0900 Subject: [PATCH 2385/2411] net: rose: split remove and free operations in rose_remove_neigh() The current rose_remove_neigh() performs two distinct operations: 1. Removes rose_neigh from rose_neigh_list 2. Frees the rose_neigh structure Split these operations into separate functions to improve maintainability and prepare for upcoming refcount_t conversion. The timer cleanup remains in rose_remove_neigh() because free operations can be called from the timer itself. This patch introduces rose_neigh_put() to handle the freeing of rose_neigh structures and modifies rose_remove_neigh() to handle removal only.
Signed-off-by: Takamitsu Iwai Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250823085857.47674-2-takamitz@amazon.co.jp Signed-off-by: Jakub Kicinski --- include/net/rose.h | 8 ++++++++ net/rose/rose_route.c | 15 ++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/include/net/rose.h b/include/net/rose.h index 23267b4efcfa..174b4f605d84 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -151,6 +151,14 @@ struct rose_sock { #define rose_sk(sk) ((struct rose_sock *)(sk)) +static inline void rose_neigh_put(struct rose_neigh *rose_neigh) +{ + if (rose_neigh->ax25) + ax25_cb_put(rose_neigh->ax25); + kfree(rose_neigh->digipeat); + kfree(rose_neigh); +} + /* af_rose.c */ extern ax25_address rose_callsign; extern int sysctl_rose_restart_request_timeout; diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index b72bf8a08d48..0c44c416f485 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -234,20 +234,12 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh) if ((s = rose_neigh_list) == rose_neigh) { rose_neigh_list = rose_neigh->next; - if (rose_neigh->ax25) - ax25_cb_put(rose_neigh->ax25); - kfree(rose_neigh->digipeat); - kfree(rose_neigh); return; } while (s != NULL && s->next != NULL) { if (s->next == rose_neigh) { s->next = rose_neigh->next; - if (rose_neigh->ax25) - ax25_cb_put(rose_neigh->ax25); - kfree(rose_neigh->digipeat); - kfree(rose_neigh); return; } @@ -331,8 +323,10 @@ static int rose_del_node(struct rose_route_struct *rose_route, if (rose_node->neighbour[i] == rose_neigh) { rose_neigh->count--; - if (rose_neigh->count == 0 && rose_neigh->use == 0) + if (rose_neigh->count == 0 && rose_neigh->use == 0) { rose_remove_neigh(rose_neigh); + rose_neigh_put(rose_neigh); + } rose_node->count--; @@ -513,6 +507,7 @@ void rose_rt_device_down(struct net_device *dev) } rose_remove_neigh(s); + rose_neigh_put(s); } spin_unlock_bh(&rose_neigh_list_lock); spin_unlock_bh(&rose_node_list_lock); @@ 
-569,6 +564,7 @@ static int rose_clear_routes(void) if (s->use == 0 && !s->loopback) { s->count = 0; rose_remove_neigh(s); + rose_neigh_put(s); } } @@ -1301,6 +1297,7 @@ void __exit rose_rt_free(void) rose_neigh = rose_neigh->next; rose_remove_neigh(s); + rose_neigh_put(s); } while (rose_node != NULL) { From d860d1faa6b2ce3becfdb8b0c2b048ad31800061 Mon Sep 17 00:00:00 2001 From: Takamitsu Iwai Date: Sat, 23 Aug 2025 17:58:56 +0900 Subject: [PATCH 2386/2411] net: rose: convert 'use' field to refcount_t The 'use' field in struct rose_neigh is used as a reference counter but lacks atomicity. This can lead to race conditions where a rose_neigh structure is freed while still being referenced by other code paths. For example, when rose_neigh->use becomes zero during an ioctl operation via rose_rt_ioctl(), the structure may be removed while its timer is still active, potentially causing use-after-free issues. This patch changes the type of 'use' from unsigned short to refcount_t and updates all code paths to use rose_neigh_hold() and rose_neigh_put() which operate reference counts atomically. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Takamitsu Iwai Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250823085857.47674-3-takamitz@amazon.co.jp Signed-off-by: Jakub Kicinski --- include/net/rose.h | 18 +++++++++++++----- net/rose/af_rose.c | 13 +++++++------ net/rose/rose_in.c | 12 ++++++------ net/rose/rose_route.c | 33 ++++++++++++++++++--------------- net/rose/rose_timer.c | 2 +- 5 files changed, 45 insertions(+), 33 deletions(-) diff --git a/include/net/rose.h b/include/net/rose.h index 174b4f605d84..2b5491bbf39a 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -8,6 +8,7 @@ #ifndef _ROSE_H #define _ROSE_H +#include #include #include #include @@ -96,7 +97,7 @@ struct rose_neigh { ax25_cb *ax25; struct net_device *dev; unsigned short count; - unsigned short use; + refcount_t use; unsigned int number; char restarted; char dce_mode; @@ -151,12 +152,19 @@ struct rose_sock { #define rose_sk(sk) ((struct rose_sock *)(sk)) +static inline void rose_neigh_hold(struct rose_neigh *rose_neigh) +{ + refcount_inc(&rose_neigh->use); +} + static inline void rose_neigh_put(struct rose_neigh *rose_neigh) { - if (rose_neigh->ax25) - ax25_cb_put(rose_neigh->ax25); - kfree(rose_neigh->digipeat); - kfree(rose_neigh); + if (refcount_dec_and_test(&rose_neigh->use)) { + if (rose_neigh->ax25) + ax25_cb_put(rose_neigh->ax25); + kfree(rose_neigh->digipeat); + kfree(rose_neigh); + } } /* af_rose.c */ diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 4e72b636a46a..543f9e8ebb69 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -170,7 +170,7 @@ void rose_kill_by_neigh(struct rose_neigh *neigh) if (rose->neighbour == neigh) { rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); rose->neighbour = NULL; } } @@ -212,7 +212,7 @@ static void rose_kill_by_device(struct net_device *dev) if (rose->device == dev) { rose_disconnect(sk, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); if 
(rose->neighbour) - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); netdev_put(rose->device, &rose->dev_tracker); rose->device = NULL; } @@ -655,7 +655,7 @@ static int rose_release(struct socket *sock) break; case ROSE_STATE_2: - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); release_sock(sk); rose_disconnect(sk, 0, -1, -1); lock_sock(sk); @@ -823,6 +823,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le rose->lci = rose_new_lci(rose->neighbour); if (!rose->lci) { err = -ENETUNREACH; + rose_neigh_put(rose->neighbour); goto out_release; } @@ -834,12 +835,14 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le dev = rose_dev_first(); if (!dev) { err = -ENETUNREACH; + rose_neigh_put(rose->neighbour); goto out_release; } user = ax25_findbyuid(current_euid()); if (!user) { err = -EINVAL; + rose_neigh_put(rose->neighbour); dev_put(dev); goto out_release; } @@ -874,8 +877,6 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le rose->state = ROSE_STATE_1; - rose->neighbour->use++; - rose_write_internal(sk, ROSE_CALL_REQUEST); rose_start_heartbeat(sk); rose_start_t1timer(sk); @@ -1077,7 +1078,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros GFP_ATOMIC); make_rose->facilities = facilities; - make_rose->neighbour->use++; + rose_neigh_hold(make_rose->neighbour); if (rose_sk(sk)->defer) { make_rose->state = ROSE_STATE_5; diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c index 3e99181e759f..0276b393f0e5 100644 --- a/net/rose/rose_in.c +++ b/net/rose/rose_in.c @@ -56,7 +56,7 @@ static int rose_state1_machine(struct sock *sk, struct sk_buff *skb, int framety case ROSE_CLEAR_REQUEST: rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION); rose_disconnect(sk, ECONNREFUSED, skb->data[3], skb->data[4]); - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); break; default: @@ -79,12 +79,12 @@ static int 
rose_state2_machine(struct sock *sk, struct sk_buff *skb, int framety case ROSE_CLEAR_REQUEST: rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION); rose_disconnect(sk, 0, skb->data[3], skb->data[4]); - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); break; case ROSE_CLEAR_CONFIRMATION: rose_disconnect(sk, 0, -1, -1); - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); break; default: @@ -121,7 +121,7 @@ static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int framety case ROSE_CLEAR_REQUEST: rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION); rose_disconnect(sk, 0, skb->data[3], skb->data[4]); - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); break; case ROSE_RR: @@ -234,7 +234,7 @@ static int rose_state4_machine(struct sock *sk, struct sk_buff *skb, int framety case ROSE_CLEAR_REQUEST: rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION); rose_disconnect(sk, 0, skb->data[3], skb->data[4]); - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); break; default: @@ -254,7 +254,7 @@ static int rose_state5_machine(struct sock *sk, struct sk_buff *skb, int framety if (frametype == ROSE_CLEAR_REQUEST) { rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION); rose_disconnect(sk, 0, skb->data[3], skb->data[4]); - rose_sk(sk)->neighbour->use--; + rose_neigh_put(rose_sk(sk)->neighbour); } return 0; diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 0c44c416f485..8efb9033c057 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -93,11 +93,11 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route, rose_neigh->ax25 = NULL; rose_neigh->dev = dev; rose_neigh->count = 0; - rose_neigh->use = 0; rose_neigh->dce_mode = 0; rose_neigh->loopback = 0; rose_neigh->number = rose_neigh_no++; rose_neigh->restarted = 0; + refcount_set(&rose_neigh->use, 1); skb_queue_head_init(&rose_neigh->queue); @@ -255,10 +255,10 @@ static void rose_remove_route(struct rose_route *rose_route) struct rose_route *s; if 
(rose_route->neigh1 != NULL) - rose_route->neigh1->use--; + rose_neigh_put(rose_route->neigh1); if (rose_route->neigh2 != NULL) - rose_route->neigh2->use--; + rose_neigh_put(rose_route->neigh2); if ((s = rose_route_list) == rose_route) { rose_route_list = rose_route->next; @@ -323,7 +323,7 @@ static int rose_del_node(struct rose_route_struct *rose_route, if (rose_node->neighbour[i] == rose_neigh) { rose_neigh->count--; - if (rose_neigh->count == 0 && rose_neigh->use == 0) { + if (rose_neigh->count == 0) { rose_remove_neigh(rose_neigh); rose_neigh_put(rose_neigh); } @@ -375,11 +375,11 @@ void rose_add_loopback_neigh(void) sn->ax25 = NULL; sn->dev = NULL; sn->count = 0; - sn->use = 0; sn->dce_mode = 1; sn->loopback = 1; sn->number = rose_neigh_no++; sn->restarted = 1; + refcount_set(&sn->use, 1); skb_queue_head_init(&sn->queue); @@ -561,8 +561,7 @@ static int rose_clear_routes(void) s = rose_neigh; rose_neigh = rose_neigh->next; - if (s->use == 0 && !s->loopback) { - s->count = 0; + if (!s->loopback) { rose_remove_neigh(s); rose_neigh_put(s); } @@ -680,6 +679,7 @@ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, for (i = 0; i < node->count; i++) { if (node->neighbour[i]->restarted) { res = node->neighbour[i]; + rose_neigh_hold(node->neighbour[i]); goto out; } } @@ -691,6 +691,7 @@ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, for (i = 0; i < node->count; i++) { if (!rose_ftimer_running(node->neighbour[i])) { res = node->neighbour[i]; + rose_neigh_hold(node->neighbour[i]); goto out; } failed = 1; @@ -780,13 +781,13 @@ static void rose_del_route_by_neigh(struct rose_neigh *rose_neigh) } if (rose_route->neigh1 == rose_neigh) { - rose_route->neigh1->use--; + rose_neigh_put(rose_route->neigh1); rose_route->neigh1 = NULL; rose_transmit_clear_request(rose_route->neigh2, rose_route->lci2, ROSE_OUT_OF_ORDER, 0); } if (rose_route->neigh2 == rose_neigh) { - rose_route->neigh2->use--; + rose_neigh_put(rose_route->neigh2); 
rose_route->neigh2 = NULL; rose_transmit_clear_request(rose_route->neigh1, rose_route->lci1, ROSE_OUT_OF_ORDER, 0); } @@ -915,7 +916,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) rose_clear_queues(sk); rose->cause = ROSE_NETWORK_CONGESTION; rose->diagnostic = 0; - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); rose->neighbour = NULL; rose->lci = 0; rose->state = ROSE_STATE_0; @@ -1040,12 +1041,12 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) if ((new_lci = rose_new_lci(new_neigh)) == 0) { rose_transmit_clear_request(rose_neigh, lci, ROSE_NETWORK_CONGESTION, 71); - goto out; + goto put_neigh; } if ((rose_route = kmalloc(sizeof(*rose_route), GFP_ATOMIC)) == NULL) { rose_transmit_clear_request(rose_neigh, lci, ROSE_NETWORK_CONGESTION, 120); - goto out; + goto put_neigh; } rose_route->lci1 = lci; @@ -1058,8 +1059,8 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) rose_route->lci2 = new_lci; rose_route->neigh2 = new_neigh; - rose_route->neigh1->use++; - rose_route->neigh2->use++; + rose_neigh_hold(rose_route->neigh1); + rose_neigh_hold(rose_route->neigh2); rose_route->next = rose_route_list; rose_route_list = rose_route; @@ -1071,6 +1072,8 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) rose_transmit_link(skb, rose_route->neigh2); res = 1; +put_neigh: + rose_neigh_put(new_neigh); out: spin_unlock_bh(&rose_route_list_lock); spin_unlock_bh(&rose_neigh_list_lock); @@ -1186,7 +1189,7 @@ static int rose_neigh_show(struct seq_file *seq, void *v) (rose_neigh->loopback) ? "RSLOOP-0" : ax2asc(buf, &rose_neigh->callsign), rose_neigh->dev ? rose_neigh->dev->name : "???", rose_neigh->count, - rose_neigh->use, + refcount_read(&rose_neigh->use) - 1, (rose_neigh->dce_mode) ? "DCE" : "DTE", (rose_neigh->restarted) ? 
"yes" : "no", ax25_display_timer(&rose_neigh->t0timer) / HZ, diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index 020369c49587..bb60a1654d61 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -180,7 +180,7 @@ static void rose_timer_expiry(struct timer_list *t) break; case ROSE_STATE_2: /* T3 */ - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); rose_disconnect(sk, ETIMEDOUT, -1, -1); break; From da9c9c877597170b929a6121a68dcd3dd9a80f45 Mon Sep 17 00:00:00 2001 From: Takamitsu Iwai Date: Sat, 23 Aug 2025 17:58:57 +0900 Subject: [PATCH 2387/2411] net: rose: include node references in rose_neigh refcount Current implementation maintains two separate reference counting mechanisms: the 'count' field in struct rose_neigh tracks references from rose_node structures, while the 'use' field (now refcount_t) tracks references from rose_sock. This patch merges these two reference counting systems using 'use' field for proper reference management. Specifically, this patch adds incrementing and decrementing of rose_neigh->use when rose_neigh->count is incremented or decremented. This patch also modifies rose_rt_free(), rose_rt_device_down() and rose_clear_route() to properly release references to rose_neigh objects before freeing a rose_node through rose_remove_node(). These changes ensure rose_neigh structures are properly freed only when all references, including those from rose_node structures, are released. As a result, this resolves a slab-use-after-free issue reported by Syzbot. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+942297eecf7d2d61d1f1@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=942297eecf7d2d61d1f1 Signed-off-by: Takamitsu Iwai Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250823085857.47674-4-takamitz@amazon.co.jp Signed-off-by: Jakub Kicinski --- net/rose/rose_route.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 8efb9033c057..1adee1fbc2ed 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -178,6 +178,7 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route, } } rose_neigh->count++; + rose_neigh_hold(rose_neigh); goto out; } @@ -187,6 +188,7 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route, rose_node->neighbour[rose_node->count] = rose_neigh; rose_node->count++; rose_neigh->count++; + rose_neigh_hold(rose_neigh); } out: @@ -322,6 +324,7 @@ static int rose_del_node(struct rose_route_struct *rose_route, for (i = 0; i < rose_node->count; i++) { if (rose_node->neighbour[i] == rose_neigh) { rose_neigh->count--; + rose_neigh_put(rose_neigh); if (rose_neigh->count == 0) { rose_remove_neigh(rose_neigh); @@ -430,6 +433,7 @@ int rose_add_loopback_node(const rose_address *address) rose_node_list = rose_node; rose_loopback_neigh->count++; + rose_neigh_hold(rose_loopback_neigh); out: spin_unlock_bh(&rose_node_list_lock); @@ -461,6 +465,7 @@ void rose_del_loopback_node(const rose_address *address) rose_remove_node(rose_node); rose_loopback_neigh->count--; + rose_neigh_put(rose_loopback_neigh); out: spin_unlock_bh(&rose_node_list_lock); @@ -500,6 +505,7 @@ void rose_rt_device_down(struct net_device *dev) memmove(&t->neighbour[i], &t->neighbour[i + 1], sizeof(t->neighbour[0]) * (t->count - i)); + rose_neigh_put(s); } if (t->count <= 0) @@ -543,6 +549,7 @@ static int rose_clear_routes(void) { struct rose_neigh *s, *rose_neigh; 
struct rose_node *t, *rose_node; + int i; spin_lock_bh(&rose_node_list_lock); spin_lock_bh(&rose_neigh_list_lock); @@ -553,8 +560,12 @@ static int rose_clear_routes(void) while (rose_node != NULL) { t = rose_node; rose_node = rose_node->next; - if (!t->loopback) + + if (!t->loopback) { + for (i = 0; i < rose_node->count; i++) + rose_neigh_put(t->neighbour[i]); rose_remove_node(t); + } } while (rose_neigh != NULL) { @@ -1189,7 +1200,7 @@ static int rose_neigh_show(struct seq_file *seq, void *v) (rose_neigh->loopback) ? "RSLOOP-0" : ax2asc(buf, &rose_neigh->callsign), rose_neigh->dev ? rose_neigh->dev->name : "???", rose_neigh->count, - refcount_read(&rose_neigh->use) - 1, + refcount_read(&rose_neigh->use) - rose_neigh->count - 1, (rose_neigh->dce_mode) ? "DCE" : "DTE", (rose_neigh->restarted) ? "yes" : "no", ax25_display_timer(&rose_neigh->t0timer) / HZ, @@ -1294,6 +1305,7 @@ void __exit rose_rt_free(void) struct rose_neigh *s, *rose_neigh = rose_neigh_list; struct rose_node *t, *rose_node = rose_node_list; struct rose_route *u, *rose_route = rose_route_list; + int i; while (rose_neigh != NULL) { s = rose_neigh; @@ -1307,6 +1319,8 @@ void __exit rose_rt_free(void) t = rose_node; rose_node = rose_node->next; + for (i = 0; i < t->count; i++) + rose_neigh_put(t->neighbour[i]); rose_remove_node(t); } From 27f5e0c1321ee280189cea16044de2e157dc4bb9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 24 Jun 2025 11:37:16 -0400 Subject: [PATCH 2388/2411] drm/amdgpu/gfx11: set MQD as appriopriate for queue types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the MQD as appropriate for the kernel vs user queues. 
Acked-by: Christian König Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher (cherry picked from commit 063d6683208722b1875f888a45084e3d112701ac) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index c01c241a1b06..456ba758fa94 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4129,6 +4129,8 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, #endif if (prop->tmz_queue) tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); + if (!prop->kernel_queue) + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1); mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ @@ -4281,8 +4283,10 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, prop->allow_tunneling); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + if (prop->kernel_queue) { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + } if (prop->tmz_queue) tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); mqd->cp_hqd_pq_control = tmp; From 29f155c5e82fe35ff85b1f13612cb8c2dbe1dca3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 24 Jun 2025 11:38:14 -0400 Subject: [PATCH 2389/2411] drm/amdgpu/gfx12: set MQD as appriopriate for queue types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the MQD as appropriate for the kernel vs user queues. 
Acked-by: Christian König Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher (cherry picked from commit 7b9110f2897957efd9715b52fc01986509729db3) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 3e138527d534..fd44d5503e28 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -3026,6 +3026,8 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, #endif if (prop->tmz_queue) tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); + if (!prop->kernel_queue) + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1); mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ @@ -3175,8 +3177,10 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + if (prop->kernel_queue) { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + } if (prop->tmz_queue) tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); mqd->cp_hqd_pq_control = tmp; From ac4ed2da4c1305a1a002415058aa7deaf49ffe3e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 25 Aug 2025 13:40:22 -0400 Subject: [PATCH 2390/2411] Revert "drm/amdgpu: fix incorrect vm flags to map bo" This reverts commit b08425fa77ad2f305fe57a33dceb456be03b653f. Revert this to align with 6.17 because the fixes tag was wrong on this commit. 
Signed-off-by: Alex Deucher (cherry picked from commit be33e8a239aac204d7e9e673c4220ef244eb1ba3) --- drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index dfb6cfd83760..02138aa55793 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -88,8 +88,8 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, } r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size, - AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | - AMDGPU_VM_PAGE_EXECUTABLE); + AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | + AMDGPU_PTE_EXECUTABLE); if (r) { DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); From 5dff50802b285da8284a7bf17ae2fdc6f1357023 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 25 Aug 2025 12:54:01 +0800 Subject: [PATCH 2391/2411] drm/amd/amdgpu: disable hwmon power1_cap* for gfx 11.0.3 on vf mode the PPSMC_MSG_GetPptLimit msg is not valid for gfx 11.0.3 on vf mode, so skip creating the power1_cap* hwmon sysfs nodes.
Signed-off-by: Yang Wang Reviewed-by: Asad Kamal Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit e82a8d441038d8cb10b63047a9e705c42479d156) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 4b64851fdb42..5fbfe7333b54 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -3458,14 +3458,16 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, effective_mode &= ~S_IWUSR; /* not implemented yet for APUs other than GC 10.3.1 (vangogh) and 9.4.3 */ - if (((adev->family == AMDGPU_FAMILY_SI) || - ((adev->flags & AMD_IS_APU) && (gc_ver != IP_VERSION(10, 3, 1)) && - (gc_ver != IP_VERSION(9, 4, 3) && gc_ver != IP_VERSION(9, 4, 4)))) && - (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || - attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr || - attr == &sensor_dev_attr_power1_cap.dev_attr.attr || - attr == &sensor_dev_attr_power1_cap_default.dev_attr.attr)) - return 0; + if (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap_default.dev_attr.attr) { + if (adev->family == AMDGPU_FAMILY_SI || + ((adev->flags & AMD_IS_APU) && gc_ver != IP_VERSION(10, 3, 1) && + (gc_ver != IP_VERSION(9, 4, 3) && gc_ver != IP_VERSION(9, 4, 4))) || + (amdgpu_sriov_vf(adev) && gc_ver == IP_VERSION(11, 0, 3))) + return 0; + } /* not implemented yet for APUs having < GC 9.3.0 (Renoir) */ if (((adev->family == AMDGPU_FAMILY_SI) || From ee38ea0ae4ed13fe33e033dc98d11e76bc7167cd Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Tue, 26 Aug 2025 17:30:58 +0800 Subject: [PATCH 2392/2411] drm/amdgpu: update firmware version checks for user queue support The minimum firmware versions 
required for user queue functionality have been increased to address an issue where the queue privilege state was lost during queue connect operations. The problem occurred because the privilege state was being restored to its initial value at the beginning of the function, overwriting the state that was properly set during the queue connect case. This commit updates the minimum version requirements: - ME firmware from 2390 to 2420 - PFP firmware from 2530 to 2580 - MEC firmware from 2600 to 2650 - MES firmware remains at 120 These updated firmware versions contain the necessary fixes to properly maintain queue privilege state throughout connect operations. Fixes: 61ca97e9590c ("drm/amdgpu: Add fw minimum version check for usermode queue") Acked-by: Alex Deucher Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher (cherry picked from commit 5f976c9939f0d5916d2b8ef3156a6d1799781df1) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 456ba758fa94..c85de8c8f6f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1612,9 +1612,9 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): if (!adev->gfx.disable_uq && - adev->gfx.me_fw_version >= 2390 && - adev->gfx.pfp_fw_version >= 2530 && - adev->gfx.mec_fw_version >= 2600 && + adev->gfx.me_fw_version >= 2420 && + adev->gfx.pfp_fw_version >= 2580 && + adev->gfx.mec_fw_version >= 2650 && adev->mes.fw_version[0] >= 120) { adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; From c767d74a9cdd1042046d02319d16b85d9aa8a8aa Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 22 Aug 2025 12:12:37 -0400 Subject: [PATCH 2393/2411] drm/amdgpu/userq: fix error handling of invalid 
doorbell If the doorbell is invalid, be sure to set the r to an error state so the function returns an error. Reviewed-by: David (Ming Qiang) Wu Signed-off-by: Alex Deucher (cherry picked from commit 7e2a5b0a9a165a7c51274aa01b18be29491b4345) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index c3ace8030530..8190c24a649a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -471,6 +471,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) if (index == (uint64_t)-EINVAL) { drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n"); kfree(queue); + r = -EINVAL; goto unlock; } From bcd6f8954dc4a3aa32edda5602e43a0174dc8f0f Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Tue, 26 Aug 2025 14:50:46 -0700 Subject: [PATCH 2394/2411] MAINTAINERS: rmnet: Update email addresses Switch to oss.qualcomm.com ids. 
Signed-off-by: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Link: https://patch.msgid.link/20250826215046.865530-1-subash.a.kasiviswanathan@oss.qualcomm.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c5b47955d2a6..3337577ce545 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20847,8 +20847,8 @@ S: Maintained F: drivers/firmware/qcom/qcom_qseecom_uefisecapp.c QUALCOMM RMNET DRIVER -M: Subash Abhinov Kasiviswanathan -M: Sean Tranchetti +M: Subash Abhinov Kasiviswanathan +M: Sean Tranchetti L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst From 2e8750469242cad8f01f320131fd5a6f540dbb99 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Aug 2025 14:13:14 +0000 Subject: [PATCH 2395/2411] sctp: initialize more fields in sctp_v6_from_sk() syzbot found that sin6_scope_id was not properly initialized, leading to undefined behavior. Clear sin6_scope_id and sin6_flowinfo. 
BUG: KMSAN: uninit-value in __sctp_v6_cmp_addr+0x887/0x8c0 net/sctp/ipv6.c:649 __sctp_v6_cmp_addr+0x887/0x8c0 net/sctp/ipv6.c:649 sctp_inet6_cmp_addr+0x4f2/0x510 net/sctp/ipv6.c:983 sctp_bind_addr_conflict+0x22a/0x3b0 net/sctp/bind_addr.c:390 sctp_get_port_local+0x21eb/0x2440 net/sctp/socket.c:8452 sctp_get_port net/sctp/socket.c:8523 [inline] sctp_listen_start net/sctp/socket.c:8567 [inline] sctp_inet_listen+0x710/0xfd0 net/sctp/socket.c:8636 __sys_listen_socket net/socket.c:1912 [inline] __sys_listen net/socket.c:1927 [inline] __do_sys_listen net/socket.c:1932 [inline] __se_sys_listen net/socket.c:1930 [inline] __x64_sys_listen+0x343/0x4c0 net/socket.c:1930 x64_sys_call+0x271d/0x3e20 arch/x86/include/generated/asm/syscalls_64.h:51 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xd9/0x210 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Local variable addr.i.i created at: sctp_get_port net/sctp/socket.c:8515 [inline] sctp_listen_start net/sctp/socket.c:8567 [inline] sctp_inet_listen+0x650/0xfd0 net/sctp/socket.c:8636 __sys_listen_socket net/socket.c:1912 [inline] __sys_listen net/socket.c:1927 [inline] __do_sys_listen net/socket.c:1932 [inline] __se_sys_listen net/socket.c:1930 [inline] __x64_sys_listen+0x343/0x4c0 net/socket.c:1930 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+e69f06a0f30116c68056@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68adc0a2.050a0220.37038e.00c4.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Marcelo Ricardo Leitner Acked-by: Xin Long Link: https://patch.msgid.link/20250826141314.1802610-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sctp/ipv6.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 3336dcfb4515..568ff8797c39 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -547,7 +547,9 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk) { addr->v6.sin6_family = AF_INET6; 
addr->v6.sin6_port = 0; + addr->v6.sin6_flowinfo = 0; addr->v6.sin6_addr = sk->sk_v6_rcv_saddr; + addr->v6.sin6_scope_id = 0; } /* Initialize sk->sk_rcv_saddr from sctp_addr. */ From 9b8c88f875c04d4cb9111bd5dd9291c7e9691bf5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Aug 2025 13:44:35 +0000 Subject: [PATCH 2396/2411] l2tp: do not use sock_hold() in pppol2tp_session_get_sock() pppol2tp_session_get_sock() is using RCU, it must be ready for sk_refcnt being zero. Commit ee40fb2e1eb5 ("l2tp: protect sock pointer of struct pppol2tp_session with RCU") was correct because it had a call_rcu(..., pppol2tp_put_sk) which was later removed in blamed commit. pppol2tp_recv() can use pppol2tp_session_get_sock() as well. Fixes: c5cbaef992d6 ("l2tp: refactor ppp socket/session relationship") Signed-off-by: Eric Dumazet Cc: James Chapman Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20250826134435.1683435-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/l2tp/l2tp_ppp.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index fc5c2fd8f34c..5e12e7ce17d8 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -129,22 +129,12 @@ static const struct ppp_channel_ops pppol2tp_chan_ops = { static const struct proto_ops pppol2tp_ops; -/* Retrieves the pppol2tp socket associated to a session. - * A reference is held on the returned socket, so this function must be paired - * with sock_put(). - */ +/* Retrieves the pppol2tp socket associated to a session. */ static struct sock *pppol2tp_session_get_sock(struct l2tp_session *session) { struct pppol2tp_session *ps = l2tp_session_priv(session); - struct sock *sk; - rcu_read_lock(); - sk = rcu_dereference(ps->sk); - if (sk) - sock_hold(sk); - rcu_read_unlock(); - - return sk; + return rcu_dereference(ps->sk); } /* Helpers to obtain tunnel/session contexts from sockets. 
@@ -206,14 +196,13 @@ static int pppol2tp_recvmsg(struct socket *sock, struct msghdr *msg, static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len) { - struct pppol2tp_session *ps = l2tp_session_priv(session); - struct sock *sk = NULL; + struct sock *sk; /* If the socket is bound, send it in to PPP's input queue. Otherwise * queue it on the session socket. */ rcu_read_lock(); - sk = rcu_dereference(ps->sk); + sk = pppol2tp_session_get_sock(session); if (!sk) goto no_sock; @@ -510,13 +499,14 @@ static void pppol2tp_show(struct seq_file *m, void *arg) struct l2tp_session *session = arg; struct sock *sk; + rcu_read_lock(); sk = pppol2tp_session_get_sock(session); if (sk) { struct pppox_sock *po = pppox_sk(sk); seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); - sock_put(sk); } + rcu_read_unlock(); } static void pppol2tp_session_init(struct l2tp_session *session) @@ -1530,6 +1520,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) port = ntohs(inet->inet_sport); } + rcu_read_lock(); sk = pppol2tp_session_get_sock(session); if (sk) { state = sk->sk_state; @@ -1565,8 +1556,8 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) struct pppox_sock *po = pppox_sk(sk); seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); - sock_put(sk); } + rcu_read_unlock(); } static int pppol2tp_seq_show(struct seq_file *m, void *v) From 1cc8a5b534e5f9b5e129e54ee2e63c9f5da4f39a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Aug 2025 17:21:49 +0000 Subject: [PATCH 2397/2411] net: rose: fix a typo in rose_clear_routes() syzbot crashed in rose_clear_routes(), after a recent patch typo. 
KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] CPU: 0 UID: 0 PID: 10591 Comm: syz.3.1856 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 RIP: 0010:rose_clear_routes net/rose/rose_route.c:565 [inline] RIP: 0010:rose_rt_ioctl+0x162/0x1250 net/rose/rose_route.c:760 rose_ioctl+0x3ce/0x8b0 net/rose/af_rose.c:1381 sock_do_ioctl+0xd9/0x300 net/socket.c:1238 sock_ioctl+0x576/0x790 net/socket.c:1359 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:598 [inline] __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:584 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: da9c9c877597 ("net: rose: include node references in rose_neigh refcount") Reported-by: syzbot+2eb8d1719f7cfcfa6840@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68af3e29.a70a0220.3cafd4.002e.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Takamitsu Iwai Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250827172149.5359-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/rose/rose_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 1adee1fbc2ed..a1e9b05ef6f5 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -562,7 +562,7 @@ static int rose_clear_routes(void) rose_node = rose_node->next; if (!t->loopback) { - for (i = 0; i < rose_node->count; i++) + for (i = 0; i < t->count; i++) rose_neigh_put(t->neighbour[i]); rose_remove_node(t); } From 2ddaa562b465921a5d1da3fc939993b92b953e20 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 25 Aug 2025 15:56:06 -0700 Subject: [PATCH 2398/2411] fbnic: Fixup rtnl_lock and devl_lock handling related to mailbox code The exception handling path for the __fbnic_pm_resume function had a bug in that it was taking the devlink lock and then 
exiting to exception handling instead of waiting until after it released the lock to do so. In order to handle that I am swapping the placement of the unlock and the exception handling jump to label so that we don't trigger a deadlock by holding the lock longer than we need to. In addition this change applies the same ordering to the rtnl_lock/unlock calls in the same function as it should make the code easier to follow if it adheres to a consistent pattern. Fixes: 82534f446daa ("eth: fbnic: Add devlink dev flash support") Signed-off-by: Alexander Duyck Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/175616256667.1963577.5543500806256052549.stgit@ahduyck-xeon-server.home.arpa Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_pci.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index b70e4cadb37b..a7784deea88f 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -443,11 +443,10 @@ static int __fbnic_pm_resume(struct device *dev) /* Re-enable mailbox */ err = fbnic_fw_request_mbx(fbd); + devl_unlock(priv_to_devlink(fbd)); if (err) goto err_free_irqs; - devl_unlock(priv_to_devlink(fbd)); - /* Only send log history if log buffer is empty to prevent duplicate * log entries. 
*/ @@ -464,20 +463,20 @@ static int __fbnic_pm_resume(struct device *dev) rtnl_lock(); - if (netif_running(netdev)) { + if (netif_running(netdev)) err = __fbnic_open(fbn); - if (err) - goto err_free_mbx; - } rtnl_unlock(); + if (err) + goto err_free_mbx; return 0; err_free_mbx: fbnic_fw_log_disable(fbd); - rtnl_unlock(); + devl_lock(priv_to_devlink(fbd)); fbnic_fw_free_mbx(fbd); + devl_unlock(priv_to_devlink(fbd)); err_free_irqs: fbnic_free_irqs(fbd); err_invalidate_uc_addr: From 6ede14a2c6365e7e5d855643c7c8390b5268c467 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 25 Aug 2025 15:56:13 -0700 Subject: [PATCH 2399/2411] fbnic: Move phylink resume out of service_task and into open/close The fbnic driver was presenting with the following locking assert coming out of a PM resume: [ 42.208116][ T164] RTNL: assertion failed at drivers/net/phy/phylink.c (2611) [ 42.208492][ T164] WARNING: CPU: 1 PID: 164 at drivers/net/phy/phylink.c:2611 phylink_resume+0x190/0x1e0 [ 42.208872][ T164] Modules linked in: [ 42.209140][ T164] CPU: 1 UID: 0 PID: 164 Comm: bash Not tainted 6.17.0-rc2-virtme #134 PREEMPT(full) [ 42.209496][ T164] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.17.0-5.fc42 04/01/2014 [ 42.209861][ T164] RIP: 0010:phylink_resume+0x190/0x1e0 [ 42.210057][ T164] Code: 83 e5 01 0f 85 b0 fe ff ff c6 05 1c cd 3e 02 01 90 ba 33 0a 00 00 48 c7 c6 20 3a 1d a5 48 c7 c7 e0 3e 1d a5 e8 21 b8 90 fe 90 <0f> 0b 90 90 e9 86 fe ff ff e8 42 ea 1f ff e9 e2 fe ff ff 48 89 ef [ 42.210708][ T164] RSP: 0018:ffffc90000affbd8 EFLAGS: 00010296 [ 42.210983][ T164] RAX: 0000000000000000 RBX: ffff8880078d8400 RCX: 0000000000000000 [ 42.211235][ T164] RDX: 0000000000000000 RSI: 1ffffffff4f10938 RDI: 0000000000000001 [ 42.211466][ T164] RBP: 0000000000000000 R08: ffffffffa2ae79ea R09: fffffbfff4b3eb84 [ 42.211707][ T164] R10: 0000000000000003 R11: 0000000000000000 R12: ffff888007ad8000 [ 42.211997][ T164] R13: 0000000000000002 R14: ffff888006a18800 R15: ffffffffa34c59e0 [ 
42.212234][ T164] FS: 00007f0dc8e39740(0000) GS:ffff88808f51f000(0000) knlGS:0000000000000000 [ 42.212505][ T164] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 42.212704][ T164] CR2: 00007f0dc8e9fe10 CR3: 000000000b56d003 CR4: 0000000000772ef0 [ 42.213227][ T164] PKRU: 55555554 [ 42.213366][ T164] Call Trace: [ 42.213483][ T164] [ 42.213565][ T164] __fbnic_pm_attach.isra.0+0x8e/0xa0 [ 42.213725][ T164] pci_reset_function+0x116/0x1d0 [ 42.213895][ T164] reset_store+0xa0/0x100 [ 42.214025][ T164] ? pci_dev_reset_attr_is_visible+0x50/0x50 [ 42.214221][ T164] ? sysfs_file_kobj+0xc1/0x1e0 [ 42.214374][ T164] ? sysfs_kf_write+0x65/0x160 [ 42.214526][ T164] kernfs_fop_write_iter+0x2f8/0x4c0 [ 42.214677][ T164] ? kernfs_vma_page_mkwrite+0x1f0/0x1f0 [ 42.214836][ T164] new_sync_write+0x308/0x6f0 [ 42.214987][ T164] ? __lock_acquire+0x34c/0x740 [ 42.215135][ T164] ? new_sync_read+0x6f0/0x6f0 [ 42.215288][ T164] ? lock_acquire.part.0+0xbc/0x260 [ 42.215440][ T164] ? ksys_write+0xff/0x200 [ 42.215590][ T164] ? perf_trace_sched_switch+0x6d0/0x6d0 [ 42.215742][ T164] vfs_write+0x65e/0xbb0 [ 42.215876][ T164] ksys_write+0xff/0x200 [ 42.215994][ T164] ? __ia32_sys_read+0xc0/0xc0 [ 42.216141][ T164] ? do_user_addr_fault+0x269/0x9f0 [ 42.216292][ T164] ? rcu_is_watching+0x15/0xd0 [ 42.216442][ T164] do_syscall_64+0xbb/0x360 [ 42.216591][ T164] entry_SYSCALL_64_after_hwframe+0x4b/0x53 [ 42.216784][ T164] RIP: 0033:0x7f0dc8ea9986 A bit of digging showed that we were invoking the phylink_resume as a part of the fbnic_up path when we were enabling the service task while not holding the RTNL lock. We should be enabling this sooner as a part of the ndo_open path and then just letting the service task come online later. This will help to enforce the correct locking and brings the phylink interface online at the same time as the network interface, instead of at a later time. 
I verified this on QEMU by suspending the guest with "echo mem > /sys/power/state" and then resuming it with the "system_wakeup" command in the QEMU monitor. Fixes: 69684376eed5 ("eth: fbnic: Add link detection") Signed-off-by: Alexander Duyck Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/175616257316.1963577.12238158800417771119.stgit@ahduyck-xeon-server.home.arpa Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_netdev.c | 4 ++++ drivers/net/ethernet/meta/fbnic/fbnic_pci.c | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index e67e99487a27..40581550da1a 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -52,6 +52,8 @@ int __fbnic_open(struct fbnic_net *fbn) fbnic_bmc_rpc_init(fbd); fbnic_rss_reinit(fbd, fbn); + phylink_resume(fbn->phylink); + return 0; time_stop: fbnic_time_stop(fbn); @@ -84,6 +86,8 @@ static int fbnic_stop(struct net_device *netdev) { struct fbnic_net *fbn = netdev_priv(netdev); + phylink_suspend(fbn->phylink, fbnic_bmc_present(fbn->fbd)); + fbnic_down(fbn); fbnic_pcs_free_irq(fbn->fbd); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index a7784deea88f..28e23e3ffca8 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -118,14 +118,12 @@ static void fbnic_service_task_start(struct fbnic_net *fbn) struct fbnic_dev *fbd = fbn->fbd; schedule_delayed_work(&fbd->service_task, HZ); - phylink_resume(fbn->phylink); } static void fbnic_service_task_stop(struct fbnic_net *fbn) { struct fbnic_dev *fbd = fbn->fbd; - phylink_suspend(fbn->phylink, fbnic_bmc_present(fbd)); cancel_delayed_work(&fbd->service_task); } From 82b8166171bdebbc74717e4a0cfb4b89cd0510aa Mon Sep 17
00:00:00 2001 From: Qianfeng Rong Date: Tue, 26 Aug 2025 15:54:18 +0800 Subject: [PATCH 2400/2411] ata: ahci_xgene: Use int type for 'rc' to store error codes Use int instead of u32 for the 'rc' variable in xgene_ahci_softreset() to store negative error codes returned by ahci_do_softreset(). In xgene_ahci_pmp_softreset(), remove the redundant 'rc' variable and directly return the result of the ahci_do_softreset() call instead. Signed-off-by: Qianfeng Rong Signed-off-by: Damien Le Moal --- drivers/ata/ahci_xgene.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c index 5d5a51a77f5d..6b8844646fcd 100644 --- a/drivers/ata/ahci_xgene.c +++ b/drivers/ata/ahci_xgene.c @@ -450,7 +450,6 @@ static int xgene_ahci_pmp_softreset(struct ata_link *link, unsigned int *class, { int pmp = sata_srst_pmp(link); struct ata_port *ap = link->ap; - u32 rc; void __iomem *port_mmio = ahci_port_base(ap); u32 port_fbs; @@ -463,9 +462,7 @@ static int xgene_ahci_pmp_softreset(struct ata_link *link, unsigned int *class, port_fbs |= pmp << PORT_FBS_DEV_OFFSET; writel(port_fbs, port_mmio + PORT_FBS); - rc = ahci_do_softreset(link, class, pmp, deadline, ahci_check_ready); - - return rc; + return ahci_do_softreset(link, class, pmp, deadline, ahci_check_ready); } /** @@ -500,7 +497,7 @@ static int xgene_ahci_softreset(struct ata_link *link, unsigned int *class, u32 port_fbs; u32 port_fbs_save; u32 retry = 1; - u32 rc; + int rc; port_fbs_save = readl(port_mmio + PORT_FBS); From dac978e51cce0c1f00a14c4a82f81d387f79b2d4 Mon Sep 17 00:00:00 2001 From: Neil Mandir Date: Tue, 26 Aug 2025 10:30:22 -0400 Subject: [PATCH 2401/2411] net: macb: Disable clocks once When the driver is removed the clocks are disabled twice: once in macb_remove and a second time by runtime pm. Disable wakeup in remove so all the clocks are disabled and skip the second call to macb_clks_disable. Always suspend the device as we always set it active in probe. 
Fixes: d54f89af6cc4 ("net: macb: Add pm runtime support") Signed-off-by: Neil Mandir Co-developed-by: Sean Anderson Signed-off-by: Sean Anderson Link: https://patch.msgid.link/20250826143022.935521-1-sean.anderson@linux.dev Signed-off-by: Paolo Abeni --- drivers/net/ethernet/cadence/macb_main.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 106885451147..16d28a8b3b56 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5404,14 +5404,11 @@ static void macb_remove(struct platform_device *pdev) mdiobus_unregister(bp->mii_bus); mdiobus_free(bp->mii_bus); + device_set_wakeup_enable(&bp->pdev->dev, 0); cancel_work_sync(&bp->hresp_err_bh_work); pm_runtime_disable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); - if (!pm_runtime_suspended(&pdev->dev)) { - macb_clks_disable(bp->pclk, bp->hclk, bp->tx_clk, - bp->rx_clk, bp->tsu_clk); - pm_runtime_set_suspended(&pdev->dev); - } + pm_runtime_set_suspended(&pdev->dev); phylink_destroy(bp->phylink); free_netdev(dev); } From 5189446ba995556eaa3755a6e875bc06675b88bd Mon Sep 17 00:00:00 2001 From: Oscar Maes Date: Wed, 27 Aug 2025 08:23:21 +0200 Subject: [PATCH 2402/2411] net: ipv4: fix regression in local-broadcast routes Commit 9e30ecf23b1b ("net: ipv4: fix incorrect MTU in broadcast routes") introduced a regression where local-broadcast packets would have their gateway set in __mkroute_output, which was caused by fi = NULL being removed. Fix this by resetting the fib_info for local-broadcast packets. This preserves the intended changes for directed-broadcast packets. 
Cc: stable@vger.kernel.org Fixes: 9e30ecf23b1b ("net: ipv4: fix incorrect MTU in broadcast routes") Reported-by: Brett A C Sheffield Closes: https://lore.kernel.org/regressions/20250822165231.4353-4-bacs@librecast.net Signed-off-by: Oscar Maes Reviewed-by: David Ahern Link: https://patch.msgid.link/20250827062322.4807-1-oscmaes92@gmail.com Signed-off-by: Paolo Abeni --- net/ipv4/route.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f639a2ae881a..baa43e5966b1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2575,12 +2575,16 @@ static struct rtable *__mkroute_output(const struct fib_result *res, !netif_is_l3_master(dev_out)) return ERR_PTR(-EINVAL); - if (ipv4_is_lbcast(fl4->daddr)) + if (ipv4_is_lbcast(fl4->daddr)) { type = RTN_BROADCAST; - else if (ipv4_is_multicast(fl4->daddr)) + + /* reset fi to prevent gateway resolution */ + fi = NULL; + } else if (ipv4_is_multicast(fl4->daddr)) { type = RTN_MULTICAST; - else if (ipv4_is_zeronet(fl4->daddr)) + } else if (ipv4_is_zeronet(fl4->daddr)) { return ERR_PTR(-EINVAL); + } if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; From 98b6fa62c84f2e129161e976a5b9b3cb4ccd117b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 27 Aug 2025 15:27:30 -0600 Subject: [PATCH 2403/2411] io_uring/kbuf: always use READ_ONCE() to read ring provided buffer lengths Since the buffers are mapped from userspace, it is prudent to use READ_ONCE() to read the value into a local variable, and use that for any other actions taken. Having a stable read of the buffer length avoids worrying about it changing after checking, or being read multiple times. Similarly, the buffer may well change in between it being picked and being committed. Ensure the looping for incremental ring buffer commit stops if it hits a zero sized buffer, as no further progress can be made at that point. 
Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption") Link: https://lore.kernel.org/io-uring/tencent_000C02641F6250C856D0C26228DE29A3D30A@qq.com/ Reported-by: Qingyue Zhang Reported-by: Suoxing Zhang Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 81a13338dfab..19a8bde5e1e1 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -36,15 +36,19 @@ static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len) { while (len) { struct io_uring_buf *buf; - u32 this_len; + u32 buf_len, this_len; buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask); - this_len = min_t(u32, len, buf->len); - buf->len -= this_len; - if (buf->len) { + buf_len = READ_ONCE(buf->len); + this_len = min_t(u32, len, buf_len); + buf_len -= this_len; + /* Stop looping for invalid buffer length of 0 */ + if (buf_len || !this_len) { buf->addr += this_len; + buf->len = buf_len; return false; } + buf->len = 0; bl->head++; len -= this_len; } @@ -159,6 +163,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, __u16 tail, head = bl->head; struct io_uring_buf *buf; void __user *ret; + u32 buf_len; tail = smp_load_acquire(&br->tail); if (unlikely(tail == head)) @@ -168,8 +173,9 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, req->flags |= REQ_F_BL_EMPTY; buf = io_ring_head_to_buf(br, head, bl->mask); - if (*len == 0 || *len > buf->len) - *len = buf->len; + buf_len = READ_ONCE(buf->len); + if (*len == 0 || *len > buf_len) + *len = buf_len; req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT; req->buf_list = bl; req->buf_index = buf->bid; @@ -265,7 +271,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, req->buf_index = buf->bid; do { - u32 len = buf->len; + u32 len = READ_ONCE(buf->len); /* truncate end piece, if needed, for non partial buffers */ if 
(len > arg->max_len) { From c5c5eb24ed6177fc0ef4bb75fc18d07a99c1d3f0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 27 Aug 2025 20:15:59 +0800 Subject: [PATCH 2404/2411] ublk: avoid ublk_io_release() called after ublk char dev is closed When running test_stress_04.sh, the following warning is triggered: WARNING: CPU: 1 PID: 135 at drivers/block/ublk_drv.c:1933 ublk_ch_release+0x423/0x4b0 [ublk_drv] This happens when the daemon is abruptly killed: - some references may still be held, because registering IO buffer doesn't grab ublk char device reference OR - io->task_registered_buffers won't be cleared because io buffer is released from non-daemon context For zero-copy and auto buffer register modes, I/O reference crosses syscalls, so IO reference may not be dropped naturally when ublk server is killed abruptly. However, when releasing io_uring context, it is guaranteed that the reference is dropped finally, see io_sqe_buffers_unregister() from io_ring_ctx_free(). Fix this by moving the release handling into a delayed work function that uses the new ublk_check_and_reset_active_ref() helper to: - Wait asynchronously for active I/O references to be dropped, by rescheduling the work function, which avoids a release dependency between the ublk char device and the io_uring file - Reset io->ref and io->task_registered_buffers to a clean state This ensures the reference count state is clean when ublk_queue_reinit() is called, preventing the warning and potential use-after-free.
Fixes: 1f6540e2aabb ("ublk: zc register/unregister bvec") Fixes: 1ceeedb59749 ("ublk: optimize UBLK_IO_UNREGISTER_IO_BUF on daemon task") Fixes: 8a8fe42d765b ("ublk: optimize UBLK_IO_REGISTER_IO_BUF on daemon task") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250827121602.2619736-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 72 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 70 insertions(+), 2 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 99abd67b708b..67d4a867aec4 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -239,6 +239,7 @@ struct ublk_device { struct mutex cancel_mutex; bool canceling; pid_t ublksrv_tgid; + struct delayed_work exit_work; }; /* header of ublk_params */ @@ -1595,12 +1596,62 @@ static void ublk_set_canceling(struct ublk_device *ub, bool canceling) ublk_get_queue(ub, i)->canceling = canceling; } -static int ublk_ch_release(struct inode *inode, struct file *filp) +static bool ublk_check_and_reset_active_ref(struct ublk_device *ub) { - struct ublk_device *ub = filp->private_data; + int i, j; + + if (!(ub->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | + UBLK_F_AUTO_BUF_REG))) + return false; + + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { + struct ublk_queue *ubq = ublk_get_queue(ub, i); + + for (j = 0; j < ubq->q_depth; j++) { + struct ublk_io *io = &ubq->ios[j]; + unsigned int refs = refcount_read(&io->ref) + + io->task_registered_buffers; + + /* + * UBLK_REFCOUNT_INIT or zero means no active + * reference + */ + if (refs != UBLK_REFCOUNT_INIT && refs != 0) + return true; + + /* reset to zero if the io hasn't active references */ + refcount_set(&io->ref, 0); + io->task_registered_buffers = 0; + } + } + return false; +} + +static void ublk_ch_release_work_fn(struct work_struct *work) +{ + struct ublk_device *ub = + container_of(work, struct ublk_device, exit_work.work); struct gendisk *disk; int i; + /* + * For zero-copy and 
auto buffer register modes, I/O references + * might not be dropped naturally when the daemon is killed, but + * io_uring guarantees that registered bvec kernel buffers are + * unregistered finally when freeing io_uring context, then the + * active references are dropped. + * + * Wait until active references are dropped for avoiding use-after-free + * + * registered buffer may be unregistered in io_uring's release handler, + * so have to wait by scheduling work function for avoiding the two + * file release dependency. + */ + if (ublk_check_and_reset_active_ref(ub)) { + schedule_delayed_work(&ub->exit_work, 1); + return; + } + /* * disk isn't attached yet, either device isn't live, or it has * been removed already, so we needn't to do anything @@ -1673,6 +1724,23 @@ static int ublk_ch_release(struct inode *inode, struct file *filp) ublk_reset_ch_dev(ub); out: clear_bit(UB_STATE_OPEN, &ub->state); + + /* put the reference grabbed in ublk_ch_release() */ + ublk_put_device(ub); +} + +static int ublk_ch_release(struct inode *inode, struct file *filp) +{ + struct ublk_device *ub = filp->private_data; + + /* + * Grab ublk device reference, so it won't be gone until we are + * really released from work function. + */ + ublk_get_device(ub); + + INIT_DELAYED_WORK(&ub->exit_work, ublk_ch_release_work_fn); + schedule_delayed_work(&ub->exit_work, 0); return 0; } From 9b2785ea8592f239836405de023c75c4f3f5ce00 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 27 Aug 2025 20:16:00 +0800 Subject: [PATCH 2405/2411] ublk selftests: add --no_ublk_fixed_fd for not using registered ublk char device Add a new command line option --no_ublk_fixed_fd that excludes the ublk control device (/dev/ublkcN) from io_uring's registered files array. When this option is used, only backing files are registered starting from index 1, while the ublk control device is accessed using its raw file descriptor.
Add ublk_get_registered_fd() helper function that returns the appropriate file descriptor for use with io_uring operations. Key optimizations implemented: - Cache UBLKS_Q_NO_UBLK_FIXED_FD flag in ublk_queue.flags to avoid reading dev->no_ublk_fixed_fd in fast path - Cache ublk char device fd in ublk_queue.ublk_fd for fast access - Update ublk_get_registered_fd() to use ublk_queue * parameter - Update io_uring_prep_buf_register/unregister() to use ublk_queue * - Replace ublk_device * access with ublk_queue * access in fast paths Also pass --no_ublk_fixed_fd to test_stress_04.sh for covering plain ublk char device mode. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250827121602.2619736-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/file_backed.c | 10 ++--- tools/testing/selftests/ublk/kublk.c | 38 +++++++++++++--- tools/testing/selftests/ublk/kublk.h | 45 +++++++++++++------ tools/testing/selftests/ublk/null.c | 4 +- tools/testing/selftests/ublk/stripe.c | 4 +- .../testing/selftests/ublk/test_stress_04.sh | 6 +-- 6 files changed, 74 insertions(+), 33 deletions(-) diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c index 2d93ac860bd5..cd9fe69ecce2 100644 --- a/tools/testing/selftests/ublk/file_backed.c +++ b/tools/testing/selftests/ublk/file_backed.c @@ -20,7 +20,7 @@ static int loop_queue_flush_io(struct ublk_thread *t, struct ublk_queue *q, struct io_uring_sqe *sqe[1]; ublk_io_alloc_sqes(t, sqe, 1); - io_uring_prep_fsync(sqe[0], 1 /*fds[1]*/, IORING_FSYNC_DATASYNC); + io_uring_prep_fsync(sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/, IORING_FSYNC_DATASYNC); io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE); /* bit63 marks us as tgt io */ sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1); @@ -42,7 +42,7 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, if (!sqe[0]) return -ENOMEM; - io_uring_prep_rw(op, sqe[0], 1 /*fds[1]*/, + 
io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/, addr, iod->nr_sectors << 9, iod->start_sector << 9); @@ -56,19 +56,19 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, ublk_io_alloc_sqes(t, sqe, 3); - io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); + io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; sqe[0]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1); - io_uring_prep_rw(op, sqe[1], 1 /*fds[1]*/, 0, + io_uring_prep_rw(op, sqe[1], ublk_get_registered_fd(q, 1) /*fds[1]*/, 0, iod->nr_sectors << 9, iod->start_sector << 9); sqe[1]->buf_index = tag; sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK; sqe[1]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1); - io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); + io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1); return 2; diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 6512dfbdbce3..b71faba86c3b 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -432,7 +432,7 @@ static void ublk_thread_deinit(struct ublk_thread *t) } } -static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) +static int ublk_queue_init(struct ublk_queue *q, unsigned long long extra_flags) { struct ublk_dev *dev = q->dev; int depth = dev->dev_info.queue_depth; @@ -446,6 +446,9 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) q->flags = dev->dev_info.flags; q->flags |= extra_flags; + /* Cache fd in queue for fast path access */ + q->ublk_fd = dev->fds[0]; + cmd_buf_size = ublk_queue_cmd_buf_sz(q); off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * 
ublk_queue_max_cmd_buf_sz(); q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ, @@ -481,9 +484,10 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) return -ENOMEM; } -static int ublk_thread_init(struct ublk_thread *t) +static int ublk_thread_init(struct ublk_thread *t, unsigned long long extra_flags) { struct ublk_dev *dev = t->dev; + unsigned long long flags = dev->dev_info.flags | extra_flags; int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth; int ret; @@ -512,7 +516,17 @@ static int ublk_thread_init(struct ublk_thread *t) io_uring_register_ring_fd(&t->ring); - ret = io_uring_register_files(&t->ring, dev->fds, dev->nr_fds); + if (flags & UBLKS_Q_NO_UBLK_FIXED_FD) { + /* Register only backing files starting from index 1, exclude ublk control device */ + if (dev->nr_fds > 1) { + ret = io_uring_register_files(&t->ring, &dev->fds[1], dev->nr_fds - 1); + } else { + /* No backing files to register, skip file registration */ + ret = 0; + } + } else { + ret = io_uring_register_files(&t->ring, dev->fds, dev->nr_fds); + } if (ret) { ublk_err("ublk dev %d thread %d register files failed %d\n", t->dev->dev_info.dev_id, t->idx, ret); @@ -626,9 +640,12 @@ int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io) /* These fields should be written once, never change */ ublk_set_sqe_cmd_op(sqe[0], cmd_op); - sqe[0]->fd = 0; /* dev->fds[0] */ + sqe[0]->fd = ublk_get_registered_fd(q, 0); /* dev->fds[0] */ sqe[0]->opcode = IORING_OP_URING_CMD; - sqe[0]->flags = IOSQE_FIXED_FILE; + if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) + sqe[0]->flags = 0; /* Use raw FD, not fixed file */ + else + sqe[0]->flags = IOSQE_FIXED_FILE; sqe[0]->rw_flags = 0; cmd->tag = io->tag; cmd->q_id = q->q_id; @@ -832,6 +849,7 @@ struct ublk_thread_info { unsigned idx; sem_t *ready; cpu_set_t *affinity; + unsigned long long extra_flags; }; static void *ublk_io_handler_fn(void *data) @@ -844,7 +862,7 @@ static void *ublk_io_handler_fn(void *data) t->dev = info->dev; t->idx 
= info->idx; - ret = ublk_thread_init(t); + ret = ublk_thread_init(t, info->extra_flags); if (ret) { ublk_err("ublk dev %d thread %u init failed\n", dev_id, t->idx); @@ -934,6 +952,8 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) if (ctx->auto_zc_fallback) extra_flags = UBLKS_Q_AUTO_BUF_REG_FALLBACK; + if (ctx->no_ublk_fixed_fd) + extra_flags |= UBLKS_Q_NO_UBLK_FIXED_FD; for (i = 0; i < dinfo->nr_hw_queues; i++) { dev->q[i].dev = dev; @@ -951,6 +971,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) tinfo[i].dev = dev; tinfo[i].idx = i; tinfo[i].ready = &ready; + tinfo[i].extra_flags = extra_flags; /* * If threads are not tied 1:1 to queues, setting thread @@ -1471,7 +1492,7 @@ static void __cmd_create_help(char *exe, bool recovery) printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n", exe, recovery ? "recover" : "add"); printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n"); - printf("\t[-e 0|1 ] [-i 0|1]\n"); + printf("\t[-e 0|1 ] [-i 0|1] [--no_ublk_fixed_fd]\n"); printf("\t[--nthreads threads] [--per_io_tasks]\n"); printf("\t[target options] [backfile1] [backfile2] ...\n"); printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); @@ -1534,6 +1555,7 @@ int main(int argc, char *argv[]) { "size", 1, NULL, 's'}, { "nthreads", 1, NULL, 0 }, { "per_io_tasks", 0, NULL, 0 }, + { "no_ublk_fixed_fd", 0, NULL, 0 }, { 0, 0, 0, 0 } }; const struct ublk_tgt_ops *ops = NULL; @@ -1613,6 +1635,8 @@ int main(int argc, char *argv[]) ctx.nthreads = strtol(optarg, NULL, 10); if (!strcmp(longopts[option_idx].name, "per_io_tasks")) ctx.per_io_tasks = 1; + if (!strcmp(longopts[option_idx].name, "no_ublk_fixed_fd")) + ctx.no_ublk_fixed_fd = 1; break; case '?': /* diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 219233f8a053..5e55484fb0aa 100644 --- 
a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -77,6 +77,7 @@ struct dev_ctx { unsigned int recovery:1; unsigned int auto_zc_fallback:1; unsigned int per_io_tasks:1; + unsigned int no_ublk_fixed_fd:1; int _evtfd; int _shmid; @@ -166,7 +167,9 @@ struct ublk_queue { /* borrow one bit of ublk uapi flags, which may never be used */ #define UBLKS_Q_AUTO_BUF_REG_FALLBACK (1ULL << 63) +#define UBLKS_Q_NO_UBLK_FIXED_FD (1ULL << 62) __u64 flags; + int ublk_fd; /* cached ublk char device fd */ struct ublk_io ios[UBLK_QUEUE_DEPTH]; }; @@ -273,34 +276,48 @@ static inline int ublk_io_alloc_sqes(struct ublk_thread *t, return nr_sqes; } -static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe, - int dev_fd, int tag, int q_id, __u64 index) +static inline int ublk_get_registered_fd(struct ublk_queue *q, int fd_index) +{ + if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) { + if (fd_index == 0) + /* Return the raw ublk FD for index 0 */ + return q->ublk_fd; + /* Adjust index for backing files (index 1 becomes 0, etc.) 
*/ + return fd_index - 1; + } + return fd_index; +} + +static inline void __io_uring_prep_buf_reg_unreg(struct io_uring_sqe *sqe, + struct ublk_queue *q, int tag, int q_id, __u64 index) { struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd; + int dev_fd = ublk_get_registered_fd(q, 0); io_uring_prep_read(sqe, dev_fd, 0, 0, 0); sqe->opcode = IORING_OP_URING_CMD; - sqe->flags |= IOSQE_FIXED_FILE; - sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF; + if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) + sqe->flags &= ~IOSQE_FIXED_FILE; + else + sqe->flags |= IOSQE_FIXED_FILE; cmd->tag = tag; cmd->addr = index; cmd->q_id = q_id; } -static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe, - int dev_fd, int tag, int q_id, __u64 index) +static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe, + struct ublk_queue *q, int tag, int q_id, __u64 index) { - struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd; + __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index); + sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF; +} - io_uring_prep_read(sqe, dev_fd, 0, 0, 0); - sqe->opcode = IORING_OP_URING_CMD; - sqe->flags |= IOSQE_FIXED_FILE; +static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe, + struct ublk_queue *q, int tag, int q_id, __u64 index) +{ + __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index); sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF; - - cmd->tag = tag; - cmd->addr = index; - cmd->q_id = q_id; } static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe) diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c index f0e0003a4860..280043f6b689 100644 --- a/tools/testing/selftests/ublk/null.c +++ b/tools/testing/selftests/ublk/null.c @@ -63,7 +63,7 @@ static int null_queue_zc_io(struct ublk_thread *t, struct ublk_queue *q, ublk_io_alloc_sqes(t, sqe, 3); - io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); + io_uring_prep_buf_register(sqe[0], q, tag, 
q->q_id, ublk_get_io(q, tag)->buf_index); sqe[0]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1); sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; @@ -71,7 +71,7 @@ static int null_queue_zc_io(struct ublk_thread *t, struct ublk_queue *q, __setup_nop_io(tag, iod, sqe[1], q->q_id); sqe[1]->flags |= IOSQE_IO_HARDLINK; - io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); + io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1); // buf register is marked as IOSQE_CQE_SKIP_SUCCESS diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c index 1fb9b7cc281b..791fa8dc1651 100644 --- a/tools/testing/selftests/ublk/stripe.c +++ b/tools/testing/selftests/ublk/stripe.c @@ -142,7 +142,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, ublk_io_alloc_sqes(t, sqe, s->nr + extra); if (zc) { - io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, io->buf_index); + io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, io->buf_index); sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; sqe[0]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1); @@ -168,7 +168,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, if (zc) { struct io_uring_sqe *unreg = sqe[s->nr + 1]; - io_uring_prep_buf_unregister(unreg, 0, tag, q->q_id, io->buf_index); + io_uring_prep_buf_unregister(unreg, q, tag, q->q_id, io->buf_index); unreg->user_data = build_user_data( tag, ublk_cmd_op_nr(unreg->cmd_op), 0, q->q_id, 1); } diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh index 40d1437ca298..3f901db4d09d 100755 --- a/tools/testing/selftests/ublk/test_stress_04.sh +++ b/tools/testing/selftests/ublk/test_stress_04.sh @@ -28,14 +28,14 
@@ _create_backfile 0 256M _create_backfile 1 128M _create_backfile 2 128M -ublk_io_and_kill_daemon 8G -t null -q 4 -z & -ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & +ublk_io_and_kill_daemon 8G -t null -q 4 -z --no_ublk_fixed_fd & +ublk_io_and_kill_daemon 256M -t loop -q 4 -z --no_ublk_fixed_fd "${UBLK_BACKFILES[0]}" & ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & if _have_feature "AUTO_BUF_REG"; then ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc & ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" & - ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc --no_ublk_fixed_fd "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback & fi From 95a7c5000956f939b86d8b00b8e6b8345f4a9b65 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Thu, 28 Aug 2025 23:48:35 +0800 Subject: [PATCH 2406/2411] bcache: change maintainer's email address Change to my new email address on fnnas.com. Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20250828154835.32926-1-colyli@kernel.org Signed-off-by: Jens Axboe --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fe168477caa4..b47daf498a97 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4205,7 +4205,7 @@ W: http://www.baycom.org/~tom/ham/ham.html F: drivers/net/hamradio/baycom* BCACHE (BLOCK LAYER CACHE) -M: Coly Li +M: Coly Li M: Kent Overstreet L: linux-bcache@vger.kernel.org S: Maintained From ebf2bfec412ad293a0b118fb1a20a551088ebc9b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 28 Aug 2025 15:16:16 -0700 Subject: [PATCH 2407/2411] MAINTAINERS: mark bcachefs externally maintained As per many long discussion threads, public and private. 
Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fed6cd812d79..adcbb094ebd5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4216,7 +4216,7 @@ F: drivers/md/bcache/ BCACHEFS M: Kent Overstreet L: linux-bcachefs@vger.kernel.org -S: Supported +S: Externally maintained C: irc://irc.oftc.net/bcache P: Documentation/filesystems/bcachefs/SubmittingPatches.rst T: git https://evilpiepirate.org/git/bcachefs.git From c34414883f773412964404d77cd2fea04c6f7d60 Mon Sep 17 00:00:00 2001 From: Louis-Alexis Eyraud Date: Mon, 18 Aug 2025 16:17:52 +0200 Subject: [PATCH 2408/2411] drm/mediatek: mtk_hdmi: Fix inverted parameters in some regmap_update_bits calls In mtk_hdmi driver, a recent change replaced custom register access function calls by regmap ones, but two replacements by regmap_update_bits were done incorrectly, because original offset and mask parameters were inverted, so fix them. Fixes: d6e25b3590a0 ("drm/mediatek: hdmi: Use regmap instead of iomem for main registers") Signed-off-by: Louis-Alexis Eyraud Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20250818-mt8173-fix-hdmi-issue-v1-1-55aff9b0295d@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 845fd8aa43c3..b766dd5e6c8d 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -182,8 +182,8 @@ static inline struct mtk_hdmi *hdmi_ctx_from_bridge(struct drm_bridge *b) static void mtk_hdmi_hw_vid_black(struct mtk_hdmi *hdmi, bool black) { - regmap_update_bits(hdmi->regs, VIDEO_SOURCE_SEL, - VIDEO_CFG_4, black ? GEN_RGB : NORMAL_PATH); + regmap_update_bits(hdmi->regs, VIDEO_CFG_4, + VIDEO_SOURCE_SEL, black ? 
GEN_RGB : NORMAL_PATH); } static void mtk_hdmi_hw_make_reg_writable(struct mtk_hdmi *hdmi, bool enable) @@ -310,8 +310,8 @@ static void mtk_hdmi_hw_send_info_frame(struct mtk_hdmi *hdmi, u8 *buffer, static void mtk_hdmi_hw_send_aud_packet(struct mtk_hdmi *hdmi, bool enable) { - regmap_update_bits(hdmi->regs, AUDIO_PACKET_OFF, - GRL_SHIFT_R2, enable ? 0 : AUDIO_PACKET_OFF); + regmap_update_bits(hdmi->regs, GRL_SHIFT_R2, + AUDIO_PACKET_OFF, enable ? 0 : AUDIO_PACKET_OFF); } static void mtk_hdmi_hw_config_sys(struct mtk_hdmi *hdmi) From 4e411a3f7bba10579bbeec3af77ae2a05c9e4259 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 21 Aug 2025 16:38:13 +0200 Subject: [PATCH 2409/2411] HID: core: factor out hid_set_group() When we load a bpf, we can change the report descriptor. However, the current implementation doesn't change the group meaning that we can not rebind a device from hid-generic to hid-multitouch. This is a preparatory patch to force a rescan of the device after the bpf has been loaded. 
Signed-off-by: Benjamin Tissoires Reviewed-by: Peter Hutterer Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 5419a6c10907..cf68fdffe058 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2708,6 +2708,20 @@ static bool hid_check_device_match(struct hid_device *hdev, return !hid_ignore_special_drivers && !(hdev->quirks & HID_QUIRK_IGNORE_SPECIAL_DRIVER); } +static void hid_set_group(struct hid_device *hdev) +{ + int ret; + + if (hid_ignore_special_drivers) { + hdev->group = HID_GROUP_GENERIC; + } else if (!hdev->group && + !(hdev->quirks & HID_QUIRK_HAVE_SPECIAL_DRIVER)) { + ret = hid_scan_report(hdev); + if (ret) + hid_warn(hdev, "bad device descriptor (%d)\n", ret); + } +} + static int __hid_device_probe(struct hid_device *hdev, struct hid_driver *hdrv) { const struct hid_device_id *id; @@ -2903,14 +2917,7 @@ int hid_add_device(struct hid_device *hdev) /* * Scan generic devices for group information */ - if (hid_ignore_special_drivers) { - hdev->group = HID_GROUP_GENERIC; - } else if (!hdev->group && - !(hdev->quirks & HID_QUIRK_HAVE_SPECIAL_DRIVER)) { - ret = hid_scan_report(hdev); - if (ret) - hid_warn(hdev, "bad device descriptor (%d)\n", ret); - } + hid_set_group(hdev); hdev->id = atomic_inc_return(&id); From 4c2c5ff9f3d72fb41f884f7c493ae9df83379340 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 21 Aug 2025 16:38:14 +0200 Subject: [PATCH 2410/2411] HID: bpf: rescan the device for the group after a load/unload When a BPF gets loaded, it was previously not possible to bind a hid-generic device to hid-multitouch because the group was never updated. This change forces a rescan of the report descriptor after a bpf is loaded/unloaded so we set up the proper group. 
This was detected while Peter was trying to fix a Viewsonic device: the HID device sending multitouch data through a proprietary collection was handled by hid-generic, and we don't have any way of attaching it to hid-multitouch because the pre-scanning wasn't able to see the Contact ID HID usage. Suggested-by: Peter Hutterer Reviewed-by: Peter Hutterer Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index cf68fdffe058..a5b3a8ca2fcb 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -943,6 +943,15 @@ static int hid_scan_report(struct hid_device *hid) parser->device = hid; hid->group = HID_GROUP_GENERIC; + /* + * In case we are re-scanning after a BPF has been loaded, + * we need to use the bpf report descriptor, not the original one. + */ + if (hid->bpf_rdesc && hid->bpf_rsize) { + start = hid->bpf_rdesc; + end = start + hid->bpf_rsize; + } + /* * The parsing is simpler than the one in hid_open_report() as we should * be robust against hid errors.
Those errors will be raised by @@ -2728,6 +2737,12 @@ static int __hid_device_probe(struct hid_device *hdev, struct hid_driver *hdrv) int ret; if (!hdev->bpf_rsize) { + /* we keep a reference to the currently scanned report descriptor */ + const __u8 *original_rdesc = hdev->bpf_rdesc; + + if (!original_rdesc) + original_rdesc = hdev->dev_rdesc; + /* in case a bpf program gets detached, we need to free the old one */ hid_free_bpf_rdesc(hdev); @@ -2737,6 +2752,12 @@ static int __hid_device_probe(struct hid_device *hdev, struct hid_driver *hdrv) /* call_hid_bpf_rdesc_fixup will always return a valid pointer */ hdev->bpf_rdesc = call_hid_bpf_rdesc_fixup(hdev, hdev->dev_rdesc, &hdev->bpf_rsize); + + /* the report descriptor changed, we need to re-scan it */ + if (original_rdesc != hdev->bpf_rdesc) { + hdev->group = 0; + hid_set_group(hdev); + } } if (!hid_check_device_match(hdev, hdrv, &id)) From d1dd75c6500c74b91c5286fd3277710371d3e3ca Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Sat, 13 Sep 2025 16:12:54 +0000 Subject: [PATCH 2411/2411] HID: core: Change hid_driver to use a const char* for name name is never mutated by the core HID stack. Making name a const char* simplifies passing the string from Rust to C. Otherwise, it becomes difficult to pass a 'static lifetime CStr from Rust to a char*, rather than a const char*, due to lack of guarantee that the underlying data of the CStr will not be mutated by the C code. Signed-off-by: Rahul Rameshbabu Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/hid.h b/include/linux/hid.h index 2cc4f1e4ea96..426b22ed42b4 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -816,7 +816,7 @@ struct hid_usage_id { * zero from them. */ struct hid_driver { - char *name; + const char *name; const struct hid_device_id *id_table; struct list_head dyn_list;